Good morning, and thanks for replying.
Here is the rest of the code:
Data loading

# Imports repeated here for completeness (most were already in my first comment)
import numpy as np
import tensorflow as tf
import jax
import jax.numpy as jnp
import optax
import pennylane as qml
from tensorflow.keras.datasets import mnist

def downsample(x_array, size):
    # Resize a single 28x28 image to (size, size) via tf.image.resize
    newsize = (size, size)
    x_array = np.reshape(x_array, (x_array.shape[0], x_array.shape[1], 1))
    new_array = tf.image.resize(x_array, newsize)
    return new_array.numpy()
def data(num_train_samples=None, num_test_samples=None, shuffle=False, resize=None):
    (train_X, train_y), (test_X, test_y) = mnist.load_data()
    # Keep only the digits 0 and 1
    X_train_filtered = train_X[np.isin(train_y, [0, 1])]
    y_train_filtered = train_y[np.isin(train_y, [0, 1])]
    X_test_filtered = test_X[np.isin(test_y, [0, 1])]
    y_test_filtered = test_y[np.isin(test_y, [0, 1])]
    # Normalise pixel values to [0, 1]
    X_train_filtered = X_train_filtered.astype('float32') / 255
    X_test_filtered = X_test_filtered.astype('float32') / 255
    X_train_new = []
    X_test_new = []
    if resize is not None and resize <= 28:
        for train in X_train_filtered:
            X_train_new.append(downsample(train, resize))
        for test in X_test_filtered:
            X_test_new.append(downsample(test, resize))
    else:
        raise Exception("The new size must be at most 28, the original MNIST image size!")
    X_train_new = np.array(X_train_new)
    X_test_new = np.array(X_test_new)
    # Shuffle: the same indices are applied to images and labels so they stay aligned
    if shuffle:
        train_indices = np.arange(len(X_train_new))
        test_indices = np.arange(len(X_test_new))
        np.random.shuffle(train_indices)
        np.random.shuffle(test_indices)
        X_train_new = X_train_new[train_indices]
        y_train_filtered = y_train_filtered[train_indices]
        X_test_new = X_test_new[test_indices]
        y_test_filtered = y_test_filtered[test_indices]
    # Subsample (slicing with None keeps everything) and flatten the images
    X_train_ = X_train_new[:num_train_samples]
    y_train_filtered = y_train_filtered[:num_train_samples]
    X_test_ = X_test_new[:num_test_samples]
    y_test_filtered = y_test_filtered[:num_test_samples]
    X_train_ = X_train_.reshape(X_train_.shape[0], -1)
    X_test_ = X_test_.reshape(X_test_.shape[0], -1)
    return (
        jnp.asarray(X_train_),
        jnp.asarray(y_train_filtered),
        jnp.asarray(X_test_),
        jnp.asarray(y_test_filtered),
    )
new_shape = 8
X_train, y_train, X_test, y_test = data(num_train_samples=200, num_test_samples=400, shuffle=True, resize=new_shape)
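Just as a quick check of the shapes the loader returns (with new_shape = 8, each image is flattened to 8*8 = 64 features):

# Expected shapes, given the settings above
print(X_train.shape, y_train.shape)  # (200, 64) (200,)
print(X_test.shape, y_test.shape)    # (400, 64) (400,)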
Quantum circuit
def feature_map_basic(X, wires=n_qubits):  # n_qubits is defined in my first comment
    # Three Rot gates per qubit, so 9 input features are consumed per wire
    idx = 0
    qml.Barrier()
    for i in range(wires):
        qml.Rot(phi=X[idx + 0], theta=X[idx + 1], omega=X[idx + 2], wires=i)
        idx += 3
        qml.Rot(phi=X[idx + 0], theta=X[idx + 1], omega=X[idx + 2], wires=i)
        idx += 3
        qml.Rot(phi=X[idx + 0], theta=X[idx + 1], omega=X[idx + 2], wires=i)
        idx += 3
    qml.Barrier()
def qlayer__(X, params):
    idx = 0
    for i in range(n_qubits):
        qml.Rot(phi=params[0 + idx], theta=params[1 + idx], omega=params[2 + idx], wires=i)
        idx += 3
    qml.CNOT(wires=[0, 1])
    qml.CNOT(wires=[2, 4])
    qml.CNOT(wires=[0, 3])
    qml.CNOT(wires=[5, 7])
    qml.CNOT(wires=[6, 2])
    qml.Barrier()
    for i in range(n_qubits):
        qml.Rot(phi=params[0 + idx], theta=params[1 + idx], omega=params[2 + idx], wires=i)
        idx += 3
def qlayer2__(X, params):
    idx = 0
    for i in range(n_qubits):
        qml.Rot(phi=params[0 + idx], theta=params[1 + idx], omega=params[2 + idx], wires=i)
        idx += 3
    qml.CNOT(wires=[1, 2])
    qml.CNOT(wires=[0, 5])
    qml.CNOT(wires=[4, 3])
    qml.CNOT(wires=[7, 0])
    qml.CNOT(wires=[5, 6])
    qml.Barrier()
    for i in range(n_qubits):
        qml.Rot(phi=params[0 + idx], theta=params[1 + idx], omega=params[2 + idx], wires=i)
        idx += 3
dev2 = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev2)
def qnode2(Xval, params):
    feature_map_basic(Xval)
    qlayer__(Xval, params)
    feature_map_basic(Xval)
    qlayer2__(Xval, params)
    qml.Barrier()
    return qml.expval(qml.PauliZ(0))
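As a side note, a quick way I use to inspect the circuit on a single sample (once the params created in the training section below exist) is qml.draw:

# Optional sanity check: draw the circuit and evaluate it on one training image
# (run this after `params` is initialised further down)
print(qml.draw(qnode2)(X_train[0], params))
print(qnode2(X_train[0], params))  # single expectation value in [-1, 1]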
Training and useful functions
@jax.jit
def optimizer_update(opt_state, params, x, y):
    # binary_crossentropy is one of the functions from my first comment
    loss_value, grads = jax.value_and_grad(lambda theta: binary_crossentropy(x, y, theta))(params)
    updates, opt_state = optimizer.update(grads, opt_state)
    params = optax.apply_updates(params, updates)
    return params, opt_state, loss_value
epochs = 500
batch_size = 10
seed = 199
qnn_batched = jax.vmap(qnode2, (0, None,))
qnn = jax.jit(qnn_batched)
#Lists to save data
costs = []
val_costs = []
train_per_epoch = []
val_per_epoch = []
acc_per_epoch = []
#Creating the initial random parameters for the QNN
key = jax.random.PRNGKey(seed)
initial_params = jax.random.normal(key, shape=(param_per_gate*n_qubits*gate_per_layer*layers,))  # these constants are defined in my first comment
key = jax.random.split(key)[0]
params = jnp.copy(initial_params)
#Optimizer initialization
optimizer = optax.adam(learning_rate=0.01)
opt_state = optimizer.init(initial_params)
for epoch in range(1, epochs + 1):
    # Random indices intended for mini-batching (currently unused; see the sketch below)
    idxs_dataset = jax.random.choice(key, jnp.arange(X_train.shape[0]), shape=(X_train.shape[0],), replace=False)
    key = jax.random.split(key)[0]
    # Full-batch update, then track train/validation loss and train accuracy
    params, opt_state, cost = optimizer_update(opt_state, params, X_train, y_train)
    cost = binary_crossentropy(X_train, y_train, params)
    costs.append(cost)
    acc = calculate_accuracy(params, X_train, y_train)
    val_cost = binary_crossentropy(X_test, y_test, params)
    val_costs.append(val_cost)
    train_per_epoch.append(cost)
    val_per_epoch.append(val_cost)
    acc_per_epoch.append(acc)
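(Note that batch_size and idxs_dataset are defined but never actually used above; a minimal sketch of how they could drive mini-batch updates, keeping the same optimizer_update, would be:)

for epoch in range(1, epochs + 1):
    # Shuffle the sample indices once per epoch
    idxs_dataset = jax.random.choice(key, jnp.arange(X_train.shape[0]),
                                     shape=(X_train.shape[0],), replace=False)
    key = jax.random.split(key)[0]
    # Sweep over the shuffled data in chunks of batch_size
    for start in range(0, X_train.shape[0], batch_size):
        batch_idx = idxs_dataset[start:start + batch_size]
        params, opt_state, cost = optimizer_update(opt_state, params,
                                                   X_train[batch_idx], y_train[batch_idx])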
Of course, you also need the functions I uploaded in my first comment.
I hope this is what you were asking for.
My question is whether you could go into more detail about the multiclass classification implementation. If I understood correctly, should I modify the cross-entropy like this?
def hybrid_layer(ypredicted):
    # Apply a Keras softmax to the predictions
    # (NB: the output is a TF tensor, so jax.grad cannot differentiate through it)
    return tf.keras.layers.Softmax()(ypredicted)

def cate_cross(X, y, theta):
    ytrue = jnp.array(y)
    # ypred = mapping(qnn(X, theta))  # qnn is the model I made
    ypred = qnn(X, theta)
    ypred_multiclass = hybrid_layer(ypred).numpy()
    epsilon = 1e-8  # avoid log(0), which diverges to -infinity
    ypred_bounded = jnp.clip(ypred_multiclass, epsilon, 1 - epsilon)  # bound the values of ypred
    loss = ytrue * jnp.log10(ypred_bounded)  # (natural log is the usual choice; log10 only rescales the loss)
    return -jnp.mean(loss)
Obviously I need to measure two qubits, and I probably need to flatten the measurement result in order to get a (1, 4) array per sample.
It may be unnecessary to map the circuit outputs into positive values in the range [0, 1], since the softmax should make everything positive anyway, right? How would I deal with accuracy in this case? Is this correct? And how could I implement the second piece of advice you gave me?
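To make the idea concrete, here is a minimal pure-JAX sketch of what I have in mind, assuming the circuit is changed to return qml.probs(wires=[0, 1]) (4 outcomes for 4 classes), the labels are one-hot encoded, and jax.nn.softmax replaces the Keras layer (qnode_multi, qnn_multi and calculate_accuracy_multi are just placeholder names):

# Hypothetical sketch: 4-class version with two measured qubits, one-hot labels
# and a pure-JAX softmax, so jax.grad / jax.jit still work end to end
@qml.qnode(dev2)
def qnode_multi(Xval, params):
    feature_map_basic(Xval)
    qlayer__(Xval, params)
    feature_map_basic(Xval)
    qlayer2__(Xval, params)
    return qml.probs(wires=[0, 1])  # shape (4,): probabilities of |00>, |01>, |10>, |11>

qnn_multi = jax.jit(jax.vmap(qnode_multi, (0, None)))

def cate_cross(X, y, theta):
    # y is assumed one-hot, shape (batch, 4)
    ytrue = jnp.asarray(y)
    outputs = qnn_multi(X, theta)             # shape (batch, 4)
    ypred = jax.nn.softmax(outputs, axis=-1)  # optional here, since qml.probs is already normalised
    epsilon = 1e-8
    ypred = jnp.clip(ypred, epsilon, 1 - epsilon)
    return -jnp.mean(jnp.sum(ytrue * jnp.log(ypred), axis=-1))

def calculate_accuracy_multi(theta, X, y):
    preds = jnp.argmax(qnn_multi(X, theta), axis=-1)  # predicted class index
    labels = jnp.argmax(jnp.asarray(y), axis=-1)      # true class index from one-hot
    return jnp.mean(preds == labels)

For accuracy, my guess is that taking the argmax over the four outcome probabilities and comparing it with the true class is the natural choice, as in the sketch above.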
Anyway, thanks a lot!!!