The loss function decreases, but the test accuracy is always 0.5
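
(For context: the snippet below relies on definitions that are not shown, namely dev1, wires1, Norm1DArray, layer, para_init, bias_init, and max_iterations. A plausible reconstruction follows; six wires is inferred because the circuit touches wires 0-5 and the 64 pixel values exactly fill a 6-qubit amplitude vector, while the layer structure, num_layers, and initial values are assumptions in the style of the demo.)

import pennylane as qml
from pennylane import numpy as np

wires1 = 6  # the circuit uses wires 0-5; 2**6 = 64 matches the 8x8 digit images
dev1 = qml.device("default.qubit", wires=wires1)

def Norm1DArray(x):
    # L2-normalize the 64 pixel values into a valid 6-qubit amplitude vector
    return x / np.linalg.norm(x)

def layer(W):
    # one variational layer in the style of the demo: a parametrized
    # rotation on every wire, followed by a ring of CNOTs
    for w in range(wires1):
        qml.Rot(W[w, 0], W[w, 1], W[w, 2], wires=w)
    for w in range(wires1):
        qml.CNOT(wires=[w, (w + 1) % wires1])

num_layers = 2  # assumed; any small number of layers fits the loop below
para_init = 0.01 * np.random.randn(num_layers, wires1, 3, requires_grad=True)
bias_init = np.array(0.0, requires_grad=True)
max_iterations = 42  # the log below shows 42 iterations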

@qml.qnode(dev1)
def conv_net(features, para):
    # Earlier attempts, kept for reference but disabled
    # (they were wrapped in triple-quoted strings in my original code):
    # qml.MottonenStatePreparation(state_vector=Norm1DArray(features), wires=range(wires1))
    # qml.adjoint(convolutional_layer)(para[:63])
    # qml.CNOT(wires=[0, 3])
    # qml.CNOT(wires=[1, 4])
    # qml.CNOT(wires=[2, 5])
    # qml.CRY(para[0], wires=[3, 4])
    # qml.CRY(para[1], wires=[4, 5])
    # qml.CRY(para[2], wires=[5, 3])
    # qml.CRY(para[63], wires=[3, 4])
    # qml.CRY(para[64], wires=[4, 5])
    # qml.CRY(para[65], wires=[5, 3])

    # Active circuit: amplitude-encode the normalized image, then apply the ansatz
    qml.MottonenStatePreparation(state_vector=Norm1DArray(features), wires=range(wires1))
    for W in para:
        layer(W)

    return qml.expval(qml.PauliZ(5))
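
To sanity-check the wiring, the circuit can be drawn for a dummy input (x_dummy and the parameter shapes here follow the reconstruction sketched above):

x_dummy = np.ones(64) / 8.0   # unit-norm dummy image: 64 * (1/8)**2 == 1
print(qml.draw(conv_net)(x_dummy, para_init))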

def variational_classifier(para, bias, features):
    return conv_net(features, para) + bias

def cost(para, bias, features, labels):
    predictions = [variational_classifier(para, bias, f) for f in features]
    return square_loss(labels, predictions)
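
Here square_loss and accuracy are presumably the helpers from the variational-classifier demo, along these lines:

def square_loss(labels, predictions):
    # mean squared error; qml.math.stack keeps the prediction list differentiable
    return np.mean((labels - qml.math.stack(predictions)) ** 2)

def accuracy(labels, predictions):
    # fraction of predictions within numerical tolerance of their label
    return sum(abs(l - p) < 1e-5 for l, p in zip(labels, predictions)) / len(labels)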

# load the scikit-learn 8x8 digits dataset (64 features per sample)
from sklearn import datasets

digits = datasets.load_digits()
features, labels = digits.data, digits.target

# only use first two classes
features = features[np.where((labels == 0) | (labels == 1))]
labels = labels[np.where((labels == 0) | (labels == 1))]

num_train = int(0.75 * len(labels))
num_test = len(labels) - num_train

# normalize data
features = features / np.linalg.norm(features, axis=1).reshape((-1, 1))
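
As a quick check, every row should now be a unit vector, which is what MottonenStatePreparation expects:

print(np.linalg.norm(features, axis=1)[:5])   # should print values close to 1.0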
# shuffle the data, then split into train and test sets
index = np.random.permutation(range(len(labels)))
print(index)

x_train = features[index[:num_train]]
y_train = labels[index[:num_train]]
x_test = features[index[num_train:]]
y_test = labels[index[num_train:]]
print(x_train, x_train.shape)
print(y_train, y_train.shape)
print(x_test, x_test.shape)
print(y_test, y_test.shape)

opt = qml.NesterovMomentumOptimizer(0.05)
batch_size = 5

# train the variational classifier
weights = para_init
bias = bias_init

for it in range(max_iterations):

    # Update the weights by one optimizer step
    batch_index = np.random.randint(0, num_train, (batch_size,))
    x_train_batch = x_train[batch_index]
    y_train_batch = y_train[batch_index]
    weights, bias, _, _ = opt.step(cost, weights, bias, x_train_batch, y_train_batch)
    #print(weights)

    # Compute predictions on train and validation set
    predictions_train = [np.sign(variational_classifier(weights, bias, f)) for f in x_train]
    predictions_val = [np.sign(variational_classifier(weights, bias, f)) for f in x_test]

    # Compute accuracy on train and validation set
    acc_train = accuracy(y_train, predictions_train)
    acc_val = accuracy(y_test, predictions_val)

    print( "Iter: {:5d} | Cost: {:0.7f} | Acc train: {:0.7f} | Acc validation: {:0.7f} " 
          "".format(it + 1, cost(weights, bias, features, labels), acc_train, acc_val))

Iter: 1 | Cost: 0.2924991 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 2 | Cost: 0.2882189 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 3 | Cost: 0.2817786 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 4 | Cost: 0.2812447 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 5 | Cost: 0.2809657 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 6 | Cost: 0.3025342 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 7 | Cost: 0.3086777 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 8 | Cost: 0.3035319 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 9 | Cost: 0.3161824 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 10 | Cost: 0.3167217 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 11 | Cost: 0.3173991 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 12 | Cost: 0.3274824 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 13 | Cost: 0.3235402 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 14 | Cost: 0.3237779 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 15 | Cost: 0.3283499 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 16 | Cost: 0.3373461 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 17 | Cost: 0.3376850 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 18 | Cost: 0.3380883 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 19 | Cost: 0.3362846 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 20 | Cost: 0.3341627 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 21 | Cost: 0.3362121 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 22 | Cost: 0.3398094 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 23 | Cost: 0.3462145 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 24 | Cost: 0.3541274 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 25 | Cost: 0.3497878 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 26 | Cost: 0.3574255 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 27 | Cost: 0.3494558 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 28 | Cost: 0.3309330 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 29 | Cost: 0.3135182 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 30 | Cost: 0.3191676 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 31 | Cost: 0.3242883 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 32 | Cost: 0.3209517 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 33 | Cost: 0.3245124 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 34 | Cost: 0.3203839 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 35 | Cost: 0.3238435 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 36 | Cost: 0.3222851 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 37 | Cost: 0.3225848 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 38 | Cost: 0.3254254 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 39 | Cost: 0.3240605 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 40 | Cost: 0.3343621 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 41 | Cost: 0.3347087 | Acc train: 0.5037037 | Acc validation: 0.5111111
Iter: 42 | Cost: 0.3311704 | Acc train: 0.5037037 | Acc validation: 0.5111111

I am following the reference demo (Variational classifier — PennyLane documentation), adapted to binary classification of the digits 0 and 1 (here via scikit-learn's load_digits rather than full MNIST). I have tried different ansätze and optimizers; the loss function drops, but the test accuracy and training accuracy always stay around 0.5. How can I solve this?

@Maria_Schuld
@isaacdevlugt
@CatalinaAlbornoz

Hi @RX1! It would seem that your model isn't even fitting the training data, since the training accuracy is also stuck near 0.5. I would first try building a smaller version of your problem, just to make sure the issue isn't in the code itself. Then you can draw a conclusion about whether your circuit is simply unable to learn the task, or whether something else is going on.

You can try, for instance, modifying your ansatz, your cost function, or something else.
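
One concrete thing that may be worth double-checking in the snippet above (an observation, not a guaranteed fix): np.sign of the model output is -1 or +1, while load_digits gives labels 0 and 1, so a class-0 sample can essentially never match its prediction, which would pin both accuracies near the class balance of about 0.5 no matter what the loss does. The original demo uses labels in {-1, +1}; remapping right after selecting the two classes may already unstick the accuracy:

# np.sign(variational_classifier(...)) returns -1 or +1, but load_digits
# labels are 0 and 1; remap to the demo's +/-1 convention before training
labels = 2 * labels - 1   # 0 -> -1, 1 -> +1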

I hope this helps!