I use the following PennyLane device:

`dev = qml.device('qiskit.aer', wires=wires)`

Training cost function:

```
def cost(params, features, labels):
    """Return the value of ``square_loss`` for the classifier on a batch.

    ``variational_classifier`` is evaluated with ``params`` on every entry
    of ``features``; the collected outputs are then compared against
    ``labels`` by ``square_loss``.
    """
    predictions = []
    for feat in features:
        predictions.append(variational_classifier(params, feat))
    return square_loss(labels, predictions)
```

The training function is as follows:

```
def TrainCost(params_init):
    """Train the variational classifier with mini-batch Nesterov momentum.

    Runs 30 optimizer steps. Each step draws a random batch of 30 samples
    from ``feats_train`` / ``Y_train``, updates the weights, and then
    records accuracy and F1 on the full training and validation sets as
    well as the cost on ``features`` / ``Y``.

    Args:
        params_init: Initial parameters handed to ``cost`` and
            ``variational_classifier``.

    Returns:
        dict: The final ``"weights"`` plus the per-iteration histories
        ``"his_weights"``, ``"his_cost"``, ``"his_acc_train"``,
        ``"his_acc_val"``, ``"f1_train"`` and ``"f1_test"``.
    """
    opt = qml.NesterovMomentumOptimizer(0.09)
    batch_size = 30
    weights = params_init

    his_acc_val = []
    his_acc_train = []
    his_cost = []
    f1_train = []
    f1_test = []
    his_weights = []

    for it in range(30):
        # NOTE(review): the upper bound 2 * train_size assumes feats_train
        # holds at least that many rows -- confirm, otherwise this indexes
        # out of range.
        batch_index = np.random.randint(0, 2 * train_size, (batch_size,))
        feats_train_batch = feats_train[batch_index]
        Y_train_batch = Y_train[batch_index]

        # Update the weights by one optimizer step. The step hands back one
        # value per positional argument of cost; only the weights are kept.
        weights, _, _ = opt.step(cost, weights, feats_train_batch, Y_train_batch)
        his_weights.append(weights)

        # Compute predictions on train and validation set
        predictions_train = [
            np.sign(variational_classifier(weights, feat)) for feat in feats_train
        ]
        predictions_val = [
            np.sign(variational_classifier(weights, feat)) for feat in feats_val
        ]

        # Compute accuracy and F1 on train and validation set
        acc_train = accuracy(Y_train, predictions_train)
        acc_val = accuracy(Y_val, predictions_val)
        f1_train_score = f1_score(Y_train, predictions_train)
        f1_val_score = f1_score(Y_val, predictions_val)
        print(f1_train_score, f1_val_score)

        f1_train.append(f1_train_score)
        f1_test.append(f1_val_score)
        his_acc_val.append(acc_val)
        his_acc_train.append(acc_train)

        # Evaluate the full-data cost once: it is expensive, and reusing the
        # value keeps the stored history identical to what is printed.
        current_cost = cost(weights, features, Y)
        his_cost.append(current_cost)
        print(
            "Iter: {:5d} | Cost: {:0.7f} | Acc train: {:0.7f} | Acc validation: {:0.7f} ".format(
                it + 1, current_cost, acc_train, acc_val
            )
        )

    return {
        "weights": weights,
        "his_weights": his_weights,
        "his_cost": his_cost,
        "his_acc_train": his_acc_train,
        "his_acc_val": his_acc_val,
        "f1_train": f1_train,
        "f1_test": f1_test,
    }
```

Training results:

@isaacdevlugt @CatalinaAlbornoz @Maria_Schuld