Hi, I also have another question about the training of my hybrid PennyLane–Torch model.
import torch
import torch.nn as nn
# from qiskit_aer.noise import NoiseModel, depolarizing_error
import pennylane as qml
from pennylane import numpy as np

n_qubits = 10  # amplitude embedding therefore expects inputs of length 2**10 = 1024
n_classes = 3

# Mixed-state (density-matrix) simulator so noise channels such as
# qml.DepolarizingChannel are allowed in the circuit.
# BUG FIX: shots=1 made every expectation value a single +/-1 sample, which
# yields extremely noisy, essentially useless gradients and forces slow
# finite-shot differentiation. shots=None runs in analytic mode: exact
# expectation values and exact gradients, and it is much faster to train.
dev = qml.device("default.mixed", wires=n_qubits, shots=None)
def state_preparation(features):
    """Load the classical feature vector into the qubit register.

    Uses amplitude embedding across all wires; ``normalize=True`` rescales
    the input so it forms a valid quantum state.
    """
    target_wires = list(range(n_qubits))
    qml.AmplitudeEmbedding(features, wires=target_wires, normalize=True)
def embedding_layer(params):
    """Trainable embedding: a Hadamard on every wire, then one RY per wire.

    params: sequence of n_qubits rotation angles (one RY angle per qubit).
    """
    for wire in range(n_qubits):
        qml.H(wires=wire)
    for wire, angle in enumerate(params):
        qml.RY(angle, wires=wire)
def vqc(params, index): # Main VQC
    """Two-qubit variational block acting on wires (index, index + 1).

    params: 12 rotation angles, consumed in the fixed order below.
    index: lower wire of the pair; index + 1 must also be a valid wire.
    """
    # Six single-qubit rotations before the entangler, alternating wires.
    qml.RX(params[0], wires=index)
    qml.RY(params[1], wires=index + 1)
    qml.RY(params[2], wires=index)
    qml.RZ(params[3], wires=index + 1)
    qml.RZ(params[4], wires=index)
    qml.RX(params[5], wires=index + 1)
    # Entangle the pair.
    qml.CZ(wires=[index, index + 1])
    # Six more single-qubit rotations after the CZ.
    qml.RZ(params[6], wires=index)
    qml.RX(params[7], wires=index + 1)
    qml.RY(params[8], wires=index)
    qml.RZ(params[9], wires=index + 1)
    qml.RX(params[10], wires=index)
    qml.RY(params[11], wires=index + 1)
    # Final entangler, controlled from the upper wire.
    qml.CNOT(wires=[index + 1, index])
@qml.qnode(dev, interface="torch")
def full_circuit(inputs, all_params):
    """Full hybrid circuit: amplitude encoding -> H/RY embedding -> two
    pairwise VQC layers -> CNOT ring -> depolarizing noise on wire 0 ->
    StronglyEntanglingLayers -> one PauliZ expectation per wire.

    inputs: feature vector amplitude-embedded over n_qubits wires
        (length 2**n_qubits = 1024).
    all_params: flat trainable vector of length 160, sliced below.
    Returns a list of n_qubits expectation values, each in [-1, 1].
    """
    embed_len = 10
    vqc1_len = 12 * (n_qubits // 2) # 60
    vqc2_len = 12 * (n_qubits // 2) # another 60
    sel_len = embed_len + vqc1_len + vqc2_len # offset (130) where the SEL slice begins; the SEL slice itself holds 1 * n_qubits * 3 = 30 parameters
    embedding_params = all_params[:embed_len]
    vqc1_params = all_params[embed_len:embed_len + vqc1_len]
    vqc2_params = all_params[embed_len + vqc1_len:sel_len]
    sel_params = all_params[sel_len:]
    # Reshape flat slices: one row of 12 angles per two-qubit VQC block.
    vqc1_params = vqc1_params.view(n_qubits // 2, 12)
    vqc2_params = vqc2_params.view(n_qubits // 2, 12)
    sel_params = sel_params.view(1, n_qubits, 3)  # (layers=1, wires, 3)
    state_preparation(inputs)
    embedding_layer(embedding_params)
    # First VQC layer on pairs (0,1), (2,3), ..., (8,9).
    for i in range(n_qubits // 2):
        vqc(vqc1_params[i], i * 2)
    # Second VQC layer acts on the SAME even pairs as the first.
    # NOTE(review): a brickwork layout would use offset i * 2 + 1 here —
    # confirm repeating the even pairing is intentional.
    for i in range(n_qubits // 2):
        vqc(vqc2_params[i], i * 2)
    # Ring of CNOTs coupling each wire to its neighbour (wraps via modulo).
    for i in range(n_qubits):
        qml.CNOT(wires=[i, (i + 1) % n_qubits])
    # Depolarizing noise on wire 0 only; requires a mixed-state device.
    qml.DepolarizingChannel(0.01, wires = 0)
    qml.StronglyEntanglingLayers(weights=sel_params, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]
# Size of the single flat trainable vector handed to the circuit,
# matching the slicing performed inside full_circuit (total: 160).
total_num_params = sum((
    10,                    # embedding_layer RY angles
    12 * (n_qubits // 2),  # first VQC layer (12 angles per pair)
    12 * (n_qubits // 2),  # second VQC layer (repeated pairing)
    1 * n_qubits * 3,      # StronglyEntanglingLayers: 1 layer x wires x 3
))
weight_shapes = {"all_params": total_num_params}
# PyTorch model wrapper
class QuantumModel(nn.Module):
    """Hybrid classifier: quantum circuit layer + classical linear head."""

    def __init__(self):
        super().__init__()
        # TorchLayer registers the flat quantum weight vector as a trainable
        # parameter, initialized from a normal distribution.
        self.q_layer = qml.qnn.TorchLayer(full_circuit, weight_shapes, torch.nn.init.normal_)
        self.fc = nn.Linear(n_qubits, n_classes, dtype=torch.float64)

    def forward(self, x):
        """Return raw class scores; no softmax here (CrossEntropyLoss applies it)."""
        expectations = self.q_layer(x)
        return self.fc(expectations)
import torch.optim as optim
import matplotlib.pyplot as plt
# Prefer GPU for the classical layers; the PennyLane simulator itself still
# runs on CPU, so the circuit dominates runtime either way.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = QuantumModel().to(device)
# Expects raw logits and integer class labels (multiclass).
criterion = nn.CrossEntropyLoss()
# NOTE(review): lr=0.5 is extremely large for AdamW — typical values are
# 1e-3 to 1e-2; confirm this is intentional.
optimizer = optim.AdamW(model.parameters(), lr=0.5, weight_decay = 0.001)
# Decay the learning rate by 10x every 5 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
num_epochs = 20
# Track losses and accuracies
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
for epoch in range(1, num_epochs + 1):
    # ---- training pass ----
    model.train()
    train_loss = 0.0
    correct_train = 0
    total_train = 0
    # train_loader is defined elsewhere — presumably yields (features, int
    # labels) batches; TODO confirm against the data-loading code.
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch total averages per-sample.
        train_loss += loss.item() * inputs.size(0)
        pred_labels = torch.argmax(logits, dim=1)
        correct_train += (pred_labels == labels).sum().item()
        total_train += labels.size(0)
    train_loss /= total_train
    train_acc = correct_train / total_train
    # ---- validation pass (no gradients) ----
    model.eval()
    val_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            loss = criterion(logits, labels)
            val_loss += loss.item() * inputs.size(0)
            pred_labels = torch.argmax(logits, dim=1)
            correct_val += (pred_labels == labels).sum().item()
            total_val += labels.size(0)
    val_loss /= total_val
    val_acc = correct_val / total_val
    # Store metrics
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)
    # NOTE(review): this print fires only once per full epoch; a 10-qubit
    # density-matrix simulation is expensive per forward pass, so a long
    # silence does not necessarily mean the loop is stuck.
    print(f"Epoch {epoch}/{num_epochs} — Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
    # Step the LR schedule once per epoch.
    scheduler.step()
My data vectors have length 1024, and I have been training with around 1,000 samples for 250 minutes, but no results have been printed out (e.g., the per-epoch metrics). I want to ask some questions:
-
What differentiation method is used for the default.mixed device? I believe it is no longer backpropagation, so would training take longer?
-
Can you suggest how I can add noise to my circuit while still being able to train it as part of my model? My task is a multiclass classification problem.