Hi, I am training a hybrid network with a QNode as a PyTorch layer. Transfer learning works fine, but I can't train the whole network end to end. It seems that the PennyLane/Torch interface tries to differentiate the inputs, which results in the error below. I inserted a print into the pennylane/interfaces/torch.py module to confirm this (it appears as print(ctx.args) at line 109 in the traceback).
My stack trace:
ValueError Traceback (most recent call last)
<ipython-input-5-846e188427fd> in <module>
67 optimizer = optim.SGD(hybrid_model.parameters(), lr=0.01, momentum=0.8)
68
---> 69 qHist = train(device, hybrid_model, optimizer, criterion, 1, dataloader_train, dataloader_test)
70
71 plotTrainingResults([cHist, qHist], ["classical", "quantum"])
~\Hahn_schickard\jupyter\pyTorch_utils.py in train(device, model, optimizer, criterion, epochs, training, testing)
35 correct += (predicted == labels).float().sum()
36 loss = criterion(outputs, labels)
---> 37 loss.backward()
38 optimizer.step()
39 batch_percentage = i*20//number_batches
~\anaconda3\envs\myenv\lib\site-packages\torch\tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
243 create_graph=create_graph,
244 inputs=inputs)
--> 245 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
246
247 def register_hook(self, hook):
~\anaconda3\envs\myenv\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
145 Variable._execution_engine.run_backward(
146 tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 147 allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
148
149
~\anaconda3\envs\myenv\lib\site-packages\torch\autograd\function.py in apply(self, *args)
87 def apply(self, *args):
88 # _forward_cls is defined by derived class
---> 89 return self._forward_cls.backward(self, *args) # type: ignore
90
91
~\anaconda3\envs\myenv\lib\site-packages\pennylane\interfaces\torch.py in backward(ctx, dy)
176 if dy.is_cuda:
177 cuda_device = dy.get_device()
--> 178 vjp = dy.view(1, -1) @ ctx.jacobian.apply(ctx, *ctx.saved_tensors).to(dy)
179 vjp = torch.unbind(vjp.view(-1))
180 return (None,) + tuple(vjp)
~\anaconda3\envs\myenv\lib\site-packages\pennylane\interfaces\torch.py in forward(ctx_, parent_ctx, *input_)
124 ctx_.dy = parent_ctx.dy
125 ctx_.save_for_backward(*input_)
--> 126 jacobian = _evaluate_grad_matrix("jacobian")
127 return jacobian
128
~\anaconda3\envs\myenv\lib\site-packages\pennylane\interfaces\torch.py in _evaluate_grad_matrix(grad_matrix_fn)
109 print(ctx.args)
110 grad_matrix = getattr(tape, grad_matrix_fn)(
--> 111 device, params=ctx.args, **tape.jacobian_options
112 )
113 tape.set_parameters(ctx.all_params, trainable_only=False)
~\anaconda3\envs\myenv\lib\site-packages\pennylane\tape\qubit_param_shift.py in jacobian(self, device, params, **options)
122 self._append_evA_tape = True
123 self._evA_result = None
--> 124 return super().jacobian(device, params, **options)
125
126 def parameter_shift(self, idx, params, **options):
~\anaconda3\envs\myenv\lib\site-packages\pennylane\tape\jacobian_tape.py in jacobian(self, device, params, **options)
514
515 # perform gradient method validation
--> 516 diff_methods = self._grad_method_validation(method)
517
518 if not self._has_trainable_params(params, diff_methods):
~\anaconda3\envs\myenv\lib\site-packages\pennylane\tape\jacobian_tape.py in _grad_method_validation(self, method)
198
199 if nondiff_params:
--> 200 raise ValueError(f"Cannot differentiate with respect to parameter(s) {nondiff_params}")
201
202 numeric_params = {idx for idx, g in diff_methods.items() if g == "F"}
ValueError: Cannot differentiate with respect to parameter(s) {0}
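As far as I can tell, parameter {0} is the state vector that I pass to qml.QubitStateVector, which has no parameter-shift gradient recipe. I believe the following minimal sketch (untested, but it mirrors the parameter-shift path shown in the traceback) reproduces the same error outside my model:

import torch
import pennylane as qml

dev = qml.device("default.qubit", wires=2)

@qml.qnode(dev, interface="torch", diff_method="parameter-shift")
def circuit(state):
    # state preparation fed with a tensor that requires grad
    qml.QubitStateVector(state, wires=[0, 1])
    return qml.probs(wires=[0, 1])

state = torch.tensor([1.0, 0.0, 0.0, 0.0], requires_grad=True)
circuit(state).sum().backward()
# -> ValueError: Cannot differentiate with respect to parameter(s) {0}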
I use a qml.qnn.TorchLayer with parameters inputs and weights, exactly as described in
https://pennylane.readthedocs.io/en/stable/code/api/pennylane.qnn.TorchLayer.html
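For comparison, the pattern from those docs (reproduced from memory, so treat the details as approximate) encodes the inputs with a differentiable embedding rather than with state preparation:

n_qubits = 2
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev)
def qnode(inputs, weights):
    # AngleEmbedding is differentiable with respect to inputs
    qml.templates.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.templates.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

qlayer = qml.qnn.TorchLayer(qnode, {"weights": (3, n_qubits)})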
Here is my code:
import math as m

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pennylane as qml

def rotation_layer(params, qubits):
    # parametrized RY, RZ rotations; params has length 2 * len(qubits)
    n = len(qubits)
    for i, q in enumerate(qubits):
        qml.RY(params[i], wires=q)
    for i, q in enumerate(qubits):
        qml.RZ(params[i + n], wires=q)

def entanglement_layer(qubits):
    # ring of CNOTs over all qubits
    n = len(qubits)
    for i in range(n):
        qml.CNOT(wires=[qubits[i], qubits[(i + 1) % n]])

@qml.qnode(qml.device('default.qubit', wires=4))
def QNode4(inputs, weights):
    inputs = F.normalize(inputs, dim=-1, p=2)  # L2-normalization
    N = len(inputs)
    n = int(m.log(N, 2))
    # data encoding: amplitude-encode the N features into log2(N) qubits
    qml.QubitStateVector(inputs, wires=range(n))
    # variational circuit
    measureWires = range(n)
    for w in weights:
        rotation_layer(w, measureWires)
        entanglement_layer(measureWires)
    # measure
    return qml.probs(wires=measureWires)
# model class
class QNet(nn.Module):
    def __init__(self):
        super(QNet, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # affine operations: y = Wx + b
        self.fc1 = nn.Linear(16 * 16, 128)  # flattened conv2 output
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 16)
        n_qubits = 4
        n_layers = 2
        # note: rotation_layer only uses the first 2 * n_qubits entries per layer
        self.fc4 = qml.qnn.TorchLayer(QNode4, {"weights": (n_layers, n_qubits**2)})
        self.fc5 = nn.Linear(16, 10)

    def forward(self, x):
        # max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # if the size is a square, you can specify it with a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = torch.flatten(x, 1)  # flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # sqrt because the QNode returns probabilities and the encoded values
        # are amplitudes; this way input == output if there is only data encoding
        x = torch.sqrt(self.fc4(x)).to(device)
        x = self.fc5(x)
        return x

# training
hybrid_model = QNet().to(device)
epochs = 16
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(hybrid_model.parameters(), lr=0.01, momentum=0.8)

qHist = train(device, hybrid_model, optimizer, criterion, 1, dataloader_train, dataloader_test)
plotTrainingResults([cHist, qHist], ["classical", "quantum"])
I can't find a way to make PennyLane stop trying to differentiate the inputs, and as far as I understand the qml.qnn.TorchLayer class, this shouldn't be happening in the first place.
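The only workaround I can think of is detaching the inputs inside the QNode, sketched below with the helper layers from above, but then no gradients would flow back into conv1 through fc3, which defeats training the whole network:

# Hedged sketch, not a real fix: detach the inputs so autograd never
# asks for their gradient. This should silence the ValueError, but it
# also cuts the graph, so the classical layers before the quantum
# layer would stop learning.
@qml.qnode(qml.device('default.qubit', wires=4))
def QNode4_detached(inputs, weights):
    inputs = F.normalize(inputs, dim=-1, p=2).detach()  # cut the autograd graph here
    n = int(m.log(len(inputs), 2))
    qml.QubitStateVector(inputs, wires=range(n))
    for w in weights:
        rotation_layer(w, range(n))
        entanglement_layer(range(n))
    return qml.probs(wires=range(n))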