I have a quantum neural network (QNN) model that depends on its own spatial derivative for training, as the derivatives are part of the loss function.
I'm using the PyTorch interface, and I try to use autodiff to calculate the derivative (currently the diagonal of torch.autograd.functional.jacobian, because I need the derivative of the i'th output with respect to the i'th input).
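As a sanity check that taking the diagonal of the Jacobian really gives d u_i / d x_i, here is a minimal, purely classical sketch (the elementwise sine is just a stand-in for the network):

```python
import torch
from torch.autograd.functional import jacobian

# Elementwise stand-in for the network: the Jacobian of sin is diagonal,
# and its diagonal must match the analytic derivative cos(x).
x = torch.linspace(0, torch.pi, 8)
jac = jacobian(torch.sin, x)   # full (8, 8) Jacobian
d_dx = torch.diag(jac)         # d u_i / d x_i
assert torch.allclose(d_dx, torch.cos(x))
```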
Also, I use angle encoding to encode the x coordinates and PauliZ measurements, with StronglyEntanglingLayers in between. The inputs are scaled into [0, π], so my QuantumNetworkClass is the following:
QuantumNetworkClass
```python
import pennylane as qml
from torch import nn


class QuantumNetworkClass(nn.Module):
    def __init__(self, n_qubits=8, n_layers=2, x_scale_factor=1.0, x_scale_bias=0.0):
        super().__init__()
        self.n_qubits = n_qubits  # without the ancillae
        self.n_layers = n_layers
        self.weight_shapes = None
        self.x_scale_factor = x_scale_factor
        self.x_scale_bias = x_scale_bias
        self.q_layer, self.qnode = self.create_hybrid_network()

    def create_hybrid_network(self):
        dev = qml.device("default.qubit", wires=self.n_qubits)
        shape = qml.StronglyEntanglingLayers.shape(n_layers=self.n_layers, n_wires=self.n_qubits)
        self.weight_shapes = {"weights": shape}

        @qml.qnode(dev, interface="torch", diff_method="backprop", max_diff=1)
        def qnode(inputs, weights):
            # Rescale the inputs into [0, pi] before encoding
            scaled_inputs = (inputs - self.x_scale_bias) * self.x_scale_factor
            qml.AngleEmbedding(scaled_inputs, wires=range(self.n_qubits))
            qml.StronglyEntanglingLayers(weights, wires=range(self.n_qubits))
            return [qml.expval(qml.PauliZ(wires=i)) for i in range(self.n_qubits)]

        return qml.qnn.TorchLayer(qnode, self.weight_shapes), qnode

    def forward(self, inputs):
        return self.q_layer(inputs)
```
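For reference, a standalone forward pass through this module looks like the following (the seed and sizes here are just for illustration):

```python
import torch

torch.manual_seed(0)  # illustration only
net = QuantumNetworkClass(n_qubits=4, n_layers=2)
x = torch.linspace(0, torch.pi, 4)
u = net(x)         # one PauliZ expectation value per qubit
print(u.shape)     # torch.Size([4])
```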
The whole wrapper class:
QNNClass
```python
from IPython import display
import torch
from torch import optim
import matplotlib.pyplot as plt
from importlib import reload
from torch.autograd.functional import jacobian

import QuantumNetworkClass as QN

reload(QN)


class QNNClass:
    def __init__(self, n_qubits=8, n_layers=2, lr=0.2, epochs=200, x_range=[0, torch.pi], plot_idx=10):
        torch.manual_seed(42)
        self.n_qubits = n_qubits
        self.n_layers = n_layers
        self.lr = lr
        self.epochs = epochs
        self.x_range = x_range
        self.x_scale_bias = x_range[0]
        self.x_scale_factor = self.init_x_scale_factor()
        self.collocation_points = self.init_collocation_points()
        self.collocation_points.requires_grad = True
        self.model = self.init_model()
        self.opt = optim.Adam(self.model.parameters(), lr=lr)
        self.loss_arr = []
        self.plot_idx = plot_idx

    def init_collocation_points(self):
        return torch.linspace(self.x_range[0], self.x_range[1], self.n_qubits, dtype=torch.float32)

    def init_x_scale_factor(self):
        return torch.pi / (self.x_range[1] - self.x_range[0])  # scale the input to [0, pi]

    def init_model(self):
        qnet = QN.QuantumNetworkClass
        q_layer = qnet(n_qubits=self.n_qubits, n_layers=self.n_layers,
                       x_scale_factor=self.x_scale_factor, x_scale_bias=self.x_scale_bias)
        layers = [q_layer]
        model = torch.nn.Sequential(*layers)
        return model

    def spatial_derivative(self, u, spacing):
        # Finite-difference derivative, for comparison with the autodiff one
        spacing = (spacing.clone().detach(),)
        d_u_dx = torch.gradient(u, spacing=spacing, edge_order=2)
        return d_u_dx[0]

    def loss_fn(self, collocation_points, d_u_dx=None):
        # detach: the target cos(x) is data, not part of the graph
        return torch.sum((d_u_dx - torch.cos(collocation_points.detach())) ** 2) / self.n_qubits

    def show_plots(self, collocation_points, meas_vec=None, d_u_dx=None):
        fig, axs = plt.subplots(2)
        du = self.spatial_derivative(meas_vec, collocation_points)
        axs[0].plot(collocation_points.detach().numpy(), meas_vec.detach().numpy(), label='meas_vec')
        axs[1].plot(collocation_points.detach().numpy(), d_u_dx.detach().numpy(),
                    label='torch.autograd.functional.jacobian')
        axs[1].plot(collocation_points.detach().numpy(), du.detach().numpy(), 'r--', label='torch.gradient')
        axs[1].legend()
        axs[0].set_title("u(x)")
        axs[1].set_title("du/dx")
        # Show the plot
        plt.tight_layout(rect=(0, 0.03, 1, 0.95))
        plt.show()
        display.display(plt.gcf())

    def min_and_idx(self, arr):
        arr_tensor = torch.tensor(arr)
        min_val = torch.min(arr_tensor)
        min_idx = torch.argmin(arr_tensor)
        return min_val, min_idx

    def mid_prints(self, avg_loss, epoch):
        best_loss, best_idx = self.min_and_idx(self.loss_arr)
        print("Loss at epoch {}: {:.6f}".format(epoch, avg_loss))
        print("Best loss so far: {:.6f}, in index: {}".format(best_loss, best_idx))

    def plot_and_print(self, collocation_points, y_pred, d_u_dx, loss, epoch, show_plots, clear_output=True):
        if clear_output:
            display.clear_output()
        if show_plots:
            self.show_plots(collocation_points, meas_vec=y_pred, d_u_dx=d_u_dx)
        self.mid_prints(loss, epoch)

    def optimization(self, show_plots=True, clear_output=True):
        print("Starting optimization loop")
        for epoch in range(self.epochs):
            self.opt.zero_grad(set_to_none=False)  # check if adding False makes any difference
            u_out = self.model(self.collocation_points)
            t = self.collocation_points
            # d u_i / d x_i: diagonal of the full Jacobian of the network output
            jacob_mat_u = jacobian(lambda t_: self.model(t_), t, create_graph=True)
            u_grads = torch.diag(jacob_mat_u)
            loss = self.loss_fn(self.collocation_points, d_u_dx=u_grads)
            self.loss_arr.append(loss.item())  # store a float so the graph can be freed
            loss.backward()
            self.opt.step()
            if epoch % self.plot_idx == (self.plot_idx - 1) or epoch == self.epochs - 1:
                # Re-evaluate the model and its derivative only when plotting
                y_pred = self.model(self.collocation_points)
                jacob_mat_y = jacobian(lambda t_: self.model(t_), t, create_graph=True)
                d_y_dx = torch.diag(jacob_mat_y)
                self.plot_and_print(self.collocation_points, y_pred, d_y_dx, loss, epoch,
                                    show_plots, clear_output)
        best_loss, best_idx = self.min_and_idx(self.loss_arr)
        print("\nbest loss: {:.6f}, at epoch: {}".format(best_loss, best_idx))


hyperparameters = {'n_qubits': 8, 'n_layers': 1, 'lr': 0.07}
test = True
# test = False
if test:
    epochs = 1
    plot_idx = 1
else:
    epochs = 300
    plot_idx = 20

if hyperparameters:
    qnn = QNNClass(
        epochs=epochs,
        n_qubits=hyperparameters['n_qubits'],
        n_layers=hyperparameters['n_layers'],
        lr=hyperparameters['lr'],
        x_range=[0, torch.pi],
        plot_idx=plot_idx)
    qnn.optimization(show_plots=True, clear_output=False)
```
When I comment out the StronglyEntanglingLayers, the results are what I expect: the linspace points, after angle embedding and a PauliZ measurement, get a cosine shape due to the projection, and the derivative is a minus sine (both for the jacobian and the finite-difference one):
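For the no-entanglers case this can be confirmed directly on a single qubit (a minimal sketch; AngleEmbedding uses RX rotations by default, so ⟨Z⟩ = cos(x) and d⟨Z⟩/dx = −sin(x)):

```python
import pennylane as qml
import torch

dev = qml.device("default.qubit", wires=1)

@qml.qnode(dev, interface="torch", diff_method="backprop")
def embed_only(x):
    qml.AngleEmbedding(x, wires=[0])  # default rotation is RX
    return qml.expval(qml.PauliZ(0))

x = torch.tensor([0.7], requires_grad=True)
u = embed_only(x)
u.backward()
print(u, torch.cos(x))         # <Z> = cos(x)
print(x.grad, -torch.sin(x))   # d<Z>/dx = -sin(x)
```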
But when I uncomment the StronglyEntanglingLayers and get a more random-looking result (due to the random initialization of the weights), the jacobian and the finite-difference derivative don't agree, and it seems that the jacobian is wrong, especially at the first and last points:
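To gauge how much of the disagreement at the edges can come from the finite-difference side alone, here is a check independent of the QNN (the test function sin(3x) is arbitrary): on the same coarse 8-point grid, torch.gradient with edge_order=2 has to use one-sided stencils at the boundary, and its error is noticeably larger there.

```python
import torch

# How accurate is torch.gradient itself on a coarse 8-point grid?
x = torch.linspace(0, torch.pi, 8)
u = torch.sin(3 * x)                       # arbitrary oscillating test function
exact = 3 * torch.cos(3 * x)               # exact derivative
fd = torch.gradient(u, spacing=(x,), edge_order=2)[0]
print((fd - exact).abs())                  # here the error is largest at the two endpoints
```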
Why is that happening? Am I doing something wrong?
qml.about()
Name: PennyLane
Version: 0.35.1
Summary: PennyLane is a cross-platform Python library for quantum computing, quantum machine learning, and quantum chemistry. Train a quantum computer the same way as a neural network.
Home-page: https://github.com/PennyLaneAI/pennylane
Author:
Author-email:
License: Apache License 2.0
Location: /home/ziv/anaconda3/envs/cuda-quantum-0.46/lib/python3.10/site-packages
Requires: appdirs, autograd, autoray, cachetools, networkx, numpy, pennylane-lightning, requests, rustworkx, scipy, semantic-version, toml, typing-extensions
Required-by: PennyLane-Catalyst, PennyLane-qiskit, PennyLane_Lightning, PennyLane_Lightning_GPU, PennyLane_Lightning_Kokkos
Platform info: Linux-5.15.167.4-microsoft-standard-WSL2-x86_64-with-glibc2.35
Python version: 3.10.13
Numpy version: 1.26.4
Scipy version: 1.12.0
Installed devices:
- lightning.qubit (PennyLane_Lightning-0.36.0)
- default.clifford (PennyLane-0.35.1)
- default.gaussian (PennyLane-0.35.1)
- default.mixed (PennyLane-0.35.1)
- default.qubit (PennyLane-0.35.1)
- default.qubit.autograd (PennyLane-0.35.1)
- default.qubit.jax (PennyLane-0.35.1)
- default.qubit.legacy (PennyLane-0.35.1)
- default.qubit.tf (PennyLane-0.35.1)
- default.qubit.torch (PennyLane-0.35.1)
- default.qutrit (PennyLane-0.35.1)
- null.qubit (PennyLane-0.35.1)
- lightning.kokkos (PennyLane_Lightning_Kokkos-0.35.1)
- lightning.gpu (PennyLane_Lightning_GPU-0.35.1)
- nvidia.custatevec (PennyLane-Catalyst-0.5.0)
- nvidia.cutensornet (PennyLane-Catalyst-0.5.0)
- softwareq.qpp (PennyLane-Catalyst-0.5.0)
- qiskit.aer (PennyLane-qiskit-0.36.0)
- qiskit.basicaer (PennyLane-qiskit-0.36.0)
- qiskit.basicsim (PennyLane-qiskit-0.36.0)
- qiskit.ibmq (PennyLane-qiskit-0.36.0)
- qiskit.ibmq.circuit_runner (PennyLane-qiskit-0.36.0)
- qiskit.ibmq.sampler (PennyLane-qiskit-0.36.0)
- qiskit.remote (PennyLane-qiskit-0.36.0)
Thanks in advance!