Unexpected results when using torch.autograd.functional.jacobian with QNN

I have a quantum neural network (QNN) model that depends on its own spatial derivative for training, since the derivatives appear in the loss function.
I’m using the PyTorch interface and trying to use autodiff to calculate the derivative (currently the diagonal of torch.autograd.functional.jacobian, since I need the derivative of the i-th output with respect to the i-th input).
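As a standalone illustration of the pattern I mean (a toy sketch of my own, separate from the actual model below):

# For an elementwise function, the diagonal of the Jacobian is exactly dy_i/dx_i,
# which is the quantity I take as the derivative of the QNN output.
import torch
from torch.autograd.functional import jacobian

x = torch.linspace(0, torch.pi, 8, requires_grad=True)
f = torch.sin                                   # stand-in for the model's forward pass
dy_dx = torch.diag(jacobian(f, x, create_graph=True))
print(torch.allclose(dy_dx, torch.cos(x)))      # True: d sin(x_i)/dx_i = cos(x_i)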
I use angle encoding to encode the x coordinates and PauliZ measurements, with StronglyEntanglingLayers in between. The inputs are scaled into [0, pi], so my QuantumNetworkClass is the following:

QuantumNetworkClass
import pennylane as qml
from torch import nn

class QuantumNetworkClass(nn.Module):
    def __init__(self, n_qubits=8, n_layers=2, x_scale_factor=1.0, x_scale_bias=0.0):
        super().__init__()
        self.n_qubits = n_qubits  # without the ancillae
        self.n_layers = n_layers
        self.weight_shapes = None
        self.x_scale_factor = x_scale_factor
        self.x_scale_bias = x_scale_bias
        self.q_layer, self.qnode = self.create_hybrid_network()

    def create_hybrid_network(self):
        dev = qml.device("default.qubit", wires=self.n_qubits)
        shape = qml.StronglyEntanglingLayers.shape(n_layers=self.n_layers, n_wires=self.n_qubits)
        self.weight_shapes = {"weights": shape}

        @qml.qnode(dev, interface="torch", diff_method="backprop", differentiable=True, max_diff=1)
        def qnode(inputs, weights):
            scaled_inputs = (inputs - self.x_scale_bias) * self.x_scale_factor
            qml.AngleEmbedding(scaled_inputs, wires=range(self.n_qubits))
            qml.StronglyEntanglingLayers(weights, wires=range(self.n_qubits))
            return [qml.expval(qml.PauliZ(wires=i)) for i in range(self.n_qubits)]
        return qml.qnn.TorchLayer(qnode, self.weight_shapes), qnode

    def forward(self, inputs):
        return self.q_layer(inputs)

The whole wrapper class:

QNNClass
from IPython import display
import torch
from torch import optim
import matplotlib.pyplot as plt
from importlib import reload
from torch.autograd.functional import jacobian
import QuantumNetworkClass as QN


reload(QN)


class QNNClass:
    def __init__(self, n_qubits=8, n_layers=2, lr=0.2, epochs=200, x_range=[0, torch.pi], plot_idx=10):
        torch.manual_seed(42)
        self.n_qubits = n_qubits
        self.n_layers = n_layers
        self.lr = lr
        self.epochs = epochs
        self.x_range = x_range
        self.x_scale_bias = x_range[0]
        self.x_scale_factor = self.init_x_scale_factor()
        self.collocation_points = self.init_collocation_points()
        self.collocation_points.requires_grad = True
        self.model = self.init_model()
        self.opt = optim.Adam(self.model.parameters(), lr=lr)
        self.loss_arr = []
        self.plot_idx = plot_idx

    def init_collocation_points(self):
        return torch.linspace(self.x_range[0], self.x_range[1], self.n_qubits, dtype=torch.float32)

    def init_x_scale_factor(self):
        return torch.pi / (self.x_range[1] - self.x_range[0])  # scale the input to [0, pi]

    def init_model(self):
        qnet = QN.QuantumNetworkClass
        q_layer = qnet(n_qubits=self.n_qubits, n_layers=self.n_layers, x_scale_factor=self.x_scale_factor,
                       x_scale_bias=self.x_scale_bias)
        layers = [q_layer]
        model = torch.nn.Sequential(*layers)
        return model

    def spatial_derivative(self, u, spacing):  # The spatial derivative for comparison
        spacing = (spacing.clone().detach(),)
        d_u_dx = torch.gradient(u, spacing=spacing, edge_order=2)
        return d_u_dx[0]

    def loss_fn(self, collocation_points, d_u_dx=None):
        return (torch.sum((d_u_dx - torch.cos(torch.tensor(collocation_points))) ** 2)) / self.n_qubits

    def show_plots(self, collocation_points, meas_vec=None, d_u_dx=None):
        fig, axs = plt.subplots(2)
        du = self.spatial_derivative(meas_vec, collocation_points)
        axs[0].plot(collocation_points.detach().numpy(), meas_vec.detach().numpy(), label=f'meas_vec')
        axs[1].plot(collocation_points.detach().numpy(), d_u_dx.detach().numpy(),
                        label=f'torch.autograd.functional.jacobian')
        axs[1].plot(collocation_points.detach().numpy(), du.detach().numpy(), 'r--', label='torch.gradient')
        axs[1].legend()
        axs[0].set_title("u(x)")
        axs[1].set_title("du/dx")

        # Show the plot
        plt.tight_layout(rect=(0, 0.03, 1, 0.95))
        plt.show()
        display.display(plt.gcf())

    def min_and_idx(self, arr):
        arr_tensor = torch.tensor(arr)
        min_val = torch.min(arr_tensor)
        min_idx = torch.argmin(arr_tensor)
        return min_val, min_idx

    def mid_prints(self, avg_loss, epoch):
        best_loss, best_idx = self.min_and_idx(self.loss_arr)
        print("Loss at epoch {}: {:.6f}".format(epoch, avg_loss))
        print("Best loss so far: {:.6f}, in index: {}".format(best_loss, best_idx))

    def plot_and_print(self, collocation_points, y_pred, d_u_dx, loss, epoch, show_plots, clear_output=True):
        if clear_output:
            display.clear_output()
        if show_plots:
            self.show_plots(collocation_points, meas_vec=y_pred, d_u_dx=d_u_dx)
        self.mid_prints(loss, epoch)

    def optimization(self, show_plots=True, clear_output=True):
        print("Starting optimization loop")
        for epoch in range(self.epochs):
            self.opt.zero_grad(set_to_none=False)  # check if adding False makes any difference
            u_out = self.model(self.collocation_points)
            t = self.collocation_points
            jacob_mat_u = jacobian(lambda t_: self.model(t_), t, create_graph=True)
            u_grads = torch.diag(jacob_mat_u)

            loss = self.loss_fn(self.collocation_points, d_u_dx=u_grads)
            self.loss_arr.append(loss)
            loss.backward()

            self.opt.step()
            y_pred = self.model(self.collocation_points)
            jacob_mat_y = jacobian(lambda t_: self.model(t_), t, create_graph=True)
            d_y_dx = torch.diag(jacob_mat_y)
            if epoch % self.plot_idx == (self.plot_idx - 1) or epoch == self.epochs - 1:
                self.plot_and_print(self.collocation_points, y_pred, d_y_dx, loss, epoch, show_plots,
                                    clear_output)
        best_loss, best_idx = self.min_and_idx(self.loss_arr)
        print("\n best loss: {:.6f}, at epoch: {}".format(best_loss, best_idx))


hyperparameters = {'n_qubits': 8, 'n_layers': 1, 'lr': 0.07}
test = True
# test = False

if test:
    epochs = 1
    plot_idx = 1
else:
    epochs = 300
    plot_idx = 20

if hyperparameters:
    qnn = QNNClass(
        epochs=epochs,
        n_qubits=hyperparameters['n_qubits'],
        n_layers=hyperparameters['n_layers'],
        lr=hyperparameters['lr'],
        x_range=[0, torch.pi],
        plot_idx=plot_idx)
    qnn.optimization(show_plots=True, clear_output=False)

When I comment out the StronglyEntanglingLayers, the results are what I expect: the linspace points, after angle embedding and a PauliZ measurement, give a cosine shape due to the projection, and the derivative is a minus sine (both the Jacobian one and the spatial one):
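(For reference, a minimal standalone version of this no-entangler baseline; my own sketch, separate from the classes above:)

# With angle embedding only, each wire gives <Z> = cos(x_i),
# so the diagonal of the Jacobian should be -sin(x_i).
import pennylane as qml
import torch
from torch.autograd.functional import jacobian

n = 8
dev = qml.device("default.qubit", wires=n)

@qml.qnode(dev, interface="torch", diff_method="backprop")
def embed_only(inputs):
    qml.AngleEmbedding(inputs, wires=range(n))  # default rotation is RX
    return [qml.expval(qml.PauliZ(i)) for i in range(n)]

x = torch.linspace(0, torch.pi, n, dtype=torch.float64, requires_grad=True)
u = torch.stack(embed_only(x))
du_dx = torch.diag(jacobian(lambda t: torch.stack(embed_only(t)), x))
print(torch.allclose(u, torch.cos(x), atol=1e-6))        # expect True
print(torch.allclose(du_dx, -torch.sin(x), atol=1e-6))   # expect True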

But when I uncomment the StronglyEntanglingLayers and get a more random-looking result (due to the random initialization of the weights), the Jacobian and the spatial derivative don’t agree, and it seems that the Jacobian is wrong, especially at the first and last points:

Why is that happening? Am I doing something wrong?

qml.about()
Name: PennyLane
Version: 0.35.1
Summary: PennyLane is a cross-platform Python library for quantum computing, quantum machine learning, and quantum chemistry. Train a quantum computer the same way as a neural network.
Home-page: https://github.com/PennyLaneAI/pennylane
Author: 
Author-email: 
License: Apache License 2.0
Location: /home/ziv/anaconda3/envs/cuda-quantum-0.46/lib/python3.10/site-packages
Requires: appdirs, autograd, autoray, cachetools, networkx, numpy, pennylane-lightning, requests, rustworkx, scipy, semantic-version, toml, typing-extensions
Required-by: PennyLane-Catalyst, PennyLane-qiskit, PennyLane_Lightning, PennyLane_Lightning_GPU, PennyLane_Lightning_Kokkos

Platform info:           Linux-5.15.167.4-microsoft-standard-WSL2-x86_64-with-glibc2.35
Python version:          3.10.13
Numpy version:           1.26.4
Scipy version:           1.12.0
Installed devices:
- lightning.qubit (PennyLane_Lightning-0.36.0)
- default.clifford (PennyLane-0.35.1)
- default.gaussian (PennyLane-0.35.1)
- default.mixed (PennyLane-0.35.1)
- default.qubit (PennyLane-0.35.1)
- default.qubit.autograd (PennyLane-0.35.1)
- default.qubit.jax (PennyLane-0.35.1)
- default.qubit.legacy (PennyLane-0.35.1)
- default.qubit.tf (PennyLane-0.35.1)
- default.qubit.torch (PennyLane-0.35.1)
- default.qutrit (PennyLane-0.35.1)
- null.qubit (PennyLane-0.35.1)
- lightning.kokkos (PennyLane_Lightning_Kokkos-0.35.1)
- lightning.gpu (PennyLane_Lightning_GPU-0.35.1)
- nvidia.custatevec (PennyLane-Catalyst-0.5.0)
- nvidia.cutensornet (PennyLane-Catalyst-0.5.0)
- softwareq.qpp (PennyLane-Catalyst-0.5.0)
- qiskit.aer (PennyLane-qiskit-0.36.0)
- qiskit.basicaer (PennyLane-qiskit-0.36.0)
- qiskit.basicsim (PennyLane-qiskit-0.36.0)
- qiskit.ibmq (PennyLane-qiskit-0.36.0)
- qiskit.ibmq.circuit_runner (PennyLane-qiskit-0.36.0)
- qiskit.ibmq.sampler (PennyLane-qiskit-0.36.0)
- qiskit.remote (PennyLane-qiskit-0.36.0)

Thanks in advance!

Hi @ZivChen, do you get the same results with v0.39 of PennyLane?

If so, would you be able to share a minimal reproducible example that exhibits this behaviour? E.g. using a small dataset and sharing all of the code needed so that we can copy-paste it and try to reproduce the issue.

Hi Catalina, Thanks for the reply.

Yes, I get the same results with v0.39 of PennyLane.

Here is a working code:

full code
from IPython import display
import torch
from torch import optim, nn
import matplotlib.pyplot as plt
from importlib import reload
from torch.autograd.functional import jacobian
import pennylane as qml

class QuantumNetworkClass(nn.Module):
    def __init__(self, n_qubits=8, n_layers=2, x_scale_factor=1.0, x_scale_bias=0.0):
        super().__init__()
        self.n_qubits = n_qubits  # without the ancillae
        self.n_layers = n_layers
        self.weight_shapes = None
        self.x_scale_factor = x_scale_factor
        self.x_scale_bias = x_scale_bias
        self.q_layer, self.qnode = self.create_hybrid_network()

    def create_hybrid_network(self):
        dev = qml.device("default.qubit", wires=self.n_qubits)
        shape = qml.StronglyEntanglingLayers.shape(n_layers=self.n_layers, n_wires=self.n_qubits)
        self.weight_shapes = {"weights": shape}

        @qml.qnode(dev, interface="torch", diff_method="backprop", differentiable=True, max_diff=1)
        def qnode(inputs, weights):
            scaled_inputs = (inputs - self.x_scale_bias) * self.x_scale_factor
            qml.AngleEmbedding(scaled_inputs, wires=range(self.n_qubits))
            qml.StronglyEntanglingLayers(weights, wires=range(self.n_qubits))
            return [qml.expval(qml.PauliZ(wires=i)) for i in range(self.n_qubits)]
        return qml.qnn.TorchLayer(qnode, self.weight_shapes), qnode

    def forward(self, inputs):
        return self.q_layer(inputs)

class QNNClass:
    def __init__(self, n_qubits=8, n_layers=2, lr=0.2, epochs=200, x_range=[0, torch.pi], plot_idx=10):
        torch.manual_seed(42)
        self.n_qubits = n_qubits
        self.n_layers = n_layers
        self.lr = lr
        self.epochs = epochs
        self.x_range = x_range
        self.x_scale_bias = x_range[0]
        self.x_scale_factor = self.init_x_scale_factor()
        self.collocation_points = self.init_collocation_points()
        self.collocation_points.requires_grad = True
        self.model = self.init_model()
        self.opt = optim.Adam(self.model.parameters(), lr=lr)
        self.loss_arr = []
        self.plot_idx = plot_idx

    def init_collocation_points(self):
        return torch.linspace(self.x_range[0], self.x_range[1], self.n_qubits, dtype=torch.float32)

    def init_x_scale_factor(self):
        return torch.pi / (self.x_range[1] - self.x_range[0])  # scale the input to [0, pi]

    def init_model(self):
        qnet = QuantumNetworkClass
        q_layer = qnet(n_qubits=self.n_qubits, n_layers=self.n_layers, x_scale_factor=self.x_scale_factor,
                       x_scale_bias=self.x_scale_bias)
        layers = [q_layer]
        model = torch.nn.Sequential(*layers)
        return model

    def spatial_derivative(self, u, spacing):  # The spatial derivative for comparison
        spacing = (spacing.clone().detach(),)
        d_u_dx = torch.gradient(u, spacing=spacing, edge_order=2)
        return d_u_dx[0]

    def loss_fn(self, collocation_points, d_u_dx=None):
        return (torch.sum((d_u_dx - torch.cos(torch.tensor(collocation_points))) ** 2)) / self.n_qubits

    def show_plots(self, collocation_points, meas_vec=None, d_u_dx=None):
        fig, axs = plt.subplots(2)
        du = self.spatial_derivative(meas_vec, collocation_points)
        axs[0].plot(collocation_points.detach().numpy(), meas_vec.detach().numpy(), label=f'meas_vec')
        axs[1].plot(collocation_points.detach().numpy(), d_u_dx.detach().numpy(),
                        label=f'torch.autograd.functional.jacobian')
        axs[1].plot(collocation_points.detach().numpy(), du.detach().numpy(), 'r--', label='torch.gradient')
        axs[1].legend()
        axs[0].set_title("u(x)")
        axs[1].set_title("du/dx")

        # Show the plot
        plt.tight_layout(rect=(0, 0.03, 1, 0.95))
        plt.show()
        display.display(plt.gcf())

    def min_and_idx(self, arr):
        arr_tensor = torch.tensor(arr)
        min_val = torch.min(arr_tensor)
        min_idx = torch.argmin(arr_tensor)
        return min_val, min_idx

    def mid_prints(self, avg_loss, epoch):
        best_loss, best_idx = self.min_and_idx(self.loss_arr)
        print("Loss at epoch {}: {:.6f}".format(epoch, avg_loss))
        print("Best loss so far: {:.6f}, in index: {}".format(best_loss, best_idx))

    def plot_and_print(self, collocation_points, y_pred, d_u_dx, loss, epoch, show_plots, clear_output=True):
        if clear_output:
            display.clear_output()
        if show_plots:
            self.show_plots(collocation_points, meas_vec=y_pred, d_u_dx=d_u_dx)
        self.mid_prints(loss, epoch)

    def optimization(self, show_plots=True, clear_output=True):
        print("Starting optimization loop")
        for epoch in range(self.epochs):
            self.opt.zero_grad(set_to_none=False)  # check if adding False makes any difference
            u_out = self.model(self.collocation_points)
            t = self.collocation_points
            jacob_mat_u = jacobian(lambda t_: self.model(t_), t, create_graph=True)
            u_grads = torch.diag(jacob_mat_u)

            loss = self.loss_fn(self.collocation_points, d_u_dx=u_grads)
            self.loss_arr.append(loss)
            loss.backward()

            self.opt.step()
            y_pred = self.model(self.collocation_points)
            jacob_mat_y = jacobian(lambda t_: self.model(t_), t, create_graph=True)
            d_y_dx = torch.diag(jacob_mat_y)
            if epoch % self.plot_idx == (self.plot_idx - 1) or epoch == self.epochs - 1:
                self.plot_and_print(self.collocation_points, y_pred, d_y_dx, loss, epoch, show_plots,
                                    clear_output)
        best_loss, best_idx = self.min_and_idx(self.loss_arr)
        print("\n best loss: {:.6f}, at epoch: {}".format(best_loss, best_idx))


hyperparameters = {'n_qubits': 8, 'n_layers': 1, 'lr': 0.07}
test = True
# test = False

if test:
  epochs = 1
  plot_idx = 1
else:
  epochs = 300
  plot_idx = 20

if hyperparameters:
    qnn = QNNClass(
      epochs=epochs,
      n_qubits=hyperparameters['n_qubits'],
      n_layers=hyperparameters['n_layers'],
      lr=hyperparameters['lr'],
      x_range=[0, torch.pi],
      plot_idx=plot_idx)
qnn.optimization(show_plots=True, clear_output=False)

qml.about()
Name: PennyLane
Version: 0.39.0
Summary: PennyLane is a cross-platform Python library for quantum computing, quantum machine learning, and quantum chemistry. Train a quantum computer the same way as a neural network.
Home-page: https://github.com/PennyLaneAI/pennylane
Author: 
Author-email: 
License: Apache License 2.0
Location: /home/ziv/anaconda3/envs/cuda-quantum/lib/python3.10/site-packages
Requires: appdirs, autograd, autoray, cachetools, networkx, numpy, packaging, pennylane-lightning, requests, rustworkx, scipy, toml, typing-extensions
Required-by: PennyLane-Catalyst, PennyLane-qiskit, PennyLane_Lightning, PennyLane_Lightning_GPU, PennyLane_Lightning_Kokkos

Platform info:           Linux-5.15.167.4-microsoft-standard-WSL2-x86_64-with-glibc2.35
Python version:          3.10.13
Numpy version:           1.26.4
Scipy version:           1.12.0
Installed devices:
- lightning.kokkos (PennyLane_Lightning_Kokkos-0.35.1)
- qiskit.aer (PennyLane-qiskit-0.35.1)
- qiskit.basicaer (PennyLane-qiskit-0.35.1)
- qiskit.ibmq (PennyLane-qiskit-0.35.1)
- qiskit.ibmq.circuit_runner (PennyLane-qiskit-0.35.1)
- qiskit.ibmq.sampler (PennyLane-qiskit-0.35.1)
- qiskit.remote (PennyLane-qiskit-0.35.1)
- lightning.gpu (PennyLane_Lightning_GPU-0.35.1)
- nvidia.custatevec (PennyLane-Catalyst-0.5.0)
- nvidia.cutensornet (PennyLane-Catalyst-0.5.0)
- softwareq.qpp (PennyLane-Catalyst-0.5.0)
- lightning.qubit (PennyLane_Lightning-0.39.0)
- default.clifford (PennyLane-0.39.0)
- default.gaussian (PennyLane-0.39.0)
- default.mixed (PennyLane-0.39.0)
- default.qubit (PennyLane-0.39.0)
- default.qutrit (PennyLane-0.39.0)
- default.qutrit.mixed (PennyLane-0.39.0)
- default.tensor (PennyLane-0.39.0)
- null.qubit (PennyLane-0.39.0)
- reference.qubit (PennyLane-0.39.0)

Hi @ZivChen ,

Adding StronglyEntanglingLayers effectively changes your model, and thus changes both the output of the model u and its derivative.

I did a test by changing StronglyEntanglingLayers to act on just a subset of the qubits at a time. What I see is that adding more qubits and layers significantly changes u, and I’m not sure it will help you obtain any advantage compared to just using a classical neural net.

Quantum models using rotations, such as the ones in StronglyEntanglingLayers, are good for problems that can be represented by Fourier series. I’m not sure about the full scope of your problem here but you might want to take this into account.
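To illustrate this point, here’s a quick numerical check (my own sketch, not part of the modified code below): with a single RX encoding of x, the circuit output is a degree-1 Fourier series in x, so only frequencies 0 and 1 appear, whatever the trainable layers look like.

import numpy as np
import pennylane as qml

dev = qml.device("default.qubit", wires=2)
weights = np.random.default_rng(0).uniform(0, 2 * np.pi, size=(1, 2, 3))  # StronglyEntanglingLayers shape for 1 layer, 2 wires

@qml.qnode(dev)
def f(x):
    qml.AngleEmbedding([x, 0.0], wires=[0, 1])           # x enters through a single RX
    qml.StronglyEntanglingLayers(weights, wires=[0, 1])
    return qml.expval(qml.PauliZ(0))

xs = np.linspace(0, 2 * np.pi, 64, endpoint=False)
coeffs = np.fft.rfft([float(f(x)) for x in xs]) / len(xs)
print(np.round(np.abs(coeffs[:5]), 6))  # only the frequency-0 and frequency-1 entries are nonzero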

new code
from IPython import display
import torch
from torch import optim, nn
import matplotlib.pyplot as plt
from importlib import reload
from torch.autograd.functional import jacobian
import pennylane as qml

class QuantumNetworkClass(nn.Module):
    def __init__(self, n_qubits=8, n_layers=2, x_scale_factor=1.0, x_scale_bias=0.0):
        super().__init__()
        self.n_qubits = n_qubits  # without the ancillae
        self.n_layers = n_layers
        self.weight_shapes = None
        self.x_scale_factor = x_scale_factor
        self.x_scale_bias = x_scale_bias
        self.q_layer, self.qnode = self.create_hybrid_network()

    def create_hybrid_network(self):
        dev = qml.device("default.qubit", wires=self.n_qubits)

        # Modified: changed n_wires=self.n_qubits to n_wires=ansatz_qubits
        shape = qml.StronglyEntanglingLayers.shape(n_layers=self.n_layers, n_wires=ansatz_qubits)

        self.weight_shapes = {"weights": shape}

        @qml.qnode(dev, interface="torch", diff_method="backprop", max_diff=1) # Modified: removed differentiable=True
        def qnode(inputs, weights):
            scaled_inputs = (inputs - self.x_scale_bias) * self.x_scale_factor
            qml.AngleEmbedding(scaled_inputs, wires=range(self.n_qubits))

            # Modified: changed wires=range(self.n_qubits) to wires=range(ansatz_qubits)
            qml.StronglyEntanglingLayers(weights, wires=range(ansatz_qubits))

            return [qml.expval(qml.PauliZ(wires=i)) for i in range(self.n_qubits)]
        return qml.qnn.TorchLayer(qnode, self.weight_shapes), qnode

    def forward(self, inputs):
        return self.q_layer(inputs)

class QNNClass:
    def __init__(self, n_qubits=8, n_layers=2, lr=0.2, epochs=200, x_range=[0, torch.pi], plot_idx=10):
        torch.manual_seed(42)
        self.n_qubits = n_qubits
        self.n_layers = n_layers
        self.lr = lr
        self.epochs = epochs
        self.x_range = x_range
        self.x_scale_bias = x_range[0]
        self.x_scale_factor = self.init_x_scale_factor()
        self.collocation_points = self.init_collocation_points()
        self.collocation_points.requires_grad = True
        self.model = self.init_model()
        self.opt = optim.Adam(self.model.parameters(), lr=lr)
        self.loss_arr = []
        self.plot_idx = plot_idx

    def init_collocation_points(self):
        return torch.linspace(self.x_range[0], self.x_range[1], self.n_qubits, dtype=torch.float32)

    def init_x_scale_factor(self):
        return torch.pi / (self.x_range[1] - self.x_range[0])  # scale the input to [0, pi]

    def init_model(self):
        qnet = QuantumNetworkClass
        q_layer = qnet(n_qubits=self.n_qubits, n_layers=self.n_layers, x_scale_factor=self.x_scale_factor,
                       x_scale_bias=self.x_scale_bias)
        layers = [q_layer]
        model = torch.nn.Sequential(*layers)
        return model

    def spatial_derivative(self, u, spacing):  # The spatial derivative for comparison
        spacing = (spacing.clone().detach(),)
        d_u_dx = torch.gradient(u, spacing=spacing, edge_order=2)
        return d_u_dx[0]

    def loss_fn(self, collocation_points, d_u_dx=None):
        new = collocation_points.clone().detach() # Modified: added
        return (torch.sum((d_u_dx - torch.cos(new)) ** 2)) / self.n_qubits # Modified: changed torch.tensor(...) to new

    def show_plots(self, collocation_points, meas_vec=None, d_u_dx=None):
        fig, axs = plt.subplots(2)
        du = self.spatial_derivative(meas_vec, collocation_points)
        axs[0].plot(collocation_points.detach().numpy(), meas_vec.detach().numpy(), label=f'meas_vec')
        axs[1].plot(collocation_points.detach().numpy(), d_u_dx.detach().numpy(),
                        label=f'torch.autograd.functional.jacobian')
        axs[1].plot(collocation_points.detach().numpy(), du.detach().numpy(), 'r--', label='torch.gradient')
        axs[1].legend()
        axs[0].set_title("u(x)")
        axs[1].set_title("du/dx")

        # Show the plot
        plt.tight_layout(rect=(0, 0.03, 1, 0.95))
        plt.show()
        display.display(plt.gcf())

    def min_and_idx(self, arr):
        arr_tensor = torch.tensor(arr)
        min_val = torch.min(arr_tensor)
        min_idx = torch.argmin(arr_tensor)
        return min_val, min_idx

    def mid_prints(self, avg_loss, epoch):
        best_loss, best_idx = self.min_and_idx(self.loss_arr)
        print("Loss at epoch {}: {:.6f}".format(epoch, avg_loss))
        print("Best loss so far: {:.6f}, in index: {}".format(best_loss, best_idx))

    def plot_and_print(self, collocation_points, y_pred, d_u_dx, loss, epoch, show_plots, clear_output=True):
        if clear_output:
            display.clear_output()
        if show_plots:
            self.show_plots(collocation_points, meas_vec=y_pred, d_u_dx=d_u_dx)
        self.mid_prints(loss, epoch)

    def optimization(self, show_plots=True, clear_output=True):
        print("Starting optimization loop")
        for epoch in range(self.epochs):
            self.opt.zero_grad(set_to_none=False)  # check if adding False makes any difference
            u_out = self.model(self.collocation_points)
            t = self.collocation_points
            jacob_mat_u = jacobian(lambda t_: self.model(t_), t, create_graph=True)
            u_grads = torch.diag(jacob_mat_u)

            loss = self.loss_fn(self.collocation_points, d_u_dx=u_grads)
            self.loss_arr.append(loss)
            loss.backward()

            self.opt.step()
            y_pred = self.model(self.collocation_points)
            jacob_mat_y = jacobian(lambda t_: self.model(t_), t, create_graph=True)
            d_y_dx = torch.diag(jacob_mat_y)
            if epoch % self.plot_idx == (self.plot_idx - 1) or epoch == self.epochs - 1:
                self.plot_and_print(self.collocation_points, y_pred, d_y_dx, loss, epoch, show_plots,
                                    clear_output)
        best_loss, best_idx = self.min_and_idx(self.loss_arr)
        print("\n best loss: {:.6f}, at epoch: {}".format(best_loss, best_idx))


hyperparameters = {'n_qubits': 8, 'n_layers': 1, 'lr': 0.07}
test = True
# test = False

if test:
  epochs = 1
  plot_idx = 1
else:
  epochs = 300
  plot_idx = 20

# Modified: Added a for loop on top of the hyperparameter optimization
for i in range(8):
    ansatz_qubits = i+1
    if hyperparameters:
        qnn = QNNClass(
          epochs=epochs,
          n_qubits=hyperparameters['n_qubits'],
          n_layers=hyperparameters['n_layers'],
          lr=hyperparameters['lr'],
          x_range=[0, torch.pi],
          plot_idx=plot_idx)
    qnn.optimization(show_plots=True, clear_output=False)

Hi @CatalinaAlbornoz, thanks for the reply!

It is a major thing to take into account, and I will certainly check the effects of altering the model and/or incorporating QFT into it.

Besides that, did you find any reason why the autodiff doesn’t work? Can I count on it in other models?

Thanks a lot!
Ziv

Hi @CatalinaAlbornoz,

I have a few more questions about it.

If I use other types of entangling layers (like BasicEntanglerLayers or others), would that work better for this kind of operation?
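(For concreteness, the kind of swap I mean would look roughly like this; my own sketch with BasicEntanglerLayers, where only the weight shape and the template call change:)

import pennylane as qml
import torch

n_qubits, n_layers = 8, 2
dev = qml.device("default.qubit", wires=n_qubits)
weight_shapes = {"weights": qml.BasicEntanglerLayers.shape(n_layers=n_layers, n_wires=n_qubits)}

@qml.qnode(dev, interface="torch", diff_method="backprop")
def qnode(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))  # swapped in for StronglyEntanglingLayers
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

q_layer = qml.qnn.TorchLayer(qnode, weight_shapes)
print(q_layer(torch.linspace(0, torch.pi, n_qubits)))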

Besides that, can I represent the model in the Fourier domain if I use angle embedding? I’m asking because the states themselves do not represent anything meaningful, and I only encode n points = n qubits.
Another thought I had: if I move to a QFT-based QML model, would it make a difference to apply the QFT after the embedding, then the entangling layers, and finish with an inverse QFT? Or is there no reason for that to work?
The new QNode would be:

new Qnode
@qml.qnode(dev, interface="torch", diff_method="backprop", differentiable=True, max_diff=1)
def qnode(inputs, weights):
    scaled_inputs = (inputs - self.x_scale_bias) * self.x_scale_factor
    qml.AngleEmbedding(scaled_inputs, wires=range(self.n_qubits))
    qml.QFT(wires=range(self.n_qubits))
    qml.StronglyEntanglingLayers(weights, wires=range(self.n_qubits))
    qml.adjoint(qml.QFT)(wires=range(self.n_qubits))  # inverse QFT
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(self.n_qubits)]

thanks a lot for your kind help, I really appreciate it!
Ziv

Hi @ZivChen ,

The QFT is useful for analyzing periodic signals. I don’t think that’s the case for you, so it probably won’t help. Changing the embedding might help, since it changes the way the data is mapped into states. However, I think the main issue you have is that the function u isn’t always smooth, which is why the gradients don’t work. So I think the best research question here would be to analyze how the circuit should look so that your function is smooth.

I hope this helps!


Thanks a lot for the thorough reply, I’ll look into what you suggested!
