Question about the parameter-shift method with broadcasting

Hello! If applicable, put your complete code example down below. Make sure that your code:

  • is 100% self-contained — someone can copy-paste exactly what is here and run it to
    reproduce the behaviour you are observing
  • includes comments

I want to run a hybrid quantum CNN model. However, if I use the parameter-shift method (@qml.qnode(dev, diff_method="parameter-shift")), I get the following error:

'NotImplementedError: Computing the gradient of broadcasted tapes with respect to the broadcasted parameters using the parameter-shift rule gradient transform is currently not supported. See #4462 for details.'

Is there any method to fix this error? Thank you!
# Put code here

# Imports needed to run this example
import datetime

import pennylane as qml
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets

# Define the quantum circuit using PennyLane
n_qubits = 5
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, diff_method="parameter-shift")
def qnode(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]

# Define the QLayer weight shapes
n_layers = 1
weight_shapes = {"weights": (n_layers, n_qubits)}

# Define a simple CNN architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Convolutional layer 1 with 1 input channel (for greyscale images), 16 output channels, and a 5x5 kernel
        self.conv1 = nn.Conv2d(1, 16, 5, stride=1, padding=2)
        # Batch normalization after convolutional layer 1
        self.bn1 = nn.BatchNorm2d(16)
        # Max pooling layer with a 2x2 window
        self.pool = nn.MaxPool2d(2, 2)
        # Convolutional layer 2 with 16 input channels (from the previous layer), 32 output channels, and a 5x5 kernel
        self.conv2 = nn.Conv2d(16, 32, 5, stride=1, padding=2)
        # Batch normalization after convolutional layer 2
        self.bn2 = nn.BatchNorm2d(32)
        # Quantum layers
        self.qlayer1 = qml.qnn.TorchLayer(qnode, weight_shapes)
        self.qlayer2 = qml.qnn.TorchLayer(qnode, weight_shapes)
        self.qlayer3 = qml.qnn.TorchLayer(qnode, weight_shapes)
        self.qlayer4 = qml.qnn.TorchLayer(qnode, weight_shapes)
        # Fully connected layers
        self.fc1 = nn.Linear(32 * 7 * 7, 120)
        self.fc2 = nn.Linear(120, 20)
        self.fc3 = nn.Linear(20, 10)

    def forward(self, x):
        # Propagate the input through the CNN layers
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        # Flatten the output from the convolutional layers
        x = x.view(-1, 32 * 7 * 7)
        # Pass the flattened output through the first two fully connected layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Split the 20 features into four groups of 5 and pass each through a quantum layer
        x_1, x_2, x_3, x_4 = torch.split(x, 5, dim=1)
        x_1 = self.qlayer1(x_1)
        x_2 = self.qlayer2(x_2)
        x_3 = self.qlayer3(x_3)
        x_4 = self.qlayer4(x_4)
        x = torch.cat([x_1, x_2, x_3, x_4], axis=1)
        x = self.fc3(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.01)

from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, Subset
num_train_images = 1000
num_test_images = 200

# Download and load the MNIST dataset
train_data = datasets.MNIST(
    root='data',
    train=True,
    transform=ToTensor(),
    download=True,
)
test_data = datasets.MNIST(
    root='data',
    train=False,
    transform=ToTensor()
)

# Create subsets of the data
train_subset = Subset(train_data, range(num_train_images))
test_subset = Subset(test_data, range(num_test_images))

# Create data loaders for the subsets
train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_subset, batch_size=32, shuffle=False)

# Example: print the number of images in the subsets
print(f'Number of training images: {len(train_loader.dataset)}')
print(f'Number of test images: {len(test_loader.dataset)}')

dataset = train_data

# Initialize the CNN model
cnn = Net()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification
optimizer = torch.optim.SGD(cnn.parameters(), lr=0.001, momentum=0.9)  # Stochastic gradient descent optimizer

# Split the data into training and validation sets
train_size = int(0.8 * len(dataset))
train_set, val_set = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=4, shuffle=True)
# val_loader = torch.utils.data.DataLoader(val_set, batch_size=4, shuffle=False)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    ct = datetime.datetime.now()
    print(f"{epoch=}, {ct}")
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()              # Zero the parameter gradients to avoid accumulation
        outputs = cnn(inputs)              # Forward pass
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward()                    # Backpropagation
        optimizer.step()                   # Update the model parameters
print('Finished Training')

If you want help with diagnosing an error, please put the full error message below:

# Put full error message here

And, finally, make sure to include the versions of your packages. Specifically, show us the output of qml.about().
Name: PennyLane
Version: 0.37.0
Summary: PennyLane is a cross-platform Python library for quantum computing, quantum machine learning, and quantum chemistry. Train a quantum computer the same way as a neural network.
Home-page: https://github.com/PennyLaneAI/pennylane
Author:
Author-email:
License: Apache License 2.0
Location: d:\anaconda3\lib\site-packages
Requires: appdirs, autograd, autoray, cachetools, networkx, numpy, packaging, pennylane-lightning, requests, rustworkx, scipy, semantic-version, toml, typing-extensions
Required-by: PennyLane_Lightning

Platform info: Windows-10-10.0.22631-SP0
Python version: 3.10.9
Numpy version: 1.26.4
Scipy version: 1.10.0
Installed devices:

  • default.clifford (PennyLane-0.37.0)
  • default.gaussian (PennyLane-0.37.0)
  • default.mixed (PennyLane-0.37.0)
  • default.qubit (PennyLane-0.37.0)
  • default.qubit.autograd (PennyLane-0.37.0)
  • default.qubit.jax (PennyLane-0.37.0)
  • default.qubit.legacy (PennyLane-0.37.0)
  • default.qubit.tf (PennyLane-0.37.0)
  • default.qubit.torch (PennyLane-0.37.0)
  • default.qutrit (PennyLane-0.37.0)
  • default.qutrit.mixed (PennyLane-0.37.0)
  • default.tensor (PennyLane-0.37.0)
  • null.qubit (PennyLane-0.37.0)
  • lightning.qubit (PennyLane_Lightning-0.37.0)

Hi @Noah_niu , welcome to the Forum!

My guess is that your code is a modified version of one of our demos. Is this right? If so, which demo did you use as your base?

On the other hand, it looks like diff_method="parameter-shift" is not supported for your workflow. Are you able to remove it? For all devices, if you don't set any diff_method, PennyLane will choose the best one for that device. In the case of default.qubit it will try to choose "backprop". My guess is that this will fix your issue. Let me know if this works for you!
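
For reference, here is a minimal sketch of what I mean, based on the QNode in your post: the only change is dropping the diff_method argument, so PennyLane selects the differentiation method for default.qubit (backprop) automatically.

import pennylane as qml

n_qubits = 5
dev = qml.device("default.qubit", wires=n_qubits)

# No diff_method specified: PennyLane chooses the best method for the device,
# which is backprop for default.qubit
@qml.qnode(dev)
def qnode(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]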

Yes, if I remove the diff_method, the code works. But my question is: is there any way to use the parameter-shift rule with broadcasting, since backpropagation cannot be applied to real quantum devices? If I don't use broadcasting and instead update the quantum parameters with the parameter-shift rule manually, the code works, but the training time is too long even for a single epoch. Can you give me some advice on this? I appreciate your help!

Hi @Noah_niu, I’m not sure whether there are other options. Let me look into it and get back to you on this.

Many thanks in advance!

Hi @Noah_niu !

Sorry it took me some time to look into this.
It doesn’t seem like there’s any obvious solution on how to make this work with TorchLayer.
Using finite-diff also doesn’t work.

You need to avoid using TorchLayer completely and make a class that inherits from nn.Module.

I made a working example based on the TorchLayers demo and the Transfer Learning demo. See the code below:

import torch
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_moons

# Set random seeds
torch.manual_seed(42)
np.random.seed(42)

X, y = make_moons(n_samples=200, noise=0.1)
y_ = torch.unsqueeze(torch.tensor(y), 1)  # used for one-hot encoded labels
y_hot = torch.scatter(torch.zeros((200, 2)), 1, y_, 1)

c = ["#1f77b4" if y_ == 0 else "#ff7f0e" for y_ in y]  # colours for each class
plt.axis("off")
plt.scatter(X[:, 0], X[:, 1], c=c)
plt.show()

import pennylane as qml
import torch.nn as nn

n_qubits = 2
n_layers = 6
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev,diff_method='parameter-shift')
def qnode(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]

class HybridModel(nn.Module):

    def __init__(self):

        super().__init__()
        self.clayer_1 = nn.Linear(2, n_qubits)
        self.q_params = nn.Parameter(torch.randn(n_layers , n_qubits))
        self.clayer_2 = nn.Linear(n_qubits, 2)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, inputs):
        q_in = self.clayer_1(inputs)

        # Apply the quantum circuit to each element of the batch and append to q_out
        q_out = torch.Tensor(0, n_qubits)

        for elem in q_in:
            q_out_elem = torch.hstack(qnode(elem, self.q_params)).float().unsqueeze(0)
            q_out = torch.cat((q_out, q_out_elem))

        # return the two-dimensional prediction from the postprocessing layer
        x = self.clayer_2(q_out)
        return self.softmax(x)

model = HybridModel()

# Data and batches
X = torch.tensor(X, requires_grad=True).float()
y_hot = y_hot.float()

batch_size = 5
batches = 200 // batch_size

data_loader = torch.utils.data.DataLoader(
    list(zip(X, y_hot)), batch_size=batch_size, shuffle=True, drop_last=True
)

# Optimization
epochs = 6
loss = nn.L1Loss()
opt = torch.optim.SGD(model.parameters(), lr=0.2)

for epoch in range(epochs):

    running_loss = 0

    for xs, ys in data_loader:
        opt.zero_grad()

        loss_evaluated = loss(model(xs), ys)
        loss_evaluated.backward()

        opt.step()

        running_loss += loss_evaluated

    avg_loss = running_loss / batches
    print("Average loss over epoch {}: {:.4f}".format(epoch + 1, avg_loss))

y_pred = model(X)
predictions = torch.argmax(y_pred, axis=1).detach().numpy()

correct = [1 if p == p_true else 0 for p, p_true in zip(predictions, y)]
accuracy = sum(correct) / len(correct)
print(f"Accuracy: {accuracy * 100}%")

It took me a while to put together, but I hope it helps you see how to update your code by using nn.Parameter instead of TorchLayer and breaking up the batches with the for elem in q_in: loop.
Let me know if you have any questions on the code specifics!
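
To connect this back to your original Net class, here is an untested sketch of how one of your four TorchLayer objects could be replaced by a small nn.Module that holds its own nn.Parameter weights and evaluates the QNode one sample at a time. The QuantumBlock name and structure are only illustrative; it assumes the same qnode and n_qubits = 5 from your post.

import torch
import torch.nn as nn
import pennylane as qml

n_qubits = 5
n_layers = 1
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, diff_method="parameter-shift")
def qnode(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]

class QuantumBlock(nn.Module):
    # Stands in for one qml.qnn.TorchLayer: it owns the circuit weights
    # and evaluates the QNode per sample instead of broadcasting the batch.
    def __init__(self):
        super().__init__()
        self.q_params = nn.Parameter(torch.randn(n_layers, n_qubits))

    def forward(self, x):
        # x has shape (batch_size, n_qubits)
        out = torch.Tensor(0, n_qubits)
        for elem in x:
            q_out_elem = torch.hstack(qnode(elem, self.q_params)).float().unsqueeze(0)
            out = torch.cat((out, q_out_elem))
        return out

In your Net class you would then create four QuantumBlock instances in place of the four TorchLayer objects; the torch.split and torch.cat logic in your forward method can stay as it is.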