Parameter broadcast bug

Currently, I’m working on a quantum neural network that integrates a QNode into a PyTorch model. To accelerate the computation, I use the parameter-broadcasting feature, with default.qubit as the device. However, it only works when the diff_method is backprop; with parameter-shift or adjoint it throws an error. Here is my code:


import numpy as np
from sklearn.datasets import make_moons
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import pennylane as qml
import sys
from time import perf_counter
import math
class Model(nn.Module):
    def __init__(self, device, diff_method="backprop"):
        super().__init__()

        self.cnet_in = self.cnet__in()
        n_qubits = math.ceil(math.log2(16))
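        # ceil(log2(16)) = 4 qubits, enough to amplitude-encode the 16 features produced by cnet_in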
        self.qlayer = self.qnode(device,n_qubits,diff_method)
        quantum_weights = np.random.normal(0, np.pi, (2*(n_qubits-1),))
        self.quantum_weights = nn.Parameter(
            torch.tensor(quantum_weights, dtype=torch.float32).cuda()
        )
        self.cnet_out = self.cnet__out()

    def cnet__in(self):
        layers = [nn.Linear(2,10), nn.ReLU(True), nn.Linear(10,16), nn.Tanh()]
        return nn.Sequential(*layers)  

    def cnet__out(self):
        layers = [nn.Linear(1,10), nn.ReLU(True), nn.Linear(10,2), nn.Tanh()]
        return nn.Sequential(*layers) 

    def qnode(self,device,n_qubits,diff_method):
        dev = qml.device(device, wires=n_qubits)
        def block(weights, wires):
            qml.RX(weights[0], wires=wires[0])
            qml.RY(weights[1], wires=wires[1])
            qml.CNOT(wires=wires)
        @qml.qnode(dev, interface="torch", diff_method=diff_method)
        def qnode(inputs,weights):
            # print("excute qnode ",weights.shape,inputs.shape)
            qml.AmplitudeEmbedding(features=inputs, wires=range(n_qubits),pad_with=0.,normalize=True) 
            # qml.Hadamard(wires=n_qubits-1)
            weights = weights.reshape(-1,2)
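            # 6 weights -> 3 blocks of 2 parameters each, one block per pair of neighbouring wires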
            qml.MPS(
                wires=range(n_qubits),
                n_block_wires=2,
                block=block,
                n_params_block=2,
                template_weights=weights,
            )
            r = qml.PauliZ(wires=n_qubits-1)
            # print("r ",r)
            return qml.expval(r)
        return qnode

    def forward(self, x):
        print("self.quantum_weights ",self.quantum_weights.shape)
        x1 = self.cnet_in(x)
        print('x ',x1.shape, x1.requires_grad)
        x2 = self.qlayer(x1,self.quantum_weights)
        x2 = x2.cpu().to(torch.float)
        x2 = x2.reshape(-1,1)
        print("X2 ",x2.shape)
        x_output = self.cnet_out(x2)
        print("x_output ",x_output.shape)
        return x_output

def train(X, y_hot, dev_name, diff_method):
    
    model  = Model(dev_name, diff_method)
    
    # Train the model
    opt = torch.optim.SGD(model.parameters(), lr=0.2)
    loss = torch.nn.L1Loss()

    X = torch.tensor(X, requires_grad=False).float()
    y_hot = y_hot.float()

    batch_size = 5
    batches = 200 // batch_size

    data_loader = torch.utils.data.DataLoader(
        list(zip(X, y_hot)), batch_size=batch_size, shuffle=True, drop_last=True
    )

    epochs = 6

    for epoch in range(epochs):

        running_loss = 0

        for xs, ys in data_loader:
            opt.zero_grad()

            loss_evaluated = loss(model(xs), ys)
            loss_evaluated.backward()

            opt.step()

            running_loss += loss_evaluated.item()  # .item() detaches, so the graph can be freed each step

        avg_loss = running_loss / batches
        print("Average loss over epoch {}: {:.4f}".format(epoch + 1, avg_loss))

    y_pred = model(X)
    predictions = torch.argmax(y_pred, axis=1).detach().numpy()

    labels = torch.argmax(y_hot, axis=1).numpy()  # recover class labels locally instead of relying on the global y
    correct = [1 if p == p_true else 0 for p, p_true in zip(predictions, labels)]
    accuracy = sum(correct) / len(correct)
    print(f"Accuracy: {accuracy * 100}%")

torch.manual_seed(42)
np.random.seed(42)
X, y = make_moons(n_samples=200, noise=0.1)
y_ = torch.unsqueeze(torch.tensor(y), 1)  # used for one-hot encoded labels
y_hot = torch.scatter(torch.zeros((200, 2)), 1, y_, 1)
begin_time = perf_counter()
train(X, y_hot, 'default.qubit', 'adjoint')
end_time = perf_counter()
runtime = end_time-begin_time
print(f'Runtime: {runtime:.2e} s or {(runtime/60):.2e} min.')

And the full error message is below:

self.quantum_weights  torch.Size([6])
x  torch.Size([5, 16]) True
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-93-0d7150a7efeb> in <cell line: 7>()
      5 y_hot = torch.scatter(torch.zeros((200, 2)), 1, y_, 1)
      6 begin_time = perf_counter()
----> 7 train(X, y_hot, 'default.qubit', 'adjoint')
      8 end_time = perf_counter()
      9 runtime = end_time-begin_time

15 frames
<ipython-input-91-467a37879882> in train(X, y_hot, dev_name, diff_method)
     91             opt.zero_grad()
     92 
---> 93             loss_evaluated = loss(model(xs), ys)
     94             loss_evaluated.backward()
     95 

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
   1499                 or _global_backward_pre_hooks or _global_backward_hooks
   1500                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501             return forward_call(*args, **kwargs)
   1502         # Do not call functions when jit is used
   1503         full_backward_hooks, non_full_backward_hooks = [], []

<ipython-input-91-467a37879882> in forward(self, x)
     56         x1 = self.cnet_in(x)
     57         print('x ',x1.shape, x1.requires_grad)
---> 58         x2 = self.qlayer(x1,self.quantum_weights)
     59         x2 = x2.cpu().to(torch.float)
     60         x2 = x2.reshape(-1,1)

/usr/local/lib/python3.10/dist-packages/pennylane/qnode.py in __call__(self, *args, **kwargs)
    865                 self.execute_kwargs.pop("mode")
    866             # pylint: disable=unexpected-keyword-arg
--> 867             res = qml.execute(
    868                 [self.tape],
    869                 device=self.device,

/usr/local/lib/python3.10/dist-packages/pennylane/interfaces/execution.py in execute(tapes, device, gradient_fn, interface, grad_on_execution, gradient_kwargs, cache, cachesize, max_diff, override_shots, expand_fn, max_expansion, device_batch_transform)
    427         if gradient_kwargs.get("method", "") == "adjoint_jacobian":
    428             mode = "forward" if grad_on_execution else "backward"
--> 429             tapes = _adjoint_jacobian_expansion(tapes, mode, interface, max_expansion)
    430 
    431         # grad on execution or best was chosen

/usr/local/lib/python3.10/dist-packages/pennylane/interfaces/execution.py in _adjoint_jacobian_expansion(tapes, grad_on_execution, interface, max_expansion)
     81     for i, tape in enumerate(tapes):
     82         if any(not stop_at(op) for op in tape.operations):
---> 83             tapes[i] = tape.expand(stop_at=stop_at, depth=max_expansion)
     84 
     85     return tapes

/usr/local/lib/python3.10/dist-packages/pennylane/tape/qscript.py in expand(self, depth, stop_at, expand_measurements)
   1077         RY(0.2, wires=['a'])]
   1078         """
-> 1079         new_script = qml.tape.tape.expand_tape(
   1080             self, depth=depth, stop_at=stop_at, expand_measurements=expand_measurements
   1081         )

/usr/local/lib/python3.10/dist-packages/pennylane/tape/tape.py in expand_tape(tape, depth, stop_at, expand_measurements)
    210 
    211             # recursively expand out the newly created tape
--> 212             expanded_tape = expand_tape(obj, stop_at=stop_at, depth=depth - 1)
    213 
    214             new_prep.extend(expanded_tape._prep)

/usr/local/lib/python3.10/dist-packages/pennylane/tape/tape.py in expand_tape(tape, depth, stop_at, expand_measurements)
    202                 # Object is an operation; query it for its expansion
    203                 try:
--> 204                     obj = obj.expand()
    205                 except DecompositionUndefinedError:
    206                     # Object does not define an expansion; treat this as

/usr/local/lib/python3.10/dist-packages/pennylane/operation.py in expand(self)
   1390             raise DecompositionUndefinedError
   1391 
-> 1392         qscript = qml.tape.make_qscript(self.decomposition)()
   1393 
   1394         if not self.data:

/usr/local/lib/python3.10/dist-packages/pennylane/tape/qscript.py in wrapper(*args, **kwargs)
   1376     def wrapper(*args, **kwargs):
   1377         with AnnotatedQueue() as q:
-> 1378             result = fn(*args, **kwargs)
   1379 
   1380         qscript = QuantumScript.from_queue(q)

/usr/local/lib/python3.10/dist-packages/pennylane/operation.py in decomposition(self)
   1200             list[Operator]: decomposition of the operator
   1201         """
-> 1202         return self.compute_decomposition(
   1203             *self.parameters, wires=self.wires, **self.hyperparameters
   1204         )

/usr/local/lib/python3.10/dist-packages/pennylane/templates/state_preparations/mottonen.py in compute_decomposition(state_vector, wires)
    359         # Apply inverse y rotation cascade to prepare correct absolute values of amplitudes
    360         for k in range(len(wires_reverse), 0, -1):
--> 361             alpha_y_k = _get_alpha_y(a, len(wires_reverse), k)
    362             control = wires_reverse[k:]
    363             target = wires_reverse[k - 1]

/usr/local/lib/python3.10/dist-packages/pennylane/templates/state_preparations/mottonen.py in _get_alpha_y(a, n, k)
    195         for j in range(2 ** (n - k))
    196     ]
--> 197     numerator = qml.math.take(a, indices=indices_numerator, axis=-1)
    198     numerator = qml.math.sum(qml.math.abs(numerator) ** 2, axis=-1)
    199 

/usr/local/lib/python3.10/dist-packages/autoray/autoray.py in do(fn, like, *args, **kwargs)
     77     """
     78     backend = choose_backend(fn, *args, like=like, **kwargs)
---> 79     return get_lib_fn(backend, fn)(*args, **kwargs)
     80 
     81 

/usr/local/lib/python3.10/dist-packages/pennylane/math/single_dispatch.py in _take_torch(tensor, indices, axis, **_)
    538 
    539     fancy_indices = [slice(None)] * axis + [indices]
--> 540     return tensor[fancy_indices]
    541 
    542 

IndexError: index 8 is out of bounds for dimension 0 with size 5

And the output of qml.about():

I think the reason is that the input features being broadcast require gradients (requires_grad=True), whereas the parameter-broadcasting examples use inputs with requires_grad=False.

Hey @Yan_Li! It doesn’t look like you’re using it, but we have a dedicated module in PennyLane for handling how PennyLane interfaces with PyTorch. You can check it out here: Turning quantum nodes into Torch Layers — PennyLane documentation
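For anyone reading along, here is a minimal sketch of that TorchLayer pattern (my own toy circuit, modeled on the linked tutorial rather than copied from this thread):

import pennylane as qml
import torch

n_qubits = 2
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, interface="torch")
def circuit(inputs, weights):
    # "inputs" is the mandatory argument name that TorchLayer feeds the batch into
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(w)) for w in range(n_qubits)]

# TorchLayer creates and registers the trainable weights from this shape dictionary
weight_shapes = {"weights": qml.BasicEntanglerLayers.shape(n_layers=3, n_wires=n_qubits)}
qlayer = qml.qnn.TorchLayer(circuit, weight_shapes)

model = torch.nn.Sequential(torch.nn.Linear(4, n_qubits), qlayer)
out = model(torch.rand(10, 4))  # a batch of 10 samples flows through the hybrid model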

Currently, true parameter broadcasting within qnn.TorchLayer isn’t supported. I.e., you can still “broadcast”, but what happens under the hood is a serial execution, not a parallel one. So it looks like broadcasting is happening, but it really isn’t.

There’s a PR for this on our GitHub that will add support for true broadcasting, if you’re interested in following its progress!

Regarding your specific error: adjoint doesn’t support parameter broadcasting… so the error you’re getting is a little strange. In any case, we should have a more verbose and informative error message.

In the meantime, here is what you can do (a toy example follows this list):

  • Add qml.transforms.broadcast_expand to the qnode:
        @qml.transforms.broadcast_expand
        @qml.qnode(dev, interface="torch", diff_method=diff_method)
        def qnode(inputs,weights):
            # print("excute qnode ",weights.shape,inputs.shape)
            qml.AmplitudeEmbedding(features=inputs, wires=range(n_qubits),pad_with=0.,normalize=True) 
            # qml.Hadamard(wires=n_qubits-1)
            weights = weights.reshape(-1,2)
            qml.MPS(
                wires=range(n_qubits),
                n_block_wires=2,
                block=block,
                n_params_block=2,
                template_weights=weights,
            )
            r = qml.PauliZ(wires=n_qubits-1)
            # print("r ",r)
            return qml.expval(r)
  • Switch to a different differentiation method (backprop).
  • Switch to a different device that doesn’t claim to support parameter broadcasting.
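To make the first option concrete, here is a small self-contained sketch (a toy circuit of my own, not code from this thread) of broadcast_expand letting parameter-shift handle a batched input:

import pennylane as qml
import torch

dev = qml.device("default.qubit", wires=1)

@qml.transforms.broadcast_expand
@qml.qnode(dev, interface="torch", diff_method="parameter-shift")
def circuit(x, w):
    qml.RY(x, wires=0)  # x carries the broadcast (batch) dimension
    qml.RX(w, wires=0)
    return qml.expval(qml.PauliZ(0))

x = torch.tensor([0.1, 0.2, 0.3])          # batch of 3 inputs
w = torch.tensor(0.5, requires_grad=True)  # trainable scalar weight
circuit(x, w).sum().backward()  # works because the transform splits the batch into separate tapes
print(w.grad)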

I opened an issue to try and have the error message fixed 🙂


I’m currently using MottonenStatePreparation and I get the same error, so I’ve looked into its code:

def _get_alpha_z(omega, n, k):
    r"""Computes the rotation angles required to implement the uniformly-controlled Z rotation
    applied to the :math:`k`th qubit.

    The :math:`j`th angle is related to the phases omega of the desired amplitudes via:

    .. math:: \alpha^{z,k}_j = \sum_{l=1}^{2^{k-1}} \frac{\omega_{(2j-1) 2^{k-1}+l} - \omega_{(2j-2) 2^{k-1}+l}}{2^{k-1}}

    Args:
        omega (tensor_like): phases of the state to prepare
        n (int): total number of qubits for the uniformly-controlled rotation
        k (int): index of current qubit

    Returns:
        array representing :math:`\alpha^{z,k}`
    """
    indices1 = [
        [(2 * j - 1) * 2 ** (k - 1) + l - 1 for l in range(1, 2 ** (k - 1) + 1)]
        for j in range(1, 2 ** (n - k) + 1)
    ]
    indices2 = [
        [(2 * j - 2) * 2 ** (k - 1) + l - 1 for l in range(1, 2 ** (k - 1) + 1)]
        for j in range(1, 2 ** (n - k) + 1)
    ]

    term1 = qml.math.take(omega, indices=indices1, axis=-1)
    term2 = qml.math.take(omega, indices=indices2, axis=-1)
    diff = (term1 - term2) / 2 ** (k - 1)

    return qml.math.sum(diff, axis=-1)


def _get_alpha_y(a, n, k):
    r"""Computes the rotation angles required to implement the uniformly controlled Y rotation
    applied to the :math:`k`th qubit.

    The :math:`j`-th angle is related to the absolute values, a, of the desired amplitudes via:

    .. math:: \alpha^{y,k}_j = 2 \arcsin \sqrt{ \frac{ \sum_{l=1}^{2^{k-1}} a_{(2j-1)2^{k-1} +l}^2  }{ \sum_{l=1}^{2^{k}} a_{(j-1)2^{k} +l}^2  } }

    Args:
        a (tensor_like): absolute values of the state to prepare
        n (int): total number of qubits for the uniformly-controlled rotation
        k (int): index of current qubit

    Returns:
        array representing :math:`\alpha^{y,k}`
    """
    indices_numerator = [
        [(2 * (j + 1) - 1) * 2 ** (k - 1) + l for l in range(2 ** (k - 1))]
        for j in range(2 ** (n - k))
    ]
    numerator = qml.math.take(a, indices=indices_numerator, axis=-1)
    numerator = qml.math.sum(qml.math.abs(numerator) ** 2, axis=-1)

    indices_denominator = [[j * 2**k + l for l in range(2**k)] for j in range(2 ** (n - k))]
    denominator = qml.math.take(a, indices=indices_denominator, axis=-1)
    denominator = qml.math.sum(qml.math.abs(denominator) ** 2, axis=-1)

    # Divide only where denominator is zero, else leave initial value of zero.
    # The equation guarantees that the numerator is also zero in the corresponding entries.

    with np.errstate(divide="ignore", invalid="ignore"):
        division = numerator / denominator

    # Cast the numerator and denominator to ensure compatibility with interfaces
    division = qml.math.cast(division, np.float64)
    denominator = qml.math.cast(denominator, np.float64)

    division = qml.math.where(denominator != 0.0, division, 0.0)

    return 2 * qml.math.arcsin(qml.math.sqrt(division))

The problem here, I believe, is that in these two functions, which mottonen.py uses in its compute_decomposition function, qml.math.take is called with a negative axis index. At least in the Torch implementation, which I am using, the take function isn’t really built to handle negative axis indices:

def _take_torch(tensor, indices, axis=None, **_):
    """Torch implementation of np.take"""
    torch = _i("torch")

    if not isinstance(indices, torch.Tensor):
        indices = torch.as_tensor(indices)

    if axis is None:
        return tensor.flatten()[indices]

    if indices.ndim == 1:
        if (indices < 0).any():
            # index_select doesn't allow negative indices
            dim_length = tensor.size()[0] if axis is None else tensor.shape[axis]
            indices = torch.where(indices >= 0, indices, indices + dim_length)

        return torch.index_select(tensor, dim=axis, index=indices)

    fancy_indices = [slice(None)] * axis + [indices]
    return tensor[fancy_indices]

Basically, what seems to happen here is that when you set axis=-1, the code still ends up applying the “take” to the first (zeroth) axis rather than the last one, since [slice(None)] * axis produces an empty list for any axis <= 0. At least that’s the behavior for 2D tensors.
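The following standalone snippet (my own illustration, mirroring the fancy-indexing branch above; the (5, 16) shape is chosen to match the batched amplitudes from earlier in this thread) reproduces exactly the out-of-bounds error reported above:

import torch

a = torch.rand(5, 16)  # batched amplitudes: a batch of 5 vectors with 16 entries each
indices = torch.tensor([[8, 9], [10, 11]])  # 2-D indices, so the index_select branch is skipped
axis = -1

# [slice(None)] * (-1) == [], so the indices end up applied to dim 0 (size 5)
# instead of the intended last dim (size 16)
fancy_indices = [slice(None)] * axis + [indices]
print(fancy_indices)     # [tensor([[ 8,  9], [10, 11]])]
a[tuple(fancy_indices)]  # IndexError: index 8 is out of bounds for dimension 0 with size 5
# (a tuple is used here only to sidestep PyTorch's deprecation of list-based indexing)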

Hi @neak11, welcome to the Forum and thank you for your comment here!

Do you have a minimal code example that is self-contained so that we can try to replicate your error? I understand that the sign in the axis could be causing issues so I’d love to see your specific code. Maybe it falls within a known situation that isn’t supported, or maybe it’s a new and unknown problem.

Hi @CatalinaAlbornoz, I’ve posted this code in the GitHub issue about the same error:

import pennylane as qml
from pennylane import qnn
import torch
from torch import nn
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader


images = datasets.FakeData(size=8000, image_size=(1, 8, 8), num_classes=1, transform=ToTensor())


device = qml.device('default.qubit', wires=6, shots=None)


@qml.qnode(device, interface='torch', diff_method='adjoint')
def circuit(inputs, param):
    # Normalize each batched input vector (equivalent to the original torch.stack construction)
    inputs = inputs / torch.norm(inputs, dim=1, keepdim=True)
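    # "inputs" is batched (2-D) here, which is what triggers the IndexError inside MottonenStatePreparation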
    qml.MottonenStatePreparation(inputs, wires=range(6))
    qml.BasicEntanglerLayers(param, wires=range(6))
    return qml.expval(qml.PauliY(wires=0))


param_shape = qml.BasicEntanglerLayers.shape(1, 6)
dictionary = {"param": param_shape}
quantum_layer = qnn.TorchLayer(circuit, dictionary)

flat_layer = nn.Flatten(1, -1)
model_net = nn.Sequential(flat_layer, quantum_layer).to("cpu")

dataloader_images = DataLoader(images, batch_size=10, shuffle=True)
losses = nn.L1Loss()
optim = torch.optim.SGD(model_net.parameters(), lr=1e-6)


def training(dataloader, model, lossf, optimizer):
    model.train()
    for batch, (data, labels) in enumerate(dataloader):
        result = model(data)
        loss = lossf(result, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()


for ep in range(10):
    training(dataloader_images, model_net, losses, optim)

Hi @neak11,

Thank you! I’m linking the issue here for future reference.

I can replicate your issue. We’re looking into it.

Hi!

For anyone looking at this thread: the problem is solved by using qml.StatePrep instead of qml.MottonenStatePreparation, since the latter doesn’t have broadcasting support at the moment.
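As a hedged sketch of that swap (reusing the six-wire setup from the reproduction above; this is not code from the thread):

import pennylane as qml
import torch

dev = qml.device('default.qubit', wires=6)

@qml.qnode(dev, interface='torch', diff_method='adjoint')
def circuit(inputs, param):
    # Normalize each batched input vector to a valid quantum state
    inputs = inputs / torch.norm(inputs, dim=1, keepdim=True)
    qml.StatePrep(inputs, wires=range(6))  # StatePrep accepts broadcasted state vectors
    qml.BasicEntanglerLayers(param, wires=range(6))
    return qml.expval(qml.PauliY(wires=0))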

The Torch axis=-1 handling has also been fixed.
