GD w.r.t. qml.sample dependent cost function raises ValueError

I’m trying to build a quantum model that updates its variational parameters with respect to a measurement-based cost function. Below is a very watered-down version of my actual code that I hope narrows in on the issue. Here, I’ve built a simple circuit that contains depth * num_qubits * 3 trainable parameters. However, instead of returning qml.probs, I return shots number of Z-basis measurements on each qubit using qml.sample. My cost function sums the measurements on each qubit for each shot and returns the number of shots for which the measurements summed to 0. Minimizing the cost, therefore, would mean that the circuit is only producing identical measurements across each qubit.

import pennylane as qml
import pennylane.numpy as np

num_qubits = 2  # number of wires in the circuit
shots = 500  # number of samples drawn per circuit evaluation
depth = 2  # number of Rot layers applied to each qubit
# Randomly initialised rotation angles: one triple of Rot angles per layer and qubit.
params = 2 * np.pi * np.random.randn(depth, num_qubits, 3)
# The device needs the qubit count here; the name `wires` was never defined.
dev = qml.device("default.qubit", wires=num_qubits, shots=shots)

@qml.qnode(dev)
def circuit(params):
    """Apply a layered single-qubit ansatz and sample PauliZ on every qubit.

    Args:
        params: rotation angles with shape ``(depth, num_qubits, 3)``;
            ``params[j][i]`` holds the three ``qml.Rot`` angles applied to
            qubit ``i`` in layer ``j``.

    Returns:
        A list with one ``qml.sample(qml.PauliZ(i))`` measurement per qubit.
    """
    for i in range(num_qubits):
        qml.Hadamard(wires=i)

    for i in range(num_qubits):
        qml.RX(np.pi / 4.0, wires=i)

    for i in range(num_qubits):
        for j in range(depth):
            # The layer axis comes first in ``params``. Indexing it as
            # params[i][j] only works while num_qubits == depth and fails
            # (or picks the wrong angles) otherwise.
            angles = params[j][i]
            qml.Rot(angles[0], angles[1], angles[2], wires=i)

    return [qml.sample(qml.PauliZ(i)) for i in range(num_qubits)]


def cost(params):
    """Count the shots whose PauliZ samples, summed over the qubits, equal 0.

    Args:
        params: rotation angles, forwarded unchanged to ``circuit``.

    Returns:
        The number of entries of the qubit-wise sum that are exactly zero.
    """
    per_shot_total = np.sum(circuit(params), axis=0)
    return np.sum(per_shot_total == 0)

As a sanity check, evaluating the cost function with respect to the randomly initialized parameters cost(params) returns tensor(261, requires_grad=True), which, for 500 shots and no training yet, makes sense. So now I define the gradient descent optimizer and try to update the parameters just once.

# Plain gradient-descent optimizer with a fixed step size.
opt = qml.GradientDescentOptimizer(stepsize = 0.1)
# Attempt a single parameter update; the optimizer differentiates `cost`
# with respect to `params` and returns the updated parameters.
new_params = opt.step(lambda var: cost(var), params)

and the result is

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
TypeError: float() argument must be a string or a number, not 'ArrayBox'

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
<ipython-input-47-3763ed2707ea> in <module>
----> 1 params = opt.step(lambda var: cost(var), params)

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/pennylane/optimize/gradient_descent.py in step(self, objective_fn, grad_fn, *args, **kwargs)
     96         """
     97 
---> 98         g, _ = self.compute_grad(objective_fn, args, kwargs, grad_fn=grad_fn)
     99         new_args = self.apply_grad(g, args)
    100 

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/pennylane/optimize/gradient_descent.py in compute_grad(objective_fn, args, kwargs, grad_fn)
    125         """
    126         g = get_gradient(objective_fn) if grad_fn is None else grad_fn
--> 127         grad = g(*args, **kwargs)
    128         forward = getattr(g, "forward", None)
    129 

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/pennylane/_grad.py in __call__(self, *args, **kwargs)
     94         """Evaluates the gradient function, and saves the function value
     95         calculated during the forward pass in :attr:`.forward`."""
---> 96         grad_value, ans = self._get_grad_fn(args)(*args, **kwargs)
     97         self._forward = ans
     98         return grad_value

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/autograd/wrap_util.py in nary_f(*args, **kwargs)
     18             else:
     19                 x = tuple(args[i] for i in argnum)
---> 20             return unary_operator(unary_f, x, *nary_op_args, **nary_op_kwargs)
     21         return nary_f
     22     return nary_operator

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/pennylane/_grad.py in _grad_with_forward(fun, x)
    111         difference being that it returns both the gradient *and* the forward pass
    112         value."""
--> 113         vjp, ans = _make_vjp(fun, x)
    114 
    115         if not vspace(ans).size == 1:

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/autograd/core.py in make_vjp(fun, x)
      8 def make_vjp(fun, x):
      9     start_node = VJPNode.new_root()
---> 10     end_value, end_node =  trace(start_node, fun, x)
     11     if end_node is None:
     12         def vjp(g): return vspace(x).zeros()

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/autograd/tracer.py in trace(start_node, fun, x)
      8     with trace_stack.new_trace() as t:
      9         start_box = new_box(x, t, start_node)
---> 10         end_box = fun(start_box)
     11         if isbox(end_box) and end_box._trace == start_box._trace:
     12             return end_box._value, end_box._node

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/autograd/wrap_util.py in unary_f(x)
     13                 else:
     14                     subargs = subvals(args, zip(argnum, x))
---> 15                 return fun(*subargs, **kwargs)
     16             if isinstance(argnum, int):
     17                 x = args[argnum]

<ipython-input-47-3763ed2707ea> in <lambda>(var)
----> 1 params = opt.step(lambda var: cost(var), params)

<ipython-input-46-1b01b6714e7e> in cost(params)
      1 def cost(params):
----> 2     sample = np.sum(circuit(params), axis=0)
      3     count = np.sum(sample == 0)
      4     return count

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/pennylane/tape/qnode.py in __call__(self, *args, **kwargs)
    531 
    532         # execute the tape
--> 533         res = self.qtape.execute(device=self.device)
    534 
    535         # FIX: If the qnode swapped the device, increase the num_execution value on the original device.

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/pennylane/tape/tapes/tape.py in execute(self, device, params)
   1068             params = self.get_parameters()
   1069 
-> 1070         return self._execute(params, device=device)
   1071 
   1072     def execute_device(self, params, device):

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/pennylane/tape/tapes/tape.py in execute_device(self, params, device)
   1099 
   1100         if isinstance(device, qml.QubitDevice):
-> 1101             res = device.execute(self)
   1102         else:
   1103             res = device.execute(self.operations, self.observables, {})

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/pennylane/_qubit_device.py in execute(self, circuit, **kwargs)
    206         # generate computational basis samples
    207         if (not self.analytic) or circuit.is_sampled:
--> 208             self._samples = self.generate_samples()
    209 
    210         # compute the required statistics

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/pennylane/_qubit_device.py in generate_samples(self)
    415         rotated_prob = self.analytic_probability()
    416 
--> 417         samples = self.sample_basis_states(number_of_states, rotated_prob)
    418         return QubitDevice.states_to_binary(samples, self.num_wires)
    419 

~/opt/anaconda3/envs/qml/lib/python3.8/site-packages/pennylane/_qubit_device.py in sample_basis_states(self, number_of_states, state_probability)
    431         """
    432         basis_states = np.arange(number_of_states)
--> 433         return np.random.choice(basis_states, self.shots, p=state_probability)
    434 
    435     @staticmethod

mtrand.pyx in numpy.random.mtrand.RandomState.choice()

ValueError: setting an array element with a sequence.

Am I doing something wrong, or is it an issue with the experimental qml.tape package? If it is a deeper issue, what would you recommend as a work-around?

Thanks so much for your help in advance!

Hi @ryanhill1!

Unfortunately, the behavior you see is expected — PennyLane currently does not support differentiation of QNodes that return samples; only gradients of deterministic measurement statistics, such as expectation values, are supported.

This is because sampling is stochastic in nature, and the gradient is not well defined. (A similar problem occurs in classical ML software, where introducing stochasticity will break the automatic differentiation).

The latest development version of PennyLane, however, has a new feature that will solve this problem — you can now pass batches of shots to the QNode, which will be taken into account when performing measurement statistics such as expectation values.

For example, consider:

dev = qml.device("default.qubit", wires=num_qubits, shots=[(1, shots)])

Here, we are specifying to the device that we want to compute single-shot expectation values, shots number of times. Since we are now requesting expectation values (albeit, single-shot expectation values), the gradient is now well-defined! We can continue as in your code, with slight tweaks:

import pennylane as qml
import pennylane.numpy as np

# Circuit size and sampling budget.
num_qubits = 2
shots = 500
depth = 2

# Random starting angles, three per (layer, qubit) pair.
scale = 2 * np.pi
params = scale * np.random.randn(depth, num_qubits, 3)

# Shot batches: request `shots` separate single-shot evaluations.
shot_batches = [(1, shots)]
dev = qml.device("default.qubit", wires=num_qubits, shots=shot_batches)


@qml.qnode(dev)
def circuit(params):
    """Apply a layered single-qubit ansatz and measure <PauliZ> on every qubit.

    Args:
        params: rotation angles with shape ``(depth, num_qubits, 3)``;
            ``params[j][i]`` holds the three ``qml.Rot`` angles applied to
            qubit ``i`` in layer ``j``.

    Returns:
        A list with one ``qml.expval(qml.PauliZ(i))`` measurement per qubit.
    """
    for i in range(num_qubits):
        qml.Hadamard(wires=i)

    for i in range(num_qubits):
        qml.RX(np.pi / 4.0, wires=i)

    for i in range(num_qubits):
        for j in range(depth):
            # The layer axis comes first in ``params``. Indexing it as
            # params[i][j] only works while num_qubits == depth and fails
            # (or picks the wrong angles) otherwise.
            angles = params[j][i]
            qml.Rot(angles[0], angles[1], angles[2], wires=i)

    return [qml.expval(qml.PauliZ(i)) for i in range(num_qubits)]


def cost(params):
    """Count how many entries of the summed circuit output are exactly zero.

    Args:
        params: rotation angles, forwarded unchanged to ``circuit``.

    Returns:
        The number of zero entries after transposing the circuit output and
        summing it along its first axis.
    """
    outcomes = circuit(params).T
    totals = np.sum(outcomes, axis=0)
    return np.sum(totals == 0)

If you try to differentiate the cost function, it will now work, however you will find that the gradient will always be zero:

>>> qml.grad(cost)(params)
/home/josh/miniconda3/envs/37/lib/python3.7/site-packages/autograd/tracer.py:14: UserWarning: Output seems independent of input.
  warnings.warn("Output seems independent of input.")
[[[0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]]]

As far as I can tell, this is because Autograd (which PennyLane is using as a classical autodiff backend) does not support boolean operations such as np.sum(sample == 0). I tried replacing it with np.count_nonzero(sample), but ran into the same issue.

This is most likely because these operations are discrete, and thus not differentiable.

You may have to reformulate your cost function to be a continuous function before computing the gradient will work!

1 Like