Quantum natural gradient optimizer with user-defined gates

Hi,

I was trying to use the quantum natural gradient (QNG) optimizer on a toy VQE problem where the ansatz employs user-defined gates ‘XX’ and ‘YY’ (defined below).

When trying to take an optimizer step, I got the following error: “ValueError: solve1: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (m,m),(m)->(m) (size 3 is different from 4)” coming from

np.linalg.solve(self.metric_tensor, grad_flat)

I didn’t encounter this error when using a circuit with PennyLane-defined gates. I think the error might be because I use the same parameter for both the XX and YY gates, but I wasn’t sure how to fix it.

This was my code:

from pennylane import numpy as np
import pennylane as qml

dev = qml.device("default.qubit", wires=2)
dev.operations.update({"XX", "YY"})

def circuit(params, wires=[0, 1]):
    qml.PauliX(wires=wires[1])
    qml.RZ(params[0], wires=wires[0])
    qml.RZ(params[1], wires=wires[1])
    XX(params[2], wires=wires)
    YY(params[2], wires=wires)

coeffs = [1, 1]
obs = [qml.PauliX(0), qml.PauliZ(1)]
H = qml.Hamiltonian(coeffs, obs)
cost_fn = qml.ExpvalCost(circuit, H, dev)

init_params = np.random.uniform(size=3)

opt = qml.QNGOptimizer(stepsize=0.01, diag_approx=False)

params, prev_energy = opt.step_and_cost(cost_fn, init_params)

And the two user-defined gates are:

X = np.array([[0, 1], [1, 0]])     # Pauli-X matrix used in the generator/matrix below
Y = np.array([[0, -1j], [1j, 0]])  # Pauli-Y matrix used in the generator/matrix below

class XX(qml.operation.Operation):
    num_params = 1 
    num_wires = 2 
    par_domain = "R" 
    grad_method = "A" 
    generator = [np.kron(X, X), -0.5]
    
    @staticmethod
    def _matrix(*params):
        return np.cos(params[0]/2)*np.eye(4) + 1.j*np.sin(params[0]/2)*np.kron(X,X)

class YY(qml.operation.Operation):
    num_params = 1 
    num_wires = 2 
    par_domain = "R" 
    grad_method = "A"
    generator = [np.kron(Y, Y), -0.5]
    
    @staticmethod
    def _matrix(*params):
        return np.cos(params[0]/2)*np.eye(4) + 1.j*np.sin(params[0]/2)*np.kron(Y,Y)

Hi @hsim!

The issue here is that the metric tensor is only defined for the quantum circuit gate arguments, while the optimizer is instead optimizing the QNode arguments.

This is a bit of a subtlety: in most cases, there is a 1-1 mapping between QNode arguments and gate arguments, so this issue doesn’t arise! As you note, however, the parameter shared by the XX and YY gates breaks this 1-1 mapping.

In other words: the optimizer is assuming that the object it is optimizing is a pure quantum circuit with a well-defined metric tensor with respect to QNode arguments, which is not the case.
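
To see the mismatch concretely, you can compare the shape of the metric tensor (one row and column per gate argument) with the shape of the gradient (one entry per QNode argument). A minimal sketch, assuming the cost_fn and init_params from your code above:

# The metric tensor is indexed by gate arguments (RZ, RZ, XX, YY -> 4 of them),
# while the gradient is indexed by QNode arguments (3 of them), hence the
# "size 3 is different from 4" error inside QNGOptimizer.
mt = qml.metric_tensor(cost_fn.qnodes[0])(init_params)
grad = qml.grad(cost_fn)(init_params)
print(mt.shape)    # expected: (4, 4)
print(grad.shape)  # expected: (3,)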

Workarounds

One workaround is to write out the QNG optimization manually using qml.metric_tensor (which extracts the metric tensor of the underlying quantum circuit), extending it to take into account the classical processing between QNode arguments and gate arguments.

This is pretty simple to do.

Let

  • x\in \mathbb{R}^m be the QNode parameters

  • f: \mathbb{R}^m \rightarrow \mathbb{R}^n be the function representing the transformation from QNode arguments to gate arguments,

  • q: \mathbb{R}^n \rightarrow \mathbb{R} be the quantum function,

  • C = q \circ f: \mathbb{R}^m\rightarrow \mathbb{R} the overall cost function, and

  • g^{-1} be the pseudo-inverse of the Fubini-Study metric tensor of q.

Our cost function is therefore C(x) = q(f(x)), with a (quantum) natural gradient of

\nabla_{\text{NatGrad}} C(x) = \mathbf{J}_q(f(x)) \, g^{-1} \, \mathbf{J}_f(x).
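
For your original circuit, for example, the trainable gate arguments are (x_1, x_2, x_3, x_3), since the third QNode parameter feeds both the XX and YY gates. So f(x_1, x_2, x_3) = (x_1, x_2, x_3, x_3), and \mathbf{J}_f is the constant 4\times 3 matrix

\mathbf{J}_f = \begin{bmatrix} 1 & 0 & 0 \\ 0 & 1 & 0 \\ 0 & 0 & 1 \\ 0 & 0 & 1 \end{bmatrix},

which is exactly the source of the "size 3 is different from 4" mismatch (m = 3 QNode arguments, n = 4 gate arguments).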

We can use the qml.transforms.classical_jacobian() transform to extract \mathbf{J}_f(x), qml.metric_tensor() to extract g, and the (not well documented) qnode.qtape.jacobian method to extract \mathbf{J}_q(x):

from pennylane import numpy as np
import pennylane as qml
from scipy.linalg import pinvh

dev = qml.device("default.qubit", wires=2)

@qml.qnode(dev)
def circuit(params, wires=[0, 1]):
    qml.Hadamard(wires=wires[0])
    qml.Hadamard(wires=wires[1])
    qml.RZ(params[0], wires=wires[0])
    qml.RZ(params[0], wires=wires[1])
    qml.CNOT(wires=[0, 1])
    qml.RZ(params[1], wires=wires[1])
    qml.CRX(params[2], wires=wires)
    qml.CRY(params[2], wires=wires)
    return qml.expval(qml.PauliY(0))

init_params = np.array([0.1, 0.2, 0.3])

# extract Jf(x)
Jf = qml.transforms.classical_jacobian(circuit)(init_params)

# extract Jq(x); f is linear here, so the gate arguments are simply Jf @ init_params
circuit.construct([init_params], {})
Jq = circuit.qtape.jacobian(params=Jf @ init_params, device=dev)

# extract g, the metric tensor of the underlying circuit
g = qml.metric_tensor(circuit)(init_params)

# compute the natural gradient
print("NatGrad =", Jq @ pinvh(g) @ Jf)

Or, writing this as a function:

def natgrad(qnode):
    Jf = qml.transforms.classical_jacobian(qnode)
    g = qml.metric_tensor(qnode)
    dev = qnode.device

    def _natgrad(params):
        qnode.construct([params], {})
        gate_params = Jf(params) @ params
        Jq = qnode.qtape.jacobian(params=gate_params, device=dev)
        return Jq @ pinvh(g(params)) @ Jf(params)

    return _natgrad

natgrad(circuit)(init_params)

Note: in the case where \mathbf{J}_f = I, this reduces to the standard natural gradient result:

@qml.qnode(dev)
def circuit(params, wires=[0, 1]):
    qml.Hadamard(wires=wires[0])
    qml.Hadamard(wires=wires[1])
    qml.RZ(params[0], wires=wires[0])
    # qml.RZ(params[0], wires=wires[1])
    qml.CNOT(wires=[0, 1])
    qml.RZ(params[1], wires=wires[1])
    qml.CRX(params[2], wires=wires)
    # qml.CRY(params[2], wires=wires)
    return qml.expval(qml.PauliY(0))

print("Our extended natgrad = ", natgrad(circuit)(init_params))

g = qml.metric_tensor(circuit)(init_params)
print("Standard natgrad =", pinvh(g) @ qml.grad(circuit)(init_params))

Additional thoughts:

  1. We should add a qml.transforms.quantum_jacobian function to make it easier to extract the quantum portion of the Jacobian directly :slight_smile:

  2. Long term, it may make sense to build this directly into PennyLane. That is, re-position QNG as a gradient method, rather than an optimization technique.

Thanks, @josh!

How would I extend this for qml.ExpvalCost, which, if I understood correctly, uses a collection of QNodes?

No worries @hsim!

I’ve generalized my previous answer to account for ExpvalCost:

def quantum_jacobian(qnode):
    """Returns a function that computes the purely
    quantum Jacobian of a QNode"""
    if isinstance(qnode, qml.ExpvalCost):
        qnodes = qnode.qnodes
        coeffs = qnode.hamiltonian.coeffs

        def _jacobian(*args, **kwargs):
            # the Jacobian of <H> = sum_i c_i <O_i> is the coefficient-weighted
            # sum of the individual QNode Jacobians
            jacs = [quantum_jacobian(q)(*args, **kwargs) for q in qnodes]
            return qml.math.sum([c * j for c, j in zip(coeffs, jacs)], axis=0)

        return _jacobian

    dev = qnode.device

    def _jacobian(*args, **kwargs):
        qnode.construct(args, kwargs)
        params = qml.math.stack(qnode.qtape.get_parameters())

        def func(params):
            return qml.math.stack(qnode.qtape.execute(params=params, device=dev))

        return qml.jacobian(func)(params)

    return _jacobian


def natgrad(qnode):
    """Returns a function that computes
    the natural gradient of a QNode"""
    Jq = quantum_jacobian(qnode)
    g = qml.metric_tensor(qnode)

    if isinstance(qnode, qml.ExpvalCost):
        qnode = qnode.qnodes[0]

    Jf = qml.transforms.classical_jacobian(qnode)

    def _natgrad(*args, **kwargs):
        mt = g(*args, **kwargs)   # metric tensor g of the underlying circuit
        c = Jf(*args, **kwargs)   # classical Jacobian Jf (QNode args -> gate args)
        q = Jq(*args, **kwargs)   # quantum Jacobian Jq (gate args -> expectation)
        # natural gradient: Jq . g^{-1} . Jf
        ng = qml.math.dot(q, pinvh(mt))
        return qml.math.squeeze(qml.math.dot(ng, c))

    return _natgrad

With these functions defined, the following now works:

dev = qml.device("default.qubit", wires=2)

def circuit(params, wires=[0, 1]):
    qml.Hadamard(wires=wires[0])
    qml.Hadamard(wires=wires[1])
    qml.RZ(params[0], wires=wires[0])
    qml.RZ(params[0], wires=wires[1])
    qml.CNOT(wires=[0, 1])
    qml.RZ(params[1], wires=wires[1])
    qml.CRX(params[2], wires=wires)
    qml.CRY(params[2], wires=wires)

coeffs = [1, 1]
obs = [qml.PauliX(0), qml.PauliZ(1)]
H = qml.Hamiltonian(coeffs, obs)

circuit = qml.ExpvalCost(circuit, H, dev, interface="autograd")
init_params = np.array([0.1, 0.2, 0.3])

>>> print("Cost function=", circuit(init_params))
Cost function= 0.8501644188535566
>>> print("Our extended natgrad = ", natgrad(circuit)(init_params))
Our extended natgrad =  [ -0.79210435   0.666934   -44.93205788]

Disclaimer: I’ve tested that this produces the correct output when parameters are not repeated, but have not tested it otherwise!
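
As a final note, here is a minimal sketch of how you could plug natgrad into a manual gradient-descent loop in place of QNGOptimizer (using the ExpvalCost circuit and init_params from above; the step size and number of steps are arbitrary):

# manual QNG descent: params <- params - stepsize * natural gradient
stepsize = 0.01
params = init_params
grad_fn = natgrad(circuit)

for step in range(50):
    params = params - stepsize * grad_fn(params)
    print("Step", step, "cost =", circuit(params))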