Backward Errors while Training Quantum Circuit

Hi, I met some problems while doing loss.backward() related to the trainable parameters of
quantum circuit.

The example code:

import pennylane as qml
import torch
import torch.nn as nn
import numpy as np
import math

def create_qnode(info_size, anc_size):
    
    info_qubit = int(np.ceil(math.log2(info_size)))     
    anc_qubit = int(np.ceil(math.log2(anc_size)))  
    total_qubit = info_qubit + anc_qubit
    
    dev = qml.device("default.qubit", wires=total_qubit)

    @qml.qnode(dev)
    def qnode(input_feature, weight):
        
        qml.AmplitudeEmbedding(features=input_feature, wires=range(total_qubit), normalize=True)
        for ii in range(5):
            for jj in range(anc_qubit):
                qml.RY(weight[jj], wires=info_qubit+jj)
            for jj in range(anc_qubit-1):
                qml.CNOT(wires=[info_qubit+jj,info_qubit+jj+1])

        return qml.probs(wires=list(range(total_qubit)))
            
    return qnode


def QuantumCircuit(input_state, info_size, anc_size, params):
    
    qnode = create_qnode(info_size=info_size, anc_size=anc_size)
    qnode_output_tmp = qnode(input_state.clone(),params[0].detach().numpy())       
    qnode_output = torch.from_numpy(qnode_output_tmp)
    
    return qnode_output



information_size = 4
ancilla_size = 4

q_params = nn.ParameterList([nn.Parameter(torch.randn(5 * information_size), requires_grad=True)])
output = QuantumCircuit(input_state=torch.arange(16), info_size=information_size, anc_size=ancilla_size, params=q_params)

target = output[0]
label = torch.tensor(0.5,dtype=torch.float64)

criterion = nn.BCELoss()
loss = criterion(output[0],label)
loss.backward()

I get an error from it:

And I thought the error meant it was unsuccessful while computing the gradient on parameters.
This line from the self-defined function QuantumCircuit(...) might have caused this error.

    qnode_output_tmp = qnode(input_state.clone(), params[0].detach().numpy())       

The variable/parameter params[0].detach().numpy() detaches the params[0] from the current computational graph and makes loss.backward() fail to require gradient, so I thought this is why the above error occurred.

However, when I removed detach() from the params[0], such as:

qnode_output_tmp = qnode(input_state.clone(),params[0])

And the other error might show:

Please help me if you have any idea to fix the above error, thanks! :face_holding_back_tears: :face_holding_back_tears:

Hey @mini!

I highly recommend that you use PennyLane’s builtin functionality for interfacing with PyTorch. Here are some resources:

I don’t immediately see anything in your code that can’t be done with our PyTorch functionality :slight_smile:. It will be much easier for you and me to debug if you use what we have.

Let me know if this helps!

1 Like

@isaacdevlugt Thanks for replying.
I tried to use TorchLayer in my model, such as below

import pennylane as qml
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import math


def create_qnode(info_size, anc_size, layer_num):
    
    info_qubit = int(np.ceil(math.log2(info_size)))     
    anc_qubit = int(np.ceil(math.log2(anc_size)))  
    total_qubit = info_qubit + anc_qubit
    
    dev = qml.device("default.qubit", wires=total_qubit)

    @qml.qnode(dev)
    def qnode(inputs, weight):
        
        qml.AmplitudeEmbedding(features=inputs, wires=range(total_qubit), normalize=True)
        for ii in range(layer_num):
            for jj in range(anc_qubit):
                qml.RY(weight[jj], wires=info_qubit+jj)
            for jj in range(anc_qubit-1):
                qml.CNOT(wires=[info_qubit+jj,info_qubit+jj+1])

        return qml.probs(wires=list(range(total_qubit)))
            
    return qnode



class QuantumSystem(nn.Module):
    def __init__(self, info_size_1, anc_size, layer_num):
        
        super().__init__()
        
        self.info_size_1 = info_size_1
        self.anc_size = anc_size
        self.layer_num = layer_num
        
        self.anc_qubit = int(np.ceil(math.log2(anc_size)))  

        
        self.qnode_1 = create_qnode(self.info_size_1, self.anc_size, self.layer_num)
        self.weight_shape_1 = {"weight": (self.layer_num, self.anc_qubit)}
        self.qlayer_1 = qml.qnn.TorchLayer(self.qnode_1, self.weight_shape_1)
        
        
        
    def forward(self, x):
        
        
        output_tmp = self.qlayer_1(x)
        
        final_output = torch.sqrt(output_tmp)      
        # do some postprocessing for the qnode_1 output result, just take torch.sqrt() for example.    

        
        return final_output
        
        

model = QuantumSystem(info_size_1=4, anc_size=4, layer_num=5)        
noise = torch.randn(16)


label = torch.tensor(0.5,dtype=torch.float32)


criterion = nn.BCELoss()
opt = optim.SGD(model.parameters(),lr=0.1)
# print(output.shape)
# print(target)
# print(label)
   
        

    
for epoch in range(1000):
    
    opt.zero_grad()
    
    output = model(noise)
    target = output[0,0]
    print(target)
    
    loss = criterion(target,label)
    loss.backward() 

    opt.step()
    
    
    print("Epoch:{0}, Loss:{1}".format(epoch,loss))

The code worked, but the training was not stable. Sometimes it converges to a result that is far from the given label. Does this happen because the expressibility of my Quantum Circuit is not high enough?
And if it’s possible, could you please give me some advice for my TorchLayer coding? I’m not sure whether the method I coded is reasonable or not while adding the customized qnode into the class.
Thank you!

The code worked, but the training was not stable. Sometimes it converges to a result that is far from the given label.

The answer to this question is multi-faceted — it could be your parameter initialization strategy, the presence of Barren plateaus, a poor choice of cost function, your optimization method (e.g., gradient descent, ADAM, etc.), and more. The trainability and expressibility of neural networks (quantum or classical) is a non-trivial topic and an active area of research!

And if it’s possible, could you please give me some advice for my TorchLayer coding? I’m not sure whether the method I coded is reasonable or not while adding the customized qnode into the class.

It seems fine to me :slight_smile:. Although, you might be able to simplify your code by not using a class inheriting from nn.Module since your model just has one layer in it. Check out the second link I posted in my first response to you!

@isaacdevlugt Thanks again!
I have another question about applying TorchLayer. (My model will contain multiple layers, so I still make it inherit from nn.Module)

I’m confused about how to create multiple qlayer depending on the input x dimension.

I try to use the list self.qlayer = [] to save the qlayers, but this might cause the optimizer cannot receive the parameter list from my model. ValueError: optimizer got an empty parameter list

The details of the error message and the full code are shown below.

Is there any suitable method for creating multiple qlayer based on input dimension in the class?

The error message:

The full code:

import pennylane as qml
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import math


def create_qnode(info_size, anc_size, layer_num):
    
    info_qubit = int(np.ceil(math.log2(info_size)))     
    anc_qubit = int(np.ceil(math.log2(anc_size)))  
    total_qubit = info_qubit + anc_qubit
    
    dev = qml.device("default.qubit", wires=total_qubit)

    @qml.qnode(dev)
    def qnode(inputs, weights):
        
        qml.AmplitudeEmbedding(features=inputs, wires=range(total_qubit), normalize=True)
        for ii in range(layer_num):
            for jj in range(anc_qubit):
                qml.RY(weights[ii][jj], wires=info_qubit+jj)
            for jj in range(anc_qubit-1):
                qml.CNOT(wires=[info_qubit+jj,info_qubit+jj+1])

        return qml.probs(wires=list(range(total_qubit)))
            
    return qnode



class QuantumSystem(nn.Module):
    def __init__(self, info_total_size, anc_size, layer_num):
        
        super().__init__()
        
        self.info_dim = info_total_size[0]
        self.info_size = info_total_size[1]
        self.info_qubit = int(np.ceil(math.log2(self.info_size)))     
        
        self.anc_size = anc_size
        self.anc_qubit = int(np.ceil(math.log2(anc_size)))  
        
        self.layer_num = layer_num

        self.qnode = create_qnode(self.info_size, self.anc_size, self.layer_num)
        self.weight_shape = {"weights": (self.layer_num, self.anc_qubit)}
        self.qlayer = []
        for i in range(self.info_dim):
            self.qlayer.append(qml.qnn.TorchLayer(self.qnode, self.weight_shape))
            

        
    def forward(self, x):

        final_output = torch.zeros(self.info_dim, int(2**self.info_size))
        
        for input_dim in range(self.info_dim):
            output_tmp = self.qlayer[input_dim](x[input_dim])
        
            # do some postprocessing for the qlayer output, just take torch.sqrt() for example.    
            output_tmp = torch.sqrt(output_tmp)      

            final_output[input_dim,:] = output_tmp

        return final_output


model = QuantumSystem(info_total_size=(3,4), anc_size=4, layer_num=5)    
noise = torch.reshape(torch.randn(3*16), (3,16))
output = model(noise) 
print('model output: ')
print(output)

label = torch.full((3,), 0.5,dtype=torch.float32)

criterion = nn.BCELoss()
opt = optim.SGD(model.parameters(),lr=0.1)

   

# Training
for epoch in range(1000):
    
    opt.zero_grad()
    
    output = model(noise)
    target = output[:,0]
    
    loss = criterion(target,label)
    loss.backward() 

    opt.step()
    
    
    print("Epoch:{0}, Loss:{1}".format(epoch,loss))

I think the issue is with self.qlayer. It’s just that it’s a plain old list and torch needs to see it as a model itself:

class QuantumSystem(nn.Module):
    def __init__(self, info_total_size, anc_size, layer_num):
        super().__init__()

        self.info_dim = info_total_size[0]
        self.info_size = info_total_size[1]
        self.info_qubit = int(np.ceil(math.log2(self.info_size)))

        self.anc_size = anc_size
        self.anc_qubit = int(np.ceil(math.log2(anc_size)))

        self.layer_num = layer_num

        self.qnode = create_qnode(self.info_size, self.anc_size, self.layer_num)
        self.weight_shape = {"weights": (self.layer_num, self.anc_qubit)}
        self.qlayer = []
        for i in range(self.info_dim):
            self.qlayer.append(qml.qnn.TorchLayer(self.qnode, self.weight_shape))
        self.layers = torch.nn.Sequential(*self.qlayer)

    def forward(self, x):
        final_output = torch.zeros(self.info_dim, int(2**self.info_size))

        for input_dim in range(self.info_dim):
            output_tmp = self.layers[input_dim](x[input_dim])

            # do some postprocessing for the qlayer output, just take torch.sqrt() for example.
            output_tmp = torch.sqrt(output_tmp)

            final_output[input_dim, :] = output_tmp

        return final_output

Wrapping everything in Sequential seems to work for me :slight_smile:

model output: 
tensor([[0.3163, 0.0572, 0.1248, 0.0311, 0.3536, 0.0639, 0.5973, 0.2477, 0.1150,
         0.1619, 0.0878, 0.2923, 0.1062, 0.1114, 0.3564, 0.2228],
        [0.0131, 0.0589, 0.0191, 0.2001, 0.2947, 0.2712, 0.0328, 0.1475, 0.1682,
         0.1591, 0.5194, 0.4568, 0.0923, 0.0876, 0.3426, 0.3272],
        [0.7746, 0.3823, 0.3189, 0.0036, 0.0428, 0.0525, 0.0348, 0.1292, 0.0166,
         0.0772, 0.2937, 0.0271, 0.0428, 0.1497, 0.0729, 0.0829]],
       grad_fn=<CopySlices>)
Epoch:0, Loss:1.2708743810653687
Epoch:1, Loss:1.0129913091659546
Epoch:2, Loss:0.9862573742866516
1 Like