Hybrid-quantum-classical convolutional neural network, the gradient of circuit parameters can not update

#  # -*- coding: utf-8 -*-
import torch
import torch.nn.functional as F
from torch import nn
from quanvolutional_neural_network_plankton_2_quantum_layer import Quanvolution
from torchvision import transforms, datasets
import torch.optim as optim
from tensorboardX import SummaryWriter
from PIL import Image
from scipy.io import loadmat
from torch.utils.data import Dataset, DataLoader, TensorDataset
import numpy as np
from sklearn.metrics import f1_score,precision_score,recall_score,confusion_matrix
import xlwt
import warnings
from collections import Counter
import seaborn as sns
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

def write_excel_xls(path,sheet_name,colum_value,colum_name):    
    colum_num=len(colum_name)
#    value = value.tostring()
    workbook = xlwt.Workbook(encoding = 'utf-8')  #新建一个工作簿
    sheet = workbook.add_sheet(sheet_name)  # 在工作簿中新建一个表格
    for i in range(0,colum_num):
       sheet.write(0,i,str(colum_name[i]))  # 像表格中写入数据(对应的行和列)
       value = colum_value[i]#获取第i列的数据
       index = len(value)  # 获取需要写入数据的行数       
       for j in range(0, index):
        #for j in range(0, len(value[i])):
          sheet.write(j+1,i,str(value[j]))  # 像表格中写入数据(对应的行和列)
    workbook.save(path)  # 保存工作簿
    print("xls格式表格写入数据成功!")

def load_Phytoplankton_data(flieName,type):    
    X = loadmat(flieName)   
    return X[type]

def data_load():
    f_train_x = 'dataset/phytoplankton_train_x_20_20_4_600.mat'
    f_train_y = 'dataset/phytoplankton_train_y_20_20_4_600.mat'
    f_test_x= 'dataset/phytoplankton_test_x_20_20_4_600.mat'
    f_test_y = 'dataset/phytoplankton_test_y_20_20_4_600.mat'
    #f_train_x = 'dataset/phytoplankton_train_x_20_20_8_1200.mat'
    #f_train_y = 'dataset/phytoplankton_train_y_20_20_8_1200.mat'
    #f_test_x= 'dataset/phytoplankton_test_x_20_20_8_1200.mat'
    #f_test_y = 'dataset/phytoplankton_test_y_20_20_8_1200.mat'
    X_train = load_Phytoplankton_data(f_train_x,"train_x")
    Y_train = load_Phytoplankton_data(f_train_y,"train_y")
    X_test = load_Phytoplankton_data(f_test_x,"test_x")
    Y_test = load_Phytoplankton_data(f_test_y,"test_y")
    return X_train, X_test, Y_train, Y_test

x_train, x_test, y_train, y_test = data_load()
x_train, x_test = x_train[..., np.newaxis] / 255.0, x_test[..., np.newaxis] / 255.0  # normalize the data
x_train, x_test = (x_train - x_train.min()) * (np.pi / (x_train.max() - x_train.min())),\
                 (x_test - x_test.min()) * (np.pi / (x_test.max() - x_test.min())) 
X_train = torch.Tensor(x_train)
X_test = torch.Tensor(x_test)

X_train = torch.reshape(X_train,(600,1,20,20))
X_test = torch.reshape(X_test,(600,1,20,20))

y_train = torch.Tensor(y_train)
y_test = torch.Tensor(y_test)
y_train = y_train.long()
y_test = y_test.long()
Y_train = torch.reshape(y_train,(600,))
Y_test = torch.reshape(y_test,(600,))

train = TensorDataset(X_train, Y_train)
test = TensorDataset(X_test, Y_test)
trainset = torch.utils.data.DataLoader(train, batch_size=15, shuffle=True)
testset = torch.utils.data.DataLoader(test, batch_size=15, shuffle=False)

class MyLayer1(nn.Module):
    # 初始化:输入输出单元数,权重,偏置
    def __init__(self,channel_in=1,channel_out=4,kernel_size=2,stride=2,padding=1,bias=True): # define parameters
        super(MyLayer1,self).__init__()#调用父类的构造函数
        # self.channel_out = 3
        # self.kernel_size = kernel_size
        # self.stride = stride
        # self.padding = padding
        self.qconv = Quanvolution(channel_in,channel_out,kernel_size,stride,padding)
        self.weight = nn.Parameter(torch.ones(1,28),requires_grad=True)#随机产生一个in_channel*out_channel的矩阵权重。又由于权重是要不断训练的,需要将其绑定为一个可以训练的参数于是需要使用Parameter
        if bias:
            self.bias = nn.Parameter(torch.Tensor(1,28).view(1,-1),requires_grad=True)#注意⚠️这边的b是自定义层中自带的b,而不是神经网络中的卷积核的偏置,因此维数需要和输入单元数一样
        else:
            self.register_parameter('bias',None)#取消bias这个参数
    def forward(self,x):#前向传播计算				
        output = self.qconv.quanv(x,self.weight,self.bias) 
        return output


class MyNet(nn.Module):
    
    def __init__(self): # define each layer
        super(MyNet,self).__init__()
        self.myLayer1 = MyLayer1(channel_in=1,channel_out=4,kernel_size=2,stride=2,padding=1)
        # self.batchnorm0 = nn.BatchNorm2d(3) #卷积层之后总会添加BatchNorm2d进行数据的归一化处理,这使得数据在进行Relu之前不会因为数据过大而导致网络性能的不稳定
        #self.relu0 = nn.ReLU()
        # self.maxpool0 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # self.myLayer2 = MyLayer2(channel_in=4,channel_out=9,kernel_size=3,stride=1,padding=1)
        self.conv1 = nn.Conv2d(in_channels=4, out_channels=9, kernel_size=3, stride=1, padding=1)
        # self.batchnorm1 = nn.BatchNorm2d(8) #卷积层之后总会添加BatchNorm2d进行数据的归一化处理,这使得数据在进行Relu之前不会因为数据过大而导致网络性能的不稳定
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2,padding=0)#8*7*7
        
        # self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1)
        # # self.batchnorm2 = nn.BatchNorm2d(16) #卷积层之后总会添加BatchNorm2d进行数据的归一化处理,这使得数据在进行Relu之前不会因为数据过大而导致网络性能的不稳定
        # self.relu2 = nn.ReLU()
        # self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)#8*7*7
        
        self.fc1 = nn.Linear(in_features = 5*5*9, out_features = 120) #输入为28*28时,in_features=392,输入为100*100时,in_features=25*25*8
        # self.relu2 = nn.ReLU()
        # self.fc2 = nn.Linear(in_features= 120, out_features=64)
        self.relu3 = nn.ReLU()
        self.fc3 = nn.Linear(in_features = 120, out_features = 8)        
    
    def forward(self,x): # connect each layer
        input_size = x.size(0)
        x = self.myLayer1(x)
        # x = self.batchnorm0(x)
        #x = self.relu0(x)
        # x = self.maxpool0(x)
        # x = self.myLayer2(x)
        x = self.conv1(x)      
        x = self.relu1(x)
        x = self.maxpool1(x)
        
        # x = self.conv2(x)
        # # x = self.batchnorm2(x)
        # x = self.relu2(x)        
        # x = self.maxpool2(x)
        # x = np.array(x).reshape(input_size,-1)
        x = x.view(input_size,-1)
        x= self.fc1(x)
        # x = self.relu2(x)     
        # x = self.fc2(x)
        x =self.relu3(x)
        x = self.fc3(x)
        return x

model = MyNet()
################使用显卡
if torch.cuda.is_available():
    model = model.cuda() #使用gup计算要将模型、损失函数、数据调用cuda()函数
loss = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss = loss.cuda()
# print(model)  #输出网络的结构情况
# input = torch.ones((10, 1, 28, 28))
# output = model(input)
# print(output.shape)  # 输出output的尺寸

# loss_fn = torch.nn.CrossEntropyLoss() # multi-classification
# loss_fn = torch.nn.HingeEmbeddingLoss() # SVM
# loss_fn = torch.nn.MSELoss() # Regression
# optimizer = torch.optim.Adagrad(model.parameters(), lr = learning_rate)
# loss_function = nn.CrossEntropyLoss()
#optimizer = optim.Adam(model.parameters(), lr=0.001)
# # optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
# # optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)
optimizer = torch.optim.SGD(model.parameters(), lr = 0.05)
# # optimizer = torch.optim.Adadelta(model.parameters(), lr = learning_rate)






test_loss_list = []
train_loss_list =[]

for epoch in range(50): # 3 full passes over the data
    model.train()
    index = 0
    print("第{}个epoch".format(epoch))
    for data in trainset:  # `data` is a batch of data
        index =index+1
        img,label = data
        if torch.cuda.is_available():
           img , label = img.cuda() ,label.cuda()
        output_train = model(img)
        l = loss(output_train,label)
        for name, param in model.named_parameters():
            #print(name, param.requires_grad)
             print(name, param.grad)
        train_loss_list.append(l.item())
        model.zero_grad()  # sets gradients to 0 before loss calc. You will do this likely every step.      
        l.backward()  # apply this loss backwards thru the network's parameters
        optimizer.step()  # attempt to optimize weights to account for loss/gradients
        # if(index%10==0):
        #     print("epoch{},第{}次,Loss:{:.4f}".format(epoch,index,l))
    # excel_loss_path = 'Result/' + str(epoch) + 'epoch_trainloss.xls' #存放数据路径
    # excel_sheetname =  str(epoch)+"trainloss"#存放数据工作表           
    # colum_loss_title=["trainloss"]
    # colum_loss_value=[train_loss_list]           
    # write_excel_xls(excel_loss_path,excel_sheetname,colum_loss_value,colum_loss_title)    
    model.eval()
    total_test_loss = 0
    total_test_acc = 0
    label_list=[]
    predict_out_list=[]
    with torch.no_grad():#torch.no_grad()
         for data in testset:
             img, label = data
             if torch.cuda.is_available():
                 img , label = img.cuda() ,label.cuda()
             outputs = model(img)
             test_l = loss(outputs,label)
             # test_loss_list.append(test_l)
             # writer.add_scalar('test_loss',loss)
             total_test_loss += test_l.item()
             acc = (outputs.argmax(1)==label).sum() 
             total_test_acc += acc.item()
             label_list.extend(label.tolist())
             predict_out_list.extend(outputs.argmax(1).tolist())
  

import pennylane as qml
from pennylane import numpy as np
from pennylane.templates import RandomLayers
# import tensorflow as tf
# from tensorflow import keras
from math import ceil
import matplotlib.pyplot as plt
# from tensorflow.python.keras import datasets, layers, optimizers, Sequential, metrics
import torch
import torch.nn.functional as F
import time        
dev = qml.device("default.mixed", wires = 4,shots = 1500)
@qml.qnode(dev,interface='torch',diff_method="parameter-shift")
def circuit(phi,params): 
    #for j in range(3):
        #qml.RZ(np.pi * float(phi[0][j]), wires=j)     
            for k in range(2):
                 qml.RY(np.pi * float(phi[0][k]), wires=k)  
            for h in range(2):
                 qml.RY(np.pi * float(phi[1][h]), wires=(2+h)) 
    ###################################################################
    #以下为图6
                      
            for j in range(4):
                qml.RX(float(params[0,j]), wires=j)
            for j in range(4):
                  qml.RZ(float(params[0,(4+j)]), wires=j)
                
            qml.CRX(float(params[0,8]),wires=(3,2))
            qml.CRX(float(params[0,9]),wires=(3,1))
            qml.CRX(float(params[0,10]),wires=(3,0))        
            
            qml.CRX(float(params[0,11]),wires=(2,3))
            qml.CRX(float(params[0,12]),wires=(2,1))
            qml.CRX(float(params[0,13]),wires=(2,0))
            
            qml.CRX(float(params[0,14]),wires=(1,3))
            qml.CRX(float(params[0,15]),wires=(1,2))
            qml.CRX(float(params[0,16]),wires=(1,0))
            
            qml.CRX(float(params[0,17]),wires=(0,3))
            qml.CRX(float(params[0,18]),wires=(0,2))
            qml.CRX(float(params[0,19]),wires=(0,1))
            
            for j in range(4):
                qml.RX(float(params[0,20+j]), wires=j)
            for j in range(4):
                qml.RZ(float(params[0,(24+j)]), wires=j)
            return [qml.expval(qml.PauliZ(j)) for j in range(4)]  
 class Quanvolution():
      def __init__(self,channel_in,channel_out,kernel_size,stride,padding):
          super(Quanvolution, self).__init__()
          self.channel_in = channel_in
          self.channel_out = channel_out
          self.kernel_size = kernel_size
          self.stride = stride
          self. Padding = padding   
 def quanv(self,inputs,params,bias): #必须返回一个测量     
          N, C, H, W = inputs.shape       
          Filter_out = self.channel_out #输出滤波器的数量
          Filter_in = self.channel_in
          HH = self.kernel_size  #卷积核的H 
          WW = self.kernel_size  #卷积核的W
          p = self.padding #填充数
          stride = self.stride
          H_new = 1 + (H + 2 * p - HH) // stride
          W_new = 1 + (W + 2 * p - WW) // stride
          inputs_padded = F.pad(inputs, pad=(1, 1,1, 1), mode='constant', value=0)
          s = stride
          out = torch.zeros((N, Filter_out, H_new, W_new)) #卷积后的输出初始化
          start_time= time.time()
          # q_results = torch.zeros((9))
          #q_results = 0
         
          for i in range(N):       # ith image
              #for f in range(Filter_out):   # fth filter
                  for j in range(H_new):
                      for k in range(W_new):
                              #for t in range(Filter_in):
                              q_result = circuit(inputs_padded[i,0, j*s:HH+j*s, k*s:WW+k*s],params)                           
                              for c in range(Filter_out):                   
                                  out[i,c,j, k] = q_result[c] 
         
          return out
    
    
    


1684771748114
hello,I have created a hybrid quantum classical convolutional neural network with Pytorch as the interface, adding a quantum convolutional layer to the classical convolution. The program can run, but the gradient of the quantum convolutional layer is None, which makes the parameters non updatable. Can you help me? thank you

Hey @shangshang_shi! Welcome back :muscle:

There’s some modules missing (e.g., quanvolutional_neural_network_plankton_2_quantum_layer``) in your code and I can't run it to reproduce your error. It might be that you aren't setting requires_grad=True` somewhere.

Can you shorten your code into a minimal example that reproduces your error?