Why is my neural network training getting slower and slower?

I am using PennyLane and its TorchLayer to train on the MNIST dataset, but training keeps getting slower: the first epoch takes 8 minutes, the second takes 24 minutes, and each of the remaining epochs takes over 20 minutes.

Here is my code:

import torch
import pennylane as qml
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
from sklearn.datasets import make_moons
from torchvision import datasets,transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.nn import Conv2d, MaxPool2d, Linear
from tqdm import tqdm
import time

train_num = 100
test_num = 100
batch_size = 20
epochnum = 50

transform = transforms.Compose([transforms.ToTensor()])
# train_dataset = datasets.FashionMNIST(root="./data", train=True, download=True, transform=transform)
# test_dataset = datasets.FashionMNIST(root="./data", train=False, download=True, transform=transform)
train_dataset = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root="./data", train=False, download=False, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)


n_qubits = 10
dev = qml.device("default.qubit", wires=n_qubits)
# dev = qml.device("lightning.qubit", wires=n_qubits)

depth = 2  # depth of the variational block (the actual value was not shown in the post)

@qml.qnode(dev)
def qnode(inputs, weights, weight2):

    qml.AngleEmbedding(inputs, wires=range(n_qubits))

    for i in range(depth):
        qml.BasicEntanglerLayers(weights[i], wires=range(n_qubits))
        for j in range(n_qubits):
            qml.U3(weight2[i][j][0], weight2[i][j][1], weight2[i][j][2], wires=j)

    # qml.BasicEntanglerLayers(weights, wires=range(n_qubits))

    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]

n_layers = 1

weight_shapes = {
    "weights": (depth, n_layers, n_qubits),
    "weight2": (depth, n_qubits, 3),
}

# def qnode(inputs, weights):
#     qml.AngleEmbedding(inputs, wires=range(n_qubits))
#     qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
#     return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]
# n_layers = 5
# weight_shapes = {
#                  "weights": (n_layers, n_qubits),
#                  }

class HybridModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv2d(1, 6, kernel_size=5)
        self.max_pool1 = MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.max_pool2 = MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = Conv2d(in_channels=16, out_channels=120, kernel_size=4)

        self.fc1 = Linear(in_features=120, out_features=64)

        self.fc2 = Linear(in_features=64, out_features=10)

        self.fc3 = Linear(in_features=10, out_features=10)

        self.qlayer_1 = qml.qnn.TorchLayer(qnode, weight_shapes)

    def forward(self, x):

        x = self.conv1(x)
        x = F.relu(x)
        x = self.max_pool1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = self.max_pool2(x)
        x = self.conv3(x)
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = torch.tanh(x) * np.pi  # scale activations into [-pi, pi] for the angle embedding
        x = self.qlayer_1(x)
        x = self.fc3(x)

        return x

device = torch.device("cuda")
model = HybridModel()
# model = torch.compile(model)

learning_rate = 0.001
layer_specific_lr = {
    'conv1.weight': learning_rate,
    'conv1.bias': learning_rate,
    'fc1.weight': learning_rate,
    'fc1.bias': learning_rate,
    'fc2.weight': learning_rate,
    'fc2.bias': learning_rate,
    'fc3.weight': learning_rate,
    'fc3.bias': learning_rate,
    'qlayer_1.weights': 0.01,
    'qlayer_1.weight2': 0.01,
}

optimizer_parameters = []
for param_name, param in model.named_parameters():
    if param_name in layer_specific_lr:
        optimizer_parameters.append({'params': param, 'lr': layer_specific_lr[param_name]})
    else:
        optimizer_parameters.append({'params': param, 'lr': learning_rate})

opt = torch.optim.Adam(optimizer_parameters, lr=learning_rate)
loss_func = torch.nn.CrossEntropyLoss()
loss_list = []

def train(model,opt,train_loader,test_loader):

    print("start training...")
    for epoch in range(epochnum):
        for batch_id,(data,label) in enumerate(tqdm(train_loader)):

            outputs = model(data)
            loss = loss_func(outputs, label)
            opt.zero_grad()
            loss.backward()
            opt.step()

        losses = 0
        correct = 0

        for batch_id, (data, label) in enumerate(tqdm(test_loader)):
            outputs = model(data)
            pred = F.softmax(outputs,dim=1)
            pred = torch.argmax(pred,dim=1)
            loss = loss_func(outputs, label)
            correct += (pred == label).sum().item()
            # correct += pred.eq(label).sum().item()
            losses += loss

        accuracy = correct / (len(test_loader) * batch_size)
        loss_avg = losses / len(test_loader)


        print("[validation] epoch/accuracy/loss: {:d}/{:.4f}/{:.4f}".format(epoch+1,accuracy,loss_avg))
        # end_time1 = time.time()
        # time1 = end_time1 - start_time1
for param_name, param_group in zip(model.state_dict(), opt.param_groups):
    print(f"Layer: {param_name}, Learning Rate: {param_group['lr']}")

Hello @HotFrog, here is another MNIST/Torch demo that uses a different order of layers in part 2, step 2: Torch Connector and Hybrid QNNs — Qiskit Machine Learning 0.6.1 documentation

Hey @HotFrog! Welcome to the forum! :frog: :fire:

I ran your code and separately timed the two passes you make over the data: the pass that trains the model and the pass that calculates the loss. Here are the timings on my machine:

For the pass over the data to train the model:

[timing plot]

For the pass over the data to calculate the loss:

[timing plot]
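If you want to reproduce this measurement yourself, here is a minimal timing sketch that reuses the names from your script (model, opt, loss_func, epochnum, and the two loaders) and wraps each pass in time.time() calls:

import time

for epoch in range(epochnum):
    t0 = time.time()
    # Pass 1: train the model for one epoch.
    for data, label in train_loader:
        outputs = model(data)
        loss = loss_func(outputs, label)
        opt.zero_grad()
        loss.backward()
        opt.step()
    t1 = time.time()
    # Pass 2: calculate the loss over the test set (no gradients needed here).
    with torch.no_grad():
        for data, label in test_loader:
            loss = loss_func(model(data), label)
    t2 = time.time()
    print(f"epoch {epoch + 1}: train pass {t1 - t0:.1f} s, loss pass {t2 - t1:.1f} s")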

I can’t replicate the behaviour you’re seeing. I’d make sure that your computer’s memory and CPU usage aren’t hitting the ceiling while your code is running, as that could be causing the slowdown from pass to pass. Let me know if this helps!
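If you want to check the memory programmatically, here is a small sketch (it assumes the third-party psutil package is installed) that prints the process's resident memory once per epoch; a steady climb from epoch to epoch would point at tensors being retained between passes:

import os
import psutil

process = psutil.Process(os.getpid())

def log_memory(epoch):
    # Resident set size in MiB; call this at the end of each epoch.
    rss_mib = process.memory_info().rss / 1024 ** 2
    print(f"epoch {epoch}: {rss_mib:.1f} MiB resident")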