Does gradient information flow from one circuit to its former circuit?

I am trying to use SGD to train a network that connects two circuits together. In the first circuit, after some operators, I want to trace out a few qubits. Then the density matrix after the partial trace is input to the second circuit. But I get an error like this.

TypeError
must be real number, not ArrayBox
  File "\usingQubitDensityMatrix.py", in cost
    return qnode_2(para_[3 * width_list[1]:], width_list[1], hidden_state)

I am wondering if PennyLane provides a method to concatenate circuits like this while keeping the ability to apply gradient descent?

The related code snippets are as follows:

import pennylane as qml
from pennylane import numpy as np

def device(n_qubits):
    """Build a density-matrix simulator with ``n_qubits`` wires.

    ``default.mixed`` is used (rather than ``default.qubit``) because
    ``layer2`` loads a mixed state via ``qml.QubitDensityMatrix``.
    """
    return qml.device("default.mixed", wires=n_qubits)

def common_layer(para, n_qubits):
    """Apply one RX-RY-RX rotation layer followed by a CNOT ladder.

    ``para`` must hold at least ``3 * n_qubits`` angles: the first
    ``n_qubits`` feed the RX gates, the next ``n_qubits`` the RY gates,
    and the final ``n_qubits`` the closing RX gates.
    """
    # Parameterised single-qubit rotations on every wire.
    for wire in range(n_qubits):
        qml.RX(para[wire], wires=wire)
        qml.RY(para[n_qubits + wire], wires=wire)
        qml.RX(para[2 * n_qubits + wire], wires=wire)
    # Entangle even-odd neighbour pairs: (0,1), (2,3), ...
    for pair in range(n_qubits // 2):
        qml.CNOT(wires=[2 * pair, 2 * pair + 1])
    # Then odd-even pairs: (1,2), (3,4), ...
    for pair in range((n_qubits - 1) // 2):
        qml.CNOT(wires=[2 * pair + 1, 2 * pair + 2])

def layer1(para, width):
    """Run the first sub-circuit and return the reduced density matrix
    of its last two qubits (the "hidden state" handed to circuit 2)."""
    common_layer(para, width)
    return qml.density_matrix(wires=[width - 2, width - 1])

def layer2(para, width, input):
    """Load ``input`` (a 2-qubit density matrix) onto qubits 0-1, apply
    the common layer, and measure <Z> on qubit 0."""
    qml.QubitDensityMatrix(input, wires=[0, 1])
    common_layer(para, width)
    return qml.expval(qml.PauliZ(wires=0))

def cost(para_):
    """Evaluate the two-circuit network end to end.

    The first ``3 * width_list[0]`` entries of ``para_`` parameterise
    circuit 1; the remaining entries parameterise circuit 2.
    """
    qnode_1 = qml.QNode(layer1, device(width_list[0]))
    hidden_state = qnode_1(para_[:3 * width_list[0]], width_list[0])
    # Strip autograd's ArrayBox wrapper: qml.QubitDensityMatrix only
    # accepts plain numeric arrays, which is what caused
    # "TypeError: must be real number, not ArrayBox".
    # NOTE: this also stops gradients flowing back into circuit 1
    # through this path.
    hidden_state = qml.math.toarray(hidden_state)
    qnode_2 = qml.QNode(layer2, device(width_list[1]))
    # Bug fix: slice from 3 * width_list[0] — the number of parameters
    # circuit 1 consumed — not 3 * width_list[1]; the two expressions
    # only coincide because both widths happen to be 4.
    return qnode_2(para_[3 * width_list[0]:], width_list[1], hidden_state)

# --- Training driver ----------------------------------------------------
np.random.seed(0)

width_list = [4, 4]  # qubit counts for circuits 1 and 2
epoch = 10
opt = qml.GradientDescentOptimizer(0.01)
# Three rotation angles per qubit, for each of the two circuits.
para = np.random.uniform(0, 2 * np.pi, width_list[0] * 3 + width_list[1] * 3)
loss_ls = []
print('loss at initialization: ', cost(para))

for _ in range(epoch):
    para, loss = opt.step_and_cost(cost, para)
    loss_ls.append(loss)
    print("loss: ", loss)
    print("para: ", para)

In another way, I try to use matrix multiplication to reproduce the function of this code as:

import pennylane as qml
from pennylane import numpy as np
import copy

def device(n_qubits):
    """Build a pure-state simulator with ``n_qubits`` wires."""
    return qml.device("default.qubit", wires=n_qubits)

def common_layer(para, n_qubits):
    """Apply one RX-RY-RX rotation layer followed by a CNOT ladder.

    ``para`` must hold at least ``3 * n_qubits`` angles: the first
    ``n_qubits`` feed the RX gates, the next ``n_qubits`` the RY gates,
    and the final ``n_qubits`` the closing RX gates.
    """
    # Parameterised single-qubit rotations on every wire.
    for wire in range(n_qubits):
        qml.RX(para[wire], wires=wire)
        qml.RY(para[n_qubits + wire], wires=wire)
        qml.RX(para[2 * n_qubits + wire], wires=wire)
    # Entangle even-odd neighbour pairs: (0,1), (2,3), ...
    for pair in range(n_qubits // 2):
        qml.CNOT(wires=[2 * pair, 2 * pair + 1])
    # Then odd-even pairs: (1,2), (3,4), ...
    for pair in range((n_qubits - 1) // 2):
        qml.CNOT(wires=[2 * pair + 1, 2 * pair + 2])

def layer1(para, width):
    """Run the first sub-circuit and return the reduced density matrix
    of its last two qubits (the "hidden state" handed to circuit 2)."""
    common_layer(para, width)
    return qml.density_matrix(wires=[width - 2, width - 1])

def layer2(para, width):
    """Queue the common layer's gates without returning a measurement.

    Intended to be consumed by ``qml.matrix`` (see the cost function)
    to extract the layer's unitary, not to be executed as a QNode.
    """
    common_layer(para, width)

def Ob(n_qubits):
    """Queue the observable Z ⊗ Z ⊗ ... ⊗ Z on ``n_qubits`` wires.

    ``qml.matrix(Ob)(n)`` yields the matrix of the queued product.  The
    original version also queued ``qml.Identity`` on every wire; since
    I·Z = Z that factor never changed the resulting matrix, so it has
    been removed.
    """
    for wire in range(n_qubits):
        qml.PauliZ(wires=wire)

# --- Hyper-parameters and fixed data ------------------------------------
width_list = [4, 4]  # qubit counts for circuits 1 and 2
epoch = 10
opt = qml.GradientDescentOptimizer(0.1)
np.random.seed(0)
# Three rotation angles per qubit, for each of the two circuits.
para = np.random.uniform(0, 2 * np.pi, width_list[0] * 3 + width_list[1] * 3)
Hermitian = qml.matrix(Ob)(width_list[1])
# |00><00| for the two fresh qubits appended to the hidden state.
state = np.zeros((4, 4), requires_grad=False)
state[0, 0] = 1.

def cost(para_):
    """Classically contract circuit 2 around circuit 1's output.

    Circuit 1 runs as a QNode and yields the reduced density matrix of
    its last two qubits; circuit 2 is applied as an explicit unitary,
    so the returned expectation value is Re tr(U rho U† H).
    """
    qnode_1 = qml.QNode(layer1, device(width_list[0]))
    hidden_state = qnode_1(para_[:3 * width_list[0]], width_list[0])
    # Pad the 2-qubit hidden state with two fresh |0> qubits.
    input_state = np.kron(hidden_state, state)
    # Bug fix: circuit 1 consumes 3 * width_list[0] parameters, so the
    # remainder starts there (the original sliced from 3 * width_list[1],
    # which is only correct because both widths happen to be 4).
    unitary_layer2 = qml.matrix(layer2)(para_[3 * width_list[0]:], width_list[1])
    unitary_layer2_dag = np.conjugate(unitary_layer2.T)
    # The imaginary part of the trace is numerical noise; keep the real part.
    return np.real(np.trace(unitary_layer2 @ input_state @ unitary_layer2_dag @ Hermitian))

loss_ls = []
print('loss at initialization: ', cost(para))
# Snapshot circuit 1's parameters (the first 12 entries) so we can check
# after each step whether gradient descent actually updates them, i.e.
# whether gradients reach the first circuit through the matrix-product
# formulation of the cost.
last_para = copy.deepcopy(para[:12])

for j in range(epoch):
    para, loss = opt.step_and_cost(cost, para)
    loss_ls.append(loss)
    print("loss: ", loss)
    print("para: ", para)
    # Element-wise comparison: an all-True array would mean circuit 1's
    # parameters were frozen (no gradient flow).
    print("para: ==", para[:12] == last_para[:12])
    last_para = copy.deepcopy(para[:12])

Is that a valid way to do this? Or is there a simpler way to implement this function?

Hi @Jinkai_Tian!

In your cost function, after you calculate hidden_state you should add this line:
hidden_state = qml.math.toarray(hidden_state)
This will prevent the first layer from being differentiated a second time from within the second layer, but it should solve your problem and still let you train your circuit.

Please let me know how it goes!

Thanks! Your suggestion solved my problem!

Awesome @Jinkai_Tian! I’m glad this helped.