Problems running Hybrid QNN using KerasLayers on IBM Device

Hi!

I am trying to run a hybrid QNN that uses classical Keras layers on an IBM device using qiskit.remote and the pennylane-qiskit plugin. I am running into an error that I am not sure how to solve. I followed the Keras tutorials as well as the Qiskit plugin tutorial and modified them to write my code. My goal is to compile and run the circuit on a real device while still using Keras layers.

I found a forum post that reports the same error, but it uses Torch, and I was interested to see if there’s a workaround using Keras.

Thanks in advance!

Here is my code (removed IBM token)

#import libraries
import os
os.environ["TF_USE_LEGACY_KERAS"] = "1"

import re
import pennylane as qml
import numpy as np
import tensorflow as tf
import tf_keras as keras
from sklearn.datasets import load_digits
from silence_tensorflow import silence_tensorflow
silence_tensorflow()
tf.keras.backend.set_floatx('float32')

# Hyperparameters and dataset sizes used by the rest of the script
n_epochs = 50   # Number of optimization epochs
n_layers = 1    # Number of random layers
n_train = 1600    # Size of the train dataset
n_test = 400     # Size of the test dataset

droprate=0.25  # rate passed to the Dropout layer of the model

# Load MNIST through the tf_keras package imported as `keras`
mnist_dataset = keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist_dataset.load_data()
print(X_train.shape)

X_train=X_train.reshape((len(X_train), -1))  # flatten every sample into a single row
X_test=X_test.reshape((len(X_test), -1))

y_train=y_train.reshape((len(y_train), -1))  # labels as a 2-D array, one row per sample
y_test=y_test.reshape((len(y_test), -1))

# Reduce dataset size
X_train = X_train[:n_train]
y_train = y_train[:n_train]
X_test = X_test[:n_test]
y_test = y_test[:n_test]

from sklearn.preprocessing import normalize
X_train = normalize(X_train)  # called with defaults (scikit-learn documents these as row-wise L2)
X_test = normalize(X_test)

# Set up the IBM hardware device; the circuit addresses n_qubits wires
n_qubits = 6
from qiskit_ibm_runtime import QiskitRuntimeService

QiskitRuntimeService.save_account(channel="ibm_quantum",overwrite=True, token=<token>)  # <token> is a placeholder: the real IBM token was removed

## To access saved credentials for the IBM quantum channel and select an instance
service = QiskitRuntimeService(channel="ibm_quantum", instance="ibm-q/open/main")
backend = service.least_busy(operational=True, simulator=False, min_num_qubits=n_qubits)

# passing a string in backend would result in an error
dev = qml.device('qiskit.remote', wires=backend.num_qubits, backend=backend)  # device gets every backend qubit as a wire, not just n_qubits

# Define the circuit: Y-rotation angle embedding of `inputs`, two RY + CNOT-ring layers, PauliZ readout
@qml.qnode(dev)  # no diff_method argument is passed here
def circuit(inputs, weights):
    qml.AngleEmbedding(features=inputs, wires=range(n_qubits), rotation='Y')
    # First trainable layer: one RY per wire, angles weights[0] .. weights[5]
    qml.RY(weights[0], wires=0)
    qml.RY(weights[1], wires=1)
    qml.RY(weights[2], wires=2)
    qml.RY(weights[3], wires=3)
    qml.RY(weights[4], wires=4)
    qml.RY(weights[5], wires=5)
    # Chain of CNOTs between neighbouring wires; the last one (5 -> 0) closes the ring
    qml.CNOT(wires=[0,1])
    qml.CNOT(wires=[1,2])
    qml.CNOT(wires=[2,3])
    qml.CNOT(wires=[3,4])
    qml.CNOT(wires=[4,5])
    qml.CNOT(wires=[5,0])
    # Second trainable layer: one RY per wire, angles weights[6] .. weights[11]
    qml.RY(weights[6], wires=0)
    qml.RY(weights[7], wires=1)
    qml.RY(weights[8], wires=2)
    qml.RY(weights[9], wires=3)
    qml.RY(weights[10], wires=4)
    qml.RY(weights[11], wires=5)
    # Same CNOT ring as after the first layer
    qml.CNOT(wires=[0,1])
    qml.CNOT(wires=[1,2])
    qml.CNOT(wires=[2,3])
    qml.CNOT(wires=[3,4])
    qml.CNOT(wires=[4,5])
    qml.CNOT(wires=[5,0])
    # One PauliZ expectation value for each wire in range(n_qubits)
    return [qml.expval(qml.PauliZ(j)) for j in range(n_qubits)]

# Initialize weights and weight shapes for the Keras layer
weights = np.random.uniform(high=2 * np.pi, size=(n_layers, 12))  # NOTE: never passed to KerasLayer below; only weight_shapes is
# 12 = 2*6 rotation gates, each with its own weight
weight_shapes = {"weights": 12}

# Set up the Keras layers: Dense(6) -> quantum layer -> Dense(100) -> Dropout -> Dense(10, softmax)
qlayer = qml.qnn.KerasLayer(circuit, weight_shapes = weight_shapes, output_dim=n_qubits)
clayer_1 = tf.keras.layers.Dense(6)  # 6 outputs feed the embedding over range(n_qubits), n_qubits = 6
clayer_100 = tf.keras.layers.Dense(100)
clayer_2 = tf.keras.layers.Dense(10, activation="softmax")
dropout= tf.keras.layers.Dropout(rate=droprate)
model = tf.keras.models.Sequential([clayer_1, qlayer, clayer_100, dropout, clayer_2])

opt = tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(loss='sparse_categorical_crossentropy',
      optimizer=opt,
      metrics=['accuracy'])

# To see if the circuit compiles, fit the model with one data point
X_quick = X_train[:1]
y_quick = y_train[:1]

q_history = model.fit(X_quick, y_quick, epochs=1, batch_size=1, validation_data=(X_quick, y_quick), verbose=2)

FULL ERROR MESSAGE BELOW

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[2], line 115
    112 X_quick = X_train[:1]
    113 y_quick = y_train[:1]
--> 115 q_history = model.fit(X_quick, y_quick, epochs=1, batch_size=1, validation_data=(X_quick, y_quick), verbose=2)

File ~\AppData\Roaming\Python\Python311\site-packages\tf_keras\src\utils\traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
     67     filtered_tb = _process_traceback_frames(e.__traceback__)
     68     # To get the full stack trace, call:
     69     # `tf.debugging.disable_traceback_filtering()`
---> 70     raise e.with_traceback(filtered_tb) from None
     71 finally:
     72     del filtered_tb

File ~\AppData\Roaming\Python\Python311\site-packages\pennylane\workflow\interfaces\tensorflow.py:275, in tf_execute.<locals>.custom_gradient_execute.<locals>.vjp_fn(*dy, **tfkwargs)
    272 nested_dy = _res_restructured(dy, tapes)
    274 try:
--> 275     vjps = jpc.compute_vjp(inner_tapes, nested_dy)
    276 except AttributeError as e:
    277     message = (
    278         "device VJPs cannot be vectorized with tensorflow. "
    279         "To use device_vjp=True, \n set experimental_use_pfor=False"
    280         " as a keyword argument to GradientTape.jacobian\n and set persistent=True to GradientTape."
    281     )

File ~\AppData\Roaming\Python\Python311\site-packages\pennylane\workflow\jacobian_products.py:302, in TransformJacobianProducts.compute_vjp(self, tapes, dy)
    299     jacs = self.compute_jacobian(tapes)
    300     return _compute_vjps(jacs, dy, tapes)
--> 302 vjp_tapes, processing_fn = qml.gradients.batch_vjp(
    303     tapes, dy, self._gradient_transform, gradient_kwargs=self._gradient_kwargs
    304 )
    306 vjp_results = self._inner_execute(tuple(vjp_tapes))
    307 return tuple(processing_fn(vjp_results))

File ~\AppData\Roaming\Python\Python311\site-packages\pennylane\gradients\vjp.py:502, in batch_vjp(tapes, dys, gradient_fn, reduction, gradient_kwargs)
    500 # Loop through the tapes and dys vector
    501 for tape, dy in zip(tapes, dys):
--> 502     g_tapes, fn = vjp(tape, dy, gradient_fn, gradient_kwargs=gradient_kwargs)
    503     reshape_info.append(len(g_tapes))
    504     processing_fns.append(fn)

File ~\AppData\Roaming\Python\Python311\site-packages\pennylane\gradients\vjp.py:363, in vjp(tape, dy, gradient_fn, gradient_kwargs)
    360 except (AttributeError, TypeError, NotImplementedError):
    361     pass
--> 363 gradient_tapes, fn = gradient_fn(tape, **gradient_kwargs)
    365 def processing_fn(results, num=None):
    366     # postprocess results to compute the Jacobian
    367     jac = fn(results)

File ~\AppData\Roaming\Python\Python311\site-packages\pennylane\transforms\core\transform_dispatcher.py:140, in TransformDispatcher.__call__(self, *targs, **tkwargs)
    138 start = 0
    139 for tape in expanded_tapes:
--> 140     intermediate_tapes, post_processing_fn = self._transform(
    141         tape, *targs, **tkwargs
    142     )
    143     transformed_tapes.extend(intermediate_tapes)
    144     end = start + len(intermediate_tapes)

File ~\AppData\Roaming\Python\Python311\site-packages\pennylane\gradients\parameter_shift.py:1100, in param_shift(tape, argnum, shifts, gradient_recipes, fallback_fn, f0, broadcast)
   1098 transform_name = "parameter-shift rule"
   1099 assert_no_state_returns(tape.measurements, transform_name)
-> 1100 assert_no_trainable_tape_batching(tape, transform_name)
   1102 if argnum is None and not tape.trainable_params:
   1103     return _no_trainable_grad(tape)

File ~\AppData\Roaming\Python\Python311\site-packages\pennylane\gradients\gradient_transform.py:97, in assert_no_trainable_tape_batching(tape, transform_name)
     95 for idx in range(len(tape.trainable_params)):
     96     if tape.get_operation(idx)[0].batch_size is not None:
---> 97         raise NotImplementedError(
     98             "Computing the gradient of broadcasted tapes with respect to the broadcasted "
     99             f"parameters using the {transform_name} gradient transform is currently not "
    100             "supported. See #4462 for details."
    101         )

NotImplementedError: Computing the gradient of broadcasted tapes with respect to the broadcasted parameters using the parameter-shift rule gradient transform is currently not supported. See #4462 for details.

OUTPUT OF qml.about():

Name: PennyLane
Version: 0.40.0
Summary: PennyLane is a cross-platform Python library for quantum computing, quantum machine learning, and quantum chemistry. Train a quantum computer the same way as a neural network.
Home-page: https://github.com/PennyLaneAI/pennylane
Author: 
Author-email: 
License: Apache License 2.0
Location: C:\Users\Work\AppData\Roaming\Python\Python311\site-packages
Requires: appdirs, autograd, autoray, cachetools, diastatic-malt, networkx, numpy, packaging, pennylane-lightning, requests, rustworkx, scipy, tomlkit, typing-extensions
Required-by: PennyLane-qiskit, PennyLane_Lightning

Platform info:           Windows-10-10.0.22631-SP0
Python version:          3.11.7
Numpy version:           1.26.4
Scipy version:           1.11.4
Installed devices:
- default.clifford (PennyLane-0.40.0)
- default.gaussian (PennyLane-0.40.0)
- default.mixed (PennyLane-0.40.0)
- default.qubit (PennyLane-0.40.0)
- default.qutrit (PennyLane-0.40.0)
- default.qutrit.mixed (PennyLane-0.40.0)
- default.tensor (PennyLane-0.40.0)
- null.qubit (PennyLane-0.40.0)
- reference.qubit (PennyLane-0.40.0)
- lightning.qubit (PennyLane_Lightning-0.40.0)
- qiskit.aer (PennyLane-qiskit-0.40.0)
- qiskit.basicaer (PennyLane-qiskit-0.40.0)
- qiskit.basicsim (PennyLane-qiskit-0.40.0)
- qiskit.remote (PennyLane-qiskit-0.40.0)

Hi @EmmanSSSS , welcome to the Forum!

Just to clarify, does your problem arise when using any device (e.g. default.qubit) or only when using an IBM device?

Hi Catalina,

Yes, it works fine when using default.qubit, but I run into problems when using an IBM device.

Thanks

Thanks for confirming @EmmanSSSS

I can actually replicate the issue by using default.qubit together with diff_method='parameter-shift'. This means that the issue isn’t really due to the IBM device, but instead to the differentiation method. Note that for quantum hardware devices we need to use hardware-compatible differentiation methods such as ‘parameter-shift’.

The good news is that it’s easy to test a solution. You can see a solution for a similar problem in this post.

The solution consists in identifying whether you have a batch dimension and adjusting your embedding accordingly. Below I copy the changes I made to test the solution for your problem. The rest of the code stays the same.

dev = qml.device('default.qubit', wires=n_qubits)
n_inputs = 6

#define circuit
@qml.qnode(dev, diff_method='parameter-shift')
def circuit(inputs, weights):
    # These print statements help me check that the size for the inputs matches my embedding template
    print('inputs.shape: ',inputs.shape)
    print('inputs[0].shape: ',inputs[0].shape)

    # Encode your inputs
    if len(inputs.shape) > 1: # use this if you have a batch dimension
        for i in range(n_inputs):
            qml.AngleEmbedding(features=inputs[0], wires=range(n_qubits), rotation='Y') # the key is in this line
    else: # use this if you don't have a batch dimension
        for i in range(n_inputs):
            qml.AngleEmbedding(features=inputs, wires=range(n_qubits), rotation='Y')

Let me know if this solution works for you when using the IBM device.

Note:

If you get an error like the one below, you’ll need to change tf.keras.backend.set_floatx('float32') to tf.keras.backend.set_floatx('float64')

InvalidArgumentError: cannot compute Mul as input #1(zero-based) was expected to be a float tensor but is a double tensor [Op:Mul] name: 

Let me know if this helps!

Hi Catalina,

Thank you so much for your help. Yes it seems to work with qiskit.aer. With an IBM device, it’s running into runtime issues.

I don’t quite understand the solution you gave, particularly the use of the for loop. Could you explain that more? From the forum post you linked, it seems like you are iterating through the inputs, but with the code you posted it’s embedding the inputs n_inputs number of times.

Thanks

Hi @EmmanSSSS ,

I actually don’t know why I did it that way :melting_face: .

Now that I look at it again, for your problem it looks like your inputs to the circuit always have shape (1,6), in which case you would just need the single line of code shown below for the embedding. There you’re just removing the batch dimension by using inputs[0], which has shape (6,).

Sorry for the confusing code before. It looks like I managed to confuse myself too. And thanks for following up.

    # Encode your inputs
    # Use this since you have a batch dimension
    qml.AngleEmbedding(features=inputs[0], wires=range(n_qubits), rotation='Y') # the key is in this line

Regarding the runtime error you’re getting, could you please post it here so that I can take a look at it?