SF implementation of Quantum Machine Learning in Feature Hilbert Spaces

I am trying to implement and run the variational circuit from the paper “Quantum Machine Learning in Feature Hilbert Spaces” and apply it to the Iris dataset. After following the Strawberry Fields tutorial on variational circuits, I am getting the error

SympifyError: SympifyError: <tf.Tensor: shape=(3,), dtype=float32, numpy=array([1.0025, 1.0025, 1.0025], dtype=float32)>

when I call engine.run() while testing, before applying it to the dataset. Here is my code:

import strawberryfields as sf
from strawberryfields.ops import *
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_iris

# hyper-parameters
batch_size = 3
sq = 1.0  # squeezing magnitude used by the Squeezed gates below
eng = sf.Engine(backend="tf", backend_options={"cutoff_dim": 5, "batch_size": batch_size})
circuit = sf.Program(2)
# defining variables
theta1 = tf.Variable([0.1] * batch_size)
theta2 = tf.Variable([0.1] * batch_size)
theta3 = tf.Variable([0.1] * batch_size)
theta4 = tf.Variable([0.1] * batch_size)
theta5 = tf.Variable([0.1] * batch_size)
theta6 = tf.Variable([0.1] * batch_size)
theta7 = tf.Variable([0.1] * batch_size)
theta8 = tf.Variable([0.1] * batch_size)
x1, x2 = circuit.params("x1", "x2")
_x1 = tf.zeros([batch_size], dtype=tf.float32)
_x2 = tf.zeros([batch_size],  dtype=tf.float32)
# construct the circuit
with circuit.context as q:
    Squeezed(sq, x1) | q[0]
    Squeezed(sq, x2) | q[1]
    BSgate(theta1, theta2) | (q[0], q[1])
    Dgate(theta3) | q[0]
    Dgate(theta4) | q[1]
    Pgate(theta5) | q[0]
    Pgate(theta6) | q[1]
    Vgate(theta7) | q[0]
    Vgate(theta8) | q[1]
if eng.run_progs:
    eng.reset()
# results = eng.run(circuit,run_options={"eval": False})
results = eng.run(circuit, args={"x1": _x1, "x2": _x2})

Thanks @therafael!

The error occurs because the theta1, theta2, … parameters are defined as TensorFlow objects but are not passed through the args dictionary of eng.run(). In the TF backend, all TensorFlow-based objects must be fed to eng.run() via args. To fix this, you could redefine the thetas as NumPy arrays:

theta1 = np.array([0.1] * batch_size)
theta2 = np.array([0.1] * batch_size)
theta3 = np.array([0.1] * batch_size)
theta4 = np.array([0.1] * batch_size)
theta5 = np.array([0.1] * batch_size)
theta6 = np.array([0.1] * batch_size)
theta7 = np.array([0.1] * batch_size)
theta8 = np.array([0.1] * batch_size)
x1, x2 = circuit.params("x1", "x2")
_x1 = tf.zeros([batch_size], dtype=tf.float32)
_x2 = tf.zeros([batch_size],  dtype=tf.float32)

However, this means that these parameters would not be trainable.

If you want to train them, you can keep their values as TensorFlow objects (e.g. tf.Variable), but you also need to define them as symbolic parameters on the program and pass the values through args in eng.run():

import strawberryfields as sf
from strawberryfields.ops import *
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_iris

batch_size = 3
eng = sf.Engine(backend="tf", backend_options={"cutoff_dim": 5, "batch_size": batch_size})
circuit = sf.Program(2)

x1, x2 = circuit.params("x1", "x2")
thetas = circuit.params(*[f"theta{i}" for i in range(8)])

_x1 = tf.zeros([batch_size], dtype=tf.float32)
_x2 = tf.zeros([batch_size],  dtype=tf.float32)
_thetas = 0.1 * tf.ones(8)

sq = 1.0

with circuit.context as q:
    Squeezed(sq, x1) | q[0]
    Squeezed(sq, x2) | q[1]
    BSgate(thetas[0], thetas[1]) | (q[0], q[1])
    Dgate(thetas[2]) | q[0]
    Dgate(thetas[3]) | q[1]
    Pgate(thetas[4]) | q[0]
    Pgate(thetas[5]) | q[1]
    Vgate(thetas[6]) | q[0]
    Vgate(thetas[7]) | q[1]
    
if eng.run_progs:
    eng.reset()
    
results = eng.run(circuit, args={**{"x1": _x1, "x2": _x2}, **{f"theta{i}": _thetas[i] for i in range(8)}})

Hope this helps!


Let me give this a try and update you on the results.

@Tom_Bromley it worked, thanks! But my question is: I was initially defining the thetas in terms of batch_size, e.g. theta1 = tf.Variable([0.1] * batch_size), but in your second answer you define the thetas as 0.1 * tf.ones(8). Why is batch_size not used in this case?

Hey @therafael,

Good question! You are free to choose between 0.1 * tf.ones(8) and 0.1 * tf.ones((8, batch_size)). In this case, since the params are all the same, it makes sense to go with the former, but in general the latter gives you the ability to have a different value for each element of the batch.

For completeness, here is the code:

import strawberryfields as sf
from strawberryfields.ops import *
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_iris

batch_size = 3
eng = sf.Engine(backend="tf", backend_options={"cutoff_dim": 5, "batch_size": batch_size})
circuit = sf.Program(2)

x1, x2 = circuit.params("x1", "x2")
thetas = circuit.params(*[f"theta{i}" for i in range(8)])

_x1 = tf.zeros([batch_size], dtype=tf.float32)
_x2 = tf.zeros([batch_size], dtype=tf.float32)
_thetas = 0.1 * tf.ones((8, batch_size))

sq = 1.0

with circuit.context as q:
    Squeezed(sq, x1) | q[0]
    Squeezed(sq, x2) | q[1]
    BSgate(thetas[0], thetas[1]) | (q[0], q[1])
#     Dgate(thetas[2]) | q[0]
#     Dgate(thetas[3]) | q[1]
    Pgate(thetas[4]) | q[0]
    Pgate(thetas[5]) | q[1]
    Vgate(thetas[6]) | q[0]
    Vgate(thetas[7]) | q[1]

if eng.run_progs:
    eng.reset()

results = eng.run(circuit, args={**{"x1": _x1, "x2": _x2}, **{f"theta{i}": _thetas[i] for i in range(8)}})

Note that I have commented out the Dgates because otherwise I get an error. I’m not sure what’s going on here; it may be a bug, but for now it might be worth leaving out the Dgate or using the non-batch approach to setting its parameters.
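For reference, a sketch of what the non-batch route could look like with the Dgate lines restored, passing the Dgate parameters as plain scalars (the values here are placeholders, and whether this sidesteps the bug is untested):

# Sketch (assumption): scalars are broadcast across the batch, so feeding the
# Dgate parameters as plain floats may avoid the batched-Dgate error.
args = {
    "x1": _x1, "x2": _x2,
    "theta2": 0.1, "theta3": 0.1,  # scalar Dgate parameters (placeholders)
    **{f"theta{i}": _thetas[i] for i in (0, 1, 4, 5, 6, 7)},  # batched values
}
results = eng.run(circuit, args=args)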


Great @Tom_Bromley, but I want to apply it to the Iris dataset, so I think the batched approach is best.
In that case, which parameters should be updated if I am using

optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
minimize_opt = optimizer.minimize(loss, [var])

What should [var] be here?

Hi @therafael,

It's up to you which variables you want to optimize over (in this example, you could optimize over the x variables, the theta variables, or both!).

Here’s a quick example of how to optimize using a Keras optimizer (adapted from here):

import strawberryfields as sf
from strawberryfields.ops import *
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_iris

batch_size = 3
eng = sf.Engine(backend="tf", backend_options={"cutoff_dim": 5, "batch_size": batch_size})
circuit = sf.Program(2)

x1, x2 = circuit.params("x1", "x2")
thetas = circuit.params(*[f"theta{i}" for i in range(8)])

_x1 = tf.Variable(tf.zeros([batch_size], dtype=tf.float32))
_x2 = tf.Variable(tf.zeros([batch_size],  dtype=tf.float32))
_thetas = tf.Variable(0.1 * tf.ones((8, batch_size)))

sq = 1.0

with circuit.context as q:
    Squeezed(sq, x1) | q[0]
    Squeezed(sq, x2) | q[1]
    BSgate(thetas[0], thetas[1]) | (q[0], q[1])
#     Dgate(thetas[2]) | q[0]
#     Dgate(thetas[3]) | q[1]
    Pgate(thetas[4]) | q[0]
    Pgate(thetas[5]) | q[1]
    Vgate(thetas[6]) | q[0]
    Vgate(thetas[7]) | q[1]
    
if eng.run_progs:
    eng.reset()
    
results = eng.run(circuit, args={**{"x1": _x1, "x2": _x2}, **{f"theta{i}": _thetas[i] for i in range(8)}})

opt = tf.keras.optimizers.Adam(learning_rate=0.1)
steps = 50

for step in range(steps):

    # reset the engine if it has already been executed
    if eng.run_progs:
        eng.reset()

    with tf.GradientTape() as tape:
        # execute the engine
        results = eng.run(circuit, args={**{"x1": _x1, "x2": _x2}, **{f"theta{i}": _thetas[i] for i in range(8)}})
        # get the probability of the two-mode Fock state |1, 1>
        prob = results.state.fock_prob([1, 1])
        # negative sign to maximize prob
        loss = -tf.reduce_sum(prob)

    gradients = tape.gradient(loss, [_x1, _x2])
    opt.apply_gradients(zip(gradients, [_x1, _x2]))
    print("Loss at step {}: {}".format(step, loss))

You'll have to modify it to suit your case, in particular the loss function. The main things to notice are: creating tf.Variables for the parameters you want to optimize, and using tf.reduce_sum() to sum over the batch dimension.
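For instance, here is a rough sketch of one way the loss could be adapted toward classification, building class scores from Fock probabilities (the labels and the Fock-state-to-class mapping are illustrative assumptions, not from the paper or tutorial):

# Sketch: treat two Fock-state probabilities as class scores and compare them
# against integer labels; the mapping and labels below are assumed examples.
class_scores = tf.stack(
    [results.state.fock_prob([0, 0]),   # score for class 0
     results.state.fock_prob([1, 1])],  # score for class 1
    axis=1,
)  # shape (batch_size, 2)
class_probs = class_scores / tf.reduce_sum(class_scores, axis=1, keepdims=True)
labels = tf.constant([0, 1, 0])  # hypothetical labels for this batch of 3
loss = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(labels, class_probs))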


Hi @Tom_Bromley @nathan,
Using the same code as you have shown above (before editing), I keep getting the error
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
This happens at the line
results = eng.run(circuit, args={**{...}, **{...}})

I have been able to fix this by using
results = eng.run(circuit, args={**{"x1": _x1, "x2": _x2}, **{f"theta{i}": _thetas[i].numpy()[j] for i in range(8) for j in range(len(_thetas[i]))}})

Now the test runs successfully, so I am going to edit it to suit my case and apply it to the Iris dataset. I will share results here as well.

Hey @therafael,

Using the same code as you have shown above before editing, I keep getting the error
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

Were you getting this error with the Dgate commented out? I only get an error if the Dgate is being used :thinking: However, I’ve just added this PR to support using the Dgate in batch mode (thanks for bringing it to our attention!).

Although from your second comment, it sounds like you got things to work. Let us know if you still need a hand!


Hi @Tom_Bromley @nathan,
I got it working together with the Dgate. However, I am unable to combine both the _x and _thetas variables in the optimization step. I am also struggling to get it working with the Iris dataset.

Another weird thing: when I try to follow the quantum neural network tutorial in SF, except that I use a homodyne measurement, and then try to compute the gradients, the gradients are None. This is with a single shot, and the version is 0.16.0.

Hi @kareem_essafty,

Thanks for reporting a possible issue. Just to help keep things organized for future readers (since this sounds like a distinct issue from the one above), could you share your observation in a new post?

Hi @therafael,

If you want to also optimize over the theta variables, you can change the line to

gradients = tape.gradient(loss, [_x1, _x2] + [_thetas[i] for i in range(8)])
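To then apply the update, note that apply_gradients expects tf.Variable objects, so one option (a sketch, not from the thread) is to differentiate with respect to the whole _thetas variable rather than its slices:

variables = [_x1, _x2, _thetas]  # _thetas as a single (8, batch_size) tf.Variable
gradients = tape.gradient(loss, variables)
opt.apply_gradients(zip(gradients, variables))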
    

Hi @nathan @Tom_Bromley,
Thank you for the response. In using this on the Iris dataset, I can pick two columns, one as x1 and the other as x2. Is it possible to pick more than one column in the dataset, i.e. multi-class?

Hi @therafael,

Could you provide a bit more detail? I’m not sure I understand the question :thinking:

x1 and x2 are variables in this model. In principle, the number of variables does not have to be related to the number of classes. I’m also uncertain what you mean by “columns”.

I'm thinking you mean a dimension/axis of the dataset? If you have more dimensions in your data, you could encode them into the theta variables as well, but then I'm not sure where the point about "multi-class" comes in.

Hi @nathan @Tom_Bromley ,
Assuming I read my data into a dataframe, I would like to use this dataset with the circuit, with four features and species as the classes (there are 3 classes). Do I have to load x_1 as the matrix of features and x_2 as the labels?
I have something as shown below:

Hi @therafael,

Since you have 4 features, the most straightforward thing to do is pick four of the 10 free variables (the xs and thetas) and assign one feature to each. The other six circuit parameters you could treat as free variables for your model.
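For example, here is a sketch of one such assignment for the two-mode circuit above, routing the two extra features into the Pgate angles theta4 and theta5 (which parameters receive features is an illustrative choice; passing theta2/theta3 assumes the Dgate lines are restored, otherwise drop them from the dict):

from sklearn.datasets import load_iris

X = load_iris().data  # shape (150, 4): four features per sample
features = tf.convert_to_tensor(X[:batch_size], dtype=tf.float32)

args = {
    "x1": features[:, 0],      # feature 1 -> squeezing phase, mode 0
    "x2": features[:, 1],      # feature 2 -> squeezing phase, mode 1
    "theta4": features[:, 2],  # feature 3 -> Pgate angle, mode 0
    "theta5": features[:, 3],  # feature 4 -> Pgate angle, mode 1
    **{f"theta{i}": _thetas[i] for i in (0, 1, 2, 3, 6, 7)},  # free model parameters
}
results = eng.run(circuit, args=args)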

For the prediction, you'll need to assign the class to the output of the circuit (the quantity returned in results), rather than to the input variables. There's no "right" way to do this. One choice could be to assign the class based on which photon number has the highest measurement probability ($\vert 0 \rangle \rightarrow$ class "0", $\vert 1 \rangle \rightarrow$ class "1", etc.).
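As a rough sketch of that readout for the two-mode circuit (using mode 0 and three classes is an illustrative assumption):

# Sketch: for each batch element, pick the photon number on mode 0 with the
# highest probability and use it as the predicted class label.
probs = tf.stack([results.state.fock_prob([n, 0]) for n in range(3)])  # (3, batch_size)
predicted_class = tf.argmax(probs, axis=0)  # integer class per batch element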


Hi @nathan,
Thank you very much. Let me give this a try and will provide an update when I am done.

Hi @nathan, @Tom_Bromley ,
I am getting this error when applying it to the dataset:
ValueError: Parameter can be either a scalar or a vector of length 12.

Here is the modified code for 4 qumodes:

import pandas as pd

batch_size = 12
eng = sf.Engine(backend="tf", backend_options={"cutoff_dim": 5, "batch_size": batch_size})
circuit = sf.Program(4)
sq = 1.0

iris = load_iris()
X = iris.data
y = iris.target
feat_df = pd.DataFrame(X)

x1, x2, x3, x4 = circuit.params("x1", "x2", "x3", "x4")
thetas = circuit.params(*[f"theta{i}" for i in range(12)])

_x1 = tf.convert_to_tensor(feat_df[0], dtype=tf.float32)
_x2 = tf.convert_to_tensor(feat_df[1], dtype=tf.float32)
_x3 = tf.convert_to_tensor(feat_df[2], dtype=tf.float32)
_x4 = tf.convert_to_tensor(feat_df[3], dtype=tf.float32)
_thetas = tf.Variable(0.1 * tf.ones((12, batch_size)))

# construct the circuit
with circuit.context as q:
    Squeezed(sq, x1) | q[0]
    Squeezed(sq, x2) | q[1]
    Squeezed(sq, x3) | q[2]
    Squeezed(sq, x4) | q[3]
    BSgate(thetas[0], thetas[1]) | (q[0], q[1])
    BSgate(thetas[2], thetas[3]) | (q[1], q[2])
    BSgate(thetas[4], thetas[5]) | (q[2], q[3])
    Dgate(thetas[6]) | q[0]
    Dgate(thetas[7]) | q[1]
    Pgate(thetas[8]) | q[2]
    Pgate(thetas[9]) | q[3]
    Vgate(thetas[10]) | q[0]
    Vgate(thetas[11]) | q[1]

steps = 50
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

for step in range(steps):
    if eng.run_progs:
        eng.reset()

    with tf.GradientTape() as tape:
        results = eng.run(circuit, args={**{"x1": _x1, "x2": _x2, "x3": _x3, "x4": _x4}, **{f"theta{i}": _thetas[i].numpy()[j] for i in range(12) for j in range(len(_thetas[i]))}})
        # prob = results.state.fock_prob([1, 1])
        prob20 = results.state.fock_prob([2, 0, 0, 0])
        prob02 = results.state.fock_prob([0, 2, 0, 0])
        # negative sign to maximize prob
        loss = -tf.reduce_sum(prob20 / (prob02 + prob20))
        # loss = -tf.reduce_sum(prob)

    gradients = tape.gradient(loss, [_x1, _x2, _x3, _x4])
    opt.apply_gradients(zip(gradients, [_x1, _x2, _x3, _x4] + [_thetas[i] for i in range(12)]))
    print(f"Loss at step {step}: {loss}")
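For context, the engine here is built with batch_size = 12 while each _x tensor holds all 150 Iris rows, which would explain the complaint about vectors of length 12. A minimal sketch of slicing the features so every parameter vector matches the batch size (the offset start is a hypothetical helper, not from the thread):

# Sketch: feed one batch of `batch_size` rows at a time so each parameter
# vector passed to the engine has length 12, as the backend expects.
start = 0  # hypothetical batch offset
_x1 = tf.convert_to_tensor(feat_df[0][start:start + batch_size], dtype=tf.float32)
_x2 = tf.convert_to_tensor(feat_df[1][start:start + batch_size], dtype=tf.float32)
_x3 = tf.convert_to_tensor(feat_df[2][start:start + batch_size], dtype=tf.float32)
_x4 = tf.convert_to_tensor(feat_df[3][start:start + batch_size], dtype=tf.float32)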