Using sklearn in cost function

Hello! I am trying to use sklearn regression within my cost function, but that seems to give an error when I call clf.fit (clf is the sklearn regressor). Wanted to ask whether this is supposed to be supported? Below is my code:

import pennylane as qml
from pennylane import numpy as np

n_probes = 2
n_ancillas = 1
n_qubits = n_probes + n_ancillas
dev = qml.device("default.qubit", analytic=True, wires=n_probes + n_ancillas)

##############################################################################
# Cost
##############################################################################
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

@qml.qnode(dev)
def measure_full_sys(phi, params):
    qml.RY(params[0],wires=0)
    qml.RZ(phi,wires=0)
    qml.RY(params[1],wires=0)
    return qml.probs(wires=0)
    #return qml.probs(wires=range(n_qubits))
    
    
def collect_grid_sample(phi_range, n_sample, params):
    phis = np.arange(0,phi_range,phi_range/n_sample)
    #data = np.zeros((n_sample,2))
    res = np.empty(0)
    for i in range(n_sample):
        cur_phi = phis[i]
        cur_zero_prob = measure_full_sys(cur_phi, params)[0].reshape(1)
        #print(cur_zero_prob)
        res = np.concatenate((res,cur_zero_prob))
    data = np.concatenate((phis.reshape(n_sample, 1),res.reshape(-1,1)),axis=1)
    #print(data)
    return data

def collect_rand_sample(phi_range, n_sample, params):
    phis = np.random.rand(n_sample)*phi_range
    res = np.empty(0)
    for i in range(n_sample):
        cur_phi = phis[i]
        cur_zero_prob = measure_full_sys(cur_phi, params)[0].reshape(1)
        #print(cur_zero_prob)
        res = np.concatenate((res,cur_zero_prob))
    data = np.concatenate((phis.reshape(n_sample, 1),res.reshape(-1,1)),axis=1)
    #print(data)
    return data

def mse(params):
    global iter_counter
    # train the regressor
    clf = LinearRegression()
    train_data = collect_grid_sample(0.1,100,params)
    test_data = collect_rand_sample(0.1,100,params)
        
    X_train = train_data[:,1].reshape(-1,1)
    y_train = train_data[:,0]
    X_test = test_data[:,1].reshape(-1,1)
    y_test = test_data[:,0]
        
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    test_mse = mean_squared_error(pred, y_test)
    if (iter_counter % 20 == 0):
        print("cur test RMSE: "+str(np.sqrt(test_mse)))
        train_pred = clf.predict(X_train)
        train_mse = mean_squared_error(train_pred,y_train)
        print("cur train RMSE:" +str(np.sqrt(train_mse)))
        print("cur theta values:")
        print(params)
        plt.scatter(X_train, y_train,color='g', s=1)
        plt.scatter(X_test, y_test, color='b',s=1)
        plt.xlabel('Pr(0)')
        plt.ylabel('Phi')
        plt.show()
    iter_counter += 1
    return test_mse

##############################################################################
# Optimization
##############################################################################
steps = 200

params_init = np.random.rand(2)

gd_cost = []
opt = qml.RMSPropOptimizer(0.01)

theta = params_init
for _ in range(steps):
    theta = opt.step(mse, theta)
    gd_cost.append(mse(theta))

Hi @ziqi-ma! Welcome :slight_smile:

Since QNodes can be called like regular Python functions, it should be supported! Can you post the full traceback you are receiving?

Yes, thanks!

Here is the full trace:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-8-6216efcec6d2> in <module>
     11 theta = params_init
     12 for _ in range(steps):
---> 13     theta = opt.step(mse, theta)
     14     gd_cost.append(mse(theta))

~/vqsp/vqsp_env/lib/python3.6/site-packages/pennylane/optimize/gradient_descent.py in step(self, objective_fn, x, grad_fn)
     62         """
     63 
---> 64         g = self.compute_grad(objective_fn, x, grad_fn=grad_fn)
     65 
     66         x_out = self.apply_grad(g, x)

~/vqsp/vqsp_env/lib/python3.6/site-packages/pennylane/optimize/gradient_descent.py in compute_grad(objective_fn, x, grad_fn)
     86         else:
     87             # default is autograd
---> 88             g = autograd.grad(objective_fn)(x)  # pylint: disable=no-value-for-parameter
     89         return g
     90 

~/vqsp/vqsp_env/lib/python3.6/site-packages/autograd/wrap_util.py in nary_f(*args, **kwargs)
     18             else:
     19                 x = tuple(args[i] for i in argnum)
---> 20             return unary_operator(unary_f, x, *nary_op_args, **nary_op_kwargs)
     21         return nary_f
     22     return nary_operator

~/vqsp/vqsp_env/lib/python3.6/site-packages/autograd/differential_operators.py in grad(fun, x)
     23     arguments as `fun`, but returns the gradient instead. The function `fun`
     24     should be scalar-valued. The gradient has the same type as the argument."""
---> 25     vjp, ans = _make_vjp(fun, x)
     26     if not vspace(ans).size == 1:
     27         raise TypeError("Grad only applies to real scalar-output functions. "

~/vqsp/vqsp_env/lib/python3.6/site-packages/autograd/core.py in make_vjp(fun, x)
      8 def make_vjp(fun, x):
      9     start_node = VJPNode.new_root()
---> 10     end_value, end_node =  trace(start_node, fun, x)
     11     if end_node is None:
     12         def vjp(g): return vspace(x).zeros()

~/vqsp/vqsp_env/lib/python3.6/site-packages/autograd/tracer.py in trace(start_node, fun, x)
      8     with trace_stack.new_trace() as t:
      9         start_box = new_box(x, t, start_node)
---> 10         end_box = fun(start_box)
     11         if isbox(end_box) and end_box._trace == start_box._trace:
     12             return end_box._value, end_box._node

~/vqsp/vqsp_env/lib/python3.6/site-packages/autograd/wrap_util.py in unary_f(x)
     13                 else:
     14                     subargs = subvals(args, zip(argnum, x))
---> 15                 return fun(*subargs, **kwargs)
     16             if isinstance(argnum, int):
     17                 x = args[argnum]

<ipython-input-7-9f51bd24a77d> in mse(params)
     58     y_test = test_data[:,0]
     59 
---> 60     clf.fit(X_train, y_train)
     61     pred = clf.predict(X_test)
     62     test_mse = mean_squared_error(pred, y_test)

~/vqsp/vqsp_env/lib/python3.6/site-packages/sklearn/linear_model/base.py in fit(self, X, y, sample_weight)
    461         n_jobs_ = self.n_jobs
    462         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
--> 463                          y_numeric=True, multi_output=True)
    464 
    465         if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:

~/vqsp/vqsp_env/lib/python3.6/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
    717                     ensure_min_features=ensure_min_features,
    718                     warn_on_dtype=warn_on_dtype,
--> 719                     estimator=estimator)
    720     if multi_output:
    721         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,

~/vqsp/vqsp_env/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    534         # make sure we actually converted to numeric:
    535         if dtype_numeric and array.dtype.kind == "O":
--> 536             array = array.astype(np.float64)
    537         if not allow_nd and array.ndim >= 3:
    538             raise ValueError("Found array with dim %d. %s expected <= 2."

ValueError: setting an array element with a sequence.

The error happens when I call clf.fit()

Hi @ziqi-ma, I downloaded your example and played around with it, and see what is happening now! There are a couple of subtle things at play here.

  • When you use qml.grad(), the entire computation must support autodifferentiation.

PennyLane implements the quantum portion of the backpropagation itself, but uses Autograd (or PyTorch or TensorFlow) to do the classical part of the backpropagation.

That means, everything outside of the QNode must also support autodifferentiation, for gradients across the full classical-quantum model to be computed.

When you from pennylane import numpy as np, you are importing a version of NumPy from autograd that has been patched to support autodifferentiation. Every NumPy function used here will automatically keep track of the gradient.

  • Scikit learn doesn’t support autodifferentiation.

As a result, you can use Scikit learn in any part of your model that doesn’t need to be differentiated, and it should work fine. However, when you attempt to calculate the gradient, the script will fail at the Scikit learn function call, since no gradient information exists.

That’s why, when you execute mse(), it calculates correctly, but when you call qml.grad(mse), it fails.


Technical aside: When Autograd performs the backpropagation, it passes an ArrayBox object through your cost function in order to accumulate the gradient information. However, Scikit functions don’t know what to do with ArrayBox objects (they expect NumPy arrays), hence your traceback!


If you want to continue using Scikit learn functions within your cost function, one solution is simply to perform gradient-free optimization, for example using the optimizers available in scipy.minimize, rather than the gradient-descent ones built-in to PennyLane.

1 Like

Thanks @josh! One additional suggestion is to change to the PyTorch or TensorFlow interfaces and use their tools for a linear fit.

For example, in PyTorch, some prototype code is:

model = torch.nn.Linear(100, 100)
loss = torch.nn.MSELoss()
opt = torch.optim.SGD(model.parameters(), lr=0.01)

Y_pred = model(X_train)
loss_eval = loss(Y_pred, y_train)

To follow the style of your sklearn-based code, I think you’d first do an inner loop to optimize over the linear fit model parameters. An outer loop would then be optimizing over the QNode params.

1 Like