How do I GPU-accelerate this code?

I’m trying to speed this code up on the GPU, but it always fails. Environment: TF-GPU==2.5.0, CUDA==11.2.
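
As a first sanity check (separate from the script below, just the standard TF calls), this is the kind of snippet I run to confirm TensorFlow can actually see the GPU:

import tensorflow as tf

print("TF version:", tf.__version__)
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("Visible GPUs:", tf.config.list_physical_devices('GPU'))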

Thanks to Sophie for the source code:

import tensorflow as tf
#import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers
tf.keras.backend.set_floatx('float64')
# set_session is only needed by the commented-out tf.compat.v1 config block below
set_session = tf.compat.v1.keras.backend.set_session
physical_devices = tf.config.list_physical_devices('GPU') 
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)
'''
config = tf.compat.v1.ConfigProto()
config.gpu_options.allocator_type = 'BFC' #A "Best-fit with coalescing" algorithm, simplified from a version of dlmalloc.
config.gpu_options.per_process_gpu_memory_fraction = 0.3
config.gpu_options.allow_growth = True
set_session(tf.compat.v1.Session(config=config)) 
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
'''
import pennylane as qml
from pennylane import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml
import matplotlib.pyplot as plt

def ProcessingData(datapoints, train_size, test_size):
    #train_data = np.loadtxt("../dataset/fashion-mnist_test.csv", delimiter=",")
    #print(train_data.shape)
    #Extracting features and labels from the dataset 
    #train_data_features = train_data[:datapoints, 1:]
    #print(train_data_features.shape)
    #train_data_labels = train_data[:datapoints, :1].reshape(datapoints,)
    #print(train_data_labels[1])
    fmnist = fetch_openml(name='Fashion-MNIST', parser='liac-arff', as_frame=False)
    train_data_features, train_data_labels = fmnist['data'], fmnist['target']
    train_data_labels = train_data_labels.astype(np.uint8)

    # Use PCA to reduce the feature dimension to 14
    DimReduce = PCA(n_components=14)
    train_data_features_reduced = DimReduce.fit_transform(train_data_features)
    # normalize dataset
    train_data_features_reduced = StandardScaler().fit_transform(train_data_features_reduced)
    #s1 = np.array([StateVector(x) for x in train_data_features_reduced[:,:dim]])
    #s2 = np.array([StateVector(x) for x in train_data_features_reduced[:,dim:]])
    #train_data_features_reduced = np.hstack((s1, s2)).tolist()

    zero_datapoints_array = []   # data points with label 0
    one_datapoints_array = []    # data points with label 1
    label_0 = []
    label_1 = []
        
    for i in range(datapoints):
        if train_data_labels[i] == 0:                   # extracting zeros
            label_0.append(-1)
            zero_datapoints_array.append(train_data_features_reduced[i])
        elif train_data_labels[i] == 1:                   # extracting ones
            label_1.append(1)
            one_datapoints_array.append(train_data_features_reduced[i]) 
    print(len(zero_datapoints_array), len(one_datapoints_array))
    if (len(label_0) + len(label_1)) < 2 * (train_size + test_size):
        raise ValueError("datapoints is too small for the requested train/test split!")
    else:
        print((len(label_0),len(label_1)))
        zero_datapoints_array_train = zero_datapoints_array[:train_size]
        one_datapoints_array_train = one_datapoints_array[:train_size]
        label_0_train = label_0[:train_size]
        label_1_train = label_1[:train_size]
        zero_datapoints_array_test = zero_datapoints_array[train_size + 1:train_size + test_size + 1]
        one_datapoints_array_test = one_datapoints_array[train_size + 1:train_size + test_size + 1]
        label_0_test = label_0[train_size + 1:train_size + test_size + 1]
        label_1_test = label_1[train_size + 1:train_size + test_size + 1]
        #print(len(zero_datapoints_array_train), len(label_1_train))
        #print(len(one_datapoints_array_test), len(label_0_test))

        zero_one_datapoint_array_train = zero_datapoints_array_train + one_datapoints_array_train
        label_0_1_train = label_0_train + label_1_train
        zero_one_datapoint_array_test = zero_datapoints_array_test + one_datapoints_array_test
        label_0_1_test = label_0_test + label_1_test
        #print(len(zero_one_datapoint_array_train), label_0_1_train)
        #print(len(zero_one_datapoint_array_test), label_0_1_test)

        np.random.seed(10)
        index_train = np.random.permutation(range(2 * train_size))
        index_test = np.random.permutation(range(2 * test_size))

        feats_train = []
        Y_train = []
        feats_val = []
        Y_val = []
        for i in range(2 * train_size):
            feats_train.append(zero_one_datapoint_array_train[index_train[i]])
            Y_train.append(label_0_1_train[index_train[i]])


        for i in range(2 * test_size):
            feats_val.append(zero_one_datapoint_array_test[index_test[i]])
            Y_val.append(label_0_1_test[index_test[i]])

        #print(feats_val, Y_val)

        features = feats_train + feats_val
        features = np.array(features, requires_grad=False)
        Y = Y_train + Y_val
        Y = np.array(Y, requires_grad=False)
        feats_train = np.array(feats_train, requires_grad=False)
        Y_train = np.array(Y_train, requires_grad=False)
        feats_val = np.array(feats_val, requires_grad=False)
        Y_val = np.array(Y_val, requires_grad=False)
    return feats_train, Y_train, feats_val, Y_val, features, Y

datapoints = 6000
train_size = 300
test_size = 50
X_train, y_train, X_test, y_test, features, Y = ProcessingData(datapoints, train_size, test_size)
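
# (Added sanity check, not in the original script: confirm the split shapes.
#  With train_size=300 and test_size=50 this should print (600, 14) (600,)
#  and (100, 14) (100,), since labels are not one-hot encoded yet.)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)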

def one_hot(labels):

    depth = 4**2                          # cutoff_dim**2 = 16 entries per label
    indices = labels.astype(np.int32)     # labels are -1/+1; index -1 selects the last row of the identity
    one_hot_labels = np.eye(depth)[indices].astype(np.float32)

    return one_hot_labels

# one-hot encoded labels, each label of length cutoff dimension**2
y_train, y_test = one_hot(y_train), one_hot(y_test)

model = tf.keras.models.Sequential()

def data_encoding(x):
    # x holds the 14 PCA features; indices 0-13 are each used exactly once below
    qml.Squeezing(x[3], x[4], wires=0)
    qml.Squeezing(x[9], x[10], wires=1)
    
    qml.Beamsplitter(x[5], x[6], wires=[0,1])
    
    qml.Rotation(x[7], wires=0)
    qml.Rotation(x[8], wires=1)
    
    qml.Displacement(x[1], x[2], wires=0)
    qml.Displacement(x[11], x[12], wires=1)
    
    qml.Kerr(x[0], wires=0)
    qml.Kerr(x[13], wires=1)

def qnn_layer(v):
    # Interferometer 1
    qml.Beamsplitter(v[0], v[1], wires=[0,1])
    qml.Rotation(v[2], wires=0)
    qml.Rotation(v[3], wires=1)
    
    # Squeezers
    qml.Squeezing(v[4], 0.0, wires=0)
    qml.Squeezing(v[5], 0.0, wires=1)
    
    # Interferometer 2
    qml.Beamsplitter(v[6], v[7], wires=[0,1])
    qml.Rotation(v[8], wires=0)
    qml.Rotation(v[9], wires=1)
    
    # Bias addition
    qml.Displacement(v[10], 0.0, wires=0)
    qml.Displacement(v[11], 0.0, wires=1)
    
    # Non-linear activation function
    qml.Kerr(v[12], wires=0)
    qml.Kerr(v[13], wires=1)

def init_weights(layers, modes, active_sd=0.0001, passive_sd=0.1):
    
    # Number of interferometer parameters: beamsplitter + 2 rotations
    M = 2 + 1 + 1  

    int1_weights = tf.random.normal(shape=[layers, M], stddev=passive_sd)
    s_weights = tf.random.normal(shape=[layers, modes], stddev=active_sd)
    int2_weights = tf.random.normal(shape=[layers, M], stddev=passive_sd)
    dr_weights = tf.random.normal(shape=[layers, modes], stddev=active_sd)
    k_weights = tf.random.normal(shape=[layers, modes], stddev=active_sd)

    weights = tf.concat([int1_weights, s_weights, int2_weights, dr_weights, k_weights], axis=1)
    weights = tf.Variable(weights)

    return weights

num_modes = 2
cutoff_dim = 4

# select a device
dev = qml.device("strawberryfields.fock", wires=num_modes, cutoff_dim=cutoff_dim)

@qml.qnode(dev, interface="tf")
def quantum_nn(inputs, var):
    # Encode input x into quantum state
    data_encoding(inputs)

    # iterative quantum layers
    for v in var:
        qnn_layer(v)

    return qml.probs(wires=[0, 1])

num_layers = 4

# initialize weights for the quantum layers
weights = init_weights(num_layers, num_modes)

# convert the quantum circuit to a Keras layer
shape_tup = weights.shape
weight_shapes = {'var': shape_tup}
qlayer = qml.qnn.KerasLayer(quantum_nn, weight_shapes, output_dim=cutoff_dim ** 2)  # probs over 2 modes with cutoff 4 gives 16 outputs, matching the one-hot labels

# add to the classical sequential model
model.add(qlayer)

opt = keras.optimizers.SGD(learning_rate=0.05)
model.compile(opt, loss='categorical_crossentropy', metrics=['accuracy'])

with tf.device('/GPU:2'):
    hybrid = model.fit(X_train, y_train, epochs=200, batch_size=64, shuffle=True, validation_data=(X_test, y_test))
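
One thing I’m unsure about: '/GPU:2' assumes at least three GPUs are visible to TensorFlow, and as far as I can tell the strawberryfields.fock simulation itself runs in NumPy on the CPU, so tf.device should only affect the classical Keras side. A more defensive variant of the last block (just a sketch) would be:

gpus = tf.config.list_logical_devices('GPU')
device_name = gpus[0].name if gpus else '/CPU:0'   # e.g. '/device:GPU:0'
with tf.device(device_name):
    hybrid = model.fit(X_train, y_train, epochs=200, batch_size=64, shuffle=True, validation_data=(X_test, y_test))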

Hey @RX1,

If you could please reduce your code down to something much smaller that still reproduces the error that you’re seeing, that would help tremendously 🙂. In the meantime, since you’re using the sf-fock device, make sure you have PennyLane v0.29 installed, since that’s the last version of PennyLane that supports the PennyLane-SF plugin.
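
As a starting point, something along these lines is usually small enough to debug (just a sketch — the gates, shapes, and random data here are placeholders, not your actual model):

# pip install pennylane==0.29 pennylane-sf

import numpy as np
import tensorflow as tf
import pennylane as qml

dev = qml.device("strawberryfields.fock", wires=2, cutoff_dim=4)

@qml.qnode(dev, interface="tf")
def circuit(inputs, var):
    # Toy encoding + one trainable beamsplitter; swap in your own gates as needed
    qml.Displacement(inputs[0], 0.0, wires=0)
    qml.Displacement(inputs[1], 0.0, wires=1)
    qml.Beamsplitter(var[0], var[1], wires=[0, 1])
    return qml.probs(wires=[0, 1])        # 4**2 = 16 Fock-basis probabilities

qlayer = qml.qnn.KerasLayer(circuit, {"var": (2,)}, output_dim=16)
model = tf.keras.models.Sequential([qlayer])
model.compile(tf.keras.optimizers.SGD(0.05), loss="categorical_crossentropy")

# Four random 2-feature samples with random one-hot targets of length 16
X = np.random.random((4, 2))
y = np.eye(16)[np.random.randint(0, 16, size=4)]

with tf.device("/GPU:0"):                 # or "/CPU:0" to rule the GPU in or out
    model.fit(X, y, epochs=1, batch_size=2)

If that tiny version already fails, please post the full error message it produces.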