I’m trying to speed up this code, but it always fails. Environment: tensorflow-gpu==2.5.0, CUDA 11.2.
Thanks to Sophie for the source code:
import tensorflow as tf
#import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers
tf.keras.backend.set_floatx('float64')
# set_session is only needed by the commented-out ConfigProto block below
from tensorflow.compat.v1.keras.backend import set_session
physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
tf.config.experimental.set_memory_growth(device, True)
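# Note: memory growth only affects the classical TensorFlow side; the 'strawberryfields.fock'
# simulator used further down is NumPy-based and runs on the CPU.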
'''
config = tf.compat.v1.ConfigProto()
config.gpu_options.allocator_type = 'BFC' #A "Best-fit with coalescing" algorithm, simplified from a version of dlmalloc.
config.gpu_options.per_process_gpu_memory_fraction = 0.3
config.gpu_options.allow_growth = True
set_session(tf.compat.v1.Session(config=config))
gpus = tf.config.list_physical_devices('GPU')
if gpus:
try:
# Currently, memory growth needs to be the same across GPUs
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
logical_gpus = tf.config.list_logical_devices('GPU')
print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
except RuntimeError as e:
# Memory growth must be set before GPUs have been initialized
print(e)
'''
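# The triple-quoted block above is a TF1-style alternative (ConfigProto with a per-process
# memory fraction); it is kept for reference only and is never executed.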
import pennylane as qml
from pennylane import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml
import matplotlib.pyplot as plt
def ProcessingData(datapoints, train_size, test_size):
#train_data = np.loadtxt("../dataset/fashion-mnist_test.csv", delimiter=",")
#print(train_data.shape)
#Extracting features and labels from the dataset
#train_data_features = train_data[:datapoints, 1:]
#print(train_data_features.shape)
#train_data_labels = train_data[:datapoints, :1].reshape(datapoints,)
#print(train_data_labels[1])
fmnist = fetch_openml(name='Fashion-MNIST', parser='liac-arff', as_frame=False)
train_data_features, train_data_labels = fmnist['data'], fmnist['target']
    train_data_labels = train_data_labels.astype(np.uint8)
    # Use PCA to reduce the feature dimension to 14 (the data-encoding circuit below consumes 14 features per sample)
    DimReduce = PCA(n_components=14)
train_data_features_reduced = DimReduce.fit_transform(train_data_features)
# normalize dataset
train_data_features_reduced = StandardScaler().fit_transform(train_data_features_reduced)
#s1 = np.array([StateVector(x) for x in train_data_features_reduced[:,:dim]])
#s2 = np.array([StateVector(x) for x in train_data_features_reduced[:,dim:]])
#train_data_features_reduced = np.hstack((s1, s2)).tolist()
zero_datapoints_array = [] #an array of the data points containing value 0
one_datapoints_array = []# an array of the data points containing value 1
label_0 = []
label_1 = []
for i in range(datapoints):
if train_data_labels[i] == 0: # extracting zeros
label_0.append(-1)
zero_datapoints_array.append(train_data_features_reduced[i])
elif train_data_labels[i] == 1: # extracting ones
label_1.append(1)
one_datapoints_array.append(train_data_features_reduced[i])
print(len(zero_datapoints_array), len(one_datapoints_array))
if (len(label_0) + len(label_1)) < 2 * (train_size + test_size):
print("datapoints is too small!")
else:
print((len(label_0),len(label_1)))
zero_datapoints_array_train = zero_datapoints_array[:train_size]
one_datapoints_array_train = one_datapoints_array[:train_size]
label_0_train = label_0[:train_size]
label_1_train = label_1[:train_size]
    zero_datapoints_array_test = zero_datapoints_array[train_size:train_size + test_size]
    one_datapoints_array_test = one_datapoints_array[train_size:train_size + test_size]
    label_0_test = label_0[train_size:train_size + test_size]
    label_1_test = label_1[train_size:train_size + test_size]
#print(len(zero_datapoints_array_train), len(label_1_train))
#print(len(one_datapoints_array_test), len(label_0_test))
zero_one_datapoint_array_train = zero_datapoints_array_train + one_datapoints_array_train
label_0_1_train = label_0_train + label_1_train
zero_one_datapoint_array_test = zero_datapoints_array_test + one_datapoints_array_test
label_0_1_test = label_0_test + label_1_test
#print(len(zero_one_datapoint_array_train), label_0_1_train)
#print(len(zero_one_datapoint_array_test), label_0_1_test)
np.random.seed(10)
index_train = np.random.permutation(range(2 * train_size))
index_test = np.random.permutation(range(2 * test_size))
feats_train = []
Y_train = []
feats_val = []
Y_val = []
for i in range(2 * train_size):
feats_train.append(zero_one_datapoint_array_train[index_train[i]])
Y_train.append(label_0_1_train[index_train[i]])
for i in range(2 * test_size):
feats_val.append(zero_one_datapoint_array_test[index_test[i]])
Y_val.append(label_0_1_test[index_test[i]])
#print(feats_val, Y_val)
features = feats_train + feats_val
features = np.array(features, requires_grad=False)
Y = Y_train + Y_val
Y = np.array(Y, requires_grad=False)
feats_train = np.array(feats_train, requires_grad=False)
Y_train = np.array(Y_train, requires_grad=False)
feats_val = np.array(feats_val, requires_grad=False)
Y_val = np.array(Y_val, requires_grad=False)
return feats_train, Y_train, feats_val, Y_val, features, Y
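# ProcessingData returns training/validation features of shape (2*train_size, 14) and
# (2*test_size, 14), with labels -1 for class 0 and +1 for class 1.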
datapoints = 6000
train_size = 300
test_size = 50
X_train, y_train, X_test, y_test, features, Y = ProcessingData(datapoints, train_size, test_size)
def one_hot(labels):
    depth = 4**2  # = 16, one slot per Fock-basis probability returned by the two-mode circuit with cutoff 4
indices = labels.astype(np.int32)
one_hot_labels = np.eye(depth)[indices].astype(np.float32)
return one_hot_labels
# one-hot encoded labels, each label of length cutoff dimension**2
y_train, y_test = one_hot(y_train), one_hot(y_test)
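# Note: with labels -1/+1, np.eye(depth)[indices] uses negative indexing, so class 0 (label -1)
# lands in the last column (index 15) and class 1 (label +1) in column 1.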
model = tf.keras.models.Sequential()
def data_encoding(x):
qml.Squeezing(x[3], x[4], wires=0)
qml.Squeezing(x[9], x[10], wires=1)
qml.Beamsplitter(x[5], x[6], wires=[0,1])
qml.Rotation(x[7], wires=0)
qml.Rotation(x[8], wires=1)
qml.Displacement(x[1], x[2], wires=0)
qml.Displacement(x[11], x[12], wires=1)
qml.Kerr(x[0], wires=0)
qml.Kerr(x[13], wires=1)
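# data_encoding consumes all 14 PCA features of a sample (x[0]..x[13]) as gate parameters on the two modes.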
def qnn_layer(v):
# Interferometer 1
qml.Beamsplitter(v[0], v[1], wires=[0,1])
qml.Rotation(v[2], wires=0)
qml.Rotation(v[3], wires=1)
# Squeezers
qml.Squeezing(v[4], 0.0, wires=0)
qml.Squeezing(v[5], 0.0, wires=1)
# Interferometer 2
qml.Beamsplitter(v[6], v[7], wires=[0,1])
qml.Rotation(v[8], wires=0)
qml.Rotation(v[9], wires=1)
# Bias addition
qml.Displacement(v[10], 0.0, wires=0)
qml.Displacement(v[11], 0.0, wires=1)
# Non-linear activation function
qml.Kerr(v[12], wires=0)
qml.Kerr(v[13], wires=1)
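# Each qnn_layer consumes 14 trainable parameters: 4 per interferometer, plus 2 each for the
# squeezing, displacement and Kerr gates, matching the 14 columns produced by init_weights below.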
def init_weights(layers, modes, active_sd=0.0001, passive_sd=0.1):
# Number of interferometer parameters: beamsplitter + 2 rotations
M = 2 + 1 + 1
int1_weights = tf.random.normal(shape=[layers, M], stddev=passive_sd)
s_weights = tf.random.normal(shape=[layers, modes], stddev=active_sd)
int2_weights = tf.random.normal(shape=[layers, M], stddev=passive_sd)
dr_weights = tf.random.normal(shape=[layers, modes], stddev=active_sd)
k_weights = tf.random.normal(shape=[layers, modes], stddev=active_sd)
weights = tf.concat([int1_weights, s_weights, int2_weights, dr_weights, k_weights], axis=1)
weights = tf.Variable(weights)
return weights
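# init_weights returns a tf.Variable of shape (layers, 14); each row parameterizes one qnn_layer.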
num_modes = 2
cutoff_dim = 4
# select a device
dev = qml.device("strawberryfields.fock", wires=num_modes, cutoff_dim=cutoff_dim)
@qml.qnode(dev, interface="tf")
def quantum_nn(inputs, var):
# Encode input x into quantum state
data_encoding(inputs)
# iterative quantum layers
for v in var:
qnn_layer(v)
return qml.probs(wires=[0, 1])
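# qml.probs(wires=[0, 1]) on a two-mode Fock device with cutoff_dim=4 returns 4**2 = 16
# probabilities, which is why the one-hot labels above have depth 16.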
num_layers = 4
# initialize weights for quantum layers
weights = init_weights(num_layers, num_modes)
# convert the quantum layer to a Keras layer
shape_tup = weights.shape
weight_shapes = {'var': shape_tup}
qlayer = qml.qnn.KerasLayer(quantum_nn, weight_shapes, output_dim=16)  # 16 = cutoff_dim**num_modes probabilities
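# weight_shapes tells KerasLayer to create one trainable tensor named 'var' with shape
# (num_layers, 14), which is passed to quantum_nn on every forward pass.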
# add to the classical sequential model
model.add(qlayer)
opt = keras.optimizers.SGD(learning_rate=0.05)
model.compile(opt, loss='categorical_crossentropy', metrics=['accuracy'])
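# NOTE: '/GPU:2' below requires at least three visible GPUs; adjust the index (e.g. '/GPU:0')
# to match a device that actually exists on your machine.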
with tf.device('/GPU:2'):
hybrid = model.fit(X_train, y_train, epochs = 200, batch_size = 64, shuffle = True, validation_data = (X_test, y_test))
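# (Optional sketch) Inspect the learning curves recorded by model.fit; 'hybrid' is the Keras
# History object returned above.
plt.plot(hybrid.history['accuracy'], label='train accuracy')
plt.plot(hybrid.history['val_accuracy'], label='val accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()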