I have two separate scripts – one using classical machine learning (nothing to do with PennyLane) and one using quantum ML (below). The only difference between the two is that the QML version has additional PennyLane code in the DQN class. The classical script runs on the GPU without a problem, but when I run the QML script I get an error. Here is the relevant part of the code:
import gym
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torch.nn.functional import relu, sigmoid
import pennylane as qml
import time
out_dim = 2 # output dimension of model
wires = 1 # this is the width of the quantum element
n_quantum_layers = 2 # this is the depth of the quantum element
def layer(inputs, w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10):
    qml.templates.SqueezingEmbedding(inputs, wires=range(wires))
    qml.templates.CVNeuralNetLayers(w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10,
                                    wires=range(wires))
    return [qml.expval(qml.X(wires=i)) for i in range(wires)]

class DQN(nn.Module):
    def __init__(self, img_height, img_width):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=img_height * img_width * 3, out_features=12)
        self.fc2 = nn.Linear(in_features=12, out_features=8)
        # self.fc3 = nn.Linear(in_features=10, out_features=8)
        self.clayer_in = torch.nn.Linear(in_features=8, out_features=wires)
        self.clayer_out = torch.nn.Linear(wires, out_dim)
        dev = qml.device('strawberryfields.fock', wires=wires, cutoff_dim=3)
        self.layer_qnode = qml.QNode(layer, dev)
        weights = qml.init.cvqnn_layers_all(n_quantum_layers, wires)
        weight_shapes = {"w{}".format(i): w.shape for i, w in enumerate(weights)}
        self.qlayer = qml.qnn.TorchLayer(self.layer_qnode, weight_shapes)

    def forward(self, t):
        t = self.flatten(t)
        t = self.fc1(t)
        t = self.fc2(t)
        # t = self.fc3(t)
        t = self.clayer_in(t)
        t = self.qlayer(t)
        t = self.clayer_out(t)
        t = t.sigmoid()
        return t
# A lot of code between these two parts is left
# out for the sake of brevity
batch_size = 128
gamma = 0.999
eps_start = 1
eps_end = 0.01
eps_decay = 0.0005
target_update = 10
memory_size = 500000
lr_start = 0.01
lr_end = 0.00001
lr_decay = 0.00009
num_episodes = 1000 # run for more episodes for better results
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
em = CartPoleEnvManager(device)
strategy = EpsilonGreedyStrategy(eps_start, eps_end, eps_decay)
agent = Agent(strategy, em.num_actions_available(), device)
memory = ReplayMemory(memory_size)
#learning_rate = LearningRate(lr_start,lr_end,lr_decay)
#learn = lr(learning_rate)
policy_net = DQN(em.get_screen_height(), em.get_screen_width()).to(device)
target_net = DQN(em.get_screen_height(), em.get_screen_width()).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()  # tells PyTorch that target_net is only used for inference, not training
optimizer = optim.Adam(params=policy_net.parameters(), lr=0.01)
i = 0
episode_durations = []
for episode in range(num_episodes):  # iterate over each episode
    program_starts = time.time()
    em.reset()
    state = em.get_state()

    for timestep in count():
        action = agent.select_action(state, policy_net)
        reward = em.take_action(action)
        next_state = em.get_state()
        memory.push(Experience(state, action, next_state, reward))
        state = next_state
        # i += 1
        # print(i)

        if memory.can_provide_sample(batch_size):
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.9)
            experiences = memory.sample(batch_size)
            states, actions, rewards, next_states = extract_tensors(experiences)
            current_q_values = QValues.get_current(policy_net, states, actions)
            next_q_values = QValues.get_next(target_net, next_states)  # max Q-values of the next states, from the target net
            target_q_values = (next_q_values * gamma) + rewards
            loss = F.mse_loss(current_q_values, target_q_values.unsqueeze(1))
            optimizer.zero_grad()  # sets the gradients of all weights and biases in policy_net to zero
            loss.backward()  # computes the gradient of the loss with respect to all weights and biases in policy_net
            optimizer.step()  # updates the weights and biases with the gradients computed by loss.backward()
            scheduler.step()

        if em.done:
            episode_durations.append(timestep)
            plot(episode_durations, 100)
            break

    if episode % target_update == 0:
        target_net.load_state_dict(policy_net.state_dict())
    now = time.time()
    print("Episode took {0} seconds".format(now - program_starts))

em.close()
And when running the code, the following error appears:
Traceback (most recent call last):
  File "qdqn.py", line 328, in <module>
    loss.backward() #computes gradient of loss with respect to all weights n biases in the policy net
  File "/home/ubuntu/anaconda3/envs/gymm/lib/python3.8/site-packages/torch/tensor.py", line 198, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/ubuntu/anaconda3/envs/gymm/lib/python3.8/site-packages/torch/autograd/__init__.py", line 98, in backward
    Variable._execution_engine.run_backward(
RuntimeError: Expected object of device type cuda but got device type cpu for argument #2 'mat2' in call to _th_mm
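The classical Linear layers are clearly moved to the GPU by .to(device), so my suspicion is that the weights registered by qml.qnn.TorchLayer (or the output of the QNode, which runs on the strawberryfields.fock simulator) remain on the CPU. As a quick diagnostic, one can print the device of every registered parameter of the policy net (a minimal sketch, assuming only the DQN class shown above):

# Diagnostic sketch: list where each parameter of the policy net lives.
# If the qlayer weights report "cpu" while the Linear layers report "cuda:0",
# that would explain the cpu/cuda mismatch during the backward pass.
for name, param in policy_net.named_parameters():
    print(name, param.device)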
Any insight is greatly appreciated.
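One idea I have not tested yet: since strawberryfields.fock is a CPU simulator, a possible workaround (an untested sketch, not taken from the PennyLane docs) would be to route only the quantum layer through the CPU in forward and move its output back to the GPU afterwards:

# Untested sketch: keep the QNode input/output on the CPU and shuttle the
# activations between devices; everything else stays on the GPU.
def forward(self, t):
    t = self.flatten(t)
    t = self.fc1(t)
    t = self.fc2(t)
    t = self.clayer_in(t)
    t = self.qlayer(t.cpu()).to(t.device)  # t.device on the right-hand side is still cuda here
    t = self.clayer_out(t)
    return t.sigmoid()

If this is the wrong approach, is there a way to make the TorchLayer weights follow .to(device) so everything stays on the GPU?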