Hi, I was doing some research on ma-QAOA. I was trying to speed up process of implementing expetation value of a shot-based environment using multinomial in torch like this:
import pennylane as qml
import torch
import numpy as np
import os, pandas as pd, random, gc
from collections import defaultdict
from typing import List, Tuple, Dict
# -------------------- 基本設定 --------------------
cuda_id = '0'
os.environ['CUDA_VISIBLE_DEVICES'] = cuda_id
torch.set_default_dtype(torch.float64)
device = torch.device("cuda")
N = 12
m = 2
file_name = "maQAOA"
p = 1
PI = torch.pi
# === 初始化與優化設定 ===
ADAM_LR = 0.01
WINDOW = 100
TOL = 1e-2
MAX_EPOCH = 500
SHOTS = 2**10
# --------------------------------------------------
def generate_case(seed=42):
def generate_bbv_graph(
N: int,
m: int,
seed: int | None = None,
m0: int | None = None,
w0: float = 1.0,
delta: float = 1.0
) -> Tuple[List[Tuple[int, int]], List[float], List[float]]:
rng = random.Random(seed)
adj: Dict[int, Dict[int, float]] = defaultdict(dict)
strength = [0.0] * N
if m0 is None:
m0 = m
for i in range(m0):
for j in range(i + 1, m0):
adj[i][j] = w0
adj[j][i] = w0
strength[i] += w0
strength[j] += w0
def weighted_sample_without_replacement(pop_indices: List[int], weights: List[float], k: int) -> List[int]:
if sum(weights) <= 0:
return rng.sample(pop_indices, k)
chosen = []
pool = pop_indices[:]
w = weights[:]
for _ in range(k):
pick = rng.choices(pool, weights=w, k=1)[0]
idx = pool.index(pick)
chosen.append(pick)
pool.pop(idx); w.pop(idx)
return chosen
for new_v in range(m0, N):
candidates = list(range(new_v))
weights_s = [strength[u] for u in candidates]
targets = weighted_sample_without_replacement(candidates, weights_s, m)
for u in targets:
s_u_old = strength[u]
if s_u_old > 0 and delta > 0:
increments = []
for v, w_uv in adj[u].items():
inc = delta * w_uv / s_u_old
if inc > 0:
increments.append((v, inc))
for v, inc in increments:
adj[u][v] += inc
adj[v][u] += inc
strength[u] += inc
strength[v] += inc
if u not in adj[new_v]:
adj[new_v][u] = 0.0
adj[u][new_v] = 0.0
adj[new_v][u] += w0
adj[u][new_v] += w0
strength[new_v] += w0
strength[u] += w0
edges = []
weights = []
for u in range(N):
for v, w in adj[u].items():
if u < v:
edges.append((u, v))
weights.append(w)
return edges, weights, strength
edges, weights, deg_w = generate_bbv_graph(N=N, m=m, seed=seed, m0=None, w0=1.0, delta=1)
weights = torch.tensor(weights, device=device, dtype=torch.float64)
# --- 成本向量 ---
num_states = 2 ** N
states = torch.arange(num_states, device=device, dtype=torch.int64).unsqueeze(1)
bits = ((states >> torch.arange(N, device=device)) & 1).bool()
costs = torch.zeros(num_states, device=device, dtype=torch.float64)
for idx, (i, j) in enumerate(edges):
diff = bits[:, i] ^ bits[:, j]
costs += weights[idx] * diff.to(torch.float64)
print(f"Min: {costs.min():.4f}, Mean: {costs.mean():.4f}, Max: {costs.max():.4f}")
# --- Hamiltonian ---
coeffs = []
observables = []
for (i, j), w in zip(edges, weights.tolist()):
coeffs.append(0.5 * w)
observables.append(qml.Identity(0))
coeffs.append(-0.5 * w)
observables.append(qml.PauliZ(i) @ qml.PauliZ(j))
cost_hamiltonian = qml.Hamiltonian(coeffs, observables)
return weights, edges, deg_w, costs, cost_hamiltonian
# --------------------------------------------------
def init_params(p: int, N: int, M: int, seed: int = 42) -> np.ndarray:
rng = random.Random(seed)
kspace = list(range(-8, 9))
params = [rng.choice(kspace) * (np.pi/8.0) for _ in range(p*(M+N))]
return np.array(params, dtype=np.float64)
# ======================= 抽樣與手刻 parameter-shift =======================
def exoval_by_shots(state: torch.Tensor, shots: int, costs: torch.Tensor) -> torch.Tensor:
probs = (state.abs() ** 2).to(torch.float64)
idx = torch.multinomial(probs, num_samples=shots, replacement=True)
return costs[idx].mean()
def build_qaoa_state_fn(N, p, edges, weights, dev=None):
if dev is None:
dev = qml.device("lightning.gpu", wires=N, shots=None)
@qml.qnode(dev, interface='torch')
def qaoa_circuit(params):
M = len(edges)
gammas = params[:p*M].view(p, M)
betas = params[p*M:].view(p, N)
for i in range(N):
qml.Hadamard(wires=i)
for layer in range(p):
for e_idx, (i, j) in enumerate(edges):
qml.CNOT(wires=(i, j))
qml.RZ(weights[e_idx] * gammas[layer, e_idx], wires=j)
qml.CNOT(wires=(i, j))
for i in range(N):
qml.RX(betas[layer, i], wires=i)
return qml.state()
def qaoa_state(params: torch.Tensor) -> torch.Tensor:
state = qaoa_circuit(params)
if not torch.is_tensor(state):
state = torch.tensor(state)
if state.device.type != 'cuda':
state = state.to(device)
if state.dtype != torch.complex128:
state = state.to(torch.complex128)
return state
return qaoa_state
@torch.no_grad()
def parameter_shift(qaoa_state_fn, params, shots, costs, N, M, p, weights):
L = p*(M+N)
grads = torch.empty(L, device=device, dtype=torch.float64)
def f(_params: torch.Tensor) -> torch.Tensor:
state = qaoa_state_fn(_params)
return exoval_by_shots(state, shots, costs)
for k in range(L):
params_plus = params.clone()
params_minus = params.clone()
if k < p*M:
e_idx = k % M
w = float(weights[e_idx].item())
if w < 1e-12:
grads[k] = 0.0
continue
delta = (np.pi/2.0) / w
params_plus[k] += delta
params_minus[k] -= delta
f_plus = f(params_plus)
f_minus = f(params_minus)
grads[k] = 0.5 * w * (f_plus - f_minus)
else:
delta = np.pi/2.0
params_plus[k] += delta
params_minus[k] -= delta
f_plus = f(params_plus)
f_minus = f(params_minus)
grads[k] = 0.5 * (f_plus - f_minus)
return grads
# ======================= run_case =======================
def run_case(weights, edges, costs, cost_hamiltonian, init_params, instance_id, trial_id):
M = len(edges)
params = torch.tensor(init_params, device=device, dtype=torch.float64, requires_grad=False)
params = torch.nn.Parameter(params, requires_grad=False)
optimizer = torch.optim.Adam([params], lr=ADAM_LR)
qaoa_state_fn = build_qaoa_state_fn(N, p, edges, weights)
def estimate_objective(_params: torch.Tensor) -> torch.Tensor:
state = qaoa_state_fn(_params)
return exoval_by_shots(state, SHOTS, costs)
loss_list = []
best_para = params.detach().clone()
max_exp = float('-inf')
def is_converged(window=WINDOW, tol=TOL):
if len(loss_list) < window:
return False
recent = loss_list[-window:]
return (max(recent) - min(recent)) < tol * weights.sum().detach().item()
for epoch in range(1, MAX_EPOCH+1):
exp_est = estimate_objective(params.data)
grads = parameter_shift(qaoa_state_fn, params.data, SHOTS, costs, N, M, p, weights)
optimizer.zero_grad(set_to_none=True)
params.grad = -grads
optimizer.step()
approx_ratio = (exp_est / costs.max()).item()
loss_list.append(float(-exp_est))
if exp_est.item() > max_exp:
max_exp = exp_est.item()
best_para = params.detach().clone()
print(f"[instance:{instance_id}][trial:{trial_id}] Epoch {epoch} | approximate ratio {approx_ratio:.6f}")
if is_converged(WINDOW, TOL):
break
print(f"[instance:{instance_id}][trial:{trial_id}] best approximate ratio:{max_exp/costs.max().item():.6f}")
return [best_para.tolist()], [max_exp/costs.max().item()], [len(loss_list)], [loss_list]
# ======================= 主程式 =======================
for i in range(20):
os.makedirs(f'{file_name}/{i}', exist_ok=True)
weights, edges, deg_w, costs, cost_hamiltonian = generate_case(i)
M = len(edges)
for trial_id in range(100):
if os.path.exists(f'{file_name}/{i}/best_params.csv'):
data = pd.read_csv(f'{file_name}/{i}/best_params.csv', header=None)
data = np.array(data[0])
if len(data) != trial_id:
continue
params0 = init_params(p=p, N=N, M=M, seed=trial_id)
best_params_list, best_approx_list, num_epoch_list, loss_list_list = run_case(
weights, edges, costs, cost_hamiltonian, params0, i, trial_id
)
pd.DataFrame(best_params_list).to_csv(f'{file_name}/{i}/best_params.csv', mode='a', index=False, header=False)
pd.DataFrame(best_approx_list).to_csv(f'{file_name}/{i}/best_approx.csv', mode='a', index=False, header=False)
pd.DataFrame(num_epoch_list).to_csv(f'{file_name}/{i}/num_epoch.csv', mode='a', index=False, header=False)
pd.DataFrame(loss_list_list).to_csv(f'{file_name}/{i}/loss_list.csv', mode='a', index=False, header=False)
del weights, edges, deg_w, costs, cost_hamiltonian
gc.collect()
torch.cuda.empty_cache()
, and I got worse converge result and larger noise compared to code using pennylane API like this:
import pennylane as qml
import torch
import numpy as np
import os, pandas as pd, random, gc
from collections import defaultdict
from typing import List, Tuple, Dict
cuda_id = '0'
# === 基本設定 ===
N = 12
m = 2
file_name = "maQAOA"
p = 1
PI = torch.pi
os.environ['CUDA_VISIBLE_DEVICES'] = cuda_id
device = torch.device("cuda")
# === 初始化與優化設定 ===
ADAM_LR = 0.01
WINDOW = 100
TOL = 1e-2
MAX_EPOCH = 500
SHOTS = 1024
#--------------------------------------------------#
def generate_case(seed=42):
def generate_bbv_graph(
N: int,
m: int,
seed: int | None = None,
m0: int | None = None,
w0: float = 1.0,
delta: float = 1.0
) -> Tuple[List[Tuple[int, int]], List[float], List[float], List[int]]:
"""
產生 BBV (Barrat–Barthelemy–Vespignani) 加權網路。
回傳:
edges: [(u,v), ...](u<v)
weights: [w_uv, ...] 與 edges 對齊
strength: [s_i] 每個節點的加權度數 s_i = sum_j w_ij
"""
assert 1 <= m < N, "需滿足 1 <= m < N"
if m0 is None:
m0 = m
assert m0 >= m and m0 <= N
assert w0 >= 0 and delta >= 0
rng = random.Random(seed)
adj: Dict[int, Dict[int, float]] = defaultdict(dict)
strength = [0.0] * N
# 初始 K_{m0}
for i in range(m0):
for j in range(i + 1, m0):
adj[i][j] = w0
adj[j][i] = w0
strength[i] += w0
strength[j] += w0
def weighted_sample_without_replacement(pop_indices: List[int], weights: List[float], k: int) -> List[int]:
if sum(weights) <= 0:
return rng.sample(pop_indices, k)
chosen = []
pool = pop_indices[:]
w = weights[:]
for _ in range(k):
pick = rng.choices(pool, weights=w, k=1)[0]
idx = pool.index(pick)
chosen.append(pick)
pool.pop(idx); w.pop(idx)
return chosen
for new_v in range(m0, N):
candidates = list(range(new_v))
weights_s = [strength[u] for u in candidates]
targets = weighted_sample_without_replacement(candidates, weights_s, m)
for u in targets:
s_u_old = strength[u]
if s_u_old > 0 and delta > 0:
increments = []
for v, w_uv in adj[u].items():
inc = delta * w_uv / s_u_old
if inc > 0:
increments.append((v, inc))
for v, inc in increments:
adj[u][v] += inc
adj[v][u] += inc
strength[u] += inc
strength[v] += inc
if u not in adj[new_v]:
adj[new_v][u] = 0.0
adj[u][new_v] = 0.0
adj[new_v][u] += w0
adj[u][new_v] += w0
strength[new_v] += w0
strength[u] += w0
edges = []
weights = []
for u in range(N):
for v, w in adj[u].items():
if u < v:
edges.append((u, v))
weights.append(w)
return edges, weights, strength
edges, weights, deg_w = generate_bbv_graph(
N=N, m=m, seed=seed, m0=None, w0=1.0, delta=1
)
weights = torch.tensor(weights, device=device, dtype=torch.float32)
# --- 成本向量(為了計算 MaxCut 最佳值,用暴力法) ---
num_states = 2 ** N
states = torch.arange(num_states, device=device).unsqueeze(1)
bits = ((states >> torch.arange(N, device=device)) & 1).bool() # LSB->MSB
costs = torch.zeros(num_states, device=device, dtype=torch.float32)
for idx, (i, j) in enumerate(edges):
diff = bits[:, i] ^ bits[:, j]
costs += weights[idx] * diff.float()
print(f"Min: {costs.min():.4f}, Mean: {costs.mean():.4f}, Max: {costs.max():.4f}")
# --- 也建立 expval 用的 Hamiltonian(常數 + ZZ 形式) ---
coeffs = []
observables = []
for (i, j), w in zip(edges, weights.tolist()):
coeffs.append(0.5 * w) # 常數項 * I
observables.append(qml.Identity(0))
coeffs.append(-0.5 * w) # -0.5 w Z_i Z_j
observables.append(qml.PauliZ(i) @ qml.PauliZ(j))
cost_hamiltonian = qml.Hamiltonian(coeffs, observables)
return weights, edges, deg_w, costs, cost_hamiltonian
#--------------------------------------------------#
def init_params(p: int, N: int, M: int,
seed: int = 42) -> np.ndarray:
rng = random.Random(seed)
kspace = list(range(-8, 9))
params = [rng.choice(kspace)*(np.pi/8.0) for _ in range(p*(M+N))]
return np.array(params, dtype=np.float32)
#--------------------------------------------------#
def run_case(weights, edges, costs, cost_hamiltonian, init_params, instance_id, trial_id):
M = len(edges)
dev = qml.device("lightning.gpu", wires=N, shots=SHOTS)
params = torch.nn.Parameter(torch.tensor(init_params, device=device), requires_grad=True)
# === ma-QAOA 電路 ===
@qml.qnode(dev, interface='torch', diff_method='parameter-shift')
def qaoa_circuit(params):
# 參數切片:每層 [gammas(M), betas(N)]
gammas = params[:p*M].view(p, M)
betas = params[p*M:].view(p, N)
# 均勻疊加
for i in range(N):
qml.Hadamard(wires=i)
for layer in range(p):
# Cost: 每條邊有自己的 γ_{layer,e}
for e_idx, (i, j) in enumerate(edges):
# CNOT - RZ - CNOT 等效於 exp(-i * (theta/2) * Z_i Z_j)
# 這裡沿用你的寫法:theta = w_e * gamma_{l,e}
qml.CNOT(wires=(i, j))
qml.RZ(weights[e_idx] * gammas[layer, e_idx], wires=j)
qml.CNOT(wires=(i, j))
# Mixer: 每個頂點有自己的 β_{layer,v}
for i in range(N):
qml.RX(betas[layer, i], wires=i)
return qml.expval(cost_hamiltonian)
def loss_fn(params):
# 最大化期望值 => 最小化負值
return -qaoa_circuit(params)
optimizer = torch.optim.Adam([params], lr=ADAM_LR)
loss_list = []
best_para = params
max_exp = float('-inf')
def is_converged(window=WINDOW, tol=TOL):
if len(loss_list) < window:
return False
recent = loss_list[-window:]
return (max(recent) - min(recent)) < tol * weights.sum().detach().item()
for epoch in range(1, MAX_EPOCH+1):
optimizer.zero_grad()
loss = loss_fn(params)
loss.backward()
optimizer.step()
loss_val = loss.item()
if -loss_val > max_exp:
max_exp = -loss_val
best_para = params.detach().clone()
loss_list.append(loss_val)
print(f"[instance:{instance_id}][trial:{trial_id}] Epoch {epoch} | approximate ratio {-loss_val/costs.max():.6f}")
if is_converged(WINDOW, TOL):
break
print(f"[instance:{instance_id}][trial:{trial_id}] best approximate ratio:{max_exp/costs.max()}")
return [best_para.tolist()], [max_exp/costs.max().tolist()], [len(loss_list)], [loss_list]
#--------------------------------------------------#
for i in range(20):
os.makedirs(f'{file_name}/{i}', exist_ok=True)
weights, edges, deg_w, costs, cost_hamiltonian = generate_case(i)
M = len(edges)
for trial_id in range(100):
if os.path.exists(f'{file_name}/{i}/best_params.csv'):
data = pd.read_csv(f'{file_name}/{i}/best_params.csv', header=None)
data = np.array(data[0])
if len(data)!=trial_id:
continue
params = init_params(
p=p, N=N, M=M,
seed=trial_id
)
best_params_list, best_approx_list, num_epoch_list, loss_list_list = run_case(
weights, edges, costs, cost_hamiltonian, params, i, trial_id
)
pd.DataFrame(best_params_list).to_csv(f'{file_name}/{i}/best_params.csv', mode='a', index=False, header=False)
pd.DataFrame(best_approx_list).to_csv(f'{file_name}/{i}/best_approx.csv', mode='a', index=False, header=False)
pd.DataFrame(num_epoch_list).to_csv(f'{file_name}/{i}/num_epoch.csv', mode='a', index=False, header=False)
pd.DataFrame(loss_list_list).to_csv(f'{file_name}/{i}/loss_list.csv', mode='a', index=False, header=False)
del weights, edges, deg_w, costs, cost_hamiltonian
gc.collect()
torch.cuda.empty_cache()
can someone help me, thanks ~ !