Nvrtc: error: failed to open nvrtc-builtins64_117.dll

Any help would be appreciated. My `nvcc --version` output:
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Fri_Jan__6_19:04:39_Pacific_Standard_Time_2023
Cuda compilation tools, release 12.0, V12.0.140
Build cuda_12.0.r12.0/compiler.32267302_0

Error traceback (truncated):

98     outputs = model(inputs)
     99     #nn.sigmoid(outputs)
    100     #print(outputs)
    101     if len(outputs.shape) == 1:

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\torchvision\models\densenet.py:218, in DenseNet.forward(self, x)
    216 out = F.adaptive_avg_pool2d(out, (1, 1))
    217 out = torch.flatten(out, 1)
--> 218 out = self.classifier(out)
    219 return out

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[26], line 16, in Quantumnet.forward(self, input_features)
     14 q_out = q_out.to(device)
     15 for elem in q_in:
---> 16     q_out_elem = q_net(elem,self.q_params).float().unsqueeze(0)
     17     q_out = torch.cat((q_out, q_out_elem))
     18 return self.post_net(q_out)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\qnode.py:847, in QNode.__call__(self, *args, **kwargs)
    843     self._update_original_device()
    845     return res
--> 847 res = qml.execute(
    848     [self.tape],
    849     device=self.device,
    850     gradient_fn=self.gradient_fn,
    851     interface=self.interface,
    852     gradient_kwargs=self.gradient_kwargs,
    853     override_shots=override_shots,
    854     **self.execute_kwargs,
    855 )
    857 if old_interface == "auto":
    858     self.interface = "auto"

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\interfaces\execution.py:651, in execute(tapes, device, gradient_fn, interface, mode, gradient_kwargs, cache, cachesize, max_diff, override_shots, expand_fn, max_expansion, device_batch_transform)
    647     return batch_fn(res)
    649 if gradient_fn == "backprop" or interface is None:
    650     return batch_fn(
--> 651         qml.interfaces.cache_execute(
    652             batch_execute, cache, return_tuple=False, expand_fn=expand_fn
    653         )(tapes)
    654     )
    656 # the default execution function is batch_execute
    657 execute_fn = qml.interfaces.cache_execute(batch_execute, cache, expand_fn=expand_fn)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\interfaces\execution.py:206, in cache_execute.<locals>.wrapper(tapes, **kwargs)
    202         return (res, []) if return_tuple else res
    204 else:
    205     # execute all unique tapes that do not exist in the cache
--> 206     res = fn(execution_tapes.values(), **kwargs)
    208 final_res = []
    210 for i, tape in enumerate(tapes):

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\interfaces\execution.py:131, in cache_execute.<locals>.fn(tapes, **kwargs)
    129 def fn(tapes: Sequence[QuantumTape], **kwargs):  # pylint: disable=function-redefined
    130     tapes = [expand_fn(tape) for tape in tapes]
--> 131     return original_fn(tapes, **kwargs)

File ~\anaconda3\envs\qamp2022_gpu\lib\contextlib.py:79, in ContextDecorator.__call__.<locals>.inner(*args, **kwds)
     76 @wraps(func)
     77 def inner(*args, **kwds):
     78     with self._recreate_cm():
---> 79         return func(*args, **kwds)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\_qubit_device.py:656, in QubitDevice.batch_execute(self, circuits)
    653     self.reset()
    655     # TODO: Insert control on value here
--> 656     res = self.execute(circuit)
    657     results.append(res)
    659 if self.tracker.active:

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\devices\default_qubit_torch.py:235, in DefaultQubitTorch.execute(self, circuit, **kwargs)
    226         if params_cuda_device != specified_device_cuda:
    228             warnings.warn(
    229                 f"Torch device {self._torch_device} specified "
    230                 "upon PennyLane device creation does not match the "
    231                 "Torch device of the gate parameters; "
    232                 f"{self._torch_device} will be used."
    233             )
--> 235 return super().execute(circuit, **kwargs)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\_qubit_device.py:432, in QubitDevice.execute(self, circuit, **kwargs)
    429 self.check_validity(circuit.operations, circuit.observables)
    431 # apply all circuit operations
--> 432 self.apply(circuit.operations, rotations=circuit.diagonalizing_gates, **kwargs)
    434 # generate computational basis samples
    435 if self.shots is not None or circuit.is_sampled:

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\devices\default_qubit.py:269, in DefaultQubit.apply(self, operations, rotations, **kwargs)
    267                 self._debugger.snapshots[len(self._debugger.snapshots)] = state_vector
    268     else:
--> 269         self._state = self._apply_operation(self._state, operation)
    271 # store the pre-rotated state
    272 self._pre_rotated_state = self._state

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\devices\default_qubit.py:297, in DefaultQubit._apply_operation(self, state, operation)
    294     axes = [ax + shift for ax in self.wires.indices(wires)]
    295     return self._apply_ops[operation.base_name](state, axes, inverse=operation.inverse)
--> 297 matrix = self._asarray(self._get_unitary_matrix(operation), dtype=self.C_DTYPE)
    299 if operation in diagonal_in_z_basis:
    300     return self._apply_diagonal_unitary(state, matrix, wires)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\devices\default_qubit_torch.py:309, in DefaultQubitTorch._get_unitary_matrix(self, unitary)
    307 if unitary in diagonal_in_z_basis:
    308     return self._asarray(unitary.eigvals(), dtype=self.C_DTYPE)
--> 309 return self._asarray(unitary.matrix(), dtype=self.C_DTYPE)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\operation.py:1529, in Operation.matrix(self, wire_order)
   1528 def matrix(self, wire_order=None):
-> 1529     canonical_matrix = self.compute_matrix(*self.parameters, **self.hyperparameters)
   1531     if self.inverse:
   1532         canonical_matrix = qml.math.conj(qml.math.moveaxis(canonical_matrix, -2, -1))

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\ops\qubit\parametric_ops.py:216, in RY.compute_matrix(theta)
    214 c = (1 + 0j) * c
    215 s = (1 + 0j) * s
--> 216 return qml.math.stack([stack_last([c, -s]), stack_last([s, c])], axis=-2)

RuntimeError: 
  #ifdef __HIPCC__
  #define ERROR_UNSUPPORTED_CAST ;
  // corresponds to aten/src/ATen/native/cuda/thread_constants.h
  #define CUDA_OR_ROCM_NUM_THREADS 256
  // corresponds to aten/src/ATen/cuda/detail/OffsetCalculator.cuh
  #define MAX_DIMS 16
  #ifndef __forceinline__
  #define __forceinline__ inline __attribute__((always_inline))
  #endif
  #else
  //TODO use _assert_fail, because assert is disabled in non-debug builds
......
.....

nvrtc: error: failed to open nvrtc-builtins64_117.dll.
  Make sure that nvrtc-builtins64_117.dll is installed correctly.

Hi @poig
Unfortunately, the error you posted looks NVIDIA driver related, which makes it hard for us to reason about the issue.

That said, I'd suggest downgrading your CUDA version from 12 to 11 and retrying, since it looks like you are using Torch, which may not yet have CUDA 12 support.
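
As a quick sanity check before reinstalling anything, you can compare the CUDA version your Torch build was compiled against with what nvcc reports. The name of the missing DLL, nvrtc-builtins64_117.dll, suggests a Torch build targeting CUDA 11.7, while your toolkit above is 12.0. A minimal sketch using only standard torch attributes:

    import torch

    # CUDA version this Torch build was compiled against; the "117" in the
    # missing nvrtc-builtins64_117.dll points at 11.7
    print(torch.version.cuda)

    # Whether Torch can currently see a CUDA device
    print(torch.cuda.is_available())

    # Torch build string, useful when reporting issues
    print(torch.__version__)

If the two versions disagree, either install a Torch build that matches your toolkit or move the toolkit/driver down to 11.x.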

Thanks for the reminder. Since I only hit the bug when running PennyLane, I downgraded to the CUDA 11.7 driver and restarted, and it works now.
Thanks again.
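
In case it helps anyone hitting the same error: a minimal smoke test along the lines of the failing path above (an RY gate on default.qubit.torch with a CUDA tensor; this is a sketch, not my original notebook code) now runs cleanly:

    import torch
    import pennylane as qml

    # Same device class that raised the error (default_qubit_torch.py)
    dev = qml.device("default.qubit.torch", wires=1, torch_device="cuda")

    @qml.qnode(dev, interface="torch")
    def circuit(theta):
        qml.RY(theta, wires=0)  # RY.compute_matrix is where the nvrtc error surfaced
        return qml.expval(qml.PauliZ(0))

    theta = torch.tensor(0.5, device="cuda")
    print(circuit(theta))  # succeeds once Torch and the CUDA runtime match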

@poig glad to hear this works for you now!