Nvrtc: error: failed to open nvrtc-builtins64_117.dll

Any help would be appreciated. My `nvcc --version` output:
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Fri_Jan__6_19:04:39_Pacific_Standard_Time_2023
Cuda compilation tools, release 12.0, V12.0.140
Build cuda_12.0.r12.0/compiler.32267302_0

Error traceback (truncated):

98     outputs = model(inputs)
     99     #nn.sigmoid(outputs)
    100     #print(outputs)
    101     if len(outputs.shape) == 1:

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\torchvision\models\densenet.py:218, in DenseNet.forward(self, x)
    216 out = F.adaptive_avg_pool2d(out, (1, 1))
    217 out = torch.flatten(out, 1)
--> 218 out = self.classifier(out)
    219 return out

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[26], line 16, in Quantumnet.forward(self, input_features)
     14 q_out = q_out.to(device)
     15 for elem in q_in:
---> 16     q_out_elem = q_net(elem,self.q_params).float().unsqueeze(0)
     17     q_out = torch.cat((q_out, q_out_elem))
     18 return self.post_net(q_out)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\qnode.py:847, in QNode.__call__(self, *args, **kwargs)
    843     self._update_original_device()
    845     return res
--> 847 res = qml.execute(
    848     [self.tape],
    849     device=self.device,
    850     gradient_fn=self.gradient_fn,
    851     interface=self.interface,
    852     gradient_kwargs=self.gradient_kwargs,
    853     override_shots=override_shots,
    854     **self.execute_kwargs,
    855 )
    857 if old_interface == "auto":
    858     self.interface = "auto"

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\interfaces\execution.py:651, in execute(tapes, device, gradient_fn, interface, mode, gradient_kwargs, cache, cachesize, max_diff, override_shots, expand_fn, max_expansion, device_batch_transform)
    647     return batch_fn(res)
    649 if gradient_fn == "backprop" or interface is None:
    650     return batch_fn(
--> 651         qml.interfaces.cache_execute(
    652             batch_execute, cache, return_tuple=False, expand_fn=expand_fn
    653         )(tapes)
    654     )
    656 # the default execution function is batch_execute
    657 execute_fn = qml.interfaces.cache_execute(batch_execute, cache, expand_fn=expand_fn)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\interfaces\execution.py:206, in cache_execute.<locals>.wrapper(tapes, **kwargs)
    202         return (res, []) if return_tuple else res
    204 else:
    205     # execute all unique tapes that do not exist in the cache
--> 206     res = fn(execution_tapes.values(), **kwargs)
    208 final_res = []
    210 for i, tape in enumerate(tapes):

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\interfaces\execution.py:131, in cache_execute.<locals>.fn(tapes, **kwargs)
    129 def fn(tapes: Sequence[QuantumTape], **kwargs):  # pylint: disable=function-redefined
    130     tapes = [expand_fn(tape) for tape in tapes]
--> 131     return original_fn(tapes, **kwargs)

File ~\anaconda3\envs\qamp2022_gpu\lib\contextlib.py:79, in ContextDecorator.__call__.<locals>.inner(*args, **kwds)
     76 @wraps(func)
     77 def inner(*args, **kwds):
     78     with self._recreate_cm():
---> 79         return func(*args, **kwds)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\_qubit_device.py:656, in QubitDevice.batch_execute(self, circuits)
    653     self.reset()
    655     # TODO: Insert control on value here
--> 656     res = self.execute(circuit)
    657     results.append(res)
    659 if self.tracker.active:

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\devices\default_qubit_torch.py:235, in DefaultQubitTorch.execute(self, circuit, **kwargs)
    226         if params_cuda_device != specified_device_cuda:
    228             warnings.warn(
    229                 f"Torch device {self._torch_device} specified "
    230                 "upon PennyLane device creation does not match the "
    231                 "Torch device of the gate parameters; "
    232                 f"{self._torch_device} will be used."
    233             )
--> 235 return super().execute(circuit, **kwargs)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\_qubit_device.py:432, in QubitDevice.execute(self, circuit, **kwargs)
    429 self.check_validity(circuit.operations, circuit.observables)
    431 # apply all circuit operations
--> 432 self.apply(circuit.operations, rotations=circuit.diagonalizing_gates, **kwargs)
    434 # generate computational basis samples
    435 if self.shots is not None or circuit.is_sampled:

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\devices\default_qubit.py:269, in DefaultQubit.apply(self, operations, rotations, **kwargs)
    267                 self._debugger.snapshots[len(self._debugger.snapshots)] = state_vector
    268     else:
--> 269         self._state = self._apply_operation(self._state, operation)
    271 # store the pre-rotated state
    272 self._pre_rotated_state = self._state

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\devices\default_qubit.py:297, in DefaultQubit._apply_operation(self, state, operation)
    294     axes = [ax + shift for ax in self.wires.indices(wires)]
    295     return self._apply_ops[operation.base_name](state, axes, inverse=operation.inverse)
--> 297 matrix = self._asarray(self._get_unitary_matrix(operation), dtype=self.C_DTYPE)
    299 if operation in diagonal_in_z_basis:
    300     return self._apply_diagonal_unitary(state, matrix, wires)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\devices\default_qubit_torch.py:309, in DefaultQubitTorch._get_unitary_matrix(self, unitary)
    307 if unitary in diagonal_in_z_basis:
    308     return self._asarray(unitary.eigvals(), dtype=self.C_DTYPE)
--> 309 return self._asarray(unitary.matrix(), dtype=self.C_DTYPE)

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\operation.py:1529, in Operation.matrix(self, wire_order)
   1528 def matrix(self, wire_order=None):
-> 1529     canonical_matrix = self.compute_matrix(*self.parameters, **self.hyperparameters)
   1531     if self.inverse:
   1532         canonical_matrix = qml.math.conj(qml.math.moveaxis(canonical_matrix, -2, -1))

File ~\anaconda3\envs\qamp2022_gpu\lib\site-packages\pennylane\ops\qubit\parametric_ops.py:216, in RY.compute_matrix(theta)
    214 c = (1 + 0j) * c
    215 s = (1 + 0j) * s
--> 216 return qml.math.stack([stack_last([c, -s]), stack_last([s, c])], axis=-2)

RuntimeError: 
  #ifdef __HIPCC__
  #define ERROR_UNSUPPORTED_CAST ;
  // corresponds to aten/src/ATen/native/cuda/thread_constants.h
  #define CUDA_OR_ROCM_NUM_THREADS 256
  // corresponds to aten/src/ATen/cuda/detail/OffsetCalculator.cuh
  #define MAX_DIMS 16
  #ifndef __forceinline__
  #define __forceinline__ inline __attribute__((always_inline))
  #endif
  #else
  //TODO use _assert_fail, because assert is disabled in non-debug builds
......
.....

nvrtc: error: failed to open nvrtc-builtins64_117.dll.
  Make sure that nvrtc-builtins64_117.dll is installed correctly.

Hi @poig
Unfortunately, the error you posted looks NVIDIA driver related, which makes it hard for us to reason about the issue.

That said, I'd suggest downgrading your CUDA version from 12 to 11 and retrying, since it looks like you are using Torch, which may not yet have CUDA 12 support.
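
As a quick sanity check before reinstalling anything, you can compare the CUDA version your Torch build was compiled against with what nvcc reports. The name of the missing DLL, nvrtc-builtins64_117.dll, suggests a Torch build targeting CUDA 11.7, while your toolkit above is 12.0. A minimal sketch using only standard torch attributes:

    import torch

    # CUDA version this Torch build was compiled against; the "117" in the
    # missing nvrtc-builtins64_117.dll points at 11.7
    print(torch.version.cuda)

    # Whether Torch can currently see a CUDA device
    print(torch.cuda.is_available())

    # Torch build string, useful when reporting issues
    print(torch.__version__)

If the two versions disagree, either install a Torch build that matches your toolkit or move the toolkit/driver down to 11.x.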

Thanks for the reminder. Since I only hit the bug when running PennyLane, I downgraded to the CUDA 11.7 driver and restarted, and it works now.
Thanks again.
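
In case it helps anyone hitting the same error: a minimal smoke test along the lines of the failing path above (an RY gate on default.qubit.torch with a CUDA tensor; this is a sketch, not my original notebook code) now runs cleanly:

    import torch
    import pennylane as qml

    # Same device class that raised the error (default_qubit_torch.py)
    dev = qml.device("default.qubit.torch", wires=1, torch_device="cuda")

    @qml.qnode(dev, interface="torch")
    def circuit(theta):
        qml.RY(theta, wires=0)  # RY.compute_matrix is where the nvrtc error surfaced
        return qml.expval(qml.PauliZ(0))

    theta = torch.tensor(0.5, device="cuda")
    print(circuit(theta))  # succeeds once Torch and the CUDA runtime match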

@poig glad to hear this works for you now!