I've tried using Tensorflow GPU accelerator in google colab with local runtime on my machine which has the following system information
- OS Platform and Distribution : Windows 10
- TensorFlow version: 2.1
- Python version: 3.6.10
- CUDA/cuDNN version: Cudnn - 7.5.6 , CUDA- 10.1
- GPU: Nividia Geforce RTX 2060
I've followed all the steps precisely on https://www.tensorflow.org/install/gpu and ran then ran this code to check if it can discover my gpu and to see the difference in speed between it and cpu:
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
import timeit
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
print(
'\n\nThis error most likely means that this notebook is not '
'configured to use a GPU. Change this in Notebook Settings via the '
'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
raise SystemError('GPU device not found')
def cpu():
with tf.device('/cpu:0'):
random_image_cpu = tf.random.normal((100, 100, 100, 3))
net_cpu = tf.keras.layers.Conv2D(32, 7)(random_image_cpu)
return tf.math.reduce_sum(net_cpu)
def gpu():
with tf.device('/device:GPU:0'):
random_image_gpu = tf.random.normal((100, 100, 100, 3))
net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
return tf.math.reduce_sum(net_gpu)
# We run each op once to warm up; see: https://stackoverflow.com/a/45067900
cpu()
gpu()
# Run the op several times.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
'(batch x height x width x channel). Sum of ten runs.')
print('CPU (s):')
cpu_time = timeit.timeit('cpu()', number=10, setup="from __main__ import cpu")
print(cpu_time)
print('GPU (s):')
gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
print(gpu_time)
print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))
it returned the following error:
Found GPU at: /device:GPU:0
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-1-121519b30cf2> in <module>
29 # We run each op once to warm up; see: https://stackoverflow.com/a/45067900
30 cpu()
---> 31 gpu()
32
33 # Run the op several times.
<ipython-input-1-121519b30cf2> in gpu()
24 with tf.device('/device:GPU:0'):
25 random_image_gpu = tf.random.normal((100, 100, 100, 3))
---> 26 net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
27 return tf.math.reduce_sum(net_gpu)
28
~\anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
820 with base_layer_utils.autocast_context_manager(
821 self._compute_dtype):
--> 822 outputs = self.call(cast_inputs, *args, **kwargs)
823 self._handle_activity_regularization(inputs, outputs)
824 self._set_mask_metadata(inputs, outputs, input_masks)
~\anaconda3\lib\site-packages\tensorflow_core\python\keras\layers\convolutional.py in call(self, inputs)
207 inputs = array_ops.pad(inputs, self._compute_causal_padding())
208
--> 209 outputs = self._convolution_op(inputs, self.kernel)
210
211 if self.use_bias:
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
1133 call_from_convolution=False)
1134 else:
-> 1135 return self.conv_op(inp, filter)
1136 # copybara:strip_end
1137 # copybara:insert return self.conv_op(inp, filter)
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
638
639 def __call__(self, inp, filter): # pylint: disable=redefined-builtin
--> 640 return self.call(inp, filter)
641
642
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
237 padding=self.padding,
238 data_format=self.data_format,
--> 239 name=self.name)
240
241
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, data_format, dilations, name, filters)
2009 data_format=data_format,
2010 dilations=dilations,
-> 2011 name=name)
2012
2013
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\gen_nn_ops.py in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, explicit_paddings, data_format, dilations, name)
931 input, filter, strides=strides, use_cudnn_on_gpu=use_cudnn_on_gpu,
932 padding=padding, explicit_paddings=explicit_paddings,
--> 933 data_format=data_format, dilations=dilations, name=name, ctx=_ctx)
934 except _core._SymbolicException:
935 pass # Add nodes to the TensorFlow graph.
~\anaconda3\lib\site-packages\tensorflow_core\python\ops\gen_nn_ops.py in conv2d_eager_fallback(input, filter, strides, padding, use_cudnn_on_gpu, explicit_paddings, data_format, dilations, name, ctx)
1020 explicit_paddings, "data_format", data_format, "dilations", dilations)
1021 _result = _execute.execute(b"Conv2D", 1, inputs=_inputs_flat, attrs=_attrs,
-> 1022 ctx=ctx, name=name)
1023 if _execute.must_record_gradient():
1024 _execute.record_gradient(
~\anaconda3\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
~\anaconda3\lib\site-packages\six.py in raise_from(value, from_value)
UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above. [Op:Conv2D]
and this is the log from the jupyter terminal:
2020-08-09 04:37:22.168805: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
2020-08-09 04:37:24.322956: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
2020-08-09 04:37:24.329330: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library nvcuda.dll
2020-08-09 04:37:25.599803: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.335GHz coreCount: 30 deviceMemorySize: 6.00GiB deviceMemoryBandwidth: 312.97GiB/s
2020-08-09 04:37:25.607874: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
2020-08-09 04:37:25.616921: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cublas64_10.dll
2020-08-09 04:37:25.626584: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cufft64_10.dll
2020-08-09 04:37:25.635135: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library curand64_10.dll
2020-08-09 04:37:25.650044: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusolver64_10.dll
2020-08-09 04:37:25.659390: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusparse64_10.dll
2020-08-09 04:37:25.681098: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-08-09 04:37:25.686397: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-08-09 04:37:26.217444: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-08-09 04:37:26.222044: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0
2020-08-09 04:37:26.225124: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N
2020-08-09 04:37:26.228586: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/device:GPU:0 with 4604 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-08-09 04:37:26.239786: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.335GHz coreCount: 30 deviceMemorySize: 6.00GiB deviceMemoryBandwidth: 312.97GiB/s
2020-08-09 04:37:26.249100: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
2020-08-09 04:37:26.254350: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cublas64_10.dll
2020-08-09 04:37:26.260971: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cufft64_10.dll
2020-08-09 04:37:26.265307: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library curand64_10.dll
2020-08-09 04:37:26.271569: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusolver64_10.dll
2020-08-09 04:37:26.276251: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusparse64_10.dll
2020-08-09 04:37:26.281798: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-08-09 04:37:26.287682: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-08-09 04:37:26.291846: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-08-09 04:37:26.298235: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0
2020-08-09 04:37:26.300794: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N
2020-08-09 04:37:26.305262: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/device:GPU:0 with 4604 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-08-09 04:37:26.313775: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.335GHz coreCount: 30 deviceMemorySize: 6.00GiB deviceMemoryBandwidth: 312.97GiB/s
2020-08-09 04:37:26.328318: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
2020-08-09 04:37:26.339994: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cublas64_10.dll
2020-08-09 04:37:26.345874: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cufft64_10.dll
2020-08-09 04:37:26.352587: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library curand64_10.dll
2020-08-09 04:37:26.359694: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusolver64_10.dll
2020-08-09 04:37:26.365286: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusparse64_10.dll
2020-08-09 04:37:26.371099: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-08-09 04:37:26.375749: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-08-09 04:37:26.380113: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.335GHz coreCount: 30 deviceMemorySize: 6.00GiB deviceMemoryBandwidth: 312.97GiB/s
2020-08-09 04:37:26.393424: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
2020-08-09 04:37:26.403150: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cublas64_10.dll
2020-08-09 04:37:26.408577: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cufft64_10.dll
2020-08-09 04:37:26.423141: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library curand64_10.dll
2020-08-09 04:37:26.428838: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusolver64_10.dll
2020-08-09 04:37:26.434061: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cusparse64_10.dll
2020-08-09 04:37:26.438479: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-08-09 04:37:26.443288: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-08-09 04:37:26.446511: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-08-09 04:37:26.453204: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0
2020-08-09 04:37:26.458931: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N
2020-08-09 04:37:26.463016: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 4604 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5)
2020-08-09 04:37:26.823644: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-08-09 04:37:27.877441: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_ALLOC_FAILED
2020-08-09 04:37:27.882143: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_ALLOC_FAILED
I've tried the solutions mentioned here https://forums.developer.nvidia.com/t/could-not-create-cudnn-handle-cudnn-status-alloc-failed/108261/2 but to no avail, I hope I can someone who can assist me in this here.