My tensorflow-gpu install is not working on CNN's

Question

Here is my code:

model = Sequential()

# CONVOLUTIONAL LAYER
model.add(Conv2D(filters=32, kernel_size=(4,4),input_shape=(28, 28, 1), activation='relu',))
# POOLING LAYER
model.add(MaxPool2D(pool_size=(2, 2)))

# FLATTEN IMAGES FROM 28 by 28 to 764 BEFORE FINAL LAYER
model.add(Flatten())

# 128 NEURONS IN DENSE HIDDEN LAYER (YOU CAN CHANGE THIS NUMBER OF NEURONS)
model.add(Dense(128, activation='relu'))

# LAST LAYER IS THE CLASSIFIER, THUS 10 POSSIBLE CLASSES
model.add(Dense(10, activation='softmax'))

# https://keras.io/metrics/
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy']) # we can add in additional metrics https://keras.io/metrics/

model.fit(x_train,y_cat_train,epochs=10,validation_data=(x_test,y_cat_test))

And I get the following when I run the model.fit line:

UnknownError                              Traceback (most recent call last)
<ipython-input-37-0eafbb732ade> in <module>
----> 1 model.fit(x_train,y_cat_train,epochs=10,validation_data=(x_test,y_cat_test))

~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    817         max_queue_size=max_queue_size,
    818         workers=workers,
--> 819         use_multiprocessing=use_multiprocessing)
    820 
    821   def evaluate(self,

~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    340                 mode=ModeKeys.TRAIN,
    341                 training_context=training_context,
--> 342                 total_epochs=epochs)
    343             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    344 

~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    126         step=step, mode=mode, size=current_batch_size) as batch_logs:
    127       try:
--> 128         batch_outs = execution_function(iterator)
    129       except (StopIteration, errors.OutOfRangeError):
    130         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
     96     # `numpy` translates Tensors to values in Eager mode.
     97     return nest.map_structure(_non_none_constant_value,
---> 98                               distributed_function(input_fn))
     99 
    100   return execution_function

~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
    566         xla_context.Exit()
    567     else:
--> 568       result = self._call(*args, **kwds)
    569 
    570     if tracing_count == self._get_tracing_count():

~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
    597       # In this case we have created variables on the first call, so we run the
    598       # defunned version which is guaranteed to never create variables.
--> 599       return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
    600     elif self._stateful_fn is not None:
    601       # Release the lock early so that multiple threads can perform the call

~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\function.py in __call__(self, *args, **kwargs)
   2361     with self._lock:
   2362       graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2363     return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
   2364 
   2365   @property

~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\function.py in _filtered_call(self, args, kwargs)
   1609          if isinstance(t, (ops.Tensor,
   1610                            resource_variable_ops.BaseResourceVariable))),
-> 1611         self.captured_inputs)
   1612 
   1613   def _call_flat(self, args, captured_inputs, cancellation_manager=None):

~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1690       # No tape is watching; skip to running the function.
   1691       return self._build_call_outputs(self._inference_function.call(
-> 1692           ctx, args, cancellation_manager=cancellation_manager))
   1693     forward_backward = self._select_forward_and_backward_functions(
   1694         args,

~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\function.py in call(self, ctx, args, cancellation_manager)
    543               inputs=args,
    544               attrs=("executor_type", executor_type, "config_proto", config),
--> 545               ctx=ctx)
    546         else:
    547           outputs = execute.execute_with_cancellation(

~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     65     else:
     66       message = e.message
---> 67     six.raise_from(core._status_to_exception(e.code, message), None)
     68   except TypeError as e:
     69     keras_symbolic_tensors = [

~\anaconda3\envs\GPU\lib\site-packages\six.py in raise_from(value, from_value)

UnknownError:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
     [[node sequential/conv2d/Conv2D (defined at <ipython-input-36-6ec5ba2e7ade>:1) ]] [Op:__inference_distributed_function_788]

Function call stack:
distributed_function

I would just like to clarify that I have run this CNN before on regular tensorflow without issues, and this newly installed tensorflow-gpu works on ANN's and RNN's. Additionally here are my specs and installation versions:

Tensorflow 2.1
Keras 2.3.1
CudNN 7.6.5
cudatoolkit 10.1.243
GPU: Nvidia 2070

Let me know if any other information is necessary, thank you!

score 1 · Accepted Answer · answered Aug 12 '20 at 00:04

1

I have had this error happen periodically. Not sure of the details of the cause but this happens when I have to many instances of jupyter notebook or python running at the same time that are running tensorflow. If you are on windows go task manager and check how many instances of notebook.exe or python are running and shut down all of them then reload the notebook.This always fixes the problem for me

answered Aug 12 '20 at 00:04

Gerry P

7,662
3
10
20

Thank you so much this fixed my issue. God bless your soul – brian samoukian Aug 12 '20 at 00:16
glad it worked I think what happens is allthe GPU memory get used up – Gerry P Aug 12 '20 at 04:32

My tensorflow-gpu install is not working on CNN's

1 Answers1