I don't know what happened, but when I try to train a model with Keras I get this error:
UnknownError: Fail to find the dnn implementation.
[[{{node CudnnRNN}}]]
[[sequential/lstm/PartitionedCall]] [Op:__inference_train_function_8758]
Function call stack:
train_function -> train_function -> train_function
Here's my sequential model:
# imports needed by the functions below
import numpy
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation
from keras.layers import BatchNormalization as BatchNorm
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint


def prepare_sequences(notes, n_vocab, seq_len):
    """ Prepare the sequences used by the Neural Network """
    sequence_length = seq_len
    names = sorted(set(item for item in notes))
    note_to_int = dict((note, number) for number, note in enumerate(names))

    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    network_input = network_input / float(n_vocab)
    network_output = np_utils.to_categorical(network_output)

    return (network_input, network_output)


def create_network(network_input, n_vocab, LSTM_node_count, Dropout_count):
    """ create the structure of the neural network """
    model = Sequential()
    model.add(LSTM(
        LSTM_node_count,
        input_shape=(network_input.shape[1], network_input.shape[2]),
        recurrent_dropout=0,
        return_sequences=True
    ))
    model.add(LSTM(
        LSTM_node_count,
        return_sequences=True,
        recurrent_dropout=0,
    ))
    model.add(LSTM(LSTM_node_count))
    model.add(BatchNorm())
    model.add(Dropout(Dropout_count))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNorm())
    model.add(Dropout(Dropout_count))
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    return model


def train(model, network_input, network_output, epoch, batchsize):
    """ train the neural network """
    filepath = "trained_weights/" + "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
    checkpoint = ModelCheckpoint(
        filepath,
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min'
    )
    callbacks_list = [checkpoint]

    model.fit(network_input,
              network_output,
              epochs=epoch,
              batch_size=batchsize,
              callbacks=callbacks_list)
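For context, the functions are wired together roughly like this (load_notes is just a stand-in for my preprocessing step, and the node count, dropout, epoch and batch-size values here are placeholders, not the exact ones from my run); the error is raised inside model.fit:

# rough driver sketch; load_notes() is a placeholder for my preprocessing step
notes = load_notes()
n_vocab = len(set(notes))

network_input, network_output = prepare_sequences(notes, n_vocab, seq_len=100)
model = create_network(network_input, n_vocab, LSTM_node_count=512, Dropout_count=0.3)
train(model, network_input, network_output, epoch=200, batchsize=64)  # fails with the UnknownError above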
Here is the device mapping TensorFlow reports for my GPU:
import tensorflow.compat.v1 as tf1
sess = tf1.Session(config=tf1.ConfigProto(log_device_placement=True))
Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce GTX 1650, pci bus id: 0000:0a:00.0, compute capability: 7.5
NVIDIA driver and CUDA version:
> nvidia-smi
Tue Dec 29 16:16:34 2020
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.89 Driver Version: 460.89 CUDA Version: 11.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 GeForce GTX 1650 WDDM | 00000000:0A:00.0 On | N/A |
| 30% 37C P8 4W / 75W | 4048MiB / 4096MiB | 1% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 1292 C+G Insufficient Permissions N/A |
| 0 N/A N/A 2184 C+G ...es.TextInput.InputApp.exe N/A |
| 0 N/A N/A 2636 C+G ...me\Application\chrome.exe N/A |
| 0 N/A N/A 3416 C+G ...perience\NVIDIA Share.exe N/A |
| 0 N/A N/A 5676 C ...ython\Python38\python.exe N/A |
| 0 N/A N/A 6352 C+G ...artMenuExperienceHost.exe N/A |
| 0 N/A N/A 6784 C+G ...w5n1h2txyewy\SearchUI.exe N/A |
| 0 N/A N/A 10148 C ...thon\Python38\pythonw.exe N/A |
| 0 N/A N/A 11368 C+G ...8bbwe\Microsoft.Notes.exe N/A |
+-----------------------------------------------------------------------------+
Here are my version details:
Tensorflow Version: 2.4.0
Keras Version: 2.4.0
Python: 3.8.2 (tags/v3.8.2:7b3ab59, Feb 25 2020, 23:03:10) [MSC v.1916 64 bit (AMD64)]
Pandas: 1.2.0
scikit-learn: 0.24.0
GPU:
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
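(These were printed with a quick check along these lines, just the usual __version__ attributes:)

import sys
import tensorflow as tf
import keras
import pandas
import sklearn

print("Tensorflow Version:", tf.__version__)
print("Keras Version:", keras.__version__)
print("Python:", sys.version)
print("Pandas:", pandas.__version__)
print("scikit-learn:", sklearn.__version__)
print("GPU:")
print(tf.config.list_physical_devices('GPU'))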
I have another computer with the exact same driver and CUDA version, and the model trains normally there but not on this one.
I tried the following solutions suggested in other threads, but the error persists.
# 1) enable memory growth on the GPU
physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

# 2) set allow_growth through a v1 session
configproto = tf.compat.v1.ConfigProto()
configproto.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=configproto)
tf.compat.v1.keras.backend.set_session(sess)