I'm training an LSTM on my GTX 1060 (6 GB) using TensorFlow 1.2 in Python.
Every epoch, I save the model with this method:
def save_model(self, session, epoch, save_model_path):
    """Write a checkpoint for the given epoch and print where it was saved.

    The target path is ``save_model_path`` immediately followed by
    ``lstm_model_epoch_<epoch>.ckpt``. The pieces are joined by plain
    string concatenation, so a directory passed as ``save_model_path``
    must already end with its path separator.

    Args:
        session: Passed unchanged as the first argument to
            ``self.saver.save``.
        epoch: Epoch identifier; converted with ``str()`` and embedded in
            the checkpoint file name.
        save_model_path: String prefix prepended to the checkpoint file
            name.
    """
    write_checkpoint = self.saver.save
    # Concatenate in the same left-to-right order as a single expression would.
    path_prefix = save_model_path + "lstm_model_epoch_"
    checkpoint_path = path_prefix + str(epoch) + ".ckpt"
    saved_to = write_checkpoint(session, checkpoint_path)
    # Report the path returned by the saver, not the one we requested.
    print("Model saved in file: %s" % saved_to)
Everything works, but after nine epochs I get a ResourceExhaustedError when I try to save the model with this method.
I monitored my resources during training, but none of them appeared to be exhausted.
The error I get is the following:
2017-06-29 12:43:02.865845: W tensorflow/core/framework/op_kernel.cc:1158] Resource exhausted: log/example_0/lstm_models/lstm_model_epoch_9.ckpt.data-00000-of-00001.tempstate10865381291487648358 Traceback (most recent call last): File "main.py", line 32, in File "/home/alb3rto/Scrivania/Tesi/sentiment_classification/text_lstm/LSTM_sentence.py", line 306, in train_lstm File "/home/alb3rto/Scrivania/Tesi/sentiment_classification/text_lstm/LSTM_sentence.py", line 449, in save_model File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 1472, in save File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 789, in run File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 997, in _run File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1132, in _do_run File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1152, in _do_call tensorflow.python.framework.errors_impl.ResourceExhaustedError: log/example_0/lstm_models/lstm_model_epoch_9.ckpt.data-00000-of-00001.tempstate10865381291487648358 [[Node: save/SaveV2 = SaveV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_save/Const_0_0, save/SaveV2/tensor_names, save/SaveV2/shape_and_slices, Variable/_21, Variable/Adam/_23, Variable/Adam_1/_25, Variable_1/_27, Variable_1/Adam/_29, Variable_1/Adam_1/_31, beta1_power/_33, beta2_power/_35, rnn/basic_lstm_cell/bias/_37, rnn/basic_lstm_cell/bias/Adam/_39, rnn/basic_lstm_cell/bias/Adam_1/_41, rnn/basic_lstm_cell/kernel/_43, rnn/basic_lstm_cell/kernel/Adam/_45, rnn/basic_lstm_cell/kernel/Adam_1/_47)]] Caused by op u'save/SaveV2', defined at: File "main.py", 
line 28, in lstm_sentence = lstm() File "/home/alb3rto/Scrivania/Tesi/sentiment_classification/text_lstm/LSTM_sentence.py", line 18, in init File "/home/alb3rto/Scrivania/Tesi/sentiment_classification/text_lstm/LSTM_sentence.py", line 117, in build_lstm File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 1139, in init self.build() File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 1170, in build restore_sequentially=self._restore_sequentially) File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 689, in build save_tensor = self._AddSaveOps(filename_tensor, saveables) File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 276, in _AddSaveOps save = self.save_op(filename_tensor, saveables) File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 219, in save_op tensors) File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/ops/gen_io_ops.py", line 745, in save_v2 File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2506, in create_op original_op=self._default_original_op, op_def=op_def) File "/home/alb3rto/anaconda2/envs/tesi/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1269, in init self._traceback = _extract_stack() ResourceExhaustedError (see above for traceback): log/example_0/lstm_models/lstm_model_epoch_9.ckpt.data-00000-of-00001.tempstate10865381291487648358 [[Node: save/SaveV2 = SaveV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, 
DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_save/Const_0_0, save/SaveV2/tensor_names, save/SaveV2/shape_and_slices, Variable/_21, Variable/Adam/_23, Variable/Adam_1/_25, Variable_1/_27, Variable_1/Adam/_29, Variable_1/Adam_1/_31, beta1_power/_33, beta2_power/_35, rnn/basic_lstm_cell/bias/_37, rnn/basic_lstm_cell/bias/Adam/_39, rnn/basic_lstm_cell/bias/Adam_1/_41, rnn/basic_lstm_cell/kernel/_43, rnn/basic_lstm_cell/kernel/Adam/_45, rnn/basic_lstm_cell/kernel/Adam_1/_47)]]
How can I fix it?