I am trying to compete in Kaggle's Cornell Birdcall Identification challenge, which comes with about 23 GB of data, mostly mp3 audio files. As you may know, 23 GB is impossible to fit into the RAM available on Kaggle or Google Colab, so I wrote a data generator that fetches the mp3 files and converts them on the fly while the model trains, to avoid an out-of-memory issue. However, I still run out of memory after the first few epochs. Below are my generator and my training code, where I use the del statement to explicitly de-allocate objects from memory, but apparently I did something wrong. Calling the garbage collector makes no difference either. Is there any resource you can suggest on this, or any suggestion to improve my code and prevent the memory leak?
Thanks
My data generator code:
import glob
import random
import gc

import numpy as np
import tensorflow as tf
from tensorflow import keras


class My_Custom_Generator(keras.utils.Sequence):

    def __init__(self, batch_size):
        files = glob.glob("../input/birdsong-recognition/train_audio/*/*.mp3")
        random.shuffle(files)
        self.files = files
        self.batch_size = batch_size

    def __len__(self):
        return (np.ceil(len(self.files) / float(self.batch_size))).astype(np.int)

    def __getitem__(self, idx):
        gc.collect(2)
        batch_x = self.files[idx * self.batch_size : (idx + 1) * self.batch_size]
        # batch_y = self.labels[idx * self.batch_size : (idx + 1) * self.batch_size]
        train_image = []
        train_label = []
        for i in range(0, len(batch_x)):
            # get_data / get_cat_label are helpers defined elsewhere in my notebook
            image, label = get_data(batch_x[i])
            image = tf.convert_to_tensor(image)
            label_matrix = get_cat_label(label)
            train_image.append(image)
            train_label.append(label_matrix)
        self.train_image = np.array(train_image)
        self.train_label = np.array(train_label)
        del train_image
        del train_label
        return self.train_image, self.train_label
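In case it matters, this is roughly how I have been checking memory between batches while debugging (psutil is just the quickest way I found to read the process memory on Kaggle; the generator is the one above, and get_data / get_cat_label are assumed to be defined):

import os
import psutil

def print_memory_usage(tag):
    # Resident memory of the current process, in MB, so I can watch it
    # grow from one __getitem__ call to the next.
    rss_mb = psutil.Process(os.getpid()).memory_info().rss / 1024 ** 2
    print(f"{tag}: {rss_mb:.1f} MB resident")

gen = My_Custom_Generator(batch_size=8)
print_memory_usage("before batch 0")
imgs, labels = gen[0]
print_memory_usage("after batch 0")
imgs, labels = gen[1]
print_memory_usage("after batch 1")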
My training loop, which I adapted from a TensorFlow tutorial:
## Note: Rerunning this cell uses the same model variables

# Keep results for plotting
train_loss_results = []
train_accuracy_results = []

num_epochs = int(len(glob.glob("../input/birdsong-recognition/train_audio/*/*.mp3")) // 8)

for epoch in range(num_epochs):
    epoch_loss_avg = tf.keras.metrics.Mean()
    epoch_accuracy = tf.keras.metrics.CategoricalAccuracy()

    imgs, labels = my_training_batch_generator.__getitem__(epoch)

    # Training loop - using batches of 32
    for i in range(1):
        # Optimize the model
        loss_value, grads = grad(xceptionModel, imgs, labels)
        optimizer.apply_gradients(zip(grads, xceptionModel.trainable_variables))

        # Track progress
        epoch_loss_avg.update_state(loss_value)  # Add current batch loss
        # Compare predicted label to actual label
        # training=True is needed only if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        epoch_accuracy.update_state(labels, xceptionModel(imgs, training=True))

    del imgs
    del labels

    # End epoch
    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())

    if epoch % 2 == 0:
        print("Epoch {:03d}: Loss: {:.3f}, Accuracy: {:.3%}".format(epoch,
                                                                    epoch_loss_avg.result(),
                                                                    epoch_accuracy.result()))
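For completeness, the grad helper used above is essentially the one from the TensorFlow custom-training tutorial; mine looks roughly like this (loss_object being a CategoricalCrossentropy instance and xceptionModel being my Xception-based model are my setup):

loss_object = tf.keras.losses.CategoricalCrossentropy()

def loss(model, x, y, training):
    # Forward pass and loss for one batch.
    y_ = model(x, training=training)
    return loss_object(y_true=y, y_pred=y_)

def grad(model, inputs, targets):
    # Record the forward pass so the tape can compute gradients
    # with respect to the trainable variables.
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets, training=True)
    return loss_value, tape.gradient(loss_value, model.trainable_variables)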