I am having difficulty loading 18k training files for training with TensorFlow. The files are .npy files named 0.npy, 1.npy, ..., 18000.npy.
Looking around the web, I came up with some simple code that first reads the files in the correct numeric order and then concatenates the training data together, but it takes forever.
import numpy as np
import glob
import re
import tensorflow as tf

print("TensorFlow version: {}".format(tf.__version__))

files = glob.glob('D:/project/train/*.npy')
# Natural sort so that 2.npy comes before 10.npy
files.sort(key=lambda var: [int(x) if x.isdigit() else x
                            for x in re.findall(r'[^0-9]|[0-9]+', var)])
# print(files)

final_dataset = []
for i, file in enumerate(files):
    dataset = np.load(file, mmap_mode='r')
    print(i)
    # print("Size of dataset: {}".format(dataset.shape))
    if i == 0:
        final_dataset = dataset
    else:
        # Concatenating on every iteration copies the whole accumulated array each time
        final_dataset = np.concatenate((final_dataset, dataset), axis=0)

print("Size of final_dataset: {}".format(final_dataset.shape))
np.save('combined_train.npy', final_dataset)
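
I suspect the slowdown comes from calling np.concatenate inside the loop, since that reallocates and copies the entire accumulated array on every iteration. Below is a minimal sketch of what I think the alternative would look like, collecting the arrays in a Python list and concatenating only once at the end (assuming all files share the same shape apart from axis 0):

import numpy as np
import glob
import re

files = glob.glob('D:/project/train/*.npy')
files.sort(key=lambda var: [int(x) if x.isdigit() else x
                            for x in re.findall(r'[^0-9]|[0-9]+', var)])

# Load every array into a list first; a single concatenate
# then copies each file's data only once
arrays = [np.load(file) for file in files]
final_dataset = np.concatenate(arrays, axis=0)
np.save('combined_train.npy', final_dataset)

Is this the right direction, or should I instead be feeding the files to TensorFlow lazily (e.g. via tf.data) rather than building one giant array in memory?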