I needed something like @Dipendra Pant's idea, but his code wasn't working for me; I think it has an indentation error in the for loop. Anyway, based heavily on his answer, here's the solution that worked for me. It reads from a folder with 5 subfolders (my 5 classes) and saves everything into 3 folders (train_ds, test_ds, val_ds), each with the same 5 subfolders inside, ready to be used with image_dataset_from_directory with shuffle=False (the shuffling is already done in this code).
import os
import numpy as np
import shutil
import random
# Source folder: expected to contain one sub-folder per class, each holding
# that class's files. `base_folder` must be defined before this point.
root_dir = base_folder+"input/House_Room_Dataset-5_rooms/" # for requesting directly pics
classes_dir = os.listdir(root_dir)

# Fraction of each class that goes to train and to validation; whatever is
# left (1 - train_ratio - val_ratio) becomes the test split.
train_ratio = 0.6
val_ratio = 0.1

for cls in classes_dir:
    src = os.path.join(root_dir, cls)
    # os.listdir() also returns plain files (e.g. .DS_Store); only
    # directories are treated as classes.
    if not os.path.isdir(src):
        continue

    # Create <input_destination>/<split>/<class> for the three splits.
    # `input_destination` must be defined before this point.
    for split in ('train_ds/', 'test_ds/', 'val_ds/'):
        os.makedirs(input_destination + split + cls, exist_ok=True)

    # List the regular files of this class; nested folders are ignored
    # because shutil.copy() cannot copy a directory.
    allFileNames = [
        name for name in os.listdir(src)
        if os.path.isfile(os.path.join(src, name))
    ]

    # Shuffle in place, then cut into train / test / val at the two indices.
    np.random.shuffle(allFileNames)
    train_end = int(train_ratio * len(allFileNames))
    test_end = int((1 - val_ratio) * len(allFileNames))
    train_FileNames, test_FileNames, val_FileNames = np.split(
        np.array(allFileNames), [train_end, test_end]
    )

    # Turn the bare file names into full source paths.
    train_FileNames = [os.path.join(src, name) for name in train_FileNames.tolist()]
    test_FileNames = [os.path.join(src, name) for name in test_FileNames.tolist()]
    val_FileNames = [os.path.join(src, name) for name in val_FileNames.tolist()]

    print("\n *****************************",
          "\n Total images: ", cls, len(allFileNames),
          '\n Training: ', len(train_FileNames),
          '\n Testing: ', len(test_FileNames),
          '\n Validation: ', len(val_FileNames),
          '\n *****************************')

    # Copy the files from their source path into the matching split folder.
    for name in train_FileNames:
        shutil.copy(name, input_destination + 'train_ds/' + cls)
    for name in test_FileNames:
        shutil.copy(name, input_destination + 'test_ds/' + cls)
    for name in val_FileNames:
        shutil.copy(name, input_destination + 'val_ds/' + cls)
# Sanity check: walk each output split folder and print, for every directory
# visited (the split root and each folder below it), how many files it holds.
paths = ['train_ds/', 'test_ds/','val_ds/']
for p in paths:
    # Named `dirpath` rather than `dir` so the builtin dir() is not shadowed.
    for dirpath, _subdirs, files in os.walk(input_destination + p):
        print(dirpath,' ', p, str(len(files)))