
I am doing augmentation for training a segmentation model, but the dataset contains about 26,000+ images. That is why I am running into problems when building an array of images.

Tried:

import numpy as np
from glob import glob
from PIL import Image

def get_data():
    train_images = sorted(glob('../input/fg_image/images/*.jpg', recursive=True))
    X_data = np.empty((len(train_images), 128, 128, 3), dtype=np.float32)
    for i, image in enumerate(train_images):
        # Image.thumbnail() resizes in place and returns None, and it only
        # shrinks while keeping the aspect ratio; resize() guarantees 128x128.
        X_data[i] = np.asarray(Image.open(image).convert('RGB').resize((128, 128)))
    return X_data

X_train = get_data()

Using the method above, I collect X_train and Y_train. Up to this step everything works fine.
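For scale, a rough back-of-the-envelope estimate (using the approximate 26,000-image count mentioned above) shows how large each such float32 array gets, which is likely why memory becomes tight once extra copies are made:

```python
import numpy as np

# Rough memory footprint of one in-memory float32 array of 26,000 RGB
# images at 128x128 (the approximate dataset size from the question).
n_images = 26000
nbytes = n_images * 128 * 128 * 3 * np.dtype(np.float32).itemsize
print(round(nbytes / 1024**3, 2))  # about 4.76 GiB per array
```

And that is per array; X_train and Y_train together, plus the copies produced by train_test_split and the generators' fit step, multiply this figure.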

But when I then apply the augmentation method below, the whole notebook crashes.

from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator

def augmentation(X_data, Y_data, validation_split=0.2, batch_size=32, seed=42):
    X_train, X_test, Y_train, Y_test = train_test_split(X_data,
                                                        Y_data,
                                                        train_size=1 - validation_split,
                                                        test_size=validation_split,
                                                        random_state=seed)

    data_gen_args = dict(rotation_range=45.,
                         width_shift_range=0.1,
                         height_shift_range=0.1)

    # Identical transform settings and the same seed keep images and masks aligned.
    X_datagen = ImageDataGenerator(**data_gen_args)
    Y_datagen = ImageDataGenerator(**data_gen_args)
    X_datagen.fit(X_train, augment=True, seed=seed)
    Y_datagen.fit(Y_train, augment=True, seed=seed)
    X_train_augmented = X_datagen.flow(X_train, batch_size=batch_size, shuffle=True, seed=seed)
    Y_train_augmented = Y_datagen.flow(Y_train, batch_size=batch_size, shuffle=True, seed=seed)

    train_generator = zip(X_train_augmented, Y_train_augmented)

    return train_generator

train_generator = augmentation(X_train, Y_train)
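For context, the zip() inside augmentation pairs the two Keras iterators lazily, so matching image and mask batches come out together without being materialized up front. A minimal stand-alone sketch of that pairing, using hypothetical stand-in iterators instead of the real .flow() outputs:

```python
import itertools

# Stand-ins for X_train_augmented / Y_train_augmented, which are infinite
# batch iterators in the real code; zip() pairs them without consuming
# either one eagerly.
x_batches = itertools.cycle(['x_batch_0', 'x_batch_1'])
y_batches = itertools.cycle(['y_batch_0', 'y_batch_1'])
train_generator = zip(x_batches, y_batches)
print(next(train_generator))  # ('x_batch_0', 'y_batch_0')
```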