I am using a customised batch generator to work around the incompatible-shapes problem (a BroadcastGradientArgs error) that arises with the standard model.fit() function when the last batch of the training data is smaller than the batch size. I am using the batch generator mentioned here with the model.fit_generator() function:
import math
import numpy as np
from tensorflow.keras.utils import Sequence

class Generator(Sequence):
    # Class is a dataset wrapper for better training performance
    def __init__(self, x_set, y_set, batch_size=256):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        # one index per sample; shuffled between epochs
        self.indices = np.arange(self.x.shape[0])

    def __len__(self):
        # number of batches per epoch; floor drops the last partial batch
        return math.floor(self.x.shape[0] / self.batch_size)

    def __getitem__(self, idx):
        inds = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]  # Line A
        batch_x = self.x[inds]
        batch_y = self.y[inds]
        return batch_x, batch_y

    def on_epoch_end(self):
        np.random.shuffle(self.indices)
But this generator seems to discard the last batch when it is smaller than the given batch size. How can I update it so that the last batch is included and padded (for example) with some repeated samples?
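Here is a minimal, untested sketch of the change I have in mind (the name PaddedGenerator and the use of np.resize to repeat samples are my own guesses, not from the original code):

import math
import numpy as np
from tensorflow.keras.utils import Sequence

class PaddedGenerator(Sequence):
    # Hypothetical variant that keeps the final partial batch and pads it
    # with repeated samples so every batch has exactly batch_size elements
    def __init__(self, x_set, y_set, batch_size=256):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.indices = np.arange(self.x.shape[0])

    def __len__(self):
        # ceil instead of floor, so the last partial batch is counted
        return math.ceil(self.x.shape[0] / self.batch_size)

    def __getitem__(self, idx):
        inds = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        if len(inds) < self.batch_size:
            # np.resize repeats the short index array until it reaches
            # batch_size, i.e. the last batch is padded with duplicates
            inds = np.resize(inds, self.batch_size)
        return self.x[inds], self.y[inds]

    def on_epoch_end(self):
        np.random.shuffle(self.indices)

Would this be a reasonable way to do it, or is there a more idiomatic approach?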
Also, somehow I don't get how "Line A" works! My reading is that it selects the idx-th block of batch_size entries from the (shuffled) index array, but I am not sure that is right.
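To show what I mean, here is a toy example (values chosen just for illustration):

import numpy as np

indices = np.arange(10)   # [0 1 2 3 4 5 6 7 8 9]
batch_size = 4
idx = 1
inds = indices[idx * batch_size:(idx + 1) * batch_size]
print(inds)               # [4 5 6 7] -> the second block of 4 indices
# After on_epoch_end() shuffles the index array, the same slice would
# pick out a random subset of sample positions instead.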
Update: here is how I am using the generator with my model:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# dummy model
input_1 = Input(shape=(None,))
...
dense_1 = Dense(10, activation='relu')(input_1)
output_1 = Dense(1, activation='sigmoid')(dense_1)
model = Model(input_1, output_1)
model.summary()

# Compile and fit_generator
# metrics=['accuracy'] is needed so evaluate_generator returns accuracy too
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
train_data_gen = Generator(x1_train, y_train, batch_size)
test_data_gen = Generator(x1_test, y_test, batch_size)
model.fit_generator(generator=train_data_gen, validation_data=test_data_gen,
                    epochs=epochs, shuffle=False, verbose=1)
loss, accuracy = model.evaluate_generator(generator=test_data_gen)
print('Test Loss: %0.5f Accuracy: %0.5f' % (loss, accuracy))