I am reading images from a directory hierarchy (flow_from_directory with generators from the ImageDataGenerator class). The model is a frozen MobileNetV2 plus a trainable softmax layer. When I fit the model, training and validation accuracy are comparable. However, if I change the validation generator's parameters or reset the generator, validation accuracy drops significantly, both under model.evaluate and when I resume training with model.fit. The dataset is a 3D view database. Relevant code:
'''
import numpy as np
import tensorflow as tf

# pixels and data_directory are defined earlier
batch_size = 16

# The same augmenting generator is used for both training and validation.
rescaled3D_gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255,
                                                                 zoom_range=0.2,
                                                                 shear_range=0.2,
                                                                 horizontal_flip=True)
train_gen = rescaled3D_gen.flow_from_directory(data_directory + '/train/', seed=3,
                                               target_size=(pixels, pixels), shuffle=True,
                                               batch_size=batch_size, class_mode='binary')
val_gen = rescaled3D_gen.flow_from_directory(data_directory + '/test/', seed=3,
                                             target_size=(pixels, pixels), shuffle=True,
                                             batch_size=batch_size, class_mode='binary')
# MODEL
inputs = tf.keras.Input(shape=(None, None, 3), batch_size=batch_size)
x = tf.keras.layers.Lambda(lambda img: tf.image.resize(img, (pixels, pixels)))(inputs)
x = tf.keras.layers.Lambda(tf.keras.applications.mobilenet_v2.preprocess_input)(x)
mobilev2 = tf.keras.applications.mobilenet_v2.MobileNetV2(weights='imagenet', input_tensor=x,
                                                          input_shape=(pixels, pixels, 3),
                                                          include_top=True, pooling='avg')

# Add a dense layer for task-specific categorization.
full_model = tf.keras.Sequential([mobilev2,
                                  tf.keras.layers.Dense(train_gen.num_classes,
                                                        activation='softmax')])

# Freeze the backbone, leaving only its last layer trainable.
for layer in mobilev2.layers:
    layer.trainable = False
mobilev2.layers[-1].trainable = True

full_model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001),
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])
# Start fitting (samples_per_epoch is defined earlier).
val_gen.reset()
train_gen.reset()
full_model.fit(train_gen,
               steps_per_epoch=samples_per_epoch,
               epochs=30,
               validation_data=val_gen,
               validation_steps=int(np.floor(val_gen.samples / val_gen.batch_size)))
good_acc_score = full_model.evaluate(val_gen, steps=val_gen.n // val_gen.batch_size)
'''
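One detail I am unsure about: int(np.floor(...)) means the final partial batch is never seen during validation. Purely for illustration (val_steps is my own variable name), a ceiling-based count would cover every sample:

'''
# Illustrative only: a step count that also covers the final partial batch.
val_steps = int(np.ceil(val_gen.samples / val_gen.batch_size))
'''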
I can reproduce the strangeness by doing something like this:
'''
# Change the batch size, reset the iterator, then change the batch size back.
val_gen.batch_size = 4
val_gen.reset()
val_gen.batch_size = batch_size
'''
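To see what the generator is actually doing around that change, its internal counters can be printed (a diagnostic sketch of my own; batch_index and total_batches_seen are attributes of the underlying Keras Iterator class, and reset() only sets batch_index back to 0):

'''
print(val_gen.batch_size, val_gen.batch_index, val_gen.total_batches_seen)
val_gen.batch_size = 4
val_gen.reset()                  # only resets batch_index to 0
val_gen.batch_size = batch_size
print(val_gen.batch_size, val_gen.batch_index, val_gen.total_batches_seen)
'''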
Afterwards, validation accuracy is consistently lower (possibly near chance) during both fitting and evaluation:
'''
bad_acc_score = full_model.evaluate(val_gen, steps=val_gen.n // val_gen.batch_size)
# or
full_model.fit(train_gen,
               steps_per_epoch=samples_per_epoch,
               epochs=1,
               validation_data=val_gen,
               validation_steps=int(np.floor(val_gen.samples / val_gen.batch_size)))
'''
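As a sanity check (my own sketch, not part of the failing code), a freshly built validation generator with no augmentation and shuffle=False should give a reproducible baseline, since nothing then depends on leftover iterator state; plain_gen, clean_val_gen, and ref_score are illustrative names:

'''
plain_gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
clean_val_gen = plain_gen.flow_from_directory(data_directory + '/test/',
                                              target_size=(pixels, pixels),
                                              shuffle=False,
                                              batch_size=batch_size,
                                              class_mode='binary')
# ceil so the final partial batch is included exactly once
ref_score = full_model.evaluate(clean_val_gen,
                                steps=int(np.ceil(clean_val_gen.n / clean_val_gen.batch_size)))
'''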