I am currently using Keras for satellite image classification, and I am having trouble getting the right predictions from predict and predict_generator.
Below is my code:
import os
import numpy as np
import pandas as pd
from keras.optimizers import Adam, SGD
from tools import load_val_datas, load_test_datas, make_predictions, make_submissions
from keras_tools import save_model, load_model
from callbacks import CustomCallbacks
from data_generator import ImageDataGenerator
from model import base_cnn
# Train a multilabel CNN from directory generators, then score the validation
# set twice: once through predict_generator and once through predict on the
# pre-loaded arrays, so the two F-beta scores can be compared.

# Raise TensorFlow's C++ minimum log level to cut console noise.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

TRAIN_SIZE, VAL_SIZE, TEST_SIZE, TEST_SIZE_ADD = 30000, 10479, 40669, 20522
IMAGE_FIRST_DIM, N_COLORS = 32, 3
IMAGE_SIZE = IMAGE_FIRST_DIM * IMAGE_FIRST_DIM * N_COLORS
LABEL_SIZE = 17
DROPOUT = 0.25
BATCH_SIZE = 96
N_EPOCHS = 2
CHECKPOINTS_FOLDER = "checkpoints/"
MODEL_JSON = "epoch-10.json"
MODEL_H5 = "epoch-10.h5"
TO_LOAD = False

# Whole number of batches needed to see every sample exactly once per pass.
# Integer ceiling division is used instead of SIZE / BATCH_SIZE because the
# latter is a float on Python 3 (312.5 and ~109.16 here) and floors on
# Python 2, which would silently drop the final partial batch and leave the
# number of predictions different from len(val_y).
TRAIN_STEPS = (TRAIN_SIZE + BATCH_SIZE - 1) // BATCH_SIZE
VAL_STEPS = (VAL_SIZE + BATCH_SIZE - 1) // BATCH_SIZE

# Map each image name to its list of label values (one row per image).
df_train_labels = pd.read_csv("datas/train_labels.csv")
label_dict = df_train_labels.set_index("image_name").T.to_dict("list")

# In-memory copy of the validation set, used by model.predict below.
val_x, val_y = load_val_datas(VAL_SIZE, IMAGE_FIRST_DIM, N_COLORS, LABEL_SIZE)

if TO_LOAD:
    model, is_loaded = load_model(CHECKPOINTS_FOLDER + MODEL_JSON, CHECKPOINTS_FOLDER + MODEL_H5)
else:
    model = base_cnn(IMAGE_FIRST_DIM, N_COLORS)

# Both optimizers are constructed, but only `sgd` is handed to compile().
adam = Adam(lr=0.01)
sgd = SGD(lr=0.01, momentum=0.9, decay=0.0005)
# Compiled on both paths: fit_generator below needs a compiled model.
model.compile(loss='binary_crossentropy', optimizer=sgd)

my_callbacks = CustomCallbacks()

# A single rescaling generator feeds both the training and validation flows.
datagen = ImageDataGenerator(rescale=1./255)
train_generator = datagen.flow_from_directory(
    "datas/train",
    target_size=(IMAGE_FIRST_DIM, IMAGE_FIRST_DIM),
    batch_size=BATCH_SIZE,
    class_mode="multilabel",
    multilabel_classes=label_dict)
# shuffle=False so that predictions come back in a fixed order.
val_generator = datagen.flow_from_directory(
    "datas/validation",
    target_size=(IMAGE_FIRST_DIM, IMAGE_FIRST_DIM),
    batch_size=BATCH_SIZE,
    shuffle=False,
    class_mode="multilabel",
    multilabel_classes=label_dict)

model.fit_generator(train_generator, steps_per_epoch=TRAIN_STEPS, epochs=N_EPOCHS,
                    verbose=2)

# NOTE(review): the model is saved without the CHECKPOINTS_FOLDER prefix that
# the load path above uses - confirm where save_model actually writes.
save_model(model, MODEL_JSON, MODEL_H5)

from time import time
from sklearn.metrics import fbeta_score

# --- Validation predictions through the generator -------------------------
# NOTE(review): the score below is only meaningful if val_y is ordered exactly
# like the batches val_generator yields - verify against the generator's file
# ordering. Also confirm that pickle_safe=True (generator run in a separate
# process) keeps batch order with the custom ImageDataGenerator.
st = time()
p_valid = model.predict_generator(val_generator, steps=VAL_STEPS, pickle_safe=True)
print("time: ", time() - st)
print(p_valid)
print(fbeta_score(val_y, np.array(p_valid) > 0.2, beta=2, average='samples'))

# --- Validation predictions on the pre-loaded arrays ----------------------
st = time()
p_valid1 = model.predict(val_x)
print("time: ", time() - st)
print(type(p_valid1))
print(fbeta_score(val_y, np.array(p_valid1) > 0.2, beta=2, average='samples'))
I am using a different version of ImageDataGenerator that can handle multilabel data (I have already checked the implementation, and the data looks correctly loaded in batches).
The trouble comes from the predict and predict_generator part, where I get different results from the two. I double-checked with a model trained without a generator, and the output from predict is correct (and very different from the output from predict_generator). The data fed into predict is constructed the same way the generator constructs it (I also checked that).
Using TensorFlow backend.
validation images loaded in 0.01 seconds
validation labels loaded in 0.00 seconds
Found 30000 images belonging to 1 classes.
Found 10479 images belonging to 1 classes.
Epoch 1/2
63s - loss: 0.2762
Epoch 2/2
66s - loss: 0.2288
time: 22.098024606704712
beta_score: 0.667686382255
time: 3.3181281089782715
beta_score: 0.740394519272
Thanks, Nicolas