I'm working on a dataset to make pollution predictions (NO2) using the Keras library. I interpolated the missing data and one-hot encoded the wind direction. I divided the records into train/validation and test sets and fed them to Keras through a batch generator, after normalizing them with MinMaxScaler.
These are some of the models I used, but none of them gets over 75% validation accuracy, and the predictions they make are really bad:
#### MODEL TYPE DECLARATION AND CONFIGURATION
######################################
# Map of model-type id -> human-readable name; the name is embedded in the
# run identifier used for the checkpoint file.
code_name = {-1: "linear", 0: "ann_base_single", 1: "ann_base_multi",
             2: "gru_single_layer", 3: "gru_single_layer_w_dropout", 4: "gru_multi_layer", 5: "gru_multi_layer_2",
             6: "lstm_single_layer", 7: "lstm_single_layer_w_dropout", 8: "lstm_multi_layer", 9: "lstm_multi_layer_2"}
# BUG FIX: model_type was 20, which matches none of the branches below
# (compiling an EMPTY Sequential model) and is not a key of code_name
# (KeyError when the run name is built). It must be one of code_name's keys.
model_type = 2
# Build the architecture selected by model_type. Every variant consumes
# windows of shape (sequence_length, n_features) and emits num_y_signals
# values; the sigmoid output pairs with the MinMax-scaled (0..1) targets,
# while the linear baseline (-1) uses a plain linear output.
# NOTE(review): 'relu' inside GRU/LSTM cells is unusual — 'tanh' is the
# default and enables the fast cuDNN kernel; confirm it is intentional.
model = Sequential()
if model_type == -1:
    # Linear regression baseline on the flattened input window.
    model.add(layers.Flatten(input_shape=(sequence_length, x_data.shape[1])))
    model.add(layers.Dense(num_y_signals, activation='linear'))
elif model_type == 0:
    # Feed-forward net, one hidden layer.
    model.add(layers.Flatten(input_shape=(sequence_length, x_data.shape[1])))
    model.add(layers.Dense(hidden_layer_size, activation='relu'))
    model.add(layers.Dense(num_y_signals, activation='sigmoid'))
elif model_type == 1:
    # Feed-forward net, two hidden layers.
    model.add(layers.Flatten(input_shape=(sequence_length, x_data.shape[1])))
    model.add(layers.Dense(hidden_layer_size, activation='relu'))
    model.add(layers.Dense(hidden_layer_size, activation='relu'))
    model.add(layers.Dense(num_y_signals, activation='sigmoid'))
elif model_type == 2:
    # Single GRU layer.
    model.add(layers.GRU(hidden_layer_size, activation='relu', input_shape=(sequence_length, x_data.shape[1])))
    model.add(layers.Dense(num_y_signals, activation='sigmoid'))
elif model_type == 3:
    # Single GRU layer + dropout for regularisation.
    model.add(layers.GRU(hidden_layer_size, activation='relu', input_shape=(sequence_length, x_data.shape[1])))
    model.add(Dropout(0.2))
    model.add(layers.Dense(num_y_signals, activation='sigmoid'))
elif model_type == 4:
    # Stacked GRU (64 -> 32).
    model.add(layers.GRU(64, activation='relu', return_sequences=True, input_shape=(sequence_length, x_data.shape[1])))
    model.add(layers.GRU(32, activation='relu'))
    model.add(layers.Dense(num_y_signals, activation='sigmoid'))
elif model_type == 5:
    # Stacked GRU (64 -> 32 -> 16).
    model.add(layers.GRU(64, activation='relu', return_sequences=True, input_shape=(sequence_length, x_data.shape[1])))
    model.add(layers.GRU(32, activation='relu', return_sequences=True))
    model.add(layers.GRU(16, activation='relu'))
    model.add(layers.Dense(num_y_signals, activation='sigmoid'))
elif model_type == 6:
    # Single LSTM layer.
    model.add(layers.LSTM(hidden_layer_size, activation='relu', input_shape=(sequence_length, x_data.shape[1])))
    model.add(layers.Dense(num_y_signals, activation='sigmoid'))
elif model_type == 7:
    # Single LSTM layer + dropout for regularisation.
    model.add(layers.LSTM(hidden_layer_size, activation='relu', input_shape=(sequence_length, x_data.shape[1])))
    model.add(Dropout(0.2))
    model.add(layers.Dense(num_y_signals, activation='sigmoid'))
elif model_type == 8:
    # Stacked LSTM (64 -> 32).
    model.add(layers.LSTM(64, activation='relu', return_sequences=True, input_shape=(sequence_length, x_data.shape[1])))
    model.add(layers.LSTM(32, activation='relu'))
    model.add(layers.Dense(num_y_signals, activation='sigmoid'))
elif model_type == 9:
    # Stacked LSTM (64 -> 32 -> 16).
    model.add(layers.LSTM(64, activation='relu', return_sequences=True, input_shape=(sequence_length, x_data.shape[1])))
    model.add(layers.LSTM(32, activation='relu', return_sequences=True))
    model.add(layers.LSTM(16, activation='relu'))
    model.add(layers.Dense(num_y_signals, activation='sigmoid'))
else:
    # BUG FIX: an unrecognised model_type previously fell through silently,
    # leaving the Sequential model empty and compiling a model with no
    # layers. Fail fast instead.
    raise ValueError('unknown model_type: {0}'.format(model_type))
# Compile for regression on the (0..1) MinMax-scaled target.
# BUG FIX: the 'accuracy' metric was removed. Accuracy is not a meaningful
# measure for a continuous regression target (it rewards exact matches
# between prediction and label), so the reported ~75% "validation accuracy"
# said nothing about prediction quality. Judge the model by val_loss / MAE.
model.compile(optimizer=Adam(), loss='mae', metrics=[metrics.mae])
model.summary()
# Timestamp prefix for this run. NOTE(review): it already ends with '_', so
# the run name below contains a double underscore before 'Nodes' — confirm
# that is intended.
timenow = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S_")
# Run identifier encoding the hyper-parameters; positional field {7} is the
# timestamp prefix, {0} the model's code_name, {1}-{6} the remaining knobs.
run_name = '{7}_Nodes_{0}-Units_{1}-shift_steps_{2}-aq_parameter_{3}-train_split_{4}-batch_size_{5}-sequence_length_{6}'.format(code_name[model_type], hidden_layer_size, shift_steps, aq_parameter, train_split, batch_size, sequence_length, timenow)
# Windows-style (backslash) checkpoint path.
# NOTE(review): assumes the 'checkpoints' directory already exists — verify.
path_checkpoint = 'checkpoints\\{0}.keras'.format(run_name)
#### CALLBACK DECLARATION #######################################
# Persist only the best weights (lowest validation loss) seen so far.
callback_checkpoint = ModelCheckpoint(
    filepath=path_checkpoint, monitor='val_loss',
    save_best_only=True, save_weights_only=True, verbose=1)

# Abort training after 4 consecutive epochs without val_loss improvement.
callback_early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)

# Shrink the learning rate tenfold after 3 stagnant epochs, flooring at 1e-5.
callback_reduce_lr = ReduceLROnPlateau(
    monitor='val_loss', factor=0.1, patience=3, min_lr=1e-5, verbose=1)

# NOTE(review): callback_reset_states is defined elsewhere in this file.
callbacks = [
    callback_checkpoint,
    callback_early_stopping,
    callback_reduce_lr,
    callback_reset_states,
]
#### FITTING DATA INTO MODEL
# Train from the batch generator; one epoch walks the whole training split
# (train_data_size / batch_size steps).
# NOTE(review): fit_generator is deprecated in TF2 Keras — model.fit accepts
# generators directly; confirm which Keras version this targets.
# NOTE(review): shuffle is ignored when training from a generator, so
# shuffle=False is a no-op here; ordering must come from the generator itself.
history = model.fit_generator(generator=train_generator,
steps_per_epoch=int(train_data_size / batch_size),
epochs=epochs,
validation_data=validation_data,
callbacks = callbacks,
shuffle= False)
I followed this tutorial.