This is for a Kaggle competition in which I have essays that I have to grade/predict on 6 parameters (vocabulary, cohesion, conventions, grammar, phraseology, syntax). I have implemented a KerasTuner RandomSearch hypermodel to get better results, but the model peaks at 0.52 accuracy and gives terrible predictions.
Following are the X and y for the model: (y=1:7, X=7:
Following is the model_builder:
import random
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import keras_tuner as kt

def model_builder(hp):
    model = tf.keras.Sequential()
    # 6 input features in, 6 scores out
    model.add(Dense(6, input_shape=(6,), activation="relu"))

    # Note: the hidden-layer widths are drawn with random.randrange rather than hp.Int,
    # so KerasTuner does not record or tune them.
    units1 = random.randrange(32, 512, 32)
    model.add(tf.keras.layers.Dense(units=units1, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.3))
    units2 = random.randrange(32, 512, 32)
    model.add(tf.keras.layers.Dense(units=units2, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.3))
    units3 = random.randrange(32, 512, 32)
    model.add(tf.keras.layers.Dense(units=units3, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.4))
    # (three more Dense/LeakyReLU + Dropout blocks were tried here and are commented out)
    units7 = random.randrange(32, 512, 32)
    model.add(tf.keras.layers.Dense(units=units7, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.3))
    units8 = random.randrange(32, 512, 32)
    model.add(tf.keras.layers.Dense(units=units8, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.2))

    # Linear output layer: one unit per score
    model.add(tf.keras.layers.Dense(6))

    # Tune the learning rate and momentum of the optimizer
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    hp_momentum = hp.Choice('momentum', values=[1e-1, 3e-1, 5e-1, 7e-1, 9e-1])

    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=hp_learning_rate,
                                                    momentum=hp_momentum),
                  loss=tf.keras.losses.MeanSquaredError(),
                  metrics=['accuracy'])
    return model
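For comparison, here is a minimal sketch of how those widths could instead be registered with the tuner via KerasTuner's hp.Int, so that RandomSearch records and reuses them. The function name model_builder_hp, the three-layer loop, and the hyperparameter names units1–units3 are only illustrative, not my actual setup:

import tensorflow as tf
from tensorflow.keras.layers import Dense

def model_builder_hp(hp):
    model = tf.keras.Sequential()
    model.add(Dense(6, input_shape=(6,), activation="relu"))
    # Each width is a tracked hyperparameter, so RandomSearch can log and reuse good values.
    for i in range(1, 4):
        units = hp.Int(f'units{i}', min_value=32, max_value=512, step=32)
        model.add(Dense(units=units, activation='relu'))
        model.add(tf.keras.layers.Dropout(0.3))
    model.add(Dense(6))
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=hp_learning_rate),
                  loss=tf.keras.losses.MeanSquaredError(),
                  metrics=['accuracy'])
    return model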
Following is the tuner and search code:
tuner = kt.RandomSearch(
    hypermodel=model_builder,
    objective='val_accuracy',
    max_trials=10,
)

early_stopping_monitor = EarlyStopping(monitor='val_accuracy',
                                       patience=10,
                                       restore_best_weights=True)

tuner.search(X_train,
             y_train,
             epochs=80,
             batch_size=16,
             validation_data=(X_test, y_test),
             callbacks=[early_stopping_monitor])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"""
The hyperparameter search is complete. The optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")
I have tried feature extraction to get inputs that correlate better with the targets, but the correlations are still weak. I have also tried different optimizers, activations, etc., but none of them helped.
My last shot at this is probably to scale the data to 0-1 and use a sigmoid output on it.
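Roughly, that would look like the sketch below (assuming scikit-learn's MinMaxScaler and the same X_train/X_test splits; the 64-unit layers are arbitrary placeholders, not my tuned architecture):

import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

# Scale each of the 6 target columns to [0, 1] so a sigmoid output can cover them.
y_scaler = MinMaxScaler()
y_train_scaled = y_scaler.fit_transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

# Same kind of network, but with a sigmoid on the 6-unit output layer.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, input_shape=(6,), activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(6, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train_scaled,
          epochs=80, batch_size=16,
          validation_data=(X_test, y_test_scaled))

# Map predictions back to the original score scale.
preds = y_scaler.inverse_transform(model.predict(X_test))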