I am trying to tune an LGBMRegressor on RMSE and MAE at the same time. From what I understand, this should be done by returning both metrics from the objective function of the Optuna study (a toy sketch of the pattern I am following is below). I read this and this article, which describe something similar. However, I run into a ValueError which I don't understand:

ValueError: The entry associated with the validation name "valid_0" and the metric name "['l1', 'rmse']" is not found in the evaluation result list [('valid_0', 'l1', 202.16873851000517, False), ('valid_0', 'rmse', 221.49779008784068, False), ('valid_0', 'l2', 49061.27101379713, False)].

Can someone help?
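For reference, this is the bare multi-objective pattern I am trying to follow, reduced to a toy objective (the names toy_objective and toy_study are just for illustration, not from my real code; the two returned values line up with the two entries in directions):

import optuna

def toy_objective(trial):
    x = trial.suggest_float('x', -10, 10)
    # two values returned -> one per entry in directions below
    return abs(x - 2), abs(x + 2)

toy_study = optuna.create_study(directions=['minimize', 'minimize'])
toy_study.optimize(toy_objective, n_trials=10)

This toy version runs fine; as far as I can tell from the traceback, the error only appears once the LightGBM pruning callback is involved.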

This is what I tried:

import numpy as np
import lightgbm as lgbm
import optuna
from optuna.integration import LightGBMPruningCallback
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Get categorical features
cat_features = df.select_dtypes(include='category').columns.to_list()

def objective(trial, X, y):
    param_grid = {
        'objective': 'regression',
        'n_estimators': trial.suggest_int('n_estimators', 100, 10000, step=100),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 20, 3000, step=20),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 200, 10000, step=100),
        'max_bin': trial.suggest_int('max_bin', 200, 300),
        'lambda_l1': trial.suggest_int('lambda_l1', 0, 100, step=5),
        'lambda_l2': trial.suggest_int('lambda_l2', 0, 100, step=5),
        'min_gain_to_split': trial.suggest_float('min_gain_to_split', 0, 15),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.2, 0.95, step=0.1),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.2, 0.95, step=0.1)
    }

    cv = KFold(n_splits=5, shuffle=True, random_state=42)

    cv_scores_mae = []
    cv_scores_rmse = []

    for train_idx, test_idx in cv.split(X, y):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        # positional indexing, so this also works when y has a non-default index
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        model = lgbm.LGBMRegressor(**param_grid)

        # Create Dataset objects for LightGBM (note: these are not actually
        # used below; fit() is given the raw arrays directly)
        lgb_train = lgbm.Dataset(X_train, y_train, categorical_feature=cat_features)
        lgb_eval = lgbm.Dataset(X_test, y_test, reference=lgb_train)

        model.fit(
            X_train, y_train,
            eval_set=[(X_test, y_test)],
            eval_metric=['l1', 'rmse'],
            categorical_feature=cat_features,
            # this is the call the traceback points at: the pruning callback
            # receives the same list of metrics as eval_metric
            callbacks=[LightGBMPruningCallback(trial, ['l1', 'rmse']),
                       lgbm.early_stopping(50),
                       lgbm.log_evaluation(period=0)]  # period=0 silences per-iteration logs
        )

        y_pred = model.predict(X_test)

        # Calculate the evaluation metrics
        mae = mean_absolute_error(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))

        cv_scores_mae.append(mae)
        cv_scores_rmse.append(rmse)

    return np.mean(cv_scores_mae), np.mean(cv_scores_rmse)

study = optuna.create_study(directions=['minimize', 'minimize'], study_name="LGBM Regressor")
func = lambda trial: objective(trial, df.drop(columns='price'), df['price'])
study.optimize(func, n_trials=20, show_progress_bar=True)
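
In case it matters, this is how I plan to read the results afterwards. My understanding is that a multi-objective study has no single best trial, so you inspect the Pareto front via study.best_trials rather than study.best_params (a sketch, assuming the study above runs to completion):

# each Pareto-optimal trial carries its (MAE, RMSE) pair and its params
for t in study.best_trials:
    print(t.number, t.values, t.params)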
