Different result metric from evaluation and prediction with hyperopt

Question

This is my first experience tuning XGBoost's hyperparameters. My plan is to find the optimal hyperparameters using hyperopt.

def obj(params):
    """Hyperopt objective: fit an XGBRegressor with ``params`` and score it on the test set.

    Args:
        params: Mapping produced by the hyperopt search space. The keys read
            here are 'n_estimator', 'learning_rate', 'booster', 'gamma',
            'max_depth', 'min_child_weight', 'colsample_bytree',
            'reg_lambda' and 'reg_alpha'.

    Returns:
        dict with 'loss' (the negated test-set R2, so that minimising the
        loss maximises R2), 'status' (STATUS_OK) and 'model' (the fitted
        regressor).
    """
    xgb_model = xgb.XGBRegressor(
        # The estimator keyword is ``n_estimators``; the misspelled
        # ``n_estimator`` does not set the number of boosting rounds.
        n_estimators=params['n_estimator'],
        learning_rate=params['learning_rate'],
        booster=params['booster'],
        gamma=params['gamma'],
        # hp.quniform yields floats, the tree parameters below need integers.
        max_depth=int(params['max_depth']),
        min_child_weight=int(params['min_child_weight']),
        # colsample_bytree is a fraction; int() would truncate every sampled
        # value below 1 to 0, so keep it as a float.
        colsample_bytree=float(params['colsample_bytree']),
        reg_lambda=params['reg_lambda'],
        reg_alpha=params['reg_alpha'],
    )
    evaluation = [(X_train, Y_train), (X_test, Y_test)]
    xgb_model.fit(X_train, Y_train,
                  eval_set=evaluation,
                  verbose=False)
    pred = xgb_model.predict(X_test)
    r2_value = r2_score(y_true=Y_test, y_pred=pred)
    # MAPE is defined outside this block; argument order kept as in the
    # original call -- TODO confirm it expects (prediction, truth).
    mape = MAPE(pred, Y_test)
    print('R2-Value:', r2_value)
    print('MAPE Value :', mape)
    # Call get_params() so the parameter dict is printed, not the bound method.
    print(xgb_model.get_params())
    return {'loss': -r2_value, 'status': STATUS_OK, 'model': xgb_model}
  
# Search space handed to hyperopt: plain values ('n_estimator', 'seed') are
# constants, hp.* entries are sampled anew for every trial.
# NOTE(review): 'seed' only has an effect if the objective forwards it to the
# model -- verify against obj.
# NOTE(review): the key is spelled 'n_estimator'; it must match the key the
# objective reads.
params={'n_estimator':450,
        'learning_rate':hp.loguniform('learning_rate',np.log(0.01),np.log(1)),
        'booster':hp.choice('booster',['gbtree','dart','gblinear']),
        'reg_lambda':hp.uniform('reg_lambda',0,2.5),
        'reg_alpha':hp.uniform('reg_alpha',0,2.5),
        'colsample_bytree':hp.uniform('colsample_bytree',0,1),
        'gamma':hp.uniform('gamma',0,10),
        'max_depth':hp.quniform('max_depth',3,10,1),
        'min_child_weight':hp.quniform('min_child_weight',0,10,1),'seed': 0}

# Run 100 TPE evaluations of obj; every trial's result is recorded in `trials`.
# NOTE(review): no rstate is passed to fmin, so the search itself is
# presumably not seeded -- confirm if reproducible runs are required.
trials = Trials()
best_hyperparams = fmin(fn = obj,
                        space = params,
                        algo = tpe.suggest,
                        max_evals = 100,
                        trials = trials)

I display the loss value, which is based on the R2 score, together with the MAPE. After running the code, I noted the best loss value.

When I fit a model with those hyperparameters, I get different MAPE and R2 results than before.

# Regressor built from an explicit, fully spelled-out parameter list
# (according to the surrounding text, the hyperparameters of the best trial).
# NOTE(review): both `n_estimator` and `n_estimators` are passed below;
# presumably only `n_estimators` (100) controls the number of trees -- confirm.
# NOTE(review): `colsample_bytree` is 0 here -- confirm that is intended.
model = xgb.XGBRegressor(
    base_score=0.5,
    booster='gbtree',
    colsample_bylevel=1,
    colsample_bynode=1,
    colsample_bytree=0,
    gamma=4.478273315667381,
    importance_type='gain',
    learning_rate=0.49914654574533074,
    max_delta_step=0,
    max_depth=8,
    min_child_weight=4,
    missing=None,
    n_estimator=450,
    n_estimators=100,
    n_jobs=1,
    nthread=None,
    objective='reg:linear',
    random_state=0,
    reg_alpha=1.4575139694808485,
    reg_lambda=1.7326686243254332,
    scale_pos_weight=1,
    seed=None,
    silent=None,
    subsample=1,
    verbosity=1,
)

# Fit on the training data only (no eval_set is passed here), then predict on
# the test set; the predictions are not stored in a variable.
model.fit(X_train, Y_train)
model.predict(X_test)

Can you explain why this happens?

Flavia Giammarino · Answer 1 · 2021-10-11T07:50:43.280

For the XGBoost results to be reproducible, you need to set n_jobs=1 in addition to fixing the random seed; see this answer and the code below.

import numpy as np
import xgboost as xgb
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_percentage_error
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK

# Generate a synthetic regression dataset; random_state=0 fixes the draw so
# every run works on the same data.
X, y = make_regression(random_state=0)

# Split into train and test parts; random_state=0 fixes the shuffle so the
# split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, random_state=0)

# define the model
def XGBModel(params):
    """Build an unfitted XGBRegressor from one sampled hyperparameter set.

    Args:
        params: Mapping with the keys 'n_estimator', 'learning_rate',
            'booster', 'gamma', 'max_depth', 'min_child_weight',
            'colsample_bytree', 'reg_lambda' and 'reg_alpha'.

    Returns:
        An ``xgb.XGBRegressor`` configured with those values, a fixed random
        seed and a single worker thread.
    """
    return xgb.XGBRegressor(
        # The estimator keyword is ``n_estimators``; the misspelled
        # ``n_estimator`` does not set the number of boosting rounds.
        n_estimators=params['n_estimator'],
        learning_rate=params['learning_rate'],
        booster=params['booster'],
        gamma=params['gamma'],
        # hp.quniform yields floats, the tree parameters below need integers.
        max_depth=int(params['max_depth']),
        min_child_weight=int(params['min_child_weight']),
        # colsample_bytree is a fraction; int() would truncate every sampled
        # value below 1 to 0, so keep it as a float.
        colsample_bytree=float(params['colsample_bytree']),
        reg_lambda=params['reg_lambda'],
        reg_alpha=params['reg_alpha'],
        random_state=0,  # fix the random seed
        n_jobs=1,  # set the number of parallel jobs equal to one
    )

# define the objective function
def obj(params):
    """Hyperopt objective: train the model for ``params`` and score it on the test set.

    Args:
        params: One hyperparameter sample, passed straight to ``XGBModel``.

    Returns:
        dict with 'loss' (negated test-set R2), 'mape' (test-set mean
        absolute percentage error), 'status' (STATUS_OK) and 'model' (the
        fitted regressor).
    """
    # Both the training and the test split are passed as evaluation sets.
    eval_pairs = [(X_train, Y_train), (X_test, Y_test)]

    regressor = XGBModel(params)
    regressor.fit(X_train, Y_train, eval_set=eval_pairs, verbose=False)
    test_pred = regressor.predict(X_test)

    # hyperopt minimises 'loss', so R2 is negated to have it maximised.
    result = {
        'loss': - r2_score(y_true=Y_test, y_pred=test_pred),
        'mape': mean_absolute_percentage_error(y_true=Y_test, y_pred=test_pred),
        'status': STATUS_OK,
        'model': regressor,
    }
    return result

# Hyperparameter search space: 'n_estimator' is a fixed constant, every hp.*
# entry is sampled anew for each trial.
# NOTE(review): the key is spelled 'n_estimator'; it must match the key that
# XGBModel reads.
params = {
    'n_estimator': 1000,
    # learning rate drawn log-uniformly between 0.01 and 1
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(1)),
    'booster': hp.choice('booster', ['gbtree', 'dart', 'gblinear']),
    'reg_lambda': hp.uniform('reg_lambda', 0, 2.5),
    'reg_alpha': hp.uniform('reg_alpha', 0, 2.5),
    'colsample_bytree': hp.uniform('colsample_bytree', 0, 1),
    'gamma': hp.uniform('gamma', 0, 10),
    # quniform with step 1: whole-number values, cast to int in XGBModel
    'max_depth': hp.quniform('max_depth', 3, 10, 1),
    'min_child_weight': hp.quniform('min_child_weight', 0, 10, 1),
}

# Run the TPE search for 10 evaluations with a seeded random state.
# NOTE(review): newer hyperopt releases may expect a numpy Generator for
# `rstate` instead of a RandomState -- confirm for the installed version.
trials = Trials()
best_hyperparams = fmin(
    fn=obj,
    space=params,
    algo=tpe.suggest,
    max_evals=10,
    trials=trials,
    rstate=np.random.RandomState(0),
)

# Report the scores stored with the best trial ('loss' holds the negated R2).
best_result = trials.best_trial['result']
print('R2-Value:', - best_result['loss'])
print('MAPE Value :', best_result['mape'])
# Reported output (exact values depend on library versions and model settings):
# R2-Value: 0.5388751508268976
# MAPE Value : 4.700583518398514

# Take the model object stored with the best trial and fit it again on the
# same data, with the same evaluation sets as during the search.
best_model = best_result['model']
eval_pairs = [(X_train, Y_train), (X_test, Y_test)]
best_model.fit(X_train, Y_train, eval_set=eval_pairs, verbose=False)
pred = best_model.predict(X_test)

# Score the refitted model with the same two metrics.
r2_value = r2_score(y_true=Y_test, y_pred=pred)
mape = mean_absolute_percentage_error(y_true=Y_test, y_pred=pred)

print('R2-Value:', r2_value)
print('MAPE Value :', mape)
# Reported output (identical to the scores of the best trial above):
# R2-Value: 0.5388751508268976
# MAPE Value : 4.700583518398514

Different result metric from evaluation and prediction with hyperopt

1 Answer