I Try to hyper parameter tuning over my model , I have small dataset thus the basic model is evaluatred by LOOCV. for better performance I want to hyper parameter tune the model thus I did the next step:
- I Run LOOCV 1.1 for each fold of the LOOCV I run random grid search of multiple parameter 1.2 I choose the best model of each fold 1.3 I train the model of the whole fold according the chosen hyper parameters and the test with the single sample that considered a test
Finally when I get the score, the AUC dramatically reduce (by 0.10) I Wonder why did that happen ?
def fit_loocv(self):
for train_ind, test_ind in self.leave_one_out.split(self.X, self.y):
X_train, X_test = self.X.iloc[train_ind], self.X.iloc[test_ind]
y_train, y_test = self.y.iloc[train_ind], self.y.iloc[test_ind]
# model fit and evaluation
self.cv_tuning(X_train,y_train,test_ind)
self.model.fit(X_train, y_train)
self.y_pred.append(self.model.predict(X_test))
def cv_tuning(self,X_train,y_train,test_ind):
params = {
'depth':[5,6,7],
'iterations':[250,275,300,325,350,400],
'learning_rate':[0.05,0.1,0.15],
'l2_leaf_reg':[0.1],
'scale_pos_weight' : [len(y_train[y_train==0])/len(y_train[y_train==1])],
'eval_metric':['AUC'],
'verbose':[False]
}
model = CatBoostClassifier(verbose=False)
est = RandomizedSearchCV(estimator = model,scoring='roc_auc', param_distributions = params, cv = 30,verbose=False,n_iter=20)
est.fit(X_train,y_train)
self.model=est.best_estimator_
return