I have written the following custom evaluation function to use with xgboost, in order to optimize F1. Umfortuantely it returns an exception when run with xgboost.
The evaluation function is the following:
def F1_eval(preds, labels):
t = np.arange(0, 1, 0.005)
f = np.repeat(0, 200)
Results = np.vstack([t, f]).T
P = sum(labels == 1)
for i in range(200):
m = (preds >= Results[i, 0])
TP = sum(labels[m] == 1)
FP = sum(labels[m] == 0)
if (FP + TP) > 0:
Precision = TP/(FP + TP)
Recall = TP/P
if (Precision + Recall >0) :
F1 = 2 * Precision * Recall / (Precision + Recall)
else:
F1 = 0
Results[i, 1] = F1
return(max(Results[:, 1]))
Below I provide a reproducible example along with the error message:
from sklearn import datasets
Wine = datasets.load_wine()
X_wine = Wine.data
y_wine = Wine.target
y_wine[y_wine == 2] = 1
X_wine_train, X_wine_test, y_wine_train, y_wine_test = train_test_split(X_wine, y_wine, test_size = 0.2)
clf_wine = xgb.XGBClassifier(max_depth=6, learning_rate=0.1,silent=False, objective='binary:logistic', \
booster='gbtree', n_jobs=8, nthread=None, gamma=0, min_child_weight=1, max_delta_step=0, \
subsample=0.8, colsample_bytree=0.8, colsample_bylevel=1, reg_alpha=0, reg_lambda=1)
clf_wine.fit(X_wine_train, y_wine_train,\
eval_set=[(X_wine_train, y_wine_train), (X_wine_test, y_wine_test)], eval_metric=F1_eval, early_stopping_rounds=10, verbose=True)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-453-452852658dd8> in <module>()
12 clf_wine = xgb.XGBClassifier(max_depth=6, learning_rate=0.1,silent=False, objective='binary:logistic', booster='gbtree', n_jobs=8, nthread=None, gamma=0, min_child_weight=1, max_delta_step=0, subsample=0.8, colsample_bytree=0.8, colsample_bylevel=1, reg_alpha=0, reg_lambda=1)
13
---> 14 clf_wine.fit(X_wine_train, y_wine_train,eval_set=[(X_wine_train, y_wine_train), (X_wine_test, y_wine_test)], eval_metric=F1_eval, early_stopping_rounds=10, verbose=True)
15
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\sklearn.py in fit(self, X, y, sample_weight, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set)
519 early_stopping_rounds=early_stopping_rounds,
520 evals_result=evals_result, obj=obj, feval=feval,
--> 521 verbose_eval=verbose, xgb_model=None)
522
523 self.objective = xgb_options["objective"]
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\training.py in train(params, dtrain, num_boost_round, evals, obj, feval, maximize, early_stopping_rounds, evals_result, verbose_eval, xgb_model, callbacks, learning_rates)
202 evals=evals,
203 obj=obj, feval=feval,
--> 204 xgb_model=xgb_model, callbacks=callbacks)
205
206
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\training.py in _train_internal(params, dtrain, num_boost_round, evals, obj, feval, xgb_model, callbacks)
82 # check evaluation result.
83 if len(evals) != 0:
---> 84 bst_eval_set = bst.eval_set(evals, i, feval)
85 if isinstance(bst_eval_set, STRING_TYPES):
86 msg = bst_eval_set
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\core.py in eval_set(self, evals, iteration, feval)
957 if feval is not None:
958 for dmat, evname in evals:
--> 959 feval_ret = feval(self.predict(dmat), dmat)
960 if isinstance(feval_ret, list):
961 for name, val in feval_ret:
<ipython-input-383-dfb8d5181b18> in F1_eval(preds, labels)
11
12
---> 13 P = sum(labels == 1)
14
15
TypeError: 'bool' object is not iterable
I do not understand why the function is not working. I have followed the examples here: https://github.com/dmlc/xgboost/blob/master/demo/guide-python/custom_objective.py
I would like to understand where I err.