# Backtest the forecaster without refitting: every row up to `end_validation`
# forms the initial training window and the remaining rows are predicted in
# consecutive folds of 24 steps, scored with mean absolute error.
# NOTE(review): per the traceback, the metric is computed from
# `y.loc[backtest_predictions.index]`, so the index of the predictions must
# carry the same labels as the index of `y`.
metric, predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=data['Count of Visits'],
    steps=24,
    metric='mean_absolute_error',
    initial_train_size=data.loc[:end_validation].shape[0],
    refit=False,
    verbose=True,
    show_progress=True,
)
I'm running skforecast's `backtesting_forecaster` (call shown above), but it fails with the following error:
KeyError: "None of [Int64Index([24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n ...\n 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],\n dtype='int64', length=720)] are in the [index]"
I tried passing `data['Count of Visits'].values`
and `data['Count of Visits'].to_numpy()` instead,
but the function requires a pandas Series. When I do pass a pandas Series, I get the KeyError shown above. I'm not sure how to proceed from here.
# Restrict the data to 2023-02-01 .. 2023-06-30 and split it chronologically
# into train / validation / test partitions.
data = data.loc['2023-02-01 00:00:00': '2023-06-30 23:00:00'].copy()
# Give the index an explicit hourly frequency. Without one, skforecast warns
# and falls back to a positional RangeIndex, so the backtest predictions come
# back labelled 24, 25, ... and `y.loc[backtest_predictions.index]` raises
# "None of [Int64Index([24, 25, ...])] are in the [index]".
# NOTE(review): assumes `data.index` is already a DatetimeIndex (convert it
# with `pd.to_datetime` first if not) and that the series is hourly with no
# missing timestamps (3600 rows = 150 days x 24 h); `asfreq` inserts NaN rows
# for any gap. On pandas >= 2.2 the hourly alias is spelled 'h'.
data = data.asfreq('H')
end_train = '30-04-2023 23:59'
end_validation = '31-05-2023 23:59'
# `.loc` label slices include both endpoints; presumably the timestamps fall
# on the hour, so the 23:59 cut-offs keep the partitions from overlapping.
data_train = data.loc[:end_train, :].copy()
data_val = data.loc[end_train:end_validation, :].copy()
data_test = data.loc[end_validation:, :].copy()
Above is how `end_train` and `end_validation` are defined and how the data is split. `data` has 3600 rows.
from skforecast.model_selection import backtesting_forecaster
from skforecast.ForecasterAutoreg import ForecasterAutoreg
Above are the imports used. The `forecaster` variable is created and fitted by the code below:
# Autoregressive forecaster: a Ridge regressor fed with 24 lags, with a
# StandardScaler passed as the transformer for the target series.
forecaster = ForecasterAutoreg(
    regressor=Ridge(random_state=123),
    lags=24,
    transformer_y=StandardScaler(),
)
# Fit on every observation up to and including `end_validation`.
forecaster.fit(y=data['Count of Visits'].loc[:end_validation])
The complete traceback is shown below:
KeyError Traceback (most recent call
last)
Cell In[68], line 1
----> 1 metric, predictions = backtesting_forecaster(
2 forecaster = forecaster,
3 y = data['Count of
Visits'],
4 steps = 24,
5 metric =
'mean_absolute_error',
6 initial_train_size =
len(data.loc[:end_validation]),
7 refit = False,
8 verbose = True,
9 show_progress = True
10 )
File ~\anaconda3\lib\site-
packages\skforecast\model_selection\model_selection.py:887, in
backtesting_forecaster(forecaster, y, steps, metric,
initial_train_size, fixed_train_size, gap, allow_incomplete_fold,
exog, refit, interval, n_boot, random_state, in_sample_residuals,
verbose, show_progress)
869 metrics_values, backtest_predictions =
_backtesting_forecaster_refit(
870 forecaster = forecaster,
871 y = y,
(...)
884 show_progress = show_progress
885 )
886 else:
--> 887 metrics_values, backtest_predictions =
_backtesting_forecaster_no_refit(
888 forecaster = forecaster,
889 y = y,
890 steps = steps,
891 metric = metric,
892 initial_train_size = initial_train_size,
893 gap = gap,
894 allow_incomplete_fold = allow_incomplete_fold,
895 exog = exog,
896 interval = interval,
897 n_boot = n_boot,
898 random_state = random_state,
899 in_sample_residuals = in_sample_residuals,
900 verbose = verbose,
901 show_progress = show_progress
902 )
904 return metrics_values, backtest_predictions
File ~\anaconda3\lib\site-
packages\skforecast\model_selection\model_selection.py:703, in
_backtesting_forecaster_no_refit(forecaster, y, steps, metric,
initial_train_size, gap, allow_incomplete_fold, exog, interval,
n_boot, random_state, in_sample_residuals, verbose, show_progress)
700 if isinstance(backtest_predictions, pd.Series):
701 backtest_predictions = pd.DataFrame(backtest_predictions)
--> 703 metrics_values = [m(
704 y_true = y.loc[backtest_predictions.index],
705 y_pred = backtest_predictions['pred']
706 ) for m in metrics
707 ]
709 if not isinstance(metric, list):
710 metrics_values = metrics_values[0]
File ~\anaconda3\lib\site-
packages\skforecast\model_selection\model_selection.py:704, in
<listcomp>(.0)
700 if isinstance(backtest_predictions, pd.Series):
701 backtest_predictions = pd.DataFrame(backtest_predictions)
703 metrics_values = [m(
--> 704 y_true =
y.loc[backtest_predictions.index],
705 y_pred = backtest_predictions['pred']
706 ) for m in metrics
707 ]
709 if not isinstance(metric, list):
710 metrics_values = metrics_values[0]
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1073, in
_LocationIndexer.__getitem__(self, key)
1070 axis = self.axis or 0
1072 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073 return self._getitem_axis(maybe_callable, axis=axis)
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1301, in
_LocIndexer._getitem_axis(self, key, axis)
1298 if hasattr(key, "ndim") and key.ndim > 1:
1299 raise ValueError("Cannot index with multidimensional
key")
-> 1301 return self._getitem_iterable(key, axis=axis)
1303 # nested tuple slicing
1304 if is_nested_tuple(key, labels):
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1239, in
_LocIndexer._getitem_iterable(self, key, axis)
1236 self._validate_key(key, axis)
1238 # A collection of keys
-> 1239 keyarr, indexer = self._get_listlike_indexer(key, axis)
1240 return self.obj._reindex_with_indexers(
1241 {axis: [keyarr, indexer]}, copy=True, allow_dups=True
1242 )
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1432, in
_LocIndexer._get_listlike_indexer(self, key, axis)
1429 ax = self.obj._get_axis(axis)
1430 axis_name = self.obj._get_axis_name(axis)
-> 1432 keyarr, indexer = ax._get_indexer_strict(key, axis_name)
1434 return keyarr, indexer
File ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py:6070,
in Index._get_indexer_strict(self, key, axis_name)
6067 else:
6068 keyarr, indexer, new_indexer =
self._reindex_non_unique(keyarr)
-> 6070 self._raise_if_missing(keyarr, indexer, axis_name)
6072 keyarr = self.take(indexer)
6073 if isinstance(key, Index):
6074 # GH 42790 - Preserve name from an Index
File ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py:6130,
in Index._raise_if_missing(self, key, indexer, axis_name)
6128 if use_interval_msg:
6129 key = list(key)
-> 6130 raise KeyError(f"None of [{key}] are in the
[{axis_name}]")
6132 not_found = list(ensure_index(key)[missing_mask.nonzero()
[0]].unique())
6133 raise KeyError(f"{not_found} not in index")
KeyError: "None of [Int64Index([24, 25, 26, 27, 28, 29, 30, 31, 32,
33,\n ...\n 38, 39, 40, 41, 42, 43, 44, 45, 46,
47],\n dtype='int64', length=720)] are in the [index]"