KeyError: "None of [Int64Index(...)] are in the [index]"

Question

metric, predictions = backtesting_forecaster(
                          forecaster         = forecaster,
                          y                  =  data['Count of Visits'],
                          steps              = 24,
                          metric             = 'mean_absolute_error',
                          initial_train_size = len(data.loc[:end_validation]),
                          refit              = False,
                          verbose            = True,
                          show_progress      = True
                    )

I'm running skforecast's `backtesting_forecaster` (call shown above), but it fails with the following error:

KeyError: "None of [Int64Index([24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n ...\n 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],\n dtype='int64', length=720)] are in the [index]"

I tried passing `data['Count of Visits'].values` and `data['Count of Visits'].to_numpy()` instead, but the function requires a pandas Series. When I pass a pandas Series, however, I get the KeyError shown above. I'm not sure how to proceed from here.

data = data.loc['2023-02-01 00:00:00': '2023-06-30 23:00:00'].copy()
end_train = '30-04-2023 23:59'
end_validation = '31-05-2023 23:59'
data_train = data.loc[: end_train, :].copy()
data_val   = data.loc[end_train:end_validation, :].copy()
data_test  = data.loc[end_validation:, :].copy()

The code above shows how `end_train` and `end_validation` are defined and how the data is split. `data` has 3600 rows.

from skforecast.model_selection import backtesting_forecaster
from skforecast.ForecasterAutoreg import ForecasterAutoreg

Above are the imports used. The `forecaster` variable is created and fitted as shown below:

forecaster = ForecasterAutoreg(
             regressor     = Ridge(random_state=123),
             lags          = 24,
             transformer_y = StandardScaler()
         )

forecaster.fit(y=data.loc[:end_validation, 'Count of Visits'])

The complete traceback is shown below:

KeyError                                  Traceback (most recent call last)
Cell In[68], line 1
----> 1 metric, predictions = backtesting_forecaster(
  2                           forecaster         = forecaster,
  3                           y                  =  data['Count of 
  Visits'],
  4                           steps              = 24,
  5                           metric             = 
 'mean_absolute_error',
  6                           initial_train_size = 
 len(data.loc[:end_validation]),
  7                           refit              = False,
  8                           verbose            = True,
  9                           show_progress      = True
 10                     )

 File ~\anaconda3\lib\site- 
packages\skforecast\model_selection\model_selection.py:887, in 
backtesting_forecaster(forecaster, y, steps, metric, 
initial_train_size, fixed_train_size, gap, allow_incomplete_fold, 
exog, refit, interval, n_boot, random_state, in_sample_residuals, 
verbose, show_progress)
869     metrics_values, backtest_predictions = 
_backtesting_forecaster_refit(
870         forecaster            = forecaster,
871         y                     = y,
(...)
 884         show_progress         = show_progress
885     )
886 else:
--> 887     metrics_values, backtest_predictions = 
_backtesting_forecaster_no_refit(
888         forecaster            = forecaster,
889         y                     = y,
890         steps                 = steps,
891         metric                = metric,
892         initial_train_size    = initial_train_size,
893         gap                   = gap,
894         allow_incomplete_fold = allow_incomplete_fold,
895         exog                  = exog,
896         interval              = interval,
897         n_boot                = n_boot,
898         random_state          = random_state,
899         in_sample_residuals   = in_sample_residuals,
900         verbose               = verbose,
901         show_progress         = show_progress
902     )  
904 return metrics_values, backtest_predictions

File ~\anaconda3\lib\site- 
packages\skforecast\model_selection\model_selection.py:703, in 
_backtesting_forecaster_no_refit(forecaster, y, steps, metric, 
initial_train_size, gap, allow_incomplete_fold, exog, interval, 
n_boot, random_state, in_sample_residuals, verbose, show_progress)
700 if isinstance(backtest_predictions, pd.Series):
701     backtest_predictions = pd.DataFrame(backtest_predictions)
--> 703 metrics_values = [m(
704                     y_true = y.loc[backtest_predictions.index],
705                     y_pred = backtest_predictions['pred']
706                   ) for m in metrics
707                  ]
709 if not isinstance(metric, list):
710     metrics_values = metrics_values[0]

File ~\anaconda3\lib\site- 
packages\skforecast\model_selection\model_selection.py:704, in 
<listcomp>(.0)
700 if isinstance(backtest_predictions, pd.Series):
701     backtest_predictions = pd.DataFrame(backtest_predictions)
703 metrics_values = [m(
--> 704                     y_true = 
y.loc[backtest_predictions.index],
705                     y_pred = backtest_predictions['pred']
706                   ) for m in metrics
707                  ]
709 if not isinstance(metric, list):
710     metrics_values = metrics_values[0]

File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1073, in 
_LocationIndexer.__getitem__(self, key)
1070 axis = self.axis or 0
1072 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073 return self._getitem_axis(maybe_callable, axis=axis)

File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1301, in 
_LocIndexer._getitem_axis(self, key, axis)
1298     if hasattr(key, "ndim") and key.ndim > 1:
1299         raise ValueError("Cannot index with multidimensional 
key")
-> 1301     return self._getitem_iterable(key, axis=axis)
1303 # nested tuple slicing
1304 if is_nested_tuple(key, labels):

File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1239, in 
_LocIndexer._getitem_iterable(self, key, axis)
1236 self._validate_key(key, axis)
1238 # A collection of keys
-> 1239 keyarr, indexer = self._get_listlike_indexer(key, axis)
1240 return self.obj._reindex_with_indexers(
1241     {axis: [keyarr, indexer]}, copy=True, allow_dups=True
1242 ) 

File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1432, in 
_LocIndexer._get_listlike_indexer(self, key, axis)
1429 ax = self.obj._get_axis(axis)
1430 axis_name = self.obj._get_axis_name(axis)
-> 1432 keyarr, indexer = ax._get_indexer_strict(key, axis_name)
1434 return keyarr, indexer

File ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py:6070, 
in Index._get_indexer_strict(self, key, axis_name)
6067 else:
6068     keyarr, indexer, new_indexer = 
self._reindex_non_unique(keyarr)
-> 6070 self._raise_if_missing(keyarr, indexer, axis_name)
6072 keyarr = self.take(indexer)
6073 if isinstance(key, Index):
6074     # GH 42790 - Preserve name from an Index

File ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py:6130, 
in Index._raise_if_missing(self, key, indexer, axis_name)
6128     if use_interval_msg:
6129         key = list(key)
-> 6130     raise KeyError(f"None of [{key}] are in the 
[{axis_name}]")
6132 not_found = list(ensure_index(key)[missing_mask.nonzero() 
[0]].unique())
6133 raise KeyError(f"{not_found} not in index")

KeyError: "None of [Int64Index([24, 25, 26, 27, 28, 29, 30, 31, 32, 
33,\n            ...\n            38, 39, 40, 41, 42, 43, 44, 45, 46, 
47],\n           dtype='int64', length=720)] are in the [index]"

Welcome to Stack Overflow! Please take the [tour]. SO is a Q&A site, but this is not a question. Check out [ask]. Maybe you want to ask why you're getting this error when the data does have all those indices? IDK, I don't know what library you're using, what the inputs look like, or where the error's occurring. Please make a [mre] including enough code to reproduce the error and the [full error message with traceback](https://meta.stackoverflow.com/q/359146/4518341). See also [How to make good reproducible pandas examples](/q/20109391/4518341). — wjandrea, Jul 04 '23 at 14:31

KeyError: "None of [Int64Index(...)] are in the [index]"

0 Answers