This is what my data looks like (from Jan 2016 to Jul 2018) :
I am trying to fit a SARIMA model to it using grid search. I drop `locationkey`, `units` and `holiday`, and use `holidaybool` as the exogenous feature:
# Remove the columns that are not passed to the model; inplace=True mutates
# df itself rather than returning a new frame.
df.drop(columns=['units', 'locationkey','holiday'], inplace=True)
# Every remaining column except 'sales' is kept as a candidate exogenous
# feature ('sales' is presumably the target series -- confirm).
features = df.columns.drop(['sales'])
And then it is split into training and testing data:
# Label-based slices on the index (assumes df has a sorted date index --
# confirm). Training data: everything up to and including 2017-12-31.
df_tr = df.loc[:'2017-12-31']
# Test data starts on the first day of 2018. Starting at '2018-01-31' would
# leave 2018-01-01..2018-01-30 in neither set, so the January 2018 monthly
# aggregate would be built from a single day.
df_te = df.loc['2018-01-01':]
This daily data is then aggregated by month, and the different configurations are generated like this:
def configs(features, seasonal=None):
    """Build every model configuration to try in the grid search.

    Each configuration is a list of the form
    ``[(p, d, q), (P, D, Q, S), t, feat_list, log_]``.

    Parameters
    ----------
    features : iterable
        Candidate exogenous feature names. Every subset returned by
        ``powerset(features)`` is combined with every parameter setting.
    seasonal : iterable of int, optional
        Seasonal period(s) ``S`` to try. Defaults to ``[0]``.

    Returns
    -------
    list
        One configuration per combination, in the order nested loops would
        produce: the feature subset varies slowest and ``S`` varies fastest.
        With the value lists below that is 6912 configurations per feature
        subset and seasonal period.
    """
    from itertools import product

    # A None sentinel avoids sharing one mutable default list across calls.
    if seasonal is None:
        seasonal = [0]

    # define config lists
    features_lists = powerset(features)
    log_params = [False, True]
    p_params = [0, 1, 2, 3]
    d_params = [0, 1, 2]
    q_params = [0, 1, 2, 3]
    t_params = ['n', 'c', 't', 'ct']
    P_params = [0, 1, 2]
    D_params = [0, 1]
    Q_params = [0, 1, 2]

    # product() advances its last argument fastest, which matches the order
    # of the equivalent ten-level nested loop.
    grid = product(
        features_lists,
        log_params,
        p_params,
        d_params,
        q_params,
        t_params,
        P_params,
        D_params,
        Q_params,
        seasonal,
    )

    # create config instances
    return [
        [(p, d, q), (P, D, Q, S), t, feat_list, log_]
        for feat_list, log_, p, d, q, t, P, D, Q, S in grid
    ]
And then the grid search is applied on it:
# Score every configuration in cfg_list by calling score_model on it.
# NOTE(review): this is an excerpt -- only the parallel branch is shown. The
# traceback further down shows a sequential path that calls
# score_model(df_tr_m, df_te_m, cfg) per config, and a caller that unpacks
# three return values (pos_, neg_, failed_); neither is visible here.
def grid_search(df_tr_m, df_te_m, cfg_list, parallel=False):
if parallel:
# execute configs in parallel
# Two worker jobs, one score_model call per configuration (Parallel/delayed
# are presumably joblib's -- confirm against the imports).
results = Parallel(n_jobs=2)(delayed(score_model)(df_tr_m, df_te_m, cfg) for cfg in cfg_list)
This is the function that sends each configuration to SARIMA:
# Evaluate one configuration through validation() and print a one-line
# summary chosen by the status text in msg.
# NOTE(review): excerpt only -- no return statement is shown, although the
# console output below prints (cfg, error, msg) tuples in the caller.
def score_model(df_tr_m, df_te_m, cfg, debug=False):
# This initial value is overwritten by the unpacking on the next line.
msg = str()
error, msg = validation(df_tr_m, df_te_m, cfg)
# check for an interesting result
#if msg in ['Positive','Negative']:
# Substring test, because msg can hold several comma-separated tags
# (e.g. 'Real Pred,Positive' in the output below).
if 'Positive' in msg or 'Negative' in msg:
# error is indexed as (wmape, rmse, aic) here.
print(' > Model%s wmape=%.2f rmse=%.2f aic=%.0f -- %s' % (cfg, error[0],error[1],error[2], msg))
#if msg in ['NaN Pred','Failed']:
if 'NaN Pred' in msg or 'Failed' in msg:
print(' > Model%s %s' % (cfg, msg))
# An empty or missing status means validation reported nothing usable.
if msg in ['',None]:
print(' > Model%s %s' % (cfg, 'Something Went Wrong'))
# Run sarima_forecast for one configuration and derive (error, msg) from it.
# NOTE(review): excerpt only -- the traceback below shows the body continues
# with `if 'Real Pred' in msg:` after the forecast call.
def validation(df_tr_m, df_te_m, cfg):
# NOTE(review): the np.NaN alias was removed in NumPy 2.0; np.nan is the
# portable spelling.
error=np.NaN
# This initial value is overwritten by the unpacking on the next line.
msg = str()
# This unpacking needs a 4-item iterable. If sarima_forecast returns None
# (for example a code path that ends without a return statement), this line
# raises "TypeError: 'NoneType' object is not iterable" -- the error in the
# traceback below. sarima_forecast itself is not shown here.
pred, aic, success, msg = sarima_forecast(df_tr_m, df_te_m, cfg)
It works fine for the first few configurations, but then it raises a `TypeError` (`'NoneType' object is not iterable`):
[(0, 0, 1), (0, 0, 1, 12), 'c', ['holidaybool'], False]
([(0, 0, 1), (0, 0, 1, 12), 'c', ['holidaybool'], False], None, 'maxlag should be < nobs')
[(0, 0, 1), (0, 0, 2, 12), 'c', ['holidaybool'], False]
([(0, 0, 1), (0, 0, 2, 12), 'c', ['holidaybool'], False], None, 'maxlag should be < nobs')
[(0, 0, 1), (0, 1, 0, 12), 'c', ['holidaybool'], False]
> Model[(0, 0, 1), (0, 1, 0, 12), 'c', ['holidaybool'], False] wmape=39.77 rmse=9911702.00 aic=339 -- Real Pred,Positive
([(0, 0, 1), (0, 1, 0, 12), 'c', ['holidaybool'], False], (39.77, 9911702.0, 339.0), 'Real Pred,Positive')
[(0, 0, 1), (0, 1, 1, 12), 'c', ['holidaybool'], False]
([(0, 0, 1), (0, 1, 1, 12), 'c', ['holidaybool'], False], None, 'maxlag should be < nobs')
[(0, 0, 1), (0, 1, 2, 12), 'c', ['holidaybool'], False]
([(0, 0, 1), (0, 1, 2, 12), 'c', ['holidaybool'], False], None, 'maxlag should be < nobs')
[(0, 0, 1), (1, 0, 0, 12), 'c', ['holidaybool'], False]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-32-e2ec42f294a3> in <module>
27 print("done 1")
28 # grid search
---> 29 pos_, neg_, failed_ = grid_search(df_tr_m, df_te_m, cfg_list)
30 print('done')
31
<ipython-input-30-e6de90770dad> in grid_search(df_tr_m, df_te_m, cfg_list, parallel)
160 print(cfg)
161 if cfg:
--> 162 r = score_model(df_tr_m, df_te_m, cfg)
163 print(r)
164 if r:
<ipython-input-30-e6de90770dad> in score_model(df_tr_m, df_te_m, cfg, debug)
136 def score_model(df_tr_m, df_te_m, cfg, debug=False):
137 msg = str()
--> 138 error, msg = validation(df_tr_m, df_te_m, cfg)
139
140 # check for an interesting result
<ipython-input-30-e6de90770dad> in validation(df_tr_m, df_te_m, cfg)
117 error=np.NaN
118 msg = str()
--> 119 pred, aic, success, msg = sarima_forecast(df_tr_m, df_te_m, cfg)
120
121 if 'Real Pred' in msg:
TypeError: 'NoneType' object is not iterable
I have been stuck on this issue for quite a while now. I checked whether the training data, the testing data, or the features are `None`, but none of them is. Is there any way to get to the root of this problem?