0

This is what my data looks like (from Jan 2016 to Jul 2018) : enter image description here

I am trying to fit a SARIMA model to it using grid search. I drop locationkey, units, and holiday, and use holidaybool as the exogenous (exog) feature:

# Remove the columns that are not used by the model (mutates df in place).
df.drop(columns=['units', 'locationkey','holiday'], inplace=True) 
# Every remaining column except the target 'sales' is a candidate exogenous feature.
features = df.columns.drop(['sales'])

And then it is split into training and testing data:

# Training rows: label slice up to '2017-12-31'
# (assumes df has a sorted DatetimeIndex -- TODO confirm).
df_tr = df.loc[:'2017-12-31']
# Test rows: label slice from '2018-01-31' onward.
# NOTE(review): dates 2018-01-01 .. 2018-01-30 fall in neither split; if the
# index is daily, confirm whether '2018-01-01' was intended here.
df_te =df.loc['2018-01-31':]

This daily data is then aggregated by month, and the candidate configurations are generated like this:

def configs(features, seasonal=(0,)):
    """Build the full grid of SARIMA configurations to evaluate.

    One configuration is emitted for every combination of exogenous-feature
    subset, log flag, non-seasonal order ``(p, d, q)``, trend ``t`` and
    seasonal order ``(P, D, Q, S)``. The grid holds 6912 combinations per
    feature subset per seasonal period, so it grows quickly.

    Args:
        features: candidate exogenous column names, handed to ``powerset``
            (defined elsewhere; presumably yields every subset -- confirm).
        seasonal: iterable of seasonal period lengths to try for ``S``.
            The default is an immutable tuple rather than a shared list.

    Returns:
        list: ``[(p, d, q), (P, D, Q, S), t, feat_list, log_]`` entries, in
        the same order the equivalent nested loops would produce them
        (feature subset varies slowest, ``S`` fastest).
    """
    from itertools import product

    # define config lists
    log_flags = [False, True]
    p_params = [0, 1, 2, 3]
    d_params = [0, 1, 2]
    q_params = [0, 1, 2, 3]
    t_params = ['n', 'c', 't', 'ct']
    P_params = [0, 1, 2]
    D_params = [0, 1]
    Q_params = [0, 1, 2]

    # product() iterates with the rightmost argument changing fastest, which
    # is exactly the order of a nested for-loop written in this sequence.
    grid = product(
        powerset(features),
        log_flags,
        p_params,
        d_params,
        q_params,
        t_params,
        P_params,
        D_params,
        Q_params,
        seasonal,
    )

    # create config instances
    return [
        [(p, d, q), (P, D, Q, S), t, feat_list, log_]
        for feat_list, log_, p, d, q, t, P, D, Q, S in grid
    ]

And then the grid search is applied on it:

def grid_search(df_tr_m, df_te_m, cfg_list, parallel=False):
    """Score the configurations in ``cfg_list`` with ``score_model``.

    NOTE(review): this is an excerpt. Only the ``parallel=True`` branch is
    shown, and ``results`` is neither post-processed nor returned here, while
    the caller in the traceback unpacks three values from this function.

    Args:
        df_tr_m: training data passed through to ``score_model``.
        df_te_m: test data passed through to ``score_model``.
        cfg_list: iterable of configurations to score.
        parallel: when true, fan the scoring out over two workers.
    """
    if parallel:
        # execute configs in parallel
        # (Parallel/delayed are presumably joblib's -- import not shown)
        results = Parallel(n_jobs=2)(delayed(score_model)(df_tr_m, df_te_m, cfg) for cfg in cfg_list)

This is the function that sends everything to SARIMA:

def score_model(df_tr_m, df_te_m, cfg, debug=False):
    """Validate one configuration and print a one-line summary of the outcome.

    Args:
        df_tr_m: training data forwarded to ``validation``.
        df_te_m: test data forwarded to ``validation``.
        cfg: the configuration being scored; echoed in the printed line.
        debug: unused in the portion of the function shown here.

    ``validation`` (defined elsewhere) is unpacked as ``(error, msg)``.
    When ``msg`` mentions 'Positive' or 'Negative', ``error[0..2]`` are
    printed as wmape, rmse and aic.
    """
    error, msg = validation(df_tr_m, df_te_m, cfg)

    # Handle a missing message first: running the substring tests below on
    # None would raise TypeError before this case could ever be reported.
    if msg in ('', None):
        print(' > Model%s %s' % (cfg, 'Something Went Wrong'))
    else:
        # check for an interesting result
        if 'Positive' in msg or 'Negative' in msg:
            print(' > Model%s wmape=%.2f rmse=%.2f aic=%.0f -- %s' % (cfg, error[0],error[1],error[2], msg))
        if 'NaN Pred' in msg or 'Failed' in msg:
            print(' > Model%s %s' % (cfg, msg))

def validation(df_tr_m, df_te_m, cfg):
    """Run ``sarima_forecast`` for one configuration (excerpt; rest not shown).

    NOTE(review): ``sarima_forecast`` is not visible here. The four-way
    unpack below raises TypeError whenever it returns None, which is the
    failure reported in the traceback in this post -- check that every path
    through it (including any exception handlers) returns a 4-tuple.
    """
    # Placeholder defaults; msg is overwritten by the unpack below.
    # NOTE(review): the np.NaN alias was removed in NumPy 2.0; np.nan is the
    # spelling that works on all versions.
    error=np.NaN
    msg = str()
    pred, aic, success, msg = sarima_forecast(df_tr_m, df_te_m, cfg)

It works fine for the first few configurations, but then it throws a NoneType exception:

[(0, 0, 1), (0, 0, 1, 12), 'c', ['holidaybool'], False]
([(0, 0, 1), (0, 0, 1, 12), 'c', ['holidaybool'], False], None, 'maxlag should be < nobs')
[(0, 0, 1), (0, 0, 2, 12), 'c', ['holidaybool'], False]
([(0, 0, 1), (0, 0, 2, 12), 'c', ['holidaybool'], False], None, 'maxlag should be < nobs')
[(0, 0, 1), (0, 1, 0, 12), 'c', ['holidaybool'], False]
 > Model[(0, 0, 1), (0, 1, 0, 12), 'c', ['holidaybool'], False] wmape=39.77 rmse=9911702.00 aic=339 -- Real Pred,Positive
([(0, 0, 1), (0, 1, 0, 12), 'c', ['holidaybool'], False], (39.77, 9911702.0, 339.0), 'Real Pred,Positive')
[(0, 0, 1), (0, 1, 1, 12), 'c', ['holidaybool'], False]
([(0, 0, 1), (0, 1, 1, 12), 'c', ['holidaybool'], False], None, 'maxlag should be < nobs')
[(0, 0, 1), (0, 1, 2, 12), 'c', ['holidaybool'], False]
([(0, 0, 1), (0, 1, 2, 12), 'c', ['holidaybool'], False], None, 'maxlag should be < nobs')

[(0, 0, 1), (1, 0, 0, 12), 'c', ['holidaybool'], False]

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-32-e2ec42f294a3> in <module>
     27     print("done 1")
     28     # grid search
---> 29     pos_, neg_, failed_ = grid_search(df_tr_m, df_te_m, cfg_list)
     30     print('done')
     31 

<ipython-input-30-e6de90770dad> in grid_search(df_tr_m, df_te_m, cfg_list, parallel)
    160             print(cfg)
    161             if cfg:
--> 162                 r = score_model(df_tr_m, df_te_m, cfg)
    163                 print(r)
    164             if r:

<ipython-input-30-e6de90770dad> in score_model(df_tr_m, df_te_m, cfg, debug)
    136 def score_model(df_tr_m, df_te_m, cfg, debug=False):
    137     msg = str()
--> 138     error, msg = validation(df_tr_m, df_te_m, cfg)
    139 
    140     # check for an interesting result

<ipython-input-30-e6de90770dad> in validation(df_tr_m, df_te_m, cfg)
    117     error=np.NaN
    118     msg = str()
--> 119     pred, aic, success, msg = sarima_forecast(df_tr_m, df_te_m, cfg)
    120 
    121     if 'Real Pred' in msg:

TypeError: 'NoneType' object is not iterable

I have been stuck on this issue for quite a while now. I tried checking whether the training data, the testing data, or the features are None, but none of them is. Is there any way to get to the root of this problem?

Fatima Arshad
  • 119
  • 1
  • 9
  • Please provide a minimal example that reproduces the problem (https://stackoverflow.com/help/minimal-reproducible-example) – Itamar Mushkin Oct 28 '19 at 12:58
  • Also, please do not post images of dataframes, but rather post the data itself (or, even better, code that creates a minimal example that reproduces the problem), see https://stackoverflow.com/questions/20109391/how-to-make-good-reproducible-pandas-examples – Itamar Mushkin Oct 28 '19 at 12:59

0 Answers0