8

I am trying to use the ARIMA algorithm in the statsmodels library to forecast a time series dataset. It is a stock price dataset, and when I feed normalized data to the model, it gives the error below.

Note: This is univariate forecasting, and I am trying to forecast the closing price.

ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)

Question: What is the reason for this? Is it not a good practice to normalize data when feeding to ARIMA models?

Shown below is my code:

from statsmodels.tsa.arima_model import ARIMA
import pandas as pd
from pandas.plotting import autocorrelation_plot
import matplotlib as mplt
mplt.use('agg')  # Must be before importing matplotlib.pyplot or pylab!
from matplotlib import pyplot
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt


class RNNConfig:
    """Settings shared by the ARIMA forecasting script."""

    # Input data: CSV file to load and the column to forecast.
    fileName = 'AIG.csv'
    column = 'Close'

    # ARIMA order components, passed to the model as (p, d, q).
    lag_order = 2
    degree_differencing = 1
    order_moving_avg = 0

    # Fixed lower/upper bounds used by scale() for min-max normalisation.
    min = 10
    max = 2000

    # Not referenced by the code visible in this file.
    test_ratio = 0.2
    scaler = MinMaxScaler(feature_range=(0, 1))


config = RNNConfig()

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Both inputs are flattened to 1-D before they are compared, so that a
    column of predictions (shape ``(n, 1)``, e.g. a list of one-element
    arrays) is matched element-wise against a flat ``(n,)`` truth vector
    instead of being broadcast into an ``(n, n)`` matrix.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values with the same number of
            elements as ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100``. Zeros in ``y_true``
        are not guarded against and produce ``inf``/``nan``.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def scale(data):
    """Min-max scale the configured column of ``data`` in place.

    The bounds are the fixed ``config.min`` / ``config.max`` values, not
    the observed minimum and maximum of the column. The same object that
    was passed in is returned, with ``config.column`` overwritten.
    """
    column = config.column
    lower = config.min
    span = config.max - lower
    data[column] = (data[column] - lower) / span
    return data

def ARIMA_model():
    """Walk-forward ARIMA forecast of the configured price column.

    Loads ``config.fileName``, reverses the row order, scales the
    ``config.column`` values with ``scale()``, and uses the first 66% of
    the observations as the initial training history. For every remaining
    observation a fresh ARIMA model is fitted on the history, a one-step
    forecast is recorded, and the true observation is appended to the
    history. RMSE, MAE and MAPE are printed and a truth-vs-prediction plot
    is written to ``ARIMA results.png``.
    """
    stock_data = pd.read_csv(config.fileName)
    # Reverse the row order of the file.
    stock_data = stock_data.reindex(index=stock_data.index[::-1])

    scaled_data = scale(stock_data)

    # Work on a plain positional array. After the reversal the Series
    # carries a descending integer index, so ``test[t]`` would be a *label*
    # lookup and would walk the test set backwards; plotting the Series
    # would likewise mirror it relative to the predictions.
    price = np.asarray(scaled_data[config.column], dtype=float)

    size = int(len(price) * 0.66)
    train, test = price[:size], price[size:]
    history = list(train)
    predictions = []
    order = (config.lag_order, config.degree_differencing, config.order_moving_avg)
    for obs in test:
        model = ARIMA(history, order=order)
        model_fit = model.fit(disp=0)
        output = model_fit.forecast()
        # The first element of the forecast result may be a scalar or a
        # one-element array; reduce it to a plain float so the metrics
        # receive a flat list of numbers.
        yhat = float(np.ravel(output[0])[0])
        predictions.append(yhat)
        history.append(obs)
        print('predicted=%f, expected=%f' % (yhat, obs))

    meanSquaredError = mean_squared_error(test, predictions)
    rootMeanSquaredError = sqrt(meanSquaredError)
    print("RMSE:", rootMeanSquaredError)
    mae = mean_absolute_error(test, predictions)
    print("MAE:", mae)
    mape = mean_absolute_percentage_error(test, predictions)
    print("MAPE:", mape)

    # plot
    pyplot.plot(test, color='blue', label='truth close')
    pyplot.plot(predictions, color='red', label='pred close')
    pyplot.legend(loc='upper left', frameon=False)
    pyplot.xlabel("day")
    pyplot.ylabel("closing price")
    pyplot.grid(ls='--')
    pyplot.savefig("ARIMA results.png", format='png', bbox_inches='tight', transparent=False)

# Run the forecast only when this file is executed as a script.
if __name__ == '__main__':
    ARIMA_model()
KiynL
  • 4,097
  • 2
  • 16
  • 34
Suleka_28
  • 2,761
  • 4
  • 27
  • 43

1 Answer

0

I'm not sure whether the error is produced due to normalization or other reasons such as seasonality. In any case, the problem can be solved by increasing the maximum number of iterations used to estimate the model parameters. I.e.,

# Illustrative snippet: time_series, p, d and q are placeholders to be
# supplied by the reader.
model = ARIMA(time_series, order=(p, d, q))
# Raise the optimizer's iteration cap to 300.
# NOTE(review): method_kwargs looks like a parameter of the newer
# statsmodels.tsa.arima.model.ARIMA.fit, not the legacy arima_model API — confirm.
model_fit = model.fit(method_kwargs={'maxiter':300})

Note that statsmodels.tsa.arima_model.ARIMA has been removed in favor of statsmodels.tsa.arima.model.ARIMA.