-1

i have code that looks like this below and I"m trying to draw a black line on the plot shown for Jul - Dec for the data in the df "ltyc". My error is at the very end just before the plt.legend line.

import warnings
import itertools
import numpy as np
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')
import pandas as pd
import statsmodels.api as sm
import matplotlib

matplotlib.rcParams['axes.labelsize'] = 14
matplotlib.rcParams['xtick.labelsize'] = 12
matplotlib.rcParams['ytick.labelsize'] = 12
matplotlib.rcParams['text.color'] = 'k'
from sys import exit

df = pd.read_excel("MOSDailyWindSpeed.xlsx")

wspdBH1 = df.groupby('Date')[' Simulated WS BH1PI'].sum().reset_index()
wspdHOO = df.groupby('Date')[' Simulated WS HOO801'].sum().reset_index()
wspdBH1 = wspdBH1.set_index('Date')
wspdHOO = wspdHOO.set_index('Date')
wspdBH1.index
wspdHOO.index
y = wspdHOO[' Simulated WS HOO801'].resample('MS').mean()#monthly mean --> 
change site here 'MS' is month start for 'Date' col
y['2017':]#look at daily data starting 2017 -view data 

y.plot(figsize=(15, 6))
plt.show()

from pylab import rcParams
rcParams['figure.figsize'] = 18, 8

decomposition = sm.tsa.seasonal_decompose(y, model='additive')
fig = decomposition.plot()
plt.show()

p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, 
d, q))]

print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))

for param in pdq:
  for param_seasonal in seasonal_pdq:
    try:
        mod = sm.tsa.statespace.SARIMAX(y,
                                        order=param,
                                        seasonal_order=param_seasonal,
                                        enforce_stationarity=False,
                                        enforce_invertibility=False)

        results = mod.fit()

        print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, 
results.aic))
    except:
        continue

mod = sm.tsa.statespace.SARIMAX(y,
                            order=(1, 1, 1),
                            seasonal_order=(1, 1, 0, 12),
                            enforce_stationarity=False,
                            enforce_invertibility=False)

results = mod.fit()

print(results.summary().tables[1])        

results.plot_diagnostics(figsize=(16, 8))
plt.show()

# VALIDATE THE FORECAST - PLOT FORECAST VS ACTUAL 
pred = results.get_prediction(start=pd.to_datetime('2019-01-01'), 
dynamic=False)
pred_ci = pred.conf_int()
ax = y['2019':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, 
figsize=(14, 7))

ax.fill_between(pred_ci.index,
            pred_ci.iloc[:, 0],
            pred_ci.iloc[:, 1], color='k', alpha=.2)

ax.set_xlabel('Date')
ax.set_ylabel('Furniture Sales')
plt.legend()

plt.show()

y_forecasted = pred.predicted_mean
y_truth = y['2019-01-01':]

# Compute the mean square error
mse = ((y_forecasted - y_truth) ** 2).mean()
print('The Mean Squared Error of our forecasts is {}'.format(round(mse, 
2)))
print('The Root Mean Squared Error of our forecasts is 
{}'.format(round(np.sqrt(mse), 2)))

#PRODUCE AND VISUALIZE FORECAST
pred_uc = results.get_forecast(steps=6)
pred_ci = pred_uc.conf_int()

ax = y['2019':].plot(label='observed', figsize=(14, 7))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci.index,
            pred_ci.iloc[:, 0],
            pred_ci.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Date')
ax.set_ylabel('MOS Wind Speed')
#add the LT monthly average to plot
from datetime import date
cm = date.today().month
lty = y.groupby(y.index.month).mean()
lty = lty.to_frame()
ltyc = lty.iloc[cm-1:12].reset_index() # extract curr month to end of LT 
mean monthly wind speed
ltyc.plot(label='LT Mean',ax=ax, color='k')

plt.legend()
plt.show()

The ltyc df looks like this that I am trying to plot in the image below using axis = ax. I think I need to change the "Date" col below since the current axis doesn't interpret 7,8,9,10,11,12 as months but I'm not sure how to do this.

   Date   Simulated WS HOO801
0     7              5.491916
1     8              5.596823
2     9              5.793934
3    10              7.501096
4    11              8.152358
5    12              8.426322 

Finally, my error looks like this:

File 
"C:\Users\U321103\AppData\Local\Continuum\anaconda3\envs\Stats\lib\site- 
packages\pandas\plotting\_matplotlib\timeseries.py", line 309, in 
format_dateaxis
raise TypeError("index type not supported")

TypeError: index type not supported

enter image description here

user2100039
  • 1,280
  • 2
  • 16
  • 31
  • Please [create a reproducible copy of the DataFrame with `df.head(10).to_clipboard(sep=',')`](https://stackoverflow.com/questions/52413246/how-to-provide-a-copy-of-your-dataframe-with-to-clipboard), [edit] the question, and paste the clipboard into a code block or include synthetic data: [How to make good reproducible pandas examples](https://stackoverflow.com/questions/20109391/how-to-make-good-reproducible-pandas-examples) – Trenton McKinney Jul 14 '20 at 21:14
  • Also the error does not correspond to any of the code in the example. Please provide a [Provide a Minimal, Reproducible Example (e.g. code, data, errors) as text](https://stackoverflow.com/help/minimal-reproducible-example) – Trenton McKinney Jul 14 '20 at 21:15

1 Answers1

0

I used this to convert the integer dates into yyyy-mm-dd format in the DF. ltyc['Date'] = pd.to_datetime(ltyc["Date"], format='%m').apply(lambda dt: dt.replace(year=2020))

Then, i converted the DF (ltyc) into a series like this: ltycs = pd.Series(ltyc['LT Mean'].values, index=ltyc['Date'])#convert to Series since the other plots are in series format

ltycs.plot(label='LT Mean',ax=ax,color='k')

user2100039
  • 1,280
  • 2
  • 16
  • 31