I am trying to plot confidence intervals in my matplotlib plot with the seaborn style (similar to what the regplot function in seaborn produces, but with the ability to access the statistics from the regression).
My current plot looks like this:
Which is created with the following code:
# Load the per-year storm data from its CSV file into a DataFrame
storms_csv = 'number_of_storms_per_year.csv'
storms_per_year = pd.read_csv(storms_csv)
#Create linear regression function
def lin_reg(x,y):
    """Fit a linear regression of ``y`` on ``x`` and report its statistics.

    Two fits are performed on the same data: a scikit-learn
    ``LinearRegression`` (its intercept, coefficients and score are printed)
    and a statsmodels OLS with an added constant term (its full summary is
    printed).

    Parameters
    ----------
    x, y
        Regressor and response, in a form accepted by both
        ``LinearRegression().fit`` and ``sm.OLS``.

    Returns
    -------
    results
        The fitted statsmodels OLS results object.
    conf_interval
        ``results.conf_int(0.05)``: the confidence intervals of the fitted
        parameters at alpha = 0.05.
    """
    linreg = LinearRegression().fit(x, y)
    print(linreg.intercept_, linreg.coef_, linreg.score(x, y))

    # statsmodels does not add an intercept by itself, hence add_constant
    design = sm.add_constant(x)
    results = sm.OLS(y, design).fit()
    conf_interval = results.conf_int(0.05)
    print(results.summary())

    # Hand the statistics back so callers can use them (e.g. in a plot)
    # instead of only seeing them printed.
    return results, conf_interval
#Define variables for linear regression - frequency
x_col = 'season'
y_col = 'days'
x = storms_per_year[x_col]
y = storms_per_year[y_col]
# Reshape each column into a single-column 2-D array, the layout handed to
# LinearRegression.fit and lin_reg below
x_array = np.array(x).reshape(-1, 1)
y_array = np.array(y).reshape(-1, 1)
linreg = LinearRegression().fit(x_array, y_array)
#Perform linear regression for frequency
lin_reg(x_array, y_array)
# Build the legend text from the fitted model so it always matches the data
# (np.ravel copes with the array-shaped coef_/intercept_ of a 2-D target)
slope = float(np.ravel(linreg.coef_)[0])
intercept = float(np.ravel(linreg.intercept_)[0])
fit_label = f'y={slope:.4f}x{intercept:+.4f}'
#Plot
sns.set_theme(context='notebook', style='darkgrid')
# NOTE(review): the palette returned here is neither stored nor passed on
sns.light_palette("#79C")
plt.scatter(x_array, y_array, alpha=0.25)
plt.plot(x_array, linreg.predict(x_array), label=fit_label)
plt.xlabel('Season')
plt.ylabel('Number of Storms')
plt.title('Frequency of Storms Over Time')
plt.legend()
# show must be called; the bare attribute `plt.show` displays nothing
plt.show()
I have also tried the following, which draws the confidence intervals successfully:
import pydove as dv
#Plot-----------------------------
#Set variables
x_col = 'season'
y_col = 'days'
x = storms_per_year[x_col]
y = storms_per_year[y_col]
fig, ax = plt.subplots()
res = dv.regplot(x, y, ax=ax)
ax.set_xlabel('Season')
ax.set_ylabel('Number of Storms')
ax.set_title('Frequency of Storms Over Time')
# NOTE(review): assumes `res` is a statsmodels OLS results object (it exposes
# .summary() below) whose params are ordered (intercept, slope) - confirm
# against the pydove documentation.
intercept, slope = res.params
stats_label = f'y={slope:.4f}x{intercept:+.4f}, $R^2$={res.rsquared:.3f}'
# Proxy artist: an empty line whose only job is to carry the statistics text
# into the legend.
reg_line = mlines.Line2D([], [], label=stats_label)
ax.legend(handles=[reg_line])
res.summary()
But then I cannot add the statistical info to the legend as I want to do. Any suggestions are welcome.