I'm relatively new to python and I'm having some issues trying to plot some data with pandas - please help! The relevant sections of my code are below.
I create a new data-frame, do some analysis with it, then split it into two separate data-frames (one for 2016 data, one for 2017 data). I then calculate some new columns for the 2017 data-frame (percentage difference between these values and the values for 2016). Finally, I make several plots to show this data.
I'm trying to modify the x axis tick labels to show each month as e.g. '07/17' for July 2017. I'm using a method I've used before, however it really doesn't seem to be working!
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib as mp
import numpy as np
import seaborn as sns
# set up index dates
dates1 = pd.date_range(start='2016-06-01', end='2016-12-01', freq='MS')
dates2 = pd.date_range(start='2017-06-01', end='2017-12-01', freq='MS')
dates = dates1.union(dates2)
# create data-frame
data = {'sessions':[609981,734725,713928,602909,562960,425774,394887,1094831,
1210158,1141875,898867,798469,642942,546186],
'conversion':[5.40,4.98,4.89,4.68,3.99,3.54,3.71,4.17,4.23,4.13,4.3,3.86,3.24,3.60],
'avg_value':[98.12,95.43,89.94,86.99,83.23,86.31,93.34,98.01,96.89,90.82,86.00,84.40,85.90,89.53]}
df = pd.DataFrame(data,index=dates)
# calculate revenue and yearly changes
df['revenue'] = (df.sessions*df.conversion/100.)*df.avg_value
df_2016 = df['Jan 2016':'Dec 2016'].copy()
df_2017 = df['Jan 2017':'Dec 2017'].copy()
df_2017['d_avg_value'] = (df_2017.avg_value.values - df_2016.avg_value.values)*100 / df_2016.avg_value.values
df_2017['d_conversion'] = (df_2017.conversion.values - df_2016.conversion.values)*100 / df_2016.conversion.values
df_2017['d_sessions'] = (df_2017.sessions.values - df_2016.sessions.values)*100 / df_2016.sessions.values
df_2017['d_revenue'] = (df_2017.revenue.values - df_2016.revenue.values)*100 / df_2016.revenue.values
# set up subplot layout
fig, axes = plt.subplots(figsize=(10, 10), nrows=2, ncols=2)
plt.subplots_adjust(wspace=0.3, hspace=0.3)
# populate subplots
df_2017.plot(ax=axes[0,0], y='d_sessions',marker='o',linestyle='none', legend=None)
df_2017.plot(ax=axes[0,1], y='d_conversion',marker='o',linestyle='none', legend=None)
df_2017.plot(ax=axes[1,0], y='d_avg_value',marker='o',linestyle='none', legend=None)
df_2017.plot(ax=axes[1,1], y='d_revenue',marker='o',linestyle='none', legend=None, color='red')
# change x-limits, x-tick labels
for ax in axes.reshape(-1):
ax.set_xlim(['May 2017','Jan 2018'])
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%y'))
# label y-axes
axes[0,0].set_ylabel('% Change in Sessions')
axes[0,1].set_ylabel('% Change in Conversion')
axes[1,0].set_ylabel('% Change in Average Value')
axes[1,1].set_ylabel('% Change in Revenue')
plt.show()