I would like to create a plot using a pandas timeseries in one subplot and a rectangle in another subplot.
If I don't include the subplots, I can achieve this pretty easily:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
# Number of daily samples; also reused as the RNG seed so every run draws
# the same noise.
N = 100
np.random.seed(N)

# N consecutive daily timestamps beginning on 2018-01-01.
dates = pd.date_range('2018-01-01', periods=N, freq='D')

# One third of the span covered by `dates`, and the timestamp lying that far
# past the first date.
one_third_delta = (dates[-1] - dates[0]) / 3
one_third_stamp = one_third_delta + dates[0]

# Standard-normal noise indexed by the daily dates.
ts = pd.Series(np.random.randn(N), index=dates)
def add_rectangle(ax, x, y, width, height, **kwargs):
    """Attach a rectangle patch to *ax*.

    Args:
        ax: Object exposing ``add_patch`` (here, a matplotlib Axes).
        x: Horizontal coordinate of the rectangle's anchor point.
        y: Vertical coordinate of the rectangle's anchor point.
        width: Extent of the rectangle along the x direction.
        height: Extent of the rectangle along the y direction.
        **kwargs: Forwarded unchanged to ``mpatches.Rectangle``.
    """
    anchor = (x, y)
    patch = mpatches.Rectangle(anchor, width, height, **kwargs)
    ax.add_patch(patch)
# Positional rectangle geometry in the order add_rectangle expects after the
# axes: x, y, width, height.
args = [one_third_stamp, -1, one_third_delta, 2]
# Keyword styling passed along with the geometry.
kwargs = dict(facecolor='orange', edgecolor='None', alpha=0.5)
# Plot 1: 1 subplot with ts plotted first (Working)
# Single axes: the series is drawn BEFORE the rectangle is added.
fig, ax = plt.subplots()
ts.plot(ax=ax)
add_rectangle(ax, *args, **kwargs)
# Write the figure to disk, then release it.
plt.savefig('plot1.png')
plt.close(fig)
Plot 1
However, things already start to get weird when I try adding the rectangle first:
# Plot 2: 1 subplot with ts plotted second (Not Working)
# Same calls as Plot 1 with the order swapped: the rectangle is added to the
# still-empty axes and the series is drawn afterwards.
fig, ax = plt.subplots()
add_rectangle(ax, *args, **kwargs)
ts.plot(ax=ax)
# Write the figure to disk, then release it.
plt.savefig('plot2.png')
plt.close(fig)
Plot 2
If I try splitting out the two plots, neither approach works:
# Plot 3: 2 subplots with ts plotted first (Not Working)
# Two stacked axes sharing the x axis: the series goes on axes[1] first,
# then the rectangle is added to axes[0], which holds no other artist.
fig, axes = plt.subplots(2, sharex=True)
ts.plot(ax=axes[1])
add_rectangle(axes[0], *args, **kwargs)
# Write the figure to disk, then release it.
plt.savefig('plot3.png')
plt.close(fig)
# Plot 4: 2 subplots with ts plotted second (Not Working)
# Same layout as Plot 3 with the order swapped: the rectangle is added to
# axes[0] first, then the series is drawn on axes[1].
fig, axes = plt.subplots(2, sharex=True)
add_rectangle(axes[0], *args, **kwargs)
ts.plot(ax=axes[1])
# Write the figure to disk, then release it.
plt.savefig('plot4.png')
plt.close(fig)
Plot 3
Plot 4
I've found two work-arounds.
The first involves casting everything to a float with matplotlib.dates.date2num:
# Plot 5: 2 subplots with date2num (Working)
# Right edge of the rectangle: one further third of the span past its left edge.
two_thirds_stamp = one_third_stamp + one_third_delta
# Rectangle geometry expressed as date2num floats: x, y, width, height.
# The width is the difference between the converted right and left edges.
args_date2num = [
mdates.date2num(one_third_stamp),
-1,
mdates.date2num(two_thirds_stamp) - mdates.date2num(one_third_stamp),
2,
]
# Flatten the series into a two-column frame and add a 'num' column holding
# the date2num conversion of each date.
df = ts.to_frame().reset_index()
df.columns = ['date', 'value']
df['num'] = df.date.apply(mdates.date2num)
fig, axes = plt.subplots(2, sharex=True)
add_rectangle(axes[0], *args_date2num, **kwargs)
# Plot through matplotlib directly (not Series.plot), using the converted
# x values, drawn as a line with no markers.
axes[1].plot_date(df.num, df.value, ls='-', marker=None)
# Copy the y-limits of the series axes onto the rectangle axes.
axes[0].set_ylim(axes[1].get_ylim())
# Write the figure to disk, then release it.
plt.savefig('plot5.png')
plt.close(fig)
Plot 5
This isn't great for two reasons:
- I lose the nice ticklabel formatting that pandas uses.
- As far as I can tell, date2num is incompatible with how pandas internally represents datetimes as floats. So if I use date2num at all, all other datetimes must be converted too.
The other work around involves a dummy plot:
# Plot 6: 2 subplots with alpha=0 dummy (Working)
fig, axes = plt.subplots(2, sharex=True)
# Stepping by len(ts)-1 keeps only the first and last samples; adding 10
# shifts those two points upward.
dummy_ts = ts[::(len(ts)-1)] + 10 # make it out of sight
# The dummy series is drawn on the rectangle's axes BEFORE the rectangle is
# added, so that axes has already received a pandas time-series plot.
dummy_ts.plot(ax=axes[0], alpha=0) # and invisible for good measure
add_rectangle(axes[0], *args, **kwargs)
ts.plot(ax=axes[1])
# Copy the y-limits of the series axes onto the rectangle axes.
axes[0].set_ylim(axes[1].get_ylim())
# Write the figure to disk, then release it.
plt.savefig('plot6.png')
plt.close(fig)
Plot 6
My question (finally) is: why is this necessary? What changes between doing this on a single subplot vs. multiple subplots? Is there a better, more canonical way?
Python version:
Python 3.6.3 (v3.6.3:2c5fed86e0)
[GCC 4.2.1 (Apple Inc. build 5666) (dot 3)] on darwin
Pip freeze:
cycler==0.10.0
kiwisolver==1.0.1
matplotlib==2.2.0
numpy==1.14.2
pandas==0.22.0
pyparsing==2.2.0
python-dateutil==2.7.0
pytz==2018.3
six==1.11.0