I have a bar plot to which I would like to add a note at the bottom. The current code is shown below:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
# Toy panel data: 22 respondents, each observed at baseline and at endline.
# 'id' and 'survey' follow a strict repeating pattern, so they are generated;
# 'level' is the observed response and is listed one respondent per row as
# (baseline, endline).
n_respondents = 22
survey_rounds = ['baseline', 'endline']

data = {
    'id': [
        respondent
        for respondent in range(1, n_respondents + 1)
        for _ in survey_rounds
    ],
    'survey': survey_rounds * n_respondents,
    'level': [
        'low', 'high',
        'medium', 'low',
        'high', 'medium',
        'medium', 'high',
        'low', 'low',
        'medium', 'high',
        'low', 'medium',
        'low', 'high',
        'low', 'low',
        'medium', 'high',
        'high', 'high',
        'high', 'medium',
        'low', 'low',
        'medium', 'high',
        'low', 'medium',
        'high', 'medium',
        'low', 'high',
        'high', 'medium',
        'medium', 'low',
        'high', 'low',
        'low', 'low',
        'low', 'low',
    ],
}
df = pd.DataFrame(data)
# Share of responses per level, pooled over both survey rounds.
# count() yields the non-null count of every remaining column per group;
# the 'id' column is used as the group size N.
df_N = df.groupby(['level']).count().sort_index(ascending=True).reset_index()
df_N['%'] = 100 * df_N['id'] / df_N['id'].sum()

sns.set_style('white')
ax = sns.barplot(data=df_N, x='level', y='%', ci=None, palette="rainbow")

# Label every bar with its percentage and the count behind it.
N = df_N['id'].to_numpy()
# Raw string: '\i' is not a valid Python escape sequence, so the backslash
# must be kept literally for matplotlib's mathtext (italic N).
N_it = r'$\it{N}$'
labels = [
    f'{np.round(perc, 1)}% ({N_it} = {n})'
    for perc, n in zip(ax.containers[0].datavalues, N)
]
ax.bar_label(ax.containers[0], labels=labels, fontsize=10)

# Cosmetics: no left spine, horizontal grid lines, percent-formatted y axis,
# and no axis titles.
sns.despine(ax=ax, left=True)
ax.grid(True, axis='y')
ax.yaxis.set_major_formatter(PercentFormatter(100))
ax.set_xlabel('')
ax.set_ylabel('')
ax.margins(y=0.15)  # optionally some more free space at the top

# No legend is requested: the bar plot is drawn without hue or labels, so
# there would be nothing to list in it.
plt.tight_layout()
plt.show()
I would like to include a note based on the disaggregation below:
# Counts broken down by survey round and level; after count() each remaining
# column holds the non-null count for its (survey, level) group.
df_N = (
    df.groupby(['survey', 'level'])
    .count()
    .sort_index(ascending=True)
    .reset_index()
)
# Bare expression: only produces output in an interactive session/notebook.
df_N
Specifically:
Note: Baseline: high - 6, low - 10, medium - 6; Endline: high - 8, low - 8, medium - 6