0

I am drawing a graph of a categorical variable with Seaborn's countplot, using the code below, and would like to show the y-axis as percentages instead of counts.

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

sns.set_theme(style='whitegrid')

# Sample data: a single categorical column `x` holding four group labels.
data = {
'x': ['group1', 'group1', 'group2', 'group3', 'group4', 'group4', 'group3', 'group2', 'group4', 'group1', 'group4', 'group3', 'group2', 'group2', 'group1', 'group3', 'group4', 'group4', 'group2', 'group3', 'group1', 'group2', 'group4', 'group2', 'group3', 'group4', 'group1', 'group1', 'group2', 'group1', 'group2', 'group3', 'group4', 'group4', 'group4', 'group4', 'group4', 'group3', 'group1', 'group2', 'group4', 'group2', 'group3', 'group4', 'group2', 'group2', 'group1', 'group3', 'group4', 'group3',
     'group1', 'group4', 'group4', 'group4', 'group4', 'group4']
}

df = pd.DataFrame(data)
df.head(n=5)

# Denominator for the per-bar percentage labels.
total = float(df['x'].count())

plt.figure(figsize=(10, 6))

# One bar per distinct value of `x`; the y-axis shows raw counts.
ax = sns.countplot(x=df['x'])

# Label every bar with its share of all observations.
for p in ax.patches:
    percentage = '{:.1f}%'.format(100 * p.get_height() / total)
    # Centre the label on the bar using its real width; a fixed +0.5 offset
    # is only correct for bars that are exactly 1.0 wide.
    x = p.get_x() + p.get_width() / 2
    y = p.get_height()
    ax.annotate(percentage, (x, y), ha='center')
plt.show()

I would appreciate any help on how to achieve this. Thanks in advance!

Stephen Okiya
  • 315
  • 1
  • 8

1 Answer

1

It is perhaps easier to establish the distribution before creating the plot.

Calculate percentages

# Relative frequency of each group (sort=False: not ordered by frequency),
# rescaled from fractions to percentages.
proportions = df['x'].value_counts(normalize=True, sort=False)
s = proportions * 100
print(s)

group1    19.642857
group2    23.214286
group3    19.642857
group4    37.500000
Name: x, dtype: float64

Plot

Use sns.barplot.

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns

sns.set_theme(style='whitegrid')

plt.figure(figsize=(10, 6))
# `s` already holds one percentage per group, so plot it directly as bars.
ax = sns.barplot(x=s.index, y=s)
ax.set(ylabel='count', xlabel='x')

# change yaxis vals into percentages
ax.yaxis.set_major_formatter(mtick.PercentFormatter())

# Pair each bar with its value by position. Indexing the Series with an
# integer (`s[i]`) relies on a positional fallback for label-indexed Series
# that pandas has deprecated.
for p, value in zip(ax.patches, s):
    percentage = '{:.1f}%'.format(value)
    # Centre the label on the bar using its real width; a fixed +0.5 offset
    # is only correct for bars that are exactly 1.0 wide.
    x = p.get_x() + p.get_width() / 2
    y = p.get_height()
    ax.annotate(percentage, (x, y), ha='center')

plt.show()

Result

barplot

ouroboros1
  • 9,113
  • 3
  • 7
  • 26