1

I have a problem with the order in which values are displayed on a pyplot bar chart: I need them shown in the reverse order.

import matplotlib.pyplot as plt
import pandas as pd

# Monthly funnel counts, written one record per month
merged_month_df = pd.DataFrame(
    [
        ['january', 100, 50, 30, 1000],
        ['february', 80, 40, 20, 800],
        ['march', 60, 30, 10, 600],
    ],
    columns=['month', 'reg_month', 'application', 'game', 'visit_site_month'],
)

# Express every funnel stage as a percentage of that month's website visits
merged_month_df = merged_month_df.assign(
    reg_ratio=lambda frame: frame['reg_month'] / frame['visit_site_month'] * 100,
    app_ratio=lambda frame: frame['application'] / frame['visit_site_month'] * 100,
    game_ratio=lambda frame: frame['game'] / frame['visit_site_month'] * 100,
)

# Single axes that will hold the whole funnel chart
fig, ax = plt.subplots(figsize=(10, 6))

# One bar series per funnel stage: (source column, legend label, alpha).
# Every series is drawn at the same month positions and no `bottom=` offset is
# given, so the series overlap instead of stacking. alpha=None is matplotlib's
# default and leaves the first series without an explicit transparency.
funnel_series = (
    ('visit_site_month', 'Website visits', None),
    ('reg_month', 'Registrations', 0.7),
    ('application', 'Applications', 0.7),
    ('game', 'Visited game', 0.7),
)
for column, legend_label, opacity in funnel_series:
    ax.bar(merged_month_df['month'], merged_month_df[column], label=legend_label, alpha=opacity)

# Write a percentage above every bar of every funnel stage.
# Each row describes one stage:
#   (stage column, column it is divided by, count cutoff,
#    vertical offset when count > cutoff, vertical offset otherwise)
# The first stage has no denominator column and is always labelled 100%.
label_specs = (
    ('visit_site_month', None, 40, 5, 80),
    ('reg_month', 'visit_site_month', 20, 5, 65),
    ('application', 'reg_month', 20, 15, 40),
    ('game', 'application', 20, 5, 10),
)
for stage, base, cutoff, near, far in label_specs:
    for idx, count in enumerate(merged_month_df[stage]):
        # Share of this stage relative to its denominator column for the same row
        pct = 100 if base is None else count / merged_month_df[base][idx] * 100
        lift = near if count > cutoff else far
        ax.text(idx, count + lift, f"{round(pct, 1)}%", ha='center')

# Set plot settings
# The x axis is categorical, so its tick labels are already the month names.
# Rotate them in place rather than re-assigning them with set_xticklabels():
# that call, made without fixing the tick positions first, makes recent
# matplotlib versions emit a UserWarning about fixed labels on non-fixed ticks.
ax.tick_params(axis='x', labelrotation=90)
ax.set_ylabel('Number of customers')
# Anchor the legend just outside the right edge of the axes
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid()

plt.show()

The result of executing the code is this graph:

enter image description here

How can I make the order as follows: Website visits - Registrations - Applications - Visited game? That is, the order should be reversed.

Trenton McKinney
  • 56,955
  • 33
  • 144
  • 158
Joker221
  • 15
  • 3

1 Answer

0
  • Tested in python 3.11, pandas 1.5.3, matplotlib 3.7.1

Imports and DataFrame from OP

import pandas as pd
import matplotlib.pyplot as plt

# Funnel counts per month (same data as in the question)
df = pd.DataFrame(
    {
        'month': ['january', 'february', 'march'],
        'reg_month': [100, 80, 60],
        'application': [50, 40, 30],
        'game': [30, 20, 10],
        'visit_site_month': [1000, 800, 600],
    }
)

# Add one ratio column per funnel stage: stage count as a percentage of website visits
df = df.assign(
    **{
        ratio_name: df[stage_name].div(df['visit_site_month']).mul(100)
        for ratio_name, stage_name in (
            ('reg_ratio', 'reg_month'),
            ('app_ratio', 'application'),
            ('game_ratio', 'game'),
        )
    }
)
  • Use .rename to give the columns the appropriate name prior to plotting.
  • Use pandas.DataFrame.plot with kind='bar' to create the bar plot.
    • Use .iloc to select the columns to be plotted, and specify the order to plot them.
  • Create a separate dataframe, per, with the calculated values for the bar labels.
    • Use matplotlib.pyplot.bar_label to annotate the bars with custom labels from per; the labels will be in the correct order because each column of per is selected by the label of the bar container being annotated.
    • See How to add value labels on a bar chart for a thorough explanation of the method, and additional examples.
    • Do not calculate the separate positions of x and y for ax.text, because this is cumbersome and error-prone.
# Give the columns that will be plotted their display names
mapper = {
    'visit_site_month': 'Website visits',
    'reg_month': 'Registrations',
    'application': 'Applications',
    'game': 'Visited game',
}
df = df.rename(columns=mapper)

# `per` keeps the column layout of df's columns 1-4 but holds, for every stage,
# its count as a percentage of the stage it is divided by, rounded to 1 decimal.
# 'Website visits' is divided by itself, so it is always 100.
per = df.iloc[:, 1:5].copy()
per = per.assign(
    **{
        stage: df[stage].div(df[base]).mul(100).round(1)
        for stage, base in (
            ('Visited game', 'Applications'),
            ('Applications', 'Registrations'),
            ('Registrations', 'Website visits'),
            ('Website visits', 'Website visits'),
        )
    }
)

# Draw the stacked bar chart through the pandas plotting API.
# The columns are picked by name, in the order they should be plotted:
# month (x axis) followed by the four funnel stages.
ax = df[['month', 'Website visits', 'Registrations', 'Applications', 'Visited game']].plot.bar(
    x='month',
    stacked=True,
    figsize=(10, 10),
    rot=0,
    grid=True,
    ylabel='Number of Customers',
    xlabel='',
)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False)

# Each container holds the bar artists of one plotted column
for container in ax.containers:
    # The container label is used as the column name to look up in `per`
    stage = container.get_label()
    # Turn that column's percentages into '<value>%' strings and centre them in the bars
    ax.bar_label(container, labels=per[stage].astype(str).add('%'), label_type='center')

plt.show()

enter image description here

df

      month  Registrations  Applications  Visited game  Website visits  reg_ratio  app_ratio  game_ratio
0   january            100            50            30            1000       10.0        5.0    3.000000
1  february             80            40            20             800       10.0        5.0    2.500000
2     march             60            30            10             600       10.0        5.0    1.666667

per

   Registrations  Applications  Visited game  Website visits
0           10.0          50.0          60.0           100.0
1           10.0          50.0          50.0           100.0
2           10.0          50.0          33.3           100.0
Trenton McKinney
  • 56,955
  • 33
  • 144
  • 158