1

I have the following df, from which I want to create a bar plot:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Example data: one row per country (used as the index), one column per
# response category.
df = pd.DataFrame(
    {
        'Country': list("ABCDEFG"),
        'Answer declined': [0.0, 0.0, 0.0, 0.000667, 0.000833, 0.000833, 0.0],
        "Don't know": [0.003333, 0.0, 0.0, 0.001333, 0.001667, 0.0, 0.0],
        "No": [0.769167, 0.843333, 0.762, 0.666, 0.721667, 0.721667, 0.775833],
        "Yes": [0.2275, 0.156667, 0.238, 0.332, 0.275833, 0.2775, 0.224167],
    }
).set_index("Country")

As I have multiple such dfs, I created a function to call bar plots from different dfs:

def bar_plot(plot_df):
    """Show a stacked bar chart with one bar per row of ``plot_df``.

    Every column contributes one segment to each bar. Segments are stacked
    in column order, so the first column ends up at the bottom. The row
    index provides the x tick labels and the column names provide the
    legend entries. The figure is shown and then closed.

    Parameters
    ----------
    plot_df : pandas.DataFrame
        Values to plot, one row per group and one column per response.
    """
    N = len(plot_df)  # number of groups
    num_y_cats = len(plot_df.columns)  # number of y-categories (responses)
    ind = np.arange(N)  # x locations for the groups
    width = 0.35  # width of bars

    p_s = []
    for i in range(num_y_cats):
        # Each segment starts where the columns before it end; for the
        # first column that is a sum over no columns, i.e. zero.
        bottom = plot_df.iloc[:, :i].sum(axis=1)
        # No per-artist label here: the legend call below is given its
        # handles and labels explicitly.
        p_s.append(plt.bar(ind, plot_df.iloc[:, i], width, bottom=bottom))

    plt.ylabel('[%]')
    plt.title('Responses by country')

    plt.xticks(ind, tuple(plot_df.index))
    plt.yticks(np.arange(0, 1.1, 0.1))  # ticks from, to, steps
    plt.legend(p_s, plot_df.columns,
               bbox_to_anchor=(0.5, -0.25),
               loc='lower center',
               # `num_y_cats // 2` is 0 for a single column; always ask for
               # at least one legend column.
               ncol=max(1, num_y_cats // 2),
               borderaxespad=0
               )
    plt.show()
    plt.close()    # close the figure

# Draw the chart for the example DataFrame `df`.
bar_plot(df)

This works but I can't fix one issue with the resulting plots: if the responses (i.e. column names) contain "Yes", I want this to be shown first (i.e. at the bottom) - and change nothing in the resulting plot otherwise.

My attempts so far based on this question were unsuccessful.

EDIT: I found a working solution and am now looking for a more elegant one.

Ivo
  • 3,890
  • 5
  • 22
  • 53

2 Answers

1

Since you are already using pandas.DataFrame, why not use the convenience method plot():

def bar_plot(plot_df):
    """Show a stacked bar chart of ``plot_df`` via ``DataFrame.plot``.

    One bar is drawn per row and one stacked segment per column. If a
    column named 'Yes' exists it is moved to the front of the column order
    so that it is stacked first (at the bottom); all other columns keep
    their relative order. ``plot_df`` itself is left untouched because only
    a reordered selection of it is plotted. The figure is shown and then
    closed.

    Parameters
    ----------
    plot_df : pandas.DataFrame
        Values to plot, one row per group and one column per response.
    """
    num_y_cats = plot_df.shape[1]  # number of y-categories (responses)
    width = 0.35  # width of bars

    cols = plot_df.columns.tolist()
    if 'Yes' in cols:
        # Move 'Yes' to the front without disturbing the remaining order.
        cols.insert(0, cols.pop(cols.index('Yes')))

    plot_df[cols].plot(kind='bar',
                       stacked=True,
                       width=width,
                       title='Responses by country'
                      )

    plt.ylabel('[%]')
    plt.xlabel('')
    plt.xticks(rotation=0)
    plt.yticks(np.arange(0, 1.1, 0.1))
    plt.legend(loc='lower center',
               # `num_y_cats // 2` is 0 for a single column; always ask for
               # at least one legend column.
               ncol=max(1, num_y_cats // 2),
               bbox_to_anchor=(0.5, -0.25),
               borderaxespad=0
              )

    plt.show()
    plt.close()    # close the figure

# Draw the chart for the example DataFrame `df`.
bar_plot(df)
Chris Adams
  • 18,389
  • 4
  • 22
  • 39
0

I have found a solution; however, it seems too laborious to be the most efficient and Pythonic approach. Does anyone have a more elegant solution?

def bar_plot(plot_df):
    """Show a stacked bar chart with one bar per row of ``plot_df``.

    Every column contributes one segment to each bar, stacked in column
    order. If a column named 'Yes' exists it is plotted first (at the
    bottom); all other columns keep their relative order. The caller's
    DataFrame is not modified: the reordering is done on a local selection.
    The row index provides the x tick labels and the column names provide
    the legend entries. The figure is shown and then closed.

    Parameters
    ----------
    plot_df : pandas.DataFrame
        Values to plot, one row per group and one column per response.
    """
    cols = plot_df.columns.tolist()
    if 'Yes' in cols:
        cols.remove('Yes')
        # Rebind the local name to a reordered selection instead of
        # deleting/re-inserting the column in the caller's frame.
        plot_df = plot_df[['Yes'] + cols]

    N = len(plot_df)  # number of groups
    num_y_cats = len(plot_df.columns)  # number of y-categories (responses)
    ind = np.arange(N)  # x locations for the groups
    width = 0.35  # width of bars

    p_s = []
    for i in range(num_y_cats):
        # Each segment starts where the columns before it end; for the
        # first column that is a sum over no columns, i.e. zero.
        bottom = plot_df.iloc[:, :i].sum(axis=1)
        # No per-artist label here: the legend call below is given its
        # handles and labels explicitly.
        p_s.append(plt.bar(ind, plot_df.iloc[:, i], width, bottom=bottom))

    plt.ylabel('[%]')
    plt.title('Responses by country')

    plt.xticks(ind, tuple(plot_df.index))
    plt.yticks(np.arange(0, 1.1, 0.1))  # ticks from, to, steps
    plt.legend(p_s, plot_df.columns,
               bbox_to_anchor=(0.5, -0.25),
               loc='lower center',
               # `num_y_cats // 2` is 0 for a single column; always ask for
               # at least one legend column.
               ncol=max(1, num_y_cats // 2),
               borderaxespad=0
               )
    plt.show()
    plt.close()
Ivo
  • 3,890
  • 5
  • 22
  • 53