1

In the histogram, a gap appears between two bars. Does anyone know why?

I get this error:

The number of FixedLocator locations (11), usually from a call to set_ticks, does not match the number of ticklabels (10).

The csv file is just 2 columns, one with the name of the country and the other with the type of medal achieved, each line a medal with its type and country.

The link to the file is: https://github.com/jpiedehierroa/files/blob/main/Libro1.csv

# Script from the question: draws a stacked "histogram" of medals per country,
# coloured by medal type. It is kept exactly as posted because it reproduces
# the tick/label mismatch error quoted above.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
# NOTE(review): my_csv is never used below; the path also looks like it is
# missing a separator after "Users".
my_csv = Path("C:/Usersjosep/Desktop/Libro1.csv")
# local load; this frame is overwritten by the URL load a few lines down
df = pd.read_csv("Libro1.csv", sep=',')

# or load from github repo link
url = 'https://raw.githubusercontent.com/jpiedehierroa/files/main/Libro1.csv'
df = pd.read_csv(url)    

# Prepare data
x_var = 'countries'
groupby_var = 'type'
# group the (country, type) rows by medal type
df_agg = df.loc[:,[x_var, groupby_var]].groupby(groupby_var)
# one list of country names per medal type; the loop variable `df` is local to
# the comprehension and does not replace the outer DataFrame.
# NOTE(review): groupby sorts its keys by default, so the lists presumably come
# out as bronze, gold, silver -- confirm against the colours/legend below.
vals = [df[x_var].values.tolist() for i, df in df_agg]

# Draw
plt.figure(figsize=(10,10), dpi= 100)
colors= ("#CD7F32","silver","gold")
# bins = number of distinct countries (10 in the test data); hist returns the
# bin *edges* in `bins`, i.e. one more value than there are bins (11).
n, bins, patches = plt.hist(vals, df[x_var].unique().__len__(), stacked=True, density=False, color=colors[:len(vals)])

# Decoration
# NOTE(review): legend entries are hard-coded as bronze, silver, gold; they are
# matched to the datasets by position, not by medal type -- verify the order.
plt.legend(["bronze", "silver","gold"], loc="upper right")
plt.title(f"Histogram of medals achieved by ${x_var}$ colored by ${groupby_var}$ in Tokyo 2020", fontsize=18)
# hard-coded annotation: the literal text "138" at data coordinates (2, 80)
plt.text(2,80,"138")
plt.xlabel(x_var)
plt.ylabel("amount of medals by type")
plt.ylim(0, 130)
# `bins` holds 11 edges but only one label per country (10) is supplied; this
# is the 11-vs-10 mismatch reported in the error message.
# NOTE(review): np.unique returns the labels sorted alphabetically -- confirm
# that this matches the order in which the bars are drawn.
plt.xticks(ticks=bins, labels=np.unique(df[x_var]).tolist(), rotation=90, horizontalalignment='left')
plt.show()

enter image description here

Test Data

  • In case the link dies
countries,type
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,gold
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,silver
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
USA,bronze
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,gold
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,silver
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
China,bronze
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,gold
Japan,silver
Japan,silver
Japan,silver
Japan,silver
Japan,silver
Japan,silver
Japan,silver
Japan,silver
Japan,silver
Japan,silver
Japan,silver
Japan,silver
Japan,silver
Japan,silver
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
Japan,bronze
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,gold
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,silver
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
GB,bronze
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,gold
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,silver
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
ROC,bronze
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,gold
Australia,silver
Australia,silver
Australia,silver
Australia,silver
Australia,silver
Australia,silver
Australia,silver
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Australia,bronze
Netherlands,gold
Netherlands,gold
Netherlands,gold
Netherlands,gold
Netherlands,gold
Netherlands,gold
Netherlands,gold
Netherlands,gold
Netherlands,gold
Netherlands,gold
Netherlands,silver
Netherlands,silver
Netherlands,silver
Netherlands,silver
Netherlands,silver
Netherlands,silver
Netherlands,silver
Netherlands,silver
Netherlands,silver
Netherlands,silver
Netherlands,silver
Netherlands,silver
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
Netherlands,bronze
France,gold
France,gold
France,gold
France,gold
France,gold
France,gold
France,gold
France,gold
France,gold
France,gold
France,silver
France,silver
France,silver
France,silver
France,silver
France,silver
France,silver
France,silver
France,silver
France,silver
France,silver
France,silver
France,bronze
France,bronze
France,bronze
France,bronze
France,bronze
France,bronze
France,bronze
France,bronze
France,bronze
France,bronze
France,bronze
Germany,gold
Germany,gold
Germany,gold
Germany,gold
Germany,gold
Germany,gold
Germany,gold
Germany,gold
Germany,gold
Germany,gold
Germany,silver
Germany,silver
Germany,silver
Germany,silver
Germany,silver
Germany,silver
Germany,silver
Germany,silver
Germany,silver
Germany,silver
Germany,silver
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Germany,bronze
Italy,gold
Italy,gold
Italy,gold
Italy,gold
Italy,gold
Italy,gold
Italy,gold
Italy,gold
Italy,gold
Italy,gold
Italy,silver
Italy,silver
Italy,silver
Italy,silver
Italy,silver
Italy,silver
Italy,silver
Italy,silver
Italy,silver
Italy,silver
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Italy,bronze
Trenton McKinney
  • 56,955
  • 33
  • 144
  • 158
  • If you're using jupyter then [all the plot and labeling code should be in the same cell](https://i.stack.imgur.com/X528q.png). If you're using anaconda, then update with `conda update --all` at the anaconda prompt – Trenton McKinney Nov 07 '21 at 16:07
  • Hi Trenton, you are completely right. It works after updating in spyder. The thing is that I have to present it in jupyter, so, to end the topic, by being in the same cell you mean the same code line? Thanks. – Jose Antonio Piedehierro Arias Nov 07 '21 at 16:55
  • just the way it's shown in the link in my previous comment. One cell can have multiple lines of code. All of my code and example are run in `jupyter lab` – Trenton McKinney Nov 07 '21 at 16:56
  • ok but then, how do you separate these cells... because I didn't even know they existed. I mean, I start coding but in the same "block"/cell, if I understand you correctly. I see you differentiate between load/shape and plot. Is that what you mean? – Jose Antonio Piedehierro Arias Nov 07 '21 at 17:27
  • That example is in a Jupyter Notebook. When you make a plot, any code affecting the format of the plot must be in the same cell, as shown in the screenshot. – Trenton McKinney Nov 07 '21 at 17:29

1 Answers1

2
  • This is easier to implement as a stacked bar plot, as such, reshape the dataframe with pandas.crosstab and plot using pandas.DataFrame.plot with kind='bar' and stacked=True
    • This should not be implemented with plt.hist because it's more convoluted, and it's easier to use the pandas plot method directly.
    • Also a histogram is more appropriate when the x values are a continuous range of numbers, not discrete categorical values.
  • ct.iloc[:, :-1] selects all but the last column, 'tot' to be plotted as bars.
  • Use matplotlib.pyplot.bar_label to add annotations
    • ax.bar_label(ax.containers[2], padding=3) uses label_type='edge' by default, which results in annotating the edge with the cumulative sum ('center' annotates with the patch value), as shown in this answer.
      • The [2] in ax.containers[2] selects only the top containers to annotate with the cumulative sum. The containers are 0 indexed from the bottom.
    • See this answer for additional details and examples
    • This answer shows how to do annotations the old way, without .bar_label. I do not recommend it.
    • This answer shows how to customize labels to prevent annotations for values under a given size.
  • Tested in python 3.10, pandas 1.3.5, matplotlib 3.5.1

Load and Shape the DataFrame

import pandas as pd

# read the medal records (one row per medal) straight from the GitHub repo
url = 'https://raw.githubusercontent.com/jpiedehierroa/files/main/Libro1.csv'
df = pd.read_csv(url)

# count medals per country and medal type with crosstab, append the
# per-country total as 'tot' (needed to sort the bars), then order the
# countries from most to fewest medals
ct = (
    pd.crosstab(df['countries'], df['type'])
    .assign(tot=lambda counts: counts.sum(axis=1))
    .sort_values(by='tot', ascending=False)
)

# display(ct)
type         bronze  gold  silver  tot
countries                             
USA              33    39      41  113
China            18    38      32   88
ROC              23    20      28   71
GB               22    22      21   65
Japan            17    27      14   58
Australia        22    17       7   46
Italy            20    10      10   40
Germany          16    10      11   37
Netherlands      14    10      12   36
France           11    10      12   33

Plot

# Draw one stacked bar per country from the medal-count table `ct`
# (rows = countries, columns = medal types plus a trailing 'tot' column).
colors = ("#CD7F32", "silver", "gold")

# Pair each colour with its medal type by name. Zipping `colors` with
# ct.columns would mis-pair them, because crosstab orders the columns
# alphabetically (bronze, gold, silver) and gold would be drawn in silver.
medal_types = ('bronze', 'silver', 'gold')
cd = dict(zip(medal_types, colors))

# plot the medals columns (everything except the trailing 'tot' column)
title = 'Country Medal Count for Tokyo 2020'
ax = ct.iloc[:, :-1].plot(kind='bar', stacked=True, color=cd, title=title,
                          figsize=(12, 5), rot=0, width=1, ec='k')

# annotate each container with individual values
for c in ax.containers:
    ax.bar_label(c, label_type='center')

# annotate the top containers with the cumulative sum
ax.bar_label(ax.containers[2], padding=3)

# pad the spacing between the number and the edge of the figure
ax.margins(y=0.1)

enter image description here

  • An alternative way to annotate the top with the sum is to use the 'tot' column for custom labels, but as shown, this is not necessary.
# use the precomputed per-country totals as the text for the top annotations
labels = ct['tot'].tolist()
top_container = ax.containers[2]
ax.bar_label(top_container, labels=labels, padding=3)
Trenton McKinney
  • 56,955
  • 33
  • 144
  • 158