Of course I have no data of yours, so I had to construct my own `a`, containing names, and my own `b`, containing multiples of 10, possibly $$$...
I decided to place the names both in the tick labels and in mouse-hover labels — if the tick labels run together for a large number of names, you can just remove them completely (see the code).
The mouse-hover stuff is lifted from this answer (https://stackoverflow.com/a/47166787/2749397), which you may upvote if you think, as I do, that it's a useful example.
The code is commented, but I'll be glad to address any clarification request.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from itertools import product
# Those 2 functions are lifted (stolen?) from the answer
# https://stackoverflow.com/a/47166787/2749397 by
# https://stackoverflow.com/users/4124317/importanceofbeingernest
# Visit to learn how it works (and upvote if you like it!)
def update_annot(ind):
    """Move the shared annotation onto the hovered marker(s) and relabel it.

    ``ind`` is the details dict returned by ``sc.contains(event)``; its
    ``"ind"`` entry lists the indices of the markers under the cursor.
    Relies on the module-level ``sc``, ``annot``, ``a``, ``b`` and ``c``.
    """
    hits = ind["ind"]
    first = hits[0]
    # anchor the annotation arrow at the first marker that was hit
    annot.xy = sc.get_offsets()[first]
    # one "(name,value)" line for every marker under the cursor
    lines = ["(%s,%d)" % (a[n], b[n]) for n in hits]
    annot.set_text("\n".join(lines))
    # frame the box with the colour the colormap gives the first marker
    frame_colour = sc.cmap(sc.norm(c[first]))
    annot.get_bbox_patch().set_edgecolor(frame_colour)
def hover(event):
    """Callback for ``motion_notify_event``: show or hide the annotation.

    While the pointer is over a marker of ``sc`` the annotation is updated
    and shown; when it is inside ``ax`` but over no marker, a visible
    annotation is hidden.  Events outside ``ax`` are ignored.
    """
    was_visible = annot.get_visible()
    if event.inaxes != ax:
        return
    hit, details = sc.contains(event)
    if hit:
        update_annot(details)
        annot.set_visible(True)
        fig.canvas.draw_idle()
    elif was_visible:
        # redraw only if there is actually something to hide
        annot.set_visible(False)
        fig.canvas.draw_idle()
# Synthetic stand-in data: N_points (name, salary) pairs whose names are
# drawn from a pool of N_different_names distinct names.
N_different_names, N_points = 90, 400
# Above this number of names the x tick labels are not drawn at all
Max_different_names = 100
# Build the pool of names.  `replace=False` matters: np.random.choice samples
# WITH replacement by default, which would put duplicates in the pool and
# leave fewer than N_different_names distinct names.  As a consequence
# N_different_names must not exceed the 5**5 = 3125 candidate names.
name_pool = np.random.choice(
    [''.join(t) for t in product("ABCDE", repeat=5)],
    N_different_names, replace=False)
# Give every data point a name picked at random from the pool; a given name
# is not guaranteed to be picked, so `a` holds AT MOST N_different_names
# distinct names.
a = [name_pool[k] for k in np.random.randint(N_different_names, size=N_points)]
# the b array will contain the (?) salaries; b_min and b_max are in units
# of $ 10, hence the final multiplication
b_min, b_max = 15, 50
b = np.random.randint(b_min, b_max+1, size=N_points)*10
# sorting the names is optional; the (name, value) pairs are sorted together
# so that each value stays attached to its name
sort_names = True
if sort_names:
    a, b = zip(*sorted(zip(a, b)))
# We need a list of numbers to convert to colors: each point gets the
# position of its name on the x-axis, i.e. the rank at which that name first
# appears in `a` — this works for a sorted or an unsorted `a`.
# `setdefault` evaluates len(d) before inserting, so a new name receives the
# next free index and a known name keeps the one it already has.
d = {}
c = [d.setdefault(name, len(d)) for name in a]
del d
# Draw the scatter plot.  Note the WIDE figsize, chosen to accommodate all
# the x tick labels.
fig, ax = plt.subplots(figsize=(11, 6))
sc = ax.scatter(a, b, c=c, cmap='winter', s=100, edgecolor='lightgray')
# Rotate and shrink the x tick labels; I'd say they are OK up to ~100
# different names, beyond the configured limit they are removed altogether.
if N_different_names <= Max_different_names:
    for label in ax.get_xticklabels():
        label.set_rotation(75)
        label.set_fontsize('xx-small')
else:
    ax.set_xticks([])
# Place a dummy, invisible annotation that the hover callback moves around
# and fills in (also "lifted" from the same answer as the two functions).
box_style = {"boxstyle": "round,pad=0.6", "fc": "w", "linewidth": 8, "alpha": 0.75}
arrow_style = {"arrowstyle": "->"}
annot = ax.annotate("", xy=(0, 0), xytext=(20, 20), textcoords="offset points",
                    bbox=box_style, arrowprops=arrow_style)
annot.set_visible(False)
# every mouse movement over the figure is routed to hover()
fig.canvas.mpl_connect("motion_notify_event", hover)
# That's all Folks!
plt.show()