1

I want to plot a heatmap that better visualizes the distribution pattern in a scatterplot, but I am having trouble generating it. The data on the y-axis ranges from 0 to 15 and on the x-axis from 0 to 7.

I followed the post below on how to generate a heatmap and wrote the following code, but the resulting plot looks quite different from what I would expect based on the scatterplot.

Generate a heatmap in MatPlotLib using a scatter data set

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm as CM

x = [0.3178, 2.0857, 2.5922, 0.088, 0.3, 0.4006, 1.0241, 0.1913, 0.56, 1.1828, 2.6879, 5.8044, 0.3593, 1.8732, 10.8003, 0.3457, 1.7003, 0.1677, 0.7442, 1.5731, 0.4927, 0.4143, 0.558, 0.2486, 0.3009, 0.163, 2.645, 4.1364, 13.8043, 3.9997, 0.258, 0.78, 10.3991, 0.2425, 0.3335, 4.8002, 0.3529, 5.9263, 0.151, 0.34, 0.1146, 13.6505, 2.8802, 3.2738, 0.5562, 0.5067, 1.5142, 2.0373, 2.5427, 12.1005]
y = [4.4903, 6.8879, 5.6211, 5.1128, 1.8125, 4.9716, 2.6847, 5.3744, 6.5254, 3.875, 3.6667, 2.0, 6.9811, 6.0501, 6.0, 6.8478, 5.0, 5.3676, 3.403, 6.1015, 6.8793, 4.7684, 3.5934, 2.6224, 5.9319, 1.8191, 3.0554, 3.5207, 3.6786, 3.0, 5.9041, 1.9128, 6.3333, 5.4949, 5.7135, 6.0, 5.5348, 3.0, 5.2644, 5.8111, 1.093, 4.0, 7.0, 6.0, 3.8684, 4.8, 1.5283, 6.6932, 7.0, 4.0]

# plot the scatter_plot
# Note the argument order below: the `y` list is drawn along the horizontal
# axis and the `x` list along the vertical axis.
xposition = [0,7]  # horizontal-axis limits; also bounds the guide lines below
plt.figure()
plt.plot(y,x,'r^', label='series_1',markersize=12)
# set_size_inches is called on the figure, so it sets the figure size
# (in inches), not the size of the axes inside it.
plt.gcf().set_size_inches(11.7, 8.27)
ax = plt.gca()
ax.tick_params(axis = 'both', which = 'major', labelsize = 16)
# Dashed vertical guide line at every integer from 0 through xposition[-1].
for xc in range(0,xposition[-1]+1):
    ax.axvline(x=xc, color='darkgrey', linestyle='--', linewidth = 2)

plt.xlabel('x', fontsize=18)
plt.ylabel('y', fontsize=18)
plt.xlim(xposition)
plt.ylim([0,15])
plt.legend(loc='upper right',fontsize = 'x-large')

# plot the heatmap
plt.figure()
# Bin the points into a 50 x 50 grid of counts. `y` is passed first, so the
# first axis of `heatmap` (and `xedges`) belongs to the `y` list and the
# second axis (and `yedges`) to the `x` list.
heatmap, xedges, yedges = np.histogram2d(y, x, bins=50)
# Data-coordinate bounding box [left, right, bottom, top] handed to imshow.
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]

plt.clf()
# NOTE(review): two layers are drawn into the same axes -- imshow receives
# heatmap.T while pcolormesh receives the untransposed heatmap. Confirm which
# orientation is intended and whether both layers are wanted.
plt.imshow(heatmap.T, extent=extent, interpolation='nearest', origin='lower')
plt.pcolormesh(xedges, yedges, heatmap, cmap=CM.RdBu_r, vmin=-7, vmax=7)
# Same figure size (in inches) as the scatter plot; this is the size of the
# figure, not of the axes.
plt.gcf().set_size_inches(11.7, 8.27)
plt.show()

There are two problems with the result. First, the heatmap is drawn at a different size from the scatterplot, even though I specified the same figure size for both. Second, the heatmap does not match the pattern in the scatterplot, where the points cluster towards the bottom right. Please advise what I should change to get the correct heatmap. Thank you.

Molly Zhou
  • 335
  • 1
  • 10

1 Answers1

1

The code below seems to fix it. You made 3 mistakes.

  1. You made the figures the same size, not the axes. I added a set_aspect for the scatter plot to make the aspect ratio equal, same as in the heat map.

  2. You drew an imshow and then a pcolormesh on top of it (you don't need both).

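  3. The heat map passed to pcolormesh was not transposed. np.histogram2d returns an array whose first axis corresponds to its first argument, whereas pcolormesh (like imshow) treats the first axis of the array as rows, i.e. the vertical direction. The array therefore has to be transposed for pcolormesh, just as you already did for imshow. I transposed it.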



    import matplotlib.pyplot as plt
    import numpy as np
    from matplotlib import cm as CM

    x = [0.3178, 2.0857, 2.5922, 0.088, 0.3, 0.4006, 1.0241, 0.1913, 0.56, 1.1828, 2.6879, 5.8044, 0.3593, 1.8732, 10.8003, 0.3457, 1.7003, 0.1677, 0.7442, 1.5731, 0.4927, 0.4143, 0.558, 0.2486, 0.3009, 0.163, 2.645, 4.1364, 13.8043, 3.9997, 0.258, 0.78, 10.3991, 0.2425, 0.3335, 4.8002, 0.3529, 5.9263, 0.151, 0.34, 0.1146, 13.6505, 2.8802, 3.2738, 0.5562, 0.5067, 1.5142, 2.0373, 2.5427, 12.1005]
    y = [4.4903, 6.8879, 5.6211, 5.1128, 1.8125, 4.9716, 2.6847, 5.3744, 6.5254, 3.875, 3.6667, 2.0, 6.9811, 6.0501, 6.0, 6.8478, 5.0, 5.3676, 3.403, 6.1015, 6.8793, 4.7684, 3.5934, 2.6224, 5.9319, 1.8191, 3.0554, 3.5207, 3.6786, 3.0, 5.9041, 1.9128, 6.3333, 5.4949, 5.7135, 6.0, 5.5348, 3.0, 5.2644, 5.8111, 1.093, 4.0, 7.0, 6.0, 3.8684, 4.8, 1.5283, 6.6932, 7.0, 4.0]

    # Figure 1: scatter plot. The `y` list runs along the horizontal axis and
    # the `x` list along the vertical axis.
    xposition = [0, 7]
    scatter_fig, ax = plt.subplots(figsize=(11.7, 8.27))
    ax.plot(y, x, 'r^', label='series_1', markersize=12)
    ax.tick_params(axis='both', which='major', labelsize=16)

    # Dashed guide line at every integer position inside the x-limits.
    for guide_x in range(xposition[-1] + 1):
        ax.axvline(x=guide_x, color='darkgrey', linestyle='--', linewidth=2)

    ax.set_xlabel('x', fontsize=18)
    ax.set_ylabel('y', fontsize=18)
    ax.set_xlim(xposition)
    ax.set_ylim([0, 15])
    ax.legend(loc='upper right', fontsize='x-large')
    # Equal data scaling on both figures makes the axes (not merely the
    # figures) comparable.
    ax.set_aspect('equal')

    # 50 x 50 grid of counts; the first axis follows the first argument (`y`).
    heatmap, xedges, yedges = np.histogram2d(y, x, bins=50)
    # Bounding box [left, right, bottom, top]; only needed if the grid is drawn
    # with imshow(heatmap.T, extent=extent, origin='lower') instead.
    extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]

    # Figure 2: heatmap. A single pcolormesh layer is enough; it takes the
    # transposed grid because rows of the array are drawn vertically.
    heat_fig, heat_ax = plt.subplots(figsize=(11.7, 8.27))
    heat_ax.pcolormesh(xedges, yedges, heatmap.T, cmap=CM.RdBu_r, vmin=-7, vmax=7)
    heat_ax.set_aspect('equal')
    plt.show()

Also, why don't you try to use subplot instead of two figures like in the following example? You might run into some problems with adding a colorbar though, but it's solvable.

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm as CM

x = [0.3178, 2.0857, 2.5922, 0.088, 0.3, 0.4006, 1.0241, 0.1913, 0.56, 1.1828, 2.6879, 5.8044, 0.3593, 1.8732, 10.8003, 0.3457, 1.7003, 0.1677, 0.7442, 1.5731, 0.4927, 0.4143, 0.558, 0.2486, 0.3009, 0.163, 2.645, 4.1364, 13.8043, 3.9997, 0.258, 0.78, 10.3991, 0.2425, 0.3335, 4.8002, 0.3529, 5.9263, 0.151, 0.34, 0.1146, 13.6505, 2.8802, 3.2738, 0.5562, 0.5067, 1.5142, 2.0373, 2.5427, 12.1005]
y = [4.4903, 6.8879, 5.6211, 5.1128, 1.8125, 4.9716, 2.6847, 5.3744, 6.5254, 3.875, 3.6667, 2.0, 6.9811, 6.0501, 6.0, 6.8478, 5.0, 5.3676, 3.403, 6.1015, 6.8793, 4.7684, 3.5934, 2.6224, 5.9319, 1.8191, 3.0554, 3.5207, 3.6786, 3.0, 5.9041, 1.9128, 6.3333, 5.4949, 5.7135, 6.0, 5.5348, 3.0, 5.2644, 5.8111, 1.093, 4.0, 7.0, 6.0, 3.8684, 4.8, 1.5283, 6.6932, 7.0, 4.0]

# Draw the scatter plot and its 2-D histogram side by side in one figure.
xposition = [0, 7]  # horizontal-axis limits; also bounds the guide lines
fig = plt.figure(figsize=(11.7, 8.27))

# Left panel: scatter plot. The `y` list runs along the horizontal axis and
# the `x` list along the vertical axis.
ax1 = fig.add_subplot(1, 2, 1)
ax1.plot(y, x, 'r^', label='series_1', markersize=12)
ax1.set_xlim(xposition)
ax1.set_ylim([0, 15])
ax1.legend(loc='upper right', fontsize='x-large')

# 50 x 50 grid of counts; the first axis follows the first argument (`y`).
heatmap, xedges, yedges = np.histogram2d(y, x, bins=50)
# Bounding box [left, right, bottom, top]; only needed if the grid is drawn
# with imshow(heatmap.T, extent=extent, origin='lower') instead.
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]

# Empty bins become NaN so they are left blank instead of being painted with
# the colormap's colour for 0. np.where builds a new array, so `heatmap`
# itself keeps its zero counts (a bare transpose() is only a view, and
# assigning NaN through it would overwrite `heatmap` as well).
heatmap_copy = np.where(heatmap.T == 0, np.nan, heatmap.T)

# Right panel: heatmap, sharing both axes with the scatter panel.
ax2 = fig.add_subplot(1, 2, 2, sharex=ax1, sharey=ax1)
ax2.pcolormesh(xedges, yedges, heatmap_copy, cmap=CM.RdBu_r, vmin=-7, vmax=7)
# Zoom in on the lower part of the vertical axis. Because the y-axis is
# shared, this also rescales the scatter panel and supersedes its [0, 15]
# limit; remove this line to see the full range again.
ax2.set_ylim([0, 3])

# Styling common to both panels.
for panel in (ax1, ax2):
    panel.set_xlabel('x', fontsize=18)
    panel.set_ylabel('y', fontsize=18)
    panel.tick_params(axis='both', which='major', labelsize=16)
    panel.set_aspect('equal')
    # Dashed guide line at every integer position inside the x-limits.
    for guide_x in range(xposition[-1] + 1):
        panel.axvline(x=guide_x, color='darkgrey', linestyle='--', linewidth=2)

plt.show()
iliar
  • 932
  • 6
  • 11
  • Thank you for your response. It works:) However, why is the background in the heatmap grey? Can I change that? Additionally, I tried to zoom in to only the lower part of y-axis by adding "plt.ylim([0,3]) " after "plt.ylabel" and it does not seem to work. May I ask where should I change instead? – Molly Zhou Nov 12 '19 at 08:32
  • @MollyZhou The 'background' is the color that corresponds to 0 in the colormap. If you want it to be white, as in the missing-data case, you need to set those cells of the heatmap to NaN instead of 0. I edited the second script (the one with subplots) to display a white background. I also added plt.ylim([0,3]) after ylabel and it works. Are you sure you are calling it while the correct figure is the current figure? If you like the answer, you should upvote and accept it. – iliar Nov 12 '19 at 09:56