I'm using the example dendrogram from this post in my work but would also like to keep track of which row / column is from which piece of data.
I've edited the code with records of names of the data as names
as follows and would like to print out the names at the bottom and to the right of the distance matrix visualization. I've tried adding labels = names
in the call to dendrogram
but this didn't help.
Does anyone know how to add labels to this?
import scipy
import pylab
import scipy.cluster.hierarchy as sch
# Generate random features and distance matrix.
x = scipy.rand(40)
D = scipy.zeros([40,40])
for i in range(40):
for j in range(40):
D[i,j] = abs(x[i] - x[j])
### new code
names = [ ]
for i in range(40):
names.append( 'str%i'%( i ) )
print names[-1]
### end new code
# Compute and plot first dendrogram.
fig = pylab.figure(figsize=(8,8))
ax1 = fig.add_axes([0.09,0.1,0.2,0.6])
Y = sch.linkage(D, method='centroid')
Z1 = sch.dendrogram(Y, orientation='right')
ax1.set_xticks([])
ax1.set_yticks([])
# Compute and plot second dendrogram.
ax2 = fig.add_axes([0.3,0.71,0.6,0.2])
Y = sch.linkage(D, method='single')
Z2 = sch.dendrogram(Y)
ax2.set_xticks([])
ax2.set_yticks([])
# Plot distance matrix.
axmatrix = fig.add_axes([0.3,0.1,0.6,0.6])
idx1 = Z1['leaves']
idx2 = Z2['leaves']
D = D[idx1,:]
D = D[:,idx2]
im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
axmatrix.set_xticks([])
axmatrix.set_yticks([])
# Plot colorbar.
#axcolor = fig.add_axes([0.91,0.1,0.02,0.6])
#pylab.colorbar(im, cax=axcolor)
fig.show()
fig.savefig('dendrogram.png')