2

I'm working with k-means in MATLAB. I am trying to create the plot/graph, but my data has three dimensional array. Here is my k-means code:

clc
clear all
close all
load cobat.txt;  % read the file

k=input('Enter a number: ');        % determine the number of cluster
isRand=0;   % 0 -> sequeantial initialization
            % 1 -> random initialization

[maxRow, maxCol]=size(cobat);
if maxRow<=k, 
    y=[m, 1:maxRow];
elseif k>7
    h=msgbox('cant more than 7');
else
    % initial value of centroid
    if isRand,
        p = randperm(size(cobat,1));      % random initialization
        for i=1:k
            c(i,:)=cobat(p(i),:);  
        end
    else
        for i=1:k
           c(i,:)=cobat(i,:);        % sequential initialization
        end
    end

    temp=zeros(maxRow,1);   % initialize as zero vector
    u=0;
    while 1,
        d=DistMatrix3(cobat,c);   % calculate the distance 
        [z,g]=min(d,[],2);      % set the matrix g group

        if g==temp,             % if the iteration doesn't change anymore
            break;              % stop the iteration
        else
            temp=g;             % copy the matrix to the temporary variable
        end
        for i=1:k
            f=find(g==i);
            if f                % calculate the new centroid 
                c(i,:)=mean(cobat(find(g==i),:),1);
            end
        end
        c
        [B,index] = sortrows( c );  % sort the centroids
        g = index(g); % arrange the labels based on centroids
    end
    y=[cobat,g]

    hold off;    

   %This plot is actually placed in plot 3D code (last line), but I put it into here, because I think this is the plotting line
   f = PlotClusters(cobat,g,y,Colors) %Here is the error
   if Dimensions==2
    for i=1:NumOfDataPoints %plot data points    
        plot(cobat(i,1),cobat(i,2),'.','Color',Colors(g(i),:))
        hold on
    end
    for i=1:NumOfCenters %plot the centers
        plot(y(i,1),y(i,2),'s','Color',Colors(i,:))
    end
else
    for i=1:NumOfDataPoints %plot data points 
        plot3(cobat(i,1),cobat(i,2),cobat(i,3),'.','Color',Colors(g(i),:)) 
        hold on
    end
    for i=1:NumOfCenters %plot the centers
        plot3(y(i,1),y(i,2),y(i,3),'s','Color',Colors(i,:))
    end 

   end
end

And here is the plot 3D code:

%This function plots clustering data, for example the one provided by
%kmeans. To be able to plot, the number of dimensions has to be either 2 or
%3. 
%Inputs:
%       Data - an m-by-d matrix, where m is the number of data points to
%              cluster and d is the number of dimensions. In my code, it is cobat
%       IDX - an m-by-1 indices vector, where each element gives the
%             cluster to which the corresponding data point in Data belongs. In my file, it is 'g'
%       Centers y - an optional c-by-d matrix, where c is the number of
%             clusters and d is the dimensions of the problem. The matrix
%             gives the location of the cluster centers. If this is not
%             given, the centers will be calculated. In my file, I think, it is 'y'
%       Colors - an optional color scheme generated by hsv. If this is not
%             given, a color scheme will be generated.
%
function f = PlotClusters(cobat,g,y,Colors)
%Checking inputs
switch nargin
    case 1 %Not enough inputs
        error('Clustering data is required to plot clusters. Usage: PlotClusters(Data,IDX,Centers,Colors)')
    case 2 %Need to calculate cluster centers and color scheme
        [NumOfDataPoints,Dimensions]=size(cobat);
        if Dimensions~=2 && Dimensions~=3 %Check ability to plot
            error('It is only possible to plot in 2 or 3 dimensions.')
        end
        if length(g)~=NumOfDataPoints %Check that each data point is assigned to a cluster
            error('The number of data points in Data must be equal to the number of indices in IDX.')
        end
        NumOfClusters=max(g);
        Centers=zeros(NumOfClusters,Dimensions);
        NumOfCenters=NumOfClusters;
        NumOfPointsInCluster=zeros(NumOfClusters,1);
        for i=1:NumOfDataPoints
            Centers(g(i),:)=y(g(i),:)+cobat(i,:);
            NumOfPointsInCluster(g(i))=NumOfPointsInCluster(g(i))+1;
        end
        for i=1:NumOfClusters
            y(i,:)=y(i,:)/NumOfPointsInCluster(i);
        end
        Colors=hsv(NumOfClusters);        
    case 3 %Need to calculate color scheme        
        [NumOfDataPoints,Dimensions]=size(cobat);
        if Dimensions~=2 && Dimensions~=3 %Check ability to plot
            error('It is only possible to plot in 2 or 3 dimensions.')
        end
        if length(g)~=NumOfDataPoints %Check that each data point is assigned to a cluster
            error('The number of data points in Data must be equal to the number of indices in IDX.')
        end
        NumOfClusters=max(g);
        [NumOfCenters,Dims]=size(y);
        if Dims~=Dimensions
            error('The number of dimensions in Data should be equal to the number of dimensions in Centers')
        end
        if NumOfCenters<NumOfClusters %Check that each cluster has a center
            error('The number of cluster centers is smaller than the number of clusters.')
        elseif NumOfCenters>NumOfClusters %Check that each cluster has a center
            disp('There are more centers than clusters, all will be plotted')
        end
        Colors=hsv(NumOfCenters);
    case 4 %All data is given just need to check consistency        
        [NumOfDataPoints,Dimensions]=size(cobat);
        if Dimensions~=2 && Dimensions~=3 %Check ability to plot
            error('It is only possible to plot in 2 or 3 dimensions.')
        end
        if length(g)~=NumOfDataPoints %Check that each data point is assigned to a cluster
            error('The number of data points in Data must be equal to the number of indices in IDX.')
        end
        NumOfClusters=max(g);
        [NumOfCenters,Dims]=size(y);
        if Dims~=Dimensions
            error('The number of dimensions in Data should be equal to the number of dimensions in Centers')
        end
        if NumOfCenters<NumOfClusters %Check that each cluster has a center
            error('The number of cluster centers is smaller than the number of clusters.')
        elseif NumOfCenters>NumOfClusters %Check that each cluster has a center
            disp('There are more centers than clusters, all will be plotted')
        end
        [NumOfColors,RGB]=size(Colors);
        if RGB~=3 || NumOfColors<NumOfCenters
            error('Colors should have at least the same number of rows as number of clusters and 3 columns')
        end            
end
%Data is ready. Now plotting

end

Here is the error:

??? Undefined function or variable 'Colors'.

Error in ==> clustere at 69
    f = PlotClusters(cobat,g,y,Colors)

Am I wrong call the function like that? What should I do? Your help will be appreciated a lot.

Amro
  • 123,847
  • 25
  • 243
  • 454
Alvi Syahrin
  • 373
  • 3
  • 7
  • 16
  • you never defined the `Colors` variable, hence the error message.. – Amro May 08 '13 at 10:19
  • Now I define Colors, like this: `Colors=['r' 'g' 'b' 'c' 'm' 'y' 'k'];` is it right? But now I got another error: `Error in ==> PlotClusters at 22 switch nargin ??? Output argument "f" (and maybe others) not assigned during call to "E:\MATLAB 2012\PlotClusters.m>PlotClusters". Error in ==> clustere at 68 f = PlotClusters(cobat,g,y,Colors)` Thank you so much, @Amro, for always helping me. Your help means a lot and I always appreciate it. – Alvi Syahrin May 08 '13 at 13:26
  • your code has a number of problems. Rather than going through each, I provided my own example. Please study it and adapt it to your case. – Amro May 10 '13 at 01:59

3 Answers3

10

Your code is very messy, and unnecessarily long..

Here is smaller example that does the same thing. You'll need the Statistics toolbox to run it (for the kmeans function and Iris dataset):

%# load dataset of 150 instances and 3 dimensions
load fisheriris
X = meas(:,1:3);
[numInst,numDims] = size(X);

%# K-means clustering
%# (K: number of clusters, G: assigned groups, C: cluster centers)
K = 3;
[G,C] = kmeans(X, K, 'distance','sqEuclidean', 'start','sample');

%# show points and clusters (color-coded)
clr = lines(K);
figure, hold on
scatter3(X(:,1), X(:,2), X(:,3), 36, clr(G,:), 'Marker','.')
scatter3(C(:,1), C(:,2), C(:,3), 100, clr, 'Marker','o', 'LineWidth',3)
hold off
view(3), axis vis3d, box on, rotate3d on
xlabel('x'), ylabel('y'), zlabel('z')

pic

Amro
  • 123,847
  • 25
  • 243
  • 454
  • Amro, thank you a lot! Actually I also wrote in another script using kmeans function on MATLAB, just like yours. But your scatter3() code is really useful for me. It is very efficient. Thank you so much! But my graph looks messy. Is it just because the file I used, or the clustering of mine isn't working properly? – Alvi Syahrin May 10 '13 at 23:03
  • Sorry,, it's fixed. I just need to rotate the volume to see the better view. Thank you! – Alvi Syahrin May 11 '13 at 01:05
  • use the mouse and the rotate tool for that :) – Amro May 11 '13 at 01:39
  • Hi, Amro. I want to ask; what is 'Marker' for? Is it for the marker type, like '.' and 'o'?And then, what is the meaning of 36 and 100 on that script? The last, can you tell me the meaning of axis vis3d? – Alvi Syahrin May 23 '13 at 10:36
  • @AlviSyahrin: This all can be found in the MATLAB docs: 1) [marker type](http://www.mathworks.com/help/matlab/ref/scattergroupproperties.html#Marker) 2) [marker area](http://www.mathworks.com/help/matlab/ref/scatter3.html#inputarg_S) 3) [axis vis3d](http://www.mathworks.com/help/matlab/ref/axis.html#f17-642404) freezes aspect ratio properties to enable rotation of 3-D objects and overrides stretch-to-fill – Amro May 23 '13 at 11:04
  • @Amro I'm trying to use the above code on a 3D data found in this link where the K is 3 and it always gives this error"Empty cluster created at iteration 1." could you please assist me? https://www.dropbox.com/s/rgatmmg2cx2z1cv/matlab_X.mat – Tak Aug 02 '13 at 05:49
  • 1
    @user1460166: I posted an answer on your question at http://stackoverflow.com/questions/18009664/kmeans-matlab-empty-cluster-created-at-iteration-1-error – Amro Aug 02 '13 at 09:36
1

You could simply go for scatter():

enter image description here

As you can see from the image, you differentiate colors, size of the clusters. FOr more details check out the examples in the documentation.

Oleg
  • 10,406
  • 3
  • 29
  • 57
0

Here is the sample code for how we can get the 3d graph.

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt


fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

x =[1,2,3,4,5,6,7,8,9,10]
y =[5,6,2,3,13,4,1,2,4,8]
z =[2,3,3,3,5,7,9,11,9,10]


ax.scatter(x, y, z, c='r', marker='o')

ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')

plt.show()

enter image description here