1

I have a vector of length 370 that I would like to fit to a mixture of Gaussians. I have followed the example here: Any suggestions for how I can plot mixEM type data using ggplot2 to plot the data, but as you can see from the image link, my results are different from those in the example: Plot of a mixture of three Gaussians

Here is a snippet of the code that I used:

library(ggplot2)
library(mixtools)

gg.mixEM <- function(EM) {
  require(ggplot2)
  x       <- with(EM,seq(min(x),max(x),len=1000))
  pars    <- with(EM,data.frame(comp=colnames(posterior), mu, sigma,lambda))
  em.df   <- data.frame(x=rep(x,each=nrow(pars)),pars)
  em.df$y <- with(em.df,lambda*dnorm(x,mean=mu,sd=sigma))
  ggplot(data.frame(x=EM$x),aes(x,y=..density..)) + 
    geom_histogram(fill=NA,color="black",bins=41)+
    geom_polygon(data=em.df,aes(x,y,fill=comp),color="grey50", alpha=0.5)+
    scale_fill_discrete("Component\nMeans",labels=format(em.df$mu,digits=3))+
    theme_bw()
}

dput(gradesCS)
c(6.5, 22.375, 20.5, 24.25, 33.25, 24, 26.75, 30.75, 35.5, 23.5, 
26.875, 24, 35.5, 29.875, 29.75, 31.25, 32.875, 33.75, 34, 29, 
33, 24, 12, 26.375, 6.75, 31.25, 21.625, 32.875, 29.25, 27.125, 
28.25, 26.25, 24.875, 35.5, 26.5, 37.5, 35.375, 27.5, 33, 27.5, 
39.5, 34.25, 28.125, 28, 32.625, 37.625, 34.5, 29.5, 38.5, 37.5, 
28.75, 38, 16, 35.75, 30, 33.5, 36, 31.125, 29.75, 32.5, 35, 
24.375, 23.375, 28, 32.125, 36, 31.5, 33.5, 1.5, 30.5, 37, 29.5, 
29.5, 31.125, 32.5, 20.5, 28.75, 30.25, 32.5, 28, 36, 37.5, 28.5, 
35.5, 30.25, 36.375, 36, 23.25, 31.5, 25.125, 33.5, 34, 19.5, 
31.75, 39.5, 33.25, 24.875, 26.75, 23.375, 34, 16.5, 37, 33.375, 
31.25, 31.75, 35.5, 32, 27.5, 23.375, 20.625, 35.5, 31.5, 25.375, 
24.5, 27.25, 25.25, 35.75, 24, 28.25, 33.125, 31.5, 39.5, 39.25, 
24.75, 37, 25.5, 34.75, 34, 20.25, 37.625, 30.5, 32.375, 15, 
32.75, 33.5, 32.75, 31.5, 29.25, 30, 37.25, 34.5, 23, 32.5, 38.25, 
35.625, 33, 35, 31.125, 37, 28.125, 29.25, 31.75, 34.75, 34.625, 
36.625, 15.25, 35.5, 37, 33.5, 30.875, 35, 31.625, 22.75, 31, 
31.125, 25.125, 35.5, 2, 36.125, 25.25, 32.5, 28, 38.5, 35.5, 
38.5, 30.5, 34, 28.125, 38, 29.25, 29.75, 33.25, 25.125, 35, 
34.5, 32, 35, 26.875, 20.5, 35.5, 23.25, 26.25, 36, 35.5, 38, 
39.25, 22, 38.5, 31, 35.5, 33.5, 31.5, 26, 30.375, 35.75, 29.75, 
34, 37.625, 38, 35.5, 34.25, 24.375, 30, 33.75, 39.5, 36.5, 36.5, 
32, 36.5, 29.75, 29.75, 25, 32, 29.25, 32.125, 31.25, 38, 33.5, 
33.5, 38.5, 37.25, 31.125, 33.5, 31, 28, 29.75, 36, 36, 37, 22, 
29, 36.5, 32.25, 30.75, 38.5, 24.125, 28.75, 38.25, 32.5, 34.75, 
29, 30.375, 33.5, 31.25, 30, 33, 33.5, 27.5, 26.5, 30.25, 34.75, 
33.5, 39, 33.25, 38.5, 27, 39.5, 34.25, 33, 35.125, 38, 31.25, 
32.75, 22.75, 31.125, 34.5, 33, 37.125, 31, 18.75, 30.25, 31.75, 
34, 30.75, 29, 34.5, 36, 36.5, 31.5, 26, 27.5, 27.5, 36.5, 19.75, 
33, 35.125, 16, 19.75, 31.5, 38.5, 34.25, 36.5, 27, 22, 21.75, 
36, 31.5, 33, 29.75, 32.5, 26.25, 33.5, 35.75, 33, 39, 35, 34.25, 
28.5, 25.5, 30.5, 28, 21.25, 39.125, 22.75, 28.375, 29.125, 30, 
34.125, 31.25, 32, 26.25, 36, 24.5, 30.25, 32.75, 29.625, 16, 
34, 16.75, 25.25, 33, 38, 28, 24.75, 29.75, 24.5, 19.25, 32.75, 
27.5, 24.75, 17.375, 25.25, 30.125, 38, 28, 35, 11.75, 27.75, 
38, 28.625, 31.25, 31.25, 32, 17.25, 18.25, 32.625, 25.5, 27.5, 
35.25, 35.5)

b <- gradesCS
c <- b[sample(length(b), length(b)) ]
c3 <- normalmixEM(c, lambda=NULL, mu=NULL, sigma=NULL,k=3,maxit=1000,epsilon = 1e-2)
gg.mixEM(c3)
fokumdt
  • 17
  • 4
  • It would be easier to help you if you posted a [reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) with sample input data so we can run and test the code ourselves. Explicitly list any non-base R packages you are using. – MrFlick Aug 21 '17 at 18:35
  • Thanks. I edited the question. – fokumdt Aug 22 '17 at 19:31

1 Answers1

2

The problem is that polygons freak out if they don't have continuous drawing space (e.g. if you end abruptly at 0, but the polygon function has not reached 0).

In the first line of the ggplot function, add extra spacing on each side of x. I'm going with 5 here, but you just need enough for the function to hit 0.

x <- with(EM,seq(min(x)-5,max(x)+5,len=1000))

In the bottom, we cut off the excess space with

coord_cartesian(xlim = c(0,42),
                expand = c(0,0))

This renders the graph with your spacing, and then "zooms in" on the selected x interval.

fit_test <- normalmixEM(
    test,
    k = 2)

gg.mixEM <- function(EM) {
    require(ggplot2)
    x       <- with(EM,seq(min(x)-5,max(x)+5,len=1000))
    pars    <- with(EM,data.frame(comp=colnames(posterior), mu, sigma,lambda))
    em.df   <- data.frame(x=rep(x,each=nrow(pars)),pars)
    em.df$y <- with(em.df,lambda*dnorm(x,mean=mu,sd=sigma))
    ggplot(data.frame(x=EM$x),aes(x,y=..density..)) + 
        geom_histogram(fill=NA,color="black",bins=41)+
        geom_polygon(data=em.df,aes(x,y,fill=comp),color="grey50", alpha=0.5)+
        scale_fill_discrete("Component\nMeans",labels=format(em.df$mu,digits=3))+
        theme_bw() +
        coord_cartesian(xlim = c(0,42),
                        expand = c(0,0))
}

gg.mixEM(fit_test)

And we get

enter image description here

komodovaran_
  • 1,940
  • 16
  • 44