R using ggplot2 to plot mixEM data

Question

I have a vector of length 370 that I would like to fit to a mixture of Gaussians. I followed the example in "Any suggestions for how I can plot mixEM type data using ggplot2" to plot the data, but as you can see from the linked image ("Plot of a mixture of three Gaussians"), my results are different from those in the example.

Here is a snippet of the code that I used:

library(ggplot2)
library(mixtools)

# Plot a fitted normal mixture on top of a density-scaled histogram of the data.
#
# EM: a mixture fit such as the one returned by mixtools::normalmixEM(); the
#     elements x, mu, sigma, lambda and posterior are read.
# Returns a ggplot object: a 41-bin histogram of EM$x plus one filled polygon
# per component, each tracing lambda * dnorm(x, mu, sigma) on a 1000-point grid.
#
# Note: the grid spans exactly the observed data range, so a component whose
# density is not close to zero at the range limits is drawn as a polygon that
# is closed by a straight edge (the artefact this question is about).
gg.mixEM <- function(EM) {
  library(ggplot2)  # unlike require(), stops with an error if ggplot2 is missing
  data_range <- range(EM$x)
  grid <- seq(data_range[1], data_range[2], length.out = 1000)
  comp_names <- colnames(EM$posterior)
  # Factor levels in fit order keep each legend entry aligned with its label,
  # even when the names would sort differently (e.g. comp.10 before comp.2).
  pars <- data.frame(
    comp = factor(comp_names, levels = comp_names),
    mu = EM$mu,
    sigma = EM$sigma,
    lambda = EM$lambda
  )
  # One row per (grid point, component): each grid value is repeated once per
  # component while the rows of pars are recycled down the grid.
  em.df <- data.frame(x = rep(grid, each = nrow(pars)), pars)
  em.df$y <- with(em.df, lambda * dnorm(x, mean = mu, sd = sigma))
  ggplot(data.frame(x = EM$x), aes(x, y = ..density..)) +
    geom_histogram(fill = NA, color = "black", bins = 41) +
    geom_polygon(data = em.df, aes(x, y, fill = comp), color = "grey50", alpha = 0.5) +
    # One label per component (pars), not one per grid row (em.df).
    scale_fill_discrete("Component\nMeans", labels = format(pars$mu, digits = 3)) +
    theme_bw()
}

# Sample data for the question (originally shared as dput(gradesCS) output),
# written as an assignment so the snippet can be run as-is.
gradesCS <-
c(6.5, 22.375, 20.5, 24.25, 33.25, 24, 26.75, 30.75, 35.5, 23.5, 
26.875, 24, 35.5, 29.875, 29.75, 31.25, 32.875, 33.75, 34, 29, 
33, 24, 12, 26.375, 6.75, 31.25, 21.625, 32.875, 29.25, 27.125, 
28.25, 26.25, 24.875, 35.5, 26.5, 37.5, 35.375, 27.5, 33, 27.5, 
39.5, 34.25, 28.125, 28, 32.625, 37.625, 34.5, 29.5, 38.5, 37.5, 
28.75, 38, 16, 35.75, 30, 33.5, 36, 31.125, 29.75, 32.5, 35, 
24.375, 23.375, 28, 32.125, 36, 31.5, 33.5, 1.5, 30.5, 37, 29.5, 
29.5, 31.125, 32.5, 20.5, 28.75, 30.25, 32.5, 28, 36, 37.5, 28.5, 
35.5, 30.25, 36.375, 36, 23.25, 31.5, 25.125, 33.5, 34, 19.5, 
31.75, 39.5, 33.25, 24.875, 26.75, 23.375, 34, 16.5, 37, 33.375, 
31.25, 31.75, 35.5, 32, 27.5, 23.375, 20.625, 35.5, 31.5, 25.375, 
24.5, 27.25, 25.25, 35.75, 24, 28.25, 33.125, 31.5, 39.5, 39.25, 
24.75, 37, 25.5, 34.75, 34, 20.25, 37.625, 30.5, 32.375, 15, 
32.75, 33.5, 32.75, 31.5, 29.25, 30, 37.25, 34.5, 23, 32.5, 38.25, 
35.625, 33, 35, 31.125, 37, 28.125, 29.25, 31.75, 34.75, 34.625, 
36.625, 15.25, 35.5, 37, 33.5, 30.875, 35, 31.625, 22.75, 31, 
31.125, 25.125, 35.5, 2, 36.125, 25.25, 32.5, 28, 38.5, 35.5, 
38.5, 30.5, 34, 28.125, 38, 29.25, 29.75, 33.25, 25.125, 35, 
34.5, 32, 35, 26.875, 20.5, 35.5, 23.25, 26.25, 36, 35.5, 38, 
39.25, 22, 38.5, 31, 35.5, 33.5, 31.5, 26, 30.375, 35.75, 29.75, 
34, 37.625, 38, 35.5, 34.25, 24.375, 30, 33.75, 39.5, 36.5, 36.5, 
32, 36.5, 29.75, 29.75, 25, 32, 29.25, 32.125, 31.25, 38, 33.5, 
33.5, 38.5, 37.25, 31.125, 33.5, 31, 28, 29.75, 36, 36, 37, 22, 
29, 36.5, 32.25, 30.75, 38.5, 24.125, 28.75, 38.25, 32.5, 34.75, 
29, 30.375, 33.5, 31.25, 30, 33, 33.5, 27.5, 26.5, 30.25, 34.75, 
33.5, 39, 33.25, 38.5, 27, 39.5, 34.25, 33, 35.125, 38, 31.25, 
32.75, 22.75, 31.125, 34.5, 33, 37.125, 31, 18.75, 30.25, 31.75, 
34, 30.75, 29, 34.5, 36, 36.5, 31.5, 26, 27.5, 27.5, 36.5, 19.75, 
33, 35.125, 16, 19.75, 31.5, 38.5, 34.25, 36.5, 27, 22, 21.75, 
36, 31.5, 33, 29.75, 32.5, 26.25, 33.5, 35.75, 33, 39, 35, 34.25, 
28.5, 25.5, 30.5, 28, 21.25, 39.125, 22.75, 28.375, 29.125, 30, 
34.125, 31.25, 32, 26.25, 36, 24.5, 30.25, 32.75, 29.625, 16, 
34, 16.75, 25.25, 33, 38, 28, 24.75, 29.75, 24.5, 19.25, 32.75, 
27.5, 24.75, 17.375, 25.25, 30.125, 38, 28, 35, 11.75, 27.75, 
38, 28.625, 31.25, 31.25, 32, 17.25, 18.25, 32.625, 25.5, 27.5, 
35.25, 35.5)

# Fix the RNG seed: the shuffle below is random, and normalmixEM() generates
# random starting values when lambda, mu and sigma are NULL, so without a
# seed every run can produce a different fit and plot.
set.seed(1)
b <- gradesCS
# Shuffle the observations. Named `shuffled` rather than `c`, which would
# mask base::c().
shuffled <- b[sample(length(b), length(b))]
c3 <- normalmixEM(shuffled, lambda = NULL, mu = NULL, sigma = NULL, k = 3,
                  maxit = 1000, epsilon = 1e-2)
gg.mixEM(c3)

It would be easier to help you if you posted a [reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) with sample input data so we can run and test the code ourselves. Explicitly list any non-base R packages you are using. — MrFlick, Aug 21 '17 at 18:35

score 2 · Answer 1 · answered Aug 23 '17 at 07:42

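The problem is that polygons misbehave when the curve does not return to zero inside the range you draw it over: geom_polygon closes each shape by joining its last point back to its first, so if the x grid ends abruptly while the density has not yet reached 0, the closing edge cuts across the plot.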

In the first line of the gg.mixEM function, add extra spacing on each side of x. I'm going with 5 here, but you just need enough for the component densities to drop to (approximately) 0.

# Pad the grid by 5 on each side; length.out is spelled out instead of
# relying on partial matching of `len`.
x <- with(EM, seq(min(x) - 5, max(x) + 5, length.out = 1000))

At the end of the ggplot call, we cut off the excess space with

# expand is a logical flag in coord_cartesian(); FALSE turns off the extra
# padding around xlim.
coord_cartesian(xlim = c(0, 42),
                expand = FALSE)

This renders the graph with your spacing, and then "zooms in" on the selected x interval.

# Fit a two-component normal mixture.
# NOTE(review): `test` is not defined in this snippet; presumably it is the
# numeric data vector being modelled -- confirm before running.
fit_test <- normalmixEM(test, k = 2)

# Plot a fitted normal mixture on top of a density-scaled histogram of the data.
#
# EM:   a mixture fit such as the one returned by mixtools::normalmixEM(); the
#       elements x, mu, sigma, lambda and posterior are read.
# pad:  how far the evaluation grid extends beyond min(EM$x) and max(EM$x) on
#       each side, so the component curves can drop towards zero before the
#       polygons are closed. Defaults to 5.
# xlim: x range shown in the final plot. Defaults to c(0, 42).
#
# Returns a ggplot object: a 41-bin histogram of EM$x plus one filled polygon
# per component, each tracing lambda * dnorm(x, mu, sigma) on a 1000-point
# grid, zoomed to xlim.
gg.mixEM <- function(EM, pad = 5, xlim = c(0, 42)) {
    library(ggplot2)  # unlike require(), stops with an error if ggplot2 is missing
    data_range <- range(EM$x)
    grid <- seq(data_range[1] - pad, data_range[2] + pad, length.out = 1000)
    comp_names <- colnames(EM$posterior)
    # Factor levels in fit order keep each legend entry aligned with its
    # label, even when the names would sort differently (e.g. comp.10
    # before comp.2).
    pars <- data.frame(
        comp = factor(comp_names, levels = comp_names),
        mu = EM$mu,
        sigma = EM$sigma,
        lambda = EM$lambda
    )
    # One row per (grid point, component): each grid value is repeated once
    # per component while the rows of pars are recycled down the grid.
    em.df <- data.frame(x = rep(grid, each = nrow(pars)), pars)
    em.df$y <- with(em.df, lambda * dnorm(x, mean = mu, sd = sigma))
    ggplot(data.frame(x = EM$x), aes(x, y = ..density..)) +
        geom_histogram(fill = NA, color = "black", bins = 41) +
        geom_polygon(data = em.df, aes(x, y, fill = comp), color = "grey50", alpha = 0.5) +
        # One label per component (pars), not one per grid row (em.df).
        scale_fill_discrete("Component\nMeans", labels = format(pars$mu, digits = 3)) +
        theme_bw() +
        # Zoom without dropping data; expand is a logical flag, so FALSE
        # (not c(0, 0)) is what disables the padding around xlim.
        coord_cartesian(xlim = xlim,
                        expand = FALSE)
}

# Draw the histogram with the fitted component densities overlaid.
gg.mixEM(fit_test)

And we get

R using ggplot2 to plot mixEM data

1 Answers1