1

Continuing from where this thread left off.

I want to make a split violin plot in ggplot2. The method presented above is limited to 2 categories on the x-axis.

Example:

# Simulated example data: four blocks of 1000 normal draws (means 0, 0.5, 1
# and 1.5), two x categories ('a', 'b') and two split groups ('i', 'j').
set.seed(20160229)
my_data <- data.frame(
  # One rnorm() call per block, in order, so the random stream is consumed
  # block by block.
  y = unlist(lapply(c(0, 0.5, 1, 1.5), function(mu) rnorm(1000, mean = mu))),
  x = rep(c('a', 'b'), each = 2000),
  # 'a' holds i then j, 'b' holds j then i.
  m = rep(c('i', 'j', 'i'), times = c(1000, 2000, 1000))
)

# Get densities
# For every (x, m) combination, estimate the kernel density of y and keep the
# evaluation points (loc) together with the estimated density (dens).
library(dplyr)
pdat <- my_data %>%
  group_by(x, m) %>%
  do({
    # Estimate once per group and reuse the result for both columns.
    d <- density(.$y)
    data.frame(loc = d$x, dens = d$y)
  }) %>%
  # Drop the grouping so later column-wise edits act on a plain table.
  ungroup()

# Flip and offset densities for the groups
# Mirror the 'i' halves onto the negative side so each i/j pair forms one
# split violin.
pdat$dens <- pdat$dens * ifelse(pdat$m == 'i', -1, 1)
# Shift category 'b' one unit to the right; category 'a' stays centred on 0.
pdat$dens <- pdat$dens + ifelse(pdat$x == 'b', 1, 0)
Plot

# Draw every half-violin as a filled polygon: the horizontal position is the
# flipped/offset density and the vertical position is the evaluation point.
# ggplot2 is attached here so the snippet runs in a fresh session.
library(ggplot2)
ggplot(pdat, aes(dens, loc, fill = m, group = interaction(m, x))) +
  geom_polygon() +
  # Tick positions match the offsets given to the two categories (0 and 1).
  scale_x_continuous(breaks = 0:1, labels = c('a', 'b')) +
  # NOTE(review): the vertical axis shows `loc` (the values of y), not the
  # density itself -- confirm whether 'density' is the intended label.
  ylab('density') +
  theme_minimal() +
  theme(axis.title.x = element_blank())

And the result

It looks like placing more than 2 categories on the x axis is possible but I'm not sure how to go about it.

Community
  • 1
  • 1
bigFin
  • 13
  • 4

1 Answer

0

I figured it out using a for loop to position the shapes by the group factor level.

 set.seed(20160229)
# Simulated example data: six blocks of 1000 normal draws, three x categories
# ('a', 'b', 'c') and, within each category, the two split groups 'i' and 'j'.
my_data <- data.frame(
  y = c(
    rnorm(1000), rnorm(1000, 0.5),
    rnorm(1000, 1), rnorm(1000, 1.5),
    rnorm(1000, 1.25), rnorm(1000, 0.75)
  ),
  # Explicit factor: the offset and axis-label steps rely on the level order,
  # and data.frame() no longer converts strings to factors (R >= 4.0).
  x = factor(rep(c("a", "b", "c"), each = 2000)),
  # i, j alternating in blocks of 1000 so that every x category has both halves.
  m = rep(c("i", "j"), each = 1000, times = 3)
)

# Get densities
# For every (x, m) combination, estimate the kernel density of y and keep the
# evaluation points (loc) together with the estimated density (dens).
library(dplyr)
pdat <- my_data %>%
  group_by(x, m) %>%
  do({
    # Estimate once per group and reuse the result for both columns.
    d <- density(.$y)
    data.frame(loc = d$x, dens = d$y)
  }) %>%
  # Drop the grouping so later column-wise edits act on a plain table.
  ungroup()

# Flip densities for the groups
# Mirror the 'i' halves onto the negative side so each i/j pair forms one
# split violin.
pdat$dens <- ifelse(pdat$m == "i", -pdat$dens, pdat$dens)

# Offset densities for x
# Centre the violin of the k-th x level at horizontal position k. The factor
# conversion must see the whole column at once: converting one element at a
# time yields a one-level factor whenever x is a character column, which would
# give every row the same offset of 1.
pdat$dens <- pdat$dens + as.integer(as.factor(pdat$x))


    # Plot
    # Every half-violin is a filled polygon: the horizontal position is the
    # flipped/offset density and the vertical position is the evaluation point.
    library(ggplot2)
    # Category labels in level order; as.factor() makes this work whether x is
    # stored as a factor or as a character column (levels() of a character
    # vector is NULL, which would leave the axis without usable breaks).
    x_levels <- levels(as.factor(pdat$x))
    ggplot(pdat, aes(dens, loc, fill = m, group = interaction(m, x))) +
      geom_polygon() +
      # One tick per category, at the integer position its violin is centred on.
      scale_x_continuous(breaks = seq_along(x_levels), labels = x_levels) +
      ylab("y") +
      theme_minimal() +
      theme(axis.title.x = element_blank())

https://i.stack.imgur.com/bxfHh.png

bigFin
  • 13
  • 4
  • There is no need for the `for` loop I think, just `pdat$dens <- pdat$dens + as.numeric(as.factor(pdat$x))` should work. – Axeman Sep 13 '17 at 11:07