5

I have the following dataset:

df <- data.frame(dens = rnorm(5000),
             split = as.factor(sample(1:2, 5000, replace = T)),
             method = as.factor(sample(c("A","B"), 5000, replace = T)),
             counts = sample(c(1, 10, 100, 1000, 10000), 5000, replace = T))

I have the following split violin plots for splits 1 and 2 within groups A and B for each count. We have four groups for each setting but there is a nested aspect to it:

library(ggplot2)
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, 
                           draw_group = function(self, data, ..., draw_quantiles = NULL){
                               ## By @YAK: https://stackoverflow.com/questions/35717353/split-violin-plot-with-ggplot2
                               data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
                               grp <- data[1,'group']
                               newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
                               newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
                               newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x']) 
                               if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
                                   stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
                                   quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
                                   aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
                                   aesthetics$alpha <- rep(1, nrow(quantiles))
                                   both <- cbind(quantiles, aesthetics)
                                   quantile_grob <- GeomPath$draw_panel(both, ...)
                                   ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
                               }
                               else {
                                   ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
                               }
                           }
                           )

create_quantile_segment_frame <- function (data, draw_quantiles, split = FALSE, grp = NULL) {
    dens <- cumsum(data$density)/sum(data$density)
    ecdf <- stats::approxfun(dens, data$y)
    ys <- ecdf(draw_quantiles)
    violin.xminvs <- (stats::approxfun(data$y, data$xminv))(ys)
    violin.xmaxvs <- (stats::approxfun(data$y, data$xmaxv))(ys)
    violin.xs <- (stats::approxfun(data$y, data$x))(ys)
    if (grp %% 2 == 0) {
        data.frame(x = ggplot2:::interleave(violin.xs, violin.xmaxvs), 
                   y = rep(ys, each = 2), group = rep(ys, each = 2)) 
    } else {
        data.frame(x = ggplot2:::interleave(violin.xminvs, violin.xs), 
                   y = rep(ys, each = 2), group = rep(ys, each = 2)) 
    }
}

geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
    layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}



df$key <- factor(paste(df$split, df$method))

levels(df$split) <- factor(0:2)
library(ggplot2)
ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) +geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom") + scale_x_discrete(limits=levels(interaction(df$split,df$counts))[-length(levels(interaction(df$split,df$counts)))],drop = FALSE, name = "Counts")

And I get the following:

enter image description here

Which is great, except that I would like to only have labels of counts 1, 10, 100, 1000, 10000 on the x-axis and in between the blue and the green violin plots. So label 1 in between the first blue and the green violin plots, 10 in between the second blue and the green violin plots, 100 in between the second blue and the green violin plots and so on.

Thanks for any suggestions on how to do this.

Mike Wise
  • 22,131
  • 8
  • 81
  • 104
user3236841
  • 1,088
  • 1
  • 15
  • 39
  • have a look at that one. https://stackoverflow.com/questions/44255322/ggplot-different-position-for-ticks-and-labels-on-x-axis-in-grouped-bar-plot – tjebo Mar 07 '18 at 21:44
  • and this one might also help. https://stackoverflow.com/questions/44616530/axis-labels-on-two-lines-with-nested-x-variables-year-below-months – tjebo Mar 07 '18 at 22:23
  • Can't really answer your question, but I seriously like your plot. – tjebo Mar 07 '18 at 22:24
  • Thanks! I am not sure how to get what I want to work though. – user3236841 Mar 07 '18 at 23:40
  • The example is self-contained. If you run it, you get df. – user3236841 Mar 08 '18 at 16:51
  • The edits which I suggested were 1) correcting your code (your df <- assignment is incorrect, a comma is missing) and 2) for readability. It's not really great to have the code without any line break. It makes the code much more readable if you break it ,e.g. after the plus signs – tjebo Mar 09 '18 at 23:19

2 Answers2

3

Instead of changing the break point for a discrete scale, you can try adding a text layer to the plot itself, which is able to accept non-integer values for discrete scale positions:

ggplot(df,
       aes(x = x, y = dens, fill = key)) + 
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +

  # annotate layer with non-integer positions
  annotate(geom = "text", x = c(1.5, 4.5, 7.5, 10.5, 13.5), y = -3.75,
           label = c("1", "10", "100", "1000", "10000")) +
  scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired", n=4)) + 
  scale_x_discrete(name = "Counts", drop = FALSE) +
  theme_minimal() + 

  # hide the actual discrete labels / ticks
  theme(legend.position="bottom",
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank())

plot

Z.Lin
  • 28,055
  • 6
  • 54
  • 94
3

I usually solve these issues with facets, then format the strips as though they are axis labels. This also naturally puts the pairs closer together, without any hacks, and you can change the distance by changing theme(panel.spacing = .....), if needed. E.g.:

ggplot(df, aes(x = split, y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + 
  xlab('count') +
  facet_grid(~counts, scales = 'free_x', switch = 'x') +
  theme_light() + 
  theme(legend.position = "bottom", axis.text.x = element_blank(), axis.ticks.x = element_blank(),
        strip.background = element_blank(), strip.text = element_text(color = 'black'))

enter image description here

Or a different theme with less obvious facets:

ggplot(df, aes(x = split, y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + 
  xlab('count') +
  facet_grid(~counts, scales = 'free_x', switch = 'x') +
  theme_minimal() + 
  theme(legend.position = "bottom", axis.text.x = element_blank(), axis.ticks.x = element_blank())

enter image description here

Axeman
  • 32,068
  • 8
  • 81
  • 94
  • Can there be a larger gap between the "counts" for the second plot? Also, "bounding box" around axes would be worth trying. " – user3236841 Mar 12 '18 at 13:52
  • Yes, by changing `theme(panel.spacing = .....)`, as stated in the answer... – Axeman Mar 12 '18 at 13:53
  • Thanks! I don't like the fact that the lines do not continue acrorss the figure, however. I guess we can not put an overall bounding box? – user3236841 Mar 12 '18 at 14:12
  • I have grown to like your second answer more and more out of the three answers. Btw, is it possible to have colour of blue and green for the 1 and 2, and light and dark hue (I guess this would be light gray for A and dark gray for B) in the legend? – user3236841 Mar 14 '18 at 04:27