I am generating split violin plots using the geom_split_violin
function created here: Split violin plot with ggplot2.
Then, I add labels for sample sizes (n = ...) for each split violin. However, there are some missing values, which results in mislabelling from the missing data onward.
Using the code below, this is the result:
In the bottom grid (B), under p2, there are no values for fill value = 1
. This results in mislabelling of the split violins thereafter. Specifically, the labels n = 3
and n = 108
are swapped.
Might there be a way to solve this?
Here is the full code:
# Create data
set.seed(12345)
my_data = data.frame(
y=c(rnorm(1000, 15)),
type=c(rep("A", 635), rep("B", 365)),
variable=c(rep("p1", 151), rep("p2", 287), rep("p3", 197),
rep("p1", 73), rep("p2", 181), rep("p3", 111)),
value=c(rep("0", 89), rep("1", 62),
rep("0", 151), rep("1", 136),
rep("0", 101), rep("1", 96),
rep("0", 39), rep("1", 34),
rep("0", 181),
rep("0", 108), rep("1", 3)))
# Code to create geom_split_violin function from link above
library('ggplot2')
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
# Add labels 'n = ...'
give_n = function(x, y_lo = 12) {
data.frame(y = y_lo,
label = paste("n =", length(x)))
}
# Plot data
ggplot(my_data, aes(variable, y, fill = value)) +
geom_split_violin() +
facet_grid(type ~ ., scales = "free_y") +
stat_summary(fun.data = give_n, aes(x = as.factor(variable)),
geom = "text", position = position_nudge(x = c(-0.25, 0.25)))