In ggplot2, I would like the boxplot to have y-axis range spanning to just the ends of the whiskers so that the plot does not include all of the outliers, but without making all of the outliers invisible. So basically, I would like to go from the first image boxplot with y range including all outliers to the second image boxplot with y range to widest point of whiskers but while showing some outliers
This is the code I have currently, but it doesn't seem to be doing what I want it to do. Any ideas?
get_range <- function(data, j) {
split_dfs <- group_split(data, group)
ends <- data.frame(matrix(nrow = length(split_dfs), ncol = 2))
colnames(ends) <- c("min_range", "max_range")
for (i in c(1:length(split_dfs))) {
cur_min <- min(split_dfs[[i]][, j + 2])
cur_max <- max(split_dfs[[i]][, j + 2])
i_summary <- (split_dfs[[i]])[, j + 2] %>%
pull() %>%
quantile()
Q1 <- i_summary[["25%"]]
Q3 <- i_summary[["75%"]]
iqr <- (split_dfs[[i]])[, j + 2] %>%
pull() %>%
IQR()
ends[i, "min_range"] <- Q1 - (1.5 * iqr)
ends[i, "max_range"] <- Q3 + (1.5 * iqr)
}
y_min <- min(ends$min_range)
y_max <- max(ends$max_range)
return(list(y_min, y_max))
}
plot_features <- function(data, i) {
lims <- get_range(data, i)
lim_min <- lims[[1]]
lim_max <- lims[[2]]
ggplot(data, aes(x = group, y = data[, i + 2], fill = group)) +
coord_cartesian(ylim = c(lim_min, lim_max)) +
ylab(colnames(data)[i + 2]) +
stat_boxplot(geom = "errorbar", width = 0.2) +
geom_boxplot() +
theme(legend.position = "none", axis.title.x=element_blank())
}