2

I have data similar to the one I've created below:

set.seed(42)
dates <- seq.Date(as.Date("2012-08-01"), as.Date("2014-08-30"), "day")
n <- length(dates)
dat <- data.frame(date = dates,
                  category = rep(LETTERS[1:4], n/2),
                  daily_count = sample(18:100, n, replace=TRUE))

#following to be used for creating dotted lines; highlighting a certain point for each category
point_dates <- sample(seq.Date(as.Date("2012-08-01"), as.Date("2014-08-30"), "month"),4)
category_name <- list("A", "B", "C", "D")

I am creating a boxplot for each category using facet_wrap, and point_dates are important for me as they shows the point of interest in each boxplot. This is how I am creating the plot:

ggplot(dat) +
  geom_boxplot(aes(y = daily_count,
                   x = yearmonth(date),
                   group = paste(yearmonth(date), category),
                   fill = category)) +
  labs(x = 'Month & Year',
       y = 'Count',
       fill = "Category") +
  theme_bw() +
  theme(axis.text=element_text(size=10),
        axis.title=element_text(size=10),
        legend.position="none") +
  geom_vline(xintercept =  lubridate::ymd("2013-08-23"), linetype=1, colour="red", size = 0.5)+
  
  sapply(point_dates[[1]], function(xint) geom_vline(data=filter(dat, 
  category==category_name[[1]]),aes(xintercept = xint),
  linetype=3, colour="black", size = 1))+
  
  sapply(point_dates[[2]], function(xint) geom_vline(data=filter(dat, 
  category==category_name[[2]]),aes(xintercept = xint),
  linetype=3, colour="black", size = 1))+
  
  sapply(point_dates[[3]], function(xint) geom_vline(data=filter(dat, 
  category==category_name[[3]]),aes(xintercept = xint),
  linetype=3, colour="black", size = 1))+
  
  sapply(point_dates[[4]], function(xint) geom_vline(data=filter(dat, 
  category==category_name[[4]]),aes(xintercept = xint),
  linetype=3, colour="black", size = 1))+
  
  facet_wrap(~category, nrow = 2)

And this is the output of the code: enter image description here The plot is being created just fine. My question is, is there any better way (loop may be?) that would help me get rid of writing sapply multiple times. Because the number of categories may change (increase/decrease), that would be to change the code everytime.

Any guidance please?

AneesBaqir
  • 423
  • 1
  • 12

3 Answers3

2

I'm not sure that this is the best way, but you could do all of them in one go using map2 from tidyr. This would save you time from having to write out individual sapply.

library(tidyverse)

ggplot(dat) +
  geom_boxplot(aes(y = daily_count,
                   x = yearmonth(date),
                   group = paste(yearmonth(date), category),
                   fill = category)) +
  labs(x = 'Month & Year',
       y = 'Count',
       fill = "Category") +
  theme_bw() +
  theme(axis.text=element_text(size=10),
        axis.title=element_text(size=10),
        legend.position="none") +
  geom_vline(xintercept =  lubridate::ymd("2013-08-23"), 
             linetype=1, colour="red", size = 0.5)+
  map2(point_dates, category_name, 
       ~geom_vline(data=filter(dat, category==.y),
                   aes(xintercept = .x),
                   linetype=3, colour="black", size = 1))+
  facet_wrap(~category, nrow = 2)

AndS.
  • 7,748
  • 2
  • 12
  • 17
  • Thank you so much to everyone. I love SO, always find the solutions and the best of people. All answers were right ones, I wish I could choose them all. – AneesBaqir Nov 08 '21 at 14:45
1

You can use map() to iterate the calls to sapply():

ggplot(dat) +
  geom_boxplot(aes(y = daily_count,
                   x = yearmonth(date),
                   group = paste(yearmonth(date), category),
                   fill = category)) +
  labs(x = 'Month & Year',
       y = 'Count',
       fill = "Category") +
  theme_bw() +
  theme(axis.text=element_text(size=10),
        axis.title=element_text(size=10),
        legend.position="none") +
  geom_vline(xintercept =  lubridate::ymd("2013-08-23"), linetype=1, colour="red", size = 0.5)+
  
  map(seq_along(unique(dat$category)), ~sapply(point_dates[[.]], function(xint) geom_vline(data=filter(dat, 
  category==category_name[[.]]),aes(xintercept = xint),
  linetype=3, colour="black", size = 1))) +

  facet_wrap(~category, nrow = 2)
ulfelder
  • 5,305
  • 1
  • 22
  • 40
1

If i got it correct, you have already defined the dates for each group. So make the first plot:

library(ggplot2)
library(tsibble)

g = ggplot(dat) +
  geom_boxplot(aes(y = daily_count,
                   x = yearmonth(date),
                   group = paste(yearmonth(date), category),
                   fill = category)) +
  labs(x = 'Month & Year',
       y = 'Count',
       fill = "Category") +
  theme_bw() +
  theme(axis.text=element_text(size=10),
        axis.title=element_text(size=10),
        legend.position="none") +
  geom_vline(xintercept =  lubridate::ymd("2013-08-23"), linetype=1, colour="red", size = 0.5)+ 
  facet_wrap(~category, nrow = 2)

You just need to provide a new data frame and call geom_vline:

tmp = data.frame(category=unlist(category_name),date=point_dates)

g + geom_vline(data=tmp,aes(xintercept = date),
linetype=3, colour="black", size = 1)

enter image description here

StupidWolf
  • 45,075
  • 17
  • 40
  • 72