The code below generates a scatter plot with three horizontal lines, which refer to mean, mean+standard deviation and mean - standard deviation. To calculate these three factors, all the dates in my data
database are being considered.
However, I would like to exclude the month of April for calculating the mean and standard deviation, how could I do that?
Executable code below:
library(dplyr)
library(tidyr)
library(lubridate)
data <- structure(
list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
"2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
"2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
"2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
"2021-07-09","2021-07-09","2021-07-10","2021-07-10"),
Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
"Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
"Thursday","Friday","Friday","Saturday","Saturday"),
DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
class = "data.frame", row.names = c(NA, -21L))
graph <- function(dt, dta = data) {
dim_data<-dim(data)
day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
length = dim_data[1]
))
data_grouped <- data %>%
mutate(across(starts_with("date"), as.Date)) %>%
group_by(date2) %>%
summarise(Id = first(Id),
date1 = first(date1),
Week = first(Week),
DTPE = first(DTPE),
D1 = sum(D1)) %>%
select(Id,date1,date2,Week,DTPE,D1)
data_grouped %>%
mutate(DTPE = na_if(DTPE, ""))
df_OC<-subset(data_grouped, DTPE == "")
ds_CO = df_OC %>% filter(weekdays(date2) %in% weekdays(as.Date(dt)))
mean<-mean(ds_CO$D1)
sd<-sd(ds_CO$D1)
dta %>%
filter(date2 == ymd(dt)) %>%
summarize(across(starts_with("DR"), sum)) %>%
pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
mutate(name = as.numeric(name)) %>%
plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5,
cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
abline(h=mean, col='blue') +
abline(h=(mean + sd), col='green',lty=2)
abline(h=(mean - sd), col='orange',lty=2)
}
graph("2021-07-10",data)