2

The code below generates a scatter plot with three horizontal lines, which correspond to the mean, the mean plus one standard deviation, and the mean minus one standard deviation. To calculate these three values, all the dates in my dataset are currently being considered.

However, I would like to exclude the month of April when calculating the mean and standard deviation. How could I do that?

Executable code below:

library(dplyr)
library(tidyr)
library(lubridate)


# Sample data: 21 rows for a single Id. date2 is the observation day, Week its
# weekday name, DTPE a day-type flag ("Ho" or empty), and D1 / DR01..DR05 the
# numeric measurements.
data <- structure(
  list(
    Id = rep(1, 21),
    date1 = rep("2021-06-20", 21),
    date2 = rep(
      c("2021-07-01", "2021-04-02", "2021-04-03",
        "2021-04-08", "2021-07-09", "2021-07-10"),
      times = c(4, 6, 5, 2, 2, 2)
    ),
    Week = rep(
      c("Thursday", "Friday", "Saturday", "Thursday", "Friday", "Saturday"),
      times = c(4, 6, 5, 2, 2, 2)
    ),
    DTPE = rep(c("Ho", "", "Ho"), times = c(4, 15, 2)),
    D1   = c(8, 1, 9, 3, 5, 4, 7, 6, 3, 8, 2, 3, 4, 6, 7, 8, 4, 2, 6, 2, 3),
    DR01 = c(4, 1, 4, 3, 3, 4, 3, 6, 3, 7, 2, 3, 4, 6, 7, 8, 4, 2, 6, 7, 3),
    DR02 = c(8, 1, 4, 3, 3, 4, 1, 6, 3, 7, 2, 3, 4, 6, 7, 8, 4, 2, 6, 2, 3),
    DR03 = c(7, 5, 4, 3, 3, 4, 1, 5, 3, 3, 2, 3, 4, 6, 7, 8, 4, 2, 6, 4, 3),
    DR04 = c(4, 5, 6, 7, 3, 2, 7, 4, 2, 1, 2, 3, 4, 6, 7, 8, 4, 2, 6, 4, 3),
    DR05 = c(9, 5, 4, 3, 3, 2, 1, 5, 3, 7, 2, 3, 4, 7, 7, 8, 4, 2, 6, 4, 3)
  ),
  class = "data.frame",
  row.names = c(NA, -21L)
)


#' Plot one day's DR totals with mean and mean +/- sd reference lines
#'
#' Aggregates `dta` to one row per `date2` (summing `D1`), keeps the days whose
#' `DTPE` is empty and whose weekday matches the weekday of `dt`, and uses the
#' mean and standard deviation of their `D1` totals as horizontal reference
#' lines. The plotted points are the column sums of the `DR*` columns over the
#' rows where `date2` equals `dt`; the numeric suffix of each column name is
#' used as the x coordinate.
#'
#' @param dt Day to plot, as a Date or a "YYYY-MM-DD" string.
#' @param dta Data frame with columns Id, date1, date2, Week, DTPE, D1 and
#'   one or more columns whose names start with "DR".
#' @param exclude_months Month numbers (1-12) whose days are left out of the
#'   mean / sd calculation, e.g. `4` to ignore April. Defaults to none, which
#'   reproduces the calculation over all dates.
#' @return `NULL`, invisibly; the function is called for its plot.
graph <- function(dt, dta = data, exclude_months = integer(0)) {
  # One row per day. Work on the `dta` argument, not the global `data`, so
  # that the caller's data frame is the one actually summarised.
  daily <- dta %>%
    mutate(across(starts_with("date"), as.Date)) %>%
    group_by(date2) %>%
    summarise(Id = first(Id),
              date1 = first(date1),
              Week = first(Week),
              DTPE = first(DTPE),
              D1 = sum(D1)) %>%
    select(Id, date1, date2, Week, DTPE, D1)

  # Reference days: empty DTPE, same weekday as `dt`, not in an excluded month.
  reference <- daily %>%
    filter(DTPE == "",
           weekdays(date2) %in% weekdays(as.Date(dt)),
           !(month(date2) %in% exclude_months))

  # Named so that they do not shadow base::mean() and stats::sd().
  d1_mean <- mean(reference$D1)
  d1_sd <- sd(reference$D1)

  # Column sums of the DR* columns for the requested day, in long format:
  # `name` holds the numeric suffix of the column name, `val` the sum.
  points_df <- dta %>%
    filter(as.Date(date2) == ymd(dt)) %>%
    summarize(across(starts_with("DR"), sum)) %>%
    pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
    mutate(name = as.numeric(name))

  plot(points_df, xlab = "Days", ylab = "Number", xlim = c(0, 45),
       cex = 1.5, cex.lab = 1.5, cex.axis = 1.5, cex.main = 2, cex.sub = 2,
       lwd = 2.5,
       ylim = c((min(points_df$val) %/% 10) * 15,
                (max(points_df$val) %/% 10 + 1) * 100))

  # Base graphics calls are separate statements; they are not chained with `+`.
  abline(h = d1_mean, col = "blue")
  abline(h = d1_mean + d1_sd, col = "green", lty = 2)
  abline(h = d1_mean - d1_sd, col = "orange", lty = 2)

  invisible(NULL)
}
graph("2021-07-10",data)
Antonio
  • 1,091
  • 7
  • 24
  • The functions provided by `lubridate` should help you do this quite easily. – mikebader Sep 02 '21 at 20:22
  • This is effectively the negation of the question asked here: https://stackoverflow.com/q/68984597/3358272. Does that give you what you need? – r2evans Sep 02 '21 at 20:31

3 Answers

3
# Keep only the rows whose date2 is not in April; "%m" formats the month as a
# zero-padded two-digit string.
data %>%
  filter(format(as.Date(date2), format = "%m") != "04")
#   Id      date1      date2     Week DTPE D1 DR01 DR02 DR03 DR04 DR05
# 1  1 2021-06-20 2021-07-01 Thursday   Ho  8    4    8    7    4    9
# 2  1 2021-06-20 2021-07-01 Thursday   Ho  1    1    1    5    5    5
# 3  1 2021-06-20 2021-07-01 Thursday   Ho  9    4    4    4    6    4
# 4  1 2021-06-20 2021-07-01 Thursday   Ho  3    3    3    3    7    3
# 5  1 2021-06-20 2021-07-09   Friday       2    2    2    2    2    2
# 6  1 2021-06-20 2021-07-09   Friday       6    6    6    6    6    6
# 7  1 2021-06-20 2021-07-10 Saturday   Ho  2    7    2    4    4    4
# 8  1 2021-06-20 2021-07-10 Saturday   Ho  3    3    3    3    3    3

(I recommend you permanently make date1 and date2 proper Date objects in the frame instead of converting them every time you do something. While the conversion is relatively inexpensive, it's also unnecessary, and the consequence of forgetting it might be subtle differences in the results, i.e., treating the column as a categorical variable versus a continuous/discrete-ordinal one.)

r2evans
  • 141,215
  • 6
  • 77
  • 149
3

Since you already use lubridate, you could apply the `month()` function from that package:

# Drop the April rows; lubridate's month() gives the month number of date2.
data %>%
  filter(!(month(date2) == 4))
  Id      date1      date2     Week DTPE D1 DR01 DR02 DR03 DR04 DR05
1  1 2021-06-20 2021-07-01 Thursday   Ho  8    4    8    7    4    9
2  1 2021-06-20 2021-07-01 Thursday   Ho  1    1    1    5    5    5
3  1 2021-06-20 2021-07-01 Thursday   Ho  9    4    4    4    6    4
4  1 2021-06-20 2021-07-01 Thursday   Ho  3    3    3    3    7    3
5  1 2021-06-20 2021-07-09   Friday       2    2    2    2    2    2
6  1 2021-06-20 2021-07-09   Friday       6    6    6    6    6    6
7  1 2021-06-20 2021-07-10 Saturday   Ho  2    7    2    4    4    4
8  1 2021-06-20 2021-07-10 Saturday   Ho  3    3    3    3    3    3
TarJae
  • 72,363
  • 6
  • 19
  • 66
  • 1
    Given that the OP is already using `lubridate`, this might be the more canonical answer for what they are doing. – r2evans Sep 02 '21 at 21:22
1

Using substr

# Characters 6-7 of a "YYYY-MM-DD" string are the month; keep the non-April rows.
subset(data, subset = substring(date2, 6, 7) != "04")

-output

 Id      date1      date2     Week DTPE D1 DR01 DR02 DR03 DR04 DR05
1   1 2021-06-20 2021-07-01 Thursday   Ho  8    4    8    7    4    9
2   1 2021-06-20 2021-07-01 Thursday   Ho  1    1    1    5    5    5
3   1 2021-06-20 2021-07-01 Thursday   Ho  9    4    4    4    6    4
4   1 2021-06-20 2021-07-01 Thursday   Ho  3    3    3    3    7    3
18  1 2021-06-20 2021-07-09   Friday       2    2    2    2    2    2
19  1 2021-06-20 2021-07-09   Friday       6    6    6    6    6    6
20  1 2021-06-20 2021-07-10 Saturday   Ho  2    7    2    4    4    4
21  1 2021-06-20 2021-07-10 Saturday   Ho  3    3    3    3    3    3
akrun
  • 874,273
  • 37
  • 540
  • 662