if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, lubridate)
# Example of sample dates - these are to be used to cross check if date exists within the range
Sample.Dates = tibble(
ID = "ID",
Round = 1:3,
Start.Date = dmy(c("03/12/2018","10/12/2018","17/12/2018")),
End.Date = dmy(c("09/12/2018","16/12/2018","23/12/2018")))
# Reference dates for a particular player - "John". Need to cross check the date against Sample.Dates and attach round, start and end date columns
Ref.Dates = tibble(
ID= "ID",
Date = seq.Date(ymd("2018-12-05"), ymd("2018-12-31") , by = "day"),
Player = "John",
Rows = row_number(Date))
# Function for checking if date exists within range and then returns the round, start and end date values
Dates.Check.YN.Func = function(x){
Date = x %>% pull(Date)
Cross.Check = Sample.Dates %>% rowwise()%>%
dplyr::mutate(Match = ifelse(between(Date, Start.Date, End.Date),1,0))%>%
filter(Match == 1)%>%
ungroup()%>%
select(-Match)
left_join(x, Cross.Check, by = "ID")
}
# Applying function to each row/date using nest()
Data.Nest = Ref.Dates %>%
nest(-Rows)%>%
mutate(out = map(data,Dates.Check.YN.Func)) %>%
unnest(out) %>%
select(-data)
Now this code works with no problems. However this is just a dummy data set and in actual fact I want to cross check over 100,000 dates. When doing this with my real data set this takes ~30mins. Searching to see if anyone can see a way of speeding up my code using a tidyverse solution (preferred) or other means.