A dplyr option is to use join_by() with closest(), like this:
# join_by() and closest() require dplyr >= 1.1.0. Install the development
# version only when dplyr is missing or too old, instead of reinstalling from
# GitHub on every run.
if (!requireNamespace("dplyr", quietly = TRUE) ||
    utils::packageVersion("dplyr") < "1.1.0") {
  devtools::install_github("tidyverse/dplyr")
}
library(dplyr)

# Join spec: match rows on ID and, for each dataset2 row, keep only the
# closest dataset1 row whose Date is strictly earlier than the dataset2 Date
# (left side of `>` refers to the first table, right side to the second).
by <- join_by(ID, closest(Date > Date))

# Left join keeps every dataset2 row; rows without an earlier dataset1 date
# get NA in Date.y.
left_join(dataset2, dataset1, by)
#> ID Date.x Date.y
#> 1 A 2021-01-01 <NA>
#> 2 A 2021-01-01 <NA>
#> 3 A 2021-05-02 2021-04-27
#> 4 A 2021-05-09 2021-05-02
#> 5 A 2021-05-09 2021-05-02
#> 6 A 2021-05-09 2021-05-02
#> 7 A 2021-05-09 2021-05-02
#> 8 A 2021-06-16 2021-06-02
#> 9 A 2021-06-27 2021-06-02
Created on 2022-10-19 with reprex v2.0.2
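Please note: join_by() requires dplyr 1.1.0 or later. If your installed version is older, make sure to install the right version, e.g. the development version using devtools::install_github("tidyverse/dplyr").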
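To match in both directions (the closest earlier-or-equal date, falling back to the closest later date when no earlier one exists), you could use the following chain: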
library(dplyr)
dataset2 %>%
  # Step 1: closest dataset1 date on or before each dataset2 date.
  # The two Date columns come out as Date.x (dataset2) and Date.y (dataset1).
  left_join(dataset1, join_by(ID, closest(Date >= Date))) %>%
  # Step 2: closest dataset1 date on or after each dataset2 date, added as
  # a new column named Date.
  left_join(dataset1, join_by(ID, closest(Date.x <= Date))) %>%
  # Use the later match only where no earlier-or-equal match exists.
  # coalesce() keeps the column type; ifelse() would strip attributes and
  # turn Date values into bare numbers.
  mutate(Date.y = coalesce(Date.y, Date)) %>%
  # Drop the helper column from step 2.
  select(-Date)
#> ID Date.x Date.y
#> 1 A 2021-01-01 2021-02-08
#> 2 A 2021-01-01 2021-02-08
#> 3 A 2021-05-02 2021-05-02
#> 4 A 2021-05-09 2021-05-02
#> 5 A 2021-05-09 2021-05-02
#> 6 A 2021-05-09 2021-05-02
#> 7 A 2021-05-09 2021-05-02
#> 8 A 2021-06-16 2021-06-02
#> 9 A 2021-06-27 2021-06-02
Created on 2022-10-19 with reprex v2.0.2