I have the following data frames (df11 and df22) that I'd like to merge (full join) on "UserID = UserID" and a date difference of <= 30 days. That is, if the UserIDs match AND the dates are within 30 days of each other, I'd like the two rows merged into a single row. I've looked at fuzzy join here and sqldf here, but I can't figure out how to implement either of those for my data frames.
# Example input 1: one row per temperature reading. UserID 1 appears twice
# (January and December). DateTime and Temp are stored as character strings.
df1 <- data.frame(
  UserID = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L),
  Full.Name = c(
    "John Smith", "Jack Peters", "Bob Brown", "Jane Doe",
    "Jackie Jane", "Sarah Brown", "Chloe Brown", "John Smith"
  ),
  Info = c("yes", "no", "yes", "yes", "yes", "yes", "no", "yes"),
  EncounterID = c(13L, 14L, 15L, 16L, 17L, 18L, 19L, 13L),
  DateTime = c(
    "1/2/21 00:00", "1/5/21 12:00", "1/1/21 1:31", "1/5/21 3:34",
    "5/9/21 5:33", "5/8/21 3:39", "12/12/21 2:30", "12/11/21 9:21"
  ),
  Temp = c("100", "103", "104", "103", "101", "102", "103", "105"),
  misc = c(
    "(null)", "no", "(null)", "(null)",
    "(null)", "(null)", "(null)", "(null)"
  ),
  stringsAsFactors = FALSE
)
# Example input 2: one row per encounter, with the user's date of birth and the
# encounter date. DOB and EncounterDate are stored as character strings.
df2 <- data.frame(
  UserID = c(1L, 2L, 3L, 4L, 5L, 6L),
  Full.Name = c(
    "John Smith", "Jack Peters", "Bob Brown",
    "Jane Doe", "Jackie Jane", "Sarah Brown"
  ),
  DOB = c("1/1/90", "1/10/90", "1/2/90", "2/20/80", "2/2/80", "12/2/80"),
  EncounterID = c(13L, 14L, 15L, 16L, 17L, 18L),
  EncounterDate = c("1/1/21", "1/2/21", "1/1/21", "1/6/21", "5/7/21", "5/8/21"),
  Type = c("Intro", "Intro", "Intro", "Intro", "Care", "Out"),
  responses = c("(null)", "no", "yes", "no", "no", "unsat"),
  stringsAsFactors = FALSE
)
# Diagnostic: list the namespaces currently loaded in this session.
loadedNamespaces()
# NOTE(review): Rcpp is installed here but never attached in the visible code —
# presumably a (re)install to repair a dependency; confirm it is still needed.
install.packages("Rcpp")
# dplyr supplies %>%, mutate(), select(), across() and as_tibble() used below.
library(dplyr)
# tidyr supplies separate().
library(tidyr)
# Installs lubridate on every run before attaching it.
install.packages("lubridate")
# lubridate supplies mdy(), as_datetime(), days(), %m-% and %m+%.
library(lubridate)
# df11: df1 as a tibble, with DateTime replaced by a Date column.
# DateTime is split on the space; only the date piece is kept (NA in `into`
# discards the time piece), parsed as month/day/year and stored as a
# date-time.
df11 <- as_tibble(df1) %>%
  separate(DateTime, into = c("Date", NA), sep = " ") %>%
  mutate(Date = as_datetime(mdy(Date)))
# df22: df2 as a tibble, with EncounterDate parsed as month/day/year and
# stored as a date-time. All other columns (including DOB) are left as-is.
df22 <- as_tibble(df2) %>%
  mutate(EncounterDate = as_datetime(mdy(EncounterDate)))
@r2evans After running the first set of code, I get the following output, which is slightly different from yours.
# Add the lower and upper bounds of a 30-day window around each Date, then
# print the result.
df11 <- df11 %>%
  mutate(
    Date_m30 = Date %m-% days(30),
    Date_p30 = Date %m+% days(30)
  )
df11
# A tibble: 8 x 7
UserID Full.Name Info EncounterID Date Temp misc
<int> <chr> <chr> <int> <dttm> <chr> <chr>
1 1 John Smith yes 13 2021-01-02 00:00:00 100 (null)
2 2 Jack Peters no 14 2021-01-05 00:00:00 103 no
3 3 Bob Brown yes 15 2021-01-01 00:00:00 104 (null)
4 4 Jane Doe yes 16 2021-01-05 00:00:00 103 (null)
5 5 Jackie Jane yes 17 2021-05-09 00:00:00 101 (null)
6 6 Sarah Brown yes 18 2021-05-08 00:00:00 102 (null)
7 7 Chloe Brown no 19 2021-12-12 00:00:00 103 (null)
8 1 John Smith yes 13 2021-12-11 00:00:00 105 (null)