I need to identify instances where two events occur within a specific time window of each other, as follows. If event A occurs first, event B must occur within 24 hours. Conversely, if B occurs first, A must occur within 72 hours. When the criterion is met, I also need the "onset" time, i.e. the time at which the first of the two events occurred.
Event A
# Event A occurrences: a plain data.frame with one row per event, a character
# `fake_id`, and the event time `date` stored as POSIXct in UTC.
# Bound to `event_a` because the script further down reads it under that name.
event_a <- structure(list(fake_id = c("1000686267", "1000686267", "1000686267",
"1000686267", "1000686267", "1000686267", "1000686267", "1070640921",
"1070640921", "1070640921", "1070640921", "1070640921", "1070640921",
"1184695414", "1184695414", "1184695414", "1184695414", "1184695414"
), date = structure(c(1515063600, 1514822400, 1514822400, 1514822400,
1514822400, 1515146400, 1514901600, 1515330000, 1514822400, 1514822400,
1514822400, 1514822400, 1517385600, 1516701600, 1515142800, 1515178800,
1515178800, 1516557600), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA,
-18L), class = "data.frame", .Names = c("fake_id",
"date"))
Event B
# Event B occurrences: a plain data.frame with one row per event, a character
# `fake_id`, and the event time `date` stored as POSIXct in UTC.
# Bound to `event_b` because the script further down reads it under that name.
event_b <- structure(list(fake_id = c("1000686267", "1000686267", "1000686267",
"1000686267", "1000686267", "1000686267", "1000686267", "1000686267",
"1000686267", "1000686267", "1000686267", "1000686267", "1000686267",
"1000686267", "1000686267", "1000686267", "1000686267", "1070640921",
"1070640921", "1070640921", "1070640921", "1070640921", "1070640921",
"1184695414", "1184695414", "1184695414", "1184695414", "1184695414",
"1184695414", "1184695414"), date = structure(c(1516795200, 1516795200,
1516795200, 1516917600, 1517400000, 1517400000, 1515492000, 1515492000,
1516190400, 1516190400, 1517410800, 1517410800, 1516921200, 1515070800,
1515070800, 1515052800, 1516633200, 1517374800, 1515322800, 1515322800,
1516525200, 1515232800, 1516543200, 1516550400, 1515189600, 1516543200,
1516543200, 1515142800, 1515142800, 1515142800), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -30L), class = "data.frame", .Names = c("fake_id",
"date"))
Some code
library (data.table)
# Attempt at pairing A and B events per `fake_id` with keyed rolling joins.
# Inputs:  `event_a`, `event_b` - data.frame or data.table holding `fake_id`
#          and a POSIXct `date`.
# Output:  `event_df` - the distinct (fake_id, date) rows of both join results.

# Convert before subsetting: `with = FALSE` is a data.table argument, so the
# column selection fails if the inputs are still plain data.frames.
event_a <- as.data.table(event_a)[, c("fake_id", "date"), with = FALSE]
event_b <- as.data.table(event_b)[, c("fake_id", "date"), with = FALSE]

# Marker columns: after a join, a non-NA marker shows that the row found a
# partner in the other table.
event_a[, criteria_a := "criteria_a"]
event_b[, criteria_b := "criteria_b"]

# Rolling joins operate on the key: exact match on `fake_id`, roll on `date`.
setkeyv(event_a, c("fake_id", "date"))
setkeyv(event_b, c("fake_id", "date"))

# Roll limits in seconds: 24 h for A-then-B, 72 h for B-then-A.
join_window <- 60 * 60 * c(24, 72)

# One row per B event; the latest A at or before it is carried forward by at
# most 24 h. `date` in the result is the B time.
event_subset_a <- event_a[event_b, roll = join_window[1]]
# One row per A event; the latest B at or before it is carried forward by at
# most 72 h. `date` in the result is the A time.
event_subset_b <- event_b[event_a, roll = join_window[2]]

# The two results hold the marker columns in opposite order, so stack them by
# name rather than by position.
event_df <- rbind(event_subset_a, event_subset_b, use.names = TRUE)

# NOTE(review): both joins keep every row of the inner table (unmatched rows
# only get an NA marker), and the markers are dropped here without filtering
# on them. `event_df` therefore ends up as every distinct event time, not just
# qualifying onsets. The retained `date` is also the later event of a pair, so
# the onset (the earlier event's time) is never captured. Fixing this means
# filtering on the markers (or using `nomatch`) and carrying the matched
# table's own timestamp through the join.
event_df[, `:=`(c("criteria_a", "criteria_b"), NULL)]
setkeyv(event_df, c("fake_id", "date"))
event_df <- unique(event_df)
Current output
fake_id date
1 1184695414 2018-01-05 09:00:00
2 1184695414 2018-01-05 19:00:00
3 1184695414 2018-01-05 22:00:00
4 1184695414 2018-01-21 14:00:00
5 1184695414 2018-01-21 16:00:00
6 1184695414 2018-01-21 18:00:00
7 1184695414 2018-01-23 10:00:00
Desired output
fake_id date
1 1184695414 2018-01-05 09:00:00
2 1184695414 2018-01-21 14:00:00
3 1184695414 2018-01-23 10:00:00