I have a data frame were each row is a unique observation.
Observations overlap in time if they are located within a specified temporal distance (e.g. 30 days) of one another.
Observations overlap in space if they are located within a specified spatial distance (e.g. 20 kilometers) of one another.
I am working with the collections of observations that overlap in both time and space. I want to make a column (overlaps) that contains vectors with the ids of the observations that overlap with an observation. I have tried the solution below, but the run time is too poor for the solution to be applicable.
library(dplyr)
library(lubridate)
library(purrr)
library(geosphere)
spat_proximity <- function(x, y, z) {
return(which(map_dbl(y, ~ distGeo(., x)) <= z))}
temp_proximity <- function(x, y, z) {
return(which(map_dbl(y, ~ abs(x - .)) <= z))}
test %>%
mutate(overlaps = map2(map(place, ~ spat_proximity(., place, 20000)),
map(time, ~ temp_proximity(., time, 30)),
~ intersect(.x, .y)))
Ideas on how to speed things up would be much appreciated.
Desired output
structure(list(id = 1:42, time = structure(c(1478601762, 1475170279,
1469770219, 1462441336, 1474739469, 1488216507, 1475203721, 1468705558,
1481722718, 1485897197, 1488669576, 1501288618, 1510266595, 1516828588,
1497048175, 1516546144, 1507576242, 1517654363, 1496070298, 1519765220,
1507408104, 1532046710, 1542196446, 1534747170, 1533605231, 1521381844,
1545389880, 1537988628, 1544304998, 1524842149, 1551051077, 1540822870,
1579775599, 1580337175, 1551486497, 1554879837, 1568620434, 1568701543,
1556387550, 1561253396, 1582925482, 1562166384), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), place = list(c(7.59729413351368, 52.6052275122351
), c(9.99728447956781, 53.43773657253), c(10.1114473929533, 53.1295890148866
), c(7.74115218835801, 53.555354690339), c(9.82895066827581,
53.1009319396015), c(10.061107415855, 53.1908752763309), c(10.1134381934544,
53.1450558612239), c(8.59001735546083, 53.1767797285482), c(6.43939168487555,
52.5520931654252), c(8.38811111096636, 53.9043055557574), c(6.20061916537948,
52.462037409576), c(8.66656282486832, 52.8269702466929), c(9.92127490588442,
53.1240045666796), c(9.77810957468704, 53.1445777603789), c(10.0972382106036,
53.1604265989175), c(10.0473952445094, 53.1698097395641), c(9.23773401919961,
53.2120381900218), c(8.29524237837988, 52.822332696399), c(6.63690696797941,
53.4436726627048), c(6.89839325296288, 53.947454203445), c(6.97064542834721,
54.2487197094445), c(9.98865072631714, 53.4088944299342), c(9.94164401569524,
53.1500576073959), c(9.64242996587752, 52.9285470044703), c(10.1026940185685,
53.1635394335485), c(9.94874529044194, 53.2202512735354), c(8.8025526552284,
53.2423093779114), c(7.93352467761445, 52.9129105531343), c(6.6418846001424,
53.2459031608081), c(7.56102465003101, 53.5306444680171), c(7.36619114998468,
53.748869508885), c(7.40993284414052, 54.5367797663042), c(9.90022663895919,
53.3726361099083), c(9.41110555596208, 52.5001044709056), c(10.1151193231519,
53.1539029361817), c(10.1064400828529, 53.1793449776572), c(9.94235711256256,
53.2622041055899), c(9.44215997717822, 53.4799339987572), c(7.03832846889284,
53.1986115213435), c(7.32755360272354, 53.416700338513), c(7.57828611098173,
53.6107027769073), c(7.55411005022882, 54.1905803935834)), overlaps = list(
1L, 2L, 3L, 4L, c(5L, 7L), 6L, c(5L, 7L), 8L, 9L, 10L, 11L,
12L, 13L, c(14L, 16L), 15L, c(14L, 16L), 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L,
33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L)), row.names = c(NA,
-42L), class = c("tbl_df", "tbl", "data.frame"))
Data
structure(list(id = 1:42, time = structure(c(1478601762, 1475170279,
1469770219, 1462441336, 1474739469, 1488216507, 1475203721, 1468705558,
1481722718, 1485897197, 1488669576, 1501288618, 1510266595, 1516828588,
1497048175, 1516546144, 1507576242, 1517654363, 1496070298, 1519765220,
1507408104, 1532046710, 1542196446, 1534747170, 1533605231, 1521381844,
1545389880, 1537988628, 1544304998, 1524842149, 1551051077, 1540822870,
1579775599, 1580337175, 1551486497, 1554879837, 1568620434, 1568701543,
1556387550, 1561253396, 1582925482, 1562166384), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), place = list(c(7.59729413351368, 52.6052275122351
), c(9.99728447956781, 53.43773657253), c(10.1114473929533, 53.1295890148866
), c(7.74115218835801, 53.555354690339), c(9.82895066827581,
53.1009319396015), c(10.061107415855, 53.1908752763309), c(10.1134381934544,
53.1450558612239), c(8.59001735546083, 53.1767797285482), c(6.43939168487555,
52.5520931654252), c(8.38811111096636, 53.9043055557574), c(6.20061916537948,
52.462037409576), c(8.66656282486832, 52.8269702466929), c(9.92127490588442,
53.1240045666796), c(9.77810957468704, 53.1445777603789), c(10.0972382106036,
53.1604265989175), c(10.0473952445094, 53.1698097395641), c(9.23773401919961,
53.2120381900218), c(8.29524237837988, 52.822332696399), c(6.63690696797941,
53.4436726627048), c(6.89839325296288, 53.947454203445), c(6.97064542834721,
54.2487197094445), c(9.98865072631714, 53.4088944299342), c(9.94164401569524,
53.1500576073959), c(9.64242996587752, 52.9285470044703), c(10.1026940185685,
53.1635394335485), c(9.94874529044194, 53.2202512735354), c(8.8025526552284,
53.2423093779114), c(7.93352467761445, 52.9129105531343), c(6.6418846001424,
53.2459031608081), c(7.56102465003101, 53.5306444680171), c(7.36619114998468,
53.748869508885), c(7.40993284414052, 54.5367797663042), c(9.90022663895919,
53.3726361099083), c(9.41110555596208, 52.5001044709056), c(10.1151193231519,
53.1539029361817), c(10.1064400828529, 53.1793449776572), c(9.94235711256256,
53.2622041055899), c(9.44215997717822, 53.4799339987572), c(7.03832846889284,
53.1986115213435), c(7.32755360272354, 53.416700338513), c(7.57828611098173,
53.6107027769073), c(7.55411005022882, 54.1905803935834))), row.names = c(NA,
-42L), class = c("tbl_df", "tbl", "data.frame"))