# Example data
set.seed(2022)
n <- 20
df <- data.frame(
dt = rep(as.Date('2010-06-01'), n) + cumsum(sample(1:20, n, TRUE))
)
df
#> dt
#> 1 2010-06-05
#> 2 2010-06-24
#> 3 2010-07-08
#> 4 2010-07-19
#> 5 2010-07-23
#> 6 2010-07-29
#> 7 2010-08-12
#> 8 2010-08-21
#> 9 2010-09-04
#> 10 2010-09-11
#> 11 2010-09-29
#> 12 2010-10-15
#> 13 2010-10-20
#> 14 2010-10-21
#> 15 2010-11-09
#> 16 2010-11-10
#> 17 2010-11-12
#> 18 2010-11-19
#> 19 2010-12-01
#> 20 2010-12-16
library(dplyr, warn.conflicts = FALSE)
library(purrr)
cutoff <- 25
df %>%
# if date is < cutoff days of first date, maintain the same group
# else create a new group
group_by(g = accumulate(dt, ~ if (.y - .x < cutoff) .x else .y)) %>%
# for each group select the first row
slice_head(n = 1) %>%
# ungroup and remove grouping variable
ungroup() %>%
select(-g)
#> # A tibble: 7 × 1
#> dt
#> <date>
#> 1 2010-06-05
#> 2 2010-07-08
#> 3 2010-08-12
#> 4 2010-09-11
#> 5 2010-10-15
#> 6 2010-11-09
#> 7 2010-12-16
Created on 2022-02-13 by the reprex package (v2.0.1)
Or, using data.table::rowid
library(dplyr, warn.conflicts = FALSE)
library(purrr)
library(data.table, warn.conflicts = FALSE)
cutoff <- 25
df %>%
filter(rowid(accumulate(dt, ~ if (.y - .x < cutoff) .x else .y)) == 1)
#> dt
#> 1 2010-06-05
#> 2 2010-07-08
#> 3 2010-08-12
#> 4 2010-09-11
#> 5 2010-10-15
#> 6 2010-11-09
#> 7 2010-12-16
Created on 2022-02-13 by the reprex package (v2.0.1)