df = read.table(text = 'ID Startdt Enddt
60A 5/4/2018 1/10/2022
60B 2/4/2019 12/20/2022
60C 8/22/2015 6/20/2020', header = T)
library(dplyr)
library(lubridate)
library(purrr)
df %>%
mutate(
across(ends_with("dt"), mdy), ## added this line to convert to date class
date_int = interval(Startdt, Enddt),
year = map2(year(Startdt), year(Enddt), seq)
) %>%
unnest(year) %>%
mutate(
year_int = interval(as.Date(paste0(year, '-01-01')),
as.Date(paste0(year, '-12-31'))
),
year_sect = intersect(date_int, year_int),
start_new = as.Date(int_start(year_sect)),
end_new = as.Date(int_end(year_sect))
) %>%
select(ID, start_new, end_new) %>%
mutate(
year = year(start_new),
days = as.numeric(end_new - start_new) + 1 ## added 1 here as a correction
) %>%
right_join(df, by = "ID") %>%
pivot_wider(
id_cols = c(ID, Startdt, Enddt),
names_from = year, values_from = days,
names_prefix = "year_",
values_fill = list(days = 0)
) %>%
mutate(days_number = rowSums(across(starts_with("year")))) ## updated this line to use `across()`
# # A tibble: 3 × 12
# ID Startdt Enddt year_2018 year_2019 year_2020 year_2021 year_2022 year_2015 year_2016 year_2017
# <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 60A 5/4/2018 1/10/2022 242 365 366 365 10 0 0 0
# 2 60B 2/4/2019 12/20/2022 0 331 366 365 354 0 0 0
# 3 60C 8/22/2015 6/20/2020 365 365 172 0 0 132 366 365
# # ℹ 1 more variable: days_number <dbl>