Ok, here we go.
We're going to solve your problem in five steps:
- Create 'interval' column from your original 'start' and 'end'
- Create vector of all start + end dates and sort it
- Using our 'interval' column, store an ordered list of all dates within that interval, per row
- Use
lubridate:int_diff()
to turn our ordered list of dates into a list of intervals
- Unnest our list-of-interval column into multiple rows: we're done!
Below, you'll find a short version that does just the above five steps, and then a longer version the plots the results with ggplot
.
Short version
library(tidyverse)
library(ggplot2)
# Short version
# I'm using a tibble instead of data.table; hope you don't mind
dt <- tibble(
id = c(1, 1, 1),
start = c("1970-01-10 06:01:16", "1970-01-10 12:01:16", "1970-01-10 09:34:49"),
end = c("1970-01-10 12:01:16", "1970-01-10 17:01:16", "1970-01-11 07:49:48"),
value = c(1, 0, 3)
)
# 1. Create 'interval' column from your original 'start' and 'end'
# 2. Create vector of all start + end dates and sort it
# 3. Using our 'interval' column, store an ordered list of all dates within that interval, per row
# 4. Use lubridate:int_diff() to turn our ordered list of dates into a list of intervals
# 5. Unnest our list-of-interval column into multiple rows: we're done!
# 1. Create 'interval' column
dt <- dt %>%
mutate(
start = lubridate::ymd_hms(start), # convert to Date objects
end = lubridate::ymd_hms(end), # convert to Date objects
date_range = lubridate::interval(start, end)
)
# 2. Create vector of all start + end dates and sort it
all_dates <- dt %>%
select(start, end, value) %>%
pivot_longer(!value, names_to = "date_type", values_to="date") %>%
arrange(date) %>%
select(date) %>%
distinct()
# Steps 3 and 4 in one mutate
dt <- dt %>%
rowwise() %>%
mutate(
# 3. Store an ordered list of all dates within that interval
bounded_dates = list(filter(all_dates, all_dates$date %within% date_range) %>% pull(date)),
# 4. convert list-of-dates column into list-of-intervals
bounded_intervals = list(int_diff(bounded_dates))
)
# 5. Unnest our list-of-interval column into multiple rows: we're done!
new_intervals <- dt %>%
select(value, bounded_intervals) %>%
unnest(bounded_intervals)
new_intervals
This should create:
# A tibble: 6 x 7
value bounded_intervals start
<fct> <Interval> <dttm>
1 1 1970-01-10 06:01:16 UTC--1970-01-10 09:34:49 UTC 1970-01-10 06:01:16
2 1 1970-01-10 09:34:49 UTC--1970-01-10 12:01:16 UTC 1970-01-10 09:34:49
3 0 1970-01-10 12:01:16 UTC--1970-01-10 17:01:16 UTC 1970-01-10 12:01:16
4 3 1970-01-10 09:34:49 UTC--1970-01-10 12:01:16 UTC 1970-01-10 09:34:49
5 3 1970-01-10 12:01:16 UTC--1970-01-10 17:01:16 UTC 1970-01-10 12:01:16
6 3 1970-01-10 17:01:16 UTC--1970-01-11 07:49:48 UTC 1970-01-10 17:01:16
# … with 4 more variables: end <dttm>, row <fct>, y_min <dbl>, y_max <dbl>
Longer version
This should produce plots that look like:


# Here is the same code as above, but with more steps and some plots to illustrate the results.
dt <- tibble(
id = c(1, 1, 1),
start = c("1970-01-10 06:01:16", "1970-01-10 12:01:16", "1970-01-10 09:34:49"),
end = c("1970-01-10 12:01:16", "1970-01-10 17:01:16", "1970-01-11 07:49:48"),
value = c(1, 0, 3)
)
dt <- dt %>%
mutate(
value = as_factor(value),
start = lubridate::ymd_hms(start), # convert to Date objects
end = lubridate::ymd_hms(end), # convert to Date objects
date_range = lubridate::interval(start, end), # create a 'Date Interval' for use later
row = as_factor(row_number()), # For plotting
y_min = as.integer(value) - 0.4, # For plotting
y_max = as.integer(value) + 0.4, # For plotting
)
# Let's plot the initial data
ggplot(dt, aes(xmin=start, xmax=end, ymin=y_min, ymax = y_max, color=value)) +
geom_rect() +
labs(title = "original intervals")
ggsave("original_intervals.png")
all_dates <- dt %>%
select(start, end, value) %>%
pivot_longer(!value, names_to = "date_type", values_to="date") %>%
arrange(date) %>%
select(date) %>%
distinct()
dt <- dt %>%
rowwise() %>%
mutate(
bounded_dates = list(filter(all_dates, all_dates$date %within% date_range) %>% pull(date)),
bounded_intervals = list(int_diff(bounded_dates))
)
new_intervals <- dt %>%
select(value, bounded_intervals) %>%
unnest(bounded_intervals)
# Add plotting-relevant columns
new_intervals <- new_intervals %>%
mutate(
start = int_start(bounded_intervals),
end = int_end(bounded_intervals),
row = as_factor(row_number()),
y_min = as.integer(value) - 0.4,
y_max = as.integer(value) + 0.4,
value = as_factor(value)
)
ggplot(new_intervals, aes(xmin=start, xmax=end, ymin=y_min, ymax = y_max, color=row)) +
geom_rect() +
labs(title = "new intervals")
ggsave("updated_intervals.png")