Several ways to attack this.
First, we need to identify a 3-day period for each. To do so, I'll create a datevec
which is a sequence of days.
dat$RealDate <- as.Date(dat$FROM_DATE, format = "%m/%d/%Y")
datevec <- seq(min(dat$RealDate), max(dat$RealDate) + 3, by = 3)
dat$Period1 <- cut(dat$RealDate, datevec,
labels = paste(datevec[-length(datevec)], datevec[-1], sep = " - "))
dat
# OBJECTID District Zone year_value FROM_DATE SUM_Crime RealDate Period1
# 1 1 Northwestern Not in Zone 2019 2/6/2019 4 2019-02-06 2019-02-06 - 2019-02-09
# 2 2 Northwestern Zone 30 2019 2/7/2019 6 2019-02-07 2019-02-06 - 2019-02-09
# 3 3 Northwestern Zone 40 2019 2/8/2019 5 2019-02-08 2019-02-06 - 2019-02-09
# 4 4 Northwestern Zone 30 2019 2/9/2019 2 2019-02-09 2019-02-09 - 2019-02-12
# 5 5 Northwestern Not in Zone 2019 2/10/2019 4 2019-02-10 2019-02-09 - 2019-02-12
# 6 6 Northwestern Zone 40 2019 2/11/2019 4 2019-02-11 2019-02-09 - 2019-02-12
# 7 7 Northwestern Zone 30 2019 2/12/2019 0 2019-02-12 2019-02-12 - 2019-02-15
If you want to keep the dates formatted as you have them (which are not real Date
s in R), then we can add a second column for that format:
datevec2 <- format(datevec, format = "%m/%d/%Y")
dat$Period2 <- cut(dat$RealDate, datevec,
labels = paste(datevec2[-length(datevec2)], datevec2[-1], sep = " - "))
dat
# OBJECTID District Zone year_value FROM_DATE SUM_Crime RealDate Period1 Period2
# 1 1 Northwestern Not in Zone 2019 2/6/2019 4 2019-02-06 2019-02-06 - 2019-02-09 02/06/2019 - 02/09/2019
# 2 2 Northwestern Zone 30 2019 2/7/2019 6 2019-02-07 2019-02-06 - 2019-02-09 02/06/2019 - 02/09/2019
# 3 3 Northwestern Zone 40 2019 2/8/2019 5 2019-02-08 2019-02-06 - 2019-02-09 02/06/2019 - 02/09/2019
# 4 4 Northwestern Zone 30 2019 2/9/2019 2 2019-02-09 2019-02-09 - 2019-02-12 02/09/2019 - 02/12/2019
# 5 5 Northwestern Not in Zone 2019 2/10/2019 4 2019-02-10 2019-02-09 - 2019-02-12 02/09/2019 - 02/12/2019
# 6 6 Northwestern Zone 40 2019 2/11/2019 4 2019-02-11 2019-02-09 - 2019-02-12 02/09/2019 - 02/12/2019
# 7 7 Northwestern Zone 30 2019 2/12/2019 0 2019-02-12 2019-02-12 - 2019-02-15 02/12/2019 - 02/15/2019
From here, base R aggregation:
ag <- aggregate(SUM_Crime ~ Zone + Period2, data = dat, FUN = sum)
ag
# Zone Period2 SUM_Crime
# 1 Not in Zone 02/06/2019 - 02/09/2019 4
# 2 Zone 30 02/06/2019 - 02/09/2019 6
# 3 Zone 40 02/06/2019 - 02/09/2019 5
# 4 Not in Zone 02/09/2019 - 02/12/2019 4
# 5 Zone 30 02/09/2019 - 02/12/2019 2
# 6 Zone 40 02/09/2019 - 02/12/2019 4
# 7 Zone 30 02/12/2019 - 02/15/2019 0
xtabs( SUM_Crime ~ Zone + Period2, data = ag)
# Period2
# Zone 02/06/2019 - 02/09/2019 02/09/2019 - 02/12/2019 02/12/2019 - 02/15/2019
# Not in Zone 4 4 0
# Zone 30 6 2 0
# Zone 40 5 4 0
You can also put this into a dplyr
pipe if you'd like:
library(dplyr)
library(tidyr) # pivot_wider
dat2 <- dat %>%
mutate(RealDate = as.Date(FROM_DATE, format = "%m/%d/%Y"))
datevec <- seq(min(dat2$RealDate), max(dat2$RealDate) + 3, by = 3)
dat2 %>%
mutate(
Period1 = cut(RealDate, datevec,
labels = paste(datevec[-length(datevec)], datevec[-1], sep = " - "))
) %>%
group_by(Zone, Period1) %>%
summarize(SUM_Crime = sum(SUM_Crime)) %>%
ungroup() %>%
filter(SUM_Crime > 0) %>%
pivot_wider(., "Zone", names_from = "Period1", values_from = "SUM_Crime")
# # A tibble: 3 x 3
# Zone `2019-02-06 - 2019-02-09` `2019-02-09 - 2019-02-12`
# <chr> <int> <int>
# 1 Not in Zone 4 4
# 2 Zone 30 6 2
# 3 Zone 40 5 4
Data
dat <- read.csv(header = TRUE, stringsAsFactors = FALSE, text = "
OBJECTID,District,Zone,year_value,FROM_DATE,SUM_Crime
1,Northwestern,Not in Zone,2019,2/6/2019,4
2,Northwestern,Zone 30,2019,2/7/2019,6
3,Northwestern,Zone 40,2019,2/8/2019,5
4,Northwestern,Zone 30,2019,2/9/2019,2
5,Northwestern,Not in Zone,2019,2/10/2019,4
6,Northwestern,Zone 40,2019,2/11/2019,4
7,Northwestern,Zone 30,2019,2/12/2019,0")