1

Data:

 set.seed(42)

df1 = data.frame(
  Date = seq.Date(as.Date("2018-01-01"),as.Date("2018-01-30"),1),
  value = sample(1:30),
  Y = sample(c("yes", "no"), 30, replace = TRUE)
)

df2 = data.frame(
  Date = seq.Date(as.Date("2018-01-01"),as.Date("2018-01-30"),7)
)

I want for each date in df2$Date calculate the sum of df1$Value if date in df1$Date falls within df2$Date and df2$Date+6

Inshort I need to calculate weekly sums

CapaNif
  • 83
  • 6

3 Answers3

0

Check out library lubridate and dplyr, those two are quiet common.

library(lubridate)
library(dplyr)

df1$last_week_day <- ceiling_date(df1$Date, "week") + 1
df1 %>% group_by(last_week_day) %>% summarize(week_value = sum(value))
lypskee
  • 342
  • 1
  • 11
  • `df %>% group_by(week = cut(date, "week")) %>% mutate(weekly_income = sum(income))` I tried this which works fine but in my case I want the week to start from a particular date and it's not always monday. – CapaNif Mar 26 '19 at 10:02
0

Using data.table, create a range start/end, then merge on overlap, then get sum over group:

library(data.table)

df1$start <- df1$Date
df1$end <- df1$Date

df2$start <- df2$Date
df2$end <- df2$Date + 6

setDT(df1, key = c("start", "end"))
setDT(df2, key = c("start", "end"))

foverlaps(df1, df2)[, list(mySum = sum(value)), by = Date ]
#          Date mySum
# 1: 2018-01-01   138
# 2: 2018-01-08    96
# 3: 2018-01-15    83
# 4: 2018-01-22   109
# 5: 2018-01-29    39
zx8754
  • 52,746
  • 12
  • 114
  • 209
0

We can use fuzzyjoin

library(dplyr)
library(fuzzyjoin)

df2$EndDate <- df2$Date+6

fuzzy_left_join(
  df1, df2,
  by = c(
    "Date" = "Date",
    "Date" = "EndDate"
  ), match_fun = list(`>=`, `<=`)) %>% 
  group_by(Date.y) %>% summarise(Sum=sum(value))


# A tibble: 5 x 2
     Date.y       Sum
     <date>     <int>
  1 2018-01-01   138
  2 2018-01-08    96
  3 2018-01-15    83
  4 2018-01-22   109
  5 2018-01-29    39
A. Suliman
  • 12,923
  • 5
  • 24
  • 37