2

Have: \\

date        age1 age2 age3 age4 age5 age6 useditems total items
11/10/2021   1     2   1    2    1    1     15         20
11/11/2021   2     1   2    4    1    2      5         30
09/16/2022   1     2   1    5    3    1      3         10
09/17/2022   2     3   1    3    1    1      2         15
04/05/2021   1     1   2    1    2    1      3          9
04/06/2021   2     1   2    1    3    2      1          9

Want: \

Week        age_sum useditems_sum total_sum Week
11/07/2021    20     20              50
09/11/2022    24      5              25
04/04/2021    19      4              18

I want to group my data by week as in the 'want' and calculate the sum across rows so that I have total for each week.

I am new to R and trying to figure this out. Any help or guidance is appeciated.

ThomasIsCoding
  • 96,636
  • 9
  • 24
  • 81

3 Answers3

2

We may also do

library(dplyr) # >= 1.1.0
library(lubridate)
df1 %>% 
  mutate(date = mdy(date),
  Week = format(as.Date(format(date, '%Y-%U-0'), 
     format = '%Y-%W-%w'), "%m/%d/%Y")) %>% 
  reframe(age_sum = sum(pick(starts_with("age"))), 
   across(c(useditems, totalitems), ~ sum(.x), .names = "{.col}_sum"),
    .by = Week)

-output

        Week age_sum useditems_sum totalitems_sum
1 11/07/2021      20            20             50
2 09/11/2022      24             5             25
3 04/04/2021      19             4             18

data

df1 <- structure(list(date = c("11/10/2021", "11/11/2021", "09/16/2022", 
"09/17/2022", "04/05/2021", "04/06/2021"), age1 = c(1L, 2L, 1L, 
2L, 1L, 2L), age2 = c(2L, 1L, 2L, 3L, 1L, 1L), age3 = c(1L, 2L, 
1L, 1L, 2L, 2L), age4 = c(2L, 4L, 5L, 3L, 1L, 1L), age5 = c(1L, 
1L, 3L, 1L, 2L, 3L), age6 = c(1L, 2L, 1L, 1L, 1L, 2L), useditems = c(15L, 
5L, 3L, 2L, 3L, 1L), totalitems = c(20L, 30L, 10L, 15L, 9L, 9L
)), class = "data.frame", row.names = c(NA, -6L))
akrun
  • 874,273
  • 37
  • 540
  • 662
2

Here is a solution in base R (about three times faster than dplyr solutions).

First, convert your dates into "Date" format,

dat$date <- as.Date(dat$date, format='%m/%d/%Y')

then split by week, find first day of the week1 and add calculations:

by(dat, strftime(dat$date, '%Y-%U'), \(x) {
  w <- seq.Date(x$date[1] - 6, x$date[1], by='day')
  data.frame(week=w[weekdays(w) == 'Sunday']) |>
    cbind(t(c(age_sum=sum(x[grep('^age\\d+', names(x))]), sum=colSums(x[c("useditems", "total_items")]))))
}) |> do.call(what=rbind)
#               week age_sum sum.useditems sum.total_items
# 2021-14 2021-04-04      19             4              18
# 2021-45 2021-11-07      20            20              50
# 2022-37 2022-09-11      24             5              25

Note, that this follows the US convention. For ISO, you might want to use this version:

by(dat, strftime(dat$date, '%Y-%V'), \(x) {
  w <- seq.Date(x$date[1] - 6, x$date[1], by='day')
  data.frame(week=w[weekdays(w) == 'Monday']) |>
    cbind(t(c(age_sum=sum(x[grep('^age\\d+', names(x))]), sum=colSums(x[c("useditems", "total_items")]))))
}) |> do.call(what=rbind)
#               week age_sum sum.useditems sum.total_items
# 2021-14 2021-04-05      19             4              18
# 2021-45 2021-11-08      20            20              50
# 2022-37 2022-09-12      24             5              25

Data:

dat <- structure(list(date = c("11/10/2021", "11/11/2021", "09/16/2022", 
"09/17/2022", "04/05/2021", "04/06/2021"), age1 = c(1L, 2L, 1L, 
2L, 1L, 2L), age2 = c(2L, 1L, 2L, 3L, 1L, 1L), age3 = c(1L, 2L, 
1L, 1L, 2L, 2L), age4 = c(2L, 4L, 5L, 3L, 1L, 1L), age5 = c(1L, 
1L, 3L, 1L, 2L, 3L), age6 = c(1L, 2L, 1L, 1L, 1L, 2L), useditems = c(15L, 
5L, 3L, 2L, 3L, 1L), total_items = c(20L, 30L, 10L, 15L, 9L, 
9L)), class = "data.frame", row.names = c(NA, -6L))
jay.sf
  • 60,139
  • 8
  • 53
  • 110
1

We could do it this way:

library(dplyr)

df %>%
  mutate(date = as.Date(date, format = "%m/%d/%Y"),
         week = format(date, format = "%U")) %>% 
  group_by(week) %>%
  summarise(age_sum = sum(age1 + age2 + age3 + age4 + age5 + age6),
            useditems_sum = sum(useditems),
            total_sum = sum(total_items)) %>% 
  arrange(-total_sum)
 week  age_sum useditems_sum total_sum
  <chr>   <int>         <int>     <int>
1 45         20            20        50
2 37         24             5        25
3 14         19             4        18
TarJae
  • 72,363
  • 6
  • 19
  • 66