0

I have the following script in R

library(tidyverse)
library(lubridate)
library(tibbletime)
payments <- read_csv("payments.csv")

monthly_sum_df <- payments %>%
  group_by(year(Date), month(Date)) %>%
  summarize(monthly_sum = sum(Amount, na.rm = TRUE))

colnames(monthly_sum_df) <- c("year", "month", "monthly_sum")

monthly_sum_df <- monthly_sum_df %>%
  drop_na() %>%
  mutate(date = paste(month, "-", year)) %>%
  mutate(date = parse_date_time(date, "m - y"))

mean_roll_3 <- rollify(mean, window = 3)

monthly_sum_df %>%
  select(date, monthly_sum) %>%
  mutate(mean_3 = mean_roll_3(monthly_sum))

I use tibbletime for the function mean_roll_3, however, this function seems to group_by year, because of this message in the prompt:

Adding missing grouping variables: year

In the result, the mean is calculated by 3 months, but for the next year is not being calculated with the previous (i.e. 2019-1 is NA when it should be calculated using the mean from 2018-12, etc)

Any way to rollify the function correctly?

dput of the results:

structure(list(year = c(2018, 2018, 2018, 2018, 2018, 2018, 2018, 
2018, 2018, 2018, 2018, 2018, 2019, 2019, 2019, 2019, 2019, 2019, 
2019, 2019, 2019, 2019, 2019, 2019, 2020, 2020, 2020, 2020, 2020, 
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2021, 2021, 2021, 2021, 
2021, 2021, 2021, 2021, 2021, 2021, 2021), date = structure(c(1514764800, 
1517443200, 1519862400, 1522540800, 1525132800, 1527811200, 1530403200, 
1533081600, 1535760000, 1538352000, 1541030400, 1543622400, 1546300800, 
1548979200, 1551398400, 1554076800, 1556668800, 1559347200, 1561939200, 
1564617600, 1567296000, 1569888000, 1572566400, 1575158400, 1577836800, 
1580515200, 1583020800, 1585699200, 1588291200, 1590969600, 1593561600, 
1596240000, 1598918400, 1601510400, 1604188800, 1606780800, 1609459200, 
1612137600, 1614556800, 1617235200, 1619827200, 1622505600, 1625097600, 
1627776000, 1630454400, 1633046400, 1638316800), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), monthly_sum = c(148608.87, 70428.96, 31349.07, 56794.06, 
38180.84, 92117.26, 116876.04, 112942.92, 48320.8, 20063.76, 
214159.64, 129344.37, 24507.88, 105899.21, 392706.68, 20116.58, 
35382.68, 107442.17, 212670.8, 55776.95, 90448.55, 215526.13, 
58887.12, 135513.86, 150394.01, 47638.67, 138796.63, 30807.85, 
66992.08, 93002.19, 147317.46, 58478.05, 221612.41, 127631.71, 
130702.85, 119925.89, 174094.97, 93998.026, 90549.49, 89263.14, 
118876.77, 106765.18, 152270.15, 69983.47, 57372.6, 54171.34, 
0), mean_3 = c(NA, NA, 83462.3, 52857.3633333333, 42107.99, 62364.0533333333, 
82391.38, 107312.073333333, 92713.2533333333, 60442.4933333333, 
94181.4, 121189.256666667, NA, NA, 174371.256666667, 172907.49, 
149401.98, 54313.81, 118498.55, 125296.64, 119632.1, 120583.876666667, 
121620.6, 136642.37, NA, NA, 112276.436666667, 72414.3833333333, 
78865.52, 63600.7066666667, 102437.243333333, 99599.2333333333, 
142469.306666667, 135907.39, 159982.323333333, 126086.816666667, 
NA, NA, 119547.495333333, 91270.2186666667, 99563.1333333333, 
104968.363333333, 125970.7, 109672.933333333, 93208.74, 60509.1366666667, 
37181.3133333333)), row.names = c(NA, -47L), groups = structure(list(
    year = c(2018, 2019, 2020, 2021), .rows = structure(list(
        1:12, 13:24, 25:36, 37:47), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -4L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))
Rich Pauloo
  • 7,734
  • 4
  • 37
  • 69
Norhther
  • 545
  • 3
  • 15
  • 35

2 Answers2

1

Based on your data, you can use rollmeanr from zoo or roll_meanr from Rcpproll or slidify from timetk, the follow package from tibbletime:

df1 %>% 
  ungroup() %>% # needed because your dput has the groups specified
  mutate(roll_sum = zoo::rollmeanr(monthly_sum, k = 3, fill = NA ))
phiver
  • 23,048
  • 14
  • 44
  • 56
  • I have the same problem: for different years, the two first months have a `roll_sum` of NA. But for 2019, we can calculate the sum of the two previous months already. – Norhther Nov 12 '21 at 17:58
1
monthly_sum_df %>%
  select(date, monthly_sum) %>%
  mutate(., mean_3 = zoo::rollapply(monthly_sum, 3, FUN = mean,
         partial = T, align = 'right'))
runr
  • 1,142
  • 1
  • 9
  • 25