Calculate averages over row-specific time interval in R

Question

I have a data frame with prey items, the day they were brought and their estimated weight. Some rows have NA for estimated weight and I would like to extrapolate a rough estimate by averaging the weight of the same type of prey item over a period of 3 days, being the day before the prey delivery, the same day and the day after. How should I do that? Also, the data is grouped by nest IDs. MRE:

example_data <- structure(list(broodID = structure(c(7L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 
8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("2018_1", "2019_3", 
"2020_10", "2020_11", "2020_12", "2020_2", "2020_5", "2020_8", 
"2020_9"), class = "factor"), date = structure(c(18469, 17743, 
17724, 17730, 17731, 17739, 17723, 17724, 17738, 17738, 17739, 
17739, 17742, 17735, 17724, 17730, 17732, 17745, 17737, 17751, 
17725, 17725, 17725, 17725, 17726, 17729, 17723, 17730, 18104, 
18104, 18102, 18102, 18103, 18098, 18098, 18100, 18462, 18462, 
18470, 18459, 18459, 18471, 18473, 18463, 18463, 18474, 18493, 
18479, 18473, 18471, 18471, 18475, 18475, 18472, 18472, 18474, 
18411, 18411, 18462, 18462, 18461, 18462, 18470, 18464, 18449, 
18453, 18452, 18452, 18448, 18456, 18464, 18464, 18450, 18450, 
18471, 18471, 18451, 18460, 18470, 18460, 18456, 18456, 18473, 
18473, 18447, 18447, 18471, 18447, 18452, 18452, 18450, 18456, 
18459, 18488, 18464, 18451, 18458, 18455, 18469, 18453, 18461, 
18461, 18468, 18466, 18458, 18458, 18449, 18450, 18450, 18476, 
18482, 18454, 18454, 18458, 18458, 18470, 18452, 18453, 18453, 
18448, 18448, 18457, 18457, 18467, 18467, 18472, 18472, 18455, 
18457, 18457, 18461, 18461, 18465, 18465, 18453, 18453, 18458, 
18466, 18466, 18475, 18452, 18452, 18459, 18456, 18456, 18456, 
18458, 18458, 18456, 18456, 18450, 18450, 18465, 18452, 18461, 
18461, 18456, 18464, 18459), class = "Date"), PreyGroup = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "bird", class = "factor"), 
    weightcalc = c(10, 5, 10, 10, 10, 10, 15, 15, 15, 15, 15, 
    15, 15, 20, 25, 25, 25, 25, 25, 40, 45, 45, 45, 45, NA, NA, 
    NA, NA, 10, 10, 15, 15, 20, 25, NA, 20, 15, 15, 20, 20, 20, 
    20, 35, 45, 45, 50, 120, 15, 20, 20, 20, 20, 20, 25, 35, 
    45, 45, NA, 6, 6, 7, 8, 10, 12, 12, 12, 14, 14, 15, 15, 15, 
    15, 15, 15, 20, 20, 20, 25, 25, 35, 40, 40, 55, 55, 60, 60, 
    70, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 15, 15, 20, 50, 
    50, NA, 6, 8, 8, 10, 10, 10, 15, 15, 15, 15, 15, 15, 15, 
    15, 15, 15, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
    20, 20, 20, 20, 20, 30, 30, 30, 30, 30, 30, 35, 35, 35, 35, 
    40, 40, 45, 45, 45, 45, 70, NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", 
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", 
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", 
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", 
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", 
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", 
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100", 
"101", "102", "103", "104", "105", "106", "107", "108", "109", 
"110", "111", "112", "113", "114", "115", "116", "117", "118", 
"119", "120", "121", "122", "123", "124", "125", "126", "127", 
"128", "129", "130", "131", "132", "133", "134", "135", "136", 
"137", "138", "139", "140", "141", "142", "143", "144", "145", 
"146", "147", "148", "149", "150", "151", "152", "153", "154", 
"155", "156", "157", "158", "159"))

Please submit a [Minimal Reproducible Example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) with usable data, not an image. — Vincent, Oct 25 '20 at 18:27
Sorry, I added now the dput code but somehow it only keeps only the first row in the code section. Does it work anyway? — Benedetta Catitti, Oct 25 '20 at 18:48

score 2 · Answer 1 · answered Oct 25 '20 at 19:01

You could first create a new variable with a moving average of the weightcalc variable. The rollmean function from the zoo package can help you do that. Make sure you look at ?rollmean to know the options (the align and k arguments are especially important for you):

library(zoo)
rollmean(c(1, 2, 3, 3, 4, NA, 6, 7, 7), k=3, na.rm=TRUE)
#> [1] 2.000000 2.666667 3.333333 3.500000 5.000000 6.500000 6.666667

Apply the rollmean within groups using dplyr's group_by and mutate function:

library(dplyr)
library(zoo)

dat = dataset %>% 
  arrange(PreyGroup, date) %>%
  group_by(PreyGroup) %>%
  mutate(weightcalc_ma = rollmean(weightcalc, k=4, fill=NA, na.rm=TRUE),
         weightcalc_new = ifelse(is.na(weightcalc), weightcalc_ma, weightcalc))

dat
#> # A tibble: 159 x 6
#> # Groups:   PreyGroup [1]
#>    broodID date       PreyGroup weightcalc weightcalc_ma weightcalc_new
#>    <fct>   <date>     <fct>          <dbl>         <dbl>          <dbl>
#>  1 2018_1  2018-07-11 bird              15          NA             15  
#>  2 2018_1  2018-07-11 bird              NA          13.3           13.3
#>  3 2018_1  2018-07-12 bird              10          16.7           10  
#>  4 2018_1  2018-07-12 bird              15          23.8           15  
#>  5 2018_1  2018-07-12 bird              25          32.5           25  
#>  6 2018_1  2018-07-13 bird              45          40             45  
#>  7 2018_1  2018-07-13 bird              45          45             45  
#>  8 2018_1  2018-07-13 bird              45          45             45  
#>  9 2018_1  2018-07-13 bird              45          45             45  
#> 10 2018_1  2018-07-14 bird              NA          27.5           27.5
#> # … with 149 more rows

Alternatively, you could use the data.table package:

library(data.table)
dat = data.table(dataset, key=c("PreyGroup", "date"))[
      , weightcalc_ma := rollmean(weightcalc, k=4, fill=NA, na.rm=TRUE), 
        by="PreyGroup"][
      , weightcalc_new := ifelse(is.na(weightcalc), weightcalc_ma, weightcalc),
        by="PreyGroup"]

Calculate averages over row-specific time interval in R

1 Answers1