cumsum by day with hourly observations with dplyr

Question

I was able to sum up all hourly observations (column: gdd10) in a day, and group that by the three factors (Treatment, Rep, and Depth). I used the following code:

##Summarise by Day
gdd10<- tempsub%>% 
group_by(Date = as.Date.character(Date), Treatment,Rep,Depth)%>%
summarise(sum = sum(gdd10)) %>% 
arrange(sum)

However, I would like to accumulate the newly created gdd10 sums per day over the entire period of time. So that I have a cumulative sum at the end grouped by treatment, Rep, Depth. I tried following code (based on cumsum in grouped data with dplyr):

##Cumulate sum over entire period
gdd10csum<- gdd10%>% 
group_by(Treatment,Rep,Depth)%>%
mutate(cumsum = cumsum(sum)) %>% 
arrange(cumsum)

When looking at a ggplot, it reveals that this code does not sum up over the entire period:

I would like to have ONE accumulated (cumsum) value by Date over the entire period of time with grouping by treatment, rep and depth. Any ideas what is wrong with this code are highly appreciated?

> dput(gdd10csum)
structure(list(Date = structure(c(16941, 16941, 16941, 16941, 
16941, 16941, 16941, 16941, 16941, 16941, 16941, 16942, 16941, 
16941, 16941, 16941, 16942, 16942, 16941, 16942, 16942, 16942, 
16942, 16941, 16941, 16942, 16942, 16941, 16942, 16941, 16941, 
16941, 16941, 16942, 16941, 16942, 16942, 16942, 16941, 16941, 
16941, 16942, 16941, 16941, 16941, 16941, 16941, 16942, 16942, 
16942, 16942, 16942, 16942, 16942, 16942, 16942, 16942, 16942, 
16942, 16942, 16942, 16942, 16942, 16942), class = "Date"), Treatment = structure(c(2L, 
1L, 5L, 3L, 1L, 5L, 2L, 3L, 5L, 5L, 4L, 2L, 4L, 3L, 4L, 4L, 1L, 
5L, 2L, 5L, 3L, 1L, 2L, 2L, 2L, 5L, 3L, 1L, 5L, 5L, 1L, 5L, 1L, 
4L, 3L, 4L, 3L, 4L, 3L, 1L, 4L, 4L, 5L, 3L, 3L, 4L, 4L, 2L, 2L, 
2L, 1L, 5L, 1L, 5L, 1L, 3L, 3L, 4L, 1L, 5L, 3L, 3L, 4L, 4L), .Label = c("Disc", 
"Strip_NT", "Strip_ST", "Vt_high", "Vt_low"), class = "factor"), 
    Rep = structure(c(2L, 1L, 1L, 2L, 2L, 4L, 1L, 3L, 2L, 3L, 
    4L, 2L, 1L, 1L, 2L, 3L, 1L, 1L, 2L, 4L, 2L, 2L, 1L, 1L, 3L, 
    2L, 3L, 1L, 3L, 1L, 4L, 4L, 3L, 4L, 2L, 1L, 1L, 2L, 4L, 2L, 
    1L, 3L, 2L, 3L, 1L, 3L, 2L, 2L, 3L, 1L, 1L, 1L, 4L, 4L, 3L, 
    2L, 4L, 1L, 2L, 2L, 3L, 1L, 3L, 2L), .Label = c("1", "2", 
    "3", "4"), class = "factor"), Depth = structure(c(2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 
    1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("5", "30"), class = "factor"), sum = c(19.264, 
    27.405, 37.078, 40.186, 40.838, 42.793, 44.761, 47.656, 47.842, 
    53.317, 59.739, 40.824, 60.351, 61.321, 62.353, 64.521, 47.445, 
    55.643, 94.985, 59.336, 62.25, 63.425, 64.323, 110.563, 110.912, 
    68.41, 68.735, 116.45, 73.875, 127.29, 129.475, 130.305, 
    133.851, 78.288, 139.362, 79.38, 80.889, 83.889, 147.819, 
    150.538, 150.655, 89.098, 156.007, 159.148, 168.585, 173.395, 
    175.571, 116.011, 128.453, 129.605, 133.488, 140.31, 139.496, 
    143.325, 150.379, 153.905, 157.835, 163.186, 166.053, 174.543, 
    175.186, 176.603, 190.917, 192.106), cumsum = c(19.264, 27.405, 
    37.078, 40.186, 40.838, 42.793, 44.761, 47.656, 47.842, 53.317, 
    59.739, 60.088, 60.351, 61.321, 62.353, 64.521, 74.85, 92.721, 
    94.985, 102.129, 102.436, 104.263, 109.084, 110.563, 110.912, 
    116.252, 116.391, 116.45, 127.192, 127.29, 129.475, 130.305, 
    133.851, 138.027, 139.362, 139.731, 142.21, 146.242, 147.819, 
    150.538, 150.655, 153.619, 156.007, 159.148, 168.585, 173.395, 
    175.571, 210.996, 239.365, 240.168, 249.938, 267.6, 268.971, 
    273.63, 284.23, 293.267, 305.654, 313.841, 316.591, 330.55, 
    334.334, 345.188, 364.312, 367.677)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -64L), vars = list(
    Treatment, Rep, Depth), indices = list(c(27L, 50L), c(1L, 
16L), c(39L, 58L), c(4L, 21L), c(32L, 54L), c(30L, 52L), c(23L, 
49L), c(6L, 22L), c(18L, 47L), c(0L, 11L), c(24L, 48L), c(44L, 
61L), c(13L, 36L), c(34L, 55L), c(3L, 20L), c(43L, 60L), c(7L, 
26L), c(38L, 56L), c(40L, 57L), c(12L, 35L), c(46L, 63L), c(14L, 
37L), c(45L, 62L), c(15L, 41L), c(10L, 33L), c(29L, 51L), c(2L, 
17L), c(42L, 59L), c(8L, 25L), c(9L, 28L), c(31L, 53L), c(5L, 
19L)), drop = TRUE, group_sizes = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), biggest_group_size = 2L, .Names = c("Date", 
"Treatment", "Rep", "Depth", "sum", "cumsum"), labels = structure(list(
    Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
    2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Disc", "Strip_NT", 
    "Strip_ST", "Vt_high", "Vt_low"), class = "factor"), Rep = structure(c(1L, 
    1L, 2L, 2L, 3L, 4L, 1L, 1L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 3L, 
    3L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 1L, 1L, 2L, 2L, 3L, 4L, 
    4L), .Label = c("1", "2", "3", "4"), class = "factor"), Depth = structure(c(1L, 
    2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 
    2L), .Label = c("5", "30"), class = "factor")), class = "data.frame", row.names = c(NA, 
-32L), vars = list(Treatment, Rep, Depth), indices = list(c(34L, 
40L), c(1L, 8L), c(48L, 55L), c(5L, 19L), c(41L, 47L), c(37L, 
43L), c(31L, 38L), c(7L, 20L), c(30L, 33L), c(0L, 4L), c(32L, 
36L), c(56L, 61L), c(16L, 27L), c(42L, 50L), c(3L, 17L), c(53L, 
59L), c(9L, 23L), c(46L, 52L), c(49L, 54L), c(15L, 26L), c(60L, 
63L), c(18L, 28L), c(57L, 62L), c(21L, 29L), c(14L, 25L), c(35L, 
44L), c(2L, 12L), c(51L, 58L), c(10L, 22L), c(11L, 24L), c(39L, 
45L), c(6L, 13L)), drop = TRUE, group_sizes = c(2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), biggest_group_size = 2L, .Names = c("Treatment", 
"Rep", "Depth")))

Make sure `gdd10` isn't grouped by Date at the time you do the `cumsum`. It's impossible to debug without a [reproducible example](http://stackoverflow.com/q/5963269/903061), please see that link for details if you need help making one. Picking a couple levels of rep, depth, treatment and maybe 3 days ought to be plenty, and please share data reproducibly using `dput()`. — Gregor Thomas, Dec 28 '16 at 22:10
Thank you. I used only two days of my data set and copied the output of dput() into my edits. Is this correct? — Pat14, Dec 28 '16 at 22:46

score 1 · Accepted Answer · answered Dec 28 '16 at 23:25

1

The answer is to exclude the arrange function in the first step:

##Summarise by Day
gdd10<- tempsub%>% 
group_by(Date = as.Date.character(Date), Treatment,Rep,Depth)%>%
summarise(sum = sum(gdd10))

answered Dec 28 '16 at 23:25

Pat14

29
6

cumsum by day with hourly observations with dplyr

1 Answers1