0

I've been trying to combine the two For Loops into a single loop.

Loop 1:

    Unique.Order.Comment <- unique(df2$Rebuilt.Order.Comment)
length(Unique.Order.Comment)

#loop for the calculations
for (i in 1:length(Unique.Order.Comment)) {
  #a <- i-11
  #c[i] <-  print(sum(n.Cases.per.month$nCases[a:i]))
  a <- subset.data.frame(Rebuilt.Data, Rebuilt.Order.Comment == Unique.Order.Comment[i])  
  assign(Unique.Order.Comment[i],a)


}

Loop 2:

    #loop for the calculations
c <- rep(0, nrow(BR))
for (ii in 1:nrow(BR)) {
  if (ii < 12){
    print(0)
  }else { 
    a <- ii-11
    c[ii] <-  print(sum(BR$Number.Cases.Authorised[a:ii]))
  }
  
} 


c <- data.frame(c)
c <- c %>% 
  rename(
    n.Seen.Cum = c
  )

#View(c)

BR <- cbind(BR,c)

The BR need to be Unique.Order.Comment[i] in Loop 2.

What I believe/hope it would look like should be the below. But I get the error message Error in rep(0, nrow(Unique.Order.Comment[i])) : invalid 'times' argument

(What I think it should look like)

    Unique.Order.Comment <- unique(df2$Rebuilt.Order.Comment)
length(Unique.Order.Comment)

#loop for the calculations
for (i in 1:length(Unique.Order.Comment)) {
  #a <- i-11
  #c[i] <-  print(sum(n.Cases.per.month$nCases[a:i]))
  a <- subset.data.frame(Rebuilt.Data, Rebuilt.Order.Comment == Unique.Order.Comment[i])  
  assign(Unique.Order.Comment[i],a)

  
  
  #loop for the calculations
  c <- rep(0, nrow(Unique.Order.Comment[i]))
  for (ii in 1:nrow(Unique.Order.Comment[i])) {
    if (ii < 12){
      print(0)
    }else { 
      a <- ii-11
      c[ii] <-  print(sum(Unique.Order.Comment[i]$Number.Cases.Authorised[a:ii]))
    }
    
  } 
  
  
  c <- data.frame(c)
  c <- c %>% 
    rename(
      n.Seen.Cum = c
    )
  
  #View(c)
  
  Unique.Order.Comment[i] <- cbind(Unique.Order.Comment[i],c)
}

Edit example data: dput(Unique.Order.Comment)

c("CN", "DM", "DR", "FF", "PG", "HN", "SK", "GI", "GYN", "BR", 
"UR", "LYMPH", "HPB", "BST", "ENDOC", "PAEDGI", "CT", "PERI", 
"NEURO", "MOHS", "ICC", "RE", "PAED", "MN", "EMR", "PR", "LBX", 
"HAEM", "CTT", "UGI", "NEUR", "URGI", "GYNAE")

dput(head(Rebuilt.Data))

structure(list(Rebuilt.Order.Comment = c("BR", "BR", "BR", "BR", 
"BR", "BR"), Period.Received = c("2019-01", "2019-02", "2019-03", 
"2019-04", "2019-05", "2019-06"), Number.Cases.Received = c(838L, 
730L, 778L, 832L, 574L, 626L), Number.Cases.Authorised = c(680L, 
587L, 896L, 715L, 761L, 554L), Number.Cases.Authorised.Less7Days = c(550L, 
343L, 520L, 389L, 393L, 374L), Number.Cases.Authorised.Less10.Days = c(628L, 
475L, 723L, 595L, 555L, 474L), Percentage.Authorsied.Less7Days = c(0.808823529411765, 
0.584327086882453, 0.580357142857143, 0.544055944055944, 0.516425755584757, 
0.675090252707581), Percentage.Authorsied.Less10Days = c(0.923529411764706, 
0.809199318568995, 0.806919642857143, 0.832167832167832, 0.729303547963206, 
0.855595667870036), Avg.TaT.for.Authorised.Cases = structure(c(5.26470588235294, 
8.74616695059625, 8.34709821428571, 8.09370629370629, 12.826544021025, 
6.22021660649819), class = "difftime", units = "days"), MDM.Received = c(2L, 
13L, 2L, NA, NA, 5L), MDM.Received.Avg.TAT = structure(c(5, 29.2307692307692, 
0.5, NA, NA, 5.4), class = "difftime", units = "days"), So.Received = c(NA, 
1L, NA, 1L, NA, 2L), So.Received.Avg.TAT = structure(c(NA, 14, 
NA, 9, NA, 54), class = "difftime", units = "days")), row.names = c(NA, 
6L), class = "data.frame")

if I place print(Unique.Order.Comment[i]) before the second seperate loop I get: "CN"

In theory the first loop subsets data based upon a unique list of Order.Comment (which it can do). Then it does a cumlative sum and this gets cbind onto the subsetted data.

  • 1
    Add `print(Unique.Order.Comment[i])` before your second loop. What's the result? Are you able to share example data? https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example – Skaqqs Jul 04 '21 at 13:04
  • @skaqqs example data and I get "CN" when I place the 'print(Unique.Order.Comment[i])' before the second loop (in what I believe it should look like after the #Loop for calculations – James Slasor Jul 04 '21 at 16:55
  • One error might be due to using `nrow` instead of `length` in your inner loop. Because `Unique.Order.Comment` is a vector, I think it should be `for(ii in 1:length(Unique.Order.Comment[i])` or `for(ii in 1:NROW(Unique.Order.Comment)` although the former is much more commonly used. – Skaqqs Jul 04 '21 at 18:00
  • @Skaqqs if i use `for (ii in 1:NROW(Unique.Order.Comment[i])) {` I receive **Error in rep(0, nrow(Unique.Order.Comment[i])) : invalid 'times' argument** and `for (ii in 1:length(Unique.Order.Comment[i])) {` gives me **Error in rep(0, nrow(Unique.Order.Comment[i])) : invalid 'times' argument** – James Slasor Jul 04 '21 at 20:45

2 Answers2

1

First, it is easier to help if you provide a small example along with the your expected output. You can share your original data removing the columns which are not necessary to the question or create a fake dataset which is similar to your original data.

Second, I think you are overcomplicating this. It is never a good idea to create multiple datasets in your global environment. They are very difficult to manage and unnecessary pollute the global environment. You can use lists instead.

In this case I don't think we need to split the datasets in different lists as we have different packages that can perform rolling calculations. For example, below I have used zoo package which has rollsumr function.

library(dplyr)
library(zoo)

df <- df %>%
  group_by(Rebuilt.Order.Comment) %>%
  mutate(n.Seen.Cum = rollsumr(Number.Cases.Authorised, 12, fill = 0)) %>%
  ungroup

df

#   Rebuilt.Order.Comment Period.Received Number.Cases.Authorised n.Seen.Cum
#   <chr>                 <chr>                             <int>      <int>
# 1 BR                    2019-01                             680          0
# 2 BR                    2019-02                             587          0
# 3 BR                    2019-03                             896          0
# 4 BR                    2019-04                             715          0
# 5 BR                    2019-05                             761          0
# 6 BR                    2019-06                             554          0
# 7 BR                    2019-07                             843          0
# 8 BR                    2019-08                             815          0
# 9 BR                    2019-09                             704          0
#10 BR                    2019-10                             939          0
#11 BR                    2019-11                             834          0
#12 BR                    2019-12                             880       9208
#13 BR                    2020-01                             801       9329
#14 BR                    2020-02                             610       9352
#15 BR                    2020-03                             853       9309
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
  • the `ii<12` and `a <- ii-11` were my attempt at creating a 12-month rolling cumulative sum calculation ie sum the last 12 positions only. The Double Loop is to generate the dataset & then add cumulatively sum; there will be a bit after this to export the data so I can pass it to someone to use – James Slasor Jul 05 '21 at 19:18
  • Output below: structure(list(Rebuilt.Order.Comment = c("BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR" ), Period.Received = c("2019-01", "2019-02", "2019-03", "2019-04", "2019-05", "2019-06", "2019-07", "2019-08", "2019-09", "2019-10", "2019-11", "2019-12", "2020-01", "2020-02", "2020-03"), Number.Cases.Authorised = c(680L, 587L, 896L, 715L, 761L, 554L, 843L, 815L, 704L, 939L, 834L, 880L, 801L, 610L, 853L), n.Seen.Cum = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9208, 9329, 9352, 9309)), row.names = c(NA, 15L), class = "data.frame") – James Slasor Jul 05 '21 at 19:28
  • @JamesSlasor So in the output you want 0's for first 11 values and continue with cumulative sum after that? I understand that 9208 is `sum(df$Number.Cases.Authorised[1:12])` but how do you get 9329 after that? `sum(df$Number.Cases.Authorised[1:13])` is 10009. – Ronak Shah Jul 06 '21 at 01:26
  • it's a rolling sum of 12 positions. So it's Sum positions 1:12 of Number.Cases.Authorised; Then you sum 2:13 of Number.Cases.Authorised. So it would be `sum(df$Number.Cases.Authorised[1:12])` and then `sum(df$Number.Cases.Authorised[2:13])` & then `sum(df$Number.Cases.Authorised[3:14])` – James Slasor Jul 08 '21 at 19:10
  • 1
    @JamesSlasor You can use `rollsumr` function from `zoo` package to do rolling calculation. See the updated answer. – Ronak Shah Jul 09 '21 at 01:30
0

I think I see what you are aiming for, but I may have missed something. Let me know, and I can edit.

From what I can tell, you only need one loop, and instead of assign()ing a bunch of dataframes, you can iteratively build a summary table.

edit

The other answer here is quite elegant! I'm updating my answer based on your new comments just for fun. Not sure why we have different n.Seen.Cum values...

df2 <- structure(list(
  Rebuilt.Order.Comment = c("BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR" ),
  Period.Received = c("2019-01", "2019-02", "2019-03", "2019-04", "2019-05", "2019-06", "2019-07", "2019-08", "2019-09", "2019-10", "2019-11", "2019-12", "2020-01", "2020-02", "2020-03"),
  Number.Cases.Authorised = c(680L, 587L, 896L, 715L, 761L, 554L, 843L, 815L, 704L, 939L, 834L, 880L, 801L, 610L, 853L),
  n.Seen.Cum = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9208, 9329, 9352, 9309)),
  row.names = c(NA, 15L), class = "data.frame")

# This will hold results
output <- list()

# Loop over this vector
Unique.Order.Comment <- unique(df2$Rebuilt.Order.Comment)

for(comment in Unique.Order.Comment){
  # Temporary dataframe that is subset of 'df2'
  temp <- df2[df2$Rebuilt.Order.Comment == comment,] 
  
  # We can do arithmetic with dates that have days
  temp$Period.Received2 <- as.Date(paste(temp$Period.Received, "-01", sep=""))
  
  # Calculate cumsum after 333 days have passed
  temp$n.Seen.cum2 <- ifelse(
    test = temp$Period.Received2 - min(temp$Period.Received2) > 333,
    yes = cumsum(temp$Number.Cases.Authorised),
    no = NA)
  
  # better
  output[[comment]] <- temp
  
  # quick and dirty
  # assign(x = comment, value = temp)
}

output[[1]]
#>    Rebuilt.Order.Comment Period.Received Number.Cases.Authorised n.Seen.Cum
#> 1                     BR         2019-01                     680          0
#> 2                     BR         2019-02                     587          0
#> 3                     BR         2019-03                     896          0
#> 4                     BR         2019-04                     715          0
#> 5                     BR         2019-05                     761          0
#> 6                     BR         2019-06                     554          0
#> 7                     BR         2019-07                     843          0
#> 8                     BR         2019-08                     815          0
#> 9                     BR         2019-09                     704          0
#> 10                    BR         2019-10                     939          0
#> 11                    BR         2019-11                     834          0
#> 12                    BR         2019-12                     880       9208
#> 13                    BR         2020-01                     801       9329
#> 14                    BR         2020-02                     610       9352
#> 15                    BR         2020-03                     853       9309
#>    Period.Received2 n.Seen.cum2
#> 1        2019-01-01          NA
#> 2        2019-02-01          NA
#> 3        2019-03-01          NA
#> 4        2019-04-01          NA
#> 5        2019-05-01          NA
#> 6        2019-06-01          NA
#> 7        2019-07-01          NA
#> 8        2019-08-01          NA
#> 9        2019-09-01          NA
#> 10       2019-10-01          NA
#> 11       2019-11-01          NA
#> 12       2019-12-01        9208
#> 13       2020-01-01       10009
#> 14       2020-02-01       10619
#> 15       2020-03-01       11472

If you have multiple years and want the cumulative sum to reset, update the test parameter in ifelse() to include some max number of days.

Skaqqs
  • 4,010
  • 1
  • 7
  • 21
  • The output from this is: `structure(list(Unique.Order.Comment = c("CN", "DM", "DR", "FF", "PG", "HN", "SK", "GI", "GYN", "BR", "UR", "LYMPH", "HPB", "BST", "ENDOC", "PAEDGI", "CT", "PERI", "NEURO", "MOHS", "ICC", "RE", "PAED", "MN", "EMR", "PR", "LBX", "HAEM", "CTT", "UGI", "NEUR", "URGI", "GYNAE"), n.Seen.Cum = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), row.names = c(NA, 33L ), class = "data.frame")` I have them broken down into groups as i'm splitting them to give to people. – James Slasor Jul 04 '21 at 20:50
  • Hmm, check to make sure your input dataset is called `df2` before you run the code in my answer. If you want to output `temp` as a stand-alone dataframe after each iteration, you could use `assign(x = Unique.Order.Comment[i], value = temp)` as the last line of the loop, like you did in your question. – Skaqqs Jul 04 '21 at 21:19
  • When I use yours it seems to create the last Cumulative for the data set. Also I can't seem to get the output to work where you suggested placing it ` # Save results output[i,"Unique.Order.Comment"] <- Unique.Order.Comment[i] output[i,"n.Seen.Cum"] <- response assign(x = Unique.Order.Comment[i], value = temp) } output` Worst case I can use VBA when I export to Excel and add this in. Just trying to do as much as i can in one location. Thank you for helping – James Slasor Jul 05 '21 at 19:32