10

I'm running the following script in R. If I use a %do% rather than a %dopar% the script works fine. However, if in the outer loop I use a %dopar% the loop runs forever without throwing any error (constant increase in memory usage until it goes out of memory). I'm using 16 cores.

library(parallel)
library(foreach)
library(doSNOW)
library(dplyr)


# Start a 16-worker cluster and register it as the backend used by %dopar%.
# NOTE(review): no stopCluster(cl) appears in the visible code -- confirm the
# workers are shut down somewhere after the loop.
NumberOfCluster <- 16 
cl <- makeCluster(NumberOfCluster) 
registerDoSNOW(cl) 


# Outer loop: one parallel task per element `i` of UNSPSC_list. Each task
# builds a term-importance table for that code, scores every row of `train`,
# and saves the scores to its own .rda file.
# UNSPSC_list, terms_list_by_UNSPSC, N_of_UNSPSCs_by_Term, UNSPSCs_per_ABN,
# train, predict_all and suppliers are not defined in the visible code.
# NOTE(review): every object referenced in the body presumably has to be
# shipped to each of the 16 workers -- worth checking their sizes against the
# reported memory growth.
foreach(i = UNSPSC_list, .packages = c('data.table', 'dplyr'), .verbose = TRUE) %dopar% 
    { 
      # Unique, space-stripped terms from the rows whose UNSPSC starts with the
      # 6 characters in `i`, wrapped in a data.table.
      terms <- as.data.table(unique(gsub(" ", "", unlist(terms_list_by_UNSPSC$Terms[which(substr(terms_list_by_UNSPSC$UNSPSC,1,6) == i)])))) 
      # NOTE(review): `on` is data.table's join argument; dplyr::inner_join
      # documents `by` -- confirm the intended key is actually being used.
      temp <- inner_join(N_of_UNSPSCs_by_Term, terms, on = 'V1') 
      # Replace V2 by its reciprocal and sort the table by it, largest first.
      temp$V2 <- 1/as.numeric(temp$V2)
      temp <- temp[order(temp$V2, decreasing = TRUE),]
      names(temp) <- c('Term','Imp')
      # Unique first-column values of the UNSPSCs_per_ABN rows whose UNSPSC
      # shares its first 4 characters with `i`.
      ABNs <- unique(UNSPSCs_per_ABN[which(substr(UNSPSCs_per_ABN$UNSPSC,1,4) == substr(i,1,4)), 1])

      # `predictions` is still this empty vector while the inner loop runs; it
      # is only overwritten when the inner foreach returns.
      predictions <- as.numeric(vector()) 
      # Inner loop: sequential (%do%) pass over the rows of `train`, combining
      # one value per row with c().
      # NOTE(review): seq(1 : nrow(train)) looks like it is meant to be
      # seq_len(nrow(train)) -- confirm the behaviour for an empty `train`.
      predictions <- foreach (j = seq(1 : nrow(train)), .combine = 'c', .packages = 'dplyr')  %do% 
      { 
        # Names of the non-NA entries of row j.
        descr <- names(which(!is.na(train[j,]) == TRUE)) 
        # Row j scores the summed Imp of its matching terms when its
        # predict_all[j, 1] value is in ABNs or is not in suppliers; otherwise 0.
        # NOTE(review): union_all() with the still-empty `predictions`
        # presumably just yields the new value -- confirm.
        if(unlist(predict_all[j,1]) %in% unlist(ABNs) || !unlist(predict_all[j,1]) %in% unlist(suppliers)) {union_all(predictions, sum(temp$Imp[which(temp$Term %in% descr)]))} else {union_all(predictions, 0)}    

      } 
    # Written once per outer iteration; the file name is "Predictions_<i>_.rda".
    save(predictions, file = paste("Predictions", i,".rda", sep = "_")) 
    }
Dario Federici
  • 1,228
  • 2
  • 18
  • 40
  • Did you try with `NumberOfCluster <- 2`? – F. Privé Feb 06 '18 at 07:23
  • Just tried and it does not solve the problem. – Dario Federici Feb 07 '18 at 01:37
  • So without the inner `%do%` loop the code runs fine with `%dopar%`? – RolandASc Feb 08 '18 at 09:50
  • No the code runs fine only when both are %do%. If one of the two is %dopar% they do not work. – Dario Federici Feb 08 '18 at 21:31
  • not sure if I understand, my question was whether the code runs with only a single `%dopar%`, i.e. if you would save `predictions` directly after `predictions <- as.numeric(vector())` and comment out the second `foreach` (for narrowing down your problem) – RolandASc Feb 09 '18 at 09:57
  • 2
    have you read this, you can nest loops with %:%: https://cran.r-project.org/web/packages/foreach/vignettes/nested.pdf ? – gdkrmr Feb 09 '18 at 16:20
  • another option could be replacing the inner `foreach` with a simple `for` or `lapply` and use `%dopar%` in the outer loop. – gdkrmr Feb 09 '18 at 16:25
  • The real solution is to not use R in Windows. Use linux or Mac or even Windows built in bash subsystem; and then use `mclapply` from the package `parallel`. – thc Feb 12 '18 at 17:57
  • Thank you all, %dopar% does not work either for a single or nested loop. I believe R in windows is the problem. – Dario Federici Feb 13 '18 at 02:25

1 Answer

14

The proper way to nest foreach loops is to use the %:% operator. See the example below; I have tested it on Windows.

library(foreach)
library(doSNOW)

# Start 4 workers and register them as the backend used by %dopar%.
# The cluster is left running; the later snippets that use %dopar% depend on
# a backend still being registered.
NumberOfCluster <- 4
cl <- makeCluster(NumberOfCluster) 
registerDoSNOW(cl) 

# Number of random draws per (i, j) pair.
N <- 1e6

# Sequential baseline: for each of the 100 (i, j) pairs, take the mean of N
# normal draws with mean i and standard deviation j. Inner results are joined
# with c(), outer results with rbind().
system.time(foreach(i = 1:10, .combine = rbind) %:%
              foreach(j = 1:10, .combine = c) %do% mean(rnorm(N, i, j)))

# Same nested loop, but %dopar% sends the work to the registered backend.
system.time(foreach(i = 1:10, .combine = rbind) %:%
              foreach(j = 1:10, .combine = c) %dopar% mean(rnorm(N, i, j)))

Output:

> system.time(foreach(i = 1:10, .combine = rbind) %:%
+               foreach(j = 1:10, .combine = c) %do% mean(rnorm(N, i, j)))
   user  system elapsed 
   7.38    0.23    7.64 
> system.time(foreach(i = 1:10, .combine = rbind) %:%
+               foreach(j = 1:10, .combine = c) %dopar% mean(rnorm(N, i, j)))
   user  system elapsed 
   0.09    0.00    2.14 

CPU usage for %do% and %dopar%

The scheme for nested loops is as follows:

foreach(i) %:% foreach(j) %dopar% {foo(i, j)}

The %:% operator is used to nest several foreach loops. You cannot do any computation between the nested loops, so in your case you have to split the work into two separate loops, for example:

# Step 1: a single parallel pass over i that collects 2^i into the vector x.
x <- foreach(i = seq_len(10), .combine = "c") %dopar% {
  2 ^ i
}

# Step 2: nested pass over every (i, j) pair, reusing the values from step 1.
# Each inner loop yields one row (joined with c); rows are stacked with rbind.
foreach(i = seq_len(10), .combine = "rbind") %:%
  foreach(j = seq_len(10), .combine = "c") %dopar% {
    x[i] + j
  }

Untested code:

library(data.table)
library(foreach)
library(doSNOW)

# Start the workers and register them as the backend used by %dopar%.
NumberOfCluster <- 2
cl <- makeCluster(NumberOfCluster)
registerDoSNOW(cl)

# Step 1: per-UNSPSC lookups, one parallel task per code.
# Each task returns BOTH objects the scoring step needs:
#   temp - term-importance table (columns Term, Imp), sorted by Imp descending
#   ABNs - unique ABNs whose UNSPSC shares its first 4 characters with the code
# Returning only the ABNs would discard `temp`, which is needed again in
# step 2.
per_code <- foreach(i = UNSPSC_list, .packages = c('data.table', 'dplyr'), .verbose = TRUE) %dopar% {
  terms <- as.data.table(unique(gsub(" ", "", unlist(terms_list_by_UNSPSC$Terms[which(substr(terms_list_by_UNSPSC$UNSPSC, 1, 6) == i)]))))
  # dplyr joins take `by`; `on` is data.table syntax and is not a dplyr argument.
  temp <- inner_join(N_of_UNSPSCs_by_Term, terms, by = 'V1')
  temp$V2 <- 1 / as.numeric(temp$V2)
  temp <- temp[order(temp$V2, decreasing = TRUE), ]
  names(temp) <- c('Term', 'Imp')
  list(
    temp = temp,
    ABNs = unique(UNSPSCs_per_ABN[which(substr(UNSPSCs_per_ABN$UNSPSC, 1, 4) == substr(i, 1, 4)), 1])
  )
}

# Split the results into two lists that are positionally aligned with
# UNSPSC_list, so element k of each belongs to the k-th code.
temps <- lapply(per_code, function(entry) entry$temp)
ABNs <- lapply(per_code, function(entry) entry$ABNs)

# Step 2: nested loop over (code position k, row j of train).
# The outer loop iterates over positions rather than code values, because
# `temps` and `ABNs` are unnamed lists and must be indexed by position.
# seq_len() yields an empty sequence when `train` has no rows.
# Result: a list with one numeric vector of row scores per code.
predictions <- foreach(k = seq_along(UNSPSC_list), .packages = c('data.table', 'dplyr'), .verbose = TRUE) %:%
  foreach(j = seq_len(nrow(train)), .combine = 'c', .packages = 'dplyr') %dopar% {
    descr <- names(which(!is.na(train[j, ]) == TRUE))
    if (unlist(predict_all[j, 1]) %in% unlist(ABNs[[k]]) || !unlist(predict_all[j, 1]) %in% unlist(suppliers)) {
      sum(temps[[k]]$Imp[which(temps[[k]]$Term %in% descr)])
    } else {
      0
    }
  }

# Step 3: write one file per code.
# save() takes object NAMES, not expressions, so each vector is bound to the
# name `predictions` in a scratch environment before saving. load() then
# restores an object called `predictions`. Files are named after the UNSPSC
# code, not its position.
for (k in seq_along(predictions)) {
  save_env <- new.env()
  assign("predictions", predictions[[k]], envir = save_env)
  save(list = "predictions", envir = save_env,
       file = paste("Predictions", UNSPSC_list[[k]], ".rda", sep = "_"))
}

# Release the worker processes once all the work is done.
stopCluster(cl)
djhurio
  • 5,437
  • 4
  • 27
  • 48
  • Thanks djhurio, I thought of using %:% the fact is that my code is structured as foreach - code - nested foreach. yours is just foreach - nested foreach. If you show me how to adapt my code to your solution the 50 points are yours. – Dario Federici Feb 14 '18 at 23:19