0

I want to fit a large number of models, so to reduce the run time I decided to use parallel processing. My tests with a sequential foreach loop (`%do%`) were successful, so I then tried to run the same loop in parallel (`%dopar%`). Unfortunately, after the parallel run my Model list is still empty (every element is NULL), so I can't use the fitted models.

library(parallel)
library(doParallel)
library(foreach)

# Training-sample size: 80% of the rows of FinalData, rounded up.
SizeSample = ceiling(0.8 * nrow(FinalData))
SizeSample
[1] 14

# Number of resamples ("folds"): taken from the `$res` field of dlg_input()
# (default answer "10") and converted to a number.
# NOTE(review): dlg_input() is not provided by the library() calls shown above;
# presumably it comes from the svDialogs package -- confirm.
NbSample = as.numeric(dlg_input(message = "Number of Folds to create", default = "10")$res)
NbSample
[1] 10

# Vector of fold indices; it prints as 1..NbSample below. Each index is later
# passed to reg(), which forwards it to Fold() as the random seed.
Nb = seq(1:NbSample)
Nb
 [1]  1  2  3  4  5  6  7  8  9 10

# Fold(Fd): seeds the random-number generator with Fd, then returns SizeSample
# row indices of FinalData drawn without replacement, so the draw is
# reproducible for a given Fd. Reads the globals FinalData and SizeSample.
# Side effect: set.seed() resets the random-number state of the calling session.
Fold <<- function(Fd) { # Draw one reproducible sample of row indices per seed
+ set.seed(Fd)
+ sample(1:nrow(FinalData), size = SizeSample, replace = FALSE)
+ }

# reg(i): fits lm(Formula2) on the rows of FinalData selected by Fold(i) and
# stores the fit in element i of the existing `Model` list through `<<-`
# (i.e. it modifies a variable outside the function rather than a local one).
# Reads Formula2, FinalData and Fold from outside the function.
# The fitted model is also the value of the call, which is what the foreach
# loops collect (compare the printed result of reg_par(Nb) further down).
reg <<- function(i){# Fit the model for fold i and store it in Model[[i]]
+ Model[[i]] <<- lm(Formula2, data = FinalData[Fold(i),])
+ }

# reg_npar(Nb): calls reg(i) for every i in Nb with the sequential %do%
# operator. The head(Model) output after the call shows that reg()'s `<<-`
# filled the Model list of this session.
reg_npar <- function(Nb){# Fit one model per fold, sequentially (non-parallel)
+ foreach(i = Nb) %do% {
+ reg(i)}}

# Pre-allocate Model as a list with one NULL slot per fold, for reg() to fill.
Model <<- vector("list", length(Nb))

# Show the first three slots (all NULL before any model is fitted).
head(Model, n=3)
[[1]]
NULL

[[2]]
NULL

[[3]]
NULL

# Sequential run. invisible() suppresses printing of the list returned by the
# foreach loop; the models are read back from the Model list instead.
invisible(reg_npar(Nb))

# The first three slots now hold fitted lm objects.
head(Model, n=3)
[[1]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.439e+04          -7.751e-07           2.623e-06  


[[2]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.594e+04          -2.278e-06           4.212e-06  


[[3]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.679e+04          -1.505e-06           3.085e-06  



# reg_par(Nb): same loop as reg_npar() but with %dopar%, so each reg(i) is
# evaluated by the registered parallel backend. The value of the function is
# the list of fits produced by the loop (this is what is printed when
# reg_par(Nb) is called below).
# NOTE: as the transcript shows, Model in this session is still all NULL after
# the parallel run: the `<<-` inside reg() takes effect where reg(i) is
# evaluated (on the cluster workers), not here. To keep the models, assign the
# return value instead, e.g. Model <- reg_par(Nb).
reg_par <- function(Nb){# Fit one model per fold, in parallel
+ foreach(i = Nb) %dopar% {
+ reg(i)}}

# Reset Model to an empty list (one NULL slot per fold) before the parallel run.
Model <<- vector("list", length(Nb))

# Confirm that the slots are NULL again.
head(Model, n=3)
[[1]]
NULL

[[2]]
NULL

[[3]]
NULL


# Start a cluster with one worker fewer than the detected number of cores and
# register it as the backend used by %dopar%.
cl = makeCluster(detectCores()-1)
registerDoParallel(cl)
# The list returned by reg_par(Nb) is only printed here (output below); it is
# not assigned to any variable.
reg_par(Nb)
[[1]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.439e+04          -7.751e-07           2.623e-06  


[[2]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.594e+04          -2.278e-06           4.212e-06  


[[3]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.679e+04          -1.505e-06           3.085e-06  


[[4]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.510e+04          -1.244e-06           2.971e-06  


[[5]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.707e+04          -2.037e-06           3.541e-06  


[[6]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.637e+04          -1.343e-06           2.995e-06  


[[7]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.569e+04          -1.671e-06           3.405e-06  


[[8]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.650e+04          -2.022e-06           3.775e-06  


[[9]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.682e+04          -2.025e-06           3.623e-06  


[[10]]

Call:
lm(formula = Formula2, data = FinalData[Fold(i), ])

Coefficients:
       (Intercept)  Tractor_Prop_Small    Tractor_Prop_Med  
         1.585e+04          -1.434e-06           3.157e-06 

# Shut down the workers started by makeCluster().
stopCluster(cl)

# Model in this session was not modified by the parallel run (still NULL).
head(Model, n=3)
[[1]]
NULL

[[2]]
NULL

[[3]]
NULL

Does anyone know how I can retrieve and use my Model list after the parallel run, just as I can after the sequential (non-parallel) run?

Thank you

Tim007
  • 11
  • 2
  • It's easier to help you if you include a simple [reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) with sample input that can be used to test and verify possible solutions. – Brian Montgomery Dec 09 '21 at 05:35
  • 1
    Usually, you have to return the result from the body of the parallel loop. The returned objects are then collected into a list, which you assign to a variable of your choice, e.g. list2return <- foreach(i = Nb) %dopar% ... – JKupzig Dec 09 '21 at 08:17

0 Answers0