39

I recently got a computer with several cores and am learning to use parallel computing. I'm fairly proficient with lapply and was told parLapply works very similarly. I'm not operating it correctly though. It seems I have to explicitly put everything inside the parLapply call to make it work (that is, the functions to be used, variables, etc.). lapply reads from the parent environment, but parLapply does not seem to do this. So in my example below I could make everything work by placing all the info inside parLapply, but if I use this inside a user-defined function I can't explicitly put text.var inside of parLapply.

library(parallel)
# Sample input: the same sentence repeated 20 times.
text.var <- rep("I like cake and ice cream so much!", 20)
# Number of strings held in text.var.
ntv <- length(text.var)
# Interval used by the examples to decide when to call gc().
gc.rate <- 10

# Report the character count of every space-separated word in `i`,
# joined into a single string with " | " between the counts.
pos <- function(i) {
    words <- strsplit(tolower(i), " ")
    word_lengths <- sapply(words, nchar)
    paste(word_lengths, collapse = " | ")
}

# Serial baseline: apply pos() to each element of text.var by index and
# call gc() whenever the index is a multiple of gc.rate.
lapply(seq_len(ntv), function(i) {
    x <- pos(text.var[i])
    if (i %% gc.rate == 0) {
        gc()
    }
    x
})

#doesn't work
# Each cluster worker is a separate R process. pos, text.var and gc.rate are
# defined only in the calling session, so the function below cannot find
# them when it is evaluated on a worker.
cl <- makeCluster(mc <- getOption("cl.cores", 4))
parLapply(cl, seq_len(ntv), function(i) {
        x <- pos(text.var[i])
        if (i%%gc.rate==0) gc()
        return(x)
    }

)

#does work but have to specify all the stuff inside parLapply
mc <- getOption("cl.cores", 4)
cl <- makeCluster(mc)
parLapply(cl, seq_len(ntv), function(i) {
    ######stuff I have to put inside parLapply##########
    # Everything the worker needs is rebuilt locally on every call.
    pos <- function(i) {
        words <- strsplit(tolower(i), " ")
        paste(sapply(words, nchar), collapse = " | ")
    }
    text.var <- rep("I like cake and ice cream so much!", 20)
    ntv <- length(text.var)
    gc.rate <- 10
    ######stuff I have to put inside parLapply##########
    x <- pos(text.var[i])
    if (i %% gc.rate == 0) {
        gc()
    }
    x
})

How can I pass text.var, ntv, gc.rate, and pos to parLapply without explicitly putting them inside? (I'm guessing you pass them as a list somehow)

PS: I'm on a Windows 7 machine, so I think I need to use parLapply.

Roman Luštrik
  • 69,533
  • 24
  • 154
  • 197
Tyler Rinker
  • 108,132
  • 65
  • 322
  • 519
  • 3
    I made a blog post on my learning with this for future searchers: http://trinkerrstuff.wordpress.com/2012/08/19/parallelization-speed-up-functions-in-a-package/ – Tyler Rinker Aug 20 '12 at 13:29

3 Answers

49

You need to export those variables to the other R processes in the cluster:

# Start a local cluster; the worker count is taken from the "cl.cores"
# option and falls back to 4 when that option is not set.
cl <- makeCluster(mc <- getOption("cl.cores", 4))
# The workers are separate R processes, so copy the objects the parallel
# function refers to into each of them. envir = environment() resolves the
# names from wherever this line runs, so the export also works when the
# objects are local to an enclosing user-defined function; the default
# (envir = .GlobalEnv) would only find global objects.
clusterExport(cl = cl, varlist = c("text.var", "ntv", "gc.rate", "pos"),
              envir = environment())
Andy
  • 4,549
  • 31
  • 26
  • 1
    @ttmaccer - how long did it take in total? Is it just that the overall time is being washed out by the variability in total time? i.e. the task wasn't really long enough for things to stabilize? – Chase Aug 18 '12 at 17:29
  • Apparently the argument `varlist` is nowadays called `list`. – otwtm Jun 02 '20 at 18:12
13

An alternate method provided by Martin Morgan would work here as well.

This method supplies the objects to each node in the cluster directly in the parLapply call, with no need to use clusterExport:

library(parallel)
# Example data: one sentence repeated 20 times.
text.var <- rep("I like cake and ice cream so much!", 20)
# Length of text.var, used as the upper bound of the index sequence.
ntv <- length(text.var)
# gc() is called for indices that are multiples of this value.
gc.rate <- 10

# Split the lower-cased input on single spaces, count the characters of
# each piece and return the counts pasted together with " | ".
pos <- function(i) {
    tokens <- strsplit(tolower(i), " ")
    counts <- sapply(tokens, nchar)
    paste(counts, collapse = " | ")
}

# Start a local cluster; the worker count is taken from the "cl.cores"
# option and falls back to 4 when that option is not set.
cl <- makeCluster(mc <- getOption("cl.cores", 4))
# Arguments given after the function are forwarded by parLapply() to every
# call, so pos, text.var, ntv and gc.rate reach the workers as ordinary
# arguments and no clusterExport() step is required. They are passed by
# name so the mapping to the function's parameters is explicit.
parLapply(cl, seq_len(ntv), function(i, pos, text.var, ntv, gc.rate) {
        x <- pos(text.var[i])
        if (i %% gc.rate == 0) gc()
        return(x)
    }, pos = pos, text.var = text.var, ntv = ntv, gc.rate = gc.rate
)
# Shut the worker processes down once the result has been produced;
# otherwise they stay alive until the R session ends.
stopCluster(cl)
Community
  • 1
  • 1
Tyler Rinker
  • 108,132
  • 65
  • 322
  • 519
-1
# Run the same per-index job serially (out1) and on the cluster cl (out2).
# NOTE(review): presumably pos, text.var and gc.rate were already made
# available on the workers before this point -- confirm.
out1 <- lapply(seq_len(ntv), function(i) {
    x <- pos(text.var[i])
    if (i %% gc.rate == 0) {
        gc()
    }
    x
})
out2 <- parLapply(cl, seq_len(ntv), function(i) {
    x <- pos(text.var[i])
    if (i %% gc.rate == 0) {
        gc()
    }
    x
})

>     identical(out1,out2)
# [1] TRUE
# library() stops with an error immediately when rbenchmark is not
# installed; require() would only warn and return FALSE, leaving the
# benchmark() call below to fail with a less helpful message.
library(rbenchmark)
# Time the serial and the cluster version of the same job side by side.
# The two expressions are left token-for-token as written so the labels
# benchmark() prints for them are unchanged.
benchmark(
    lapply(seq_len(ntv), function(i) {x <- pos(text.var[i]);if (i%%gc.rate==0) gc();return(x)}),
    parLapply(cl, seq_len(ntv), function(i) {x <- pos(text.var[i]);if (i%%gc.rate==0) gc();return(x)})
)


                                                                                       test
#1        lapply(seq_len(ntv), function(i) {\n    x <- pos(text.var[i])\n    if (i%%gc.rate == 0) \n        gc()\n    return(x)\n})
#2 parLapply(cl, seq_len(ntv), function(i) {\n    x <- pos(text.var[i])\n    if (i%%gc.rate == 0) \n        gc()\n    return(x)\n})
#  replications elapsed relative user.self sys.self user.child sys.child
#1          100   20.03 3.453448     20.31     0.05         NA        NA
#2          100    5.80 1.000000      0.22     0.03         NA        NA

> cl
socket cluster with 2 nodes on host ‘localhost’
shhhhimhuntingrabbits
  • 7,397
  • 2
  • 23
  • 23