library(parallel)
# Calculate the number of worker processes: leave one core free for the
# main R session, but never request fewer than one worker.
# detectCores() returns NA when the core count cannot be determined and 1 on
# a single-core machine; either would make makeCluster() fail, so clamp to 1.
no_cores <- max(1L, detectCores() - 1L, na.rm = TRUE)
# Initiate cluster (call stopCluster(cl) once the workers are no longer needed)
cl <- makeCluster(no_cores)
# State labels shared by all three chains.
statesNames <- c("a", "b", "c")

# NOTE(review): the "markovchain" and "markovchainList" classes are not
# defined in this script; presumably the markovchain package is already
# attached -- confirm.
# All three chains use the same transition matrix (one row per source state),
# so build the chain once and copy it.
mcA <- new(
  "markovchain",
  states = statesNames,
  transitionMatrix = matrix(
    c(0.2, 0.5, 0.3,
      0,   0.2, 0.8,
      0.1, 0.8, 0.1),
    nrow = 3,
    byrow = TRUE,
    dimnames = list(statesNames, statesNames)
  )
)
mcB <- mcA
mcC <- mcA

# Bundle the chains in order; `mc` is a second name for the same list.
mclist <- new("markovchainList", markovchains = list(mcA, mcB, mcC))
mc <- mclist

# Copy `mclist` to every worker so functions run there can look it up by name.
clusterExport(cl, "mclist")
# Simulate one path through a list of Markov chains, drawing one state per
# chain in order.
#
# The walk starts from the first state of the first chain. At step i the next
# state is sampled from the states of chain i, using the row of chain i's
# transition matrix that belongs to the current state.
#
# Arguments:
#   x      Ignored. It exists so the function can be handed directly to
#          sapply()/parSapply()-style callers that pass an element.
#   chains Object with a `markovchains` slot holding a list of chains, each
#          with `states` and `transitionMatrix` slots. Defaults to the
#          global `mclist`, which is what the function always used before.
#
# Returns:
#   Character vector with one sampled state per chain; character(0) when the
#   list of chains is empty.
f <- function(x, chains = mclist) {
  steps <- chains@markovchains
  n <- length(steps)
  path <- character(n)
  if (n == 0L) {
    # Nothing to simulate; without this guard steps[[1]] below would fail.
    return(path)
  }

  current <- steps[[1L]]@states[1L]
  for (i in seq_len(n)) {
    states <- steps[[i]]@states
    row <- match(current, states)
    if (is.na(row)) {
      # Fail with a clear message instead of letting sample() complain about
      # an empty probability vector.
      stop("state '", current, "' is not a state of chain ", i, call. = FALSE)
    }
    current <- sample(
      x = states,
      size = 1,
      prob = steps[[i]]@transitionMatrix[row, ]
    )
    path[i] <- current
  }
  path
}
I have two functions that perform the same task: one uses the parallel package and the other does not. I expected the parallel version to run faster, but it actually seems to be slower.
> microbenchmark(rmarkovchain(100, mc, "matrix",useRCpp = F), parSapply(cl, 1:100,f))
Unit: milliseconds
expr min lq mean median uq max neval
rmarkovchain(100, mc, "matrix", useRCpp = F) 3.632955 4.251373 5.611569 5.507326 6.681284 11.92689 100
parSapply(cl, 1:100, f) 40.929350 43.893277 45.516566 45.373365 47.366842 52.80290 100
Since I am using Linux, I tried mclapply instead of parSapply. It is now faster than parSapply, but still slower than the non-parallel version.
> microbenchmark(rmarkovchain(100, mc, "matrix",useRCpp = F), mclapply(cl, 1:100,f))
Unit: milliseconds
expr min lq mean median uq max neval
rmarkovchain(100, mc, "matrix", useRCpp = F) 3.798599 3.97889 6.636692 6.053313 8.935721 18.08281 100
mclapply(cl, 1:100, f) 14.862175 20.81366 26.211019 25.636895 31.893560 34.42886 100
Why am I not able to speed up the work using the parallel package in R?