1
require(plyr)
library(reshape)
library(iterators)
library(parallel)
library(foreach)
library(doParallel)    

getCosine <- function(x,y) 
{
  this.cosine <- sum(x*y) / (sqrt(sum(x*x)) * sqrt(sum(y*y)))
  return(this.cosine)
}

visitordata <- read.csv("~/Hotels.csv", sep = ",", header = TRUE , stringsAsFactors = FALSE )
visitordata <- subset(visitordata, Product.Views > 0)
head(visitordata)

   Visitor_ID       Products   Product.Views 
2 1001863689_3519696751   CZ1XQZ             2 
3 1001863689_3519696751   CZR3CN             1
4 1001863689_3519696751   CZTNKN             3
5 121021834007_98749174   CZ2LB0             1
6 11029477426_678878300   CZTNKN             1
7 21029477426_678878300   CZVDHR             1

ColumnBasedData <- reshape(visitordata, idvar="Visitor_ID", timevar="Products", direction="wide")

ColumnBasedData[is.na(ColumnBasedData)] <- 0

x <<- (ColumnBasedData[,!(names(ColumnBasedData) %in% c("Visitor_ID"))])
head(x)

  Product.Views.CZ1XQZ Product.Views.CZR3CN Product.Views.CZTNKN Product.Views.CZVDHR Product.Views.CZ36D3 Product.Views.CZE0EN
2                     1                    1                    1                    0                    0                    0
6                     0                    0                    1                    1                    0                    0
9                     0                    0                    0                    0                    1                    1
24                    0                    0                    0                    0                    0                    0
37                    0                    0                    0                    0                    0                    0
40                    0                    0                    0                    0                    0                    0

holder <- matrix(NA, nrow=ncol(x),ncol=ncol(x),dimnames=list(colnames(x),colnames(x)))

dataframe_y <<- as.data.frame(holder)



cl<-makeCluster(detectCores() -1)
doParallel::registerDoParallel(cl)

ls <- foreach(i = 1:ncol(x)) %dopar% {
 for(j in 1:ncol(x)) {
    dataframe_y[i,j] <- getCosine(x[i],x[j])
 }
}
stopCluster(cl)

write.csv(dataframe_y,file="~/cosine.csv")

It works with %do% but doesn't with %dopar%. With %dopar%, dataframe_y returns null. Any idea?

Edit : Libraries, funtions, data examples. I will process with big data, so I am trying to use parallel-processing. Script takes more than one day to complete without parallel-processing.

Emre Gerçek
  • 51
  • 1
  • 6
  • 1
    Provide [minimal reproducible example](http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example). Are you using `parallel`? What is `getCosine`? What is `x`? – m0nhawk Nov 03 '15 at 08:00
  • @m0nhawk I edited the question. Hope that this version is ok. Thanks for comment. – Emre Gerçek Nov 03 '15 at 08:45
  • 1
    Check this question.. Let us know if it solves your issue: http://stackoverflow.com/questions/33081342/foreach-parallel-loop-returns-nas –  Nov 03 '15 at 09:06
  • Also, use nested `foreach`: [`3`](https://cran.r-project.org/web/packages/foreach/vignettes/nested.pdf). – m0nhawk Nov 03 '15 at 09:09

1 Answers1

0

Many thanks to all. Nested Foreach worked for me. See what I have changed below.

 ls <-
   foreach(i = 1:ncol(x), .combine = rbind) %:%
   foreach(j = 1:ncol(x), .combine=cbind) %dopar% {
     dataframe_y[i,j] <- getCosine(x[i],x[j])
   }

holder <- matrix(ls, nrow=ncol(x),ncol=ncol(x),dimnames=list(colnames(x),colnames(x)))
dataframe_y <<- as.data.frame(holder)
Emre Gerçek
  • 51
  • 1
  • 6