I am trying out something along the lines of market basket analysis, but with a twist. Suppose I want to run a market basket analysis on customers in different predefined segments and obtain association rules for every item in the market basket. Here is the working code:
# Serial market basket analysis per customer segment.
#
# For every segment in `ori_distinct_char`, the rows of `ori` belonging to that
# segment (column V3) are turned into arules transactions (values of column 2
# grouped by column 1). For every item, rules with that item on the left-hand
# side are mined, the 50 highest-confidence rules are kept, and the segment
# label is appended as an extra column. All results are stacked into `rf`
# (left as NULL when no rule is found).
#
# Expects `ori`, `ori_distinct_char` and `route` to be defined already and the
# arules package to be attached.
l <- NULL # not used in this section; kept in case later code expects it
rf <- NULL
rf_temp <- NULL
options(scipen = 999)
options(digits = 10)

# Per-item results are collected here and bound once at the end; calling
# rbind() on a growing data frame inside the loop copies it on every pass.
rule_chunks <- list()

for (i in ori_distinct_char) { # different customer segments
  # Subset the rows of the current segment.
  subset <- ori[which(ori$V3 == as.character(i)), ]
  subset_data <- as(
    split(as.vector(subset[, 2]), as.vector(subset[, 1])),
    "transactions"
  )
  food <- unique(subset$V2)

  # NOTE(review): items are drawn from `route`, which is defined outside this
  # section, while `food` only supplies the count -- confirm that the first
  # length(food) entries of `route` are the intended items.
  # seq_along() yields an empty index for an empty `food`, whereas
  # 1:length(food) would become c(1, 0) and select route[1].
  for (j in route[seq_along(food)]) {
    # Minimum support and confidence are set as low as possible so that more
    # rules are produced (there is little data).
    rules_food <- apriori(
      subset_data,
      parameter = list(
        supp = 0.0000001, conf = 0.0000001, minlen = 2, target = "rules"
      ),
      appearance = list(lhs = as.character(j), default = "rhs")
    )
    rules_food <- sort(rules_food, by = c("confidence"), decreasing = TRUE)
    rf_temp <- as(head(rules_food, 50), "data.frame")

    if (nrow(rf_temp) != 0) {
      # The label expression is kept verbatim: cbind() derives the name of the
      # new column from it, so changing it would rename a column of `rf`.
      rule_chunks[[length(rule_chunks) + 1L]] <-
        cbind(rf_temp, paste(i, sep = ""))
    }
  }
}

# Rows appear in the same order as with incremental rbind().
rf <- do.call(rbind, rule_chunks)
I am trying to find a way to run this script so that every combination is processed in parallel, i.e. association rules are mined for each customer segment and each food item in parallel, covering all possible segment/food combinations. Otherwise the working script is too slow: imagine 5 segments and 2000 choices of food.
Update: here is my attempt so far, using a `foreach` loop:
# Parallel market basket analysis per customer segment.
#
# Segments are processed one after another; within a segment the per-item
# apriori() runs are distributed over the cluster workers. With few segments
# and many items this is where the parallelism pays off, and the transactions
# of a segment are built only once.
#
# Expects `ori` and `ori_distinct_char` to be defined already and the arules,
# foreach and doParallel packages to be attached. The result is stored in `rf`
# (left as NULL when no rule is found).
cl <- makeCluster(3)
registerDoParallel(cl)
l <- NULL # not used in this section; kept in case later code expects it
rf <- NULL
rf_temp <- NULL
options(scipen = 999)
options(digits = 10)

segment_rules <- list()

tryCatch(
  {
    # Iterate over the segment values themselves: comparing ori$V3 with a
    # loop index (1, 2, ...) would select the wrong rows.
    for (i in ori_distinct_char) { # different customer segments
      # Subset the rows of the current segment.
      subset <- ori[which(ori$V3 == as.character(i)), ]
      subset_data <- as(
        split(as.vector(subset[, 2]), as.vector(subset[, 1])),
        "transactions"
      )
      # NOTE(review): the serial version of this script takes its items from
      # `route`; here the distinct items of the segment are used directly --
      # swap in the `route` entries if those are the intended items.
      food <- as.character(unique(subset$V2))

      # The value of the foreach body is what gets combined. Assigning to a
      # variable inside %dopar% only changes the worker's copy, so each task
      # returns its rows (or NULL) instead of appending to `rf`.
      segment_rules[[length(segment_rules) + 1L]] <- foreach(
        j = food, .combine = rbind, .packages = "arules"
      ) %dopar% {
        # Minimum support and confidence are set as low as possible so that
        # more rules are produced (there is little data).
        rules_food <- apriori(
          subset_data,
          parameter = list(
            supp = 0.0000001, conf = 0.0000001, minlen = 2, target = "rules"
          ),
          appearance = list(lhs = j, default = "rhs")
        )
        rules_food <- sort(rules_food, by = c("confidence"), decreasing = TRUE)
        rf_temp <- as(head(rules_food, 50), "data.frame")

        if (nrow(rf_temp) != 0) {
          # Same label expression as the serial script, so the extra column
          # gets the same name.
          cbind(rf_temp, paste(i, sep = ""))
        } else {
          NULL
        }
      }
    }

    rf <- do.call(rbind, segment_rules)
  },
  # Always release the worker processes, even if a task fails.
  finally = stopCluster(cl)
)