I have a large time series data set that normally takes about 4 hours to process sequentially, working through its 1800 time series one at a time. I'm looking for a way to use several cores to reduce this time, because I have a number of these data sets to get through on a regular basis.
The R code I am using for the sequential processing is below. There are 4 files, each containing a different data set, and each file contains over 1800 series. I have been trying to use doParallel to analyze each time series independently and concatenate the results into a single file; even a CSV file would do. A rough sketch of the foreach/doParallel structure I have in mind follows the sequential code.
# load the required packages and the data sets
library(forecast)   # ets(), forecast()
library(xlsx)       # createWorkbook(), createSheet(), addDataFrame(), saveWorkbook()

files <- c("3MH Clean", "3MH", "6MH", "12MH")
for (j in 1:4)
{
  cat(paste("\n\n\n Evaluation of", files[j], " - Started at", date(), "\n\n\n"))
  History <- read.csv(paste(files[j], "csv", sep = "."))

  # output forecast to XLSX
  outwb <- createWorkbook()
  sheet <- createSheet(outwb, sheetName = paste(files[j], " - ETS"))

  Items <- unique(unlist(History$Item))
  for (i in 1:length(Items))
  {
    cat(paste("Evaluation of item ", Items[i], "-", i, "of", length(Items), "\n"))

    # build a monthly ts object for this item
    data  <- subset(History, Item == Items[i])
    dates <- unique(unlist(data$Date))
    d     <- as.Date(dates, format = "%d/%m/%Y")
    data.ts <- ts(data$Volume, frequency = 12,
                  start = c(as.numeric(format(d[1], "%Y")),
                            as.numeric(format(d[1], "%m"))))

    # fit the ETS model and forecast 24 months ahead
    try(data.ets <- ets(data.ts))
    try(fc <- forecast(data.ets, h = 24))

    # tag the forecast rows with the item and append them to the sheet
    ets.df <- data.frame(fc)
    ets.df$Item <- rep(Items[i], 24)
    r <- 24 * (i - 1) + 2
    addDataFrame(ets.df, sheet, col.names = FALSE, startRow = r)
  }
  cat(paste("\n\n\n Evaluation of", files[j], " - Completed at", date(), "\n\n\n"))
  saveWorkbook(outwb, paste(files[j], "xlsx", sep = "."))
}
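
For reference, this is the rough shape of the foreach/doParallel version I have been trying to get working. Treat it as a sketch rather than working code: the 4-worker cluster, the rbind combine, the output file name and the final write.csv are my assumptions, the try() error handling from the sequential version is dropped for brevity, and it only covers one of the four input files.

# sketch of a parallel version using foreach/doParallel (assumptions noted above)
library(doParallel)   # also loads foreach and parallel
library(forecast)

cl <- makeCluster(4)  # assumed: 4 worker processes
registerDoParallel(cl)

History <- read.csv("3MH Clean.csv")
Items   <- unique(unlist(History$Item))

results <- foreach(i = seq_along(Items),
                   .combine  = rbind,
                   .packages = "forecast") %dopar% {
  # same per-item steps as the inner loop of the sequential code
  data    <- subset(History, Item == Items[i])
  d       <- as.Date(unique(data$Date), format = "%d/%m/%Y")
  data.ts <- ts(data$Volume, frequency = 12,
                start = c(as.numeric(format(d[1], "%Y")),
                          as.numeric(format(d[1], "%m"))))
  fc      <- forecast(ets(data.ts), h = 24)

  df        <- data.frame(fc)
  df$Item   <- Items[i]
  df$Period <- rownames(df)  # keep the forecast period labels as a column
  df                         # returned data frames are rbind-ed across workers
}

stopCluster(cl)

# concatenate everything into one CSV (assumed output file name)
write.csv(results, "3MH Clean - ETS forecasts.csv", row.names = FALSE)

The idea behind .combine = rbind is that each worker returns its 24-row forecast data frame and foreach stacks them into one data frame, which can then be written out in a single write.csv call instead of building the XLSX sheet row by row.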