I have a time series data at hourly level. I am trying to build a forecast for that data. The following is sample of data:
sample <-
structure(list(group_type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Group 1",
"Group 2", "Group 5"), class = "factor"), sub_group_type = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("Sub Group 1", "Sub Group 2", "Sub Group 3"),
class = "factor"), date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1/1/17",
"1/2/17", "1/3/17"), class = "factor"), hour = c(6L, 7L, 8L, 9L, 10L, 11L, 12L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), weekday = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L),
.Label = c("Monday", "Sunday", "Tuesday"), class = "factor"), total = c(9L, 9L,
10L, 6L, 2L, 14L, 3L, 11L, 12L, 12L, 0L, 10L, 8L, 13L, 14L, 17L, 12L, 5L, 9L, 7L,
10L, 13L, 23L, 11L, 3L, 6L, 10L, 11L, 14L, 16L, 13L, 2L, 3L, 4L, 14L, 11L, 16L,
8L, 12L, 7L, 6L, 13L, 13L, 22L, 12L, 7L, 9L, 8L, 14L, 9L, 16L, 15L, 6L, 7L, 6L,
12L, 13L, 14L, 7L, 3L, 13L, 11L, 6L, 8L, 15L, 11L, 3L, 10L, 9L, 7L, 12L, 10L, 10L,
3L, 14L, 8L, 12L, 10L, 20L, 5L, 4L, 8L, 12L, 3L, 0L, 4L, 5L, 1L, 6L, 7L, 0L, 3L,
1L, 1L, 0L, 2L, 2L, 0L, 2L, 0L, 3L, 7L, 6L, 2L, 1L)), .Names = c("group_type",
"sub_group_type", "date", "hour", "weekday", "total"), class = "data.frame",
row.names = c(NA, -105L))
I am applying the following functions to the above data:
models <- function(x){
x <- msts(x, seasonal.periods=c(24,168))
mod_exp <- ets(x, ic='aicc', restrict=T)
mod_hwa <- HoltWinters(x,seasonal = "additive")
mod_hwm <- HoltWinters(x,seasonal = "multiplicative")
mod_neural <- nnetar(x, p=7, size=25)
mod_tbats <- tbats(x, ic='aicc', seasonal.periods=7)
mod_bats <- bats(x, ic='aicc', seasonal.periods=7)
mod_stl <- stlm(x, s.window=7, ic='aicc', robust=TRUE, method='ets')
mod_sts <- StructTS(x)
}
test <- by(sample,list(sample$group_type,sample$sub_group_type,sample$date, sample$hour
),models)
However, I am getting the following error:
Error in ets(x, ic = "aicc", restrict = T) : y should be a univariate time series
If I split the data as follows as apply ets() function, I am able to run it without any issues. But, this splitting of data is not a very feasible option for me as the number of Groups and Sub Groups are too many and each of them has a different time series pattern:
sub_sample_1 <- sample[sample$group_type == "Group 1" & sample$sub_group_type == "Sub Group 1",6]
x <- msts(sub_sample_1, seasonal.periods=24)
mod_arima <- auto.arima(x, ic='aicc', stepwise=F)
mod_exp <- ets(x, ic='aicc', restrict=T)
mod_hwa <- HoltWinters(x,seasonal = "additive")
mod_hwm <- HoltWinters(x,seasonal = "multiplicative")
mod_neural <- nnetar(x, p=24, size=10)
mod_tbats <- tbats(x, ic='aicc', seasonal.periods=24)
mod_bats <- bats(x, ic='aicc', seasonal.periods=24)
mod_stl <- stlm(x, s.window=24, ic='aicc', robust=TRUE, method='ets')
mod_sts <- StructTS(x)
Is there any work around so that I can apply the models by group of columns with out encountering any errors?
Also, not all models are working for all the groups. For the sub_sample_1 data, HoltWinters, neuralnet, bats and stl are giving me error and others are working
> mod_hwa <- HoltWinters(x,seasonal = "additive")
Error in decompose(ts(x[1L:wind], start = start(x), frequency = f), seasonal) :
time series has no or less than 2 periods
> mod_hwm <- HoltWinters(x,seasonal = "multiplicative")
Error in HoltWinters(x, seasonal = "multiplicative") :
data must be non-zero for multiplicative Holt-Winters
> mod_bats <- bats(x, ic='aicc', seasonal.periods=24)
Error in optim(par = param.vector$vect, fn = calcLikelihoodNOTransformed, :
function cannot be evaluated at initial parameters
I can understand why these models are not working for my data. How do I exclude them when they give errors when I apply the function?
Thanks in advance for the help!
This question is similar (extension maybe) to my other question here