1

I have a time series data at hourly level. I am trying to build a forecast for that data. The following is sample of data:

sample <-
structure(list(group_type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Group 1",
"Group 2", "Group 5"), class = "factor"), sub_group_type = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("Sub Group 1", "Sub Group 2", "Sub Group 3"),
class = "factor"), date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1/1/17",
"1/2/17", "1/3/17"), class = "factor"), hour = c(6L, 7L, 8L, 9L, 10L, 11L, 12L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), weekday = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L),
.Label = c("Monday", "Sunday", "Tuesday"), class = "factor"), total = c(9L, 9L,
10L, 6L, 2L, 14L, 3L, 11L, 12L, 12L, 0L, 10L, 8L, 13L, 14L, 17L, 12L, 5L, 9L, 7L,
10L, 13L, 23L, 11L, 3L, 6L, 10L, 11L, 14L, 16L, 13L, 2L, 3L, 4L, 14L, 11L, 16L,
8L, 12L, 7L, 6L, 13L, 13L, 22L, 12L, 7L, 9L, 8L, 14L, 9L, 16L, 15L, 6L, 7L, 6L,
12L, 13L, 14L, 7L, 3L, 13L, 11L, 6L, 8L, 15L, 11L, 3L, 10L, 9L, 7L, 12L, 10L, 10L,
3L, 14L, 8L, 12L, 10L, 20L, 5L, 4L, 8L, 12L, 3L, 0L, 4L, 5L, 1L, 6L, 7L, 0L, 3L,
1L, 1L, 0L, 2L, 2L, 0L, 2L, 0L, 3L, 7L, 6L, 2L, 1L)), .Names = c("group_type",
"sub_group_type", "date", "hour", "weekday", "total"), class = "data.frame",
row.names = c(NA, -105L))

I am applying the following functions to the above data:

models <- function(x){
  x <- msts(x, seasonal.periods=c(24,168))
  mod_exp <- ets(x, ic='aicc', restrict=T)
  mod_hwa <- HoltWinters(x,seasonal = "additive")
  mod_hwm <- HoltWinters(x,seasonal = "multiplicative")
  mod_neural <- nnetar(x, p=7, size=25)
  mod_tbats <- tbats(x, ic='aicc', seasonal.periods=7)
  mod_bats <- bats(x, ic='aicc', seasonal.periods=7)
  mod_stl <- stlm(x, s.window=7, ic='aicc', robust=TRUE, method='ets')
  mod_sts <- StructTS(x)
}

test <- by(sample,list(sample$group_type,sample$sub_group_type,sample$date, sample$hour
),models)

However, I am getting the following error:

 Error in ets(x, ic = "aicc", restrict = T) : y should be a univariate time series 

If I split the data as follows as apply ets() function, I am able to run it without any issues. But, this splitting of data is not a very feasible option for me as the number of Groups and Sub Groups are too many and each of them has a different time series pattern:

sub_sample_1 <- sample[sample$group_type == "Group 1" &    sample$sub_group_type == "Sub Group 1",6]
x <- msts(sub_sample_1, seasonal.periods=24)
mod_arima <- auto.arima(x, ic='aicc', stepwise=F)
mod_exp <- ets(x, ic='aicc', restrict=T)
mod_hwa <- HoltWinters(x,seasonal = "additive")
mod_hwm <- HoltWinters(x,seasonal = "multiplicative")
mod_neural <- nnetar(x, p=24, size=10)
mod_tbats <- tbats(x, ic='aicc', seasonal.periods=24)
mod_bats <- bats(x, ic='aicc', seasonal.periods=24)
mod_stl <- stlm(x, s.window=24, ic='aicc', robust=TRUE, method='ets')
mod_sts <- StructTS(x)

Is there any work around so that I can apply the models by group of columns with out encountering any errors?

Also, not all models are working for all the groups. For the sub_sample_1 data, HoltWinters, neuralnet, bats and stl are giving me error and others are working

> mod_hwa <- HoltWinters(x,seasonal = "additive")
Error in decompose(ts(x[1L:wind], start = start(x), frequency = f), seasonal) : 
 time series has no or less than 2 periods

> mod_hwm <- HoltWinters(x,seasonal = "multiplicative")
Error in HoltWinters(x, seasonal = "multiplicative") : 
 data must be non-zero for multiplicative Holt-Winters

> mod_bats <- bats(x, ic='aicc', seasonal.periods=24)
Error in optim(par = param.vector$vect, fn = calcLikelihoodNOTransformed,  : 
function cannot be evaluated at initial parameters

I can understand why these models are not working for my data. How do I exclude them when they give errors when I apply the function?

Thanks in advance for the help!

This question is similar (extension maybe) to my other question here

Community
  • 1
  • 1
EsBee
  • 231
  • 2
  • 13

1 Answers1

1

Several issues arise from your current setup:

  1. Functions return the last line if no return() is specified. So your first attempt will lose all lines except for mod_sts which will be the value test is assigned for each subset of by.

  2. In your subset code, you are actually passing the 6th column (an atomic vector) whereas you pass all columns of dataframe in your first code attempt. This may be the reason for your error where input should be per the msts docs:

    A numeric vector, ts object, matrix or data frame. It is intended that the time series data is univariate, otherwise treated the same as ts().

  3. Your by is receiving four groupings, group_type, sub_group_type, date, and hour unlike your second subset code of two. Unless your data is very large, these many groupings may result with few rows or no rows, and hence not enough data points for model procedures as your last code block seems to suggest.

With that said, consider the following adjustment in returning a list of named elements by first two groupings, specifying the 6th column. And because by takes a combinations of factors which in subsetting dataframe may yield no rows, below uses tryCatch to capture any errors and return empty lists to be filtered out in final line.

models <- function(x){
  x <- msts(x, seasonal.periods=c(24,168))
  list(
    mod_exp = ets(x, ic='aicc', restrict=T),
    mod_hwa = HoltWinters(x,seasonal = "additive"),
    mod_hwm = HoltWinters(x,seasonal = "multiplicative"),
    mod_neural = nnetar(x, p=7, size=25),
    mod_tbats = tbats(x, ic='aicc', seasonal.periods=7),
    mod_bats = bats(x, ic='aicc', seasonal.periods=7),
    mod_stl = stlm(x, s.window=7, ic='aicc', robust=TRUE, method='ets'),
    mod_sts = StructTS(x)
  )
}

# TRY/CATCH TO CAPTURE ERRORS AND RETURN EMPTY LIST
test <- by(sample[,6], list(sample$group_type, sample$sub_group_type), 
           function(x) tryCatch({ models(x)
                                }, error=function(e) return(list(NA))))

# TO REMOVE NULLs AND NAs (EMPTY ITEMS)
test <- Filter(function(i) length(i) > 0, test)
Parfait
  • 104,375
  • 17
  • 94
  • 125
  • Thanks @Parfait. I am updating my original code based on your suggestions. Will post the update once I am done. Appreciate your help – EsBee Feb 07 '17 at 19:27