There are two approaches you could use here: (1) use the forecast package as proposed; (2) use the fable package which is designed for this problem.
First, let's create some sample synthetic data.
library(tibble)
library(dplyr)
# Synthetic panel: stores "A" and "B", each with 200 monthly observations
# starting at 1995-01-01, and values drawn with rnorm().
df <- tibble(
  Store = rep(c("A", "B"), each = 200),
  Month = rep(
    seq(from = as.Date("1995-01-01"), by = "1 month", length.out = 200),
    times = 2
  ),
  Value = rnorm(n = 400)
)
For the forecast package, we will split the data into a list of tibbles, one per store. The as.data.frame()
function then makes it easy to turn each forecast object into a data frame.
# Using forecast package
library(forecast)
# Split the data into a named list with one tibble per store.
my_data_set <- split(df, df$Store)

# Fit an automatically selected ARIMA model to one store's monthly series and
# return its forecasts as a data frame.
#
# x: data frame with a Date column `Month` and a numeric column `Value`
#    holding a single monthly series.
# Returns: the result of as.data.frame() applied to the forecast object.
mod <- function(x) {
  # ts() depends on row order, so make sure the rows are chronological.
  x <- x[order(x$Month), ]
  first_month <- x$Month[1]
  # Pass both the year and the month of the first observation: with the year
  # alone, ts() assumes the series begins in January, and a series that starts
  # in any other month gets the wrong calendar labels on its forecasts.
  series_start <- c(lubridate::year(first_month), lubridate::month(first_month))
  x$Value %>%
    ts(frequency = 12, start = series_start) %>%
    auto.arima() %>%
    forecast() %>%
    as.data.frame()
}

# Apply the model to every store; the result is a list of data frames named
# by store.
lapply(my_data_set, mod)
#> $A
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Sep 2011 0 -1.327999 1.327999 -2.031 2.031
#> Oct 2011 0 -1.327999 1.327999 -2.031 2.031
#> Nov 2011 0 -1.327999 1.327999 -2.031 2.031
#> Dec 2011 0 -1.327999 1.327999 -2.031 2.031
#> Jan 2012 0 -1.327999 1.327999 -2.031 2.031
#> Feb 2012 0 -1.327999 1.327999 -2.031 2.031
#> Mar 2012 0 -1.327999 1.327999 -2.031 2.031
#> Apr 2012 0 -1.327999 1.327999 -2.031 2.031
#> May 2012 0 -1.327999 1.327999 -2.031 2.031
#> Jun 2012 0 -1.327999 1.327999 -2.031 2.031
#> Jul 2012 0 -1.327999 1.327999 -2.031 2.031
#> Aug 2012 0 -1.327999 1.327999 -2.031 2.031
#> Sep 2012 0 -1.327999 1.327999 -2.031 2.031
#> Oct 2012 0 -1.327999 1.327999 -2.031 2.031
#> Nov 2012 0 -1.327999 1.327999 -2.031 2.031
#> Dec 2012 0 -1.327999 1.327999 -2.031 2.031
#> Jan 2013 0 -1.327999 1.327999 -2.031 2.031
#> Feb 2013 0 -1.327999 1.327999 -2.031 2.031
#> Mar 2013 0 -1.327999 1.327999 -2.031 2.031
#> Apr 2013 0 -1.327999 1.327999 -2.031 2.031
#> May 2013 0 -1.327999 1.327999 -2.031 2.031
#> Jun 2013 0 -1.327999 1.327999 -2.031 2.031
#> Jul 2013 0 -1.327999 1.327999 -2.031 2.031
#> Aug 2013 0 -1.327999 1.327999 -2.031 2.031
#>
#> $B
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Sep 2011 0 -1.274651 1.274651 -1.949411 1.949411
#> Oct 2011 0 -1.274651 1.274651 -1.949411 1.949411
#> Nov 2011 0 -1.274651 1.274651 -1.949411 1.949411
#> Dec 2011 0 -1.274651 1.274651 -1.949411 1.949411
#> Jan 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> Feb 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> Mar 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> Apr 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> May 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> Jun 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> Jul 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> Aug 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> Sep 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> Oct 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> Nov 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> Dec 2012 0 -1.274651 1.274651 -1.949411 1.949411
#> Jan 2013 0 -1.274651 1.274651 -1.949411 1.949411
#> Feb 2013 0 -1.274651 1.274651 -1.949411 1.949411
#> Mar 2013 0 -1.274651 1.274651 -1.949411 1.949411
#> Apr 2013 0 -1.274651 1.274651 -1.949411 1.949411
#> May 2013 0 -1.274651 1.274651 -1.949411 1.949411
#> Jun 2013 0 -1.274651 1.274651 -1.949411 1.949411
#> Jul 2013 0 -1.274651 1.274651 -1.949411 1.949411
#> Aug 2013 0 -1.274651 1.274651 -1.949411 1.949411
To use the fable package, we can take the original data frame with all stores included and turn it into a tsibble object, with Month as the index and Store as the key. We then pipe it to model() and forecast() as follows.
# Using fable
library(tsibble)
library(fable)
# Monthly tsibble with Month as the index and one series per store.
stores_ts <- as_tsibble(
  mutate(df, Month = yearmonth(Month)),
  key = Store,
  index = Month
)

# Fit an ARIMA model to each store's series, then forecast from each model.
stores_fc <- forecast(model(stores_ts, ARIMA(Value)))

# Add 80% and 95% intervals and unpack them into lower/upper columns.
stores_fc %>%
  mutate(pi80 = hilo(Value, 80), pi95 = hilo(Value, 95)) %>%
  unpack_hilo(cols = c(pi80, pi95))
#> # A fable: 48 x 9 [1M]
#> # Key: Store, .model [2]
#> Store .model Month Value .mean pi80_lower pi80_upper pi95_lower
#> <chr> <chr> <mth> <dist> <dbl> <dbl> <dbl> <dbl>
#> 1 A ARIMA(Value) 2011 Sep N(0, 1.1) 0 -1.33 1.33 -2.03
#> 2 A ARIMA(Value) 2011 Oct N(0, 1.1) 0 -1.33 1.33 -2.03
#> 3 A ARIMA(Value) 2011 Nov N(0, 1.1) 0 -1.33 1.33 -2.03
#> 4 A ARIMA(Value) 2011 Dec N(0, 1.1) 0 -1.33 1.33 -2.03
#> 5 A ARIMA(Value) 2012 Jan N(0, 1.1) 0 -1.33 1.33 -2.03
#> 6 A ARIMA(Value) 2012 Feb N(0, 1.1) 0 -1.33 1.33 -2.03
#> 7 A ARIMA(Value) 2012 Mar N(0, 1.1) 0 -1.33 1.33 -2.03
#> 8 A ARIMA(Value) 2012 Apr N(0, 1.1) 0 -1.33 1.33 -2.03
#> 9 A ARIMA(Value) 2012 May N(0, 1.1) 0 -1.33 1.33 -2.03
#> 10 A ARIMA(Value) 2012 Jun N(0, 1.1) 0 -1.33 1.33 -2.03
#> # … with 38 more rows, and 1 more variable: pi95_upper <dbl>
Created on 2021-09-30 by the reprex package (v2.0.1)
This approach is also much more flexible as there may be multiple grouping variables (Stores and Products, for example). The fable approach is documented in the open access textbook at https://OTexts.com/fpp3.