Here's a fairly basic way of averaging (taking the median) of the outputs from multiple models and producing a forecast. Note, none of the necessary EDA, model tuning, or model validation been carried out either prior or after modelling. Additionally, I'm not certain this method of averaging the prediction intervals is sound. For a best practice approach please see here.
# Install pacakges if they are not already installed: necessary_packages => vector
necessary_packages <- c("forecast", "ggplot2")
# Create a vector containing the names of any packages needing installation:
# new_pacakges => vector
new_packages <- necessary_packages[!(necessary_packages %in%
installed.packages()[, "Package"])]
# If the vector has more than 0 values, install the new pacakges
# (and their) associated dependencies:
if (length(new_packages) > 0) {
install.packages(new_packages, dependencies = TRUE)
}
# Initialise the packages in the session: list of boolean => stdout (console)
lapply(necessary_packages, require, character.only = TRUE)
# Coerce to multiple seasonality time series object:
y <- msts(
df$sales,
start = c(
min(as.integer(strftime(df$date, "%Y"))),
1
),
seasonal.periods = c(
7.009615384615385,
30.5
)
)
# Function to coalesce fit values with actuals:
coalesceActualsFit <- function(fit_obj){
# Coalesce actual values with fit values:
# res => double vector
res <- transform(
setNames(
data.frame(
cbind(
fit_obj$x,
fit_obj$fitted
)[,1:2]
),
c("x", "xhat")
),
xhat = ifelse(is.na(xhat), x, xhat)
)[,2,drop=TRUE]
# Explicitly define returned object:
# double vector => Global Env()
return(res)
}
# Store the forecast period (n-days):
fcast_period <- 30
# Fit a holt winters model:
hw_fit <- HoltWinters(y)
# Remove NAs from the fitted values:
hw_fit_vals <- coalesceActualsFit(hw_fit)
# Forecast out n-days:
hw_fcast <- forecast(hw_fit, h = fcast_period)
# Fit a neural network:
nnetar_fit <- nnetar(y, P = 10, repeats = 50)
# Remove NAs from the fitted values:
nnetar_fit_vals <- coalesceActualsFit(nnetar_fit)
# Forecast out n-days:
nnetar_fcast <- forecast(nnetar_fit, h = fcast_period)
# Fit an stlf model:
stlf_fit <- stlf(y)
# Forecast out n-days:
stlf_fcast <- forecast(stlf_fit, h = fcast_period)
# Create an ensemble for the predictions:
med_predict <- apply(
cbind(hw_fit_vals, nnetar_fit_vals, stlf_fit$fitted),
1,
median
)
# Create an ensemble for the forecasts:
med_fcast <- apply(
cbind(hw_fcast$mean, nnetar_fcast$mean, stlf_fcast$mean),
1,
median
)
# Calculate the enesmble prediction intervals:
fcast_pis <- do.call(
cbind,
lapply(
c("lower", "upper"),
function(x){
y <- data.frame(
cbind(
hw_fcast[[x]],
nnetar_fcast[[x]],
stlf_fcast[[x]]
)
)
pi_80 <- apply(
y[,endsWith(names(y), "80.")],
1,
median
)
pi_95 <- apply(
y[,endsWith(names(y), "95.")],
1,
median
)
cbind(
setNames(data.frame(pi_80), paste0(x, "_80")),
setNames(data.frame(pi_95), paste0(x, "_95"))
)
}
)
)
# Combine them into a single vector:
pt_fcasts <- c(
med_predict,
med_fcast
)
# Create a data.frame containing the original data and
# the point forecasts:
fcast_df <- transform(
data.frame(
actuals = c(df$sales, rep(NA_real_, fcast_period)),
pnt_fcasts = pt_fcasts,
date =
as.Date(
c(
df$date,
seq(
max(df$date) + 1,
max(df$date) + fcast_period,
by = 1
)
),
"%Y-%m-%d"
)
),
pnt_fcasts = ifelse(is.na(pnt_fcasts), sales, pnt_fcasts)
)
# Adjust the forecast predition intervals to be the same
# length as the combined data:
fcast_pis_adjusted <- transform(
rbind(
setNames(
data.frame(
do.call(
cbind,
lapply(seq_len(ncol(fcast_pis)), function(i){
pt_fcasts[1:nrow(df)]
}
)
)
),
names(fcast_pis)
),
fcast_pis
),
date = fcast_df$date
)
# Merge with the point forecast data:
chart_data <- merge(
fcast_df,
fcast_pis_adjusted,
by = "date"
)
# Chart the forecast vs actuals:
ggplot(chart_data, aes(date)) +
geom_line(aes(y = actuals, colour = "Actuals")) +
geom_line(aes(y = lower_80, colour = "Lower 80 PI")) +
geom_line(aes(y = lower_95, colour = "Lower 95 PI")) +
geom_line(aes(y = upper_80, colour = "Upper 80 PI")) +
geom_line(aes(y = upper_95, colour = "Upper 95 PI")) +
geom_line(aes(y = pnt_fcasts, colour = "Point Forecast")) +
xlab("Date") +
ylab("Sales") +
labs(colour = "Series")
Data:
df <- data.frame(
date = seq(as.Date("2021-01-01"), as.Date("2021-07-07"), by = "1 day"),
sales = c(
100,
105 ,
167,
106 ,
112,
107,
202,
98,
120,
109 ,
114,
195,
110,
121,
89,
128,
104,
194 ,
107 ,
127,
117,
100,
117 ,
205,
116,
112,
119,
129,
161,
132,
110,
114,
118,
194,
114,
113,
113 ,
172,
101 ,
161 ,
102,
135,
97,
122,
170,
126 ,
160,
110,
118,
108,
111,
163,
110,
123 ,
102 ,
116,
181,
119,
155,
108,
122,
169,
115,
122,
116,
168 ,
115 ,
101,
117,
113 ,
163,
115 ,
107,
106,
171 ,
109,
119,
107,
101 ,
166,
105,
102 ,
174,
108,
102,
114,
97,
114,
149,
100,
111,
94,
110 ,
108,
100 ,
92 ,
104,
112,
160,
105,
98,
91,
117 ,
44,
60 ,
36 ,
50 ,
51 ,
54,
62,
61 ,
62 ,
50 ,
59 ,
85 ,
49,
61 ,
56 ,
63,
39,
110 ,
56 ,
54 ,
55,
56,
63 ,
44,
115,
55,
50,
96 ,
129 ,
61,
59,
98 ,
90 ,
153,
90,
82 ,
98,
79,
149,
97 ,
85,
92,
78,
100 ,
69,
152,
88,
76 ,
91 ,
145,
106,
69,
84,
72,
144 ,
76,
74 ,
94 ,
70 ,
86 ,
76 ,
137 ,
71 ,
87 ,
91,
62 ,
150 ,
66 ,
77 ,
88,
135,
93 ,
62 ,
83,
83 ,
72,
71 ,
148 ,
91 ,
68 ,
78 ,
95 ,
124,
69 ,
78
)
)