I have the following problem:
- I am trying to fit two time series models (arima, ets)
- Perform "rolling window cross validation" on these models
- Record the average errors (MAE, RMSE) at each forecast horizon (e.g. average MAE for 1 time period ahead, average RMSE for 1 time period ahead, ..., average MAE for 12 time periods ahead, average RMSE for 12 time periods ahead).
- Plot the results
First I generated some random data:
library(forecast)
library(lubridate)
# Reproducible synthetic example: one Poisson(lambda = 50) count per week.
set.seed(123)
# Weekly dates starting 2010-01-01 and stepping by 7 days up to 2023-01-01.
weeks <- seq(
  from = as.Date("2010-01-01"),
  to = as.Date("2023-01-01"),
  by = "week"
)
counts <- rpois(n = length(weeks), lambda = 50)
# Week is kept as a Date column from the start.
df <- data.frame(Week = weeks, Count = counts)
# Time series with 52 observations per cycle; the first observation is
# labelled as period 1 of the calendar year of the earliest week.
ts_data <- ts(
  df$Count,
  start = c(as.numeric(format(min(df$Week), "%Y")), 1),
  frequency = 52
)
Next, I set up different objects required for the loop:
# Number of observations in the first training set; forecast origin i
# trains on observations 1..(i + k - 1).
k <- 60
# Result matrices: one row per forecast origin, one column per forecast
# horizon (1 to 12 steps ahead).
#
# They are filled with NA, not 0. A row that is never written (because a
# model fit failed or the loop stopped early) must not count as a "zero
# error" forecast; with NA such rows are simply skipped by
# colMeans(..., na.rm = TRUE), whereas 0 would silently bias the averages
# downwards.
n_origins <- length(ts_data) - k
mae_arima <- matrix(NA_real_, nrow = n_origins, ncol = 12)
rmse_arima <- matrix(NA_real_, nrow = n_origins, ncol = 12)
mae_ets <- matrix(NA_real_, nrow = n_origins, ncol = 12)
rmse_ets <- matrix(NA_real_, nrow = n_origins, ncol = 12)
Finally, I tried to write the cross validation loop:
# Rolling-origin cross-validation of ARIMA and ETS.
#
# For origin i the models are trained on observations 1..(i + k - 1) and
# forecast up to `horizon` steps ahead; the forecasts are compared with the
# observations that actually followed.
# NOTE(review): the training window always starts at observation 1, so it
# grows with i (expanding window). If a fixed-length window of k
# observations was intended, add `start = obs_time[i]` to the window() call.
#
# Results:
#   mae_arima / mae_ets   - absolute error per origin (row) and horizon (col)
#   rmse_arima / rmse_ets - after the loop, a 1 x horizon matrix holding the
#                           RMSE per horizon, i.e. sqrt(mean(squared error))
#                           taken over origins. An RMSE has to be aggregated
#                           over origins for each horizon separately; one
#                           value computed across all horizons and copied
#                           into every column gives a flat, meaningless
#                           curve. colMeans(rmse_*) returns these values
#                           unchanged.
horizon <- ncol(mae_arima)
n_obs <- length(ts_data)
n_origins <- n_obs - k
obs_time <- time(ts_data)

# Squared errors per origin and horizon; reduced to RMSE after the loop.
sqerr_arima <- matrix(NA_real_, nrow = n_origins, ncol = horizon)
sqerr_ets <- matrix(NA_real_, nrow = n_origins, ncol = horizon)

# Pad a short error vector with NA so it always fills a whole matrix row.
pad_to_horizon <- function(x) {
  c(x, rep(NA_real_, horizon - length(x)))
}

for (i in seq_len(n_origins)) {
  # Reset the row first so that a failed fit leaves NA (ignored by
  # colMeans(..., na.rm = TRUE)) instead of a stale or zero value.
  mae_arima[i, ] <- NA_real_
  mae_ets[i, ] <- NA_real_

  tryCatch(
    {
      # Define the training and testing sets by observation index. Near the
      # end of the series fewer than `horizon` future observations exist;
      # those origins still contribute to the shorter horizons.
      train_end <- i + k - 1
      test_idx <- seq(train_end + 1, min(train_end + horizon, n_obs))
      train_data <- window(ts_data, end = obs_time[train_end])
      actual <- as.numeric(ts_data[test_idx])

      # Fit and forecast using ARIMA model
      fit_arima <- auto.arima(train_data, seasonal = TRUE, lambda = "auto")
      fcast_arima <- forecast(fit_arima, h = horizon)
      err_arima <-
        as.numeric(fcast_arima[["mean"]])[seq_along(test_idx)] - actual
      mae_arima[i, ] <- pad_to_horizon(abs(err_arima))
      sqerr_arima[i, ] <- pad_to_horizon(err_arima^2)

      # Fit and forecast using ETS model
      fit_ets <- ets(train_data)
      fcast_ets <- forecast(fit_ets, h = horizon)
      err_ets <-
        as.numeric(fcast_ets[["mean"]])[seq_along(test_idx)] - actual
      mae_ets[i, ] <- pad_to_horizon(abs(err_ets))
      sqerr_ets[i, ] <- pad_to_horizon(err_ets^2)

      # Print model results, MAE and RMSE for this origin (both averaged
      # over the available horizons)
      cat("ARIMA model results for iteration", i, ":\n")
      print(fit_arima)
      print(fcast_arima)
      cat(
        "MAE (ARIMA):",
        round(mean(abs(err_arima)), 2),
        " RMSE (ARIMA):",
        round(sqrt(mean(err_arima^2)), 2),
        "\n\n"
      )
    },
    error = function(e) {
      # Report the failure and continue with the next origin; the affected
      # row(s) stay NA.
      warning(
        "Iteration ", i, " failed: ", conditionMessage(e),
        call. = FALSE
      )
    }
  )
}

# Per-horizon RMSE over all origins, stored as a one-row matrix so that
# colMeans(rmse_*, na.rm = TRUE) yields exactly these values.
rmse_arima <- matrix(sqrt(colMeans(sqerr_arima, na.rm = TRUE)), nrow = 1)
rmse_ets <- matrix(sqrt(colMeans(sqerr_ets, na.rm = TRUE)), nrow = 1)
And here is the code for the plots:
# Draw one "average error vs. forecast horizon" panel.
#   errors - numeric matrix with one column per forecast horizon
#   metric - y-axis label ("MAE" or "RMSE")
#   model  - panel title
#   col, lty - colour and line type passed on to plot()
plot_horizon_error <- function(errors, metric, model, col, lty) {
  plot(
    seq_len(ncol(errors)),
    colMeans(errors, na.rm = TRUE),
    type = "b",
    # The series is weekly, so each horizon step is one week (not a month).
    xlab = "Forecast Horizon (in weeks)",
    ylab = metric,
    main = model,
    col = col,
    lty = lty
  )
  invisible(NULL)
}

# Create the plot: 2 x 2 grid, MAE in the top row and RMSE in the bottom row.
# par() returns the previous settings so they can be restored afterwards.
old_par <- par(mfrow = c(2, 2))
plot_horizon_error(mae_arima, "MAE", "ARIMA Model", col = "blue", lty = 1)
plot_horizon_error(mae_ets, "MAE", "ETS Model", col = "green", lty = 2)
plot_horizon_error(rmse_arima, "RMSE", "ARIMA Model", col = "red", lty = 3)
plot_horizon_error(rmse_ets, "RMSE", "ETS Model", col = "orange", lty = 4)
# Restore the previous layout so later plots are not forced into the grid.
par(old_par)
The code seems to have run, but I am not sure if I am doing all of this correctly.
Can someone please tell me if I am doing this correctly?
Thanks!
References: