1

I have the following problem:

  • I am trying to fit two time series models (arima, ets)
  • Perform "rolling window cross validation" on these models
  • Record the average errors (mae, rmse) on different lengths of time (e.g. average mae for 1 time period ahead, average rmse for 1 time period ahead....average mae for 12 time period ahead, average rmse for 12 time period ahead).
  • Plot the results

First I generated some random data:

library(forecast)
library(lubridate)

set.seed(123)
    
weeks <- rep(seq(as.Date("2010-01-01"), as.Date("2023-01-01"), by = "week"), each = 1)
counts <- rpois(length(weeks), lambda = 50)
df <- data.frame(Week = as.character(weeks), Count = counts)
    
# Convert Week column to Date format
df$Week <- as.Date(df$Week)
    
# Create a time series object
ts_data <- ts(df$Count, frequency = 52, start = c(year(min(df$Week)), 1))

Next, I set up different objects required for the loop:

# Set the length of data for fitting models
k <- 60

# Initialize matrices to store the MAE and RMSE values
# (I was not sure if they should be initialized to 0 or NA?)
#mae_arima <- matrix(NA, nrow = length(ts_data) - k, ncol = 12)
#rmse_arima <- matrix(NA, nrow = length(ts_data) - k, ncol = 12)
#mae_ets <- matrix(NA, nrow = length(ts_data) - k, ncol = 12)
#rmse_ets <- matrix(NA, nrow = length(ts_data) - k, ncol = 12)
    
mae_arima <- matrix(0, nrow = length(ts_data) - k, ncol = 12)
rmse_arima <- matrix(0, nrow = length(ts_data) - k, ncol = 12)
mae_ets <- matrix(0, nrow = length(ts_data) - k, ncol = 12)
rmse_ets <- matrix(0, nrow = length(ts_data) - k, ncol = 12)

Finally, I tried to write the cross validation loop:

for (i in 1:(length(ts_data) - k)) {
    tryCatch({
        # Define the training and testing sets
        train_data <- window(
            ts_data,
            end = c(year(min(df$Week)) + floor((i+k-2)/52), (i+k-2)%%52+1)
        )
        test_data <- window(
            ts_data,
            start = c(year(min(df$Week)) + floor((i+k-1)/52), (i+k-1)%%52+1),
            end = c(year(min(df$Week)) + floor((i+11+k-1)/52), (i+11+k-1)%%52+1)
        )
            
        # Fit and forecast using ARIMA model
        fit_arima <- auto.arima(train_data, seasonal = TRUE, lambda = "auto")
        fcast_arima <- forecast(fit_arima, h = 12)
            
        # Calculate MAE and RMSE for ARIMA model's forecast
        mae_arima[i, ] <- abs(fcast_arima[['mean']] - test_data)
        rmse_arima[i, ] <- sqrt(mean((fcast_arima[['mean']] - test_data)^2))
            
        # Fit and forecast using ETS model
        fit_ets <- ets(train_data)
        fcast_ets <- forecast(fit_ets, h = 12)
            
        # Calculate MAE and RMSE for ETS model's forecast
        mae_ets[i, ] <- abs(fcast_ets[['mean']] - test_data)
        rmse_ets[i, ] <- sqrt(mean((fcast_ets[['mean']] - test_data)^2))
            
        # Print model results, MAE and RMSE
        cat("ARIMA model results for iteration", i, ":\n")
        print(fit_arima)
        print(fcast_arima)
        cat(
            "MAE (ARIMA):",
            round(mean(mae_arima[i, ]), 2),
            " RMSE (ARIMA):",
            round(mean(rmse_arima[i, ]), 2),
            "\n\n"
        )
    })
}

And here is the code for the plots:

# Create the plot
par(mfrow = c(2, 2))
    
# Plot for MAE (ARIMA)
plot(
    1:12,
    colMeans(mae_arima, na.rm = TRUE),
    type = "b",
    xlab = "Forecast Horizon (in months)",
    ylab = "MAE",
    main = "ARIMA Model",
    col = "blue",
    lty = 1
)
    
# Plot for MAE (ETS)
plot(
    1:12,
    colMeans(mae_ets, na.rm = TRUE),
    type = "b",
    xlab = "Forecast Horizon (in months)",
    ylab = "MAE",
    main = "ETS Model",
    col = "green",
    lty = 2
)
    
# Plot for RMSE (ARIMA)
plot(
    1:12,
    colMeans(rmse_arima, na.rm = TRUE),
    type = "b",
    xlab = "Forecast Horizon (in months)",
    ylab = "RMSE",
    main = "ARIMA Model",
    col = "red",
    lty = 3
)
    
# Plot for RMSE (ETS)
plot(
    1:12,
    colMeans(rmse_ets, na.rm = TRUE),
    type = "b",
    xlab = "Forecast Horizon (in months)",
    ylab = "RMSE",
    main = "ETS Model",
    col = "orange",
    lty = 4
)

The code seems to have run, but I am not sure if I am doing all of this correctly.

Can someone please tell me if I am doing this correctly?

Thanks!

References:

stats_noob
  • 5,401
  • 4
  • 27
  • 83

0 Answers0