1

I need help solving this error, I am not sure how to, but it seems as when I run the "dp_stat" to get the statistics, an error appears when I do my OLS model. Essentially I try to find the cumulative difference between a benchmark and a given individual predictive model. Here dp is my independent variable and sg is my dependent variable. datanu is my excel data. I'm not sure how to attach the data here, however here is a link to the excel and the code: https://drive.google.com/drive/folders/12BOuNBODURIP7CQIBZWMmHFc1d7zXHxN?usp=sharing If anyone has a fix it would mean the world!

"#Error in lag():! n must be a positive integer, not a double vector of length 1."

rm(list= ls())  # Clear global environment
invisible(gc()) # Free up unused R-occupied memory
cat("\014")     # Clear console output: equivalent to ctrl + L 

library("tseries")
library("readxl")
library("Metrics")
library("lubridate")
library("ggplot2")
library("data.table")
library("dyn")
library("reshape2")

#header TRUE fordi første row er navne.
datanu <- read_xlsx("~/Documents/6.semester/Bachelor/Data/datanu.xlsx",
                    na = "NaN",
                    sheet = "datax", 
)


myts <- ts(datanu, start=c(1872, 1), end=c(2020, 12), frequency=12)
plot(myts[, c("dp", "dy", "ep", "de")])


get_statistics <- function(myts, dp, sg, h=1, start=1872, end=2020, est_periods_OOS = 20) {
  #### IS ANALYSIS
  
  #1. Historical mean model for en portefølje
  avg   <- mean(window(myts, start, end)[, sg], na.rm=TRUE)
  IS_error_N <- (window(myts, start, end)[, sg] - avg)             
  
  
  #2. OLS model - installér dyn her
  #reg <- dyn$lm(sg ~ lag(as.numeric(dp), 1), data=window(myts, start, end)) 
  reg <- dyn$lm(eval(parse(text=sg)) ~ lag(eval(parse(text=dp)), -1), data=window(myts, start, end))    #Error in `lag()`:! `n` must be a positive integer, not a double vector of length 1.
  IS_error_A <- reg$residuals
  
  
  #OOS ANALYSIS
  OOS_error_N <- numeric(end - start - est_periods_OOS)  
  OOS_error_A <- numeric(end - start - est_periods_OOS)  
  
  #anvender kun information op til forecasten er lavet. 
  j <- 0
  for (i in (start + est_periods_OOS):(end-1)) {
    j <- j + 1
    #Get the actual ERP that you want to predict
    actual_ERP <- as.numeric(window(myts, i+1, i+1)[, sg])
    
    #1. Historical mean model
    OOS_error_N[j] <- actual_ERP - mean(window(myts, start, i)[, sg], na.rm=TRUE)
    
    #2. OLS model
    reg_OOS <- dyn$lm(eval(parse(text=sg)) ~ lag(eval(parse(text=dp)), -1), 
                      data=window(myts, start, i))
    #Compute_error
    df <- data.frame(x=as.numeric(window(myts, i, i)[, dp]))
    names(df) <- dp
    pred_ERP   <- predict.lm(reg_OOS, newdata=df)    
    OOS_error_A[j] <-  pred_ERP - actual_ERP
    
  }
  
  #Compute statistics 
  MSE_N <- mean(OOS_error_N^2)
  MSE_A <- mean(OOS_error_A^2)
  T <- length(!is.na(myts[, sg]))
  OOS_R2  <- 1 - MSE_A/MSE_N
  #Is the -1 enough (maybe -2 needed because of lag)?
  OOS_oR2 <- OOS_R2 - (1-OOS_R2)*(reg$df.residual)/(T - 1)       
  dRMSE <- sqrt(MSE_N) - sqrt(MSE_A)
  ##
  
  #### CREATE PLOT
  IS  <- cumsum(IS_error_N[2:length(IS_error_N)]^2)-cumsum(IS_error_A^2)
  OOS <- cumsum(OOS_error_N^2)-cumsum(OOS_error_A^2)
  df  <- data.frame(x=seq.int(from=start + 1 + est_periods_OOS, to=end), 
                    IS=IS[(1 + est_periods_OOS):length(IS)], 
                    OOS=OOS) #Because you lose one observation due to the lag
  #Shift IS errors vertically, so that the IS line begins 
  # at zero on the date of first OOS prediction. (se Goyal/Welch (2008, side 1465))
  df$IS <- df$IS - df$IS[1] 
  df  <- melt(df, id.var="x") 
  plotGG <- ggplot(df) + 
    geom_line(aes(x=x, y=value,color=variable)) + 
    geom_rect(data=data.frame(),#Needed by ggplot2, otherwise not transparent
              aes(xmin=2008, xmax=2010,ymin=-0.2,ymax=0.2), 
              fill='red',
              alpha=0.1) + 
    scale_y_continuous('Cumulative SSE Difference', limits=c(-0.2, 0.2)) + 
    scale_x_continuous('Year')
  ##
  
  return(list(IS_error_N = IS_error_N,
              IS_error_A = reg$residuals,
              OOS_error_N = OOS_error_N,
              OOS_error_A = OOS_error_A,
              IS_R2 = summary(reg)$r.squared, 
              IS_aR2 = summary(reg)$adj.r.squared, 
              OOS_R2  = OOS_R2,
              OOS_oR2 = OOS_oR2,
              dRMSE = dRMSE,
              plotGG = plotGG))
  
}

dp_stat <- get_statistics(myts, "dp", "sg", start=1872)    
dp_stat$plotGG
  • Where did you make the change to `ts_annual`? I don't see any edits to your question. – r2evans Mar 17 '22 at 23:50
  • 1
    Where is `dyn` defined? Having the function reach out to use something in another environment is a bad idea for many reasons. What is `datanu`? Which version of `melt` are you using, `data.table::` or `reshape2::`? Please make this question *reproducible*; for good advice on how to do that, please read https://stackoverflow.com/q/5963269, [mcve], and https://stackoverflow.com/tags/r/info and then come back and [edit] your question to add the sample data. Thanks! – r2evans Mar 17 '22 at 23:54
  • dyn is an installation from library. datanu is my excel data. I will update the question asap. – Tristan Layu Mar 18 '22 at 09:47
  • reshape2 for melt. Nevermind r2evans, I just had to figure how to edit my post :) – Tristan Layu Mar 18 '22 at 09:59

0 Answers0