Problem seems to be in question 5?
Quitting from lines 65-75 (DAE3.Rmd) Error in model.frame.default(Terms, newdata, na.action = na.action, xlev = object$xlevels) : factor DISTRICT has new levels 3 Calls: ... predict -> predict.lm -> model.frame -> model.frame.default Execution halted
#1. Read in the data and determine which variables are quantitative and which are factors. If necessary, change the variables STATUS, DISTRICT, and SUBCONT to factors. Use the random seed 101, and a 70/30 training validation split. Split the data into a training set and a validation set. Print the head of the training set and the validation set.
df <- read.csv("FLAG2.csv")
df$STATUS <- factor(df$STATUS)
df$DISTRICT <- factor(df$DISTRICT)
df$SUBCONT <- factor(df$SUBCONT)
set.seed(101)
index <- sample(1:nrow(df), size = nrow(df)*0.7, replace = FALSE)
train <- df[index, ]
valid <- df[-index, ]
head(train)
head(valid)
#2. Use Stepwise in both directions to find a model for predicting the price (LOWBID) of a contract. Use all variables as predictors except DOTEST and SUBCONT. Consider interaction terms if they are appropriate. State the final model.
library(MASS)
nullmod <- formula(LOWBID ~ 1)
null <- lm(nullmod, data = train)
fullmod <- formula(LOWBID~(LBERATIO+NUMBIDS+DAYSEST+RDLNGTH+PCTASPH+PCTBASE+PCTEXCAV+PCTMOBIL+PCTSTRUC+PCTTRAFF)^2+STATUS+DISTRICT)
full <- lm(fullmod, data = train)
stepwise_mod <- stepAIC(null, scope=list(lower=null, upper=full), direction="both", trace=0, k=2)
final_model_stepwise <- summary(stepwise_mod)
final_model_stepwise
#3. Use Backward Selection to find a model for predicting the price (LOWBID) of a contract. Use all variables as predictors except DOTEST and SUBCONT. Consider interaction terms if they are appropriate. State the final model.
backward_mod <- stepAIC(full, scope=list(lower=null, upper=full), direction="backward", trace=0, k=2)
final_model_backward <- summary(backward_mod)
final_model_backward
#4. Compare the in-sample adequacy of the models using the r2, r2 adj, residual plots and tests of partial significance for the regression coefficients for each model you have selected.
# R-squared and adjusted R-squared values
cat("R-squared value for stepwise model:", round(summary(stepwise_mod)$r.squared, 4), "\n")
cat("Adjusted R-squared value for stepwise model:", round(summary(stepwise_mod)$adj.r.squared, 4), "\n")
cat("R-squared value for backward model:", round(summary(backward_mod)$r.squared, 4), "\n")
cat("Adjusted R-squared value for backward model:", round(summary(backward_mod)$adj.r.squared, 4), "\n")
# Residual plots
plot(stepwise_mod, which = c(1,2))
plot(backward_mod, which = c(1,2))
# Tests of partial significance for the regression coefficients
summary(stepwise_mod)$coefficients
summary(backward_mod)$coefficients
#5. Compare the out-of-sample validity of the models using the r2 validation and ASE validation measures for each model you have selected.
# R-squared validation values
valid_pred_stepwise <- predict(stepwise_mod, newdata = valid)
valid_pred_backward <- predict(backward_mod, newdata = valid)
cat("R-squared validation value for stepwise model:", round(cor(valid$LOWBID, valid_pred_stepwise)^2, 4), "\n")
cat("R-squared validation value for backward model:", round(cor(valid$LOWBID, valid_pred_backward)^2, 4), "\n")
# ASE validation measures
ASE_stepwise <- mean((valid$LOWBID - valid_pred_stepwise)^2)
ASE_backward <- mean((valid$LOWBID - valid_pred_backward)^2)
cat("ASE validation value for stepwise model:", round(ASE_stepwise, 4), "\n")
cat("ASE validation value for backward model:", round(ASE_backward, 4), "\n")
#6. Recommend a final model.