0

please find more code for my problem provided.

I have a problem with creating MSE errors for GLM, GAM and step_test functions.

data(Hitters)

#GLM Model

vars <- c("AtBat", "Hits", "HmRun", "Runs", "RBI", "Walks", "Years", "CAtBat", "CHits", "CHmRun", "CRuns", "CRBI", "CWalks", "PutOuts", "Assists", "Errors", "Salary")

model_glm <- glm(Salary ~ ., data = Hitters[, vars], family = gaussian())

#Gam Model:

library(mgcv)

gam_model <- gam(Salary ~ s(AtBat) + s(Hits) + s(HmRun) + s(Runs) + s(RBI) + s(Walks) + s(Years) + s(CAtBat) + s(CHits) + s(CHmRun) + s(CRuns) + s(CRBI) + s(CWalks) + s(PutOuts) + s(Assists) + s(Errors), data = Hitters, family = gaussian)

#Model step

model_step <- lm(Salary ~ 1, data = Hitters)
step(model_glm, direction = "forward", scope = formula(model_glm), trace = 0)

#MSE

library(caret)

Hitters[sapply(Hitters, is.numeric)] <- lapply(Hitters[sapply(Hitters, is.numeric)],
                                               function(x) {
                                                 x[is.na(x)] <- mean(x, na.rm = TRUE)
                                                 x})

set.seed(79511)

x <- Hitters[, c("AtBat", "Hits", "HmRun", "Runs", "RBI", "Walks", "Years", "CAtBat", "CHits", "CHmRun", "CRuns", "CRBI", "CWalks", "PutOuts", "Assists", "Errors")]
y <- Hitters$Salary

x <- scale(x)


train_test <- sample(1:2, nrow(x), TRUE, prob = 2:1)
train <- as.data.frame(cbind(Salary = y[train_test == 1], x[train_test == 1,]))
valid <- as.data.frame(cbind(Salary = y[train_test == 2], x[train_test == 2,]))

index <- createDataPartition(Hitters$Salary, p = 0.6, list = FALSE)
temp <- Hitters[-index, ]
index <- createDataPartition(temp$Salary, p = 0.5, list = FALSE)
test <- temp[-index, ]

mse_glm_test <- caret::RMSE(predict(model_glm, newdata = test), test$target_variable)
mse_gam_test <- caret::RMSE(predict(gam_model, newdata = test), test$target_variable)
mse_model_step_test <- caret::RMSE(predict(model_step, newdata = test), test$target_variable)

When I put the code into the R Studio it works but it gives me NaN value.

Whad do I do wrong?

Viktor
  • 13
  • 2
  • 1
    Where is `model_glm`, `gam_model`, and `model_step` created? It's easier to help you if you include a simple [reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) with sample input and desired output that can be used to test and verify possible solutions. – MrFlick Feb 27 '23 at 15:17
  • Are any of the values passed to `caret::RMSE` NaN? – Nick ODell Feb 27 '23 at 15:40
  • 1
    This is still not a [mcve], because we don't have the `Hitters` data set you're working with ... – Ben Bolker Feb 27 '23 at 17:46
  • @MrFlick I provided the reproducible example, I hope my problem is clear now :) Thanks for the tips! – Viktor Feb 28 '23 at 08:32
  • @BenBolker I edited the post and I think it is more clear now – Viktor Feb 28 '23 at 08:38

0 Answers0