Here is the code:
# Load required libraries
library(caret)
library(glmnet)
library(pROC)
# Convert response variable to factor with consistent levels in training and testing data
response_levels <- union(levels(TrainingSet$target), levels(TestingSet$target))
TrainingSet$target <- factor(TrainingSet$target, levels = response_levels)
TestingSet$target <- factor(TestingSet$target, levels = response_levels)
# Create a list of formulas
formulas <- list(
formula1 = as.formula(paste("target~. ")),
formula2 = as.formula(paste("target ~ sex + chest.pain.type + fasting.blood.sugar + max.heart.rate +
exercise.angina + oldpeak + ST.slope")),
formula3 = as.formula(paste("target~ cholesterol + sex + resting.bp.s +
age + fasting.blood.sugar")),
formula4 = as.formula(paste("target~ cholesterol + sex + age + fasting.blood.sugar")),
formula5 = as.formula(paste("target~ max.heart.rate + resting.ecg + oldpeak + ST.slope+
chest.pain.type + exercise.angina")),
formula6 = as.formula(paste("target~ max.heart.rate + oldpeak + ST.slope+
chest.pain.type + exercise.angina"))
)
# Create a list of models
model_list <- list(
logistic = list(method = "glm", family = "binomial"),
glmnet = list(method = "glmnet", family = "binomial")
)
# Create an empty list to store the model results
results <- list()
confusion_matrices <- list()
# Loop through the models
for (model in model_list) {
for (formula in formulas) {
# Train the model with the current formula
if (model$method == "glmnet") {
# For glmnet, specify the alpha values and lambda grid
model_fit <- train(
as.formula(formula),
data = TrainingSet,
method = model$method,
trControl = trainControl(method = "cv", number = 10),
preProcess = c("center", "scale"),
tuneGrid = expand.grid(alpha = 0:1, lambda = c(0.001, 0.01, 0.1, 1)),
family = model$family
)
} else {
# For other models, use default tuning parameter grid
model_fit <- train(
as.formula(formula),
data = TrainingSet,
method = model$method,
trControl = trainControl(method = "cv", number = 10),
preProcess = c("center", "scale"),
family = model$family
)
}
# Make predictions on the testing set
predicted <- predict(model_fit, newdata = TestingSet)
predicted <- as.numeric(predicted)
# Evaluate model performance
cm <- caret::confusionMatrix(predicted, as.factor(TestingSet$target))
auc <- pROC::auc(roc(response = TestingSet$target, predictor = as.numeric(predicted) ))
rmse <- sqrt(mean((predicted - as.numeric(TestingSet$target))^2))
r2 <- cor(predicted, as.numeric(TestingSet$target))^2
# Store results in the results list
results[[paste(model$method, "_", names(formula), sep = "")]] <- list(
Confusion_Matrix = cm,
Accuracy = cm$overall["Accuracy"],
Error_Rate = cm$byClass["Error Rate"],
Sensitivity = cm$byClass["Sensitivity"],
AUC = auc,
RMSE = rmse,
R2 = r2
)
# Store confusion matrix in the confusion_matrices list
confusion_matrices[[paste(model$method, "_", names(formula), sep = "")]] <- cm$table
}
}
# Convert results list to data frame
results_df <- do.call(rbind, lapply(results, data.frame, stringsAsFactors = FALSE))
# Print the results
print(results_df)
# Access confusion matrices
print(confusion_matrices)
I've tried everything but I don't understand how to solve this error. I've perform as.numeric to predicted, but the error won't go, I've used this levels = response_levels still does not work. Please guide me how can I solve this. I did this so by manually specifying the factor levels for both the training and testing data using factor() function with levels parameter, you can ensure that they have consistent factor levels, which should resolve the error related to factor level mismatch. And then updated code, the levels() function is used to retrieve the factor levels from both the training and testing data, and the union() function is used to combine the levels into a consistent set of factor levels. Then, the factor() function is used to convert the response variable to a factor with the consistent factor levels in both the training and testing data. This should resolve the error related to factor level mismatch.Still the same error.