I fitted a logistic regression model using 10-fold cross-validation. I can use the pROC package to get the AUC, but it does not seem to be the AUC for the whole 10-fold CV, because the cvAUC library gave a different value. I suspect the AUC from pROC is for a single fold only. How can I extract the joint (pooled) AUC across all 10 folds using the pROC library?
# Prepare a two-class subset of iris for cross-validated logistic regression.
data(iris)

# Keep only the two species being compared.
data <- iris[iris$Species %in% c("setosa", "versicolor"), ]

# Row identifier equal to the row position within `data`.
data$ID <- seq_len(nrow(data))

# Species is already a factor, so as.factor() would leave the now-empty
# "virginica" level in place; drop it so the response is truly binary.
data$Species <- droplevels(data$Species)

# Sanity check of the class balance.
table(data$Species)

# Per-fold results, filled in by the cross-validation loop.
confusion_matrices <- list()
accuracy <- numeric(0)
# 10-fold cross-validation of a logistic regression on `data`, storing one
# caret confusion matrix and one accuracy value per fold.

# The fold assignment is identical for every iteration, so build it once;
# the fixed seed keeps the partition reproducible.
set.seed(3456)
folds <- caret::createFolds(data$Species, k = 10)

# Classes actually present in the data. With a factor response, binomial
# glm() treats the first level as "failure", so `pred` below is the
# probability of the second level.
class_levels <- levels(droplevels(data$Species))

for (i in seq_along(folds)) {
  # createFolds() returns row positions of `data`, so index rows directly.
  test <- data[folds[[i]], ]
  train <- data[-folds[[i]], ]

  # ID is only a row identifier; because rows are ordered by species it
  # encodes the class label, so it must not be used as a predictor.
  model1 <- glm(Species ~ . - ID, family = binomial, data = train)
  pred <- predict(model1, newdata = test, type = "response")

  # Use labelled factors sharing the same levels so the confusion matrix is
  # well-defined even when a fold predicts only one class.
  predicted <- factor(
    ifelse(pred >= 0.5, class_levels[2], class_levels[1]),
    levels = class_levels
  )
  actual <- factor(test$Species, levels = class_levels)

  confusion_matrices[[i]] <- caret::confusionMatrix(predicted, actual)
  accuracy[[i]] <- confusion_matrices[[i]]$overall["Accuracy"]
}
library(pander)
library(dplyr)
# Report the per-fold accuracies as a table, followed by their mean.

# Generate "Fold 1", "Fold 2", ... to match however many folds were run,
# instead of spelling the labels out by hand.
names(accuracy) <- paste("Fold", seq_along(accuracy))

accuracy %>%
  pander::pandoc.table()

# Average accuracy across all folds.
mean(accuracy)