I am running analysis on a large group of independent and dependent variables, so I have created a for loop to speed up the process of cycling through these. However, for some reason, when I try to run the shapiro.test(x) function within the for loop, I get the following error:
"Error in shapiro.test(as.numeric(dependent_vars_ttest[[y]])) : sample size must be between 3 and 5000"
But, when I run each variable on their own, they work just fine. Is this an issue with how I've constructed the loop? Here is my complete code:
setwd("~/Desktop/Stats")
data <- read.csv("Stats.csv", header = TRUE)
view(data)
independent_vars <- c(data$Smokes,data$Anx,data$Depression)
dependent_vars_ttest <- c(data$ICU,data$Duration)
dependent_vars_chi2 <- c(data$Complication,data$X90)
wb <- createWorkbook()
for (x in 1:3){
for(y in 1:2){
for(z in 1:2){
if (shapiro.test(as.numeric(dependent_vars_ttest[[y]]))$p.value >= 0.05 &&
var.test(as.numeric(dependent_vars_ttest[[y]]) ~ independent_vars[[x]], data =
data)$p.value >= 0.05) {
t_test_result <- t.test(as.numeric(dependent_vars_ttest[[y]]) ~
independent_vars[[x]], data = data)
} else if (shapiro.test(as.numeric(dependent_vars_ttest[[y]]))$p.value >= 0.05) {
t_test_result <- wilcox.test(as.numeric(dependent_vars_ttest[[y]]) ~
independent_vars[[x]], data = data)
} else {
t_test_result <- t.test(as.numeric(dependent_vars_ttest[[y]]) ~
independent_vars[[x]], data = data, var.equal = FALSE)
}
t_test_table <- data.frame(
Variable = independent_vars[[x]],
Dependent_Variable = dependent_vars_ttest[[y]],
Mean_Group_1 = t_test_result$estimate[1],
Mean_Group_2 = t_test_result$estimate[2],
SD_Group_1 = t_test_result$sd[1],
SD_Group_2 = t_test_result$sd[2],
p_value = t_test_result$p.value,
CI_Lower = t_test_result$conf.int[1],
CI_Upper = t_test_result$conf.int[2]
)
addWorksheet(wb, sheetName = paste0(independent_vars[[x]], "_t_test"))
writeData(wb, sheet = paste0(independent_vars[[x]], "_t_test"), x = t_test_table)
contingency_table <- table(dependent_vars_chi2[[z]], independent_vars[[x]])
chi2_test_result <- chisq.test(contingency_table)
chi2_test_table <- data.frame(
Variable = independent_vars[[x]],
Dependent_Variable = dependent_vars_chi2[[z]],
Group_Count = chi2_test_result$observed,
Group_Count = chi2_test_result$observed,
Odds_Ratio = chi2_test_result$estimate,
Odds_Ratio_CI_Lower = chi2_test_result$conf.int[1],
Odds_Ratio_CI_Upper = chi2_test_result$conf.int[2],
p_value = chi2_test_result$p.value
)
addWorksheet(wb, sheetName = paste0(independent_vars[[x]], "_chi2_test"))
writeData(wb, sheet = paste0(independent_vars[[x]], "_chi2_test"), x =
chi2_test_table)
}
}
}
saveWorkbook(wb, "output.xlsx")