0

I am running analysis on a large group of independent and dependent variables, so I have created a for loop to speed up the process of cycling through these. However, for some reason, when I try to run the shapiro.test(x) function within the for loop, I get the following error:

"Error in shapiro.test(as.numeric(dependent_vars_ttest[[y]])) : sample size must be between 3 and 5000"

But, when I run each variable on their own, they work just fine. Is this an issue with how I've constructed the loop? Here is my complete code:

setwd("~/Desktop/Stats")

data <- read.csv("Stats.csv", header = TRUE)
view(data)

independent_vars <- c(data$Smokes,data$Anx,data$Depression)  
dependent_vars_ttest <- c(data$ICU,data$Duration) 
dependent_vars_chi2 <- c(data$Complication,data$X90)  

wb <- createWorkbook()

for (x in 1:3){
  for(y in 1:2){
    for(z in 1:2){

  if (shapiro.test(as.numeric(dependent_vars_ttest[[y]]))$p.value >= 0.05 && 
var.test(as.numeric(dependent_vars_ttest[[y]]) ~ independent_vars[[x]], data = 
data)$p.value >= 0.05) {

    t_test_result <- t.test(as.numeric(dependent_vars_ttest[[y]]) ~ 
independent_vars[[x]], data = data)
  } else if (shapiro.test(as.numeric(dependent_vars_ttest[[y]]))$p.value >= 0.05) {

    t_test_result <- wilcox.test(as.numeric(dependent_vars_ttest[[y]]) ~ 
independent_vars[[x]], data = data)
  } else {

    t_test_result <- t.test(as.numeric(dependent_vars_ttest[[y]]) ~ 
independent_vars[[x]], data = data, var.equal = FALSE)
  }


  t_test_table <- data.frame(
    Variable = independent_vars[[x]],
    Dependent_Variable = dependent_vars_ttest[[y]],
    Mean_Group_1 = t_test_result$estimate[1],
    Mean_Group_2 = t_test_result$estimate[2],
    SD_Group_1 = t_test_result$sd[1],
    SD_Group_2 = t_test_result$sd[2],
    p_value = t_test_result$p.value,
    CI_Lower = t_test_result$conf.int[1],
    CI_Upper = t_test_result$conf.int[2]
  )


  addWorksheet(wb, sheetName = paste0(independent_vars[[x]], "_t_test"))
  writeData(wb, sheet = paste0(independent_vars[[x]], "_t_test"), x = t_test_table)


  contingency_table <- table(dependent_vars_chi2[[z]], independent_vars[[x]])


  chi2_test_result <- chisq.test(contingency_table)


  chi2_test_table <- data.frame(
    Variable = independent_vars[[x]],
    Dependent_Variable = dependent_vars_chi2[[z]],
    Group_Count = chi2_test_result$observed,
    Group_Count = chi2_test_result$observed,
    Odds_Ratio = chi2_test_result$estimate,
    Odds_Ratio_CI_Lower = chi2_test_result$conf.int[1],
    Odds_Ratio_CI_Upper = chi2_test_result$conf.int[2],
    p_value = chi2_test_result$p.value
    )


addWorksheet(wb, sheetName = paste0(independent_vars[[x]], "_chi2_test"))
writeData(wb, sheet = paste0(independent_vars[[x]], "_chi2_test"), x = 
chi2_test_table)
    }
  }
}

saveWorkbook(wb, "output.xlsx")
MrFlick
  • 195,160
  • 17
  • 277
  • 295
emerson
  • 45
  • 3
  • It's easier to help you if you include a simple [reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) with sample input and desired output that can be used to test and verify possible solutions. What does the non-loop working code look line? Maybe you should be using `list()` rather than `c()` when defining `independent_vars`, `dependent_vars_ttest` and `dependent_vars_chi2` – MrFlick May 15 '23 at 20:13
  • Changing to list() fixed the issue. I didn't expect that to be the problem. Thank you. – emerson May 15 '23 at 22:39

0 Answers0