3

example of dataframe:

>

 df
    ID B C D
  1  A  1  1  3
  2  B  2  3  1
  3  C  1  1  1
  4  D  3  1  1
  5  E  1  0  0

I've looped anova on various variables on a dataframe with this code (found online)

data:df

library(car)
LLA <- rep(NA, ncol (df))
sink("dfresults.doc")
for (i in 4:ncol(df)) {
  column <- names(df[i])
  contrasts(df$Group)<-contr.helmert(2) 
  contrasts(df$Gender)<-contr.helmert(2) 
  model= aov(df[,i] ~ Group + Gender, data= df)
  SBCna=Anova(model, type="III")
  tk=TukeyHSD(aov(df[,i] ~ Group + Gender, data= df))
  print(column)
  print(LLA)
  print(tk)
}
sink()

(both Group and Gender are factorial) This produced a .doc file with the output of the analyses (very useful), sample of output:

    [1] "variable"
Anova Table (Type III tests)

Response: df[, i]
              Sum Sq  Df   F value    Pr(>F)    
(Intercept) 14313489   1 6922.5653 < 2.2e-16 ***
Group            280   1    0.1354    0.7133    
Gender         40487   1   19.5809 1.635e-05 ***
Residuals     386652 187                        
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = df[, i] ~ Group + Gender, data =df)

$Group
                  diff       lwr      upr    p adj
Group1-Group2 0.09016515 -14.99211 15.17244 0.990603

$Gender
                diff      lwr      upr    p adj
Male-Female 32.62016 18.02386 47.21646 1.75e-05

Now what I'd like to do is to create data.frame (that I can later save in .csv file) with the Sum f Square, f Value, P-values (the Pr(>F)) from the anova tables produced in the output.

StupidWolf
  • 45,075
  • 17
  • 40
  • 72
  • `anova_table <- as.data.frame(summary(model)[[1]])` here you are the df with results containing what do you need. I was unable to run this code there were some errors about group and gender – Adamm Mar 16 '20 at 09:34
  • There are no variables `Group` and `Gender` in your df. – stefan Mar 16 '20 at 15:50

1 Answers1

2

Let's say your data frame is like this:

df = data.frame(id=1:100,
Group=sample(letters[1:2],100,replace=TRUE),
Gender=sample(c("M","F"),100,replace=TRUE),
matrix(rnorm(7*100),ncol=7))
colnames(df)[4:10] = paste0("Var",1:7)
contrasts(df$Group)<-contr.helmert(2) 
contrasts(df$Gender)<-contr.helmert(2) 

Then a quick way is to store the AOV results in a list, and you only fit your model once with aov, and use it again with Anova.

Variables_to_regress = colnames(df)[4:ncol(df)]
anova_results = vector("list",length(Variables_to_regress))
names(anova_results) = Variables_to_regress

for (i in Variables_to_regress) {
  this_formula = as.formula(paste(i,"~ Group + Gender"))
  model = aov(formula=this_formula, data=df)
  anova_results[[i]] = Anova(model, data=df,type="III")
}

data.frame(anova_results[["Var1"]])
                  Sum.Sq Df     F.value    Pr..F.
(Intercept) 1.326132e-03  1 0.001214224 0.9722744
Group       2.818789e-01  1 0.258091920 0.6125874
Gender      7.351722e-01  1 0.673133183 0.4139730
Residuals   1.059400e+02 97          NA        NA

write.csv(data.frame(anova_results[["Var1"]]),....)

If you are interested, another way is to use purrr and broom to collect all the regression results:

library(purrr)
library(broom)
library(tidyr)

res = pivot_longer(df[,-1],-c(Group,Gender)) %>%
nest(data=c(Group, Gender, value)) %>% 
mutate(
fit=map(data,~aov(value ~ Group+Gender,data=.x)),
typeIII = map(fit,Anova,type="III"),
tidied = map(typeIII,tidy)
)

The code above basically convert your data to a long format, lumps everything below to one variable together, performs the aov, Anova and cleans up the table using tidy from broom. The above is useful in a way you can easily expand this to perform more test, or look at other statistics in the anova result.

To look at the results only, do:

res %>% unnest(tidied) %>% select(name,term,sumsq,df,statistic,p.value)
# A tibble: 28 x 6
   name  term            sumsq    df statistic p.value
   <chr> <chr>           <dbl> <dbl>     <dbl>   <dbl>
 1 Var1  (Intercept)   0.00133     1   0.00121   0.972
 2 Var1  Group         0.282       1   0.258     0.613
 3 Var1  Gender        0.735       1   0.673     0.414
 4 Var1  Residuals   106.         97  NA        NA    
 5 Var2  (Intercept)   1.32        1   1.04      0.311
 6 Var2  Group         0.102       1   0.0798    0.778
 7 Var2  Gender        1.63        1   1.28      0.261
 8 Var2  Residuals   124.         97  NA        NA    
 9 Var3  (Intercept)   0.0625      1   0.0649    0.799
10 Var3  Group         0.247       1   0.256     0.614
StupidWolf
  • 45,075
  • 17
  • 40
  • 72
  • for `lm` models AIC and BIC values etc are given is it possible to get the same for model that is run with anova? – PesKchan Apr 28 '22 at 19:56