2

I have a huge dataframe with the following basic structure:

# Example data: one row per species/trap combination, holding the number of
# individuals counted in that trap.
data <- data.frame(
  species = factor(rep(c("species1", "species2", "species3"), each = 4)),
  trap = rep(c("A", "B", "C", "D"), times = 3),
  count = c(
    6, 3, 7, 9, # species1
    5, 3, 6, 6, # species2
    5, 8, 1, 3  # species3
  )
)
data

I want to run chi-square tests on the count data across the four traps for every species at once, testing each species individually rather than comparing species with each other. This can be done with the following code for one species at a time, but because my original dataframe is huge, repeating it for every species is not a practical solution for me.

# Goodness-of-fit test for a single species: tabulate its counts per trap,
# then test them against equal expected counts across the traps.
is_species1 <- data$species == "species1"
chi_species1 <- xtabs(count ~ trap, data = data, subset = is_species1)
chi_species1
chisq.test(chi_species1)

Thanks for your help!!

2 Answers

3

base

# Example data: one row per species/trap combination with its count.
df <- data.frame(
  species = factor(rep(c("species1", "species2", "species3"), each = 4)),
  trap = rep(c("A", "B", "C", "D"), times = 3),
  count = c(6, 3, 7, 9, 5, 3, 6, 6, 5, 8, 1, 3)
)
df
#>     species trap count
#> 1  species1    A     6
#> 2  species1    B     3
#> 3  species1    C     7
#> 4  species1    D     9
#> 5  species2    A     5
#> 6  species2    B     3
#> 7  species2    C     6
#> 8  species2    D     6
#> 9  species3    A     5
#> 10 species3    B     8
#> 11 species3    C     1
#> 12 species3    D     3

species <- unique(df$species)

# Build the per-trap count table for one species.
tabulate_traps <- function(x) {
  xtabs(count ~ trap, data = df, subset = species == x)
}

# One table per species, named after the species it belongs to.
chi_species <- setNames(lapply(species, tabulate_traps), species)

# Run a chi-squared goodness-of-fit test on every table.
lapply(chi_species, chisq.test)

#> $species1
#> 
#>  Chi-squared test for given probabilities
#> 
#> data:  X[[i]]
#> X-squared = 3, df = 3, p-value = 0.3916
#> 
#> 
#> $species2
#> 
#>  Chi-squared test for given probabilities
#> 
#> data:  X[[i]]
#> X-squared = 1.2, df = 3, p-value = 0.753
#> 
#> 
#> $species3
#> 
#>  Chi-squared test for given probabilities
#> 
#> data:  X[[i]]
#> X-squared = 6.2941, df = 3, p-value = 0.09815

Created on 2022-04-25 by the reprex package (v2.0.1)

tidyverse

library(dplyr)

# Goodness-of-fit test per species on the trap totals.
df %>%
  group_by(species, trap) %>%
  # Collapse to one total per species/trap. Keeping the species grouping is
  # stated explicitly so the next summarise() runs once per species and no
  # regrouping message is printed.
  summarise(count = sum(count), .groups = "drop_last") %>%
  # Given a single numeric vector, chisq.test() tests the counts against
  # equal expected proportions.
  summarise(pvalue = chisq.test(count)$p.value)

# A tibble: 3 × 2
  species  pvalue
  <fct>     <dbl>
1 species1 0.392 
2 species2 0.753 
3 species3 0.0981
Yuriy Saraykin
  • 8,390
  • 1
  • 7
  • 14
0

You want something like this:

library(dplyr)

# Goodness-of-fit test per species: are the counts spread evenly over the
# traps? chisq.test(count, trap) would instead cross-tabulate the two vectors
# as factors and test their independence, which is what produces the
# 0.213 / 0.238 / 0.213 p-values. The trap totals must be passed as a single
# vector of counts; for the sample data this gives 0.392, 0.753 and 0.0981.
data %>%
  group_by(species) %>%
  summarise(
    # Sum per trap first so repeated species/trap rows are handled too.
    pvalue = chisq.test(tapply(count, trap, sum, default = 0))$p.value
  )

Output:

# A tibble: 3 × 2
  species  pvalue
  <fct>     <dbl>
1 species1  0.213
2 species2  0.238
3 species3  0.213
Quinten
  • 35,235
  • 5
  • 20
  • 53