2

I am trying to conduct a group-wise t-test, but the code I am using is returning an error. It has worked fine for me previously on other data frames, but for this data frame it gives this error:

Error in t.test.default(x = 0.0268, y = 0.0223, paired = FALSE, var.equal = FALSE, : not enough 'x' observations

My code is:

# Attempted per-Treatment t-test of Observed against Control, followed by a
# Bonferroni adjustment of the p-values and the addition of significance
# labels.
# NOTE(review): `Control` on the right-hand side of the formula is a numeric
# column, so it is presumably treated as the grouping variable; each distinct
# value would then form a group with a single observation, which would
# explain the "not enough 'x' observations" error -- confirm.
stat.test.BACI5 <- Flaov %>%
  group_by(`Treatment`) %>%
  t_test(`Observed` ~  Control,  detailed = TRUE) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance()  

Here is the data structure

structure(list(Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("Phase1", "Phase2"), class = "factor"), Group = structure(c(3L, 
4L, 2L, 3L, 2L, 4L, 1L, 2L, 4L, 3L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 
1L, 2L, 1L, 1L, 1L, 4L, 2L, 3L, 2L, 4L, 3L, 1L, 2L, 4L, 1L, 3L, 
1L, 1L, 1L, 2L, 1L, 3L, 2L, 1L, 2L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 
4L, 2L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 3L, 4L, 2L, 1L, 1L, 2L, 4L, 
2L, 3L, 1L, 1L, 2L), .Label = c("Group A ", "Group B", "Group C ", 
"Group D"), class = "factor"), Observed = c(0.1057, 0.151, 0.0576, 
0.1267, 0.0941, 0.1554, 0.0247, 0.0832, 0.2807, 0.1137, 0.0325, 
0.0777, 0.0362, 0.0637, 0.0303, 0.0223, 0.0932, 0.0363, 0.0641, 
0.0453, 0.0359, 0.0334, 0.2006, 0.0538, 0.1114, 0.0661, 0.2452, 
0.1043, 0.0489, 0.0663, 0.1967, 0.0321, 0.1042, 0.0268, 0.0313, 
0.0255, 0.0787, 0.038, 0.1212, 0.0839, 0.0446, 0.0986, 0.1364, 
0.0335, 0.0409, 0.0407, 0.0871, 0.0584, 0.0875, 0.1961, 0.0711, 
0.0191, 0.0363, 0.0474, 0.1608, 0.0349, 0.1099, 0.0399, 0.1095, 
0.2011, 0.057, 0.0418, 0.0394, 0.054, 0.2033, 0.0631, 0.1089, 
0.0441, 0.0261, 0.0686), Control = c(0.1061, 0.154, 0.0585, 0.1289, 
0.1076, 0.15856, 0.02997, 0.1022, 0.2849, 0.1193, 0.03292, 0.0888, 
0.04628, 0.06454, 0.03341, 0.0239, 0.1013, 0.0364, 0.0883, 0.06363, 
0.0566, 0.04036, 0.20641, 0.06206, 0.1158, 0.0687, 0.2457, 0.12643, 
0.05126, 0.05705, 0.1987, 0.04719, 0.08199, 0.02312, 0.0317, 
0.07045, 0.06395, 0.06043, 0.1251, 0.0912, 0.04575, 0.1018, 0.1379, 
0.03834, 0.048, 0.04131, 0.0926, 0.06242, 0.0965, 0.1972, 0.0742, 
0.0211, 0.04318, 0.05741, 0.1616, 0.06552, 0.1104, 0.04814, 0.11015, 
0.2081, 0.06341, 0.04329, 0.04486, 0.06179, 0.2114, 0.05545, 
0.1127, 0.04327, 0.03355, 0.07189), factors = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L), .Label = c("Phase1", "Phase2"), class = "factor")), row.names = c(NA, 
70L), class = "data.frame")
StupidWolf
  • 45,075
  • 17
  • 40
  • 72
Shah
  • 31
  • 5
  • 1
    Please add some data here; you can use `dput(dframe)` – PKumar Feb 17 '21 at 07:09
  • Treatment Group Observed Control factors Phase1:46 Group A :28 Min. :0.01910 Min. :0.02110 Phase1:46 Phase2:24 Group B :21 1st Qu.:0.03835 1st Qu.:0.04651 Phase2:24 Group C :11 Median :0.06390 Median :0.06503 Group D :10 Mean :0.08227 Mean :0.08796 3rd Qu.:0.10810 3rd Qu.:0.11034 Max. :0.28070 Max. :0.28490 – Shah Feb 17 '21 at 07:21
  • Rather edit the question with the results of the `dput()` function that will show the specific structure of the data.frame that returns the error as suggested by @PKumar. Without data, I expect that the data.frame was subset in a way that reduced the number of levels. Try `droplevels()`, when subsetting the data. – nya Feb 17 '21 at 07:33

1 Answers1

0

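If you are doing a t-test between Observed and Control within the different treatment groups, the formula is wrong: the left-hand side of the formula should be the response variable and the right-hand side should be the grouping variable. In your call, the numeric column `Control` is used as the grouping variable, so every distinct value becomes its own group containing a single observation, which is why `t.test` complains that there are not enough observations.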

In your case, you need to pivot the data to long format to get something like this:

library(tidyr)
# Keep the grouping column and the two measurement columns, stack the
# measurements into long format (columns `name` and `value`), and group
# the result by Treatment.
Flaov[c("Treatment", "Observed", "Control")] %>%
  pivot_longer(cols = -Treatment) %>%
  group_by(Treatment)
# A tibble: 140 x 3
# Groups:   Treatment [2]
   Treatment name      value
   <fct>     <chr>     <dbl>
 1 Phase1    Observed 0.106 
 2 Phase1    Control  0.106 
 3 Phase1    Observed 0.151 
 4 Phase1    Control  0.154 
 5 Phase1    Observed 0.0576
 6 Phase1    Control  0.0585
 7 Phase1    Observed 0.127 
 8 Phase1    Control  0.129 
 9 Phase1    Observed 0.0941
10 Phase1    Control  0.108 
# … with 130 more rows

Then we further pipe it to test:

# Reshape to long format, then run a t-test of `value` by `name`
# (Observed vs. Control) separately within each Treatment level.
Flaov[c("Treatment", "Observed", "Control")] %>%
  pivot_longer(cols = -Treatment) %>%
  group_by(Treatment) %>%
  t_test(formula = value ~ name)

# A tibble: 2 x 9
  Treatment .y.   group1  group2      n1    n2 statistic    df     p
* <fct>     <chr> <chr>   <chr>    <int> <int>     <dbl> <dbl> <dbl>
1 Phase1    value Control Observed    46    46     0.482  90.0 0.631
2 Phase2    value Control Observed    24    24     0.323  46.0 0.748
StupidWolf
  • 45,075
  • 17
  • 40
  • 72
  • It works, but I still don't understand why the previous code did not work; I have another data frame in exactly the same format as this one, and it worked. – Shah Feb 17 '21 at 08:32
  • That's the way you use formulas in R. No idea why it worked in your magical previous example. Check if the results are correct. – StupidWolf Feb 17 '21 at 08:39