0

I have these data in a data.frame named df.

 "nothing" "SNP" "Site" "Color" "Frequence"
 "19595089" "scaffold9976|size55684_51259" "Katiu" "Green" 0.153846153846154
 "41766717" "scaffold9976|size55684_51259" "Gambier" "Green" 0.149532710280374
 "63938345" "scaffold9976|size55684_51259" "Gambier" "Red" 0.102803738317757
 "86109973" "scaffold9976|size55684_51259" "Katiu" "Yellow" 0.1
 "108281601" "scaffold9976|size55684_51259" "Takapoto" "Yellow" 0.0465116279069767
 "130453229" "scaffold9976|size55684_51259" "Hatchery" "Red" 0.0459770114942529
 "152624857" "scaffold9976|size55684_51259" "Gambier" "Yellow" 0.123893805309735
 "174796485" "scaffold9976|size55684_51259" "Takapoto" "Red" 0.0476190476190476
 "196968113" "scaffold9976|size55684_51259" "Katiu" "Red" 0.076271186440678
 "219139741" "scaffold9976|size55684_51259" "Takapoto" "Green" 0.0957446808510638
 "241311369" "scaffold9976|size55684_51259" "Hatchery" "Yellow" 0.0705882352941176
 "263482997" "scaffold9976|size55684_51259" "Hatchery" "Green" 0.121212121212121

And I run this model:

 library(multcomp)

 SNP_name <- as.character(unique(df$SNP)[11])
 ok <- filter(df, df$SNP  == unique(df$SNP)[11])
 mod <- glm(Frequence ~ Color + Site, data = ok)
 K1 <- glht(mod, mcp(Color = "Tukey"))$linfct
 K2 <- glht(mod, mcp(Site = "Tukey"))$linfct
 pvaleur <- summary(glht(mod, linfct = rbind(K1, K2)))$test$pvalues[1:9]

Some pvalue are significant:

 > summary(glht(mod, linfct = rbind(K1, K2)))

     Simultaneous Tests for General Linear Hypotheses

 Fit: glm(formula = Frequence ~ Color + Site, data = ok)

 Linear Hypotheses:
                           Estimate Std. Error z value Pr(>|z|)    
 Red - Green == 0         -0.061916   0.007059  -8.771  < 1e-04 ***
 Yellow - Green == 0      -0.044835   0.007059  -6.352  < 1e-04 ***
 Yellow - Red == 0         0.017081   0.007059   2.420  0.11152    
 Hatchery - Gambier == 0  -0.046151   0.008151  -5.662  < 1e-04 ***
 Katiu - Gambier == 0     -0.015371   0.008151  -1.886  0.34344    
 Takapoto - Gambier == 0  -0.062118   0.008151  -7.621  < 1e-04 ***
 Katiu - Hatchery == 0     0.030780   0.008151   3.776  0.00133 ** 
 Takapoto - Hatchery == 0 -0.015967   0.008151  -1.959  0.30122    
 Takapoto - Katiu == 0    -0.046747   0.008151  -5.735  < 1e-04 ***

But the first comparison show smaller pvalue than others:

 $pvalues
 [1] 0.000000e+00 1.336467e-09 1.114732e-01 9.550331e-08 3.434281e-01 1.207923e-13 1.406660e-03 3.012392e-01
 [9] 1.276309e-07

Indeed, for reason explain here, R use 0 (or 0.000000e+00) instead of 1e-400 for example if the exponential is too "huge".

But I need to know exactly the "number of this exponential".

There are a lot of threads that explain how to display scientific notation after a statistical like here, here, or here with:

options(scipen = 0)
options(digits = 2)

or this

format(pvaleur, scientific = TRUE)

But this give only:

 [1] 0.0e+00 1.4e-09 1.1e-01 1.1e-07 3.4e-01 1.5e-13 1.3e-03 3.0e-01 5.8e-08 

So, how force the test to not obtain 0.0e+00 but the "real" pvalue ?

Any advices for help me coding this will be very appreciated

EDIT:

 > dput(head(df,15))
 structure(list(nothing = c(22173807L, 44345435L, 66517063L, 88688691L, 
 110860319L, 133031947L, 155203575L, 177375203L, 199546831L, 221718459L, 
 243890087L, 2180L, 22173808L, 44345436L, 66517064L), SNP = structure(c(1L, 
 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("scaffold1|size567472_100042", 
 "scaffold1|size567472_100059", "scaffold1|size567472_100087", 
 "scaffold1|size567472_100124", "scaffold1|size567472_100201", 
 "scaffold1|size567472_100212", "scaffold1|size567472_100253", 
 "scaffold1|size567472_100277", "scaffold1|size567472_100300", 
 "scaffold1|size567472_100304", "scaffold9976|size55684_51259"
 ), class = "factor"), Site = structure(c(1L, 1L, 3L, 4L, 2L, 
 1L, 4L, 3L, 4L, 2L, 2L, 3L, 1L, 1L, 3L), .Label = c("Gambier", 
 "Hatchery", "Katiu", "Takapoto"), class = "factor"), Color = structure(c(1L, 
 2L, 3L, 3L, 2L, 3L, 2L, 2L, 1L, 3L, 1L, 1L, 1L, 2L, 3L), .Label = c("Green", 
 "Red", "Yellow"), class = "factor"), Frequence = c(0.333333333333333, 
 0.225, 0.12, 0.0952380952380952, 0.297297297297297, 0.107142857142857, 
 0.0465116279069767, 0.196078431372549, 0.354166666666667, 0.222222222222222, 
 0.138888888888889, 0.1, 0.188679245283019, 0.315789473684211, 
 0.115384615384615)), row.names = c(NA, 15L), class = "data.frame")
  • instead of copy/pasting all of ```"nothing" "SNP" "Site" "Color" "Frequence" "19595089" "scaffold9976|size55684_51259" ``` give us `dput(df)` or if it is a very large dataframe give us `dput(head(df,15))` – M-- May 16 '19 at 17:41
  • R returns to 0 once I go below 1e-323. So maybe just add `1e-323` to all of the p-values? – which_command May 16 '19 at 17:43
  • I did the edit for the dput(head(df,15)) because yes, it's a large data.frame. Yes @which_command, indeed under 1e-323 R return 0 (I had ask why before here: https://stackoverflow.com/questions/56166891/trouble-with-precision-of-scientific-notation-in-r). But I can't put 1e-323 each time there is a 0, because I need after to produce a Manhattan Plot with a -log10(pvalue) after, and this give me a black horizontal dot line à y=323. I would like the "real" values in order to obtain de dot cloud in my Manhattan Plot. – Pierre-louis Stenger May 16 '19 at 18:04

0 Answers0