How to fix "Error in comp[[i]] : subscript out of bounds" to make letter compact display?

Question

I have a problem with an error message I have received below: "Error in comp[[i]] : subscript out of bounds"

I want to indicate a compact letter display in my analysis but it did not work. I have tried to find some solution in the internet but it failed. Can anyone help me? this is my data

structure(list(Ratio = c(0.267055286, 0.235446484, 0.224992335, 
0.228212575, 0.257381176, 0.256859674, 0.243903929, 0.252712714, 
0.241461807, 0.248338451, 0.256563425, 0.26601715, 0.250073217, 
0.251969117, 0.253287549, 0.263241548, 0.269360378, 0.264825074, 
0.25672374, 0.2534554, 0.246267242, 0.246695711, 0.236139498, 
0.249491444, 0.251564819, 0.240452818, 0.254713159, 0.25147281, 
0.26201919, 0.248360746, 0.246830304, 0.266038937, 0.26905912, 
0.24791562, 0.247594584, 0.256053813, 0.251228178, 0.246707173, 
0.250456004, 0.27637359, 0.26508449, 0.262086576, 0.256718454, 
0.248851991, 0.248653789, 0.252162637, 0.257240293, 0.256834233, 
0.28264247, 0.29802879, 0.258576741, 0.277733515, 0.296467765, 
0.286141117, 0.277513708, 0.273090289, 0.278239429, 0.267859464, 
0.264483192, 0.276063591, 0.262313997, 0.246508881, 0.279584358, 
0.287600757, 0.279089811, 0.278508984, 0.255397803, 0.282189954, 
0.281931686, 0.274297023, 0.314339694, 0.190332237, 0.200487283, 
0.221774473, 0.194636823, 0.212372143, 0.191236662, 0.172644425, 
0.22595976, 0.198123319, 0.211837134, 0.215018989, 0.195312021, 
0.20158237, 0.184286731, 0.19498543, 0.196400274, 0.17994453, 
0.208702986, 0.220364396, 0.202560056, 0.202323629, 0.209563815, 
0.211821257, 0.211889051, 0.169961202, 0.165792165, 0.143280229, 
0.141520745, 0.155981145, 0.1505676, 0.169778706, 0.148619699, 
0.14276644, 0.182916256, 0.134962743, 0.162540603, 0.147899504, 
0.172803323, 0.171328653, 0.148332232, 0.17731353, 0.137293375, 
0.167809004, 0.187015484, 0.16659136, 0.143882683, 0.195064548, 
0.145268859, 0.139506029, 0.158491822, 0.161545847, 0.142343264, 
0.172845598, 0.140114282, 0.14208018, 0.147465037, 0.158342427, 
0.141087175, 0.152013369, 0.152338253, 0.147960271, 0.159925355, 
0.127860026, 0.147602983, 0.152138695, 0.169946914, 0.151562855, 
0.130802593, 0.161859989, 0.12996254, 0.155459895, 0.150915199, 
0.16102091, 0.151073748, 0.169443662, 0.138065717, 0.141765129, 
0.168697363, 0.180178444, 0.152726489, 0.132928661, 0.137527664, 
0.162030059, 0.156803768, 0.144039257, 0.177741017, 0.162964524, 
0.17659578, 0.141199988, 0.158541033, 0.156337255, 0.147436957, 
0.155102179, 0.167067911, 0.158620908, 0.15569626), Strain = c("a_ M1",  "a_ M1", "a_ M1", "a_ M1", "a_ M1", "a_ M1", "a_ M1", "a_ M1",  "a_ M1", "a_ M1", "a_ M1", "a_ M1", "a_ M1", "a_ M1", "a_ M1",  "a_ M1", "a_ M1", "a_ M1", "a_ M1", "a_ M1", "a_ M1", "a_ M1",  "a_ M1", "a_ M1", "a_N1", "a_N1", "a_N1", "a_N1", "a_N1", "a_N1",  "a_N1", "a_N1", "a_N1", "a_N1", "a_N1", "a_N1", "a_N1", "a_N1",  "a_N1", "a_N1", "a_N1", "a_N1", "a_N1", "a_N1", "a_N1", "a_N1",  "a_N1", "a_N1", "a_ H1", "a_ H1", "a_ H1", "a_ H1", "a_ H1",  "a_ H1", "a_ H1", "a_ H1", "a_ H1", "a_ H1", "a_ H1", "a_ H1",  "a_ H1", "a_ H1", "a_ H1", "a_ H1", "a_ H1", "a_ H1", "a_ H1",  "a_ H1", "a_ H1", "a_ H1", "a_ H1", "b_S1", "b_S1", "b_S1", "b_S1",  "b_S1", "b_S1", "b_S1", "b_S1", "b_S1", "b_S1", "b_S1", "b_S1",  "b_S1", "b_S1", "b_S1", "b_S1", "b_S1", "b_S1", "b_S1", "b_S1",  "b_S1", "b_S1", "b_S1", "b_S1", "B_H1", "B_H1", "B_H1", "B_H1",  "B_H1", "B_H1", "B_H1", "B_H1", "B_H1", "B_H1", "B_H1", "B_H1",  "B_H1", "B_H1", "B_H1", "B_H1", "B_H1", "B_H1", "B_H1", "B_H1",  "B_H1", "B_H1", "B_H1", "B_H1", "B-O1", "B-O1", "B-O1", "B-O1",  "B-O1", "B-O1", "B-O1", "B-O1", "B-O1", "B-O1", "B-O1", "B-O1",  "B-O1", "B-O1", "B-O1", "B-O1", "B-O1", "B-O1", "B-O1", "B-O1",  "B-O1", "B-O1", "B-O1", "B-O1", "b_N1", "b_N1", "b_N1", "b_N1",  "b_N1", "b_N1", "b_N1", "b_N1", "b_N1", "b_N1", "b_N1", "b_N1",  "b_N1", "b_N1", "b_N1", "b_N1", "b_N1", "b_N1", "b_N1", "b_N1",  "b_N1", "b_N1", "b_N1", "b_N1"), species = c("a", "a", "a", "a",  "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",  "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",  "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",  "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",  "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",  "a", "a", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",  "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",  "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",  "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",  "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",  "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",  "b", "b", "b", "b", "b", "b", "b")), class = "data.frame", row.names = c(NA, 
-167L))

and this is the script

library(datasets)
library(ggplot2)
library(multcompView)
library(dplyr)
library(datasets)
library(tidyverse)
library(multcomp)

Data = read.csv("data.csv", h= TRUE)
qplot(x = species, y = Ratio, geom = "point", data = Data) +
  facet_grid(.~Strain)


# creating a variable as factor for the ANOVA
Data$Strain <- as.factor(Data$Strain)
Data$species <- as.factor(Data$species)
str(Data)

# analysis of variance
anova <- aov(Ratio ~ Strain*factor(species), data = Data)
summary(anova)

# table with factors, means and standard deviation
data_summary <- group_by(Data, Strain, species) %>%
  summarise(mean=mean(Ratio), sd=sd(Ratio)) %>%
  arrange(desc(mean))
print(data_summary)

# Tukey's test
tukey <- TukeyHSD(anova)
print(tukey)


# compact letter display
coba = multcompLetters4(anova, tukey)
print(coba)

# creating the compact letter display
tukey.cld <- multcompLetters4(anova, tukey)
print(tukey.cld)

this is what I want to get ---> I want to get the letter to indicate in my data

Please help me

Greetings! Generally speaking, it is always better to provide a minimal reproducible dataset for us to work with on SO. One way of achieving this is by using the `dput` command. You can find out how to use this function by watching this video: https://youtu.be/3EID3P1oisg — Shawn Hemelstrand, Sep 03 '22 at 10:17
Hi, I already put my data here, https://drive.google.com/file/d/1m-TNimLqnxXSsJniMjoB1lwKbU1uHIOE/view. I am sorry, I have seen the video and still not understand. Here, what I get from my R console function (x, file = "", control = c("keepNA", "keepInteger", "niceNames", "showAttributes")) { if (is.character(file)) if (nzchar(file)) { file <- file(file, "wt") on.exit(close(file)) } else file <- stdout() .Internal(dput(x, file, .deparseOpts(control))) } — Woony, Sep 04 '22 at 00:21
Basically if your csv file is called `data`, then you run `dput(data)` and then whatever R spits out, you copy and paste that into your question. — Shawn Hemelstrand, Sep 04 '22 at 00:23
then what is the point? I still cannot solve the problem here — Woony, Sep 04 '22 at 11:12
It's difficult for anybody to help you if they don't have your dataset to work with. — Shawn Hemelstrand, Sep 04 '22 at 11:14
I attach this link, this is the data https://drive.google.com/file/d/1m-TNimLqnxXSsJniMjoB1lwKbU1uHIOE/view — Woony, Sep 04 '22 at 11:19
Right but its usually better for people to not have to download things for three reasons: 1) viruses 2) space on their computers 3) dput is typically easier to just copy and paste into an R script. A thread on additional reasons can be found here: https://meta.stackoverflow.com/questions/333952/why-should-i-provide-a-minimal-reproducible-example-for-a-very-simple-sql-query — Shawn Hemelstrand, Sep 04 '22 at 11:53
I tried, but I'm not sure what the issue is. I only have an issue with the `multicompLetters4` part and it looks like it resembles the example in the help page for the function. So I'm not quite sure. — Shawn Hemelstrand, Sep 04 '22 at 13:11
I really want to get the answer soon but no one can help me right now — Woony, Sep 04 '22 at 14:11
Sometimes it can be difficult to get an answer from somebody once enough time has passed for the question to be asked (usually fresh/actively answered questions get answered sooner). You may need to google the error regarding the subscript outta bounds and see what's causing it. Usually that has something to do with the function and the matrix being used with it not matching. — Shawn Hemelstrand, Sep 05 '22 at 00:16

Paul Schmidt · Answer 1 · 2023-01-03T08:50:22.183

Does this work for you? I have added some notes below.

library(datasets)
library(ggplot2)
library(multcompView)
library(dplyr)
library(datasets)
library(tidyverse)
library(multcomp)

raw <- tibble::tribble(
         ~Ratio, ~Strain, ~species,
  "267.055.286", "a_ M1",      "a",
  "235.446.484", "a_ M1",      "a",
  "224.992.335", "a_ M1",      "a",
  "228.212.575", "a_ M1",      "a",
  "257.381.176", "a_ M1",      "a",
  "256.859.674", "a_ M1",      "a",
  "243.903.929", "a_ M1",      "a",
  "252.712.714", "a_ M1",      "a",
  "241.461.807", "a_ M1",      "a",
  "248.338.451", "a_ M1",      "a",
  "256.563.425", "a_ M1",      "a",
   "26.601.715", "a_ M1",      "a",
  "250.073.217", "a_ M1",      "a",
  "251.969.117", "a_ M1",      "a",
  "253.287.549", "a_ M1",      "a",
  "263.241.548", "a_ M1",      "a",
  "269.360.378", "a_ M1",      "a",
  "264.825.074", "a_ M1",      "a",
   "25.672.374", "a_ M1",      "a",
    "2.534.554", "a_ M1",      "a",
  "246.267.242", "a_ M1",      "a",
  "246.695.711", "a_ M1",      "a",
  "236.139.498", "a_ M1",      "a",
  "249.491.444", "a_ M1",      "a",
  "251.564.819",  "a_N1",      "a",
  "240.452.818",  "a_N1",      "a",
  "254.713.159",  "a_N1",      "a",
   "25.147.281",  "a_N1",      "a",
   "26.201.919",  "a_N1",      "a",
  "248.360.746",  "a_N1",      "a",
  "246.830.304",  "a_N1",      "a",
  "266.038.937",  "a_N1",      "a",
   "26.905.912",  "a_N1",      "a",
   "24.791.562",  "a_N1",      "a",
  "247.594.584",  "a_N1",      "a",
  "256.053.813",  "a_N1",      "a",
  "251.228.178",  "a_N1",      "a",
  "246.707.173",  "a_N1",      "a",
  "250.456.004",  "a_N1",      "a",
   "27.637.359",  "a_N1",      "a",
   "26.508.449",  "a_N1",      "a",
  "262.086.576",  "a_N1",      "a",
  "256.718.454",  "a_N1",      "a",
  "248.851.991",  "a_N1",      "a",
  "248.653.789",  "a_N1",      "a",
  "252.162.637",  "a_N1",      "a",
  "257.240.293",  "a_N1",      "a",
  "256.834.233",  "a_N1",      "a",
   "28.264.247", "a_ H1",      "a",
   "29.802.879", "a_ H1",      "a",
  "258.576.741", "a_ H1",      "a",
  "277.733.515", "a_ H1",      "a",
  "296.467.765", "a_ H1",      "a",
  "286.141.117", "a_ H1",      "a",
  "277.513.708", "a_ H1",      "a",
  "273.090.289", "a_ H1",      "a",
  "278.239.429", "a_ H1",      "a",
  "267.859.464", "a_ H1",      "a",
  "264.483.192", "a_ H1",      "a",
  "276.063.591", "a_ H1",      "a",
  "262.313.997", "a_ H1",      "a",
  "246.508.881", "a_ H1",      "a",
  "279.584.358", "a_ H1",      "a",
  "287.600.757", "a_ H1",      "a",
  "279.089.811", "a_ H1",      "a",
  "278.508.984", "a_ H1",      "a",
  "255.397.803", "a_ H1",      "a",
  "282.189.954", "a_ H1",      "a",
  "281.931.686", "a_ H1",      "a",
  "274.297.023", "a_ H1",      "a",
  "314.339.694", "a_ H1",      "a",
  "190.332.237",  "b_S1",      "b",
  "200.487.283",  "b_S1",      "b",
  "221.774.473",  "b_S1",      "b",
  "194.636.823",  "b_S1",      "b",
  "212.372.143",  "b_S1",      "b",
  "191.236.662",  "b_S1",      "b",
  "172.644.425",  "b_S1",      "b",
   "22.595.976",  "b_S1",      "b",
  "198.123.319",  "b_S1",      "b",
  "211.837.134",  "b_S1",      "b",
  "215.018.989",  "b_S1",      "b",
  "195.312.021",  "b_S1",      "b",
   "20.158.237",  "b_S1",      "b",
  "184.286.731",  "b_S1",      "b",
   "19.498.543",  "b_S1",      "b",
  "196.400.274",  "b_S1",      "b",
   "17.994.453",  "b_S1",      "b",
  "208.702.986",  "b_S1",      "b",
  "220.364.396",  "b_S1",      "b",
  "202.560.056",  "b_S1",      "b",
  "202.323.629",  "b_S1",      "b",
  "209.563.815",  "b_S1",      "b",
  "211.821.257",  "b_S1",      "b",
  "211.889.051",  "b_S1",      "b",
  "169.961.202",  "B_H1",      "b",
  "165.792.165",  "B_H1",      "b",
  "143.280.229",  "B_H1",      "b",
  "141.520.745",  "B_H1",      "b",
  "155.981.145",  "B_H1",      "b",
    "1.505.676",  "B_H1",      "b",
  "169.778.706",  "B_H1",      "b",
  "148.619.699",  "B_H1",      "b",
   "14.276.644",  "B_H1",      "b",
  "182.916.256",  "B_H1",      "b",
  "134.962.743",  "B_H1",      "b",
  "162.540.603",  "B_H1",      "b",
  "147.899.504",  "B_H1",      "b",
  "172.803.323",  "B_H1",      "b",
  "171.328.653",  "B_H1",      "b",
  "148.332.232",  "B_H1",      "b",
   "17.731.353",  "B_H1",      "b",
  "137.293.375",  "B_H1",      "b",
  "167.809.004",  "B_H1",      "b",
  "187.015.484",  "B_H1",      "b",
   "16.659.136",  "B_H1",      "b",
  "143.882.683",  "B_H1",      "b",
  "195.064.548",  "B_H1",      "b",
  "145.268.859",  "B_H1",      "b",
  "139.506.029",  "B-O1",      "b",
  "158.491.822",  "B-O1",      "b",
  "161.545.847",  "B-O1",      "b",
  "142.343.264",  "B-O1",      "b",
  "172.845.598",  "B-O1",      "b",
  "140.114.282",  "B-O1",      "b",
   "14.208.018",  "B-O1",      "b",
  "147.465.037",  "B-O1",      "b",
  "158.342.427",  "B-O1",      "b",
  "141.087.175",  "B-O1",      "b",
  "152.013.369",  "B-O1",      "b",
  "152.338.253",  "B-O1",      "b",
  "147.960.271",  "B-O1",      "b",
  "159.925.355",  "B-O1",      "b",
  "127.860.026",  "B-O1",      "b",
  "147.602.983",  "B-O1",      "b",
  "152.138.695",  "B-O1",      "b",
  "169.946.914",  "B-O1",      "b",
  "151.562.855",  "B-O1",      "b",
  "130.802.593",  "B-O1",      "b",
  "161.859.989",  "B-O1",      "b",
   "12.996.254",  "B-O1",      "b",
  "155.459.895",  "B-O1",      "b",
  "150.915.199",  "B-O1",      "b",
   "16.102.091",  "b_N1",      "b",
  "151.073.748",  "b_N1",      "b",
  "169.443.662",  "b_N1",      "b",
  "138.065.717",  "b_N1",      "b",
  "141.765.129",  "b_N1",      "b",
  "168.697.363",  "b_N1",      "b",
  "180.178.444",  "b_N1",      "b",
  "152.726.489",  "b_N1",      "b",
  "132.928.661",  "b_N1",      "b",
  "137.527.664",  "b_N1",      "b",
  "162.030.059",  "b_N1",      "b",
  "156.803.768",  "b_N1",      "b",
  "144.039.257",  "b_N1",      "b",
  "177.741.017",  "b_N1",      "b",
  "162.964.524",  "b_N1",      "b",
   "17.659.578",  "b_N1",      "b",
  "141.199.988",  "b_N1",      "b",
  "158.541.033",  "b_N1",      "b",
  "156.337.255",  "b_N1",      "b",
  "147.436.957",  "b_N1",      "b",
  "155.102.179",  "b_N1",      "b",
  "167.067.911",  "b_N1",      "b",
  "158.620.908",  "b_N1",      "b",
   "15.569.626",  "b_N1",      "b"
  ) 

Data <- raw %>% 
  mutate(Ratio = as.integer(str_remove_all(Ratio, "\\."))) %>% 
  mutate(across(where(is.character), as.factor))

# set up model
mod <- lm(Ratio ~ Strain*species, data = Data)

library(emmeans)
emmeans(object = mod, specs = ~ species) %>% cld(Letters = letters)
#> NOTE: A nesting structure was detected in the fitted model:
#>     Strain %in% species
#> NOTE: Results may be misleading due to involvement in interactions
#>  species   emmean      SE  df lower.CL upper.CL .group
#>  b       1.46e+08 7097529 160 1.32e+08  1.6e+08  a    
#>  a       2.24e+08 8254695 160 2.07e+08  2.4e+08   b   
#> 
#> Results are averaged over the levels of: Strain 
#> Confidence level used: 0.95 
#> significance level used: alpha = 0.05 
#> NOTE: If two or more means share the same grouping letter,
#>       then we cannot show them to be different.
#>       But we also did not show them to be the same.
emmeans(object = mod, specs = ~ species:Strain) %>% cld(Letters = letters)
#> NOTE: A nesting structure was detected in the fitted model:
#>     Strain %in% species
#>  Strain species   emmean       SE  df lower.CL upper.CL .group
#>  B_H1   b       1.35e+08 14195058 160 1.07e+08 1.63e+08  a    
#>  b_N1   b       1.38e+08 14195058 160 1.10e+08 1.66e+08  ab   
#>  B-O1   b       1.40e+08 14195058 160 1.12e+08 1.68e+08  ab   
#>  b_S1   b       1.72e+08 14195058 160 1.44e+08 2.00e+08  abc  
#>  a_N1   a       1.96e+08 14195058 160 1.68e+08 2.24e+08   bcd 
#>  a_ M1  a       2.21e+08 14195058 160 1.93e+08 2.49e+08    cd 
#>  a_ H1  a       2.55e+08 14500363 160 2.26e+08 2.83e+08     d 
#> 
#> Confidence level used: 0.95 
#> P value adjustment: tukey method for comparing a family of 7 estimates 
#> significance level used: alpha = 0.05 
#> NOTE: If two or more means share the same grouping letter,
#>       then we cannot show them to be different.
#>       But we also did not show them to be the same.

^{Created on 2022-09-05 with reprex v2.0.2}

The screenshot you show has mean comparisons with compact letter display for (i) the first factor, (ii) the second factor and (iii) their interaction. This is not possible for your data, since Strain is completely nested within species. Thus, I produce only mean comparisons with compact letter display for species and species:Strain.
Pleas see this chapter on compact letter displays for more details on the pros and cons of a compact letter display.
The topic of compact letter displays is a bit more complex when you have two factors and especially interactions between them. Check out this answer for details on what your options are.

Hi Mr. Paul. Thank you so much for your respond. I am sorry for my late respond. I tried to run your script and also checked this website https://schmidtpaul.github.io/DSFAIR/compactletterdisplay.html. I modify a little bit of your script according to this website, because I want to display the data in the boxplot similar to this website. I modified this part "library(emmeans) model_means <- emmeans(object = mod, specs = ~ species) model_means_cld <- cld(object= model_means, adjust = "Tukey", Letters = letters, alpha = 0.05)" — Woony, Sep 28 '22 at 05:19
but It gave me this message "> model_means <- emmeans(object = mod, specs = ~ species) NOTE: A nesting structure was detected in the fitted model: Strain %in% species NOTE: Results may be misleading due to involvement in interactions > > model_means_cld <- cld(object= model_means, adjust = "Tukey", Letters = letters, alpha = 0.05) Note: adjust = "tukey" was changed to "sidak" because "tukey" is only appropriate for one set of pairwise comparisons". Could you please help me what is going on? Thanks — Woony, Sep 28 '22 at 05:22
@Woony Hi! First of all - both are just notes, not errors - but obviously you would like to understand what they mean anyway. Regarding the first note: As you can see in my code, I also got the first `NOTE: A nesting structure was detected in the fitted model: Strain %in% species`. So this is not new. It basically means that not all species-Strain-combinations are present in your data. I also commented on this in the first comment below the code. You can read more on what *nested* means here: t.ly/c7tI It is not a problem, as you as you do not try to get means for `Strain` across `species`. — Paul Schmidt, Sep 28 '22 at 06:54
@Woony Regarding the second note `Note: adjust = "tukey" was changed to "sidak" because "tukey" is only appropriate for one set of pairwise comparisons"`, I have a little section commenting on the on the website I already gave you. Here is the link to the exact position on that website: t.ly/zLKfY — Paul Schmidt, Sep 28 '22 at 06:55

How to fix "Error in comp[[i]] : subscript out of bounds" to make letter compact display?

1 Answers1