1

TL;DR - create tile plot, two aesthetics in same tile cause tile color to not render, not sure if it's my code or something with ggplot2, anyone experienced this?

I'm getting a strange plotting issue (not an error, this silently fails and I only notice the problem when looking at the final representation) when I try to plot multiple aesthetics in the same tile using ggplot.

my data:

# Tile-layer input: one row per recorded (sample id, gene) event.
# Some (id, gene) pairs appear on more than one row (e.g. KRAS for
# 0400136_T1), once with a mutation type and once flagged as a CNV.
satur_muts <- data.frame(
  id = rep(
    c("0400136_T1", "0400171_T1", "0400179_T1", "0400195_T1"),
    times = c(4L, 4L, 3L, 4L)
  ),
  gene = c(
    "CDKN2A", "TP53", "KRAS", "KRAS",
    "CDKN2A", "TP53", "KRAS", "KRAS",
    "CDKN2A", "TP53", "KRAS",
    "CDKN2A", "TP53", "KRAS", "KRAS"
  ),
  mut_type = factor(
    c(
      NA, NA, "Missense", NA,
      NA, NA, NA, "Missense",
      NA, NA, "Missense",
      NA, NA, "Missense", NA
    ),
    levels = "Missense"
  ),
  is_cnv = c(
    NA, NA, FALSE, TRUE,
    NA, NA, TRUE, FALSE,
    NA, NA, FALSE,
    NA, NA, FALSE, TRUE
  ),
  stringsAsFactors = FALSE
)
# Same class vector as the original dput() of the tibble.
class(satur_muts) <- c("tbl_df", "tbl", "data.frame")

# Copy-number overlay input: one row per sample carrying a KRAS
# high-level amplification. Columns id, gene, mut_type and cnv are factors.
cnv_dat <- data.frame(
  id = factor(
    c("0400136_T1", "0400171_T1", "0400195_T1"),
    levels = c("0400136_T1", "0400171_T1", "0400195_T1")
  ),
  gene = factor(rep("KRAS", 3L), levels = "KRAS"),
  # All-missing factor with no levels, as produced by droplevels().
  mut_type = factor(rep(NA_character_, 3L), levels = character(0)),
  cnv = factor(
    rep("High-level amplification", 3L),
    levels = "High-level amplification"
  ),
  is_cnv = rep(TRUE, 3L),
  stringsAsFactors = FALSE
)
# Same class vector as the original dput() of the tibble.
class(cnv_dat) <- c("tbl_df", "tbl", "data.frame")

# Mutation types in the order they should appear in the legend.
mut_levels <- c(
  "Missense", "Splice site", "Frameshift",
  "Nonsense", "Nonstop", "In-frame indel"
)

# Fill colour for each mutation type, keyed by mutation-type name.
cb_palette <- setNames(
  c("#095481", "#DE5423", "#179F30", "#F5B02C", "#FF69B4", "#9C39B6"),
  mut_levels
)

my code:

library(dplyr); library(readr); library(ggplot2)
# Base heat map: one tile per row of satur_muts, filled by mutation type.
# NOTE: satur_muts holds more than one row for some (id, gene) pairs, so this
# layer draws several tiles on top of each other in those cells.
p1 <- ggplot(data = satur_muts) +
  geom_tile(
    mapping = aes(x = id, y = gene, fill = mut_type),
    height = 0.9, width = 0.9
  ) +
  scale_fill_manual(
    values = cb_palette,
    na.value = "Grey95",
    drop = FALSE,
    guide = guide_legend(
      title = "Somatic mutations",
      override.aes = list(size = 4.5)
    ),
    # Legend entries: the mutation types actually present in the data, in
    # mut_levels order. intersect() drops the NA entries, and avoids indexing
    # the unique values with a permutation of the (longer) mut_levels vector.
    breaks = intersect(mut_levels, as.character(satur_muts$mut_type))
  ) +
  # Full-size white tiles for rows whose gene is the empty string; with the
  # example data above this filter matches no rows.
  geom_tile(
    data = filter(satur_muts, gene == ""),
    mapping = aes(x = id, y = gene),
    fill = "white",
    height = 1, width = 1
  )

# Copy-number overlay: add a point per row of cnv_dat on top of p1, with the
# point shape encoding the copy-number call.
o <- levels(cnv_dat$cnv)
# Point shape code for each copy-number call.
v_tuple <- c("High-level amplification" = 24, "Homozygous deletion" = 25)
b <- c("High-level amplification", "Homozygous deletion")
# Positions in v_tuple / b whose copy-number call occurs in cnv_dat.
# %in% compares the factor by its labels, so no re-indexing is needed.
keep <- which(names(v_tuple) %in% unique(cnv_dat$cnv))
p2 <- p1 +
  geom_point(
    data = cnv_dat,
    mapping = aes(x = id, y = gene, shape = cnv),
    fill = "grey", size = 3, stroke = 0.5, inherit.aes = FALSE
  ) +
  # `drop` is a scale argument, so it is set here rather than in guides().
  scale_shape_manual(
    values = v_tuple[keep],
    breaks = b[keep],
    drop = TRUE
  ) +
  # `override.aes` must be an argument of guide_legend(); passed directly to
  # guides() it does not affect the shape legend.
  guides(
    shape = guide_legend(
      title = "Somatic Copy Number Aberration",
      override.aes = list(size = 4.5)
    )
  )
p2

enter image description here

The two tiles to the left of the last blue-and-up-triangle tile should also have a blue background, per my data check below. I don't understand why one sample would plot correctly while the others don't.

# Data check: attach the copy-number call to each mutation/CNV row.
# The join keys are spelled out; they are the four columns the two tables
# share, i.e. the same keys an implicit (natural) join would pick.
mt <- left_join(
  satur_muts, cnv_dat,
  by = c("id", "gene", "mut_type", "is_cnv")
) %>%
  filter(!is.na(is_cnv))
# KRAS rows only. NOTE: the name `t` masks base::t() in this session.
t <- mt %>% filter(gene == "KRAS")

# A tibble: 7 × 5
          id  gene mut_type is_cnv                      cnv
       <chr> <chr>    <chr>  <lgl>                   <fctr>
1 0400136_T1  KRAS Missense  FALSE                       NA
2 0400136_T1  KRAS     <NA>   TRUE High-level amplification
3 0400171_T1  KRAS     <NA>   TRUE High-level amplification
4 0400171_T1  KRAS Missense  FALSE                       NA
5 0400179_T1  KRAS Missense  FALSE                       NA
6 0400195_T1  KRAS Missense  FALSE                       NA
7 0400195_T1  KRAS     <NA>   TRUE High-level amplification

Rsession info

R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] parallel  grid      stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] viridis_0.3.4      RColorBrewer_1.1-2 doMC_1.3.4         iterators_1.0.8    foreach_1.4.3      cowplot_0.7.0      lattice_0.20-34    gridExtra_2.2.1    readxl_0.1.1      
[10] forcats_0.2.0      stringr_1.2.0      devtools_1.12.0    dplyr_0.5.0        purrr_0.2.2        readr_1.0.0        tidyr_0.6.2        tibble_1.3.0       ggplot2_2.2.1     
[19] tidyverse_1.1.1   

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.10     plyr_1.8.4       tools_3.3.2      digest_0.6.12    jsonlite_1.4     lubridate_1.6.0  memoise_1.0.0    gtable_0.2.0     nlme_3.1-131     psych_1.6.12    
[11] DBI_0.5-1        haven_1.0.0      withr_1.0.2      httr_1.2.1       xml2_1.1.1       hms_0.3          R6_2.2.0         foreign_0.8-67   reshape2_1.4.2   modelr_0.1.0    
[21] magrittr_1.5     codetools_0.2-15 scales_0.4.1     assertthat_0.1   mnormt_1.5-5     rvest_0.3.2      colorspace_1.3-2 labeling_0.3     stringi_1.1.5    lazyeval_0.2.0  
[31] munsell_0.4.3    broom_0.4.2   

and Rstudio version

$version
[1] ‘1.0.136’
  • 1
    A [reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) would help. Else we can only try to guess how your code interacts with the dataset. – Z.Lin Aug 30 '17 at 14:44
  • @Z.Lin i hope this helps - please let me know if there's anything i can do to make this more clear – Emil_Longshore Aug 30 '17 at 15:46
  • added workable dataset using dput(droplevels(.)) and my rsession info -- does this help? – Emil_Longshore Aug 30 '17 at 19:18
  • 2
    Making a **minimal** reproducible example will make this much more approachable. (a) Don't put `rm(list = ls())` in your question - it's not needed and you will make people sad if they copy/paste your code and run it by accident. (b) Your `main_plot_sample_order.tsv` and `main_plot_gen_order` aren't copy/pastable. Can you use `dput(droplevels())` on subsets of them so they can be copy/pasted? (c) You're sharing a lot of data. Can you share less data and still illustrate the problem? It's often easier to debug a plot with 3 y and 3 x values than one with 12 y and 64 x values... – Gregor Thomas Aug 30 '17 at 19:27
  • 2
    (d) You have a plotting problem, but you share data, and then code used to transform it into the data you plot. Why not just share the data you plot? It looks like we just need a subset of `satur_muts` and `cnv_dat`. (e) You share almost 20 lines of code that are just theme options. If they are relevant to the question, mention that otherwise delete them as they just distract. (By *relevant to the question*, I mean if you delete those lines, does the problem go away? If so, they are necessary to reproduce the problem and it narrows down where we should look for issues. Otherwise they're noise) – Gregor Thomas Aug 30 '17 at 19:27
  • 2
    TL;DR, the code you share is awfully long. How much of that can you delete and still illustrate the problem? The shorter the code, the more likely people are to run it and answer the question. – Gregor Thomas Aug 30 '17 at 19:29
  • @Gregor I made these changes, does this help? Took out the theme (not relevant) and gave `satur_muts` and `cnv_dat` as input data – Emil_Longshore Aug 30 '17 at 19:46
  • Much much better! – Gregor Thomas Aug 30 '17 at 19:49
  • @Gregor i just updated `satur_muts`, i was creating it off of a table that had all of my sample ids (as factors), so it was cluttering the `dput` result. should be much more reasonable now – Emil_Longshore Aug 30 '17 at 19:57
  • Looks better, but the `satur_muts` is now a syntax error, I think you're missing some stuff at the beginning. – Gregor Thomas Aug 30 '17 at 20:05
  • @Gregor fixed that, should work now. thanks for your patience! – Emil_Longshore Aug 30 '17 at 20:15

1 Answers1

2

It has come to my attention by going through this exercise of making a reprex that passing in satur_muts, a table containing non-unique (x,y) pairings, causes ggplot to "guess" at which pairing to use, even though a single aesthetic is specified (per above code).

first get appropriate input data to make tile plot background (satur_muts):

# Genes in plotting order; factor levels are alphabetical.
genes <- factor(
  c("KRAS", "TP53", "CDKN2A"),
  levels = c("CDKN2A", "KRAS", "TP53")
)

# Sample ids, one factor level per sample.
ids <- factor(
  c("0400136_T1", "0400171_T1", "0400179_T1", "0400195_T1"),
  levels = c("0400136_T1", "0400171_T1", "0400179_T1", "0400195_T1")
)

# Mutation calls: one KRAS missense row per sample, none flagged as a CNV.
the_muts <- data.frame(
  id = c("0400136_T1", "0400171_T1", "0400179_T1", "0400195_T1"),
  gene = rep("KRAS", 4L),
  mut_type = factor(rep("Missense", 4L), levels = "Missense"),
  is_cnv = rep(FALSE, 4L),
  cnv = rep("No High-level CNV", 4L),
  stringsAsFactors = FALSE
)
# Same class vector as the original dput() of the tibble.
class(the_muts) <- c("tbl_df", "tbl", "data.frame")

Make the correct satur_muts:

library(dplyr)
# Full gene x sample grid, so every cell gets exactly one background row.
# The factors are converted to character so the join keys have the same type
# as the character `gene` / `id` columns of the_muts.
satur_muts_expand <- expand.grid(
  gene = as.character(genes),
  id = as.character(ids),
  stringsAsFactors = FALSE,
  KEEP.OUT.ATTRS = FALSE
) %>%
  as_data_frame()

# Keep only real (non-CNV) mutation rows and only the columns the tile layer
# needs, then de-duplicate. Dropping the CNV columns first means rows that
# differ only in `is_cnv` / `cnv` collapse to a single row.
new_satur_muts <- left_join(
  satur_muts_expand,
  the_muts %>%
    filter(!is.na(mut_type), !is_cnv) %>%
    select(gene, id, mut_type) %>%
    distinct(),
  by = c("gene", "id")
)

NOT:

# Counter-example kept for contrast: joins the_muts as-is, without the
# filter() and distinct() steps used to build new_satur_muts.
satur_muts <- left_join(satur_muts_expand, the_muts, by = c("gene", "id")) 

You should get this:

# Expected result: exactly one row per (gene, id) cell, with KRAS marked as
# Missense in every sample and all other cells missing.
new_satur_muts <- data.frame(
  gene = rep(c("KRAS", "TP53", "CDKN2A"), times = 4L),
  id = rep(
    c("0400136_T1", "0400171_T1", "0400179_T1", "0400195_T1"),
    each = 3L
  ),
  mut_type = factor(rep(c("Missense", NA, NA), times = 4L), levels = "Missense"),
  stringsAsFactors = FALSE
)
# Same class vector as the original dput() of the tibble.
class(new_satur_muts) <- c("tbl_df", "tbl", "data.frame")

Using cnv_dat from the OP and satur_muts <- new_satur_muts, running the same code as above (with some theme adjustments and coord_fixed()) fixes the problem!

Basically, this problem happens if you're trying to plot multiple aesthetics (colors, symbols, etc.) in the same tile. So in this example, I had to make sure that each sample-gene pairing was unique in the table used for plotting mutations (colors), then overlay the additional information (cnv) from a SEPARATE table. So it turns out this is not a graphical issue; rather, the quality of the input data was compromised before plotting. Thanks everyone for all the help!