0

I'm having an issue with adding some text annotations to a ggplot, and I would appreciate some help understanding why adding text annotations to a ggplot causes elements of my plot to disappear. Up front, most of this isn't my code. I'm borrowing bits and pieces from people on the internet and the creators of Seurat (https://github.com/satijalab/seurat/issues/2201 and https://github.com/satijalab/seurat/blob/HEAD/R/visualization.R).

The goal of my function is to produce a heatmap with multiple labels overhead to see how my single cell data falls into different groups. It works fine until I try to add labels using geom_text.

Here is the full function with the offensive code labeled by The offensive lines of code!

DoMultiBarHeatmap <- function (object, 
                               features = NULL, 
                               cells = NULL, 
                               group.by = "ident", 
                               additional.group.by = NULL, 
                               group.bar = TRUE, 
                               disp.min = -2.5, 
                               disp.max = NULL, 
                               slot = "scale.data", 
                               assay = NULL, 
                               label = TRUE, 
                               size = 5.5, 
                               title.size = 10,
                               hjust = 0, 
                               angle = 45, 
                               raster = TRUE, 
                               draw.lines = TRUE, 
                               lines.width = NULL, 
                               group.bar.height = 0.02, 
                               combine = TRUE) 
{
  
  # # huge thanks to https://github.com/satijalab/seurat/issues/2201
  cells <- cells %||% colnames(x = object)
  if (is.numeric(x = cells)) {
    cells <- colnames(x = object)[cells]
  }
  assay <- assay %||% DefaultAssay(object = object)
  DefaultAssay(object = object) <- assay
  features <- features %||% VariableFeatures(object = object)
  ## Why reverse???
  features <- rev(x = unique(x = features))
  disp.max <- disp.max %||% ifelse(test = slot == "scale.data", 
                                   yes = 2.5, no = 6)
  possible.features <- rownames(x = GetAssayData(object = object, 
                                                 slot = slot))
  if (any(!features %in% possible.features)) {
    bad.features <- features[!features %in% possible.features]
    features <- features[features %in% possible.features]
    if (length(x = features) == 0) {
      stop("No requested features found in the ", slot, 
           " slot for the ", assay, " assay.")
    }
    warning("The following features were omitted as they were not found in the ", 
            slot, " slot for the ", assay, " assay: ", paste(bad.features, 
                                                             collapse = ", "))
  }
  data <- as.data.frame(x = as.matrix(x = t(x = GetAssayData(object = object, 
                                                             slot = slot)[features, cells, drop = FALSE])))
  
  object <- suppressMessages(expr = StashIdent(object = object, 
                                               save.name = "ident"))
  group.by <- group.by %||% "ident"
  groups.use <- object[[c(group.by, additional.group.by)]][cells, , drop = FALSE]
  plots <- list()
  for (i in group.by) {
    data.group <- data
    group.use <- groups.use[, c(i, additional.group.by), drop = FALSE]
    
    for(colname in colnames(group.use)){
      if (!is.factor(x = group.use[[colname]])) {
        group.use[[colname]] <- factor(x = group.use[[colname]])
      }  
    }
    
    if (draw.lines) {
      lines.width <- lines.width %||% ceiling(x = nrow(x = data.group) * 
                                                0.0025)
      placeholder.cells <- sapply(X = 1:(length(x = levels(x = group.use[[i]])) * 
                                           lines.width), FUN = function(x) {
                                             return(Seurat:::RandomName(length = 20))
                                           })
      placeholder.groups <- data.frame(foo=rep(x = levels(x = group.use[[i]]), times = lines.width))
      placeholder.groups[additional.group.by] = NA
      colnames(placeholder.groups) <- colnames(group.use)
      rownames(placeholder.groups) <- placeholder.cells
      
      group.levels <- levels(x = group.use[[i]])
      
      group.use <- sapply(group.use, as.vector)
      rownames(x = group.use) <- cells
      
      group.use <- rbind(group.use, placeholder.groups)
      
      na.data.group <- matrix(data = NA, nrow = length(x = placeholder.cells), 
                              ncol = ncol(x = data.group), dimnames = list(placeholder.cells, 
                                                                           colnames(x = data.group)))
      data.group <- rbind(data.group, na.data.group)
    }
    
    #group.use = group.use[order(group.use[[i]]), , drop=F]
    group.use <- group.use[with(group.use, eval(parse(text=paste('order(', paste(c(i, additional.group.by), collapse=', '), ')', sep='')))), , drop=F]
    
    print('got to the seurat::singlerastermap')
    plot <- Seurat:::SingleRasterMap(data = data.group, raster = raster, 
                                     disp.min = disp.min, disp.max = disp.max, feature.order = features, 
                                     cell.order = rownames(x = group.use), group.by = group.use[[i]])
    
    if (group.bar) {
      pbuild <- ggplot_build(plot = plot)
      group.use2 <- group.use
      cols <- list()
      na.group <- Seurat:::RandomName(length = 20)
      print("if statement checkpoint 1")
      for (colname in rev(x = colnames(group.use2))){
        if (colname == group.by){
          colid = paste0('Identity (', colname, ')')
        } else {
          colid = colname
        }
        
        print("if statement checkpoint 2")
        if (draw.lines) {
          levels(x = group.use2[[colname]]) <- c(levels(x = group.use2[[colname]]), na.group)  
          group.use2[placeholder.cells, colname] <- na.group
          cols[[colname]] <- c(scales::hue_pal()(length(x = levels(x = group.use[[colname]]))), "#FFFFFF")
        } else {
          cols[[colname]] <- c(scales::hue_pal()(length(x = levels(x = group.use[[colname]]))))
        }
        names(x = cols[[colname]]) <- levels(x = group.use2[[colname]])
        
        print("if statement checkpoint 3")
        y.range <- diff(x = pbuild$layout$panel_params[[1]]$y.range)
        y.pos <- max(pbuild$layout$panel_params[[1]]$y.range) + y.range * 0.015
        y.max <- y.pos + group.bar.height * y.range
        pbuild$layout$panel_params[[1]]$y.range <- c(pbuild$layout$panel_params[[1]]$y.range[1], y.max)
        
        print("if statement checkpoint 4")
        plot <- suppressMessages(plot + 
                                   annotation_raster(raster = t(x = cols[[colname]][group.use2[[colname]]]),  xmin = -Inf, xmax = Inf, ymin = y.pos, ymax = y.max) + 
                                   annotation_custom(grob = grid::textGrob(label = colid, hjust = 0, gp = grid::gpar(cex = 0.75)), ymin = mean(c(y.pos, y.max)), ymax = mean(c(y.pos, y.max)), xmin = Inf, xmax = Inf) +
                                   coord_cartesian(ylim = c(0, y.max), clip = "off")) 
        
        #temp <- as.data.frame(cols[[colname]][levels(group.use[[colname]])])
        #colnames(temp) <- 'color'
        #temp$x <- temp$y <- 1
        #temp[['name']] <- as.factor(rownames(temp))
        
        #temp <- ggplot(temp, aes(x=x, y=y, fill=name)) + geom_point(shape=21, size=5) + labs(fill=colname) + theme(legend.position = "bottom")
        #legend <- get_legend(temp)
        #multiplot(plot, legend, heights=3,1)
        
        print("if statement checkpoint 5")
        if ((colname == group.by) && label) {
          x.max <- max(pbuild$layout$panel_params[[1]]$x.range)
          # I used this change from seurat's own DoHeatmap function https://github.com/satijalab/seurat/blob/HEAD/R/visualization.R
          x.divs <- pbuild$layout$panel_params[[1]]$x.major %||% attr(x = pbuild$layout$panel_params[[1]]$x$get_breaks(), which = "pos") # used to be just pbuild$layout$panel_params[[1]]$x.major
          group.use$x <- x.divs
          label.x.pos <- tapply(X = group.use$x, INDEX = group.use[[colname]],
                                FUN = median) * x.max
          label.x.pos <- data.frame(group = names(x = label.x.pos), 
                                    label.x.pos)
          
          print("if statement checkpoint 6")
          #####################################################################
          # The offensive lines of code!
          #####################################################################
          # Neither this 
          plot <- plot + geom_text(stat = "identity", 
                                   data = label.x.pos, 
                                   aes_string(label = "group", 
                                              x = "label.x.pos"), 
                                   y = y.max + y.max * 0.03 * 0.5, 
                                   angle = angle, 
                                   hjust = hjust, 
                                   size = title.size)
          # nor this work.
          plot + ggplot2::annotate(geom = "text", x = label.x.pos$label.x.pos, y = y.max + y.max * 0.03 * 0.5, label = label.x.pos$group)
          # Both produce a missing heatmap
          #####################################################################
          
          
          plot <- suppressMessages(plot + coord_cartesian(ylim = c(0, 
                                                                   y.max + y.max * 0.002 * max(nchar(x = levels(x = group.use[[colname]]))) * 
                                                                     size), clip = "off"))
        }
      }
    }
    print("if statement checkpoint 7")
    print("got to plot theme setting")
    plot <- plot + theme(line = element_blank())
    plots[[i]] <- plot
  }
  if (combine) {
    plots <- CombinePlots(plots = plots)
  }
  return(plots)
}

I'm currently getting a plot that has the annotation bar, the labels, the legend, and the side gene labels, but the heatmap visualization itself is gone! I'm very confused what is happening because looking at Seurat's own DoHeatmap function, the + geom_text() line looks very similar but their implementation works.

enter image description here

Right before I called the + geom_text() this is what my plot looks like. enter image description here

Edit

Here is a minimum working example that I made. It actually highlights something interesting. It looks like the heatmap isn't being erased, but it is being squished! I still am not sure how to deal with this.

I also added @jared_mamrot suggestion of y.max - (y.max...) but that didn't fix the whole issue.

library(Seurat)
library(ggplot2)

# This is a simulated 100 gene x 100 cell matrix
count_mat <- matrix(runif(10000, min = 0, max = 10), nrow = 100, ncol = 100)
rownames(count_mat) <- paste0("gene", 1:100)
colnames(count_mat) <- paste0("cell", 1:100)
count_mat[1:5,1:5]
#          cell1    cell2    cell3    cell4    cell5
# gene1 6.058824 2.797155 5.941698 4.982548 7.969647
# gene2 3.534568 4.020849 2.100655 1.685938 1.808607
# gene3 1.119636 8.513394 6.585591 8.950073 7.711461
# gene4 7.776718 2.079949 9.435660 8.523215 4.750567
# gene5 3.692277 5.620272 4.565576 5.658301 9.054159

seurat.obj <- CreateSeuratObject(counts = count_mat,
                                project = "tiny_seurat")
seurat.obj@meta.data$axis1 <- sample(c("cond1", "cond2"), size = 100, replace = T)
seurat.obj@meta.data$axis2 <- sample(c("contrast1", "contrast2", "contrast3"), size = 100, replace = T)

seurat.obj <- NormalizeData(seurat.obj, normalization.method = "LogNormalize", scale.factor = 100)
seurat.obj <- ScaleData(seurat.obj, rownames(count_mat))

DoMultiBarHeatmap(seurat.obj,
                  features = paste0("gene", 1:25),
                  group.by="axis1", 
                  slot = 'scale.data', 
                  disp.max=1.5, disp.min=-1.5, 
                  assay='RNA', 
                  size=25,
                  additional.group.by = c("axis2"), 
                  draw.lines = F, 
                  label = T)

enter image description here

  • 2
    Can you try to isolate the problem and provide a minimal reproducible example that allows us to test possible solutions? I'd suggest to include some data that works with your code and trim down the code to only the parts that are necessary to expose the problem. Some primers on asking good questions here: https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example – teunbrand Apr 18 '23 at 21:59
  • Looks to me like `plot + ggplot2::annotate(geom = "text", x = label.x.pos$label.x.pos, y = y.max + y.max * 0.03 * 0.5, label = label.x.pos$group)` would be the way to go... What do you get with `plot + ggplot2::annotate(geom = "text", x = label.x.pos$label.x.pos, y = y.max - (y.max * 0.03 * 0.5), label = label.x.pos$group)`? Is the output the same? – jared_mamrot Apr 18 '23 at 22:51
  • Thank you @teunbrand I added a minimum working example. I hope it helps! – Matthew Kozubov Apr 19 '23 at 17:59
  • Similar problem answered [here](https://stackoverflow.com/questions/45948926/ggplotly-text-aesthetic-causing-geom-line-to-not-display)? – Idiot Tom Apr 27 '23 at 13:45

0 Answers0