I'm having an issue with adding some text annotations to a ggplot, and I would appreciate some help understanding why adding text annotations to a ggplot causes elements of my plot to disappear. Up front, most of this isn't my code. I'm borrowing bits and pieces from people on the internet and the creators of Seurat (https://github.com/satijalab/seurat/issues/2201 and https://github.com/satijalab/seurat/blob/HEAD/R/visualization.R).
The goal of my function is to produce a heatmap with multiple labels overhead to see how my single cell data falls into different groups. It works fine until I try to add labels using geom_text.
Here is the full function with the offensive code labeled by The offensive lines of code!
DoMultiBarHeatmap <- function (object,
features = NULL,
cells = NULL,
group.by = "ident",
additional.group.by = NULL,
group.bar = TRUE,
disp.min = -2.5,
disp.max = NULL,
slot = "scale.data",
assay = NULL,
label = TRUE,
size = 5.5,
title.size = 10,
hjust = 0,
angle = 45,
raster = TRUE,
draw.lines = TRUE,
lines.width = NULL,
group.bar.height = 0.02,
combine = TRUE)
{
# # huge thanks to https://github.com/satijalab/seurat/issues/2201
cells <- cells %||% colnames(x = object)
if (is.numeric(x = cells)) {
cells <- colnames(x = object)[cells]
}
assay <- assay %||% DefaultAssay(object = object)
DefaultAssay(object = object) <- assay
features <- features %||% VariableFeatures(object = object)
## Why reverse???
features <- rev(x = unique(x = features))
disp.max <- disp.max %||% ifelse(test = slot == "scale.data",
yes = 2.5, no = 6)
possible.features <- rownames(x = GetAssayData(object = object,
slot = slot))
if (any(!features %in% possible.features)) {
bad.features <- features[!features %in% possible.features]
features <- features[features %in% possible.features]
if (length(x = features) == 0) {
stop("No requested features found in the ", slot,
" slot for the ", assay, " assay.")
}
warning("The following features were omitted as they were not found in the ",
slot, " slot for the ", assay, " assay: ", paste(bad.features,
collapse = ", "))
}
data <- as.data.frame(x = as.matrix(x = t(x = GetAssayData(object = object,
slot = slot)[features, cells, drop = FALSE])))
object <- suppressMessages(expr = StashIdent(object = object,
save.name = "ident"))
group.by <- group.by %||% "ident"
groups.use <- object[[c(group.by, additional.group.by)]][cells, , drop = FALSE]
plots <- list()
for (i in group.by) {
data.group <- data
group.use <- groups.use[, c(i, additional.group.by), drop = FALSE]
for(colname in colnames(group.use)){
if (!is.factor(x = group.use[[colname]])) {
group.use[[colname]] <- factor(x = group.use[[colname]])
}
}
if (draw.lines) {
lines.width <- lines.width %||% ceiling(x = nrow(x = data.group) *
0.0025)
placeholder.cells <- sapply(X = 1:(length(x = levels(x = group.use[[i]])) *
lines.width), FUN = function(x) {
return(Seurat:::RandomName(length = 20))
})
placeholder.groups <- data.frame(foo=rep(x = levels(x = group.use[[i]]), times = lines.width))
placeholder.groups[additional.group.by] = NA
colnames(placeholder.groups) <- colnames(group.use)
rownames(placeholder.groups) <- placeholder.cells
group.levels <- levels(x = group.use[[i]])
group.use <- sapply(group.use, as.vector)
rownames(x = group.use) <- cells
group.use <- rbind(group.use, placeholder.groups)
na.data.group <- matrix(data = NA, nrow = length(x = placeholder.cells),
ncol = ncol(x = data.group), dimnames = list(placeholder.cells,
colnames(x = data.group)))
data.group <- rbind(data.group, na.data.group)
}
#group.use = group.use[order(group.use[[i]]), , drop=F]
group.use <- group.use[with(group.use, eval(parse(text=paste('order(', paste(c(i, additional.group.by), collapse=', '), ')', sep='')))), , drop=F]
print('got to the seurat::singlerastermap')
plot <- Seurat:::SingleRasterMap(data = data.group, raster = raster,
disp.min = disp.min, disp.max = disp.max, feature.order = features,
cell.order = rownames(x = group.use), group.by = group.use[[i]])
if (group.bar) {
pbuild <- ggplot_build(plot = plot)
group.use2 <- group.use
cols <- list()
na.group <- Seurat:::RandomName(length = 20)
print("if statement checkpoint 1")
for (colname in rev(x = colnames(group.use2))){
if (colname == group.by){
colid = paste0('Identity (', colname, ')')
} else {
colid = colname
}
print("if statement checkpoint 2")
if (draw.lines) {
levels(x = group.use2[[colname]]) <- c(levels(x = group.use2[[colname]]), na.group)
group.use2[placeholder.cells, colname] <- na.group
cols[[colname]] <- c(scales::hue_pal()(length(x = levels(x = group.use[[colname]]))), "#FFFFFF")
} else {
cols[[colname]] <- c(scales::hue_pal()(length(x = levels(x = group.use[[colname]]))))
}
names(x = cols[[colname]]) <- levels(x = group.use2[[colname]])
print("if statement checkpoint 3")
y.range <- diff(x = pbuild$layout$panel_params[[1]]$y.range)
y.pos <- max(pbuild$layout$panel_params[[1]]$y.range) + y.range * 0.015
y.max <- y.pos + group.bar.height * y.range
pbuild$layout$panel_params[[1]]$y.range <- c(pbuild$layout$panel_params[[1]]$y.range[1], y.max)
print("if statement checkpoint 4")
plot <- suppressMessages(plot +
annotation_raster(raster = t(x = cols[[colname]][group.use2[[colname]]]), xmin = -Inf, xmax = Inf, ymin = y.pos, ymax = y.max) +
annotation_custom(grob = grid::textGrob(label = colid, hjust = 0, gp = grid::gpar(cex = 0.75)), ymin = mean(c(y.pos, y.max)), ymax = mean(c(y.pos, y.max)), xmin = Inf, xmax = Inf) +
coord_cartesian(ylim = c(0, y.max), clip = "off"))
#temp <- as.data.frame(cols[[colname]][levels(group.use[[colname]])])
#colnames(temp) <- 'color'
#temp$x <- temp$y <- 1
#temp[['name']] <- as.factor(rownames(temp))
#temp <- ggplot(temp, aes(x=x, y=y, fill=name)) + geom_point(shape=21, size=5) + labs(fill=colname) + theme(legend.position = "bottom")
#legend <- get_legend(temp)
#multiplot(plot, legend, heights=3,1)
print("if statement checkpoint 5")
if ((colname == group.by) && label) {
x.max <- max(pbuild$layout$panel_params[[1]]$x.range)
# I used this change from seurat's own DoHeatmap function https://github.com/satijalab/seurat/blob/HEAD/R/visualization.R
x.divs <- pbuild$layout$panel_params[[1]]$x.major %||% attr(x = pbuild$layout$panel_params[[1]]$x$get_breaks(), which = "pos") # used to be just pbuild$layout$panel_params[[1]]$x.major
group.use$x <- x.divs
label.x.pos <- tapply(X = group.use$x, INDEX = group.use[[colname]],
FUN = median) * x.max
label.x.pos <- data.frame(group = names(x = label.x.pos),
label.x.pos)
print("if statement checkpoint 6")
#####################################################################
# The offensive lines of code!
#####################################################################
# Neither this
plot <- plot + geom_text(stat = "identity",
data = label.x.pos,
aes_string(label = "group",
x = "label.x.pos"),
y = y.max + y.max * 0.03 * 0.5,
angle = angle,
hjust = hjust,
size = title.size)
# nor this work.
plot + ggplot2::annotate(geom = "text", x = label.x.pos$label.x.pos, y = y.max + y.max * 0.03 * 0.5, label = label.x.pos$group)
# Both produce a missing heatmap
#####################################################################
plot <- suppressMessages(plot + coord_cartesian(ylim = c(0,
y.max + y.max * 0.002 * max(nchar(x = levels(x = group.use[[colname]]))) *
size), clip = "off"))
}
}
}
print("if statement checkpoint 7")
print("got to plot theme setting")
plot <- plot + theme(line = element_blank())
plots[[i]] <- plot
}
if (combine) {
plots <- CombinePlots(plots = plots)
}
return(plots)
}
I'm currently getting a plot that has the annotation bar, the labels, the legend, and the side gene labels, but the heatmap visualization itself is gone! I'm very confused what is happening because looking at Seurat's own DoHeatmap function, the + geom_text()
line looks very similar but their implementation works.
Right before I called the + geom_text()
this is what my plot looks like.
Edit
Here is a minimum working example that I made. It actually highlights something interesting. It looks like the heatmap isn't being erased, but it is being squished! I still am not sure how to deal with this.
I also added @jared_mamrot suggestion of y.max - (y.max...) but that didn't fix the whole issue.
library(Seurat)
library(ggplot2)
# This is a simulated 100 gene x 100 cell matrix
count_mat <- matrix(runif(10000, min = 0, max = 10), nrow = 100, ncol = 100)
rownames(count_mat) <- paste0("gene", 1:100)
colnames(count_mat) <- paste0("cell", 1:100)
count_mat[1:5,1:5]
# cell1 cell2 cell3 cell4 cell5
# gene1 6.058824 2.797155 5.941698 4.982548 7.969647
# gene2 3.534568 4.020849 2.100655 1.685938 1.808607
# gene3 1.119636 8.513394 6.585591 8.950073 7.711461
# gene4 7.776718 2.079949 9.435660 8.523215 4.750567
# gene5 3.692277 5.620272 4.565576 5.658301 9.054159
seurat.obj <- CreateSeuratObject(counts = count_mat,
project = "tiny_seurat")
seurat.obj@meta.data$axis1 <- sample(c("cond1", "cond2"), size = 100, replace = T)
seurat.obj@meta.data$axis2 <- sample(c("contrast1", "contrast2", "contrast3"), size = 100, replace = T)
seurat.obj <- NormalizeData(seurat.obj, normalization.method = "LogNormalize", scale.factor = 100)
seurat.obj <- ScaleData(seurat.obj, rownames(count_mat))
DoMultiBarHeatmap(seurat.obj,
features = paste0("gene", 1:25),
group.by="axis1",
slot = 'scale.data',
disp.max=1.5, disp.min=-1.5,
assay='RNA',
size=25,
additional.group.by = c("axis2"),
draw.lines = F,
label = T)