1

I'm trying to recreate the "Clustergram" from enrichR (reproduced below)

1

The part I'm missing is the bars overlaid on the x-axis labels at the top, which correspond to the "combined score" from enrichR. Is there some way for me to recreate this in ggplot? TIA!

Here is what I have so far, using patchwork. I'm not sure how to angle my bars and get the labels to line up properly.

library(ggplot2)
library(patchwork)    
# Toy heatmap input: one row per (Term, Genes) pair, seven genes per term.
# `n` is either 1 or NA for each pair.
gene.dat <- data.frame(
  Term = rep(
    c(
      "Diseases Of Base Excision Repair",
      "HDR Thru Homologous Recombination (HRR)"
    ),
    each = 7
  ),
  Genes = rep(c("A", "B", "C", "D", "E", "F", "G"), times = 2),
  n = c(
    NA, 1, NA, NA, 1, 1, 1,
    1, NA, 1, 1, NA, NA, 1
  )
)

# One combined score per term.
scores.dat <- data.frame(
  Term = c(
    "HDR Thru Homologous Recombination (HRR)",
    "Diseases Of Base Excision Repair"
  ),
  Combined.Score = c(5329, 2738)
)
        
# Heatmap of `n` for every (Term, Genes) pair, with the x axis moved to the top.
# The x-axis text is blanked here so that score.plot can carry the term labels
# once both plots are combined with patchwork. The original standalone version
# used instead:
#   axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0, size = 16,
#                              color = 'black')
main.plot <- ggplot(gene.dat, aes(x = Term, y = Genes)) +
  geom_tile(aes(fill = n)) +
  scale_x_discrete(position = "top") +
  labs(x = "", y = "") +
  theme(
    panel.background = element_blank(),
    legend.position = "none",
    axis.text.x = element_blank(),
    plot.title = element_text(hjust = 1)
  )
        
# Column chart of the combined score per term. Each term's name is drawn
# vertically (angle = 90) at the height of its bar, since geom_text inherits
# the x/y mapping. theme_void() removes axes and background.
score.plot <- ggplot(
  data = scores.dat,
  mapping = aes(x = Term, y = Combined.Score)
) +
  theme_void() +
  geom_col() +
  geom_text(mapping = aes(label = Term), hjust = 0.5, angle = 90)
        
# Stack the score bars above the heatmap (one column, two rows, with the
# heatmap three times as tall as the bars).
# NOTE(review): `widths` has two entries although the layout has one column.
Y <- score.plot + main.plot +
  plot_layout(
    nrow = 2,
    ncol = 1,
    widths = c(2.75, 1),
    heights = c(1, 3)
  )
Z.Lin
  • 28,055
  • 6
  • 54
  • 94
  • 1
    The general answer is yes. But IMHO this requires creating the "axis bar chart" as a separate plot, then gluing it to your main plot via e.g. `patchwork`. For more help you have to provide [a minimal reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) including the code you have tried and a snippet of your data or some fake data. – stefan Apr 12 '23 at 17:14
  • Added an example, trying to use patchwork as you suggested. – radishhorse Apr 12 '23 at 17:43

1 Answer

3

One option would be to create the bar charts for the axis via `geom_polygon`, which, however, requires some effort to rotate the "bars" by 45 degrees. For the latter, I adapted the answer by @teunbrand on this post.

library(ggplot2)
library(patchwork)
library(dplyr)

# Give every term a numeric axis position (its factor level index) and scale
# the combined scores so that the largest score maps to 1 and a score of 0
# maps to 0.
bars <- mutate(
  scores.dat,
  Term = factor(Term),
  x = as.numeric(Term),
  y = scales::rescale(
    Combined.Score,
    from = c(0, max(Combined.Score)),
    to = c(0, 1)
  )
)

# Expand each observation into the four corners of a rectangle polygon:
# (x - 0.5, 0), (x - 0.5, y), (x + 0.5, y), (x + 0.5, 0).
bars <- bars[rep(seq_len(nrow(bars)), each = 4), ]
xpand <- c(-1, -1, 1, 1) * 0.5
ypand <- c(0, 1, 1, 0)
bars <- bars |>
  # Group by term so the length-4 offset vectors line up with the four
  # repeated rows of each term.
  group_by(Term) |>
  mutate(
    x = x + xpand,
    y = y * ypand
  ) |>
  # The grouping is only needed for the offsets above; drop it so it does not
  # leak into the later steps.
  ungroup()

# Map every bar onto the slanted axis. `rotmat` sends the unit x direction to
# (-0.5, 0.5) and the unit y direction to (0.5, 0.5), so a bar's base ends up
# on a 45-degree edge and its height runs along the up-right diagonal.
rotmat <- matrix(c(-.5, .5, .5, .5), ncol = 2)

# Transform the four corners of a single term's bar.
rotate_bar <- function(bar) {
  left_edge <- min(bar$x)
  # Shift the bar so its left edge sits at x = 0 before applying the map
  bar$x <- bar$x - left_edge
  corners <- as.matrix(bar[, c("x", "y")])
  bar[, c("x", "y")] <- t(rotmat %*% t(corners))
  # Move back to the term's own position, one unit further to the right
  bar$x <- bar$x + left_edge + 1
  bar
}

bars <- bind_rows(lapply(split(bars, bars$Term), rotate_bar))

# One triangle per term, used as the slanted axis background. For the term at
# axis position i the vertices are (i - 0.5, 0), (i, 0.5) and (i + 0.5, 0).
# The vertices are derived from the number of factor levels, so this works for
# any number of terms rather than exactly two.
triangles <- data.frame(
  Term = rep(levels(bars$Term), each = 3),
  # The length-3 offset vector is recycled across the repeated positions
  x = rep(seq_len(nlevels(bars$Term)), each = 3) + c(-.5, 0, .5),
  y = rep(c(0, .5, 0), times = nlevels(bars$Term))
)

# Label anchor per term: keep the two triangle vertices with the largest x
# and take the mean of their coordinates.
right_edge <- triangles |>
  group_by(Term) |>
  slice_max(order_by = x, n = 2)

labels <- right_edge |>
  summarise(x = mean(x), y = mean(y))

# Axis "bar chart": grey triangles as the slanted axis background, pink
# polygons for the transformed score bars, and the term names drawn at 45
# degrees. Polygons are drawn in this order so the bars sit on top of the
# triangles.
score.plot <- ggplot(mapping = aes(x = x, y = y, group = Term)) +
  geom_polygon(data = triangles, color = "white", fill = "grey") +
  geom_polygon(data = bars, color = "white", fill = "lightpink") +
  geom_text(
    mapping = aes(label = Term),
    data = labels,
    color = "black",
    angle = 45,
    hjust = -.025,
    vjust = .5
  ) +
  # Equal aspect ratio keeps the 45-degree geometry; clipping is off so that
  # drawing may extend beyond the panel.
  coord_equal(clip = "off") +
  scale_x_continuous(
    expand = c(0, 0),
    breaks = seq_along(levels(bars$Term)),
    labels = levels(bars$Term)
  ) +
  # No expansion at the bottom; 0.6 additive units of expansion at the top
  scale_y_continuous(expand = c(0, 0, 0, .6)) +
  theme_void()

# Heatmap of `n` per (Term, Genes) pair without legend, titles or background.
# The x scale gets one additive unit of expansion on its upper side only;
# presumably this matches the horizontal extent of score.plot - TODO confirm.
main.plot <- ggplot(data = gene.dat, mapping = aes(x = Term, y = Genes)) +
  geom_tile(mapping = aes(fill = n)) +
  guides(fill = "none") +
  labs(y = NULL, x = NULL) +
  scale_x_discrete(expand = c(0, 0, 0, 1), position = "top") +
  theme_void() +
  # Re-enable the gene labels that theme_void() removed
  theme(axis.text.y = element_text(margin = margin(r = 5.5)))

# Stack the score bars above the heatmap in a single column and print the
# combined patchwork.
single_column <- plot_layout(ncol = 1)
score.plot + main.plot + single_column

enter image description here

stefan
  • 90,330
  • 6
  • 25
  • 51