0

Hello, I have a data frame such as:

tab

  X       molecule gene start_gene end_gene start_scaff end_scaff   strand direction COL1 COL2
1 7  scaffold_1254   G7       6708    11967           1     20072 backward        -1   10   20
2 5  scaffold_7638   G5       9567    10665           1     15336 backward        -1   18    1
3 4  scaffold_7638   G4       3456     4479           1     15336  forward         1   18    1
4 2 scaffold_15158   G2      10105    10609           1     13487 backward        -1    5    9
5 6  scaffold_8315   G6       2760     3849           1     10827  forward         1   25    7
6 3  scaffold_7180   G3       9814    10132           1     10155 backward        -1   21    9
7 1 scaffold_74038   G1       1476     2010           1      2010  forward         1    8   34

So far, with this code:

# Draw every scaffold as a grey bar and overlay its gene as a directed arrow.
ggplot(
  tab,
  aes(x = start_scaff, xend = end_scaff, y = molecule, yend = molecule)
) +
  # Grey bar spanning the scaffold from start_scaff to end_scaff.
  geom_segment(colour = "grey80", size = 3) +
  # Gene arrow: rows with direction == 1 run start_gene -> end_gene,
  # all other rows run end_gene -> start_gene so the arrowhead is flipped.
  geom_segment(
    aes(
      x = ifelse(direction == 1, start_gene, end_gene),
      xend = ifelse(direction == 1, end_gene, start_gene)
    ),
    size = 2,
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  # Gene label, nudged by 0.5 along the y axis.
  geom_text_repel(
    aes(x = start_gene, y = molecule, label = gene),
    size = 2,
    nudge_y = 0.5
  ) +
  # Use the factor levels of molecule in reverse order as y-axis limits.
  scale_y_discrete(limits = rev(levels(tab$molecule))) +
  theme_minimal()

I managed to get this plot: enter image description here

and I wondered whether there is a way to add a column right next to the geom_segment showing the COL1 and COL2 values, and to color the text inside the boxes depending on a threshold: green for values > 10, red for values <= 10

and get something like

enter image description here

dput(tab)

# Reproducible definition of the example data frame (output of dput(tab)):
# 7 rows; molecule, gene and strand are factors, the remaining columns are
# integers. Assign the result to `tab` to run the snippets on this page.
structure(list(X = c(7L, 5L, 4L, 2L, 6L, 3L, 1L), molecule = structure(c(1L, 
5L, 5L, 2L, 6L, 3L, 4L), .Label = c("scaffold_1254", "scaffold_15158", 
"scaffold_7180", "scaffold_74038", "scaffold_7638", "scaffold_8315"
), class = "factor"), gene = structure(c(7L, 5L, 4L, 2L, 6L, 
3L, 1L), .Label = c("G1", "G2", "G3", "G4", "G5", "G6", "G7"), class = "factor"), 
    start_gene = c(6708L, 9567L, 3456L, 10105L, 2760L, 9814L, 
    1476L), end_gene = c(11967L, 10665L, 4479L, 10609L, 3849L, 
    10132L, 2010L), start_scaff = c(1L, 1L, 1L, 1L, 1L, 1L, 1L
    ), end_scaff = c(20072L, 15336L, 15336L, 13487L, 10827L, 
    10155L, 2010L), strand = structure(c(1L, 1L, 2L, 1L, 2L, 
    1L, 2L), .Label = c("backward", "forward"), class = "factor"), 
    direction = c(-1L, -1L, 1L, -1L, 1L, -1L, 1L), COL1 = c(10L, 
    18L, 18L, 5L, 25L, 21L, 8L), COL2 = c(20L, 1L, 1L, 9L, 7L, 
    9L, 34L)), class = "data.frame", row.names = c(NA, -7L))
chippycentra
  • 3,396
  • 1
  • 6
  • 24
  • would one of the solutions here https://stackoverflow.com/questions/60349028/how-to-add-a-table-to-a-ggplot help you? – StupidWolf Sep 04 '20 at 08:59
  • This is in the plotting region https://stackoverflow.com/questions/12318120/adding-table-within-the-plotting-region-of-a-ggplot-in-r and this one in legend https://stackoverflow.com/questions/20432635/inserting-a-table-under-the-legend-in-a-ggplot2-and-saving-everything-to-a-file – Ronak Shah Sep 04 '20 at 09:01
  • Well, I need to incorporate it directly on the middle of each corresponding geom_segment, not only add a table in the table ... – chippycentra Sep 04 '20 at 09:02

1 Answers1

1
# Reshape COL1/COL2 into long format (one row per molecule and column) and
# attach the text colour and the x position at which each value is printed.
# Assumes dplyr and tidyr are attached (select, pivot_longer, mutate, %>%).
col_data <- tab %>%
  select(molecule, COL1, COL2) %>%
  pivot_longer(cols = contains("COL")) %>%
  mutate(
    # Threshold requested in the question: values > 10 are green and
    # values <= 10 are red, so a value of exactly 10 must be red.
    color = ifelse(value <= 10, "darkred", "darkgreen"),
    # Both columns sit to the right of the longest scaffold:
    # COL1 at 1.075 times and COL2 at 1.2 times the largest end_scaff.
    x = ifelse(
      name == "COL1",
      max(tab$end_scaff) * 1.075,
      max(tab$end_scaff) * 1.2
    )
  )

# Column headers: one label per distinct x position in col_data, with the
# positions sorted in increasing order so they pair up with COL1, COL2.
header_data <- data.frame(
  x = sort(unique(col_data$x)),
  label = c("COL1", "COL2")
)

# Scaffold/gene plot with the COL1 and COL2 values printed as coloured text
# to the right of the scaffolds.
ggplot(
  tab,
  aes(x = start_scaff, xend = end_scaff, y = molecule, yend = molecule)
) +
  # Grey bar spanning the scaffold from start_scaff to end_scaff.
  geom_segment(colour = "grey80", size = 3) +
  # Gene arrow: rows with direction == 1 run start_gene -> end_gene,
  # all other rows run end_gene -> start_gene so the arrowhead is flipped.
  geom_segment(
    aes(
      x = ifelse(direction == 1, start_gene, end_gene),
      xend = ifelse(direction == 1, end_gene, start_gene)
    ),
    size = 2,
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  # Gene label, nudged by 0.5 along the y axis.
  geom_text_repel(
    aes(x = start_gene, y = molecule, label = gene),
    size = 2,
    nudge_y = 0.5
  ) +
  # Use the factor levels of molecule in reverse order as y-axis limits.
  scale_y_discrete(limits = rev(levels(tab$molecule))) +
  theme_minimal() +
  # COL1/COL2 values; the colour column already holds colour names, which
  # scale_color_identity() below uses as-is.
  geom_text(
    aes(x = x, y = molecule, label = value, color = color),
    data = col_data,
    inherit.aes = FALSE,
    fontface = "bold"
  ) +
  # Column headers pinned to the top edge of the panel (y = Inf).
  geom_text(
    aes(x = x, y = c(Inf, Inf), label = label),
    data = header_data,
    inherit.aes = FALSE,
    fontface = "bold",
    vjust = "inward"
  ) +
  scale_color_identity()

gives:

enter image description here

You can add:

# Keep only the x-axis breaks that do not exceed the largest end_scaff, so
# no tick labels are drawn under the COL1/COL2 text columns.
scale_x_continuous(breaks = function(limits) {
  candidate_breaks <- scales::pretty_breaks(4)(limits)
  candidate_breaks[candidate_breaks <= max(tab$end_scaff)]
})

to remove the x-axis labels that exceed the longest scaffold:

enter image description here

Using patchwork you can create 2 plots and then glue them:

# Left-hand plot: scaffolds as grey bars with the genes drawn as arrows.
p1 <- ggplot(
  tab,
  aes(x = start_scaff, xend = end_scaff, y = molecule, yend = molecule)
) +
  # Grey bar spanning the scaffold from start_scaff to end_scaff.
  geom_segment(colour = "grey80", size = 3) +
  # Gene arrow: rows with direction == 1 run start_gene -> end_gene,
  # all other rows run end_gene -> start_gene so the arrowhead is flipped.
  geom_segment(
    aes(
      x = ifelse(direction == 1, start_gene, end_gene),
      xend = ifelse(direction == 1, end_gene, start_gene)
    ),
    size = 2,
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  # Gene label, nudged by 0.5 along the y axis.
  geom_text_repel(
    aes(x = start_gene, y = molecule, label = gene),
    size = 2,
    nudge_y = 0.5
  ) +
  # Use the factor levels of molecule in reverse order as y-axis limits.
  scale_y_discrete(limits = rev(levels(tab$molecule))) +
  theme_minimal()


# Long-format COL1/COL2 values for the right-hand (table-like) plot.
# Assumes dplyr and tidyr are attached (select, pivot_longer, mutate, %>%).
col_data <- tab %>%
  select(molecule, COL1, COL2) %>%
  pivot_longer(cols = contains("COL")) %>%
  mutate(
    # Threshold requested in the question: values > 10 are green and
    # values <= 10 are red, so a value of exactly 10 must be red.
    color = ifelse(value <= 10, "darkred", "darkgreen"),
    # Discrete x position: level "0" for COL1, level "1" for COL2.
    x = ifelse(name == "COL1", 0, 1) %>% factor()
  )

# Right-hand plot: the COL1/COL2 values as coloured text, one row per
# molecule, using the same reversed level order on y as the scaffold plot.
p2 <- ggplot(col_data, aes(x = x, y = molecule)) +
  # Values; the colour column already holds colour names, which
  # scale_color_identity() below uses as-is.
  geom_text(aes(label = value, color = color), size = 5, fontface = "bold") +
  # Column headers pinned to the top edge of the panel (y = Inf).
  geom_text(
    aes(x = x, y = c(Inf, Inf), label = label),
    data = data.frame(label = c("COL1", "COL2"), x = factor(c(0, 1))),
    inherit.aes = FALSE,
    size = 6,
    fontface = "bold",
    vjust = "inward"
  ) +
  scale_color_identity() +
  scale_y_discrete(limits = rev(levels(col_data$molecule))) +
  labs(x = NULL) +
  theme_void() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Place the two plots side by side with relative widths 3 and 1.
p1 +
  p2 +
  plot_layout(widths = c(3, 1))

enter image description here

det
  • 5,013
  • 1
  • 8
  • 16