1

I would like to position labels close to the legend.

In the code below I have hardcoded (x, y) values in geom_label to get the desired result for the current data frame:

#  Creating dataframe
library(ggplot2)
# Observed values to be plotted on the x axis.
values <- c(
  rep(0, 2), rep(2, 3), rep(3, 3), rep(4, 3), 5,
  rep(6, 2), 8, 9, rep(11, 2)
)

# Every column has to be as long as `values`, so the length is derived once
# instead of repeating a hard-coded 18; the frame stays valid when `values`
# is replaced by a different data set.
n_obs <- length(values)
obs_number <- rep(n_obs, n_obs) # shown in the legend as "Observations number"
value_1 <- rep(4, n_obs)
value_2 <- rep(7, n_obs)
value_3 <- rep(3, n_obs)

data_to_plot <- data.frame(values, obs_number, value_1, value_2, value_3)

# Calculate max frequency value for using in `geom_label`.
# Base R is used on purpose: only ggplot2 is attached by this script, so the
# dplyr verbs (`%>%`, group_by(), count(), arrange()) are not available here.
distinct_values <- sort(unique(data_to_plot$values))
frequency_count <- data.frame(
  values = distinct_values,
  # table() orders its cells by the sorted distinct values, which matches
  # `distinct_values` above.
  n = as.vector(table(data_to_plot$values))
)
# Ascending by count, as in the original pipeline.
frequency_count <- frequency_count[order(frequency_count$n), ]
max_frequency <- max(frequency_count$n)

# Plot: histogram and count-scaled density of `values`, one dashed reference
# line per value_1 / value_2 / value_3, and coloured labels placed next to the
# legend. Expects `data_to_plot` and `max_frequency` to exist already.
ggplot(data_to_plot, aes(x = values)) +
  geom_histogram(
    aes(y = ..count..),
    binwidth = 1, colour = "black", fill = "white"
  ) +
  geom_density(aes(y = ..count..), fill = "blue", alpha = 0.25) +
  # One line per reference value. The line colours match the legend entries
  # set up in scale_color_manual() below (value_1 = forestgreen,
  # value_2 = purple, value_3 = red); previously value_1 was drawn twice and
  # value_2 was never drawn.
  geom_vline(
    aes(xintercept = value_1),
    color = "forestgreen", linetype = "dashed", size = 0.5, alpha = 1
  ) +
  geom_vline(
    aes(xintercept = value_2),
    color = "purple", linetype = "dashed", size = 0.5, alpha = 1
  ) +
  geom_vline(
    aes(xintercept = value_3),
    color = "red", linetype = "dashed", size = 0.5, alpha = 1
  ) +
  # Labels are positioned in data coordinates relative to the largest x value
  # and the highest bar. The colour strings inside aes() are mapped through
  # the manual colour scale, which is what creates the legend entries.
  geom_label(
    aes(
      label = obs_number, y = max_frequency * 0.87,
      x = max(values) - 2.2, color = "blue"
    ),
    size = 3.5, alpha = 1
  ) +
  geom_label(
    aes(
      label = value_1, y = max_frequency * 0.83,
      x = max(values) - 2.2, color = "forestgreen"
    ),
    size = 3.5, alpha = 1
  ) +
  geom_label(
    aes(
      label = value_2, y = max_frequency * 0.79,
      x = max(values) - 2.2, color = "purple"
    ),
    size = 3.5, alpha = 1
  ) +
  geom_label(
    aes(
      label = value_3, y = max_frequency * 0.75,
      x = max(values) - 2.2, color = "red"
    ),
    size = 3.5, alpha = 1
  ) +
  scale_color_manual(
    name = "Values",
    labels = c("Observations number", "value_1", "value_2", "value_3"),
    values = c("blue", "forestgreen", "purple", "red")
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(colour = "darkblue"),
    axis.text.x = element_text(
      face = "plain", color = "black", size = 10, angle = 0
    ),
    axis.title.y = element_text(colour = "darkblue"),
    axis.text.y = element_text(
      face = "plain", color = "black", size = 10, angle = 0
    ),
    legend.position = c(.90, .80)
  ) +
  # Single labs() call: an earlier labs() set the same three labels and was
  # completely overridden by this one, so only the effective values are kept.
  labs(title = "DLT values", y = "frequency", x = "days") +
  scale_x_continuous(breaks = seq(0, max(data_to_plot$values), 1))

This is desired result: Desired result

But this will not work for all datasets. Plot for the first 14 values: labels are not located close to the legend

Question:

How can I get the Cartesian coordinates of the plot area, so that I can replace max_frequency and max(values) in geom_label and align the labels with the legend, given that legend.position = c(.90, .80)?

Other alternatives are also welcome.

Iraleksa
  • 155
  • 1
  • 9
  • 2
    welcome to SO. You're looking for npc coordinates. Check this question, and Allan's answer. I think this is what you're looking for. You can annotate using "label" geom. https://stackoverflow.com/a/63742203/7941188 – tjebo Jan 08 '21 at 18:53
  • Thank you! I am not sure if I can use npc units with `geom_label`. I am using npc units in `legend.position`. It should be in range between 0 and 1. But in `geom_label` npc units do not work, that is why I was calculating `max_frequency` to know the highest `y-axis` and used ` max(values)` for `x-axis`. – Iraleksa Jan 08 '21 at 19:01
  • 1
    Having said that - might not be easiest to combine the label geom with Allan's function. Maybe check https://cran.r-project.org/web/packages/ggpmisc/readme/README.html - in particular `ggpmisc::geom_label_npc` – tjebo Jan 08 '21 at 19:01
  • 1
    You may find this description of NPC units helpful: https://cran.r-project.org/web/packages/ggpmisc/vignettes/user-guide.html#normalised-parent-coordinates – Ian Campbell Jan 08 '21 at 19:03

1 Answer

3

Under the flag of 'alternatives are also welcome': why not use a text glyph for the geom_vline()s and override the actual labels?

I rearranged the code a bit for my own understanding, but here is an example:

library(tidyverse)
#> Warning: package 'tibble' was built under R version 4.0.3
#> Warning: package 'tidyr' was built under R version 4.0.3
#> Warning: package 'readr' was built under R version 4.0.3
#> Warning: package 'dplyr' was built under R version 4.0.3
# Example data: the observed values plus four constant helper columns.
values <- c(
  rep(0, 2), rep(2, 3), rep(3, 3), rep(4, 3), 5,
  rep(6, 2), 8, 9, rep(11, 2)
)
obs_number <- rep(18, 18)
value_1 <- rep(4, 18)
value_2 <- rep(7, 18)
value_3 <- rep(3, 18)

data_to_plot <- data.frame(values, obs_number, value_1, value_2, value_3)

# Lookup table pairing each legend label with the number it stands for:
# the observation count in the first row, then the three x-intercepts.
vals <- data.frame(
  xintercept = c(18, 4, 7, 3),
  label = c("Observations number", "value_1", "value_2", "value_3")
)

# Number of rows per distinct value, sorted ascending by that count.
frequency_count <- data_to_plot %>%
  group_by(values) %>%
  count() %>%
  arrange(n)
max_frequency <- max(frequency_count[["n"]])

# Histogram and count-scaled density of `values`. The reference lines come
# from `vals`, and their legend keys are drawn as text so that the legend
# itself shows the numbers instead of separately positioned labels.
ggplot(data_to_plot, aes(x = values)) +
  geom_histogram(
    aes(y = ..count..),
    binwidth = 1, colour = "black", fill = "white"
  ) +
  geom_density(aes(y = ..count..), fill = "blue", alpha = .25) +
  geom_vline(
    aes(xintercept = xintercept, color = label),
    # Skip the first row ("Observations number"): it only needs a legend
    # entry, not a line. Negative indexing stays correct for a one-row
    # `vals`, whereas 2:nrow(vals) would count backwards (2, 1).
    data = vals[-1, ],
    linetype = "dashed", size = 0.5, alpha = 1,
    # Give different legend glyph for vlines
    key_glyph = draw_key_text
  ) +
  scale_color_manual(
    name = "Values",
    # Listing every label as a limit keeps the legend entry for the row that
    # was dropped from the vline data.
    limits = vals$label,
    values = c("blue", "forestgreen", "purple", "red"),
    # Override the labels and set size to something sensible
    guide = guide_legend(
      override.aes = list(label = vals$xintercept, size = 3.88)
    )
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(colour = "darkblue"),
    axis.text.x = element_text(
      face = "plain", color = "black", size = 10, angle = 0
    ),
    axis.title.y = element_text(colour = "darkblue"),
    axis.text.y = element_text(
      face = "plain", color = "black", size = 10, angle = 0
    ),
    legend.position = c(.90, .80)
  ) +
  # Single labs() call: an earlier labs() set the same three labels and was
  # completely overridden by this one, so only the effective values are kept.
  labs(title = "DLT values", y = "frequency", x = "days") +
  scale_x_continuous(breaks = seq(0, max(data_to_plot$values), 1))

Created on 2021-01-08 by the reprex package (v0.3.0)

teunbrand
  • 33,645
  • 4
  • 37
  • 63
  • Thank you, @teunbrand, this is a very nice solution. One question: is it possible not to plot all lines from `vals`? I only need 3 lines and do not need a `geom_vline` for the observations number; I only need its label. – Iraleksa Jan 08 '21 at 19:32
  • 1
    Sure, instead of `data = vals`, subset the bit you need. Then set the limits on the scale, so it knows to plot all labels. I've edited the answer with updated code. – teunbrand Jan 08 '21 at 19:37