1

Question

How to add labels showing the number of observations along a density plot?

Data

My dataset:

# Minimal working example data (the layout matches dput() output).
# A data.frame with 218 rows and three columns:
#   Gender - factor with levels "Female", "Male"
#   Age    - numeric
#   KmEuc  - factor with levels "1", "2", "3" (shown as "Clusters" in the plots)
mwe <- structure(list(Gender = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("Female", "Male"), class = "factor"), 
    Age = c(23, 23, 23, 23, 23, 23, 39, 39, 39, 39, 39, 39, 30, 
    30, 30, 30, 30, 30, 30, 30, 24, 24, 24, 24, 24, 24, 24, 24, 
    18, 18, 18, 18, 18, 18, 23, 23, 23, 23, 23, 23, 23, 23, 26, 
    26, 26, 26, 26, 26, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 
    23, 23, 23, 23, 30, 30, 30, 30, 30, 30, 20, 20, 20, 20, 20, 
    20, 25, 25, 25, 25, 25, 25, 25, 25, 23, 23, 23, 23, 23, 23, 
    23, 23, 38, 38, 38, 38, 38, 38, 22, 22, 22, 22, 22, 22, 29, 
    29, 29, 29, 29, 29, 21, 21, 21, 21, 21, 21, 23, 23, 23, 23, 
    23, 23, 25, 25, 25, 25, 25, 25, 24, 24, 24, 24, 24, 24, 21, 
    21, 21, 21, 21, 21, 27, 27, 27, 27, 27, 27, 24, 24, 24, 24, 
    24, 24, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 23, 
    23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 21, 21, 
    21, 21, 27, 27, 27, 27, 27, 27, 34, 34, 34, 34, 34, 34, 26, 
    26, 26, 26, 26, 26, 26, 26, 28, 28, 28, 28, 28, 28, 39, 39, 
    39, 39, 39, 39, 26, 26, 26, 26, 26, 26), KmEuc = structure(c(1L, 
    1L, 1L, 1L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 
    1L, 1L, 3L, 2L, 1L, 1L, 1L, 1L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 
    2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 
    3L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 
    2L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
    3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 
    2L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 
    2L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 
    2L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 
    3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 
    2L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 
    3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("1", "2", "3"), class = "factor")), class = "data.frame", row.names = c(NA, 
-218L))

I want to show the Age distribution using a density plot:

Code

# Age density per cluster (colour = KmEuc), one facet row per Gender.
# No position adjustment is requested: dodging is meant for geoms that have a
# width (bars, boxes); on these density lines `position = "dodge"` only
# triggers ggplot2's "Width not defined" warning.
p1 <- ggplot() +
  geom_freqpoly(aes(x = Age, color = KmEuc), stat = "density", data = mwe) +
  scale_color_manual(
    guide = guide_legend(),
    name = "Clusters",
    values = c("#E31A1C", "#332288", "#66A61E"),
    labels = c("Pie", "Carrot", "Rice")
  ) +
  theme_light(base_size = 14) +
  # Formula passed positionally; the `facets` argument is deprecated
  facet_grid(Gender ~ .) +
  # Hide both axis titles
  theme(axis.title.x = element_blank(), axis.title.y = element_blank())

enter image description here

Trial

To add labels of counts I tried the following:

# Tally the observations for every Age x Gender x KmEuc combination
dfLabels <- mwe %>%
  select(Age, Gender, KmEuc) %>%
  group_by(Age, Gender, KmEuc) %>%
  dplyr::summarise(N = n())

# Draw the counts on top of the density plot. y is pinned at 0.01, so every
# label lands on the same horizontal line whatever the density at that age.
p1 +
  geom_label(
    mapping = aes(x = Age, y = 0.01, label = N),
    data = dfLabels,
    size = 3,
    hjust = 0,
    vjust = 0
  )

Since `y = 0.01` is fixed, N can only be shown along a horizontal line at that height. How can I make N appear along the density curve instead?

Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
doctorate
  • 1,381
  • 1
  • 19
  • 43

1 Answer

1

Try this. Besides computing the counts I also compute the density for each age. I borrowed the general idea from here but adapted it to your problem and used a tidyverse approach.

library(ggplot2)
library(purrr)
library(dplyr)
library(tidyr)

# Build one row per (Gender, KmEuc, distinct Age). Each row also carries its
# group's nested data frame (`data`) and density estimate (`dens`) as
# list-columns, ready for the per-row interpolation done afterwards.
dfLabels <- mwe %>%
  select(Age, Gender, KmEuc) %>%
  group_by(Gender, KmEuc) %>%
  nest() %>%
  mutate(
    # Kernel density estimate of Age within each Gender x KmEuc group
    dens = purrr::map(data, ~ density(.$Age)),
    # Distinct ages in the group: the x positions that will receive a label
    age_uniq = purrr::map(data, ~ unique(.$Age))
  ) %>%
  # Expand to one row per distinct age
  unnest(age_uniq)

# One label per (Gender, KmEuc, Age): where to draw it (label.y) and what it
# shows (label.n).
dfLabels1 <- dfLabels %>%
  mutate(
    # Height of the group's density curve at this age, interpolated between
    # the grid points returned by density()
    label.y = purrr::map2_dbl(
      age_uniq, dens, ~ approx(.y$x, .y$y, xout = .x)$y
    ),
    # Number of observations in the group that have exactly this age
    label.n = purrr::map2_dbl(age_uniq, data, ~ sum(.y$Age == .x))
  ) %>%
  # Drop any grouping carried over from the group_by() in dfLabels so the
  # result is a plain, ungrouped table
  ungroup() %>%
  select(Gender, KmEuc, Age = age_uniq, label.y, label.n)

# Density curves per cluster plus the per-age counts from dfLabels1. Each
# count is drawn at the interpolated density height (label.y), so it sits on
# the curve of its own cluster.
# No position adjustment is requested on either layer: dodging is meant for
# geoms that have a width, and without one `position = "dodge"` only triggers
# ggplot2's "Width not defined" warning.
p1 <- ggplot() +
  geom_freqpoly(aes(x = Age, color = KmEuc), stat = "density", data = mwe) +
  geom_text(
    aes(x = Age, y = label.y, color = KmEuc, label = label.n),
    # vjust = 0 rests the bottom of the text on the curve
    vjust = 0, show.legend = FALSE, data = dfLabels1
  ) +
  scale_color_manual(
    guide = guide_legend(),
    name = "Clusters",
    values = c("#E31A1C", "#332288", "#66A61E"),
    labels = c("Pie", "Carrot", "Rice")
  ) +
  theme_light(base_size = 14) +
  # Formula passed positionally; the `facets` argument is deprecated
  facet_grid(Gender ~ .) +
  # Hide both axis titles
  theme(axis.title.x = element_blank(), axis.title.y = element_blank())
p1

Created on 2020-04-11 by the reprex package (v0.3.0)

doctorate
  • 1,381
  • 1
  • 19
  • 43
stefan
  • 90,330
  • 6
  • 25
  • 51
  • an error was thrown `Error in UseMethod("map") : no applicable method for 'map' applied to an object of class "c('vctrs_list_of', 'vctrs_vctr')"`, any idea why? – doctorate Apr 11 '20 at 13:40
  • R version 3.6.3 (2020-02-29) -- "Holding the Windsock", and tidyverse version 1.3.0 – doctorate Apr 11 '20 at 13:44
  • Hm. R version and tidyverse version is the same as on my machine. Have you tried restarting your R Session? Did you use the dataset from the post? – stefan Apr 11 '20 at 13:47
  • ... and check the versions of the single packages. For me: `ggplot2 3.3.0`. `purrr 0.3.3`. `dplyr 0.8.5`. `tidyr 1.0.2`. – stefan Apr 11 '20 at 13:50
  • I needed to update the individual packages, then restarted R, and I still get an error but with a different wording this time: `Error in UseMethod("map") : no applicable method for 'map' applied to an object of class "list"` – doctorate Apr 11 '20 at 15:11
  • I found the culprit, it was the `mclust::map()` and `kohonen::map()` so in your code explicit function names were needed. – doctorate Apr 11 '20 at 16:09
  • I just edited to `purrr::map` to refer to the same needed function as intended. – doctorate Apr 11 '20 at 16:19
  • I didn't get the part `~ density(.$Age)`: at this point in the code the variable `Age` has been turned into a list by `nest()`, so there is no `Age` column any more. How can the function still refer to `Age` at this step? – doctorate Apr 11 '20 at 18:13
  • By nesting we make `data` a list-column. Each element of `data` is a small data frame with - in your case - one column, `Age`. `map` loops over each element in `data`. In each step of the loop it passes one of the small data frames to `density`. The dot `.` is simply a shorthand for that data frame. Therefore `.$Age` is just the standard way of accessing a column of a data frame. Hope that was understandable. (: – stefan Apr 11 '20 at 21:35