2

I have data as follows;

# Example data: one row per observation. Within each group_size level,
# group_sum equals the sum of amount and count equals the number of rows,
# so both values are repeated on every row of the group.
dat <- structure(list(group_size = structure(c(6L, 3L, 3L, 4L, 1L, 2L, 
2L, 1L, 3L, 6L, 2L, 6L, 2L, 2L, 1L, 1L, 4L, 1L, 3L, 2L), .Label = c("(0,50]", 
"(50,100]", "(100,150]", "(150,200]", "(200,250]", "(250,3e+03]"
), class = "factor"), amount = c(409, 101, 103, 198, 40, 63, 
69, 49, 126, 304, 91, 401, 96, 63, 36, 1, 177, 7, 112, 61), group_sum = c(1114, 
442, 442, 375, 133, 443, 443, 133, 442, 1114, 443, 1114, 443, 
443, 133, 133, 375, 133, 442, 443), count = c(3L, 4L, 4L, 2L, 
5L, 6L, 6L, 5L, 4L, 3L, 6L, 3L, 6L, 6L, 5L, 5L, 2L, 5L, 4L, 6L
)), row.names = c(NA, -20L), class = c("data.table", "data.frame"
))

     group_size amount group_sum count
 1: (250,3e+03]    409      1114     3
 2:   (100,150]    101       442     4
 3:   (100,150]    103       442     4
 4:   (150,200]    198       375     2
 5:      (0,50]     40       133     5
 6:    (50,100]     63       443     6
 7:    (50,100]     69       443     6
 8:      (0,50]     49       133     5
 9:   (100,150]    126       442     4
10: (250,3e+03]    304      1114     3
11:    (50,100]     91       443     6
12: (250,3e+03]    401      1114     3
13:    (50,100]     96       443     6
14:    (50,100]     63       443     6
15:      (0,50]     36       133     5
16:      (0,50]      1       133     5
17:   (150,200]    177       375     2
18:      (0,50]      7       133     5
19:   (100,150]    112       442     4
20:    (50,100]     61       443     6

I would like to have a plot with the group size on the x-axis, and both the count and group_sum on the y-axis.

EDIT: The y-axis scale should show group_sum. The count can simply be printed as a number on top of each bar.

I am trying to adapt this answer by tjebo, which was used for a continuous x-variable:

library(tidyverse)

# Template quoted from the referenced answer: bars of `n` against `thevalues`,
# filled by `mistakes`, with a density curve (y = ..count..) drawn over them.
# NOTE(review): `thevalues`, `n` and `mistakes` are not columns of the `dat`
# shown above; presumably they belong to that answer's own data.
ggplot() +
  geom_col(data = dat, aes(thevalues, n, fill = as.character(mistakes))) +
  geom_density(data = dat, aes(thevalues, y = ..count..), size = 0.7, alpha = 0.1) +
  scale_fill_brewer(palette = "Set1") +
  theme(legend.title = element_blank(), legend.position = c(0.1, 0.85))

I will have to replace thevalues and mistakes with group_sum and count, respectively:

# Attempted adaptation of the template to `dat`.
# NOTE(review): `n` is not a column of `dat` (its columns are group_size,
# amount, group_sum and count), and group_size, the variable wanted on the
# x-axis, is not mapped anywhere in this call.
ggplot() +
  geom_col(data = dat, aes(group_sum, n, fill = as.character(count))) +
  geom_density(data = dat, aes(group_sum, y = ..count..), size = 0.7, alpha = 0.1) +
  scale_fill_brewer(palette = "Set1") +
  theme(legend.title = element_blank(), legend.position = c(0.1, 0.85))

Desired output:

enter image description here

But I keep messing something up. Any suggestions?

Tom
  • 2,173
  • 1
  • 17
  • 44

3 Answers

3

Something like this?

library(tidyverse)

# Same example data as in the question: one row per observation, with
# group_sum and count repeated on every row of a group_size level.
dat <- structure(list(group_size = structure(c(
  6L, 3L, 3L, 4L, 1L, 2L,
  2L, 1L, 3L, 6L, 2L, 6L, 2L, 2L, 1L, 1L, 4L, 1L, 3L, 2L
), .Label = c(
  "(0,50]",
  "(50,100]", "(100,150]", "(150,200]", "(200,250]", "(250,3e+03]"
), class = "factor"), amount = c(
  409, 101, 103, 198, 40, 63,
  69, 49, 126, 304, 91, 401, 96, 63, 36, 1, 177, 7, 112, 61
), group_sum = c(
  1114,
  442, 442, 375, 133, 443, 443, 133, 442, 1114, 443, 1114, 443,
  443, 133, 133, 375, 133, 442, 443
), count = c(
  3L, 4L, 4L, 2L,
  5L, 6L, 6L, 5L, 4L, 3L, 6L, 3L, 6L, 6L, 5L, 5L, 2L, 5L, 4L, 6L
)), row.names = c(NA, -20L), class = c("data.table", "data.frame"))

# Plot group_sum (red, primary axis) and count (blue, secondary axis) as
# outlined bars per group_size, each bar labelled with its value.

# Every layer needs exactly one row per group: group_sum and count are
# repeated on every observation, so plotting `dat` directly would draw each
# bar several times on top of itself.
per_group <- dat %>%
  as_tibble() %>%
  distinct(group_size, group_sum, count)

# count is much smaller than group_sum, so it is stretched by this factor for
# drawing; the secondary axis divides by it again to show the true counts.
count_scale <- 10

per_group %>%
  ggplot(aes(x = group_size)) +
  geom_col(
    aes(y = group_sum),
    position = "identity", color = "red", fill = "transparent"
  ) +
  geom_label(aes(y = group_sum, label = group_sum), color = "red") +
  geom_col(
    aes(y = count * count_scale),
    position = "identity", color = "blue", fill = "transparent"
  ) +
  # Labels use the per-group table rather than distinct(count): two groups
  # sharing the same count would otherwise lose one of their labels.
  geom_label(aes(y = count * count_scale, label = count), color = "blue") +
  # The formula is passed positionally because the argument is called `trans`
  # in older ggplot2 releases and `transform` from 3.5.0 onwards.
  scale_y_continuous(sec.axis = sec_axis(~ . / count_scale, name = "Count"))

Created on 2022-02-22 by the reprex package (v2.0.0)

danlooo
  • 10,067
  • 2
  • 8
  • 22
  • One small question. I am kind of struggling to add a legend in this case. I already tried a couple of things, but I cannot get it to work. Is there an easy solution for this? – Tom Feb 22 '22 at 10:26
  • with `data = dat %>% mutate(type = "Count")` and `aes(color = type)`, ggplot will create the legend for you, because it's now part of the mapping aesthetics – danlooo Feb 22 '22 at 11:58
2

You could use geom_text to add the label info.

# Bar chart of group_sum per group_size, filled by count, with the count
# printed at the top of each bar.

# group_sum and count are repeated on every observation of a group, so reduce
# the data to one row per group first; otherwise every bar and every label is
# drawn several times on top of itself.
dat_groups <- unique(as.data.frame(dat)[c("group_size", "group_sum", "count")])

ggplot(data = dat_groups, aes(x = group_size,
                              y = group_sum,
                              fill = as.character(count))) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set1") +
  # x and y are inherited from the plot-level mapping; "inward" aligns the
  # label towards the centre of the panel.
  geom_text(aes(label = count), vjust = "inward") +
  theme(legend.title = element_blank(), legend.position = c(0.1, 0.85))

example plot

Jakob Gepp
  • 463
  • 3
  • 10
0

This gives a dodged bar plot by pivoting the data to long format. Because group_sum and count are repeated on every row of a group, the data must first be reduced to the unique combinations of group_size, group_sum and count (ignoring amount).

    library(ggplot2)
    library(dplyr)
    library(tidyr)
    
    # Same example data as in the question: one row per observation, with
    # group_sum and count repeated on every row of a group_size level.
    dat <- structure(list(group_size = structure(
      c(6L, 3L, 3L, 4L, 1L, 2L, 2L, 1L, 3L, 6L, 2L, 6L, 2L, 2L, 1L, 1L, 4L, 1L, 3L, 2L),
      .Label = c("(0,50]", "(50,100]", "(100,150]", "(150,200]", "(200,250]", "(250,3e+03]"), class = "factor"),
      amount = c(409, 101, 103, 198, 40, 63, 69, 49, 126, 304, 91, 401, 96, 63, 36, 1, 177, 7, 112, 61),
      group_sum = c(1114, 442, 442, 375, 133, 443, 443, 133, 442, 1114, 443, 1114, 443, 443, 133, 133, 375, 133, 442, 443),
      count = c(3L, 4L, 4L, 2L, 5L, 6L, 6L, 5L, 4L, 3L, 6L, 3L, 6L, 6L, 5L, 5L, 2L, 5L, 4L, 6L      )),
      row.names = c(NA, -20L), class = c("data.table", "data.frame"))
    
    # One row per group: group_sum and count are repeated on every observation,
    # so keep only their unique combinations (amount is ignored).
    dat_unique <- dat %>% distinct(group_size, group_sum, count)

    # Long format: one row per (group_size, variable) pair, so the two
    # variables can be drawn as dodged bars.
    dat_long <- dat_unique %>%
      pivot_longer(
        cols = c("group_sum", "count"),
        names_to = "y_varname",
        values_to = "y"
      ) %>%
      mutate(
        # Keep the untouched value for the text label.
        y_label = y,
        # Multiply count by 50 so its bars are comparable in height to the
        # group_sum bars; group_sum is left unchanged.
        y = if_else(y_varname == "count", y * 50, y)
      )
    
    # Dodged bars of the (rescaled) values, labelled with the original values.
    # geom_text has no width of its own, so position_dodge() needs an explicit
    # width; 0.9 is geom_col's default bar width, which centres each label
    # over its own bar instead of over the middle of the category.
    dat_long %>% ggplot(aes(group_size, y, fill = y_varname)) +
      geom_col(position = "dodge") +
      geom_text(aes(label = y_label), position = position_dodge(width = 0.9))

enter image description here

altynbei
  • 87
  • 1
  • 8