1

I have read dozens of posts on how to add a label to each facet of a facet_wrap, but I am stuck and hoping for some assistance: I want to add a label showing each group's mean (λ) and number of observations (k) in the dataset. I used most of this post but struggle with the following items:

  1. Add the correct mean and number of observations by group to the facet
  2. Round the mean to two digits
  3. Change the 'mean' label to the Greek letter lambda (Unicode U+03BB)
# Keep only the four columns used by the summary and the plot below.
df_dztpois <- subset(df_full, select=c("PIA_ITEM", "PIA_TYPE", "DELY_QTY_WINS", "DELY_QTY_DZTPOIS"))

# One row per PIA_ITEM: number of observations, mean of DELY_QTY_WINS, and a
# two-line text label built from both.
# NOTE(review): the grouping variable here is PIA_ITEM, but the plot below
# facets by PIA_TYPE; the summary has no PIA_TYPE column.
# NOTE(review): the mean is pasted into the label unrounded.
df_dztpois_summary = df_dztpois %>% group_by(PIA_ITEM) %>%
  summarize(count = length(DELY_QTY_WINS),
            mean = mean(DELY_QTY_WINS)) %>%
  mutate(lab = paste("count = ", count, "\nmean = ", mean))

# Line of DELY_QTY_DZTPOIS against DELY_QTY_WINS.
p <- ggplot(data = df_dztpois, aes(x=DELY_QTY_WINS))
p <- p + geom_line(aes(y=DELY_QTY_DZTPOIS))
# One panel per PIA_TYPE, each with its own axis ranges (scales = "free").
p <- p + facet_wrap( ~ PIA_TYPE, nrow=3, scales = "free")
p <- p + xlab("Observation") + ylab("Probability")
p <- p + theme(
  legend.position = "bottom",
  legend.text = element_text(size = 12),
  strip.text.x = element_text(size = 12))
p <- p + labs(title = "Zero-truncated Poisson distribution of order quantity",
              subtitle = "Grouped by product (PIA_TYPE)",
              caption = "Data source: df_dztpois$DELY_QTY_WINS and df_full$DELY_QTY_DZTPOIS")
# Draw the summary label in each panel; x = Inf / y = Inf with hjust = 1 and
# vjust = 1.2 are fixed (non-mapped) settings intended to pin the text to the
# upper-right corner of the panel.
p <- p + geom_text(data = df_dztpois_summary, aes(label = lab), x=Inf, y=Inf, hjust=1, vjust=1.2)
# Display the plot.
p

Brief sample of the data set:

> dput(df_dztpois[1:10,])
structure(list(PIA_ITEM = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L), .Label = c("RX20 1.5t Type 6209-10", "RX20 1.6t Type 6211", 
"RX20 1.6t Type 6221", "RX20 1.8t Type 6213", "RX20 2.0t Type 6215", 
"RX20 2.0t Type 6225", "RX20 2.0t Type 6230"), class = "factor"), 
    PIA_TYPE = c("6211", "6211", "6211", "6211", "6211", "6211", 
    "6211", "6211", "6211", "6211"), DELY_QTY_WINS = c(75, 62, 
    57, 57, 67, 57, 53, 70, 70, 60), DELY_QTY_DZTPOIS = c(1.09669678480388e-08, 
    3.8744657910606e-05, 0.000448728658516301, 0.000448728658516301, 
    2.24150656988175e-06, 0.000448728658516301, 0.0023318386482722, 
    3.38775858809732e-07, 3.38775858809732e-07, 0.000108487832825478
    )), row.names = c(NA, 10L), class = "data.frame")
> str(df_dztpois)
'data.frame':   959 obs. of  4 variables:
 $ PIA_ITEM        : Factor w/ 7 levels "RX20 1.5t Type 6209-10",..: 2 2 2 2 2 2 2 2 2 2 ...
 $ PIA_TYPE        : chr  "6211" "6211" "6211" "6211" ...
 $ DELY_QTY_WINS   : num  75 62 57 57 67 57 53 70 70 60 ...
 $ DELY_QTY_DZTPOIS: num  1.10e-08 3.87e-05 4.49e-04 4.49e-04 2.24e-06 ...

Current plot output:

enter image description here

Max
  • 185
  • 1
  • 11
  • `df_dztpois_summary` does not contain `PIA_TYPE`, and therefore it does not contain any information about what facet the label should appear on. – Axeman Aug 17 '20 at 19:18

1 Answer

2

There was one issue with your code:

  • You facet by PIA_TYPE, but the summary df was grouped by PIA_ITEM

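I'm not sure if this was the root cause, because in your example data PIA_TYPE and PIA_ITEM always identify the same group. To make the difference visible, I've changed PIA_TYPE in the sample so that it takes two different values ("6211" for the first five rows and "6212" for the last five).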

When creating the summary df, you can use n() to count the number of observations, use round to get the correct number of digits and express lambda as \U03BB.

# Ten-row sample of the data. PIA_TYPE takes two values ("6211" and "6212",
# five rows each) so that the faceted plot has more than one panel.
df_dztpois <- structure(
  list(
    # Every sample row belongs to the second of the seven item levels.
    PIA_ITEM = structure(
      rep(2L, 10L),
      levels = c(
        "RX20 1.5t Type 6209-10",
        "RX20 1.6t Type 6211",
        "RX20 1.6t Type 6221",
        "RX20 1.8t Type 6213",
        "RX20 2.0t Type 6215",
        "RX20 2.0t Type 6225",
        "RX20 2.0t Type 6230"
      ),
      class = "factor"
    ),
    PIA_TYPE = rep(c("6211", "6212"), each = 5L),
    DELY_QTY_WINS = c(75, 62, 57, 57, 67, 57, 53, 70, 70, 60),
    DELY_QTY_DZTPOIS = c(
      1.09669678480388e-08,
      3.8744657910606e-05,
      0.000448728658516301,
      0.000448728658516301,
      2.24150656988175e-06,
      0.000448728658516301,
      0.0023318386482722,
      3.38775858809732e-07,
      3.38775858809732e-07,
      0.000108487832825478
    )
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)

library(ggplot2)
library(dplyr)

# Per-facet label data: one row per PIA_TYPE (the variable the plot facets
# by), holding the number of observations, the mean of DELY_QTY_WINS, and a
# two-line text label ("count = <n>" / "λ = <mean>") for geom_text().
df_dztpois_summary <- df_dztpois %>%
  group_by(PIA_TYPE) %>%
  summarize(
    count = n(),
    mean = mean(DELY_QTY_WINS)
  ) %>%
  # paste0() rather than paste(): paste() separates its arguments with a
  # space, which produced "count =  5 \n..." (doubled spaces plus a trailing
  # space before the line break that shifts the right-aligned text).
  # sprintf("%.2f") keeps trailing zeros, so a rounded mean of 64.6 is shown
  # as "64.60" and every facet label has exactly two decimals.
  mutate(
    lab = paste0(
      "count = ", count,
      "\n\U03BB = ", sprintf("%.2f", round(mean, digits = 2))
    )
  )

# Line plot of DELY_QTY_DZTPOIS against DELY_QTY_WINS, one panel per PIA_TYPE,
# with the matching row of df_dztpois_summary written into each panel.
p <- ggplot(data = df_dztpois, mapping = aes(x = DELY_QTY_WINS)) +
  geom_line(mapping = aes(y = DELY_QTY_DZTPOIS)) +
  # scales = "free": each panel gets its own x and y ranges.
  facet_wrap(~PIA_TYPE, nrow = 3, scales = "free") +
  xlab("Observation") +
  ylab("Probability") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 12),
    strip.text.x = element_text(size = 12)
  ) +
  labs(
    title = "Zero-truncated Poisson distribution of order quantity",
    subtitle = "Grouped by product (PIA_TYPE)",
    caption = "Data source: df_dztpois$DELY_QTY_WINS and df_full$DELY_QTY_DZTPOIS"
  ) +
  # x/y are fixed settings (not mapped aesthetics): together with hjust/vjust
  # they anchor the label at the upper-right corner of every panel.
  geom_text(
    data = df_dztpois_summary,
    mapping = aes(label = lab),
    x = Inf,
    y = Inf,
    hjust = 1,
    vjust = 1.2
  )

# Display the finished plot.
p

enter image description here

starja
  • 9,887
  • 1
  • 13
  • 28
  • Thanks for the prompt answer. I had noticed my mistake with the summary and the facet variable, but as you said they are just a long and a short version of one another. The `n()` call didn't work for me; I get the error: "`n()` must only be used inside dplyr verbs." – Max Aug 17 '20 at 20:47
  • 1
    Turns out that a restart of R solved the n() issue. Thanks again ! – Max Aug 17 '20 at 20:52