Facet Wrap Issue with ggplot Rain Cloud Plot

Question

I'm very new to R and am trying to facet_wrap raincloud plots. I am trying to facet_wrap by Hypothesis chosen (which has been binary coded), so ideally would like to plot proportion of confirmatory and disconfirmatory leads chosen by hypothesis.

Here is what I have so far:

# Reshape my_data from wide to long: the measure.vars columns are stacked,
# with the source column name stored in `Leads` and the value in `Proportion`.
# NOTE(review): "Hypothesis" is listed under measure.vars, so it is stacked
# as a category of `Leads` instead of remaining a column of its own.
# NOTE(review): the unnamed "Hyp" after variable.name is a stray positional
# argument -- presumably unintended; confirm.
my_data2 <- melt(my_data, id.vars = c("ID"), 
             measure.vars = c("Proportion.of.Disconfirmatory.Leads.Chosen","Proportion.of.Confirmatory.Leads.Chosen", "Hypothesis"), 
             variable.name = "Leads", "Hyp",
             value.name = "Proportion")

  # Raincloud plot of Proportion per Leads category: a half violin, jittered
  # raw points and a narrow boxplot, with one facet panel per Hypothesis value.
  plot3 <- ggplot(data = my_data2, aes(y = Proportion, x = Leads, fill = Leads)) +
    # geom_flat_violin() is not a ggplot2 geom; it has to be defined or sourced
    # before this code runs.
    geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .8) +
    geom_point(aes(y = Proportion, color = Leads), position = position_jitter(width = .15), size = .5, alpha = 0.8) +
    # NOTE(review): `guides` does not appear to be a geom_boxplot() argument --
    # presumably ignored; the legends are removed by the guides() calls below.
    geom_boxplot(width = .1, guides = FALSE, outlier.shape = NA, alpha = 0.5) +
    # Faceting requires a `Hypothesis` column in my_data2; the error quoted
    # below is raised here because that column is missing.
    facet_wrap(vars(Hypothesis), nrow = 2)+
    expand_limits(x = 5.25) +
    # Drop the fill and colour legends.
    guides(fill = FALSE) +
    guides(color = FALSE) +
    scale_color_brewer(palette = "Spectral") +
    scale_fill_brewer(palette = "Spectral") +
    # Swap the axes so the Leads categories are listed vertically.
    coord_flip() +
    theme_bw() 

  # Print the plot.
  plot3

However, I am receiving this error:

"Error: At least one layer must contain all faceting variables: `Hypothesis`.
* Plot is missing `Hypothesis`
* Layer 1 is missing `Hypothesis`
* Layer 2 is missing `Hypothesis`
* Layer 3 is missing `Hypothesis`
* Layer 4 is missing `Hypothesis`" 



> dput(my_data)
structure(list(ID = c(2L, 5L, 23L, 34L, 35L, 48L, 53L, 59L, 71L, 
76L, 1L, 3L, 4L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
17L, 18L, 19L, 20L, 21L, 22L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 
31L, 32L, 33L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 
46L, 47L, 49L, 50L, 51L, 52L, 54L, 55L, 56L, 57L, 58L, 60L, 61L, 
62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 72L, 73L, 74L, 75L, 
78L), Hypothesis = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), Sum.of.Disconfirmatory.Leads.Chosen = c(9L, 7L, 0L, 
3L, 4L, 1L, 2L, 3L, 6L, 3L, 2L, 3L, 5L, 3L, 4L, 3L, 3L, 5L, 0L, 
5L, 5L, 1L, 4L, 5L, 6L, 4L, 5L, 2L, 6L, 4L, 6L, 1L, 4L, 4L, 8L, 
3L, 4L, 2L, 5L, 2L, 4L, 7L, 1L, 1L, 2L, 3L, 5L, 2L, 5L, 8L, 0L, 
5L, 4L, 7L, 3L, 4L, 6L, 1L, 1L, 4L, 4L, 8L, 7L, 3L, 4L, 6L, 2L, 
5L, 2L, 5L, 5L, 8L, 2L, 4L, 5L, 7L), Sum.of.Confirmatory.Leads.Chosen = c(5L, 
2L, 2L, 2L, 8L, 3L, 4L, 5L, 4L, 2L, 4L, 6L, 3L, 7L, 4L, 3L, 2L, 
3L, 3L, 7L, 4L, 5L, 2L, 3L, 6L, 4L, 9L, 6L, 5L, 5L, 1L, 1L, 3L, 
6L, 6L, 3L, 7L, 1L, 2L, 3L, 6L, 8L, 2L, 2L, 6L, 9L, 5L, 6L, 5L, 
4L, 6L, 6L, 2L, 3L, 2L, 5L, 6L, 4L, 5L, 4L, 5L, 4L, 5L, 7L, 4L, 
5L, 4L, 4L, 3L, 5L, 5L, 7L, 6L, 4L, 3L, 7L), Proportion.of.Disconfirmatory.Leads.Chosen = c(64.28571429, 
77.77777778, 0, 60, 33.33333333, 25, 33.33333333, 37.5, 60, 60, 
33.33333333, 33.33333333, 62.5, 30, 50, 50, 60, 62.5, 0, 41.66666667, 
55.55555556, 16.66666667, 66.66666667, 62.5, 50, 50, 35.71428571, 
25, 54.54545455, 44.44444444, 85.71428571, 50, 57.14285714, 40, 
57.14285714, 50, 36.36363636, 66.66666667, 71.42857143, 40, 40, 
46.66666667, 33.33333333, 33.33333333, 25, 25, 50, 25, 50, 66.66666667, 
0, 45.45454545, 66.66666667, 70, 60, 44.44444444, 50, 20, 16.66666667, 
50, 44.44444444, 66.66666667, 58.33333333, 30, 50, 54.54545455, 
33.33333333, 55.55555556, 40, 50, 50, 53.33333333, 25, 50, 62.5, 
50), Proportion.of.Confirmatory.Leads.Chosen = c(35.71428571, 
22.22222222, 100, 40, 66.66666667, 75, 66.66666667, 62.5, 40, 
40, 66.66666667, 66.66666667, 37.5, 70, 50, 50, 40, 37.5, 100, 
58.33333333, 44.44444444, 83.33333333, 33.33333333, 37.5, 50, 
50, 64.28571429, 75, 45.45454545, 55.55555556, 14.28571429, 50, 
42.85714286, 60, 42.85714286, 50, 63.63636364, 33.33333333, 28.57142857, 
60, 60, 53.33333333, 66.66666667, 66.66666667, 75, 75, 50, 75, 
50, 33.33333333, 100, 54.54545455, 33.33333333, 30, 40, 55.55555556, 
50, 80, 83.33333333, 50, 55.55555556, 33.33333333, 41.66666667, 
70, 50, 45.45454545, 66.66666667, 44.44444444, 60, 50, 50, 46.66666667, 
75, 50, 37.5, 50)), class = "data.frame", row.names = c(NA, -76L
))

> head(my_data)
  ID Hypothesis Sum.of.Disconfirmatory.Leads.Chosen Sum.of.Confirmatory.Leads.Chosen
1  2          0                                   9                                5
2  5          0                                   7                                2
3 23          0                                   0                                2
4 34          0                                   3                                2
5 35          0                                   4                                8
6 48          0                                   1                                3
  Proportion.of.Disconfirmatory.Leads.Chosen Proportion.of.Confirmatory.Leads.Chosen
1                                   64.28571                                35.71429
2                                   77.77778                                22.22222
3                                    0.00000                               100.00000
4                                   60.00000                                40.00000
5                                   33.33333                                66.66667
6                                   25.00000                                75.00000

I suspect that I have introduced the variable Hypothesis incorrectly in the code, however I have no idea where or how! I have tried to include it in sumld however am receiving this error when doing so: Error in fs[[1]](x, ...) : attempt to apply non-function

Thank you all in advance for your help.

Are you able to provide a minimal working example, use `dput` to provide us `mydata`. — George, Mar 14 '19 at 03:14
Hi George, thank you for your reply. Could you please provide some more detail as to how I do that? Do I just add dput(my_data) to my script? Please excuse my ignorance! — Bronte, Mar 14 '19 at 03:45
you don't need to add `dput` to your example. If you run `dput(mydata)` that will provide the data structure. If not too big you can paste that into your example. If it is massive then subset mydata so there is the minimal amount of data to reproduce your error. No worries about ignorance, we all have it. — George, Mar 14 '19 at 03:48
[See here](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) on making a reproducible R question. Right now we don't have a sample of your data and we can't see your plot, so there aren't a whole lot of specifics we can do to help — camille, Mar 14 '19 at 03:50
Thank you George and Camille. I have attempted it and have added it to my example. Please let me know if I have done it incorrectly (teaching myself as I go!), and thank you again. — Bronte, Mar 14 '19 at 04:03
Thanks for the data, this is much easier. I'm confused: the data you are plotting is `my_data2`, and `my_data2` doesn't have a column called `"Hypothesis"`. So there's not really anything to facet by... perhaps you want to include `"Hypothesis"` as an `id.var` not as a `measure.var`? — Gregor Thomas, Mar 14 '19 at 04:09
I'm also going to edit your question and remove some of the extra stuff that isn't part of the problem (like your custom theme, the `sumlb` that isn't used ...) This extra stuff makes your question seem longer and more complicated than it really is. — Gregor Thomas, Mar 14 '19 at 04:11
It's also hard to tell because the data sample you provide has `Hypothesis = 0` in every row. If you need more help, please provide a `dput` sample of data with both values of `Hypothesis`. — Gregor Thomas, Mar 14 '19 at 04:15
Hi Gregor. Thank you for your help. I have updated the example code to include Hypothesis = 1 as well. That is so bizarre that Hypothesis isn't in my_data2 (it definitely was before but I think I've tweaked the code too much trying to fix this issue that I don't know what I have done!). — Bronte, Mar 14 '19 at 04:25

Djork · Answer 1 · 2022-03-12T06:42:41.047

Hypothesis is not an independent column in your melted data frame `my_data2`, so it cannot be used for faceting. Because you included Hypothesis in `measure.vars`, it was stacked into the `Leads` column and became one of its categories.

my_data2 %>% group_by(Leads) %>% summarize(n=n())
# A tibble: 3 x 2
  Leads                                          n
  <fct>                                      <int>
1 Proportion.of.Disconfirmatory.Leads.Chosen    76
2 Proportion.of.Confirmatory.Leads.Chosen       76
3 Hypothesis                                    76

If you want to use it for faceting, include Hypothesis in id.vars instead.

# Reshape to long format. Hypothesis is carried along as an identifier, so it
# stays a column of its own; only the two proportion columns are stacked.
id_columns <- c("ID", "Hypothesis")
lead_columns <- c(
  "Proportion.of.Disconfirmatory.Leads.Chosen",
  "Proportion.of.Confirmatory.Leads.Chosen"
)
my_data2 <- melt(
  my_data,
  id.vars = id_columns,
  measure.vars = lead_columns,
  variable.name = "Leads",
  value.name = "Proportion"
)

head(my_data2)
  ID Hypothesis                                      Leads Proportion
1  2          0 Proportion.of.Disconfirmatory.Leads.Chosen   64.28571
2  5          0 Proportion.of.Disconfirmatory.Leads.Chosen   77.77778
3 23          0 Proportion.of.Disconfirmatory.Leads.Chosen    0.00000
4 34          0 Proportion.of.Disconfirmatory.Leads.Chosen   60.00000
5 35          0 Proportion.of.Disconfirmatory.Leads.Chosen   33.33333
6 48          0 Proportion.of.Disconfirmatory.Leads.Chosen   25.00000

Now you can use Hypothesis for faceting:

library(ggplot2)
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")

# Raincloud plot of Proportion for each lead type, one panel per Hypothesis.
# Each "cloud" combines a half violin (density), jittered raw points and a
# slim boxplot. Legends are suppressed because the axis already names the
# categories.
plot3 <- ggplot(my_data2, aes(x = Leads, y = Proportion, fill = Leads)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .8) +
  # y is inherited from the plot-level aes(); only colour is added here.
  geom_point(aes(color = Leads),
             position = position_jitter(width = .15), size = .5, alpha = 0.8) +
  # `guides` is not a geom_boxplot() argument; show.legend is the per-layer
  # switch for keeping a layer out of the legend.
  geom_boxplot(width = .1, show.legend = FALSE, outlier.shape = NA,
               alpha = 0.5) +
  facet_wrap(~Hypothesis, nrow = 2) +
  # Makes sure the category axis extends at least to 5.25.
  expand_limits(x = 5.25) +
  # "none" replaces the deprecated logical form guides(fill = FALSE).
  guides(fill = "none", color = "none") +
  scale_color_brewer(palette = "Spectral") +
  scale_fill_brewer(palette = "Spectral") +
  # Swap the axes so the lead types are listed vertically.
  coord_flip() +
  theme_bw()

plot3

Edited: Solution to the follow-up question on how to modify the variable names shown on the axis labels. One approach is to convert the variable to a factor and assign display labels to its levels.

# Relabel the Leads categories for display. `labels` is paired with `levels`
# by position, so both vectors must list the categories in the same order
# (Disconfirmatory first, then Confirmatory).
my_data2$Leads <- factor(
  my_data2$Leads,
  levels = c("Proportion.of.Disconfirmatory.Leads.Chosen",
             "Proportion.of.Confirmatory.Leads.Chosen"),
  labels = c("Proportion of Disconfirmatory Leads Chosen",
             "Proportion of Confirmatory Leads Chosen")
)

Rerun the ggplot code above to produce the plot with the new axis labels.

Hi Djork, thank you so much for your help. I knew it would be something as simple as putting the variable in the wrong place! Thanks again. — Bronte, Mar 14 '19 at 21:29
Hi everyone, is it possible to change the variable names (to "Confirmatory Leads Chosen" rather than "Proportion.of.Confirmatory.Leads.Chosen"? I am currently only able to change x and y axis labels (or legend labels). Thank you. — Bronte, Mar 14 '19 at 23:21
@Bronte very late reply, answer edited to provide solution to your follow-up question. — Djork, Mar 12 '22 at 06:35

Facet Wrap Issue with ggplot Rain Cloud Plot

1 Answers1