1

I would love to have a clearer legend in my ggplot (below). Specifically, the point shapes overlap the line geometries in the legend keys, which makes them hard to read. It would be nice to have one legend for the predictive data (linetypes, ribbon fills, color) and another for the raw data (point shapes, color).

After playing around I managed to separate the two groups, but I can't find a way to have group colors applied to both legends. Furthermore, it seems that even when I group linetype and fill together using the same labs() name, there is still a grey fill on the point shape legend. This is a lot of information but the takeaway is after much playing around, I can't get the legend to behave.

I have provided my code and datasets; maybe someone could help me out?

Here's my ggplot:

My plot

and here's the code that generated it:

# Model predictions (fitted line + confidence ribbon) with the raw observations
# jittered on top. All four aesthetics share the legend title "Locus".
ggplot(
  data = predict.df,
  mapping = aes(
    x = x, y = predicted,
    colour = group, shape = group, linetype = group
  )
) +
  # Linear fit through the predicted values of each group
  stat_smooth(method = "lm", formula = y ~ x) +
  # Band drawn from the conf.low / conf.high columns of predict.df
  geom_ribbon(
    mapping = aes(ymin = conf.low, ymax = conf.high, fill = group),
    alpha = 0.1
  ) +
  # Raw responses, jittered horizontally only (height = 0)
  geom_jitter(
    data = raw,
    mapping = aes(x = x, y = response),
    width = 8, height = 0
  ) +
  # x is coded as 0/35/70/105; relabel the ticks with the sampling dates
  scale_x_continuous(
    name = "Sample date",
    breaks = c(0, 35, 70, 105),
    labels = c("June 26", "July 31", "Sept 4", "Oct 9")
  ) +
  # Ticks 0-3 are relabelled 1-1000 (presumably a log10 response -- confirm)
  scale_y_continuous(
    name = "Viral load (virus/\u00b5g DNA)",
    breaks = c(0, 1, 2, 3),
    labels = c(1, 10, 100, 1000)
  ) +
  scale_colour_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  labs(
    colour = "Locus",
    shape = "Locus",
    fill = "Locus",
    linetype = "Locus"
  ) +
  theme_pubr(legend = "right")

Here are the two plugged in data frames:

> dput(predict.df)
structure(list(x = c(0, 0, 0, 35, 35, 35, 70, 70, 70, 105, 105, 
105), predicted = c(1.76102123590214, 0.37702177715769, 0.502111657963439, 
2.16765850174448, 1.13876128504506, 0.72723857556493, 2.57429576758682, 
1.90050079293243, 0.95236549316642, 2.98093303342917, 2.66224030081979, 
1.17749241076791), std.error = c(0.210212347913819, 0.167399741123415, 
0.183879503224061, 0.145514809621925, 0.106824868721082, 0.108685998504236, 
0.121276566028898, 0.176399586908295, 0.18802919804674, 0.15744703287395, 
0.295192239702551, 0.323169724522022), conf.low = c(1.34901260488545, 
0.0489243135344682, 0.141714454149163, 1.88245471566831, 0.929388389698517, 
0.514217932872852, 2.33659806600149, 1.55476395570442, 0.58383503695286, 
2.67234251952353, 2.08367414248708, 0.544091389811016), conf.high = c(2.17302986691883, 
0.705119240780912, 0.862508861777715, 2.45286228782065, 1.3481341803916, 
0.940259218257007, 2.81199346917216, 2.24623763016043, 1.32089594937998, 
3.2895235473348, 3.24080645915251, 1.8108934317248), group = structure(c(1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("Basal", 
"Medial", "Distal"), class = "factor")), row.names = c(NA, -12L
), class = c("ggeffects", "data.frame"), legend.labels = c("Basal", 
"Medial", "Distal"), x.is.factor = "0", continuous.group = FALSE, rawdata = structure(list(
    response = c(2.23667815433731, 0.585725623985699, 0, 2.63585811315419, 
    0, 0, 2.33374975605124, 0, 0.884942436609621, 1.16286703780418, 
    1.06709808539313, 0, 0.890758164061038, 0, 0.639404117570214, 
    2.47790759499121, 0, 0, 2.14524220682092, 0, 0, 1.9740589910418, 
    0, 0, 0, 1.25516666996646, 2.66751722923715, 0, 0, 1.92521830152371, 
    0.903597044237988, 0.945099129999455, 2.6442228874261, 2.58142029334097, 
    0, 2.32153495885343, 1.3801896215756, 1.02489379026788, 2.05743286900869, 
    1.11074999614049, 0.860338006570994, 1.55767122072119, 0.989126434016326, 
    1.21554496604287, 2.14770759585995, 1.92492507723203, 1.86644529881602, 
    2.40375462277453, 1.34200834355252, 1.14456300882461, 2.25413032451885, 
    1.05881158464742, 0.880851309461926, 2.72704842428525, 1.01414176946023, 
    1.13102772933728, 2.78511309801747, 2.0899051114394, 1.02716459664481, 
    1.90825217816864, 0.791001136920094, 2.27946840305546, 2.48278954016739, 
    0.940828612330139, 2.20441891726465, 1.95118485766815, 1.01752434498353, 
    1.22992186490351, 1.18351341124408, 1.17491390968833, 1.54441258580579, 
    1.24401572811876, 0.878566226876958, 2.35327316999556, 2.13603849721832, 
    2.29146399657572, 2.41044404137815, 2.21254419548501, 0.826472233221594, 
    2.20421121140994, 1.57587890647473, 0.897437484440932, 2.34225098899079, 
    2.38136059256142, 0.972352525321007, 1.6028414421383, 2.20134373839759, 
    0.810820367137012, 2.77026731894286, 2.29241135584685, 0.57988940596851, 
    3.71170452590752, 3.4954877455041, 3.18504082301818, 3.67423352412794, 
    2.98414705180641, 0.141639163861031, 2.41217775364379, 0.51123419122885, 
    0.476456687670469, 3.36887472776869, 3.02081502150107, 0.513445669923521, 
    3.63514892267612, 3.51925820816477, 3.40746252696249, 3.27998529166281, 
    0.623505586942313, 0.798307744015814, 3.27752745673065, 3.18652456493964, 
    0.184008911262831, 3.18983391030149, 2.77631390383718, 0.395623039048979, 
    3.46567596502934, 3.02900875924267, 0.433385505287688), x = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35, 35, 35, 35, 35, 
    35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 
    35, 35, 35, 35, 35, 35, 35, 70, 70, 70, 70, 70, 70, 70, 70, 
    70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 
    70, 70, 70, 70, 70, 70, 105, 105, 105, 105, 105, 105, 105, 
    105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 
    105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105), group = structure(c(1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 3L, 1L, 2L, 3L, 1L, 2L, 
    3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
    3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 2L, 3L, 
    1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
    1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
    1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
    1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("Basal", 
    "Medial", "Distal"), class = "factor")), class = "data.frame", row.names = c(NA, 
-118L)), title = "Predicted values of log.Virus.Particles", x.title = "Date", y.title = "log.Virus.Particles", legend.title = "Locus", constant.values = list(
    Study.ID = "0 (population-level)"), terms = c("Date", "Locus"
), original.terms = c("Date", "Locus"), at.list = list(Date = c(0, 
35, 70, 105), Locus = c("Basal", "Medial", "Distal")), prediction.interval = FALSE, ci.lvl = 0.95, family = "gaussian", link = "identity", logistic = "0", is.trial = "0", fitfun = "lm", model.name = "model1")

and

>  dput(raw)
structure(list(response = c(2.23667815433731, 0.585725623985699, 
0, 2.63585811315419, 0, 0, 2.33374975605124, 0, 0.884942436609621, 
1.16286703780418, 1.06709808539313, 0, 0.890758164061038, 0, 
0.639404117570214, 2.47790759499121, 0, 0, 2.14524220682092, 
0, 0, 1.9740589910418, 0, 0, 0, 1.25516666996646, 2.66751722923715, 
0, 0, 1.92521830152371, 0.903597044237988, 0.945099129999455, 
2.6442228874261, 2.58142029334097, 0, 2.32153495885343, 1.3801896215756, 
1.02489379026788, 2.05743286900869, 1.11074999614049, 0.860338006570994, 
1.55767122072119, 0.989126434016326, 1.21554496604287, 2.14770759585995, 
1.92492507723203, 1.86644529881602, 2.40375462277453, 1.34200834355252, 
1.14456300882461, 2.25413032451885, 1.05881158464742, 0.880851309461926, 
2.72704842428525, 1.01414176946023, 1.13102772933728, 2.78511309801747, 
2.0899051114394, 1.02716459664481, 1.90825217816864, 0.791001136920094, 
2.27946840305546, 2.48278954016739, 0.940828612330139, 2.20441891726465, 
1.95118485766815, 1.01752434498353, 1.22992186490351, 1.18351341124408, 
1.17491390968833, 1.54441258580579, 1.24401572811876, 0.878566226876958, 
2.35327316999556, 2.13603849721832, 2.29146399657572, 2.41044404137815, 
2.21254419548501, 0.826472233221594, 2.20421121140994, 1.57587890647473, 
0.897437484440932, 2.34225098899079, 2.38136059256142, 0.972352525321007, 
1.6028414421383, 2.20134373839759, 0.810820367137012, 2.77026731894286, 
2.29241135584685, 0.57988940596851, 3.71170452590752, 3.4954877455041, 
3.18504082301818, 3.67423352412794, 2.98414705180641, 0.141639163861031, 
2.41217775364379, 0.51123419122885, 0.476456687670469, 3.36887472776869, 
3.02081502150107, 0.513445669923521, 3.63514892267612, 3.51925820816477, 
3.40746252696249, 3.27998529166281, 0.623505586942313, 0.798307744015814, 
3.27752745673065, 3.18652456493964, 0.184008911262831, 3.18983391030149, 
2.77631390383718, 0.395623039048979, 3.46567596502934, 3.02900875924267, 
0.433385505287688), x = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 70, 70, 70, 70, 70, 
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 
70, 70, 70, 70, 70, 70, 70, 70, 105, 105, 105, 105, 105, 105, 
105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 
105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105), group = structure(c(1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 2L, 3L, 1L, 2L, 3L, 1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
2L, 3L, 1L, 2L, 3L), .Label = c("Basal", "Medial", "Distal"), class = "factor")), class = "data.frame", row.names = c(NA, 
-118L))

Edit 1, addressing MrFlick's comment

I tried the suggestions at the link you provided:

## Model with predictive slopes
ggplot(
  data = predict.df,
  mapping = aes(
    x = x, y = predicted,
    colour = group, shape = group, linetype = group
  )
) +
  # Linear fit through the predicted values of each group
  stat_smooth(method = "lm", formula = y ~ x) +
  # Band drawn from the conf.low / conf.high columns of predict.df
  geom_ribbon(
    mapping = aes(ymin = conf.low, ymax = conf.high, fill = group),
    alpha = 0.1
  ) +
  # Raw responses, jittered horizontally only (height = 0)
  geom_jitter(
    data = raw,
    mapping = aes(x = x, y = response),
    width = 8, height = 0
  ) +
  scale_x_continuous(
    name = "Sample date",
    breaks = c(0, 35, 70, 105),
    labels = c("June 26", "July 31", "Sept 4", "Oct 9")
  ) +
  scale_y_continuous(
    name = "Viral load (virus/\u00b5g DNA)",
    breaks = c(0, 1, 2, 3),
    labels = c(1, 10, 100, 1000)
  ) +
  # Attempt at two legends: colour + shape titled "Raw data",
  # fill + linetype titled "Prediction"
  scale_colour_brewer(name = "Raw data", palette = "Dark2") +
  scale_shape_discrete(name = "Raw data") +
  scale_fill_brewer(name = "Prediction", palette = "Dark2") +
  scale_linetype_discrete(name = "Prediction") +
  theme_pubr(legend = "right")

And ended up with: Plot 2

This is more or less where I end up no matter what I try.

  • Rather than `print()`ing your data, please share a `dput()` to put your data in a more [reproducible format](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example). – MrFlick Dec 03 '20 at 08:48
  • Also see this question for a similar issue: https://stackoverflow.com/questions/23343333/ggplot2-shape-color-and-linestyle-into-one-legend. You should be able to get separate legends if you give them separate names. – MrFlick Dec 03 '20 at 08:52
  • Thanks for the advice MrFlick. I changed it to ```dput()```. I've experimented with the advice from the linked response as well as others--it works with the exception that I cannot get colors to show up in both legends. In the linked example he has no aes in the base layer and instead has aes calls in the geom_ layers. I'm not sure that the structure of my data would allow me to do the same thing easily – Dieter Kahl Dec 03 '20 at 09:27
  • Oh you want color to be used in two different legends? That's not something ggplot likes very much. – MrFlick Dec 03 '20 at 09:31
  • I'm starting to see that. I added an edit to my question showing where I end up when I try the solution you linked. – Dieter Kahl Dec 03 '20 at 09:42
  • @MrFlick the ability to change `guides` in the later versions of ggplot makes it possible to put color in multiple legends, though as far as I can tell, you need to re-specify the colors. – Allan Cameron Dec 03 '20 at 10:32

1 Answer

2

I think you can get what you want by altering the guides:

# Two separate legends that both show the group colours:
#   "Locus" (linetype + ribbon fill) for the model predictions
#   "Raw"   (colour + point shape)   for the observations
# Aesthetics are grouped into one legend by giving them the same title.
ggplot(predict.df, aes(x = x, y = predicted, colour = group,
                       shape = group, linetype = group)) +
  stat_smooth(method = "lm", formula = y ~ x) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = group),
              alpha = 0.1) +
  # Raw responses, jittered horizontally only (height = 0)
  geom_jitter(data = raw, mapping = aes(x = x, y = response),
              width = 8, height = 0) +
  scale_x_continuous(name = "Sample date", breaks = c(0, 35, 70, 105),
                     labels = c("June 26", "July 31", "Sept 4", "Oct 9")) +
  scale_y_continuous(name = "Viral load (virus/\u00b5g DNA)",
                     breaks = c(0, 1, 2, 3),
                     labels = c(1, 10, 100, 1000)) +
  theme_pubr(legend = "right") +
  labs(color = "Raw", shape = "Raw", fill = "Locus", linetype = "Locus") +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  # guide_legend() takes `title`, not `name`; the titles below repeat the
  # labs() values so each guide states which legend it belongs to.
  guides(
    # Prediction legend: ribbon swatches in the palette colours, at the same
    # transparency as the ribbons on the plot.
    fill = guide_legend(
      title = "Locus",
      override.aes = list(
        fill = RColorBrewer::brewer.pal(3, "Dark2"),
        alpha = 0.1
      )
    ),
    # The colours have to be re-specified here so the lines in the
    # prediction legend are drawn in the group colours.
    linetype = guide_legend(
      title = "Locus",
      override.aes = list(
        color = RColorBrewer::brewer.pal(3, "Dark2")
      )
    ),
    # Raw-data legend: larger point keys ...
    shape = guide_legend(title = "Raw", override.aes = list(size = 3)),
    # ... with the fill and the line (linetype 0 = blank) removed from the
    # keys, so that only the coloured point shapes are left.
    color = guide_legend(
      title = "Raw",
      override.aes = list(fill = NA, linetype = 0)
    )
  )

enter image description here

Allan Cameron
  • 147,086
  • 7
  • 49
  • 87