Add legend to sjPlot model outputs modified with ggplot_build

Question

I have two models, and I want to plot their estimates (using sjPlot), combining them in one plot. I create my models:

data(iris)

mod1 <- glmmTMB::glmmTMB(Sepal.Length ~ Petal.Length + Petal.Width + (1|Species),
                         data = iris,
                         family = "gaussian")

mod2 <- glmmTMB::glmmTMB(Sepal.Width ~ Petal.Length + Petal.Width + (1|Species),
                         data = iris,
                         family = "gaussian")

And I create my plot objects, with some small personalization:

plot_mod1 <- plot_model(model = mod1,
                        type = "est", # compute the estimates for linear models (standardized beta coefficients)
                        ci.lvl = 0.95, # adds 95% CI
                        ci.style = "whisker", # adds the horizontal lines
                        transform = NULL, # avoid automatic exponential tranformation of coefficients
                        axis.labels = "", # avoid automatic labelling
                        show.legend = T,
                        colors = "blue",
                        vline.color = "grey50") 

plot_mod2 <- plot_model(model = mod2,
                        type = "est", # compute the estimates for linear models (standardized beta coefficients)
                        ci.lvl = 0.95, # adds 95% CI
                        ci.style = "whisker", # adds the horizontal lines
                        transform = NULL, # avoid automatic exponential tranformation of coefficients
                        axis.labels = "", # avoid automatic labelling
                        show.legend = T,
                        colors = "red",
                        vline.color = "grey50") 

p1 <- plot_mod1 + 
  # add large x axis limits - no idea why it needs ylim and not xlim
  scale_y_continuous(breaks = seq(-0.5, 1.5, 0.5), limits = c(-0.5, 1.5)) + 
  # change y axis labels - no idea why it needs the scale_x and not scale_y
  scale_x_discrete(labels = list(
    Petal.Length = "Petal length",
    Petal.Width = "Petal width")) + 
  # add horizontal line to separate variables
  geom_vline(xintercept = 1.5, linetype = "dashed", color = "black") + 
  ylab(c("Estimate")) + 
  labs(title = "My plot")

p2 <- plot_mod2 + 
  # add large x axis limits - no idea why it needs ylim and not xlim
  scale_y_continuous(breaks = seq(-0.5, 1.5, 0.5), limits = c(-0.5, 1.5)) + 
  # change y axis labels - no idea why it needs the scale_x and not scale_y
  scale_x_discrete(labels = list(
    Petal.Length = "Petal length",
    Petal.Width = "Petal width")) + 
  ylab(c("Estimate")) + 
  labs(title = "My plot 2")

Even if I don't understand why the axes are reversed, i.e., why I need to specify discrete names on the x-axis if they are plotted on the y-axis, the plots look fine:

cowplot::plot_grid(p1,p2)

Now I need some deeper manipulation (change in transparency, plotting the estimates from the two models in the same plot...), so I extract the plot information with ggplot_build and play around with that:

qq1 <- ggplot_build(p1)
qq2 <- ggplot_build(p2)

# change plot 1 point size and transparency
qq1$data[[2]]$size <- 5  
qq1$data[[2]]$alpha <- 0.4
qq1$data[[3]]$alpha <- 0.4

# move mod1 estimates up 
qq1$data[[2]]$x <- (qq1$data[[2]]$x) + 0.25
qq1$data[[3]]$x <- (qq1$data[[3]]$x) + 0.25

# move mod2 estimates down
qq2$data[[1]]$x <- (qq2$data[[1]]$x) - 0.25
qq2$data[[2]]$x <- (qq2$data[[2]]$x) - 0.25

# merge everything
qq1$data[[2]] <- bind_rows(qq1$data[[2]], 
                            qq2$data[[1]])

qq1$data[[3]] <- bind_rows(qq1$data[[3]], 
                            qq2$data[[2]])

plot(ggplot_gtable(qq1))

And the output is what I want:

Finally, I would like to have a legend, with blue for mod1 and red for mod2. However, I have no clue where to map my legend. I tried sjPlot::plot_model(show.legend = T) but nothing appears (as they work for marginal effects plots and not for coefficients). I tried adding scale_color_manual(values = c("blue", "red") to p1 <- plot_mod1 + ..., but it changes the colours of the points without creating a legend. I am getting a bit lost as I don't know how to add a legend and where is it being mapped. Any clarification would be appreciated. I am using R version 4.2.2 and sjPlot_2.8.14.

stefan · Answer 1 · 2023-08-23T21:07:05.133

0

An easier approach would be to extract the model data using sjPlot::get_model_data. Then use ggplot2 to create your plot from scratch:

library(ggplot2)

mod_data <- lapply(list(mod1, mod2), \(mod) sjPlot::get_model_data(
  model = mod,
  type = "est",
  ci.lvl = 0.95,
  ci.style = "whisker",
  transform = NULL
))
names(mod_data) <- c("mod1", "mod2")
mod_data <- dplyr::bind_rows(mod_data, .id = "model")
mod_data$model <- forcats::fct_rev(mod_data$model)

ggplot(mod_data, aes(x = estimate, y = term, color = model)) +
  geom_vline(xintercept = 0, linewidth = .25) +
  geom_pointrange(
    aes(xmax = conf.high, xmin = conf.low, size = model, alpha = model),
    position = position_dodge(width = .75)
  ) +
  scale_y_discrete(labels = c(
    Petal.Length = "Petal length",
    Petal.Width = "Petal width"
  )) +
  scale_color_manual(values = c(mod1 = "blue", mod2 = "red"), breaks = rev) +
  scale_size_manual(values = c(mod1 = 1.25, mod2 = .25), breaks = rev) +
  scale_alpha_manual(values = c(mod1 = .4, mod2 = 1), breaks = rev) +
  geom_hline(yintercept = 1.5, linetype = "dashed", color = "black") +
  labs(title = "My plot", x = "Estimate", y = NULL) +
  scale_x_continuous(
    breaks = seq(-0.5, 1.5, 0.5), limits = c(-0.5, 1.5)
  )

edited Aug 23 '23 at 21:07

answered Aug 23 '23 at 20:46

stefan

90,330
6
25
51

Thanks a lot! I have few questions: 1) what is exactly the `\(mod)` part? If I understand, it's equivalent to `lapply(list(mod1, mod2), function(mod){get_model_data(...)})`? 2) what is `breaks = rev`? – LT17 Aug 24 '23 at 14:22
3) I created a column to conditionally change the `alpha` of points (my real data have several models and several variables, and each one can have + or - values), with `mod_data$alpha_plot <- 1` and then `mod_data$alpha_plot[which(mod_data$estimate < 0)] <- .4`, but assigning it to `ggplot` with `scale_alpha_manual(values = c(mod1= c(mod_data[mod_data$model == "mod1",]$alpha_plot)` doesn't do the job, even if what I'm supplying is a vector of numbers of the same length as my variables. – LT17 Aug 24 '23 at 14:23
1) Correct. `\(x) f(x)` is just a shortcut for `function(x) f(x)` similar `~ f(.x)` I the tidyverse. 2) `breaks = rev` or more verbose `breaks = function(x) rev(x)` uses the base R `rev()` function to reverse the order of the categories in the legend aka the breaks. 3) Instead of `scale_alpha_manual` you could use `scale_alpha_identity()` if you want to provide your alpha values via a column of your data. – stefan Aug 24 '23 at 15:09
However, if I understand your correctly you want to use `alpha` to discriminate negative and positive estimates? In that case you could do `alpha = estimate < 0` and use `scale_alpha_manual(values = c("TRUE" = .4, "FALSE" = 1))`. – stefan Aug 24 '23 at 15:09
1

1) and 2): super cool, I didn't know about those shortcuts, thanks. 3) with `geom_pointrange(aes(..., alpha = alpha_plot), ...)` and `scale_alpha_identity()` I managed it. I also managed to increase the whiskers size. Thanks again! – LT17 Aug 24 '23 at 16:10

Add legend to sjPlot model outputs modified with ggplot_build

1 Answers1