1

I want an overlay of a forest plot from the ZINB models of full and the subset of data using the sjPlot package. As you may know, the ZINB model produces two models: one for the count model and one for the zero-inflated model. plot_model works fine when employing the ZINB model from either full or a subset of data meaning producing a plot for both models (count and zero models), but when I overlay using plot_models then only one plot is produced for the count model. I am looking for the count and zero-inflated model plots from the full and sub-model for both the full and the subset of data. any help would be much appreciated

library(sjPlot)
library(sjlabelled)
library(sjmisc)
library(ggplot2)
library(MASS)
library(pscl)
library(boot)


zinb_all_uni <- zeroinfl(ivdays~age,
                 link="logit",
                 dist = "negbin",
                 data=caterpillor)



summary(zinb_all_uni)
plot_model(zinb_all_uni, type="est")


zinb_full_adj <- zeroinfl(ivdays~age+sex+edu,
                 link="logit",
                 dist = "negbin",
                 data=caterpillor)

summary(zinb_full_adj)
plot_model(zinb_full_adj, type="est", terms = c("count_ageb", "count_agec", "zero_ageb", "zero_agec"))

############ second model#######


Zinb_uni_sub <- zeroinfl(ivdays~age,
                     link="logit",
                     dist = "negbin",
                     data=subset(caterpillor, country=="eng"))



summary(zinb_uni_sub)
plot_model(zinb_uni_sub, type="est")



zinb_adj_sub <- zeroinfl(ivdays~age+sex+edu,
                 link="logit",
                 dist = "negbin",
                 data=subset(caterpillor, country=="eng"))

summary(zinb_adj_sub)

plot_model(zinb_adj_sub, type="est", terms = c("count_ageb", "count_agec", "zero_ageb", "zero_agec"))



### overlying plots from both models


plot_models(zinb_all_uni, Zinb_uni_sub)

plot_models(zinb_full_adj, zinb_adj_sub)

DATA:

caterpillor=structure(list(id = 1:100,
               age = structure(c(1L, 1L, 2L, 1L, 
                                 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 
                                 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 
                                 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 
                                 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 
                                 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 
                                 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L),
                               .Label = c("a", "b", "c"), class = "factor"),
               sex = structure(c(2L, 
                                 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 
                                 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 
                                 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 
                                 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
                                 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 
                                 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 
                                 2L, 1L, 1L),
                               .Label = c("F", "M"), class = "factor"),
               country = structure(c(1L, 
                                     1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 
                                     2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 
                                     1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 
                                     2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 
                                     3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 
                                     1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 
                                     2L, 2L, 2L),
                                   .Label = c("eng", "scot", "wale"), class = "factor"), 
               edu = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
                                 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 
                                 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
                                 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 
                                 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
                                 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 
                                 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L),
                               .Label = c("x", "y", "z"), class = "factor"),
               lungfunction = c(45L, 
                                23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L, 
                                70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L, 
                                50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L, 
                                23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L, 
                                70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L, 
                                50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L, 
                                23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 25L, 45L, 70L, 69L, 
                                90L, 50L, 62L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 25L, 45L, 
                                70L, 69L, 90L),
               ivdays = c(15L, 26L, 36L, 34L, 2L, 4L, 5L, 
                          8L, 9L, 15L, 26L, 36L, 34L, 2L, 4L, 5L, 8L, 9L, 15L, 26L, 
                          0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
                          0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
                          0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
                          0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
                          0L, 0L, 0L, 0L, 0L, 5L, 8L, 9L, 36L, 34L, 2L, 4L, 5L, 8L, 
                          9L, 36L, 34L, 2L, 4L, 5L),
               no2_quintile = structure(c(1L, 
                                          1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                          1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
                                          3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
                                          3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 
                                          4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 
                                          5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L),
                                        .Label = c("q1", "q2", 
                                                   "q3", "q4", "q5"), class = "factor")),
          class = "data.frame", row.names = c(NA, 
                                              -100L))

enter image description here

but when i overlay plots i get only one plot

enter image description here

Ben Bolker
  • 211,554
  • 25
  • 370
  • 453
skpak
  • 79
  • 6

1 Answers1

1

Code below, basic points:

  • when I run into trouble with automated machinery like plot_model I usually prefer to use machinery like broom::tidy() (for coefficients) or the ggeffects or emmeans packages (for predictions) and build my own ggplot — for me, it's easier than trying to figure out what the more automated tool is doing
  • broom doesn't have a tidy() method for zeroinfl models, but a little googling finds one in the poissonreg package ...
  • ... however, that tidy() method doesn't have machinery for constructing confidence intervals or back-transforming coefficients to a count-ratio or odds-ratio scale, so I had to implement my own below ...
library(broom)
library(poissonreg)
library(tidyverse) ## purrr::map_dfr, ggplot ...
theme_set(theme_bw())
library(colorspace)
mod_list <- list(all_uni = zinb_all_uni, uni_sub = Zinb_uni_sub,
     full_adj = zinb_full_adj, adj_sub = zinb_adj_sub)


tidy(zinb_all_uni, type = "all")

coefs <- (mod_list
    |> map_dfr(tidy, type = "all",
               .id = "model")
    ## construct CIs
    |> mutate(conf.low  = qnorm(0.025, estimate, std.error),
              conf.high = qnorm(0.975, estimate, std.error))
    |> filter(term != "(Intercept)")  ## usually don't want this
    ## cosmetic (strip results down to the components we actually need)
    |> select(model, term, type, estimate, conf.low, conf.high)
    ## back-transform
    |> mutate(across(c(estimate, conf.low, conf.high), exp))
)

ggplot(coefs, aes(x = estimate, y = term, colour = model)) +
    geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
                    position = position_dodge(width = 0.5)) +
    ## separate count-ratio and odds-ratio (conditional/zero) plots
    facet_wrap(~type, scale = "free") +
    scale_color_discrete_qualitative() ## cosmetic

If you only want to see the age-related coefficients you can add

 |> filter(stringr::str_detect(term, "^age"))

to the end of the pipeline that defines coefs.

Ben Bolker
  • 211,554
  • 25
  • 370
  • 453
  • many thanks for your help and answer. when i run the code. i got following erorr Error: unexpected symbol in: " ggplot" – skpak Dec 22 '22 at 21:46
  • I added a missing parenthesis. – Ben Bolker Dec 22 '22 at 21:59
  • Thank you so much for adding the missing parenthesis. Where should I start if I want to learn all about this code? I know the basics of R. Is It is possible to display only the selected variable from the adjusted model (in this case, age)? Moreover, I was looking for 4 graphs. The first two graphs (count and zero-inflated) for showing estimates from uni models (estimates from full and sub-model). The second two graphs (count and zero-inflated) for showing estimates from adjusted models (estimates from full and sub-model). – skpak Dec 23 '22 at 11:02
  • in this adjusted plot I want to show only the selected variable (in this case age variable only) from both the full and sub-model. many thanks in advance for all your help. – skpak Dec 23 '22 at 11:02
  • See edits for selecting the age-related variables only. I put all the plots together because I was feeling lazy, but you should be able to figure out how to adapt this code to make the plots you want. "R for Data Science" (Grolemund and Wickham, I think) goes through the basics of the tidyverse data "verbs" (mutate, select, filter, etc.), or you can see the tidyverse cheat sheets ... – Ben Bolker Dec 23 '22 at 17:36
  • @ Ben Bolker thanks. s. I'll have a look and sort the rest. I really appreciate it. have a wonderful holiday season – skpak Dec 24 '22 at 13:25
  • Did you install the `poissonreg` package and `library("poissonreg")` before running the code? – Ben Bolker Jan 05 '23 at 19:27
  • @ Ben Bolker i hope you are doing well. and wish you happy new year. when i tried to create separate graphs for uni models, i removed the adjusted models from mod_list (mod_list <- list(all_uni = zinb_all_uni, uni_sub = Zinb_uni_sub) and run the code i got en error Error in select(filter(mutate(map_dfr(mod_list, tidy, type = "all", .id = "model"), : unused arguments (model, term, type, estimate, conf.low, conf.high) – skpak Jan 05 '23 at 20:05
  • Best guess, you are running into this problem: https://stackoverflow.com/questions/24202120/dplyrselect-function-clashes-with-massselect (fix: use `dplyr::select` instead of `select`). (It seems from the error that you posted that you're taking out the pipes [maybe I'm missing something though]? It will be easier to adapt my code if you make only the **minimal** necessary changes at first ...) – Ben Bolker Jan 05 '23 at 20:41
  • PS please don't update your question to add new information based on my answer (i.e. the image you added at the top of your question); asking in the comments is best. Then someone can either give you the solution (if it's quick), or suggest that you ask a new question [which in this case would quickly get closed as a duplicate]. (PPS, I see now that you didn't take out the pipes.) – Ben Bolker Jan 05 '23 at 21:11
  • thanks for your prompt reply and suggestion, i do agree i should not copy the error message on the original question. dplyr::select worked. thank you have a great day. – skpak Jan 06 '23 at 10:55
  • @ Ben Bolker, I created separate graphs for the two uni models (all and sub). I looked for a way to display the estimate value and reference line (in this case, 1) with different color on the graph but I was unable. – skpak Jan 06 '23 at 11:31
  • Maybe `+ geom_vline(xintercept = 1, lty = 2, color = "purple")`. Please post a follow-up question with a [mcve] ... you might want – Ben Bolker Jan 06 '23 at 13:27
  • many thanks for your prompt reply. i do not know how to post a follow-up question. + geom_vline(xintercept = 1, lty = 2, color = "purple") worked for adding reference line. but it did not add an estimated value to the graph. – skpak Jan 06 '23 at 13:59
  • By "follow-up question" I mean, post a new question to Stack Overflow, self-contained (with the most **minimal** example you can construct) but including a link to this question. – Ben Bolker Jan 06 '23 at 14:50
  • @ Ben Bolker, many thanks for clarification. i posted new question . https://stackoverflow.com/questions/75033407/showing-estimate-value-in-the-forest-plot-from-zinb-model – skpak Jan 06 '23 at 16:16