Is there a way to plot hurdle model results in R? I was able to plot the zero part (binomial with logit link) of the hurdle model (below) but I can't figure out how to plot the count part of the model (truncated negative binomial with log link). I am using the pscl package for the hurdle model.
Example data (df name = data):
structure(list(Byths = c(333L, 107L, 0L, 0L, 684L, 0L, 113L,
0L, 0L, 20L, 251L, 20L, 0L, 0L, 32L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 182L, 30L, 0L, 0L, 183L, 0L, 0L, 0L, 8L, 213L, 108L,
310L, 960L, 720L, 0L, 0L, 6L, 72L, 78L, 15L, 196L, 256L, 608L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 105L, 0L, 194L, 0L, 440L, 0L, 0L,
0L, 0L, 0L, 18L, 0L, 239L, 262L, 0L, 102L, 17L, 0L, 0L, 0L, 0L,
0L, 68L, 93L, 0L, 226L, 118L, 91L, 330L, 104L, 68L, 224L, 0L,
0L, 18L, 79L, 71L, 8L, 73L, 38L, 39L, 7L), Season = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Pre",
"Peak", "Post"), class = "factor"), depthm = c(20.1170446232626,
9.1441111923921, 18.2882223847842, 18.2882223847842, 17.373811265545,
13.7161667885881, 13.7161667885881, 13.106559375762, 13.106559375762,
8.53450377956596, 8.53450377956596, 12.4969519629359, 20.1170446232626,
20.1170446232626, 8.83930748597903, 19.2026335040234, 19.2026335040234,
20.1170446232626, 20.1170446232626, 20.1170446232626, 20.1170446232626,
20.1170446232626, 14.0209704950012, 14.0209704950012, 14.0209704950012,
14.0209704950012, 14.0209704950012, 8.53450377956596, 12.8017556693489,
20.7266520360888, 20.7266520360888, 17.373811265545, 14.0209704950012,
14.0209704950012, 14.0209704950012, 8.22970007315289, 8.22970007315289,
18.5930260911973, 17.373811265545, 8.53450377956596, 7.92489636673982,
7.92489636673982, 7.92489636673982, 7.92489636673982, 7.92489636673982,
7.92489636673982, 9.44891489880517, 9.44891489880517, 9.44891489880517,
9.44891489880517, 15.8497927334796, 12.1921482565228, 8.83930748597903,
8.83930748597903, 18.8978297976103, 8.83930748597903, 8.83930748597903,
17.0690075591319, 20.4218483296757, 20.4218483296757, 20.1170446232626,
20.1170446232626, 13.106559375762, 13.106559375762, 13.106559375762,
13.106559375762, 13.106559375762, 13.106559375762, 8.83930748597903,
10.6681297244574, 9.75371860521824, 9.75371860521824, 9.75371860521824,
11.5825408436967, 9.1441111923921, 17.0690075591319, 17.0690075591319,
17.0690075591319, 17.0690075591319, 17.0690075591319, 17.0690075591319,
20.1170446232626, 10.0585223116313, 17.373811265545, 20.1170446232626,
20.4218483296757, 20.4218483296757, 7.92489636673982, 7.92489636673982,
7.92489636673982, 7.92489636673982, 7.92489636673982, 7.92489636673982,
7.92489636673982, 7.92489636673982)), row.names = c(5L, 7L, 12L,
13L, 14L, 16L, 18L, 21L, 22L, 23L, 24L, 26L, 28L, 29L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 39L, 46L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 59L, 61L, 62L, 63L, 65L, 66L, 71L, 73L, 75L,
77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 92L, 95L, 98L,
107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L,
118L, 119L, 125L, 137L, 139L, 142L, 143L, 144L, 147L, 150L, 151L,
152L, 154L, 156L, 157L, 159L, 160L, 161L, 162L, 163L, 165L, 177L,
178L, 179L, 180L, 181L, 183L, 184L, 185L), class = "data.frame")
Model:
mod <- hurdle(Byths ~ depthm * Season, data = data, dist = "negbin")
summary(mod)
Gives:
Call:
hurdle(formula = Byths ~ depthm * Season, data = data, dist = "negbin")
Pearson residuals:
Min 1Q Median 3Q Max
-0.8326 -0.5676 -0.2256 0.1534 4.7222
Count model coefficients (truncated negbin with log link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) 5.88538 0.63004 9.341 < 2e-16 ***
depthm -0.05014 0.05577 -0.899 0.368658
SeasonPeak -3.03180 0.86315 -3.512 0.000444 ***
SeasonPost -2.74270 1.95696 -1.402 0.161061
depthm:SeasonPeak 0.17058 0.07106 2.400 0.016380 *
depthm:SeasonPost 0.21029 0.13113 1.604 0.108783
Log(theta) 0.13512 0.19772 0.683 0.494367
Zero hurdle model coefficients (binomial with logit link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.02258 1.10922 3.627 0.000287 ***
depthm -0.32024 0.08317 -3.851 0.000118 ***
SeasonPeak -2.55477 1.65557 -1.543 0.122798
SeasonPost -3.94742 3.35163 -1.178 0.238892
depthm:SeasonPeak 0.27367 0.12088 2.264 0.023569 *
depthm:SeasonPost 0.30061 0.21990 1.367 0.171617
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Theta: count = 1.1447
Number of iterations in BFGS optimization: 15
Log-likelihood: -344.6 on 13 Df
I exponentiated the coefficients to interpret them, however I would like to make some figures for my thesis.
The only way I figured out how to plot the zero part of the hurdle model was by doing the following:
#turned counts into presence/absence
structure(list(Byths = c(1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1,
1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1),
Season = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Pre",
"Peak", "Post"), class = "factor"), depthm = c(20.1170446232626,
9.1441111923921, 18.2882223847842, 18.2882223847842, 17.373811265545,
13.7161667885881, 13.7161667885881, 13.106559375762, 13.106559375762,
8.53450377956596, 8.53450377956596, 12.4969519629359, 20.1170446232626,
20.1170446232626, 8.83930748597903, 19.2026335040234, 19.2026335040234,
20.1170446232626, 20.1170446232626, 20.1170446232626, 20.1170446232626,
20.1170446232626, 14.0209704950012, 14.0209704950012, 14.0209704950012,
14.0209704950012, 14.0209704950012, 8.53450377956596, 12.8017556693489,
20.7266520360888, 20.7266520360888, 17.373811265545, 14.0209704950012,
14.0209704950012, 14.0209704950012, 8.22970007315289, 8.22970007315289,
18.5930260911973, 17.373811265545, 8.53450377956596, 7.92489636673982,
7.92489636673982, 7.92489636673982, 7.92489636673982, 7.92489636673982,
7.92489636673982, 9.44891489880517, 9.44891489880517, 9.44891489880517,
9.44891489880517, 15.8497927334796, 12.1921482565228, 8.83930748597903,
8.83930748597903, 18.8978297976103, 8.83930748597903, 8.83930748597903,
17.0690075591319, 20.4218483296757, 20.4218483296757, 20.1170446232626,
20.1170446232626, 13.106559375762, 13.106559375762, 13.106559375762,
13.106559375762, 13.106559375762, 13.106559375762, 8.83930748597903,
10.6681297244574, 9.75371860521824, 9.75371860521824, 9.75371860521824,
11.5825408436967, 9.1441111923921, 17.0690075591319, 17.0690075591319,
17.0690075591319, 17.0690075591319, 17.0690075591319, 17.0690075591319,
20.1170446232626, 10.0585223116313, 17.373811265545, 20.1170446232626,
20.4218483296757, 20.4218483296757, 7.92489636673982, 7.92489636673982,
7.92489636673982, 7.92489636673982, 7.92489636673982, 7.92489636673982,
7.92489636673982, 7.92489636673982)), row.names = c(5L, 7L,
12L, 13L, 14L, 16L, 18L, 21L, 22L, 23L, 24L, 26L, 28L, 29L, 32L,
33L, 34L, 35L, 36L, 37L, 38L, 39L, 46L, 48L, 49L, 50L, 51L, 52L,
53L, 54L, 55L, 56L, 57L, 59L, 61L, 62L, 63L, 65L, 66L, 71L, 73L,
75L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 92L, 95L,
98L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L,
117L, 118L, 119L, 125L, 137L, 139L, 142L, 143L, 144L, 147L, 150L,
151L, 152L, 154L, 156L, 157L, 159L, 160L, 161L, 162L, 163L, 165L,
177L, 178L, 179L, 180L, 181L, 183L, 184L, 185L), class = "data.frame")
Then plotted hurdle part of model...
ggplot(dataPA, aes(x = depthm, y = Byths)) +
geom_point() +
stat_smooth(method = "glm", method.args = list(family="binomial"), se = TRUE) + facet_wrap(~ Season)
Is there a way to plot the count model part of the hurdle model (truncated negbin)? Or a better way to plot hurdle model results in general?
Should I be using predict.hurdle for this and use predicted values? If so, how do I do this, the examples I've found haven't been clear enough for me to understand and what I have been trying for a few weeks has not worked.
Thank you!