0

I want to extract the predicted values (in the generated quantities block) of the Stan fit and compare them with the real observations but I can't find an easy solution. here is how did it with a simple logistic regression model:

library(rstan)
library(tidyverse)
library(boot)

rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

T <- 40
set.seed(123)
x <- sort(runif(T, 0, 10))
alpha <- 1
beta <- 0.2
logit_p <- alpha + beta * x
p <- inv.logit(logit_p)
y <- rbinom(T, 1, p)

model_code <- "
data {
  int<lower=0> N;
  vector[N] x;
  
  int<lower=0,upper=1> y[N];
  
}
parameters {
  real alpha;
  real beta;
}
model {
  y ~ bernoulli_logit(alpha + beta * x);
}
generated quantities {
  vector[N] z;
  for (n in 1:N)
    z[n] = bernoulli_logit_rng(alpha + beta * x[n]);
    
}"

model_data <- list(
  N = T, 
  x = x,
  y = y
)


stan_run <- stan(
  data = model_data,
  model_code = model_code
)


posterior <- rstan::extract(stan_run)

df <- as.data.frame(posterior$z)
df <-df %>% summarise(across(everything(.), 
                             ~ ifelse(length(.[which(. == 1)]) > length(.[which(. == 0)]), 1, 0)))

I don't know if my approach is correct. Does anyone know any straightforward way of doing it?

Amin Shn
  • 532
  • 2
  • 11
  • I am getting the error "object `x` not found" when assigning the `model_data` object. What are `x` and `y`? – scrameri Sep 19 '21 at 11:55
  • @scrameri I just wanted to show my approach of extracting posteriors after the fit so it deosn't matter what are x and y, you can simulate any x and y of your own choice and feed them to the model. – Amin Shn Sep 19 '21 at 14:29
  • Ok but I can't fit the model even after simulating x and y (Syntax Error: dimension mismatch in assignment, line 18). Your problem seems more about how to extract and summarize something from a data.frame, so perhaps it's better to provide an example `posterior` object, or a working, reproducible example fit using rstan with simulated x and y: https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example – scrameri Sep 19 '21 at 15:03
  • @scrameri I assumed anyone who knows the answer doesn't even need to run the code at all (I gave the code redundantly) because the question is very general and has nothing to do with the model or data (it can be any data or model). But now that you asked I edited the code and now it should work. – Amin Shn Sep 19 '21 at 17:39
  • Yes, but one needs to know how the structure of `posterior` looks like, which is why it's good to have this reproducible example now. It would have been sufficient to `dput` a small subset of your `posterior` object, though. – scrameri Sep 19 '21 at 18:04

1 Answers1

1

The apply function can be your friend here:

Run example

library(rstan)
#> Loading required package: StanHeaders
#> Loading required package: ggplot2
#> rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)
#> For execution on a local, multicore CPU with excess RAM we recommend calling
#> options(mc.cores = parallel::detectCores()).
#> To avoid recompilation of unchanged Stan programs, we recommend calling
#> rstan_options(auto_write = TRUE)
library(tidyverse)
library(boot)

rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

T <- 40
set.seed(123)
x <- sort(runif(T, 0, 10))
alpha <- 1
beta <- 0.2
logit_p <- alpha + beta * x
p <- inv.logit(logit_p)
y <- rbinom(T, 1, p)

model_code <- "
data {
  int<lower=0> N;
  vector[N] x;
  
  int<lower=0,upper=1> y[N];
  
}
parameters {
  real alpha;
  real beta;
}
model {
  y ~ bernoulli_logit(alpha + beta * x);
}
generated quantities {
  vector[N] z;
  for (n in 1:N)
    z[n] = bernoulli_logit_rng(alpha + beta * x[n]);
    
}"

model_data <- list(
  N = T, 
  x = x,
  y = y
)

# run model
stan_run <- stan(
  data = model_data,
  model_code = model_code
)
#> Trying to compile a simple C file
#> Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
#> clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.1/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
#> In file included from <built-in>:1:
#> In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
#> In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
#> In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:88:
#> /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
#> namespace Eigen {
#> ^
#> /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
#> namespace Eigen {
#>                ^
#>                ;
#> In file included from <built-in>:1:
#> In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
#> In file included from /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Dense:1:
#> /Library/Frameworks/R.framework/Versions/4.1/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
#> #include <complex>
#>          ^~~~~~~~~
#> 3 errors generated.
#> make: *** [foo.o] Error 1

Use apply

In the case of logistic regression, you can just apply median to get the modal (most frequent) value.

(df.pred <- apply(rstan::extract(stan_run)$z, 2, median)

#>  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#> [39] 1 1

Created on 2021-09-19 by the reprex package (v2.0.1)

scrameri
  • 667
  • 2
  • 12
  • Thanks mate but this solution is not very different than mine (although it's better). I was kinda hopping for a single line solution/function available for it in Stan. In JAGS you can do it via "model_fit$BUGSoutput$mean$z" because JAGS do it for you and you just have to access it. I wish it was this simple in Stan as well. – Amin Shn Sep 19 '21 at 18:42
  • Ok I thought it was more about simplifying your `summarise` code. I just changed my solution to make a one-liner. I don't know any `rstan` function that would extract and summarize it for you, but you could try checking the `rstanarm` and `rstantools` packages. – scrameri Sep 19 '21 at 18:50