I am giving bambi (version 0.1.0) a try for a simple Poisson regression model. However the results differ when compared to straight pymc3 or statsmodels implementations, and I cannot seem to figure out how to interpret the coefficients that bambi gives me. The test code is below. Did I specify the model wrong, or should I not rely on the automatic priors of bambi?
import numpy as np
import scipy.stats
import pandas
import patsy
import statsmodels
import pymc3
import bambi
%matplotlib inline
# generate data
num_subjects = 4
mu = [5, 8, 10, 11]
num_samples = [43, 60, 56, 38]
counts = [scipy.stats.poisson.rvs(m,size=n,random_state=m) for m,n in zip(mu,num_samples)]
counts = np.concatenate(counts)
subject = np.repeat(np.arange(num_subjects), num_samples)
df = pandas.DataFrame( np.vstack([subject,counts]).T, columns=['subject','counts'])
# sample means
print( df.groupby('subject').mean() )
# subject 0 = 5.0
# subject 1 = 7.4
# subject 2 = 9.5
# subject 3 = 10.0
# fit with bambi
model_bambi = bambi.Model(df)
result_bambi = model_bambi.fit('counts ~ C(subject)', categorical=['subject'], family='poisson', chains=2)
print(result_bambi.summary(hpd=None, diagnostics=None))
# resulting posterior means:
# Intercept 9.3310 -> ?
# C(subject)[T.1] 3.8171 -> ?
# C(subject)[T.2] 4.4419 -> ?
# C(subject)[T.3] 3.8652 -> ?
# fit directly with pymc3
with pymc3.Model() as model_pymc3:
pymc3.glm.GLM.from_formula("counts ~ C(subject)", df, family=pymc3.glm.families.Poisson())
trace = pymc3.sample(2000, njobs=2, tune=500)
pymc3.plot_posterior(trace, varnames=[x for x in trace.varnames if x[:2]!='mu']);
# resulting posterior means:
# Intercept 1.6065 -> mu = 5.0 = exp(1.6065)
# C(subject)[T.1] 0.3990 -> mu = 7.4 = exp(1.6065+0.3990)
# C(subject)[T.2] 0.6477 -> mu = 9.5 = exp(1.6065+0.6477)
# C(subject)[T.3] 0.6977 -> mu = 10.0 = exp(1.6065+0.6977)
# fit with statsmodels
my, mx = patsy.dmatrices( "counts ~ C(subject)", df, NA_action='raise')
model_sm = statsmodels.api.GLM(my, mx, family=statsmodels.api.families.Poisson())
result_sm = model_sm.fit()
print(result_sm.summary())
# resulting posterior means:
# Intercept 1.6094 -> mu = 5.0 = exp(1.6094)
# C(subject)[T.1] 0.3965 -> mu = 7.4 = exp(1.6094+0.3965)
# C(subject)[T.2] 0.6456 -> mu = 9.5 = exp(1.6094+0.6456)
# C(subject)[T.3] 0.6958 -> mu = 10.0 = exp(1.6094+0.6958)