
I am trying to fit a simple straight line (of the form y = mx + c) to some synthetic data using parallel-tempered MCMC. My goal is just to understand how to use it, so that I can apply it to more complex models later. The example I am trying replicates what has already been done with plain emcee: http://dfm.io/emcee/current/user/line/ but instead of ordinary MCMC, I want to use parallel-tempered MCMC: http://dfm.io/emcee/current/user/pt/

Here is my code, which runs without errors:

import numpy as np
from emcee import PTSampler
import emcee

# Choose the "true" parameters.
m_true = -0.9594
b_true = 4.294
f_true = 0.534

# Generate some synthetic data from the model.
N = 50
x = np.sort(10*np.random.rand(N))
yerr = 0.1+0.5*np.random.rand(N)
y = m_true*x+b_true
y += np.abs(f_true*y) * np.random.randn(N)
y += yerr * np.random.randn(N)


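# Log-likelihood for the line model, allowing the quoted errors to be
# underestimated by a fraction f (as in the emcee line-fitting tutorial)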
def lnlike(theta, x, y, yerr):
    m, b, lnf = theta
    model = m * x + b
    inv_sigma2 = 1.0/(yerr**2 + model**2*np.exp(2*lnf))
    return -0.5*(np.sum((y-model)**2*inv_sigma2 - np.log(inv_sigma2)))
def lnprior(theta):
    m, b, lnf = theta
    if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
        return 0.0
    return -np.inf
def lnprob(theta, x, y, yerr):
    lp = lnprior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(theta, x, y, yerr)

import scipy.optimize as op
nll = lambda *args: -lnlike(*args)
result = op.minimize(nll, [m_true, b_true, np.log(f_true)], args=(x, y, yerr))
m_ml, b_ml, lnf_ml = result["x"]

ntemps = 10
nwalkers = 100
ndim = 3
from multiprocessing import Pool
pos = np.random.uniform(low=-1, high=1, size=(ntemps, nwalkers, ndim))
for i in range(ntemps):
    # initialize all walkers at this temperature near the scipy optimum
    pos[i] = np.array([result["x"] + 1e-4*np.random.randn(ndim) for _ in range(nwalkers)])
pool = Pool(processes=4)
sampler = PTSampler(ntemps, nwalkers, ndim, lnlike, lnprior, loglargs=(x, y, yerr), pool=pool)
# burn-in
p, _, _ = sampler.run_mcmc(pos, 1000)
sampler.reset()
# production run, continuing from where burn-in ended
sampler.run_mcmc(p, 10000, thin=10)
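# flatten the full chain: sampler.chain has shape (ntemps, nwalkers, nsteps, ndim)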
samples = sampler.chain.reshape((-1, ndim))
print('Number of posterior samples is {}'.format(samples.shape[0]))
#print best fit value together with errors
print(list(map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
               zip(*np.percentile(samples, [16, 50, 84],
                                  axis=0)))))

import corner
fig = corner.corner(samples, labels=["$m$", "$b$", r"$\ln\,f$"],
                    truths=[m_true, b_true, np.log(f_true)])
fig.savefig("triangle.png")

The only problem is that when I run this code, I get best-fit parameter values that are way off from the true values, and increasing the number of walkers or samples does not help. Can anyone please advise on why tempered MCMC is not working here?

Update:

I found a potentially useful package called ptemcee (https://pypi.org/project/ptemcee/#description), although its documentation is essentially non-existent. This package looks like it might do the job; any help on how to implement the same linear fit with it would also be highly appreciated.
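
Since ptemcee was forked from emcee's PTSampler, I would guess the interface is similar, something like the following (untested, and the argument names are my assumption based on that lineage; it reuses the definitions from the code above):

import ptemcee

# untested sketch -- assumes ptemcee.Sampler keeps a PTSampler-style signature
sampler = ptemcee.Sampler(nwalkers, ndim, lnlike, lnprior, ntemps=ntemps,
                          loglargs=(x, y, yerr))
sampler.run_mcmc(pos, 10000)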

  • Does it have to be done with `emcee`? – David May 27 '19 at 02:22
  • Not necessarily, but I would like to learn how to implement it with `emcee` just because I have been using `emcee` for simple mcmc fittings. – konstant May 27 '19 at 02:42
  • What is `f_true`? – David May 28 '19 at 20:32
  • This is a linear model "where the quoted uncertainties are underestimated by a constant fractional amount." It is an exact duplicate of the example on the emcee website. – konstant May 28 '19 at 21:24

1 Answer


I have modified some lines:

import time
import numpy as np
from emcee import PTSampler
import corner
import matplotlib.pyplot as plt
import scipy.optimize as op

t1 = time.time()

np.random.seed(6) # To reproduce results
# Choose the "true" parameters.
m_true = -0.9594
b_true = 4.294
f_true = 0.534

# Generate some synthetic data from the model.
N = 50
x = np.sort(10 * np.random.rand(N))
yerr = 0.1 + 0.5 * np.random.rand(N)
y_1 = m_true * x + b_true
y = np.abs(f_true * y_1) * np.random.randn(N) + y_1
y += yerr * np.random.randn(N)

plt.plot(x, y, 'o')


# With emcee

def lnlike(theta, x, y, yerr):
    m, b, lnf = theta
    model = m * x + b
    inv_sigma2 = 1.0/(yerr**2 + model**2*np.exp(2*lnf))
    return -0.5*(np.sum((y-model)**2*inv_sigma2 - np.log(inv_sigma2)))
def lnprior(theta):
    m, b, lnf = theta
    if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
        return 0.0
    return -np.inf
def lnprob(theta, x, y, yerr):
    lp = lnprior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(theta, x, y, yerr)


nll = lambda *args: -lnlike(*args)
result = op.minimize(nll, [m_true, b_true, np.log(f_true)], args=(x, y, yerr))
m_ml, b_ml, lnf_ml = result["x"]

ntemps = 10
nwalkers = 100
ndim = 3

pos = np.random.uniform(low=-1, high=1, size=(ntemps, nwalkers, ndim))
for i in range(ntemps):
    # initialize all walkers at this temperature near the scipy optimum
    pos[i] = np.array([result["x"] + 1e-4*np.random.randn(ndim) for _ in range(nwalkers)])

sampler = PTSampler(ntemps, nwalkers, ndim, lnlike, lnprior, loglargs=(x, y, yerr), threads=4)

# burn-in
p, _, _ = sampler.run_mcmc(pos, 100)
sampler.reset()
# production run, continuing from where burn-in ended
sampler.run_mcmc(p, 5000, thin=10)
samples = sampler.chain.reshape((-1, ndim))

print('Number of posterior samples is {}'.format(samples.shape[0]))

#print best fit value together with errors

p1, p2, p3 = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
                             zip(*np.percentile(samples, [16, 50, 84],
                                                axis=0)))

print(p1, '\n', p2, '\n', p3)

fig = corner.corner(samples, labels=["$m$", "$b$", r"$\ln\,f$"],
                    truths=[m_true, b_true, np.log(f_true)])

t2 = time.time()

print('It took {:.3f} s'.format(t2 - t1))

plt.show()

The figure I get with corner is:

[corner plot of the posterior for m, b, and ln f]

The important line is

sampler = PTSampler(ntemps, nwalkers, ndim, lnlike, lnprior, loglargs=(x, y, yerr), threads=4)

I have used threads=4 instead of a multiprocessing Pool.
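
If you prefer the Pool approach from your original code, the same construction should work; only the parallelization mechanism changes:

from multiprocessing import Pool

# distribute likelihood evaluations over 4 worker processes instead of threads
pool = Pool(processes=4)
sampler = PTSampler(ntemps, nwalkers, ndim, lnlike, lnprior,
                    loglargs=(x, y, yerr), pool=pool)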

Look closely at the line print(p1, '\n', p2, '\n', p3): it prints the recovered values of m, b and ln f, each as (median, upper error, lower error):

(-1.277782877669762, 0.5745273177144817, 2.0813620981463297) 
(4.800481378230051, 3.1747356851201163, 2.245189235990341) 
(-0.9391847529845194, 1.1196053087321716, 3.6017609114364273)

For f, you need np.exp(-0.93918), which is 0.3909, close to 0.534. The values you get are close (-1.277 compared to -0.9594, and 4.8 compared to 4.294), and the errors are not bad (except for f's). I mean, are you expecting to get the exact numbers? With this method it takes 111 s to complete on my computer; is that normal?
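
One more thing that may matter: sampler.chain stacks all the temperatures (its shape is (ntemps, nwalkers, nsteps, ndim)), and only the zero-temperature chain samples the actual posterior, so it is worth flattening just that slice before taking the percentiles:

# keep only the zero-temperature chain; the hotter chains sample flattened
# versions of the posterior and inflate the spread if mixed in
samples = sampler.chain[0].reshape((-1, ndim))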

Let's try something different. To be clear: the problem is not easy when f_true is added. I will use pymc3 as a cross-check (you don't need to know how to use pymc3; I just want to verify the results found by emcee).

import time
import numpy as np
import corner
import matplotlib.pyplot as plt
import pymc3 as pm

t1 = time.time()

np.random.seed(6)
# Choose the "true" parameters.
m_true = -0.9594
b_true = 4.294
f_true = 0.534

# Generate some synthetic data from the model.
N = 50
x = np.sort(10 * np.random.rand(N))
yerr = 0.1 + 0.5 * np.random.rand(N)
y_1 = m_true * x + b_true
y = np.abs(f_true * y_1) * np.random.randn(N) + y_1
y += yerr * np.random.randn(N)

plt.plot(x, y, 'o')


with pm.Model() as model: # model specifications in PyMC3 are wrapped in a with-statement
    # Define priors
    f = pm.HalfCauchy('f', beta=5)
    m = pm.Normal('m', 0, sd=20)
    b = pm.Normal('b', 0, sd=20)

    mu2 = b + m * x
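    # the variance uses the noiseless true curve y_1, matching how the synthetic data were generated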
    sigma2 = yerr**2 + f**2 * (y_1)**2

    post = pm.Normal('y', mu=mu2, sd=pm.math.sqrt(sigma2), observed=y)

with model:
    trace = pm.sample(2000, tune=2000)

print(pm.summary(trace))
pm.traceplot(trace)

all_values = np.stack([trace.get_values('b'), trace.get_values('m'), trace.get_values('f')], axis=1)

fig2 = corner.corner(all_values, labels=["$b$", "$m$", "$f$"],
                      truths=[b_true, m_true, f_true])

t2 = time.time()

print('It took {:.3f} s'.format(t2 - t1))

plt.show()

The summary is

       mean        sd  mc_error   hpd_2.5  hpd_97.5        n_eff      Rhat
m -0.995545  0.067818  0.001174 -1.123187 -0.857653  2685.610018  1.000121
b  4.398158  0.332526  0.005585  3.767336  5.057909  2746.736563  1.000201
f  0.425442  0.063884  0.000904  0.311037  0.554446  4195.591204  1.000309

The important part is the mean column: you see that the values found by pymc3 are close to the true values. The hpd_2.5 and hpd_97.5 columns give the bounds of the 95% credible intervals for m, b and f. And it took 14 s.

The figure I get with corner is

[corner plot of the pymc3 posterior for b, m, and f]

You will say that the results from emcee are not quite as good, but if you really want more accuracy, you have to modify this function:

def lnprior(theta):
    m, b, lnf = theta
    if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
        return 0.0
    return -np.inf

The famous prior. In this case it is flat, and a flat prior contributes no information to constrain the fit; there are many other priors you could try, and a more informative one should tighten the posterior.
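
For example, a sketch of a more informative (Gaussian) log-prior centered on the maximum-likelihood estimates from scipy; the 1-sigma widths here are illustrative guesses, not tuned values:

def lnprior_informative(theta):
    # Gaussian log-priors centered on the scipy optimum (m_ml, b_ml, lnf_ml);
    # the widths (0.5, 1.0, 1.0) are illustrative guesses
    m, b, lnf = theta
    return (-0.5 * ((m - m_ml) / 0.5)**2
            - 0.5 * ((b - b_ml) / 1.0)**2
            - 0.5 * ((lnf - lnf_ml) / 1.0)**2)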

  • This does not address my concern though. The best fit results are way off from the true values, and the error bars are way too large. – konstant May 31 '19 at 15:34