1

I'm trying to reproduce the Random Fatigue-Limit model (Pascual & Meeker 1999; Ryan 2003, doi: 10.1198/1061860032012) using PyMC3. The performance of the code I came up with is terrible:

Applied log-transform to s_v and added transformed s_v_log to model.
Applied log-transform to tau and added transformed tau_log to model.
[                  0%                  ] 2 of 100000 complete in 1.7 sec
[                  0%                  ] 3 of 100000 complete in 2.9 sec
[                  0%                  ] 4 of 100000 complete in 3.5 sec
[                  0%                  ] 5 of 100000 complete in 6.2 sec
[                  0%                  ] 6 of 100000 complete in 7.5 sec
[                  0%                  ] 7 of 100000 complete in 13.2 sec
[                  0%                  ] 8 of 100000 complete in 13.7 sec
[                  0%                  ] 9 of 100000 complete in 19.4 sec
[                  0%                  ] 10 of 100000 complete in 113.5 sec
...
[                  0%                  ] 39 of 100000 complete in 588.8 sec

I suspect the problem lies in the marginal integrations of the fatigue life conditioned on the fatigue limit (trapezoidal rule). I have left my previous attempts at getting the code to run commented out, which may help readability. What would be the correct way to do this?

The model is log(lifetime) ~ N(beta0 + beta1*log(stress - fatigue_limit), eps). The fatigue limit is a threshold stress that determines whether the lifetime is finite or not, with log(fatigue_limit) ~ N(mu_v, s_v). Right-censoring is also needed for observations where the test had to be stopped at a finite log-life w (called a run-out), so the survival probability 1 - P(W <= w) is used for those observations in my custom likelihood function.

Thank you!

import numpy as np
import pymc3 as pymc
import theano
import theano.tensor as T
from theano.ifelse import ifelse

# Raw test data: one row per specimen -> [stress, lifetime (cycles)].
# Run-outs (tests stopped before failure) are recorded at exactly 1e7 cycles.
data = np.array([[161.908352526, 10000000.0],
    [181.550578943, 10000000.0],
    [201.19280536, 10000000.0],
    [220.835031777, 10000000.0],
    [240.477258194, 10000000.0],
    [260.119484611, 3771909.80463],
    [279.761711028, 3031517.02602],
    [299.403937445, 246228.344425],
    [319.046163862, 164947.588452],
    [338.688390279, 57509.1400708],
    [358.330616697, 80404.6132032],
    [377.972843114, 38003.7533737],
    [397.615069531, 5875.28886189],
    [417.257295948, 1337.63562072],
    [436.899522365, 1641.72977154],
    [456.541748782, 184.309099829],
    [476.183975199, 239.35420232]])

s = data[:, 0] # stresses
y = data[:, 1] # lifetimes
infty = 1e7 # Run-out (right-censoring) limit

# Censor vector: 0 = broken (finite lifetime observed), 1 = survived (run-out).
# BUG FIX: run-outs are recorded at exactly the limit (y == 1e7), so the test
# must be >=.  The original `c[y > infty] = 1` never fired, which silently
# treated every run-out as an observed failure and no observation was censored.
c = np.zeros(y.shape)
c[y < infty] = 0 # Broken, finite lifetime
c[y >= infty] = 1 # Survived, right-censor

x = np.log(s) # Logarithmic stresses
w = np.log(y) # Logarithmic lifetimes

with pymc.Model() as model:
    # Priors.
    # NOTE(review): in pymc3 the second positional argument of Normal/Lognormal
    # is the standard deviation (sd), not the precision (tau) as in pymc2.
    # The 1/sigma**2 values below look like precisions — confirm they should be
    # passed as tau=... instead of positionally.
    b0 = pymc.Normal('b0', 70.0, 1.0/35.0**2) # Constant parameter
    b1 = pymc.Normal('b1', -10.0, 1.0/5.0**2) # Slope parameter
    mu_v = pymc.Normal('mu_v', np.log(450.0), 1.0/np.log(0.2**2+1)) # Log-Fatigue limit mean
    s_v = pymc.Lognormal('s_v', np.log(np.sqrt(np.log(0.2**2+1)))-0.5*np.log(0.2**2+1), 1.0/np.log(0.2**2+1)) # Log-Fatigue limit standard deviation
    tau = pymc.Gamma('tau', 0.01, 0.01) # Measurement precision (alpha=0.01, beta=0.01)
    v = pymc.Normal('v', mu_v, 1.0/s_v**2) # Log-Fatigue limit (latent, per-model draw)

    def mu(x, b0, b1, v): # Logarithmic Random Fatigue-Limit lifetime median value
        """Median log-lifetime at log-stress x for each log-fatigue-limit in v.

        Stresses at or below the fatigue limit (x < vi) have infinite lifetime,
        represented here by the large constant 1e200.
        """
        def median_for_limit(vi):
            # Lazy ifelse keeps log(exp(x) - exp(vi)) from being evaluated
            # when its argument would be non-positive.
            finite = b0 + b1*T.log(T.exp(x) - T.exp(vi))
            return ifelse(T.lt(x, vi), 1e200, finite)

        medians, _updates = theano.scan(median_for_limit, sequences=v)
        return medians

    def p_w(w, x, b0, b1, mu_v, s_v, tau, N=200):
        """Marginal density of log-lifetime w at log-stress x.

        Integrates Normal(w | mu(x, b0, b1, v), 1/tau) * Normal(v | mu_v, s_v)
        over the log-fatigue-limit v with an (N+1)-point trapezoidal rule.
        """
        # Lifetime distribution
        # Integration limits: +/- 6 sd around mu_v, capped at x (v must be
        # below x for a finite lifetime).
        # a = min(x, mu_v - 6*s_v)
        # b = min(x, mu_v + 6*s_v)
        a = ifelse(T.lt(mu_v-6*s_v, x), mu_v-6*s_v, x)
        b = ifelse(T.lt(mu_v+6*s_v, x), mu_v+6*s_v, x)
        # NOTE(review): if x <= mu_v - 6*s_v then a == b, dv == 0 and the
        # integral is 0, so the caller's T.log() yields -inf — confirm this
        # region is unreachable for the data/priors at hand.
        dv = (b-a)/N
        # Trapezoidal quadrature
        # sum = 0.0
        # for i in range(N+1):
            ## fi = norm.pdf(w, mu(x, b0, b1, a+i*dv), 1.0/np.sqrt(tau))*norm.pdf(a+i*dv, mu_v, s_v)
            # fi = T.sqrt(tau/(2.0*np.pi))*T.exp(-tau/2.0*(w - mu(x, b0, b1, a+i*dv))**2)*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((a+i*dv - mu_v)/s_v)**2)
            # if i==0 or i==N: # End points
                # sum += 0.5*fi
            # else: # Interior
                # sum += fi
        # return sum
        vs = a + T.arange(N+1)*dv
        # Integrand on the grid: Normal pdf of w (precision tau) times the
        # Normal pdf of the fatigue limit.  NOTE(review): mu() builds a scan
        # over the 201 grid points, and p_w itself is called from inside the
        # per-observation scan in L() — this nested scan is the likely cause of
        # the very slow sampling; a vectorized T.switch over the grid would
        # avoid the inner scan (verify gradients before switching).
        values = T.sqrt(tau/(2.0*np.pi))*T.exp(-tau/2.0*(w - mu(x, b0, b1, vs))**2)*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((vs - mu_v)/s_v)**2)
        # Trapezoid rule: half-weight at the two end points.
        return dv*(T.sum(values[1:-1]) + 0.5*values[0] + 0.5*values[-1])

    def p_W(w, x, b0, b1, mu_v, s_v, tau, N=200):
        """Marginal CDF of log-lifetime: P(W <= w) at log-stress x.

        Same quadrature as p_w, with the Normal pdf of w replaced by its CDF
        (expressed via erf).  Used for right-censored (run-out) observations.
        """
        # Cumulative lifetime distribution
        # Integration limits (same construction as in p_w)
        # a = min(x, mu_v - 6*s_v)
        # b = min(x, mu_v + 6*s_v)
        a = ifelse(T.lt(mu_v-6*s_v, x), mu_v-6*s_v, x)
        b = ifelse(T.lt(mu_v+6*s_v, x), mu_v+6*s_v, x)
        # NOTE(review): same dv == 0 corner case as in p_w when
        # x <= mu_v - 6*s_v; here it makes 1 - p_W == 1, log == 0.
        dv = (b-a)/N        
        # Trapezoidal quadrature
        # sum = 0.0
        # for i in range(N+1):
            ## fi = norm.cdf(w, mu(x, b0, b1, a+i*dv), 1.0/np.sqrt(tau))*norm.pdf(a+i*dv, mu_v, s_v)
            # fi = 0.5*(1.0 + T.erf(T.sqrt(tau/2.0)*(w - mu(x, b0, b1, a+i*dv))))*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((a+i*dv - mu_v)/s_v)**2)
            # if i==0 or i==N: # End points
                # sum += 0.5*fi
            # else: # Interior
                # sum += fi
        # return sum
        vs = a + T.arange(N+1)*dv
        # Normal CDF of w (precision tau) via erf, times the fatigue-limit pdf.
        # Shares the nested-scan performance concern noted in p_w.
        values = 0.5*(1.0 + T.erf(T.sqrt(tau/2.0)*(w - mu(x, b0, b1, vs))))*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((vs - mu_v)/s_v)**2)
        # Trapezoid rule: half-weight at the two end points.
        return dv*(T.sum(values[1:-1]) + 0.5*values[0] + 0.5*values[-1])

    def Li(value):
        """Log-likelihood of a single observation.

        value is a length-3 row [ci, wi, xi]:
          ci -- censor flag: 0 = broken (finite lifetime), 1 = survived (run-out)
          wi -- log-lifetime
          xi -- log-stress
        """
        censor = value[0]
        log_life = value[1]
        log_stress = value[2]
        # Broken specimen: log of the marginal density at the observed life.
        broken_ll = T.log(p_w(log_life, log_stress, b0, b1, mu_v, s_v, tau))
        # Run-out: log survival probability 1 - P(W <= w).
        runout_ll = T.log(1.0 - p_W(log_life, log_stress, b0, b1, mu_v, s_v, tau))
        return ifelse(T.eq(censor, 0), broken_ll, runout_ll)

    def L(values):
        """Total log-likelihood: sum of the per-row log-likelihoods Li."""
        per_obs, _updates = theano.scan(
            lambda row: Li(values[row]),
            sequences=T.arange(values.shape[0]))
        return T.sum(per_obs)

    # Observation matrix: one row per specimen -> [censor flag, log-life,
    # log-stress], matching what Li() unpacks.  Renamed from `data` so it no
    # longer shadows the module-level raw data array.
    obs = np.vstack([c, w, x]).T
    mylike = pymc.DensityDist('mylike', L, observed=obs)

    # mu, sds, elbo = pymc.variational.advi(n=200000) # pymc advi
    trace = pymc.sample(100000, pymc.NUTS()) # pymc NUTS
  • I have a similar question concerning optimization (https://stackoverflow.com/questions/42205123/how-to-fit-a-method-belonging-to-an-instance-with-pymc3). – Stéphane Aug 29 '17 at 08:05

0 Answers