Code 8: Approximate Bayesian Computation

Code 8: Approximate Bayesian Computation#

This is a reference notebook for the book Bayesian Modeling and Computation in Python

The textbook is not needed to use or run this code, though the context and explanation is missing from this notebook.

If you’d like a copy it’s available from the CRC Press or from Amazon. ``

%matplotlib inline
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
from scipy import stats

from scripts.rf_selector import select_model

az.style.use("arviz-grayscale")
plt.rcParams['figure.dpi'] = 300
np.random.seed(1346)

Fitting a Gaussian the ABC-way#

Figure 8.2#

a = stats.norm(-2.5, 0.5)
b = stats.norm(2.5, 1)
c = stats.norm(0, 3)
x = np.linspace(-6, 6, 500)

lpdf = 0.65 * a.pdf(x) + 0.35* b.pdf(x)
ppdf = c.pdf(x)
_, ax = plt.subplots(figsize=(10, 4))
for c, β in zip(["#A8A8A8", "#585858", "#000000", "#2a2eec"],
                [0, 0.2, 0.5, 1]):
    post = ppdf * lpdf**β
    post /= post.sum()
    ax.plot(x, post, lw=3, label=f"β={β}", color=c)
ax.set_yticks([])
ax.set_xticks([])
ax.set_xlabel("θ")
ax.legend()
plt.savefig("img/chp08/smc_tempering.png")

../_images/20ec2522c71da4393b116f6980b4ba544d88669299caa2bb4baf23fd305da7df.png

Fitting a Gaussian the ABC-way#

data = np.random.normal(loc=0, scale=1, size=1000)

def normal_simulator(μ, σ):
    return np.random.normal(μ, σ, 1000)

Code 8.2 and Figure 8.3#

with pm.Model() as gauss:
    μ = pm.Normal('μ', mu=0, sd=1)
    σ = pm.HalfNormal('σ', sd=1)
    s = pm.Simulator('s', normal_simulator, params=[μ, σ],
                     distance="gaussian",
                     sum_stat="sort",          
                     epsilon=1,
                     observed=data)
    trace_g = pm.sample_smc(kernel="ABC",
                            parallel=True)

Initializing SMC sampler...
Sampling 4 chains in 4 jobs
Stage:   0 Beta: 0.004
Stage:   1 Beta: 0.015
Stage:   2 Beta: 0.049
Stage:   3 Beta: 0.166
Stage:   4 Beta: 0.538
Stage:   5 Beta: 1.000

az.summary(trace_g)

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

	mean	sd	hdi_3%	hdi_97%	mcse_mean	mcse_sd	ess_bulk	ess_tail	r_hat
μ	-0.062	0.044	-0.148	0.019	0.0	0.0	7913.0	7584.0	1.0
σ	0.998	0.039	0.926	1.069	0.0	0.0	8025.0	7463.0	1.0

az.plot_trace(trace_g, kind="rank_vlines", figsize=(10, 4));
plt.savefig('img/chp08/trace_g.png')

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

../_images/13ff4dd9dc99320821a16f83c31346c9532d430a748e8a1c10c7f48a5d196d60.png

Choosing the Distance Function, \(\epsilon\) and the Summary Statistics#

Codes 8.4, 8.5, 8.6, 8.7, and 8.8#

with pm.Model() as gauss_001:
    μ = pm.Normal('μ', mu=0, sd=1)
    σ = pm.HalfNormal('σ', sd=1)
    s = pm.Simulator('s', normal_simulator, params=[μ, σ],
                     sum_stat="sort",
                     epsilon=0.1,
                     observed=data)
    trace_g_001, sim_data_001 = pm.sample_smc(kernel="ABC",
                                      parallel=True,
                                      save_sim_data=True)

with pm.Model() as gauss_01:
    μ = pm.Normal('μ', mu=0, sd=1)
    σ = pm.HalfNormal('σ', sd=1)
    s = pm.Simulator('s', normal_simulator, params=[μ, σ],
                     sum_stat="sort",
                     epsilon=1,
                     observed=data)
    trace_g_01, sim_data_01 = pm.sample_smc(kernel="ABC",
                                      parallel=True,
                                      save_sim_data=True)
    
with pm.Model() as gauss_02:
    μ = pm.Normal('μ', mu=0, sd=1)
    σ = pm.HalfNormal('σ', sd=1)
    s = pm.Simulator('s', normal_simulator, params=[μ, σ],
                     sum_stat="sort",
                     epsilon=2,
                     observed=data)
    trace_g_02, sim_data_02 = pm.sample_smc(kernel="ABC",
                                      parallel=True,
                                      save_sim_data=True)  
    
with pm.Model() as gauss_05:
    μ = pm.Normal('μ', mu=0, sd=1)
    σ = pm.HalfNormal('σ', sd=1)
    s = pm.Simulator('s', normal_simulator, params=[μ, σ],
                     sum_stat="sort",
                     epsilon=5,
                     observed=data)
    trace_g_05, sim_data_05 = pm.sample_smc(kernel="ABC",
                                      parallel=True,
                                      save_sim_data=True)
    
with pm.Model() as gauss_10:
    μ = pm.Normal('μ', mu=0, sd=1)
    σ = pm.HalfNormal('σ', sd=1)
    s = pm.Simulator('s', normal_simulator, params=[μ, σ],
                     sum_stat="sort",
                     epsilon=10,
                     observed=data)
    trace_g_10, sim_data_10 = pm.sample_smc(kernel="ABC",
                                      parallel=True,
                                      save_sim_data=True)


with pm.Model() as gauss_NUTS:
    μ = pm.Normal('μ', mu=0, sd=1)
    σ = pm.HalfNormal('σ', sd=1)
    s = pm.Normal('s', μ, σ,
                  observed=data)
    trace_g_nuts = pm.sample()

Initializing SMC sampler...
Sampling 4 chains in 4 jobs
Stage:   0 Beta: 0.000
Stage:   1 Beta: 0.000
Stage:   2 Beta: 0.000
Stage:   3 Beta: 0.002
Stage:   4 Beta: 0.005
Stage:   5 Beta: 0.015
Stage:   6 Beta: 0.034
Stage:   7 Beta: 0.064
Stage:   8 Beta: 0.114
Stage:   9 Beta: 0.213
Stage:  10 Beta: 0.325
Stage:  11 Beta: 0.465
Stage:  12 Beta: 1.000
Initializing SMC sampler...
Sampling 4 chains in 4 jobs
Stage:   0 Beta: 0.004
Stage:   1 Beta: 0.015
Stage:   2 Beta: 0.052
Stage:   3 Beta: 0.182
Stage:   4 Beta: 0.590
Stage:   5 Beta: 1.000
Initializing SMC sampler...
Sampling 4 chains in 4 jobs
Stage:   0 Beta: 0.015
Stage:   1 Beta: 0.060
Stage:   2 Beta: 0.218
Stage:   3 Beta: 0.753
Stage:   4 Beta: 1.000
Initializing SMC sampler...
Sampling 4 chains in 4 jobs
Stage:   0 Beta: 0.095
Stage:   1 Beta: 0.379
Stage:   2 Beta: 1.000
Initializing SMC sampler...
Sampling 4 chains in 4 jobs
Stage:   0 Beta: 0.365
Stage:   1 Beta: 1.000
<ipython-input-8-931bddbea258>:62: FutureWarning: In v4.0, pm.sample will return an `arviz.InferenceData` object instead of a `MultiTrace` by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
  trace_g_nuts = pm.sample()
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [σ, μ]

100.00% [8000/8000 00:01<00:00 Sampling 4 chains, 0 divergences]

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 2 seconds.

traces = [trace_g_nuts, trace_g_01, trace_g_05, trace_g_10]
az.plot_forest(traces, model_names=["NUTS", "ϵ 1", "ϵ 5", "ϵ 10"],
               colors=["#2a2eec", "#000000", "#585858", "#A8A8A8"],
               figsize=(8, 3));
plt.savefig("img/chp08/trace_g_many_eps.png")

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

../_images/f36d8bca4ae968fda24afd67d35d97abfd30486f7ad9699c42777458cad503e7.png

az.plot_trace(trace_g_001, kind="rank_vlines", figsize=(10, 4));
plt.savefig("img/chp08/trace_g_eps_too_low.png")

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

../_images/49e5671ba4bad4a35f796d84a4f61a97d2ebbca41f4f7b1f8b403e1f587aa7ba.png

traces_ = [trace_g_001, trace_g_01, trace_g_05, trace_g_10]
sim_data_ = [sim_data_001, sim_data_01, sim_data_05, sim_data_10]
epsilons = [0.1, 1, 5, 10]

_, axes = plt.subplots(2, 2, figsize=(10,5))

for i, ax in enumerate(axes.ravel()):
    dada = az.from_pymc3(traces_[i], 
                         posterior_predictive=sim_data_[i])

    pp_vals = np.reshape(sim_data_[i]["s"], (8000, -1))
    tstat_pit = np.mean(pp_vals <= data, axis=0)
    _, tstat_pit_dens = az.kde(tstat_pit)
    
    ax.axhline(1, color="w")
    az.plot_bpv(dada, kind="u_value", ax=ax, reference="analytical")
    ax.tick_params(axis='both', pad=7)
    ax.set_title(f"ϵ={epsilons[i]}, mse={np.mean((1 - tstat_pit_dens)**2) * 100:.2f}")

plt.savefig("img/chp08/bpv_g_many_eps_00.png")

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

../_images/324c642dcc1f5d27f985fff3d07f3ff025515564c0ed564a1bb8641f4966545d.png

_, ax = plt.subplots(2, 2, figsize=(10,5))

ax = ax.ravel()
for i in range(4):
    dada = az.from_pymc3(traces_[i], 
                         posterior_predictive=sim_data_[i])

    az.plot_bpv(dada, kind="p_value", reference='samples', color="C4", ax=ax[i],
               plot_ref_kwargs={"color":"C2"})
    ax[i].set_title(f"ϵ={epsilons[i]}")
plt.savefig("img/chp08/bpv_g_many_eps_01.png")

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

../_images/86ade49b4ec329642636a65d6f16d78ed3f4ec77f24774e304953c5908517a24.png

_, axes = plt.subplots(2, 2, figsize=(10,5))

for i, ax in enumerate(axes.ravel()):
    dada = az.from_pymc3(traces_[i], 
                         posterior_predictive=sim_data_[i])

    az.plot_ppc(dada, num_pp_samples=100, ax=ax, color="C2",
                mean=False, legend=False, observed=False)
    az.plot_kde(dada.observed_data["s"], plot_kwargs={"color":"C4"}, ax=ax)
    ax.set_xlabel("s")
    ax.set_title(f"ϵ={epsilons[i]}")
plt.savefig("img/chp08/ppc_g_many_eps.png")

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

../_images/d87c3e9c3bde4c808c94c57a0ccfd322f2d3deb3000b8eee325376adde6287bf.png

g-and-k distributions#

Figure 8.9#

data = pd.read_csv("../data/air_pollution_bsas.csv")
bsas_co = data["co"].dropna().values

_, axes = plt.subplots(2,1,  figsize=(10,4), sharey=True)
axes[0].hist(bsas_co, bins="auto", color="C1", density=True)
axes[0].set_yticks([])
axes[1].hist(bsas_co[bsas_co < 3], bins="auto", color="C1", density=True)
axes[1].set_yticks([])
axes[1].set_xlabel("CO levels (ppm)")
plt.savefig("img/chp08/co_ppm_bsas.png")
f"We have {sum(bsas_co > 3)} observations larger than 3 ppm"

'We have 8 observations larger than 3 ppm'

../_images/1b7080fc6ff5c1f42d43a30b0a39dcd8fd0995a405a60fa4f2511f0132359e81.png

Code 8.4 and Figure 8.10#

class g_and_k_quantile:
    def __init__(self):
        self.quantile_normal = stats.norm(0, 1).ppf
        self.pdf_normal = stats.norm(0, 1).pdf

    def ppf(self, x, a, b, g, k):
        z = self.quantile_normal(x)
        return a + b * (1 + 0.8 * np.tanh(g*z/2)) * ((1 + z**2)**k) * z

    
    def rvs(self, samples, a, b, g, k):
        x = np.random.uniform(0, 1, samples)
        return self.ppf(x, a, b, g, k)

    def cdf(self, x, a, b, g, k, zscale=False):   
        optimize.fminbound(f, -5, 5)

    def pdf(self, x, a, b, g, k):
        #z = cdf(x, a, b, g, k)
        z = x
        z_sq = z**2
        term1 = (1+z_sq)**k
        term2 = 1+0.8*np.tanh(g*x/2)
        term3 = (1+(2*k+1)*z_sq)/(1+z_sq)
        term4 = 0.8*g*z/(2*np.cosh(g*z/2)**2)

        deriv = b*term1*(term2*term3+term4)
        return self.pdf_normal(x) / deriv

gk = g_and_k_quantile()
u = np.linspace(1E-14, 1-1E-14, 10000)

params = ((0, 1, 0, 0), 
 (0, 1, .4, 0),
 (0, 1,-.4, 0),
 (0, 1, 0, 0.25))

_, ax = plt.subplots(2, 4, sharey="row", figsize=(10, 5))
for i, p in enumerate(params):
    a, b, g, k = p
    ppf = gk.ppf(u, a, b, g, k)
    ax[0, i].plot(u, ppf)
    ax[0, i].set_title(f"a={a}, b={b},\ng={g}, k={k}")
    #ax[1, i].plot(x, gk.pdf(x, a, b, g, k))
    az.plot_kde(ppf, ax=ax[1, i], bw=0.5)
plt.savefig("img/chp08/gk_quantile.png")

../_images/ec37e3ee3aa170d3d3d0c3effc66e7f0c6d9abd529c3e4a5a7d4d91850adce40.png

Code 8.5#

def octo_summary(x):
    e1, e2, e3, e4, e5, e6, e7 = np.quantile(x, [0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875])
    sa = e4
    sb = e6 - e2
    sg = (e6 + e2 - 2*e4)/sb
    sk = (e7 - e5 + e3 - e1)/sb
    return np.array([sa, sb, sg, sk])

Code 8.6#

gk = g_and_k_quantile()
def gk_simulator(a, b, g, k):
    return gk.rvs(len(bsas_co), a, b, g, k)

Code 8.7 and Figure 8.11#

with pm.Model() as gkm:
    a = pm.HalfNormal('a', sd=1)
    b = pm.HalfNormal('b', sd=1)
    g = pm.HalfNormal('g', sd=1)
    k = pm.HalfNormal('k', sd=1)
    
    s = pm.Simulator('s', gk_simulator, params=[a, b, g, k],        
                     sum_stat=octo_summary,
                     epsilon=0.1,
                     observed=bsas_co)
    
    trace_gk, sim_data_gk = pm.sample_smc(kernel="ABC",
                                          parallel=True,
                                          save_sim_data=True,
                                          )

Initializing SMC sampler...
Sampling 4 chains in 4 jobs
Stage:   0 Beta: 0.013
Stage:   1 Beta: 0.066
Stage:   2 Beta: 0.235
Stage:   3 Beta: 0.642
Stage:   4 Beta: 1.000

az.summary(trace_gk)

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

	mean	sd	hdi_3%	hdi_97%	mcse_mean	mcse_sd	ess_bulk	ess_tail	r_hat
a	0.505	0.099	0.312	0.683	0.001	0.001	7692.0	7789.0	1.0
b	0.196	0.070	0.062	0.325	0.001	0.001	7927.0	8040.0	1.0
g	0.451	0.288	0.007	0.954	0.003	0.002	7911.0	7594.0	1.0
k	0.145	0.087	0.005	0.296	0.001	0.001	7492.0	7450.0	1.0

az.plot_trace(trace_gk, kind="rank_vlines")
plt.savefig("img/chp08/trace_gk.png")

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

../_images/589f4c4cd322662d98ed12181bc7f80b07545d06a5be6b429987446290ce08f1.png

axes = az.plot_pair(trace_gk, 
                    kind="kde", 
                    marginals=True,
                    textsize=45,
                    kde_kwargs={"contourf_kwargs":{"cmap":plt.cm.viridis}},
                    )

for ax, pad in zip(axes[:,0], (70, 30, 30, 30)):
    ax.set_ylabel(ax.get_ylabel(), rotation=0, labelpad=pad)

plt.savefig("img/chp08/pair_gk.png")

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

../_images/444244961b4f4fce9c1a7da03862db08830a08c9b314a5b69b18bde23f8521d3.png

Approximating moving averages#

Code 8.8 and Figure 8.12#

def moving_average_2(θ1, θ2, n_obs=200):
    λ = np.random.normal(0, 1, n_obs+2)
    y = λ[2:] + θ1*λ[1:-1] + θ2*λ[:-2]
    return y

We are calling the simulator one more time to generate “observed data”.

θ1_true = 0.7
θ2_true = 0.3
y_obs = moving_average_2(θ1_true, θ2_true)

az.plot_trace({'one sample':moving_average_2(θ1_true, θ2_true),
               'another sample':moving_average_2(θ1_true, θ2_true)},
              trace_kwargs={'alpha':1},
              figsize=(10, 4)
             )
plt.savefig("img/chp08/ma2_simulator_abc.png")

../_images/04789715c90ffbf27cc9186b114a43fe6d2ea4c99314fb28c263e2b8656fc285.png

Code 8.9#

def autocov(x):
    a = np.mean(x[1:] * x[:-1])
    b = np.mean(x[2:] * x[:-2])
    return np.array((a, b))

Code 8.10 and Figure 8.13#

with pm.Model() as model_ma2:
    θ1 = pm.Uniform('θ1', -2, 2)
    θ2 = pm.Uniform('θ2', -1, 1)
    p1 = pm.Potential("p1", pm.math.switch(θ1+θ2 > -1, 0, -np.inf))
    p2 = pm.Potential("p2", pm.math.switch(θ1-θ2 < 1, 0, -np.inf))

    y = pm.Simulator('y', moving_average_2,
                     params=[θ1, θ2],
                     sum_stat=autocov,
                     epsilon=0.1,
                     observed=y_obs)

    trace_ma2 = pm.sample_smc(3000, kernel="ABC", parallel=True)

Initializing SMC sampler...
Sampling 4 chains in 4 jobs
Potentials will be added to the prior term
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/sampling.py:1925: UserWarning: The effect of Potentials on other parameters is ignored during prior predictive sampling. This is likely to lead to invalid or biased predictive samples.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/sampling.py:1925: UserWarning: The effect of Potentials on other parameters is ignored during prior predictive sampling. This is likely to lead to invalid or biased predictive samples.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/sampling.py:1925: UserWarning: The effect of Potentials on other parameters is ignored during prior predictive sampling. This is likely to lead to invalid or biased predictive samples.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/sampling.py:1925: UserWarning: The effect of Potentials on other parameters is ignored during prior predictive sampling. This is likely to lead to invalid or biased predictive samples.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/smc/smc.py:240: RuntimeWarning: invalid value encountered in subtract
  (proposal_logp + backward) - (self.posterior_logp + forward)
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/smc/smc.py:240: RuntimeWarning: invalid value encountered in subtract
  (proposal_logp + backward) - (self.posterior_logp + forward)
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/smc/smc.py:240: RuntimeWarning: invalid value encountered in subtract
  (proposal_logp + backward) - (self.posterior_logp + forward)
Stage:   0 Beta: 0.016
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/smc/smc.py:240: RuntimeWarning: invalid value encountered in subtract
  (proposal_logp + backward) - (self.posterior_logp + forward)
Stage:   1 Beta: 0.084
Stage:   2 Beta: 0.357
Stage:   3 Beta: 1.000

az.plot_trace(trace_ma2, kind="rank_vlines", figsize=(10, 4))
plt.savefig("img/chp08/ma2_trace.png")

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

../_images/a8fef7d4b195f12f7020fac6d1432479a533d5eca59c3f618f7df8995538736c.png

#ax = az.plot_pair(t_p, var_names=["θ1", "θ2"], marginals=True)
axes = az.plot_pair(trace_ma2, kind="kde", var_names=["θ1", "θ2"],
                    marginals=True, figsize=(10,5),
                    kde_kwargs={"contourf_kwargs":{"cmap":plt.cm.viridis}},
                    point_estimate="mean",
                    point_estimate_kwargs={"ls":"none"},
                    point_estimate_marker_kwargs={"marker":".",
                                                  "facecolor":"k",
                                                  "zorder":2})

axes[1,0].set_xlim(-2.1, 2.1)
axes[1,0].set_ylim(-1.1, 1.1)
axes[1,0].set_ylabel(axes[1,0].get_ylabel(), rotation=0)
axes[1,0].plot([0, 2, -2, 0], [-1, 1, 1, -1], "C2", lw=2)
plt.savefig("img/chp08/ma2_triangle.png")

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(

../_images/62777dbe8aa4d492cea26027e7a739f6e9fc863328cc2501f497430287276509.png

Model Comparison in the ABC context#

To reproduce the figures in the book, run loo_abc.py

Model choice via random forest#

def moving_average_1(θ1, n_obs=500):
    λ = np.random.normal(0, 1, n_obs+1)
    y = λ[2:] + θ1*λ[1:-1]
    return y

def moving_average_2(θ1, θ2, n_obs=500):
    λ = np.random.normal(0, 1, n_obs+2)
    y = λ[2:] + θ1*λ[1:-1] + θ2*λ[:-2]
    return y

θ1_true = 0.7
θ2_true = 0.3
y_obs = moving_average_2(θ1_true, θ2_true)

def autocov(x, n=2):
    return np.array([np.mean(x[i:] * x[:-i]) for i in range(1, n+1)])

Code 8.12#

with pm.Model() as model_ma1:
    θ1 = pm.Uniform('θ1', -1, 1)
    y = pm.Simulator('y', moving_average_1,
                     params=[θ1], sum_stat=autocov, epsilon=0.1, observed=y_obs)
    trace_ma1 = pm.sample_smc(3000, kernel="ABC", parallel=True)

Initializing SMC sampler...
Sampling 4 chains in 4 jobs
Stage:   0 Beta: 0.033
Stage:   1 Beta: 0.174
Stage:   2 Beta: 0.534
Stage:   3 Beta: 1.000

with pm.Model() as model_ma2:
    θ1 = pm.Uniform('θ1', -2, 2)
    θ2 = pm.Uniform('θ2', -1, 1)
    p1 = pm.Potential("p1", pm.math.switch(θ1+θ2 > -1, 0, -np.inf))
    p2 = pm.Potential("p2", pm.math.switch(θ1-θ2 < 1, 0, -np.inf))

    y = pm.Simulator('y', moving_average_2,
                     params=[θ1, θ2],
                     sum_stat=autocov,
                     epsilon=0.1,
                     observed=y_obs)

    trace_ma2 = pm.sample_smc(3000, kernel="ABC", parallel=True)

Initializing SMC sampler...
Sampling 4 chains in 4 jobs
Potentials will be added to the prior term
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/sampling.py:1925: UserWarning: The effect of Potentials on other parameters is ignored during prior predictive sampling. This is likely to lead to invalid or biased predictive samples.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/sampling.py:1925: UserWarning: The effect of Potentials on other parameters is ignored during prior predictive sampling. This is likely to lead to invalid or biased predictive samples.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/sampling.py:1925: UserWarning: The effect of Potentials on other parameters is ignored during prior predictive sampling. This is likely to lead to invalid or biased predictive samples.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/sampling.py:1925: UserWarning: The effect of Potentials on other parameters is ignored during prior predictive sampling. This is likely to lead to invalid or biased predictive samples.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/smc/smc.py:240: RuntimeWarning: invalid value encountered in subtract
  (proposal_logp + backward) - (self.posterior_logp + forward)
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/smc/smc.py:240: RuntimeWarning: invalid value encountered in subtract
  (proposal_logp + backward) - (self.posterior_logp + forward)
Stage:   0 Beta: 0.016
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/smc/smc.py:240: RuntimeWarning: invalid value encountered in subtract
  (proposal_logp + backward) - (self.posterior_logp + forward)
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/smc/smc.py:240: RuntimeWarning: invalid value encountered in subtract
  (proposal_logp + backward) - (self.posterior_logp + forward)
Stage:   1 Beta: 0.087
Stage:   2 Beta: 0.377
Stage:   3 Beta: 1.000

mll_ma2 = np.exp(trace_ma2.report.log_marginal_likelihood.mean())
mll_ma1 = np.exp(trace_ma1.report.log_marginal_likelihood.mean())

mll_ma2/mll_ma1

2.643545896452606

Code 8.13#

idata_ma1 = az.from_pymc3(trace_ma1)
lpll = {"s":np.array(trace_ma1.report.log_pseudolikelihood)}
idata_ma1.log_likelihood = az.data.base.dict_to_dataset(lpll)

idata_ma2 = az.from_pymc3(trace_ma2)
lpll = {"s":trace_ma2.report.log_pseudolikelihood}
idata_ma2.log_likelihood = az.data.base.dict_to_dataset(lpll)


cmp = az.compare({"model_ma1":idata_ma1, "model_ma2":idata_ma2})
cmp

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/stats/stats.py:145: UserWarning: The default method used to estimate the weights for each model,has changed from BB-pseudo-BMA to stacking
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/arviz/stats/stats.py:655: UserWarning: Estimated shape parameter of Pareto distribution is greater than 0.7 for one or more samples. You should consider using a more robust model, this is because importance sampling is less likely to work well if the marginal posterior and LOO posterior are very different. This is more likely to happen with a non-robust model and highly influential observations.
  warnings.warn(

	rank	loo	p_loo	d_loo	weight	se	dse	warning	loo_scale
model_ma2	0	-2.283655	1.594485	0.000000	1.0	0.160007	0.000000	True	log
model_ma1	1	-3.558110	2.075971	1.274455	0.0	1.443720	1.603727	False	log

Code 8.14#

from functools import partial
select_model([(model_ma1, trace_ma1), (model_ma2, trace_ma2)], 
             statistics=[partial(autocov, n=6)],
             n_samples=10000,
             observations=y_obs)

/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/sampling.py:1689: UserWarning: samples parameter is smaller than nchains times ndraws, some draws and/or chains may not be represented in the returned posterior predictive sample
  warnings.warn(
/u/32/martino5/unix/anaconda3/envs/pymcv3/lib/python3.9/site-packages/pymc3/sampling.py:1698: UserWarning: The effect of Potentials on other parameters is ignored during posterior predictive sampling. This is likely to lead to invalid or biased predictive samples.
  warnings.warn(

(1, 0.9799999999999998)

Code 8: Approximate Bayesian Computation

Contents

Code 8: Approximate Bayesian Computation#

Fitting a Gaussian the ABC-way#

Figure 8.2#

Fitting a Gaussian the ABC-way#

Code 8.2 and Figure 8.3#

Choosing the Distance Function, \(\epsilon\) and the Summary Statistics#

Codes 8.4, 8.5, 8.6, 8.7, and 8.8#

g-and-k distributions#

Figure 8.9#

Code 8.4 and Figure 8.10#

Code 8.5#

Code 8.6#

Code 8.7 and Figure 8.11#

Approximating moving averages#

Code 8.8 and Figure 8.12#

Code 8.9#

Code 8.10 and Figure 8.13#

Model Comparison in the ABC context#

Model choice via random forest#

Code 8.12#

Code 8.13#

Code 8.14#