Code 4: Extending Linear Models#

import pymc3 as pm
import matplotlib.pyplot as plt
import arviz as az
import pandas as pd
import numpy as np
from scipy import stats

import theano.tensor as tt
import theano

import datetime
print(f"Last Run {datetime.datetime.now()}")

Last Run 2021-11-19 16:44:33.955392

az.style.use("arviz-grayscale")
plt.rcParams['figure.dpi'] = 300


Transforming Covariates#

Code 4.1#

babies = pd.read_csv('../data/babies.csv')

# Add a constant term so we can use the dot product approach
babies["Intercept"] = 1

babies.head()


   Month  Length  Intercept
0      0    48.5          1
1      0    50.5          1
2      0    50.5          1
3      0    52.0          1
4      0    47.5          1
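
The intercept column turns the model's mean into a single matrix-vector product. A quick NumPy check of that equivalence (illustrative; the coefficient values below are made up):

X = babies[["Intercept", "Month"]].values
beta_check = np.array([48.0, 1.4])  # hypothetical coefficients
# X @ beta_check computes β0 * 1 + β1 * Month for every row
assert np.allclose(X @ beta_check, beta_check[0] + beta_check[1] * babies["Month"].values)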

Figure 4.1#

fig, ax = plt.subplots()

ax.plot(babies["Month"], babies["Length"], 'C0.', alpha=0.1)
ax.set_ylabel("Length")
ax.set_xlabel("Month");
plt.savefig('img/chp04/baby_length_scatter.png', dpi=300)


Code 4.2#

with pm.Model() as model_baby_linear:
    β = pm.Normal("β", sigma=10, shape=2)

    # Use dot product instead of expanded multiplication
    μ = pm.Deterministic("μ", pm.math.dot(babies[["Intercept", "Month"]], β))
    ϵ = pm.HalfNormal("ϵ", sigma=10)

    length = pm.Normal("length", mu=μ, sigma=ϵ, observed=babies["Length"])

    trace_linear = pm.sample(draws=2000, tune=4000)
    ppc_linear = pm.sample_posterior_predictive(trace_linear)
    inf_data_linear = az.from_pymc3(trace=trace_linear,
                                    posterior_predictive=ppc_linear)

/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/2954483905.py:10: FutureWarning: In v4.0, pm.sample will return an arviz.InferenceData object instead of a MultiTrace by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
trace_linear = pm.sample(draws=2000, tune=4000)
Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ϵ, β]

100.00% [24000/24000 00:08<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 4_000 tune and 2_000 draw iterations (16_000 + 8_000 draws total) took 18 seconds.
The acceptance probability does not match the target. It is 0.5738214538580835, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.8837497551468421, but should be close to 0.8. Try to increase the number of tuning steps.

100.00% [8000/8000 00:06<00:00]

Figure 4.2#

fig, ax = plt.subplots()

ax.set_ylabel("Length")
ax.set_xlabel("Month");

μ_m = inf_data_linear.posterior["μ"].values.reshape(-1, babies["Length"].shape[0]).mean(axis=0)

ax.plot(babies["Month"], μ_m, c='C4')
az.plot_hdi(babies["Month"], inf_data_linear.posterior_predictive["length"], hdi_prob=.50, ax=ax)
az.plot_hdi(babies["Month"], inf_data_linear.posterior_predictive["length"], hdi_prob=.94, ax=ax)

ax.plot(babies["Month"], babies["Length"], 'C0.', alpha=0.1)

plt.savefig('img/chp04/baby_length_linear_fit.png', dpi=300)

az.loo(inf_data_linear)

Computed from 8000 by 800 log-likelihood matrix

Estimate       SE
elpd_loo -2133.32    18.59
p_loo        3.23        -


Code 4.3#

with pm.Model() as model_baby_sqrt:
    β = pm.Normal("β", sigma=10, shape=2)

    μ = pm.Deterministic("μ", β[0] + β[1] * np.sqrt(babies["Month"]))
    σ = pm.HalfNormal("σ", sigma=10)

    length = pm.Normal("length", mu=μ, sigma=σ, observed=babies["Length"])
    inf_data_sqrt = pm.sample(draws=2000, tune=4000)

    ppc_baby_sqrt = pm.sample_posterior_predictive(inf_data_sqrt)
    inf_data_sqrt = az.from_pymc3(trace=inf_data_sqrt,
                                  posterior_predictive=ppc_baby_sqrt)

/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/628193978.py:8: FutureWarning: In v4.0, pm.sample will return an arviz.InferenceData object instead of a MultiTrace by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
inf_data_sqrt = pm.sample(draws=2000, tune=4000)
Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [σ, β]

100.00% [24000/24000 00:10<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 4_000 tune and 2_000 draw iterations (16_000 + 8_000 draws total) took 19 seconds.

100.00% [8000/8000 00:06<00:00]
fig, ax = plt.subplots()

ax.plot(babies["Month"], babies["Length"], 'C0.', alpha=0.1)

ax.set_ylabel("Length")
ax.set_xlabel("Month");

μ_m = inf_data_sqrt.posterior["μ"].values.reshape(-1, babies["Length"].shape[0]).mean(axis=0)

az.plot_hdi(babies["Month"], inf_data_sqrt.posterior_predictive["length"], hdi_prob=.50, ax=ax)
az.plot_hdi(babies["Month"], inf_data_sqrt.posterior_predictive["length"], hdi_prob=.94, ax=ax)

ax.plot(babies["Month"], μ_m, c='C4')

plt.savefig('img/chp04/baby_length_sqrt_fit.png', dpi=300)


Figure 4.3#

fig, axes = plt.subplots(1,2)
axes[0].plot(babies["Month"], babies["Length"], 'C0.', alpha=0.1)

μ_m = inf_data_sqrt.posterior["μ"].values.reshape(-1, babies["Length"].shape[0]).mean(axis=0)

axes[0].plot(babies["Month"], μ_m, c='C4')
az.plot_hdi(babies["Month"], inf_data_sqrt.posterior_predictive["length"], hdi_prob=.50, ax=axes[0])
az.plot_hdi(babies["Month"], inf_data_sqrt.posterior_predictive["length"], hdi_prob=.94, ax=axes[0])

axes[0].set_ylabel("Length")
axes[0].set_xlabel("Month");

axes[1].plot(np.sqrt(babies["Month"]), babies["Length"], 'C0.', alpha=0.1)
axes[1].set_xlabel("Square Root of Month");

az.plot_hdi(np.sqrt(babies["Month"]), inf_data_sqrt.posterior_predictive["length"], hdi_prob=.50, ax=axes[1])
az.plot_hdi(np.sqrt(babies["Month"]), inf_data_sqrt.posterior_predictive["length"], hdi_prob=.94, ax=axes[1])
axes[1].plot(np.sqrt(babies["Month"]), μ_m, c='C4')

axes[1].set_yticks([])

plt.savefig('img/chp04/baby_length_sqrt_fit.png', dpi=300)

az.compare({"Linear Model":inf_data_linear, "Non Linear Model":inf_data_sqrt})

/opt/miniconda3/envs/bmcp/lib/python3.9/site-packages/arviz/stats/stats.py:145: UserWarning: The default method used to estimate the weights for each model,has changed from BB-pseudo-BMA to stacking
warnings.warn(

                  rank          loo     p_loo       d_loo    weight         se        dse  warning loo_scale
Non Linear Model     0 -1968.218405  3.079101    0.000000  0.928708  18.990940   0.000000    False       log
Linear Model         1 -2133.321593  3.229639  165.103188  0.071292  18.589207  20.083518    False       log
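
The same comparison can be inspected graphically. A minimal sketch using ArviZ's plot_compare (not part of the original notebook):

comparison = az.compare({"Linear Model": inf_data_linear,
                         "Non Linear Model": inf_data_sqrt})
az.plot_compare(comparison);  # ELPD ranking with standard error bars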

Varying Uncertainty#

Code 4.4#

with pm.Model() as model_baby_vv:
    β = pm.Normal("β", sigma=10, shape=2)

    δ = pm.HalfNormal("δ", sigma=10, shape=2)

    μ = pm.Deterministic("μ", β[0] + β[1] * np.sqrt(babies["Month"]))
    σ = pm.Deterministic("σ", δ[0] + δ[1] * babies["Month"])

    length = pm.Normal("length", mu=μ, sigma=σ, observed=babies["Length"])

    trace_baby_vv = pm.sample(2000, target_accept=.95)
    ppc_baby_vv = pm.sample_posterior_predictive(trace_baby_vv,
                                                 var_names=["length", "σ"])
    inf_data_baby_vv = az.from_pymc3(trace=trace_baby_vv,
                                     posterior_predictive=ppc_baby_vv)

/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/3065721124.py:12: FutureWarning: In v4.0, pm.sample will return an arviz.InferenceData object instead of a MultiTrace by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
trace_baby_vv = pm.sample(2000, target_accept=.95)
Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [δ, β]

100.00% [12000/12000 00:09<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 19 seconds.

100.00% [8000/8000 00:08<00:00]
az.summary(inf_data_baby_vv, var_names=["δ"])

      mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  ess_tail  r_hat
δ[0]  2.389  0.117    2.17    2.609      0.002    0.002    2715.0    2919.0    1.0
δ[1]  0.038  0.009    0.02    0.056      0.000    0.000    2663.0    2329.0    1.0
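
Plugging the posterior means into σ = δ0 + δ1 · Month shows how the spread grows with age (an illustrative check, not in the original notebook):

δ_m = inf_data_baby_vv.posterior["δ"].mean(dim=("chain", "draw")).values
print(f"σ at birth ≈ {δ_m[0]:.2f}; σ at 24 months ≈ {δ_m[0] + δ_m[1] * 24:.2f}")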
fig, ax = plt.subplots()

ax.set_ylabel("Length")
ax.set_xlabel("Month");

ax.plot(babies["Month"], babies["Length"], 'C0.', alpha=0.1)

μ_m = inf_data_baby_vv.posterior["μ"].values.reshape(-1, babies["Length"].shape[0]).mean(axis=0)

ax.plot(babies["Month"], μ_m, c='C4')

az.plot_hdi(babies["Month"], inf_data_baby_vv.posterior_predictive["length"], hdi_prob=.50, ax=ax)
az.plot_hdi(babies["Month"], inf_data_baby_vv.posterior_predictive["length"], hdi_prob=.94, ax=ax)

plt.savefig('img/chp04/baby_length_sqrt_vv_fit.png', dpi=300)


Figure 4.4#

fig, axes = plt.subplots(2,1)

axes[0].plot(babies["Month"], babies["Length"], 'C0.', alpha=0.1)

μ_m = inf_data_baby_vv.posterior["μ"].values.reshape(-1, babies["Length"].shape[0]).mean(axis=0)

axes[0].plot(babies["Month"], μ_m, c='C4')

az.plot_hdi(babies["Month"], inf_data_baby_vv.posterior_predictive["length"], hdi_prob=.50, ax=axes[0])
az.plot_hdi(babies["Month"], inf_data_baby_vv.posterior_predictive["length"], hdi_prob=.94, ax=axes[0])
axes[0].set_ylabel("Length")

σ_m = inf_data_baby_vv.posterior["σ"].values.reshape(-1, 800).mean(axis=0)

axes[1].plot(babies["Month"], σ_m, c='C1')

axes[1].set_ylabel("σ")
axes[1].set_xlabel("Month")

axes[0].set_xlim(0,24)
axes[1].set_xlim(0,24)

plt.savefig('img/chp04/baby_length_sqrt_vv_fit_include_error.png', dpi=300)


Interaction effects#

Code 4.5#

tips_df = pd.read_csv('../data/tips.csv')
tips_df.head()

   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4
tips = tips_df["tip"]
total_bill_c = (tips_df["total_bill"] - tips_df["total_bill"].mean())
smoker = pd.Categorical(tips_df["smoker"]).codes
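
pd.Categorical(...).codes converts the smoker labels into integers, so β[2] acts as an additive offset for smokers. A quick look at the coding (illustrative):

print(pd.Categorical(tips_df["smoker"]).categories)  # categories in code order: 'No' -> 0, 'Yes' -> 1
print(smoker[:5])  # the first few integer codes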

with pm.Model() as model_no_interaction:
    β = pm.Normal("β", mu=0, sigma=1, shape=3)
    σ = pm.HalfNormal("σ", 1)

    μ = (β[0] +
         β[1] * total_bill_c +
         β[2] * smoker)

    obs = pm.Normal("obs", μ, σ, observed=tips)
    trace_no_interaction = pm.sample(1000, tune=1000)

/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/3786948218.py:14: FutureWarning: In v4.0, pm.sample will return an arviz.InferenceData object instead of a MultiTrace by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
trace_no_interaction = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [σ, β]

100.00% [8000/8000 00:03<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 13 seconds.

_, ax = plt.subplots(figsize=(8, 4.5))

β0_nonint = trace_no_interaction['β'][:,0]
β1_nonint = trace_no_interaction['β'][:,1]
β2_nonint = trace_no_interaction['β'][:,2]

pred_y_non_smokers = β0_nonint + β1_nonint * total_bill_c[:,None]
pred_y_smokers = β0_nonint + β1_nonint * total_bill_c[:,None] + β2_nonint

ax.scatter(total_bill_c[smoker==0], tips[smoker==0], label='non-smokers', marker='.')
ax.scatter(total_bill_c[smoker==1], tips[smoker==1], label='smokers', marker='.', c="C4")
ax.set_xlabel('Total Bill')
ax.set_ylabel('Tip')
ax.legend()

ax.plot(total_bill_c, pred_y_non_smokers.mean(1), lw=2)
ax.plot(total_bill_c, pred_y_smokers.mean(1), lw=2, c="C4");

/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/2436504886.py:7: FutureWarning: Support for multi-dimensional indexing (e.g. obj[:, None]) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.
pred_y_non_smokers = β0_nonint + β1_nonint * total_bill_c[:,None]
/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/2436504886.py:8: FutureWarning: Support for multi-dimensional indexing (e.g. obj[:, None]) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.
pred_y_smokers = β0_nonint + β1_nonint * total_bill_c[:,None] + β2_nonint


Code 4.6#

with pm.Model() as model_interaction:
    β = pm.Normal('β', mu=0, sigma=1, shape=4)
    σ = pm.HalfNormal('σ', 1)

    μ = (β[0] +
         β[1] * total_bill_c +
         β[2] * smoker +
         β[3] * smoker * total_bill_c
         )

    obs = pm.Normal('obs', μ, σ, observed=tips)
    trace_interaction = pm.sample(1000, tune=1000)

/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/146995100.py:12: FutureWarning: In v4.0, pm.sample will return an arviz.InferenceData object instead of a MultiTrace by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
trace_interaction = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [σ, β]

100.00% [8000/8000 00:04<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 13 seconds.


Figure 4.5#

total_bill_c = total_bill_c.values

_, ax = plt.subplots(1, 2, figsize=(8, 4.5))


β0_nonint = trace_no_interaction['β'][:,0]
β1_nonint = trace_no_interaction['β'][:,1]
β2_nonint = trace_no_interaction['β'][:,2]

pred_y_non_smokers = β0_nonint + β1_nonint * total_bill_c[:,None]
pred_y_smokers = β0_nonint + β1_nonint * total_bill_c[:,None] + β2_nonint

ax[0].scatter(total_bill_c[smoker==0], tips[smoker==0], label='non-smokers', marker='.')
ax[0].scatter(total_bill_c[smoker==1], tips[smoker==1], label='smokers', marker='.', c="C4")
ax[0].set_xlabel('Total Bill (Centered)')
ax[0].set_ylabel('Tip')
ax[0].legend(frameon=True)

ax[0].plot(total_bill_c, pred_y_non_smokers.mean(1), lw=2)
ax[0].plot(total_bill_c, pred_y_smokers.mean(1), lw=2, c="C4")
ax[0].set_title('No Interaction')

az.plot_hdi(total_bill_c, pred_y_non_smokers.T, color='C0', ax=ax[0])
az.plot_hdi(total_bill_c, pred_y_smokers.T, ax=ax[0], color="C4");

β0_int = trace_interaction['β'][:,0]
β1_int = trace_interaction['β'][:,1]
β2_int = trace_interaction['β'][:,2]
β3_int = trace_interaction['β'][:,3]

# Because smoker=0, the terms that include the smoker covariate drop out
pred_y_non_smokers = (β0_int +
                      β1_int * total_bill_c[:, None])

# Because smoker=1, multiplying by the smoker covariate leaves those terms unchanged
pred_y_smokers = (β0_int +
                  β1_int * total_bill_c[:, None] +
                  β2_int +
                  β3_int * total_bill_c[:, None])

ax[1].scatter(total_bill_c[smoker==0], tips[smoker==0], label='non-smokers', marker='.')
ax[1].scatter(total_bill_c[smoker==1], tips[smoker==1], label='smokers', marker='.', c="C4")
ax[1].set_xlabel('Total Bill (Centered)')
ax[1].set_yticks([])

ax[1].set_title('Interaction')

ax[1].plot(total_bill_c, pred_y_non_smokers.mean(1), lw=2)
ax[1].plot(total_bill_c, pred_y_smokers.mean(1), lw=2)
az.plot_hdi(total_bill_c, pred_y_non_smokers.T, color='C0', ax=ax[1])
az.plot_hdi(total_bill_c, pred_y_smokers.T, ax=ax[1], color="C4");

plt.savefig('img/chp04/smoker_tip_interaction.png', dpi=300)

/opt/miniconda3/envs/bmcp/lib/python3.9/site-packages/arviz/stats/stats.py:456: FutureWarning: hdi currently interprets 2d data as (draw, shape) but this will change in a future release to (chain, draw) for coherence with other functions
warnings.warn(
/opt/miniconda3/envs/bmcp/lib/python3.9/site-packages/arviz/stats/stats.py:456: FutureWarning: hdi currently interprets 2d data as (draw, shape) but this will change in a future release to (chain, draw) for coherence with other functions
warnings.warn(
/opt/miniconda3/envs/bmcp/lib/python3.9/site-packages/arviz/stats/stats.py:456: FutureWarning: hdi currently interprets 2d data as (draw, shape) but this will change in a future release to (chain, draw) for coherence with other functions
warnings.warn(
/opt/miniconda3/envs/bmcp/lib/python3.9/site-packages/arviz/stats/stats.py:456: FutureWarning: hdi currently interprets 2d data as (draw, shape) but this will change in a future release to (chain, draw) for coherence with other functions
warnings.warn(


Robust Regression#

Figure 4.6#

mean = 5
sigma = 2

x = np.linspace(-5, 15, 1000)
fig, ax = plt.subplots()

ax.plot(x, stats.norm(mean, sigma).pdf(x), label=f"Normal μ={mean}, σ={sigma}", color="C4")

for i, nu in enumerate([1, 2, 20], 1):
    ax.plot(x, stats.t(loc=mean, scale=sigma, df=nu).pdf(x),
            label=f"Student T μ={mean}, σ={sigma}, ν={nu}", color=f"C{i}")

ax.set_xlim(-5, 18)
ax.legend(loc="upper right", frameon=False)
ax.set_yticks([])
plt.savefig('img/chp04/studentt_normal_comparison.png', dpi=300)


Figure 4.7#

def generate_sales(*, days, mean, std, label):
    np.random.seed(0)
    df = pd.DataFrame(index=range(1, days+1), columns=["customers", "sales"])
    for day in range(1, days+1):
        num_customers = stats.randint(30, 100).rvs()+1

        # This is correct as there is an independent draw for each customer's order
        dollar_sales = stats.norm(mean, std).rvs(num_customers).sum()

        df.loc[day, "customers"] = num_customers
        df.loc[day, "sales"] = dollar_sales

    # Fix the types so as not to cause Theano errors
    df = df.astype({'customers': 'int32', 'sales': 'float32'})

    # Sorting will make plotting the posterior predictive easier later
    df["Food_Category"] = label
    df = df.sort_values("customers")
    return df

# The empanada data generation was missing from this cell. The call below is
# assumed: ~200 days of empanada sales plus a handful of extreme outlier days
# (the outlier value is an illustrative stand-in)
empanadas = generate_sales(days=200, mean=180, std=30, label="Empanada")
empanadas.iloc[:5, empanadas.columns.get_loc("sales")] = 90000  # outlier days (assumed values)
empanadas = empanadas.sort_values("customers")

fig, ax = plt.subplots()

empanadas.plot(x="customers", y="sales", kind="scatter", ax=ax)
ax.set_ylabel("Argentine Peso")
ax.set_xlabel("Customer Count")


Code 4.7#

with pm.Model() as model_non_robust:
    σ = pm.HalfNormal("σ", 50)
    β = pm.Normal('β', mu=150, sigma=20)

    μ = pm.Deterministic("μ", β * empanadas["customers"])

    sales = pm.Normal("sales", mu=μ, sigma=σ, observed=empanadas["sales"])

    # The sampling calls were truncated in this cell; restored to match the
    # pattern used elsewhere in the notebook and the outputs below
    trace_non_robust = pm.sample()
    ppc_non_robust = pm.sample_posterior_predictive(trace_non_robust)
    inf_data_non_robust = az.from_pymc3(trace=trace_non_robust,
                                        posterior_predictive=ppc_non_robust)


/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/1588961933.py:9: FutureWarning: In v4.0, pm.sample will return an arviz.InferenceData object instead of a MultiTrace by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [β, σ]

100.00% [8000/8000 00:07<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 16 seconds.
The acceptance probability does not match the target. It is 0.8860777877942666, but should be close to 0.8. Try to increase the number of tuning steps.

100.00% [4000/4000 00:03<00:00]

Figure 4.8#

fig, axes = plt.subplots(2, 1, figsize=(8, 4), sharex=True)

μ_m = inf_data_non_robust.posterior["μ"].values.reshape(-1, empanadas.shape[0]).mean(axis=0)

# Reconstructed loop body (assumed): data plus the non-robust fit on both
# axes; the second axis is zoomed in to the bulk of the data
for i in range(2):
    empanadas.plot(x="customers", y="sales", kind="scatter", ax=axes[i])
    axes[i].plot(empanadas["customers"], μ_m, c="C4")

axes[1].set_ylabel("Argentine Peso")

axes[0].set_ylabel("")
axes[1].set_xlabel("Customer Count")
axes[1].set_ylim(400, 25000);


Table 4.1#

az.summary(inf_data_non_robust, kind="stats", var_names=["β", "σ"]).round(1)

     mean    sd  hdi_3%  hdi_97%
β   207.0   3.0   201.4    212.5
σ  2952.3  25.3  2904.3   2997.6
with pm.Model() as model_robust:
    σ = pm.HalfNormal("σ", 50)
    β = pm.Normal("β", mu=150, sigma=20)
    ν = pm.HalfNormal("ν", 20)

    μ = pm.Deterministic("μ", β * empanadas["customers"])

    sales = pm.StudentT("sales", mu=μ, sigma=σ, nu=ν,
                        observed=empanadas["sales"])

    # Sampling calls restored as in the non-robust model above
    trace_robust = pm.sample()
    ppc_robust = pm.sample_posterior_predictive(trace_robust)
    inf_data_robust = az.from_pymc3(trace=trace_robust,
                                    posterior_predictive=ppc_robust)


/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/2239246076.py:11: FutureWarning: In v4.0, pm.sample will return an arviz.InferenceData object instead of a MultiTrace by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ν, β, σ]

100.00% [8000/8000 00:03<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 12 seconds.

100.00% [4000/4000 00:02<00:00]

Table 4.2#

az.summary(inf_data_robust, var_names=["β", "σ", "ν"], kind="stats").round(1)

    mean    sd  hdi_3%  hdi_97%
β  179.6   0.3   179.1    180.1
σ  152.0  13.9   126.4    178.4
ν    1.3   0.2     1.0      1.6
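
The small ν ≈ 1.3 is what blunts the outliers: a Student-t with so few degrees of freedom keeps substantial mass far out in the tails, so extreme sales days stop dragging β upward. A quick scipy comparison of two-sided tail probabilities (illustrative):

# Probability of a draw landing more than 5 scale units from the center
print(2 * stats.norm.sf(5))       # Normal: about 6e-7
print(2 * stats.t(df=1.3).sf(5))  # Student-t with ν=1.3: on the order of 0.1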

Figure 4.9#

fig, ax = plt.subplots(figsize=(10, 6))

# Reconstructed plotting body (assumed): data plus the robust fit
empanadas.plot(x="customers", y="sales", kind="scatter", ax=ax)
μ_m = inf_data_robust.posterior["μ"].values.reshape(-1, empanadas.shape[0]).mean(axis=0)
ax.plot(empanadas["customers"], μ_m, c="C4")

ax.set_ylim(4000, 20000);
ax.set_ylabel("Argentine Peso")
ax.set_xlabel("Customer Count")
ax.set_title("Empanada Sales with Robust Regression Fit")

az.compare({"Non robust": inf_data_non_robust, "Robust":inf_data_robust})

/opt/miniconda3/envs/bmcp/lib/python3.9/site-packages/arviz/stats/stats.py:145: UserWarning: The default method used to estimate the weights for each model,has changed from BB-pseudo-BMA to stacking
warnings.warn(
/opt/miniconda3/envs/bmcp/lib/python3.9/site-packages/arviz/stats/stats.py:655: UserWarning: Estimated shape parameter of Pareto distribution is greater than 0.7 for one or more samples. You should consider using a more robust model, this is because importance sampling is less likely to work well if the marginal posterior and LOO posterior are very different. This is more likely to happen with a non-robust model and highly influential observations.
warnings.warn(

            rank          loo       p_loo        d_loo        weight          se         dse  warning loo_scale
Robust         0 -1473.978565    5.912134     0.000000  1.000000e+00   31.902351    0.000000    False       log
Non robust     1 -3698.588569  146.177400  2224.610004  1.146134e-09  829.237992  799.490297     True       log

Pooling, Multilevel Models, and Mixed Effects#


pizza_df = generate_sales(days=365, mean=13, std=5, label="Pizza")
sandwich_df = generate_sales(days=100, mean=6, std=5, label="Sandwich")
# The salad call was missing from this cell; 3 days and a mean near 8 are
# assumed (they reproduce the 468 total rows and β[1] ≈ 8.1 shown below)
salad_df = generate_sales(days=3, mean=8, std=3, label="Salad")

sales_df = pd.concat([pizza_df, sandwich_df, salad_df]).reset_index(drop=True)
sales_df["Food_Category"] = pd.Categorical(sales_df["Food_Category"])
sales_df

     customers       sales Food_Category
0           31  459.895203         Pizza
1           31  401.147736         Pizza
2           31  413.345245         Pizza
3           31  371.909241         Pizza
4           32  433.797089         Pizza
..         ...         ...           ...
463        100  611.736816      Sandwich
464        100  667.152954      Sandwich

468 rows × 3 columns

Figure 4.10#

fig, ax = plt.subplots()
pizza_df.plot(x="customers", y="sales", kind="scatter", ax=ax, c="C1", label="Pizza", marker="^", s=60);
sandwich_df.plot(x="customers", y="sales", kind="scatter", ax=ax, label="Sandwich", marker="s");
salad_df.plot(x="customers", y="sales", kind="scatter", ax=ax, label="Salad", c="C4");  # salad scatter restored (styling assumed)

ax.set_xlabel("Number of Customers")
ax.set_ylabel("Daily Sales Dollars")
ax.set_title("Aggregated Sales Dollars")
ax.legend()

plt.savefig('img/chp04/restaurant_order_scatter.png', dpi=300)


Code 4.9#

customers = sales_df.loc[:, "customers"].values
sales_observed = sales_df.loc[:, "sales"].values
food_category = pd.Categorical(sales_df["Food_Category"])

with pm.Model() as model_sales_unpooled:
    σ = pm.HalfNormal("σ", 20, shape=3)
    β = pm.Normal("β", mu=10, sigma=10, shape=3)

    μ = pm.Deterministic("μ", β[food_category.codes] * customers)

    sales = pm.Normal("sales", mu=μ, sigma=σ[food_category.codes],
                      observed=sales_observed)

    trace_sales_unpooled = pm.sample(target_accept=.9)
    inf_data_sales_unpooled = az.from_pymc3(
        trace=trace_sales_unpooled,
        coords={"β_dim_0": food_category.categories,
                "σ_dim_0": food_category.categories})

/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/3049063767.py:14: FutureWarning: In v4.0, pm.sample will return an arviz.InferenceData object instead of a MultiTrace by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
trace_sales_unpooled = pm.sample(target_accept=.9)
Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [β, σ]

100.00% [8000/8000 00:05<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 14 seconds.


Figure 4.12#

sales_unpooled_diagram = pm.model_to_graphviz(model_sales_unpooled)
sales_unpooled_diagram

inf_data_salads_sales_unpooled = inf_data_sales_unpooled.posterior.sel(β_dim_0="Salad", μ_dim_0=slice(465, 467))

az.summary(inf_data_sales_unpooled, var_names=["β", "σ"])

        mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  ess_tail  r_hat
β[0]  13.025  0.030  12.970   13.081      0.000    0.000    5104.0    2785.0    1.0
β[1]   8.129  0.214   7.720    8.561      0.004    0.003    3504.0    2280.0    1.0
β[2]   6.111  0.052   6.010    6.204      0.001    0.001    5298.0    2765.0    1.0
σ[0]  40.128  1.469  37.362   42.801      0.020    0.014    5405.0    2605.0    1.0
σ[1]  21.361  8.327   8.919   37.105      0.158    0.116    3148.0    3021.0    1.0
σ[2]  35.931  2.506  31.092   40.501      0.034    0.024    5377.0    2902.0    1.0
az.plot_trace(inf_data_sales_unpooled, var_names=["β", "σ"], compact=False);


Figure 4.13#

axes = az.plot_forest([inf_data_sales_unpooled],
                      model_names=["Unpooled"],
                      var_names=["β"], combined=True, figsize=(7, 1.8));
axes[0].set_title("β parameter estimates 94% HDI")


Figure 4.14#

axes = az.plot_forest([inf_data_salads_sales_unpooled],
                      model_names=["Unpooled"],
                      var_names=["σ"], combined=True, figsize=(7, 1.8));
axes[0].set_title("σ parameter estimates 94% HDI")


Code 4.10#

with pm.Model() as model_sales_pooled:
    σ = pm.HalfNormal("σ", 20)
    β = pm.Normal("β", mu=10, sigma=10)

    μ = pm.Deterministic("μ", β * customers)

    sales = pm.Normal("sales", mu=μ, sigma=σ,
                      observed=sales_observed)

    inf_data_sales_pooled = pm.sample()

/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/1895007518.py:10: FutureWarning: In v4.0, pm.sample will return an arviz.InferenceData object instead of a MultiTrace by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
inf_data_sales_pooled = pm.sample()
Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [β, σ]

100.00% [8000/8000 00:02<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 11 seconds.

with model_sales_pooled:
    ppc_sales_pooled = pm.sample_posterior_predictive(inf_data_sales_pooled)
    inf_data_sales_pooled = az.from_pymc3(
        trace=inf_data_sales_pooled,
        posterior_predictive=ppc_sales_pooled)

100.00% [4000/4000 00:03<00:00]

Figure 4.16#

pooled_sales_diagram = pm.model_to_graphviz(model_sales_pooled)
pooled_sales_diagram

az.plot_trace(inf_data_sales_pooled, var_names=["β", "σ"], compact=False);

az.summary(inf_data_sales_pooled, var_names=["β", "σ"])

      mean     sd   hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  ess_tail  r_hat
β   11.498  0.126   11.264   11.729      0.002    0.001    3561.0    2629.0    1.0
σ  186.204  5.271  176.336  195.841      0.090    0.063    3466.0    3240.0    1.0

Figure 4.17#

axes = az.plot_forest([inf_data_sales_pooled, inf_data_sales_unpooled],
                      model_names=["Pooled", "Unpooled"], var_names=["σ"],
                      combined=True, figsize=(7, 1.8));
axes[0].set_title("Comparison of pooled and unpooled models \n 94% HDI")



Figure 4.18#

fig, ax = plt.subplots(figsize=(10, 6))
μ_m = inf_data_sales_pooled.posterior["μ"].values.reshape(-1, sales_df.shape[0]).mean(axis=0)
σ_m = inf_data_sales_pooled.posterior["σ"].mean().values

ax.plot(customers, μ_m, c='C4')

az.plot_hdi(customers, inf_data_sales_pooled.posterior_predictive["sales"], hdi_prob=.50, ax=ax)
az.plot_hdi(customers, inf_data_sales_pooled.posterior_predictive["sales"], hdi_prob=.94, ax=ax)

pizza_df.plot(x="customers", y="sales", kind="scatter", ax=ax, c="C1", label="Pizza", marker="^", s=60);
sandwich_df.plot(x="customers", y="sales", kind="scatter", ax=ax,  label="Sandwich", marker="s");

ax.set_xlabel("Number of Customers")
ax.set_ylabel("Daily Sales Dollars")
ax.set_title("Pooled Regression")


Code 4.11#

with pm.Model() as model_pooled_sigma_sales:
    σ = pm.HalfNormal("σ", 20)
    β = pm.Normal("β", mu=10, sigma=20, shape=3)

    μ = pm.Deterministic("μ", β[food_category.codes] * customers)

    sales = pm.Normal("sales", mu=μ, sigma=σ, observed=sales_observed)

    trace_pooled_sigma_sales = pm.sample()
    ppc_pooled_sigma_sales = pm.sample_posterior_predictive(
        trace_pooled_sigma_sales)

    inf_data_pooled_sigma_sales = az.from_pymc3(
        trace=trace_pooled_sigma_sales,
        posterior_predictive=ppc_pooled_sigma_sales,
        coords={"β_dim_0": food_category.categories})

/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/2977589499.py:9: FutureWarning: In v4.0, pm.sample will return an arviz.InferenceData object instead of a MultiTrace by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
trace_pooled_sigma_sales = pm.sample()
Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [β, σ]

100.00% [8000/8000 00:03<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 12 seconds.

100.00% [4000/4000 00:03<00:00]
multilevel_sales_diagram = pm.model_to_graphviz(model_pooled_sigma_sales)
multilevel_sales_diagram

az.summary(inf_data_pooled_sigma_sales, var_names=["β", "σ"])

        mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  ess_tail  r_hat
β[0]  13.025  0.030  12.971   13.082      0.000    0.000    7261.0    3250.0    1.0
β[1]   8.128  0.366   7.441    8.776      0.005    0.003    6291.0    3066.0    1.0
β[2]   6.113  0.059   6.003    6.226      0.001    0.001    6336.0    2674.0    1.0
σ     39.257  1.318  36.809   41.689      0.017    0.012    6281.0    2926.0    1.0

Figure 4.20#

fig, ax = plt.subplots(figsize=(10, 6))
σ_m = inf_data_pooled_sigma_sales.posterior["σ"].mean().values
μ_m = inf_data_pooled_sigma_sales.posterior["μ"].values.reshape(-1, sales_df.shape[0]).mean(axis=0)

# Reconstructed loop body (assumed): per-category posterior mean with a
# ±1 σ band from the single shared sigma
for i in range(3):
    category_mask = food_category.codes == i
    ax.plot(customers[category_mask], μ_m[category_mask], c="C4")
    ax.fill_between(customers[category_mask],
                    μ_m[category_mask] - σ_m, μ_m[category_mask] + σ_m,
                    alpha=.3, color="C1")

pizza_df.plot(x="customers", y="sales", kind="scatter", ax=ax, c="C1", label="Pizza", marker="^", s=60);
sandwich_df.plot(x="customers", y="sales", kind="scatter", ax=ax, label="Sandwich", marker="s");
salad_df.plot(x="customers", y="sales", kind="scatter", ax=ax, label="Salad", c="C4");  # salad scatter assumed

ax.set_xlabel("Number of Customers")
ax.set_ylabel("Daily Sales Dollars")
ax.set_title("Unpooled Slope Pooled Sigma Regression")


Figure 4.21#

axes = az.plot_forest([inf_data_sales_unpooled,
                       inf_data_pooled_sigma_sales],
                      model_names=["Unpooled",
                                   "Multilevel"],
                      var_names=["σ"], combined=True, figsize=(7, 1.8));
axes[0].set_title("Comparison of σ parameters 94% HDI")


axes = az.plot_forest([inf_data_sales_unpooled,
                       inf_data_pooled_sigma_sales],
                      model_names=["Unpooled",
                                   "Multilevel"],
                      var_names=["β"], combined=True, figsize=(7, 2.8));
axes[0].set_title("Comparison of β parameters 94% HDI")

Text(0.5, 1.0, 'Comparison of β parameters 94% HDI')

inf_posterior_salads = inf_data_pooled_sigma_sales.posterior.sel(β_dim_0="Salad", μ_dim_0=slice(465, 467))


Code 4.12#

with pm.Model() as model_hierarchical_sales:
    σ_hyperprior = pm.HalfNormal("σ_hyperprior", 20)
    σ = pm.HalfNormal("σ", σ_hyperprior, shape=3)

    β = pm.Normal("β", mu=10, sigma=20, shape=3)
    μ = pm.Deterministic("μ", β[food_category.codes] * customers)

    sales = pm.Normal("sales", mu=μ, sigma=σ[food_category.codes],
                      observed=sales_observed)

    trace_hierarchical_sales = pm.sample(target_accept=.9)

    inf_data_hierarchical_sales = az.from_pymc3(
        trace=trace_hierarchical_sales,
        coords={"β_dim_0": food_category.categories,
                "σ_dim_0": food_category.categories})

/var/folders/7p/srk5qjp563l5f9mrjtp44bh800jqsw/T/ipykernel_8872/2572718925.py:11: FutureWarning: In v4.0, pm.sample will return an arviz.InferenceData object instead of a MultiTrace by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
trace_hierarchical_sales = pm.sample(target_accept=.9)
Auto-assigning NUTS sampler...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [β, σ, σ_hyperprior]

100.00% [8000/8000 00:05<00:00 Sampling 4 chains, 7 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 15 seconds.
There were 5 divergences after tuning. Increase target_accept or reparameterize.
There were 2 divergences after tuning. Increase target_accept or reparameterize.

az.plot_trace(inf_data_hierarchical_sales, compact=False, var_names=["β", "σ", "σ_hyperprior"]);

az.plot_parallel(inf_data_hierarchical_sales, var_names=["σ", "σ_hyperprior"])

<AxesSubplot:>


Figure 4.23#

hierarchical_sales_diagram = pm.model_to_graphviz(model_hierarchical_sales)
hierarchical_sales_diagram

az.summary(inf_data_hierarchical_sales, var_names=["β", "σ"])

        mean      sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  ess_tail  r_hat
β[0]  13.025   0.031  12.968   13.082      0.000    0.000    5196.0    2491.0    1.0
β[1]   8.131   0.275   7.613    8.641      0.006    0.004    2558.0    1788.0    1.0
β[2]   6.113   0.053   6.012    6.208      0.001    0.001    4419.0    2689.0    1.0
σ[0]  40.261   1.474  37.489   43.064      0.021    0.015    4979.0    2825.0    1.0
σ[1]  26.007  13.588   7.889   50.398      0.304    0.215    2404.0    2152.0    1.0
σ[2]  36.274   2.604  31.514   41.230      0.038    0.027    4816.0    2676.0    1.0
axes = az.plot_forest(inf_data_hierarchical_sales, var_names=["β"], combined=True,  figsize=(7, 1.5))
axes[0].set_title("Hierarchical β estimates 94% HDI")

Text(0.5, 1.0, 'Hierarchical β estimates 94% HDI')

axes = az.plot_forest(inf_data_hierarchical_sales, var_names=["σ", "σ_hyperprior"], combined=True,  figsize=(7, 1.8))
axes[0].set_title("Hierarchical σ estimates 94% HDI")

print(food_category.categories)

Index(['Pizza', 'Salad', 'Sandwich'], dtype='object')


Table 4.3#

az.summary(inf_data_sales_unpooled.posterior["σ"], kind="stats").round(1)

      mean   sd  hdi_3%  hdi_97%
σ[0]  40.1  1.5    37.4     42.8
σ[1]  21.4  8.3     8.9     37.1
σ[2]  35.9  2.5    31.1     40.5

Table 4.4#

az.summary(inf_data_hierarchical_sales, var_names=["σ", "σ_hyperprior"], kind="stats").round(1)

              mean    sd  hdi_3%  hdi_97%
σ[0]          40.3   1.5    37.5     43.1
σ[1]          26.0  13.6     7.9     50.4
σ[2]          36.3   2.6    31.5     41.2
σ_hyperprior  31.7   9.1    16.6     48.4
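
Tables 4.3 and 4.4 show partial pooling at work: the Salad σ, estimated from only a few observed days, is pulled from its unpooled value toward the shared hyperprior. A quick check (illustrative, not in the original notebook):

unpooled_salad = inf_data_sales_unpooled.posterior["σ"].isel(σ_dim_0=1).mean().item()        # index 1 = Salad
hierarchical_salad = inf_data_hierarchical_sales.posterior["σ"].isel(σ_dim_0=1).mean().item()
hyper_mean = inf_data_hierarchical_sales.posterior["σ_hyperprior"].mean().item()
print(f"Salad σ: unpooled {unpooled_salad:.1f} -> hierarchical {hierarchical_salad:.1f} "
      f"(hyperprior mean {hyper_mean:.1f})")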
axes = az.plot_forest([inf_data_sales_unpooled.posterior["σ"].isel(σ_dim_0=1),
                       inf_data_hierarchical_sales],
                      model_names=["sales_unpooled",
                                   "sales_hierarchical"],
                      combined=True, figsize=(7, 2.6),
                      var_names=["σ", "σ_hyperprior"]);
axes[0].set_title("Comparison of σ parameters from unpooled \n and hierarchical models \n 94% HDI")


fig, ax = plt.subplots()
# The unpooled curve implied by the title was missing from this cell (restored)
az.plot_kde(inf_data_sales_unpooled.posterior["σ"].isel(σ_dim_0=1),
            label="Unpooled Salad Sigma", ax=ax)
az.plot_kde(inf_data_hierarchical_sales.posterior["σ"].isel(σ_dim_0=1),
            label="Hierarchical Salad Sigma", plot_kwargs={"color": "C4"}, ax=ax);

ax.set_title("Comparison of Hierarchical versus Unpooled Variance")

Text(0.5, 1.0, 'Comparison of Hierarchical versus Unpooled Variance')


Figure 4.25#

nsample = 10000
nd=1
yr = stats.norm.rvs(loc=2., scale=3., size=nsample)
xnr = stats.norm.rvs(loc=0., scale=np.exp(yr/4), size=(nd, nsample))

fig, ax = plt.subplots()
ax.scatter(xnr[0], yr, marker='.', alpha=.05, color="C4")
ax.set_xlim(-20, 20)
ax.set_ylim(-9, 9)
ax.set_xlabel('x')
ax.set_ylabel('y')

Text(0, 0.5, 'y')

def salad_generator(hyperprior_beta_mean=5, hyperprior_beta_sigma=.2, sigma=50,
                    days_per_location=[6, 4, 15, 10, 3, 5],
                    sigma_per_location=[50, 10, 20, 80, 30, 20]):
    beta_hyperprior = stats.norm(hyperprior_beta_mean, hyperprior_beta_sigma)

    # Generate demand days per restaurant
    df = pd.DataFrame()
    for i, days in enumerate(days_per_location):
        np.random.seed(0)

        num_customers = stats.randint(30, 100).rvs(days)
        sales_location = beta_hyperprior.rvs()*num_customers + stats.norm(0, sigma_per_location[i]).rvs(num_customers.shape)

        location_df = pd.DataFrame({"customers": num_customers, "sales": sales_location})
        location_df["location"] = i
        location_df = location_df.sort_values(by="customers", ascending=True)
        df = pd.concat([df, location_df])

    df.reset_index(inplace=True, drop=True)
    return df


Figure 4.26#

hierarchical_salad_df = salad_generator()

fig, axes = plt.subplots(2, 3, sharex=True, sharey=True)

# Reconstructed loop body (assumed): one scatter panel per location
for i, ax in enumerate(axes.ravel()):
    location_filter = (hierarchical_salad_df["location"] == i)
    hierarchical_salad_df[location_filter].plot(kind="scatter", x="customers", y="sales", ax=ax)
    ax.set_xlabel("")
    ax.set_ylabel("")

axes[1,0].set_xlabel("Number of Customers")
axes[1,0].set_ylabel("Sales");


Code 4.13#

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
root = tfd.JointDistributionCoroutine.Root

# The wrapped sampler was missing here; TFP's windowed adaptive NUTS matches
# the call pattern and the outputs below
run_mcmc = tf.function(
    tfp.experimental.mcmc.windowed_adaptive_nuts,
    autograph=False, jit_compile=True)

def gen_hierarchical_salad_sales(input_df, beta_prior_fn, dtype=tf.float32):
    customers = tf.constant(
        input_df["customers"].values, dtype=dtype)
    location_category = input_df["location"].values
    sales = tf.constant(input_df["sales"].values, dtype=dtype)

    @tfd.JointDistributionCoroutine
    def model_hierarchical_salad_sales():
        β_μ_hyperprior = yield root(tfd.Normal(0, 10, name="beta_mu"))
        β_σ_hyperprior = yield root(tfd.HalfNormal(.1, name="beta_sigma"))
        β = yield from beta_prior_fn(β_μ_hyperprior, β_σ_hyperprior)

        σ_hyperprior = yield root(tfd.HalfNormal(30, name="sigma_prior"))
        σ = yield tfd.Sample(tfd.HalfNormal(σ_hyperprior), 6, name="sigma")

        loc = tf.gather(β, location_category, axis=-1) * customers
        scale = tf.gather(σ, location_category, axis=-1)
        sales = yield tfd.Independent(tfd.Normal(loc, scale),
                                      reinterpreted_batch_ndims=1,
                                      name="sales")

    return model_hierarchical_salad_sales, sales



Code 4.14 and 4.15#

def centered_beta_prior_fn(hyper_mu, hyper_sigma):
    β = yield tfd.Sample(tfd.Normal(hyper_mu, hyper_sigma), 6, name="beta")
    return β

# hierarchical_salad_df is the generated dataset as pandas.DataFrame
centered_model, observed = gen_hierarchical_salad_sales(
    hierarchical_salad_df, centered_beta_prior_fn)

# Sampler call restored; draw/chain/adaptation counts assumed to match the
# reported output (4 chains)
mcmc_samples_centered, sampler_stats_centered = run_mcmc(
    1000, centered_model, n_chains=4, num_adaptation_steps=1000,
    sales=observed)

divergent_per_chain = np.sum(sampler_stats_centered['diverging'], axis=0)
print(f"""There were {divergent_per_chain} divergences after tuning per chain.""")

WARNING:tensorflow:From /opt/miniconda3/envs/bmcp/lib/python3.9/site-packages/tensorflow_probability/python/distributions/distribution.py:342: calling MultivariateNormalDiag.__init__ (from tensorflow_probability.python.distributions.mvn_diag) with scale_identity_multiplier is deprecated and will be removed after 2020-01-01.
Instructions for updating:
scale_identity_multiplier is deprecated; please combine it into scale_diag directly instead.

2021-11-19 16:49:41.809766: W tensorflow/compiler/tf2xla/kernels/random_ops.cc:102] Warning: Using tf.random.uniform with XLA compilation will ignore seeds; consider using tf.random.stateless_uniform instead if reproducible behavior is desired. sanitize_seed/seed
2021-11-19 16:49:41.952731: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:38] Ignoring Assert operator mcmc_retry_init/assert_equal_1/Assert/AssertGuard/Assert

There were [ 24 112 194  44] divergences after tuning per chain.
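
Divergences this frequent mean the sampler is failing to traverse the posterior geometry, so the centered estimates below should be treated with suspicion. An aggregate count (illustrative):

diverging = sampler_stats_centered['diverging'].numpy()
print(f"{diverging.sum()} of {diverging.size} post-adaptation draws diverged "
      f"({diverging.sum() / diverging.size:.1%})")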

idata_centered_model = az.from_dict(
    posterior={
        k: np.swapaxes(v, 1, 0)
        for k, v in mcmc_samples_centered._asdict().items()},
    sample_stats={
        k: np.swapaxes(sampler_stats_centered[k], 1, 0)
        for k in ["target_log_prob", "diverging", "accept_ratio", "n_steps"]}
)

az.plot_trace(idata_centered_model, compact=True);

az.summary(idata_centered_model)

               mean      sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  ess_tail  r_hat
beta_mu       5.053   0.071   4.911    5.179      0.003    0.002     451.0    1462.0   1.02
beta_sigma    0.072   0.053   0.008    0.167      0.004    0.003      90.0      54.0   1.05
beta[0]       5.012   0.118   4.759    5.217      0.004    0.003     653.0    1563.0   1.02
beta[1]       5.042   0.071   4.908    5.174      0.003    0.002     591.0    1775.0   1.02
beta[2]       5.087   0.073   4.949    5.225      0.003    0.002     713.0    1453.0   1.01
beta[3]       5.057   0.100   4.865    5.261      0.003    0.002     855.0    1370.0   1.01
beta[4]       5.076   0.112   4.864    5.289      0.003    0.002    1059.0    1600.0   1.01
beta[5]       5.041   0.077   4.878    5.176      0.002    0.002     724.0    1966.0   1.01
sigma_prior  48.410  12.377  27.661   71.280      0.427    0.302     905.0    1645.0   1.01
sigma[0]     59.925  16.018  35.037   90.595      0.715    0.506     379.0     340.0   1.02
sigma[1]     17.692   9.324   6.065   32.821      0.774    0.548     203.0    1209.0   1.03
sigma[2]     27.447   5.538  18.332   37.794      0.196    0.146    1050.0     761.0   1.00
sigma[3]     83.552  16.075  54.561  111.829      0.480    0.340     794.0    1345.0   1.01
sigma[4]     60.412  19.803  29.284   93.902      1.080    0.765     375.0    1595.0   1.01
sigma[5]     23.581   9.324   9.895   40.715      0.301    0.229    1089.0     932.0   1.01

Figure 4.27#

slope = mcmc_samples_centered.beta[..., 4].numpy().flatten()
sigma = mcmc_samples_centered.beta_sigma.numpy().flatten()
divergences = sampler_stats_centered['diverging'].numpy().flatten()

axes = az.plot_joint({"β[4]": slope, "β_σ_hyperprior": sigma},
                     joint_kwargs={"alpha": .05}, figsize=(6, 6))
axes[0].scatter(slope[divergences], sigma[divergences], c="C4", alpha=.3, label='divergent sample')
axes[0].legend(frameon=True)
axes[0].set_ylim(0, .3)
axes[0].set_xlim(4.5, 5.5)


/opt/miniconda3/envs/bmcp/lib/python3.9/site-packages/arviz/plots/jointplot.py:144: UserWarning: plot_joint will be deprecated. Please use plot_pair instead.


Code 4.16#

def non_centered_beta_prior_fn(hyper_mu, hyper_sigma):
    β_offset = yield root(tfd.Sample(tfd.Normal(0, 1), 6, name="beta_offset"))
    return β_offset * hyper_sigma[..., None] + hyper_mu[..., None]

# hierarchical_salad_df is the generated dataset as pandas.DataFrame
non_centered_model, observed = gen_hierarchical_salad_sales(
    hierarchical_salad_df, non_centered_beta_prior_fn)

# Sampler call restored with the same assumed settings as the centered run
mcmc_samples_noncentered, sampler_stats_noncentered = run_mcmc(
    1000, non_centered_model, n_chains=4, num_adaptation_steps=1000,
    sales=observed)

divergent_per_chain = np.sum(sampler_stats_noncentered['diverging'], axis=0)
print(f"""There were {divergent_per_chain} divergences after tuning per chain.""")

2021-11-19 16:50:17.643939: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:38] Ignoring Assert operator mcmc_retry_init/assert_equal_1/Assert/AssertGuard/Assert

There were [ 0  0 20  0] divergences after tuning per chain.
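
The offset formulation works because rescaling a standard Normal reproduces the centered prior exactly, while removing the funnel-shaped coupling between β and β_σ from the geometry the sampler sees. A quick NumPy sanity check (illustrative; the hyperparameter values are made up):

rng = np.random.default_rng(0)
hyper_mu, hyper_sigma = 5.0, 0.1        # hypothetical hyperparameter values
z = rng.normal(0, 1, size=100_000)      # non-centered draw
beta = hyper_mu + hyper_sigma * z
print(beta.mean(), beta.std())          # ≈ 5.0 and ≈ 0.1, i.e. Normal(5, 0.1)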

idata_non_centered_model = az.from_dict(
    posterior={
        k: np.swapaxes(v, 1, 0)
        for k, v in mcmc_samples_noncentered._asdict().items()},
    sample_stats={
        k: np.swapaxes(sampler_stats_noncentered[k], 1, 0)
        for k in ["target_log_prob", "diverging", "accept_ratio", "n_steps"]}
)

az.plot_trace(idata_non_centered_model, compact=True);

az.summary(idata_non_centered_model)

                  mean      sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  ess_tail  r_hat
beta_mu          5.050   0.073   4.914    5.188      0.002    0.001    2006.0    2092.0    1.0
beta_sigma       0.069   0.054   0.000    0.163      0.001    0.001    1384.0    1262.0    1.0
beta_offset[0]  -0.366   0.996  -2.141    1.592      0.018    0.015    3061.0    2485.0    1.0
beta_offset[1]  -0.096   0.885  -1.761    1.557      0.018    0.014    2556.0    2648.0    1.0
beta_offset[2]   0.392   0.890  -1.339    2.047      0.018    0.013    2457.0    2927.0    1.0
beta_offset[3]   0.040   0.972  -1.685    1.930      0.017    0.016    3099.0    2785.0    1.0
beta_offset[4]   0.230   1.038  -1.738    2.169      0.019    0.017    2940.0    2233.0    1.0
beta_offset[5]  -0.154   0.872  -1.794    1.466      0.017    0.013    2508.0    2880.0    1.0
sigma_prior     50.026  12.585  28.433   72.742      0.320    0.242    1842.0    1525.0    1.0
sigma[0]        60.975  16.688  33.831   93.832      0.400    0.307    2291.0    1523.0    1.0
sigma[1]        18.473  11.477   6.049   37.322      0.511    0.402    1086.0     601.0    1.0
sigma[2]        27.445   5.453  18.557   38.042      0.114    0.086    2785.0    1787.0    1.0
sigma[3]        83.830  17.153  55.301  114.967      0.418    0.330    2335.0    1584.0    1.0
sigma[4]        60.699  20.419  29.070   98.661      0.470    0.369    2558.0    1826.0    1.0
sigma[5]        24.146  10.760  10.322   42.645      0.400    0.322    1400.0     821.0    1.0

Figure 4.28#

noncentered_beta = (mcmc_samples_noncentered.beta_mu[..., None]
                    + mcmc_samples_noncentered.beta_offset * mcmc_samples_noncentered.beta_sigma[..., None])
slope = noncentered_beta[..., 4].numpy().flatten()
sigma = mcmc_samples_noncentered.beta_sigma.numpy().flatten()
divergences = sampler_stats_noncentered['diverging'].numpy().flatten()

axes = az.plot_joint({"β[4]": slope, "β_σ_hyperprior": sigma},
                     joint_kwargs={"alpha": .05}, figsize=(6, 6))
axes[0].scatter(slope[divergences], sigma[divergences], c="C4", alpha=.3, label='divergent sample')
axes[0].legend(frameon=True)
axes[0].set_ylim(0, .3)
axes[0].set_xlim(4.5, 5.5)


/opt/miniconda3/envs/bmcp/lib/python3.9/site-packages/arviz/plots/jointplot.py:144: UserWarning: plot_joint will be deprecated. Please use plot_pair instead.


Figure 4.29#

centered_β_sigma = mcmc_samples_centered.beta_sigma.numpy()
noncentered_β_sigma = mcmc_samples_noncentered.beta_sigma.numpy()

fig, ax = plt.subplots()
az.plot_kde(centered_β_sigma, label="Centered β_σ_hyperprior", ax=ax)
az.plot_kde(noncentered_β_sigma, label="Noncentered β_σ_hyperprior", plot_kwargs={"color":"C4"}, ax=ax);

ax.set_title("Comparison of Centered vs Non Centered Estimates");


Code 4.17#

out_of_sample_customers = 50.

@tfd.JointDistributionCoroutine
def out_of_sample_prediction_model():
    model = yield root(non_centered_model)
    β = model.beta_offset * model.beta_sigma[..., None] + model.beta_mu[..., None]

    β_group = yield tfd.Normal(
        model.beta_mu, model.beta_sigma, name="group_beta_prediction")
    group_level_prediction = yield tfd.Normal(
        β_group * out_of_sample_customers,
        model.sigma_prior,
        name="group_level_prediction")
    for l in [2, 4]:
        yield tfd.Normal(
            tf.gather(β, l, axis=-1) * out_of_sample_customers,
            tf.gather(model.sigma, l, axis=-1),
            name=f"location_{l}_prediction")

amended_posterior = tf.nest.pack_sequence_as(
    non_centered_model.sample(),
    list(mcmc_samples_noncentered) + [observed],
)
ppc = out_of_sample_prediction_model.sample(var0=amended_posterior)

fig, ax = plt.subplots()

az.plot_kde(ppc.group_level_prediction, plot_kwargs={"color":"C0"}, ax=ax, label="All locations")
az.plot_kde(ppc.location_2_prediction, plot_kwargs={"color":"C2"}, ax=ax, label="Location 2")
az.plot_kde(ppc.location_4_prediction, plot_kwargs={"color":"C4"}, ax=ax, label="Location 4")

ax.set_xlabel("Predicted revenue with 50 customers")
ax.set_xlim([0,600])

ax.set_yticks([])



Code 4.18#

out_of_sample_customers2 = np.arange(50, 90)

@tfd.JointDistributionCoroutine
def out_of_sample_prediction_model2():
    model = yield root(non_centered_model)

    β_new_loc = yield tfd.Normal(
        model.beta_mu, model.beta_sigma, name="beta_new_loc")
    σ_new_loc = yield tfd.HalfNormal(model.sigma_prior, name="sigma_new_loc")
    group_level_prediction = yield tfd.Normal(
        β_new_loc[..., None] * out_of_sample_customers2,
        σ_new_loc[..., None],
        name="new_location_prediction")

ppc = out_of_sample_prediction_model2.sample(var0=amended_posterior)

az.plot_hdi(out_of_sample_customers2, ppc.new_location_prediction, hdi_prob=.95)

/opt/miniconda3/envs/bmcp/lib/python3.9/site-packages/arviz/data/base.py:169: UserWarning: More chains (1000) than draws (4). Passed array should have shape (chains, draws, *shape)
warnings.warn(

<AxesSubplot:>