Files
2025-03-23 16:54:36 -04:00

422 lines
14 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import stan\n",
"import arviz as az\n",
"\n",
"# stupid stan problems\n",
"import nest_asyncio\n",
"nest_asyncio.apply()\n",
"\n",
"# true param\n",
"alpha_true = 2.3,\n",
"beta_true = 4.0,\n",
"sigma_true = 2.0,\n",
"N = 100\n",
"\n",
"# simulation\n",
"np.random.seed(42)\n",
"x = np.random.normal(size=N)\n",
"y = alpha_true + beta_true * x + sigma_true * np.random.normal(size=N)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"stanCode = \"\"\"\n",
"data {\n",
" int<lower=0> N;\n",
" vector[N] x;\n",
" vector[N] y;\n",
"}\n",
"parameters {\n",
" real alpha;\n",
" real beta;\n",
" real<lower=0> sigma_sq;\n",
"}\n",
"transformed parameters {\n",
" real<lower=0> sigma = sqrt(sigma_sq);\n",
"}\n",
"model {\n",
" sigma_sq ~ inv_gamma(1, 1); // prior on variance\n",
" alpha ~ normal(0, 10);\n",
" beta ~ normal(0, 10);\n",
" y ~ normal(alpha + beta * x, sigma); // likelihood\n",
"}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Building...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Building: found in cache, done.Sampling: 0%\n",
"Sampling: 25% (3000/12000)\n",
"Sampling: 50% (6000/12000)\n",
"Sampling: 75% (9000/12000)\n",
"Sampling: 100% (12000/12000)\n",
"Sampling: 100% (12000/12000), done.\n",
"Messages received during sampling:\n",
" Gradient evaluation took 1.7e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 2.7e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.\n",
" Adjust your expectations accordingly!\n",
" Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
" Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
" If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
" but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
" Gradient evaluation took 2e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.2 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 1e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.\n",
" Adjust your expectations accordingly!\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>sd</th>\n",
" <th>hdi_3%</th>\n",
" <th>hdi_97%</th>\n",
" <th>mcse_mean</th>\n",
" <th>mcse_sd</th>\n",
" <th>ess_bulk</th>\n",
" <th>ess_tail</th>\n",
" <th>r_hat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>alpha</th>\n",
" <td>2.317</td>\n",
" <td>0.192</td>\n",
" <td>1.959</td>\n",
" <td>2.683</td>\n",
" <td>0.002</td>\n",
" <td>0.002</td>\n",
" <td>6909.0</td>\n",
" <td>5804.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>beta</th>\n",
" <td>3.713</td>\n",
" <td>0.208</td>\n",
" <td>3.327</td>\n",
" <td>4.117</td>\n",
" <td>0.002</td>\n",
" <td>0.002</td>\n",
" <td>7805.0</td>\n",
" <td>5904.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma_sq</th>\n",
" <td>3.615</td>\n",
" <td>0.511</td>\n",
" <td>2.716</td>\n",
" <td>4.584</td>\n",
" <td>0.006</td>\n",
" <td>0.006</td>\n",
" <td>7166.0</td>\n",
" <td>5819.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma</th>\n",
" <td>1.897</td>\n",
" <td>0.133</td>\n",
" <td>1.648</td>\n",
" <td>2.141</td>\n",
" <td>0.002</td>\n",
" <td>0.001</td>\n",
" <td>7166.0</td>\n",
" <td>5819.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk \\\n",
"alpha 2.317 0.192 1.959 2.683 0.002 0.002 6909.0 \n",
"beta 3.713 0.208 3.327 4.117 0.002 0.002 7805.0 \n",
"sigma_sq 3.615 0.511 2.716 4.584 0.006 0.006 7166.0 \n",
"sigma 1.897 0.133 1.648 2.141 0.002 0.001 7166.0 \n",
"\n",
" ess_tail r_hat \n",
"alpha 5804.0 1.0 \n",
"beta 5904.0 1.0 \n",
"sigma_sq 5819.0 1.0 \n",
"sigma 5819.0 1.0 "
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Define data first\n",
"data = {\"N\": N, \"x\": x, \"y\": y}\n",
"\n",
"# Build the model with data\n",
"model = stan.build(stanCode, data=data)\n",
"\n",
"# Sample\n",
"fit = model.sample(num_chains=4, num_samples=2000)\n",
"\n",
"az.summary(az.from_pystan(fit))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 4: Analyze Results for N=100\n",
"\n",
"Posterior summaries should be close to the true values:\n",
"\n",
"- **α**: approximately 2.3\n",
"- **β**: approximately 4.0\n",
"- **σ**: approximately 2.0\n",
"\n",
"Also compute the 95% credible intervals."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 5: Repeat with N=1000\n",
"\n",
"Increase the sample size and rerun the simulation and model fitting."
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Building...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Building: found in cache, done.Sampling: 0%\n",
"Sampling: 25% (3000/12000)\n",
"Sampling: 50% (6000/12000)\n",
"Sampling: 75% (9000/12000)\n",
"Sampling: 100% (12000/12000)\n",
"Sampling: 100% (12000/12000), done.\n",
"Messages received during sampling:\n",
" Gradient evaluation took 0.000146 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.46 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 0.000126 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.26 seconds.\n",
" Adjust your expectations accordingly!\n",
" Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
" Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
" If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
" but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
" Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
" Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
" If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
" but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
" Gradient evaluation took 0.000123 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.23 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 0.000135 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.35 seconds.\n",
" Adjust your expectations accordingly!\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>sd</th>\n",
" <th>hdi_3%</th>\n",
" <th>hdi_97%</th>\n",
" <th>mcse_mean</th>\n",
" <th>mcse_sd</th>\n",
" <th>ess_bulk</th>\n",
" <th>ess_tail</th>\n",
" <th>r_hat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>alpha</th>\n",
" <td>2.366</td>\n",
" <td>0.062</td>\n",
" <td>2.253</td>\n",
" <td>2.484</td>\n",
" <td>0.001</td>\n",
" <td>0.001</td>\n",
" <td>7563.0</td>\n",
" <td>5508.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>beta</th>\n",
" <td>3.929</td>\n",
" <td>0.063</td>\n",
" <td>3.814</td>\n",
" <td>4.048</td>\n",
" <td>0.001</td>\n",
" <td>0.001</td>\n",
" <td>8352.0</td>\n",
" <td>5934.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma_sq</th>\n",
" <td>3.895</td>\n",
" <td>0.174</td>\n",
" <td>3.588</td>\n",
" <td>4.236</td>\n",
" <td>0.002</td>\n",
" <td>0.002</td>\n",
" <td>8354.0</td>\n",
" <td>6044.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma</th>\n",
" <td>1.973</td>\n",
" <td>0.044</td>\n",
" <td>1.894</td>\n",
" <td>2.058</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>8354.0</td>\n",
" <td>6044.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk \\\n",
"alpha 2.366 0.062 2.253 2.484 0.001 0.001 7563.0 \n",
"beta 3.929 0.063 3.814 4.048 0.001 0.001 8352.0 \n",
"sigma_sq 3.895 0.174 3.588 4.236 0.002 0.002 8354.0 \n",
"sigma 1.973 0.044 1.894 2.058 0.000 0.000 8354.0 \n",
"\n",
" ess_tail r_hat \n",
"alpha 5508.0 1.0 \n",
"beta 5934.0 1.0 \n",
"sigma_sq 6044.0 1.0 \n",
"sigma 6044.0 1.0 "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"N_large = 1000;\n",
"x_large = np.random.normal(size=N_large);\n",
"y_large = alpha_true + beta_true * x_large + sigma_true * np.random.normal(size=N_large);\n",
"\n",
"# create new data dictionary\n",
"data_large = {\"N\": N_large, \"x\": x_large, \"y\": y_large};\n",
"model_large = stan.build(stanCode, data=data_large)\n",
"\n",
"# fit the model again\n",
"fit_large = model_large.sample(num_chains=4, num_samples=2000);\n",
"\n",
"# check diagnostics for larger data\n",
"az.summary(az.from_pystan(fit_large))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.x"
}
},
"nbformat": 4,
"nbformat_minor": 2
}