{ "cells": [ { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import stan\n", "import arviz as az\n", "\n", "# stupid stan problems\n", "import nest_asyncio\n", "nest_asyncio.apply()\n", "\n", "# true param\n", "alpha_true = 2.3,\n", "beta_true = 4.0,\n", "sigma_true = 2.0,\n", "N = 100\n", "\n", "# simulation\n", "np.random.seed(42)\n", "x = np.random.normal(size=N)\n", "y = alpha_true + beta_true * x + sigma_true * np.random.normal(size=N)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "stanCode = \"\"\"\n", "data {\n", " int N;\n", " vector[N] x;\n", " vector[N] y;\n", "}\n", "parameters {\n", " real alpha;\n", " real beta;\n", " real sigma_sq;\n", "}\n", "transformed parameters {\n", " real sigma = sqrt(sigma_sq);\n", "}\n", "model {\n", " sigma_sq ~ inv_gamma(1, 1); // prior on variance\n", " alpha ~ normal(0, 10);\n", " beta ~ normal(0, 10);\n", " y ~ normal(alpha + beta * x, sigma); // likelihood\n", "}\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Building...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "Building: found in cache, done.Sampling: 0%\n", "Sampling: 25% (3000/12000)\n", "Sampling: 50% (6000/12000)\n", "Sampling: 75% (9000/12000)\n", "Sampling: 100% (12000/12000)\n", "Sampling: 100% (12000/12000), done.\n", "Messages received during sampling:\n", " Gradient evaluation took 1.7e-05 seconds\n", " 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.\n", " Adjust your expectations accordingly!\n", " Gradient evaluation took 2.7e-05 seconds\n", " 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.\n", " Adjust your expectations accordingly!\n", " Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n", " Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n", " If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n", " but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n", " Gradient evaluation took 2e-05 seconds\n", " 1000 transitions using 10 leapfrog steps per transition would take 0.2 seconds.\n", " Adjust your expectations accordingly!\n", " Gradient evaluation took 1e-05 seconds\n", " 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.\n", " Adjust your expectations accordingly!\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meansdhdi_3%hdi_97%mcse_meanmcse_sdess_bulkess_tailr_hat
alpha2.3170.1921.9592.6830.0020.0026909.05804.01.0
beta3.7130.2083.3274.1170.0020.0027805.05904.01.0
sigma_sq3.6150.5112.7164.5840.0060.0067166.05819.01.0
sigma1.8970.1331.6482.1410.0020.0017166.05819.01.0
\n", "
" ], "text/plain": [ " mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk \\\n", "alpha 2.317 0.192 1.959 2.683 0.002 0.002 6909.0 \n", "beta 3.713 0.208 3.327 4.117 0.002 0.002 7805.0 \n", "sigma_sq 3.615 0.511 2.716 4.584 0.006 0.006 7166.0 \n", "sigma 1.897 0.133 1.648 2.141 0.002 0.001 7166.0 \n", "\n", " ess_tail r_hat \n", "alpha 5804.0 1.0 \n", "beta 5904.0 1.0 \n", "sigma_sq 5819.0 1.0 \n", "sigma 5819.0 1.0 " ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Define data first\n", "data = {\"N\": N, \"x\": x, \"y\": y}\n", "\n", "# Build the model with data\n", "model = stan.build(stanCode, data=data)\n", "\n", "# Sample\n", "fit = model.sample(num_chains=4, num_samples=2000)\n", "\n", "az.summary(az.from_pystan(fit))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Analyze Results for N=100\n", "\n", "Posterior summaries should be close to the true values:\n", "\n", "- **α**: approximately 2.3\n", "- **β**: approximately 4.0\n", "- **σ**: approximately 2.0\n", "\n", "Also compute the 95% credible intervals." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5: Repeat with N=1000\n", "\n", "Increase the sample size and rerun the simulation and model fitting." ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Building...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "Building: found in cache, done.Sampling: 0%\n", "Sampling: 25% (3000/12000)\n", "Sampling: 50% (6000/12000)\n", "Sampling: 75% (9000/12000)\n", "Sampling: 100% (12000/12000)\n", "Sampling: 100% (12000/12000), done.\n", "Messages received during sampling:\n", " Gradient evaluation took 0.000146 seconds\n", " 1000 transitions using 10 leapfrog steps per transition would take 1.46 seconds.\n", " Adjust your expectations accordingly!\n", " Gradient evaluation took 0.000126 seconds\n", " 1000 transitions using 10 leapfrog steps per transition would take 1.26 seconds.\n", " Adjust your expectations accordingly!\n", " Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n", " Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n", " If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n", " but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n", " Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n", " Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n", " If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n", " but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n", " Gradient evaluation took 0.000123 seconds\n", " 1000 transitions using 10 leapfrog steps per transition would take 1.23 seconds.\n", " Adjust your expectations accordingly!\n", " Gradient evaluation took 0.000135 seconds\n", " 1000 transitions using 10 leapfrog steps per transition would take 1.35 seconds.\n", " Adjust your expectations accordingly!\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meansdhdi_3%hdi_97%mcse_meanmcse_sdess_bulkess_tailr_hat
alpha2.3660.0622.2532.4840.0010.0017563.05508.01.0
beta3.9290.0633.8144.0480.0010.0018352.05934.01.0
sigma_sq3.8950.1743.5884.2360.0020.0028354.06044.01.0
sigma1.9730.0441.8942.0580.0000.0008354.06044.01.0
\n", "
" ], "text/plain": [ " mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk \\\n", "alpha 2.366 0.062 2.253 2.484 0.001 0.001 7563.0 \n", "beta 3.929 0.063 3.814 4.048 0.001 0.001 8352.0 \n", "sigma_sq 3.895 0.174 3.588 4.236 0.002 0.002 8354.0 \n", "sigma 1.973 0.044 1.894 2.058 0.000 0.000 8354.0 \n", "\n", " ess_tail r_hat \n", "alpha 5508.0 1.0 \n", "beta 5934.0 1.0 \n", "sigma_sq 6044.0 1.0 \n", "sigma 6044.0 1.0 " ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "N_large = 1000;\n", "x_large = np.random.normal(size=N_large);\n", "y_large = alpha_true + beta_true * x_large + sigma_true * np.random.normal(size=N_large);\n", "\n", "# create new data dictionary\n", "data_large = {\"N\": N_large, \"x\": x_large, \"y\": y_large};\n", "model_large = stan.build(stanCode, data=data_large)\n", "\n", "# fit the model again\n", "fit_large = model_large.sample(num_chains=4, num_samples=2000);\n", "\n", "# check diagnostics for larger data\n", "az.summary(az.from_pystan(fit_large))\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.x" } }, "nbformat": 4, "nbformat_minor": 2 }