COGMOD-HWI/HW3/Q2/Q4/Q4.ipynb

{
	"cells": [
		{
			"cell_type": "code",
			"execution_count": 52,
			"metadata": {},
			"outputs": [],
			"source": [
				"import numpy as np\n",
				"import stan\n",
				"import arviz as az\n",
				"\n",
				"# stupid stan problems\n",
				"import nest_asyncio\n",
				"nest_asyncio.apply()\n",
				"\n",
				"# true param\n",
				"alpha_true = 2.3,\n",
				"beta_true = 4.0,\n",
				"sigma_true = 2.0,\n",
				"N = 100\n",
				"\n",
				"# simulation\n",
				"np.random.seed(42)\n",
				"x = np.random.normal(size=N)\n",
				"y = alpha_true + beta_true * x + sigma_true * np.random.normal(size=N)"
			]
		},
		{
			"cell_type": "code",
			"execution_count": 53,
			"metadata": {},
			"outputs": [],
			"source": [
				"stanCode = \"\"\"\n",
				"data {\n",
				"  int<lower=0> N;\n",
				"  vector[N] x;\n",
				"  vector[N] y;\n",
				"}\n",
				"parameters {\n",
				"  real alpha;\n",
				"  real beta;\n",
				"  real<lower=0> sigma_sq;\n",
				"}\n",
				"transformed parameters {\n",
				"  real<lower=0> sigma = sqrt(sigma_sq);\n",
				"}\n",
				"model {\n",
				"  sigma_sq ~ inv_gamma(1, 1);  // prior on variance\n",
				"  alpha ~ normal(0, 10);\n",
				"  beta ~ normal(0, 10);\n",
				"  y ~ normal(alpha + beta * x, sigma);  // likelihood\n",
				"}\n",
				"\"\"\""
			]
		},
		{
			"cell_type": "code",
			"execution_count": 54,
			"metadata": {},
			"outputs": [
				{
					"name": "stdout",
					"output_type": "stream",
					"text": [
						"Building...\n"
					]
				},
				{
					"name": "stderr",
					"output_type": "stream",
					"text": [
						"\n",
						"Building: found in cache, done.Sampling:   0%\n",
						"Sampling:  25% (3000/12000)\n",
						"Sampling:  50% (6000/12000)\n",
						"Sampling:  75% (9000/12000)\n",
						"Sampling: 100% (12000/12000)\n",
						"Sampling: 100% (12000/12000), done.\n",
						"Messages received during sampling:\n",
						"  Gradient evaluation took 1.7e-05 seconds\n",
						"  1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.\n",
						"  Adjust your expectations accordingly!\n",
						"  Gradient evaluation took 2.7e-05 seconds\n",
						"  1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.\n",
						"  Adjust your expectations accordingly!\n",
						"  Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
						"  Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
						"  If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
						"  but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
						"  Gradient evaluation took 2e-05 seconds\n",
						"  1000 transitions using 10 leapfrog steps per transition would take 0.2 seconds.\n",
						"  Adjust your expectations accordingly!\n",
						"  Gradient evaluation took 1e-05 seconds\n",
						"  1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.\n",
						"  Adjust your expectations accordingly!\n"
					]
				},
				{
					"data": {
						"text/html": [
							"<div>\n",
							"<style scoped>\n",
							"    .dataframe tbody tr th:only-of-type {\n",
							"        vertical-align: middle;\n",
							"    }\n",
							"\n",
							"    .dataframe tbody tr th {\n",
							"        vertical-align: top;\n",
							"    }\n",
							"\n",
							"    .dataframe thead th {\n",
							"        text-align: right;\n",
							"    }\n",
							"</style>\n",
							"<table border=\"1\" class=\"dataframe\">\n",
							"  <thead>\n",
							"    <tr style=\"text-align: right;\">\n",
							"      <th></th>\n",
							"      <th>mean</th>\n",
							"      <th>sd</th>\n",
							"      <th>hdi_3%</th>\n",
							"      <th>hdi_97%</th>\n",
							"      <th>mcse_mean</th>\n",
							"      <th>mcse_sd</th>\n",
							"      <th>ess_bulk</th>\n",
							"      <th>ess_tail</th>\n",
							"      <th>r_hat</th>\n",
							"    </tr>\n",
							"  </thead>\n",
							"  <tbody>\n",
							"    <tr>\n",
							"      <th>alpha</th>\n",
							"      <td>2.317</td>\n",
							"      <td>0.192</td>\n",
							"      <td>1.959</td>\n",
							"      <td>2.683</td>\n",
							"      <td>0.002</td>\n",
							"      <td>0.002</td>\n",
							"      <td>6909.0</td>\n",
							"      <td>5804.0</td>\n",
							"      <td>1.0</td>\n",
							"    </tr>\n",
							"    <tr>\n",
							"      <th>beta</th>\n",
							"      <td>3.713</td>\n",
							"      <td>0.208</td>\n",
							"      <td>3.327</td>\n",
							"      <td>4.117</td>\n",
							"      <td>0.002</td>\n",
							"      <td>0.002</td>\n",
							"      <td>7805.0</td>\n",
							"      <td>5904.0</td>\n",
							"      <td>1.0</td>\n",
							"    </tr>\n",
							"    <tr>\n",
							"      <th>sigma_sq</th>\n",
							"      <td>3.615</td>\n",
							"      <td>0.511</td>\n",
							"      <td>2.716</td>\n",
							"      <td>4.584</td>\n",
							"      <td>0.006</td>\n",
							"      <td>0.006</td>\n",
							"      <td>7166.0</td>\n",
							"      <td>5819.0</td>\n",
							"      <td>1.0</td>\n",
							"    </tr>\n",
							"    <tr>\n",
							"      <th>sigma</th>\n",
							"      <td>1.897</td>\n",
							"      <td>0.133</td>\n",
							"      <td>1.648</td>\n",
							"      <td>2.141</td>\n",
							"      <td>0.002</td>\n",
							"      <td>0.001</td>\n",
							"      <td>7166.0</td>\n",
							"      <td>5819.0</td>\n",
							"      <td>1.0</td>\n",
							"    </tr>\n",
							"  </tbody>\n",
							"</table>\n",
							"</div>"
						],
						"text/plain": [
							"           mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \\\n",
							"alpha     2.317  0.192   1.959    2.683      0.002    0.002    6909.0   \n",
							"beta      3.713  0.208   3.327    4.117      0.002    0.002    7805.0   \n",
							"sigma_sq  3.615  0.511   2.716    4.584      0.006    0.006    7166.0   \n",
							"sigma     1.897  0.133   1.648    2.141      0.002    0.001    7166.0   \n",
							"\n",
							"          ess_tail  r_hat  \n",
							"alpha       5804.0    1.0  \n",
							"beta        5904.0    1.0  \n",
							"sigma_sq    5819.0    1.0  \n",
							"sigma       5819.0    1.0  "
						]
					},
					"execution_count": 54,
					"metadata": {},
					"output_type": "execute_result"
				}
			],
			"source": [
				"# Define data first\n",
				"data = {\"N\": N, \"x\": x, \"y\": y}\n",
				"\n",
				"# Build the model with data\n",
				"model = stan.build(stanCode, data=data)\n",
				"\n",
				"# Sample\n",
				"fit = model.sample(num_chains=4, num_samples=2000)\n",
				"\n",
				"az.summary(az.from_pystan(fit))"
			]
		},
		{
			"cell_type": "markdown",
			"metadata": {},
			"source": [
				"### Step 4: Analyze Results for N=100\n",
				"\n",
				"Posterior summaries should be close to the true values:\n",
				"\n",
				"- **α**: approximately 2.3\n",
				"- **β**: approximately 4.0\n",
				"- **σ**: approximately 2.0\n",
				"\n",
				"Also compute the 95% credible intervals."
			]
		},
		{
			"cell_type": "markdown",
			"metadata": {},
			"source": [
				"### Step 5: Repeat with N=1000\n",
				"\n",
				"Increase the sample size and rerun the simulation and model fitting."
			]
		},
		{
			"cell_type": "code",
			"execution_count": 55,
			"metadata": {},
			"outputs": [
				{
					"name": "stdout",
					"output_type": "stream",
					"text": [
						"Building...\n"
					]
				},
				{
					"name": "stderr",
					"output_type": "stream",
					"text": [
						"\n",
						"Building: found in cache, done.Sampling:   0%\n",
						"Sampling:  25% (3000/12000)\n",
						"Sampling:  50% (6000/12000)\n",
						"Sampling:  75% (9000/12000)\n",
						"Sampling: 100% (12000/12000)\n",
						"Sampling: 100% (12000/12000), done.\n",
						"Messages received during sampling:\n",
						"  Gradient evaluation took 0.000146 seconds\n",
						"  1000 transitions using 10 leapfrog steps per transition would take 1.46 seconds.\n",
						"  Adjust your expectations accordingly!\n",
						"  Gradient evaluation took 0.000126 seconds\n",
						"  1000 transitions using 10 leapfrog steps per transition would take 1.26 seconds.\n",
						"  Adjust your expectations accordingly!\n",
						"  Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
						"  Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
						"  If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
						"  but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
						"  Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
						"  Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
						"  If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
						"  but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
						"  Gradient evaluation took 0.000123 seconds\n",
						"  1000 transitions using 10 leapfrog steps per transition would take 1.23 seconds.\n",
						"  Adjust your expectations accordingly!\n",
						"  Gradient evaluation took 0.000135 seconds\n",
						"  1000 transitions using 10 leapfrog steps per transition would take 1.35 seconds.\n",
						"  Adjust your expectations accordingly!\n"
					]
				},
				{
					"data": {
						"text/html": [
							"<div>\n",
							"<style scoped>\n",
							"    .dataframe tbody tr th:only-of-type {\n",
							"        vertical-align: middle;\n",
							"    }\n",
							"\n",
							"    .dataframe tbody tr th {\n",
							"        vertical-align: top;\n",
							"    }\n",
							"\n",
							"    .dataframe thead th {\n",
							"        text-align: right;\n",
							"    }\n",
							"</style>\n",
							"<table border=\"1\" class=\"dataframe\">\n",
							"  <thead>\n",
							"    <tr style=\"text-align: right;\">\n",
							"      <th></th>\n",
							"      <th>mean</th>\n",
							"      <th>sd</th>\n",
							"      <th>hdi_3%</th>\n",
							"      <th>hdi_97%</th>\n",
							"      <th>mcse_mean</th>\n",
							"      <th>mcse_sd</th>\n",
							"      <th>ess_bulk</th>\n",
							"      <th>ess_tail</th>\n",
							"      <th>r_hat</th>\n",
							"    </tr>\n",
							"  </thead>\n",
							"  <tbody>\n",
							"    <tr>\n",
							"      <th>alpha</th>\n",
							"      <td>2.366</td>\n",
							"      <td>0.062</td>\n",
							"      <td>2.253</td>\n",
							"      <td>2.484</td>\n",
							"      <td>0.001</td>\n",
							"      <td>0.001</td>\n",
							"      <td>7563.0</td>\n",
							"      <td>5508.0</td>\n",
							"      <td>1.0</td>\n",
							"    </tr>\n",
							"    <tr>\n",
							"      <th>beta</th>\n",
							"      <td>3.929</td>\n",
							"      <td>0.063</td>\n",
							"      <td>3.814</td>\n",
							"      <td>4.048</td>\n",
							"      <td>0.001</td>\n",
							"      <td>0.001</td>\n",
							"      <td>8352.0</td>\n",
							"      <td>5934.0</td>\n",
							"      <td>1.0</td>\n",
							"    </tr>\n",
							"    <tr>\n",
							"      <th>sigma_sq</th>\n",
							"      <td>3.895</td>\n",
							"      <td>0.174</td>\n",
							"      <td>3.588</td>\n",
							"      <td>4.236</td>\n",
							"      <td>0.002</td>\n",
							"      <td>0.002</td>\n",
							"      <td>8354.0</td>\n",
							"      <td>6044.0</td>\n",
							"      <td>1.0</td>\n",
							"    </tr>\n",
							"    <tr>\n",
							"      <th>sigma</th>\n",
							"      <td>1.973</td>\n",
							"      <td>0.044</td>\n",
							"      <td>1.894</td>\n",
							"      <td>2.058</td>\n",
							"      <td>0.000</td>\n",
							"      <td>0.000</td>\n",
							"      <td>8354.0</td>\n",
							"      <td>6044.0</td>\n",
							"      <td>1.0</td>\n",
							"    </tr>\n",
							"  </tbody>\n",
							"</table>\n",
							"</div>"
						],
						"text/plain": [
							"           mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \\\n",
							"alpha     2.366  0.062   2.253    2.484      0.001    0.001    7563.0   \n",
							"beta      3.929  0.063   3.814    4.048      0.001    0.001    8352.0   \n",
							"sigma_sq  3.895  0.174   3.588    4.236      0.002    0.002    8354.0   \n",
							"sigma     1.973  0.044   1.894    2.058      0.000    0.000    8354.0   \n",
							"\n",
							"          ess_tail  r_hat  \n",
							"alpha       5508.0    1.0  \n",
							"beta        5934.0    1.0  \n",
							"sigma_sq    6044.0    1.0  \n",
							"sigma       6044.0    1.0  "
						]
					},
					"execution_count": 55,
					"metadata": {},
					"output_type": "execute_result"
				}
			],
			"source": [
				"N_large = 1000;\n",
				"x_large = np.random.normal(size=N_large);\n",
				"y_large = alpha_true + beta_true * x_large + sigma_true * np.random.normal(size=N_large);\n",
				"\n",
				"# create new data dictionary\n",
				"data_large = {\"N\": N_large, \"x\": x_large, \"y\": y_large};\n",
				"model_large = stan.build(stanCode, data=data_large)\n",
				"\n",
				"# fit the model again\n",
				"fit_large = model_large.sample(num_chains=4, num_samples=2000);\n",
				"\n",
				"# check diagnostics for larger data\n",
				"az.summary(az.from_pystan(fit_large))\n"
			]
		}
	],
	"metadata": {
		"kernelspec": {
			"display_name": "Python 3",
			"language": "python",
			"name": "python3"
		},
		"language_info": {
			"name": "python",
			"version": "3.x"
		}
	},
	"nbformat": 4,
	"nbformat_minor": 2
}