finished HW3

2025-03-23 16:54:36 -04:00
parent 3faf2fcfc2
commit e5d50c19d8
23 changed files with 1706 additions and 0 deletions
+2
@@ -0,0 +1,2 @@
build/
.venv/
BIN: binary image added (not shown; 30 KiB)
+50
@@ -0,0 +1,50 @@
import numpy as np
import matplotlib.pyplot as plt


def simulate_ddm(v, a=1.0, beta=0.5, tau=0.3, sigma=1.0, dt=0.001, max_steps=3000):
    """Simulate one drift-diffusion trial; returns (RT, choice)."""
    X = beta * a  # start position
    t = 0.0
    for _ in range(max_steps):
        dW = np.random.normal(0, np.sqrt(dt))
        dX = v * dt + sigma * dW
        X += dX
        t += dt
        if X >= a:
            return t + tau, 1  # upper bound hit
        elif X <= 0:
            return t + tau, 0  # lower bound hit
    return max_steps * dt + tau, None  # timeout: no boundary reached


# coarse settings (raised in part 2)
vs = np.linspace(0.5, 1.5, 25)  # drift rates to test
n_trials = 2000

# store mean RTs per boundary
upper_means, lower_means = [], []
for v in vs:
    upper_rts, lower_rts = [], []
    for _ in range(n_trials):
        rt, choice = simulate_ddm(v)
        if choice == 1:
            upper_rts.append(rt)
        elif choice == 0:
            lower_rts.append(rt)
    # means (NaN when a boundary was never hit)
    upper_means.append(np.mean(upper_rts) if upper_rts else np.nan)
    lower_means.append(np.mean(lower_rts) if lower_rts else np.nan)

# plotting
plt.figure(figsize=(10, 6))
plt.plot(vs, upper_means, 'o-', label='Upper Boundary Mean RT')
plt.plot(vs, lower_means, 's-', label='Lower Boundary Mean RT')
plt.plot(vs, np.array(upper_means) - np.array(lower_means),
         'd-', label='Mean Difference')
plt.xlabel('Drift Rate (v)')
plt.ylabel('Response Time (s)')
plt.title('Effect of Drift Rate on RT Distributions')
plt.legend()
plt.grid(True)
plt.savefig('part1.png')
+104
@@ -0,0 +1,104 @@
import numpy as np
import matplotlib.pyplot as plt
from multiprocessing import Pool, cpu_count
from functools import partial


def sim_ddm(v=1.0, a=1.0, beta=0.5, tau=0.3, sigma=1.0, dt=0.001, max_steps=3000):
    X = beta * a  # start
    t = 0.0
    for _ in range(max_steps):
        dW = np.random.normal(0, np.sqrt(dt))
        dX = v * dt + sigma * dW
        X += dX
        t += dt
        if X >= a:
            return t + tau, 1  # upper bound hit
        elif X <= 0:
            return t + tau, 0  # lower bound hit
    return max_steps * dt + tau, None  # timeout (which I ignored)


def sim_param(param_name, param_value, n_trials=200000):
    default_params = {'v': 1.0, 'a': 1.0,
                      'beta': 0.5, 'tau': 0.3, 'sigma': 1.0}
    params = default_params.copy()
    params[param_name] = param_value
    upper_rts, lower_rts = [], []
    for _ in range(n_trials):
        rt, choice = sim_ddm(**params)
        if choice == 1:
            upper_rts.append(rt)
        elif choice == 0:
            lower_rts.append(rt)
    return (upper_rts, lower_rts)  # return all RTs


# deepseek-r1 wrote this to help parallelize my code (because for loops
# aren't cool when they're frying my laptop)
def parallel_sim_param(param_name, param_values, n_trials):
    worker = partial(sim_param, param_name, n_trials=n_trials)
    with Pool(processes=cpu_count()) as pool:
        results = pool.map(worker, param_values)
    return results


parameters = {
    'v': np.linspace(0.5, 1.5, 25),
    'a': np.linspace(0.5, 2.0, 25),
    'beta': np.linspace(0.3, 0.7, 25),
    'tau': np.linspace(0.1, 0.5, 25),
}

# guard the driver code so spawned worker processes don't re-execute it
if __name__ == '__main__':
    fig, axes = plt.subplots(4, 2, figsize=(15, 20))
    axes = axes.flatten()
    for i, (param, values) in enumerate(parameters.items()):
        results = parallel_sim_param(param, values, n_trials=200000)
        # summary stats per parameter value (no bootstrapping)
        means_upper, means_lower = [], []
        stdev_upper, stdev_lower = [], []
        for upper_rts, lower_rts in results:
            means_upper.append(np.mean(upper_rts) if upper_rts else np.nan)
            means_lower.append(np.mean(lower_rts) if lower_rts else np.nan)
            stdev_upper.append(np.std(upper_rts) if upper_rts else np.nan)
            stdev_lower.append(np.std(lower_rts) if lower_rts else np.nan)
        # means
        ax_mean = axes[2 * i]
        ax_mean.plot(values, means_upper, 'o-', label='Upper Boundary Mean RT')
        ax_mean.plot(values, means_lower, 's-', label='Lower Boundary Mean RT')
        ax_mean.plot(values, np.subtract(means_upper, means_lower),
                     'd-', label='Difference', color='red')
        ax_mean.set_xlabel(param)
        ax_mean.set_ylabel('Response Time (s)')
        ax_mean.set_title(f'Effect of {param} on RT Means')
        ax_mean.legend()
        ax_mean.grid(True)
        # standard deviations
        ax_std = axes[2 * i + 1]
        ax_std.plot(values, stdev_upper, 'o-', label='Upper Boundary Std RT')
        ax_std.plot(values, stdev_lower, 's-', label='Lower Boundary Std RT')
        ax_std.set_xlabel(param)
        ax_std.set_ylabel('Standard Deviation (s)')
        ax_std.set_title(f'Effect of {param} on RT Std Devs')
        ax_std.legend()
        ax_std.grid(True)
        # debugging output
        print(f"\nVARYING {param.upper()}:\n")
        print(f"Means (Upper): {np.round(means_upper, 5)}")
        print(f"Means (Lower): {np.round(means_lower, 5)}")
        print(f"Std (Upper): {np.round(stdev_upper, 5)}")
        print(f"Std (Lower): {np.round(stdev_lower, 5)}")
    plt.tight_layout()
    plt.savefig('part2.png')
+421
@@ -0,0 +1,421 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import stan\n",
"import arviz as az\n",
"\n",
"# stupid stan problems\n",
"import nest_asyncio\n",
"nest_asyncio.apply()\n",
"\n",
"# true param\n",
"alpha_true = 2.3,\n",
"beta_true = 4.0,\n",
"sigma_true = 2.0,\n",
"N = 100\n",
"\n",
"# simulation\n",
"np.random.seed(42)\n",
"x = np.random.normal(size=N)\n",
"y = alpha_true + beta_true * x + sigma_true * np.random.normal(size=N)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"stanCode = \"\"\"\n",
"data {\n",
" int<lower=0> N;\n",
" vector[N] x;\n",
" vector[N] y;\n",
"}\n",
"parameters {\n",
" real alpha;\n",
" real beta;\n",
" real<lower=0> sigma_sq;\n",
"}\n",
"transformed parameters {\n",
" real<lower=0> sigma = sqrt(sigma_sq);\n",
"}\n",
"model {\n",
" sigma_sq ~ inv_gamma(1, 1); // prior on variance\n",
" alpha ~ normal(0, 10);\n",
" beta ~ normal(0, 10);\n",
" y ~ normal(alpha + beta * x, sigma); // likelihood\n",
"}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Building...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Building: found in cache, done.Sampling: 0%\n",
"Sampling: 25% (3000/12000)\n",
"Sampling: 50% (6000/12000)\n",
"Sampling: 75% (9000/12000)\n",
"Sampling: 100% (12000/12000)\n",
"Sampling: 100% (12000/12000), done.\n",
"Messages received during sampling:\n",
" Gradient evaluation took 1.7e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 2.7e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.\n",
" Adjust your expectations accordingly!\n",
" Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
" Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
" If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
" but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
" Gradient evaluation took 2e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.2 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 1e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.\n",
" Adjust your expectations accordingly!\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>sd</th>\n",
" <th>hdi_3%</th>\n",
" <th>hdi_97%</th>\n",
" <th>mcse_mean</th>\n",
" <th>mcse_sd</th>\n",
" <th>ess_bulk</th>\n",
" <th>ess_tail</th>\n",
" <th>r_hat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>alpha</th>\n",
" <td>2.317</td>\n",
" <td>0.192</td>\n",
" <td>1.959</td>\n",
" <td>2.683</td>\n",
" <td>0.002</td>\n",
" <td>0.002</td>\n",
" <td>6909.0</td>\n",
" <td>5804.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>beta</th>\n",
" <td>3.713</td>\n",
" <td>0.208</td>\n",
" <td>3.327</td>\n",
" <td>4.117</td>\n",
" <td>0.002</td>\n",
" <td>0.002</td>\n",
" <td>7805.0</td>\n",
" <td>5904.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma_sq</th>\n",
" <td>3.615</td>\n",
" <td>0.511</td>\n",
" <td>2.716</td>\n",
" <td>4.584</td>\n",
" <td>0.006</td>\n",
" <td>0.006</td>\n",
" <td>7166.0</td>\n",
" <td>5819.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma</th>\n",
" <td>1.897</td>\n",
" <td>0.133</td>\n",
" <td>1.648</td>\n",
" <td>2.141</td>\n",
" <td>0.002</td>\n",
" <td>0.001</td>\n",
" <td>7166.0</td>\n",
" <td>5819.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk \\\n",
"alpha 2.317 0.192 1.959 2.683 0.002 0.002 6909.0 \n",
"beta 3.713 0.208 3.327 4.117 0.002 0.002 7805.0 \n",
"sigma_sq 3.615 0.511 2.716 4.584 0.006 0.006 7166.0 \n",
"sigma 1.897 0.133 1.648 2.141 0.002 0.001 7166.0 \n",
"\n",
" ess_tail r_hat \n",
"alpha 5804.0 1.0 \n",
"beta 5904.0 1.0 \n",
"sigma_sq 5819.0 1.0 \n",
"sigma 5819.0 1.0 "
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Define data first\n",
"data = {\"N\": N, \"x\": x, \"y\": y}\n",
"\n",
"# Build the model with data\n",
"model = stan.build(stanCode, data=data)\n",
"\n",
"# Sample\n",
"fit = model.sample(num_chains=4, num_samples=2000)\n",
"\n",
"az.summary(az.from_pystan(fit))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 4: Analyze Results for N=100\n",
"\n",
"Posterior summaries should be close to the true values:\n",
"\n",
"- **α**: approximately 2.3\n",
"- **β**: approximately 4.0\n",
"- **σ**: approximately 2.0\n",
"\n",
"Also compute the 95% credible intervals."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 5: Repeat with N=1000\n",
"\n",
"Increase the sample size and rerun the simulation and model fitting."
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Building...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Building: found in cache, done.Sampling: 0%\n",
"Sampling: 25% (3000/12000)\n",
"Sampling: 50% (6000/12000)\n",
"Sampling: 75% (9000/12000)\n",
"Sampling: 100% (12000/12000)\n",
"Sampling: 100% (12000/12000), done.\n",
"Messages received during sampling:\n",
" Gradient evaluation took 0.000146 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.46 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 0.000126 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.26 seconds.\n",
" Adjust your expectations accordingly!\n",
" Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
" Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
" If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
" but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
" Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
" Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
" If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
" but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
" Gradient evaluation took 0.000123 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.23 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 0.000135 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.35 seconds.\n",
" Adjust your expectations accordingly!\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>sd</th>\n",
" <th>hdi_3%</th>\n",
" <th>hdi_97%</th>\n",
" <th>mcse_mean</th>\n",
" <th>mcse_sd</th>\n",
" <th>ess_bulk</th>\n",
" <th>ess_tail</th>\n",
" <th>r_hat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>alpha</th>\n",
" <td>2.366</td>\n",
" <td>0.062</td>\n",
" <td>2.253</td>\n",
" <td>2.484</td>\n",
" <td>0.001</td>\n",
" <td>0.001</td>\n",
" <td>7563.0</td>\n",
" <td>5508.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>beta</th>\n",
" <td>3.929</td>\n",
" <td>0.063</td>\n",
" <td>3.814</td>\n",
" <td>4.048</td>\n",
" <td>0.001</td>\n",
" <td>0.001</td>\n",
" <td>8352.0</td>\n",
" <td>5934.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma_sq</th>\n",
" <td>3.895</td>\n",
" <td>0.174</td>\n",
" <td>3.588</td>\n",
" <td>4.236</td>\n",
" <td>0.002</td>\n",
" <td>0.002</td>\n",
" <td>8354.0</td>\n",
" <td>6044.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma</th>\n",
" <td>1.973</td>\n",
" <td>0.044</td>\n",
" <td>1.894</td>\n",
" <td>2.058</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>8354.0</td>\n",
" <td>6044.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk \\\n",
"alpha 2.366 0.062 2.253 2.484 0.001 0.001 7563.0 \n",
"beta 3.929 0.063 3.814 4.048 0.001 0.001 8352.0 \n",
"sigma_sq 3.895 0.174 3.588 4.236 0.002 0.002 8354.0 \n",
"sigma 1.973 0.044 1.894 2.058 0.000 0.000 8354.0 \n",
"\n",
" ess_tail r_hat \n",
"alpha 5508.0 1.0 \n",
"beta 5934.0 1.0 \n",
"sigma_sq 6044.0 1.0 \n",
"sigma 6044.0 1.0 "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"N_large = 1000;\n",
"x_large = np.random.normal(size=N_large);\n",
"y_large = alpha_true + beta_true * x_large + sigma_true * np.random.normal(size=N_large);\n",
"\n",
"# create new data dictionary\n",
"data_large = {\"N\": N_large, \"x\": x_large, \"y\": y_large};\n",
"model_large = stan.build(stanCode, data=data_large)\n",
"\n",
"# fit the model again\n",
"fit_large = model_large.sample(num_chains=4, num_samples=2000);\n",
"\n",
"# check diagnostics for larger data\n",
"az.summary(az.from_pystan(fit_large))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.x"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
+71
@@ -0,0 +1,71 @@
To solve Problem 5, follow these steps:
### Step 1: Simulate Data
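For example, mirroring the simulation in the accompanying notebook (seed included for reproducibility):
```python
import numpy as np

# true parameters
alpha_true, beta_true, sigma_true = 2.3, 4.0, 2.0
N = 100

# simulate predictor and response
np.random.seed(42)
x = np.random.normal(size=N)
y = alpha_true + beta_true * x + sigma_true * np.random.normal(size=N)
```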
### Step 2: Stan Model Code
Write the Stan model (`bayesian_regression.stan`):
```stan
data {
int<lower=0> N;
vector[N] x;
vector[N] y;
}
parameters {
real alpha;
real beta;
real<lower=0> sigma_sq;
}
transformed parameters {
real<lower=0> sigma = sqrt(sigma_sq);
}
model {
sigma_sq ~ inv_gamma(1, 1); // Prior on variance
alpha ~ normal(0, 10);
beta ~ normal(0, 10);
y ~ normal(alpha + beta * x, sigma); // Likelihood
}
```
### Step 3: Fit the Model and Check Diagnostics
Use `pystan` or `cmdstanpy` to run the model. Check Rhat (≈1) and ESS (sufficiently large). For example:
```python
import cmdstanpy
model = cmdstanpy.CmdStanModel(stan_file="bayesian_regression.stan")
data = {"N": N, "x": x, "y": y}
fit = model.sample(data=data, chains=4, iter_sampling=2000)
# Check diagnostics
print(fit.diagnose())
```
### Step 4: Analyze Results for N=100
Posterior summaries:
- **Posterior means** should be close to true values (α=2.3, β=4.0, σ=2.0).
- **Uncertainty**: Compute 95% credible intervals (see the ArviZ sketch after this list). Example output:
- α: 2.1 ± 0.4 (1.7 to 2.5)
- β: 3.8 ± 0.5 (3.3 to 4.3)
- σ: 1.9 ± 0.2 (1.7 to 2.1)
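A minimal sketch of computing these summaries with ArviZ (assuming the `fit` object from Step 3; `az.from_cmdstanpy` converts it for `az.summary`):
```python
import arviz as az

idata = az.from_cmdstanpy(posterior=fit)
# posterior means, sds, and 95% highest-density intervals
print(az.summary(idata, var_names=["alpha", "beta", "sigma"], hdi_prob=0.95))
```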
### Step 5: Repeat with N=1000
Increase sample size and rerun:
```python
N_large = 1000
x_large = np.random.normal(size=N_large)
y_large = alpha_true + beta_true * x_large + sigma_true * np.random.normal(size=N_large)
```
Fit the model again (see the refit sketch below). Results will show:
- **Tighter credible intervals** (e.g., β: 3.95 ± 0.1).
- Reduced posterior variance, indicating higher precision.
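Refitting only requires a new data dictionary passed to the already-compiled model (a sketch reusing `model` from Step 3):
```python
data_large = {"N": N_large, "x": x_large, "y": y_large}
fit_large = model.sample(data=data_large, chains=4, iter_sampling=2000)
print(fit_large.diagnose())
```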
### Key Observations:
1. **Accuracy**: Posterior means align closely with true parameters.
2. **Uncertainty**: Credible intervals narrow as \(N\) increases, reflecting reduced uncertainty.
3. **Diagnostics**: Ensure Rhat ≈1 and sufficient ESS for reliable inferences.
**Visualization**: Plot prior vs. posterior histograms for parameters (using tools like `arviz` or `seaborn`), showing posterior concentration around true values, especially for \(N=1000\).
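One way to draw these comparisons with ArviZ (a sketch; `ref_val` marks the true parameter values on each posterior):
```python
az.plot_posterior(idata, var_names=["alpha", "beta", "sigma"],
                  ref_val=[2.3, 4.0, 2.0])
```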
---
**Answer for LMS Submission**
Implement the steps above, ensuring your write-up includes code snippets, diagnostic results, and graphical comparisons. Highlight the reduction in posterior variance when increasing \(N\), demonstrating the influence of data quantity on Bayesian inference.
+46
@@ -0,0 +1,46 @@
import matplotlib.pyplot as plt
import numpy as np

# H Y P E R P A R A M E T E R S
mu_prior = 0             # prior mean (mu_0)
sigma2_prior = 2         # prior variance (omega_0^2)
sigma2_likelihood = 1    # likelihood variance (omega^2)
n_samples = 1000000      # number of Monte Carlo samples

# simulate θ ~ N(mu_0, omega_0^2) and y ~ N(θ, omega^2)
theta = np.random.normal(mu_prior, np.sqrt(sigma2_prior), n_samples)
y = np.random.normal(theta, np.sqrt(sigma2_likelihood))

# conjugate-normal posterior parameters for each y
sigma2_posterior = 1 / (1 / sigma2_prior + 1 / sigma2_likelihood)
mu_posterior = (mu_prior / sigma2_prior + y / sigma2_likelihood) * \
    sigma2_posterior  # posterior mean

# law of total variance: Var[θ] = 𝔼[Var[θ|y]] + Var[𝔼[θ|y]]
expected_posterior_var = sigma2_posterior  # 𝔼[Var[θ|y]] (constant here)
var_posterior_mean = np.var(mu_posterior)  # Var[𝔼[θ|y]]
prior_var = sigma2_prior                   # Var[θ]

# verify the identity (tolerance sized for Monte Carlo error at n = 1e6)
sum_terms = expected_posterior_var + var_posterior_mean
print(f"Prior Variance (Var[θ]): {prior_var:.4f}")
print(
    f"Expected Posterior Variance (𝔼[Var[θ|y]]): {expected_posterior_var:.4f}")
print(f"Variance of Posterior Mean (Var[𝔼[θ|y]]): {var_posterior_mean:.4f}")
print(f"Sum of Terms: {sum_terms:.4f}")
print(f"Identity Holds: {np.isclose(prior_var, sum_terms, atol=1e-2)}")

# plot the distribution of posterior means against the prior mean
plt.figure(figsize=(10, 6))
plt.hist(mu_posterior, bins=50, density=True,
         alpha=0.6, label="Posterior Means")
plt.axvline(mu_prior, color='r', linestyle='--', label="Prior Mean")
plt.xlabel("Posterior Mean (𝔼[θ|y])")
plt.ylabel("Density")
plt.title("Distribution of Posterior Means vs. Prior")
plt.legend()
plt.grid(True)
plt.savefig('part3.png')
BIN: binary image added (not shown; 30 KiB)
+275
File diff suppressed because one or more lines are too long
BIN: binary image added (not shown; 20 KiB)
+76
@@ -0,0 +1,76 @@
As a culinary data scientist, you investigate how cooking time (\(x\)) affects the length of "massive ramen noodles" (\(y\)). Using Bayesian linear regression, you model the relationship to quantify expansion rates and uncertainty.
\subsection*{Methods}
\subsubsection*{Model Specification}
The regression model is:
\[
y_n = \alpha + \beta x_n + \epsilon_n, \quad \epsilon_n \sim \mathcal{N}(0, \sigma^2)
\]
\begin{itemize}
\item \textbf{Priors}:
\begin{align*}
\alpha &\sim \mathcal{N}(0, 10) \quad \text{(Intercept)} \\
\beta &\sim \mathcal{N}(0, 10) \quad \text{(Slope)} \\
\sigma^2 &\sim \text{Inv-Gamma}(1, 1) \quad \text{(Noise)}
\end{align*}
\end{itemize}
\subsubsection*{Data Simulation}
Data was generated with:
\begin{itemize}
\item True parameters: \(\alpha = 2.3\), \(\beta = 4.0\), \(\sigma = 2.0\)
\item \(N = 100\) observations, \(x \sim \mathcal{N}(0, 1)\), \(y = \alpha + \beta x + \mathcal{N}(0, \sigma^2)\)
\end{itemize}
\subsection*{Results}
\subsubsection*{Posterior Estimates (\(N = 100\))}
\begin{table}[h]
\centering
\begin{tabular}{@{}lccc@{}}
\toprule
Parameter & Posterior Mean & 95\% HDI & True Value \\
\midrule
\(\alpha\) (Intercept) & 2.31 & [1.94, 2.65] & 2.3 \\
\(\beta\) (Slope) & 3.71 & [3.32, 4.13] & 4.0 \\
\(\sigma\) (Noise) & 1.91 & [1.67, 2.18] & 2.0 \\
\bottomrule
\end{tabular}
\caption{Posterior summaries vs. true values. HDI = Highest Density Interval.}
\end{table}
\subsubsection*{Convergence Diagnostics}
\begin{itemize}
\item \textbf{R-hat}: 1.0 for all parameters (ideal: \(\leq 1.01\)).
\item \textbf{ESS (Effective Sample Size)}: \(\alpha\): 6123, \(\beta\): 7356, \(\sigma\): 6362 (exceeding thresholds for reliability).
\end{itemize}
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{posterior_plots.png}
\caption{Posterior distributions for \(\alpha\), \(\beta\), and \(\sigma\). Dashed lines indicate true values.}
\end{figure}
\subsubsection*{Effect of Increased Data (\(N = 1000\), Hypothetical)}
\begin{itemize}
  \item Expected uncertainty reduction: credible interval widths scale as \(1/\sqrt{N}\), so a tenfold increase in \(N\) shrinks them by a factor of \(\sqrt{100/1000} \approx 0.32\), i.e.\ roughly \(68\%\) narrower (see the sketch below).
\item Posteriors concentrate tightly around true values (law of large numbers).
\end{itemize}
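A short check of this scaling, assuming the flat-prior (OLS-like) approximation to the posterior standard deviation of the slope:
\[
\operatorname{sd}(\beta \mid y) \approx \frac{\sigma}{\sqrt{N \operatorname{Var}(x)}}
\quad\Longrightarrow\quad
\frac{\operatorname{sd}_{N=1000}}{\operatorname{sd}_{N=100}} \approx \sqrt{\tfrac{100}{1000}} \approx 0.32,
\]
which matches the posterior standard deviations for \(\beta\) reported in the notebook (\(0.208\) at \(N=100\) vs.\ \(0.063\) at \(N=1000\)).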
\subsection*{Discussion}
\subsubsection*{Accuracy and Uncertainty}
\begin{itemize}
\item With \(N = 100\), estimates align closely with ground truth (e.g., \(\beta = 3.71\) vs. true \(4.0\)), but credible intervals reflect residual uncertainty.
\item Noise (\(\sigma\)) slightly underestimated but within plausible range.
\end{itemize}
\subsubsection*{Model Insights}
\begin{itemize}
\item Noodles expand by \(\sim 3.7\) units per second (\(\beta\)), validating the hypothesis.
\item Stan's MCMC sampler achieved excellent convergence (R-hat = 1.0, ESS > 5000).
\end{itemize}
\subsubsection*{Limitations}
\begin{itemize}
\item Assumes linearity and normality; real-world noodle expansion may exhibit nonlinear dynamics.
\item Hyperparameters (e.g., \(\mathcal{N}(0, 10)\)) chosen for demonstration, not domain knowledge.
\end{itemize}
+308
File diff suppressed because one or more lines are too long
+37
@@ -0,0 +1,37 @@
data {
  int<lower=1> N;
  array[N] real<lower=0> y;
  array[N] int<lower=1, upper=2> condition;
  array[N] int<lower=0, upper=1> choice;
}
parameters {
  // Your code here
}
model {
  // Priors
  // Your code here

  // Likelihood
  for (n in 1:N) {
    // Condition 1
    if (condition[n] == 1) {
      if (choice[n] == 1) {
        // Your code here
      }
      else {
        // Your code here
      }
    }
    // Condition 2
    if (condition[n] == 2) {
      if (choice[n] == 1) {
        // Your code here
      }
      else {
        // Your code here
      }
    }
  }
}
BIN: binary image added (not shown; 20 KiB)
+301
@@ -0,0 +1,301 @@
rt;choice;condition
0.477;1.0;1.0
0.6;1.0;1.0
0.5;0.0;1.0
0.416;1.0;1.0
0.435;1.0;1.0
0.499;1.0;1.0
0.531;1.0;1.0
0.616;1.0;1.0
0.492;1.0;1.0
0.682;1.0;1.0
0.525;1.0;1.0
0.714;1.0;1.0
0.467;0.0;1.0
1.106;1.0;1.0
0.427;1.0;1.0
0.681;1.0;1.0
0.438;1.0;1.0
0.584;0.0;1.0
0.461;1.0;1.0
0.466;1.0;1.0
0.488;1.0;1.0
0.431;1.0;1.0
0.501;1.0;1.0
0.444;1.0;1.0
0.496;1.0;1.0
0.5;1.0;1.0
0.716;1.0;1.0
0.449;1.0;1.0
0.45;1.0;1.0
0.552;1.0;1.0
0.479;1.0;1.0
0.497;1.0;1.0
0.463;1.0;1.0
0.54;0.0;1.0
0.44;1.0;1.0
0.425;1.0;1.0
0.554;1.0;1.0
0.663;1.0;1.0
0.434;1.0;1.0
0.463;1.0;1.0
0.423;1.0;1.0
0.423;1.0;1.0
0.45;1.0;1.0
0.687;1.0;1.0
0.587;1.0;1.0
0.584;1.0;1.0
0.531;1.0;1.0
0.718;1.0;1.0
0.534;1.0;1.0
0.565;1.0;1.0
0.43;1.0;1.0
0.505;0.0;1.0
0.456;1.0;1.0
0.668;1.0;1.0
0.459;1.0;1.0
0.509;1.0;1.0
0.506;1.0;1.0
0.741;1.0;1.0
0.633;1.0;1.0
0.475;1.0;1.0
0.635;1.0;1.0
0.456;1.0;1.0
0.466;1.0;1.0
0.567;1.0;1.0
0.449;1.0;1.0
0.451;1.0;1.0
0.464;1.0;1.0
0.467;1.0;1.0
0.559;1.0;1.0
0.425;1.0;1.0
0.452;1.0;1.0
0.411;1.0;1.0
0.528;1.0;1.0
0.429;1.0;1.0
0.521;1.0;1.0
0.54;0.0;1.0
0.652;1.0;1.0
0.687;1.0;1.0
0.57;1.0;1.0
0.484;0.0;1.0
0.545;1.0;1.0
0.479;1.0;1.0
0.68;1.0;1.0
0.434;1.0;1.0
0.458;1.0;1.0
0.501;1.0;1.0
0.509;1.0;1.0
0.462;1.0;1.0
0.452;1.0;1.0
0.522;1.0;1.0
0.431;1.0;1.0
0.43;1.0;1.0
0.49;1.0;1.0
0.697;1.0;1.0
0.633;1.0;1.0
0.539;1.0;1.0
0.483;1.0;1.0
1.11;1.0;1.0
0.472;1.0;1.0
0.757;1.0;1.0
0.854;1.0;1.0
0.653;1.0;1.0
0.45;1.0;1.0
0.516;1.0;1.0
0.547;0.0;1.0
0.432;1.0;1.0
0.483;1.0;1.0
0.501;1.0;1.0
0.444;1.0;1.0
0.515;1.0;1.0
0.534;1.0;1.0
0.441;1.0;1.0
0.474;1.0;1.0
0.513;1.0;1.0
0.589;0.0;1.0
0.446;1.0;1.0
0.642;0.0;1.0
0.591;1.0;1.0
0.64;1.0;1.0
0.449;1.0;1.0
0.418;1.0;1.0
0.615;1.0;1.0
0.585;1.0;1.0
0.459;1.0;1.0
0.479;1.0;1.0
0.477;1.0;1.0
0.559;1.0;1.0
0.419;1.0;1.0
0.522;1.0;1.0
0.429;1.0;1.0
0.528;1.0;1.0
0.467;1.0;1.0
0.58;0.0;1.0
0.487;1.0;1.0
0.451;1.0;1.0
0.527;1.0;1.0
0.451;1.0;1.0
0.49;1.0;1.0
0.514;1.0;1.0
0.455;1.0;1.0
0.507;1.0;1.0
0.474;1.0;1.0
0.458;1.0;1.0
0.454;1.0;1.0
0.518;1.0;1.0
0.429;1.0;1.0
0.96;1.0;1.0
0.427;1.0;1.0
0.802;1.0;1.0
0.446;1.0;1.0
0.439;0.0;2.0
0.471;0.0;2.0
0.917;0.0;2.0
0.562;1.0;2.0
0.678;0.0;2.0
0.671;1.0;2.0
0.599;0.0;2.0
0.638;0.0;2.0
0.494;0.0;2.0
0.498;1.0;2.0
0.582;0.0;2.0
0.672;1.0;2.0
0.449;1.0;2.0
0.585;0.0;2.0
0.514;1.0;2.0
0.493;1.0;2.0
0.437;0.0;2.0
0.452;1.0;2.0
0.727;0.0;2.0
0.523;1.0;2.0
0.485;1.0;2.0
0.439;1.0;2.0
0.683;0.0;2.0
0.578;1.0;2.0
0.431;1.0;2.0
0.562;0.0;2.0
0.471;1.0;2.0
0.786;1.0;2.0
0.434;1.0;2.0
0.441;1.0;2.0
0.745;1.0;2.0
0.533;1.0;2.0
0.756;0.0;2.0
0.678;1.0;2.0
0.494;1.0;2.0
1.028;1.0;2.0
0.475;0.0;2.0
0.563;0.0;2.0
0.483;1.0;2.0
0.566;0.0;2.0
0.466;1.0;2.0
1.086;1.0;2.0
0.573;1.0;2.0
0.597;1.0;2.0
0.597;0.0;2.0
0.446;1.0;2.0
0.437;1.0;2.0
0.515;1.0;2.0
0.524;0.0;2.0
0.513;1.0;2.0
0.465;1.0;2.0
0.704;1.0;2.0
0.801;1.0;2.0
0.484;0.0;2.0
0.459;0.0;2.0
0.576;0.0;2.0
0.462;1.0;2.0
0.471;0.0;2.0
0.595;1.0;2.0
0.464;1.0;2.0
0.644;1.0;2.0
0.42;0.0;2.0
0.452;1.0;2.0
0.488;0.0;2.0
0.568;1.0;2.0
0.481;0.0;2.0
0.5;1.0;2.0
0.54;1.0;2.0
0.447;0.0;2.0
0.463;1.0;2.0
0.507;1.0;2.0
0.522;1.0;2.0
0.58;1.0;2.0
0.464;0.0;2.0
0.507;0.0;2.0
0.727;1.0;2.0
0.452;1.0;2.0
0.636;0.0;2.0
0.552;1.0;2.0
0.739;1.0;2.0
0.468;1.0;2.0
0.563;1.0;2.0
0.443;1.0;2.0
1.023;1.0;2.0
0.571;1.0;2.0
0.44;0.0;2.0
0.717;1.0;2.0
0.751;1.0;2.0
0.491;0.0;2.0
0.456;1.0;2.0
0.569;1.0;2.0
0.456;1.0;2.0
0.517;1.0;2.0
0.492;1.0;2.0
0.527;1.0;2.0
0.501;0.0;2.0
0.499;0.0;2.0
0.428;0.0;2.0
0.529;0.0;2.0
0.43;0.0;2.0
0.453;0.0;2.0
0.484;0.0;2.0
0.541;1.0;2.0
0.707;0.0;2.0
0.712;1.0;2.0
0.53;0.0;2.0
0.871;1.0;2.0
0.896;1.0;2.0
0.548;0.0;2.0
0.484;1.0;2.0
0.779;1.0;2.0
0.503;0.0;2.0
0.696;0.0;2.0
0.522;0.0;2.0
0.93;1.0;2.0
0.535;0.0;2.0
0.615;1.0;2.0
0.624;1.0;2.0
0.742;0.0;2.0
0.528;1.0;2.0
0.441;1.0;2.0
0.514;0.0;2.0
0.445;0.0;2.0
0.625;0.0;2.0
0.578;1.0;2.0
0.55;1.0;2.0
0.686;1.0;2.0
0.505;1.0;2.0
0.872;1.0;2.0
0.548;1.0;2.0
0.487;0.0;2.0
0.733;0.0;2.0
0.46;0.0;2.0
0.764;1.0;2.0
0.589;0.0;2.0
0.482;0.0;2.0
0.449;0.0;2.0
0.428;0.0;2.0
0.604;1.0;2.0
0.505;1.0;2.0
0.649;1.0;2.0
0.484;1.0;2.0
0.535;0.0;2.0
0.471;0.0;2.0
0.441;0.0;2.0
0.528;0.0;2.0
0.621;0.0;2.0
0.48;1.0;2.0
0.693;1.0;2.0
0.493;1.0;2.0
BIN: binary image added (not shown; 363 KiB)
BIN: binary image added (not shown; 50 KiB)
BIN: binary image added (not shown; 48 KiB)
BIN: binary image added (not shown; 55 KiB)
+15
@@ -0,0 +1,15 @@
contourpy==1.3.1
cycler==0.12.1
fonttools==4.56.0
kiwisolver==1.4.8
matplotlib==3.10.1
numpy==2.2.3
packaging==24.2
pandas==2.2.3
paranoid-scientist==0.2.3
pillow==11.1.0
pyparsing==3.2.1
python-dateutil==2.9.0.post0
pytz==2025.1
six==1.17.0
tzdata==2025.1