finished HW3

2025-03-23 16:54:36 -04:00
parent 3faf2fcfc2
commit e5d50c19d8
23 changed files with 1706 additions and 0 deletions
+2
@@ -0,0 +1,2 @@
build/
.venv/
BIN: binary image added (not shown; 30 KiB)
+50
@@ -0,0 +1,50 @@
import numpy as np
import matplotlib.pyplot as plt


def simulate_ddm(v, a=1.0, beta=0.5, tau=0.3, sigma=1.0, dt=0.001, max_steps=3000):
    """Simulate one drift-diffusion trial; returns (RT, choice)."""
    X = beta * a  # start position
    t = 0.0
    for _ in range(max_steps):
        dW = np.random.normal(0, np.sqrt(dt))
        dX = v * dt + sigma * dW
        X += dX
        t += dt
        if X >= a:
            return t + tau, 1  # upper bound hit
        elif X <= 0:
            return t + tau, 0  # lower bound hit
    return max_steps * dt + tau, None  # timeout: no boundary reached


# coarse settings (raised in part 2)
vs = np.linspace(0.5, 1.5, 25)  # drift rates to test
n_trials = 2000

# store mean RTs per boundary
upper_means, lower_means = [], []
for v in vs:
    upper_rts, lower_rts = [], []
    for _ in range(n_trials):
        rt, choice = simulate_ddm(v)
        if choice == 1:
            upper_rts.append(rt)
        elif choice == 0:
            lower_rts.append(rt)
    # means (NaN when a boundary was never hit)
    upper_means.append(np.mean(upper_rts) if upper_rts else np.nan)
    lower_means.append(np.mean(lower_rts) if lower_rts else np.nan)

# plotting
plt.figure(figsize=(10, 6))
plt.plot(vs, upper_means, 'o-', label='Upper Boundary Mean RT')
plt.plot(vs, lower_means, 's-', label='Lower Boundary Mean RT')
plt.plot(vs, np.array(upper_means) - np.array(lower_means),
         'd-', label='Mean Difference')
plt.xlabel('Drift Rate (v)')
plt.ylabel('Response Time (s)')
plt.title('Effect of Drift Rate on RT Distributions')
plt.legend()
plt.grid(True)
plt.savefig('part1.png')
+104
@@ -0,0 +1,104 @@
import numpy as np
import matplotlib.pyplot as plt
from multiprocessing import Pool, cpu_count
from functools import partial


def sim_ddm(v=1.0, a=1.0, beta=0.5, tau=0.3, sigma=1.0, dt=0.001, max_steps=3000):
    X = beta * a  # start
    t = 0.0
    for _ in range(max_steps):
        dW = np.random.normal(0, np.sqrt(dt))
        dX = v * dt + sigma * dW
        X += dX
        t += dt
        if X >= a:
            return t + tau, 1  # upper bound hit
        elif X <= 0:
            return t + tau, 0  # lower bound hit
    return max_steps * dt + tau, None  # timeout (which I ignored)


def sim_param(param_name, param_value, n_trials=200000):
    default_params = {'v': 1.0, 'a': 1.0,
                      'beta': 0.5, 'tau': 0.3, 'sigma': 1.0}
    params = default_params.copy()
    params[param_name] = param_value
    upper_rts, lower_rts = [], []
    for _ in range(n_trials):
        rt, choice = sim_ddm(**params)
        if choice == 1:
            upper_rts.append(rt)
        elif choice == 0:
            lower_rts.append(rt)
    return (upper_rts, lower_rts)  # return all RTs


# deepseek-r1 wrote this to help parallelize my code (because for loops
# aren't cool when they're frying my laptop)
def parallel_sim_param(param_name, param_values, n_trials):
    worker = partial(sim_param, param_name, n_trials=n_trials)
    with Pool(processes=cpu_count()) as pool:
        results = pool.map(worker, param_values)
    return results


parameters = {
    'v': np.linspace(0.5, 1.5, 25),
    'a': np.linspace(0.5, 2.0, 25),
    'beta': np.linspace(0.3, 0.7, 25),
    'tau': np.linspace(0.1, 0.5, 25),
}

# guard the driver code so spawned worker processes don't re-execute it
if __name__ == '__main__':
    fig, axes = plt.subplots(4, 2, figsize=(15, 20))
    axes = axes.flatten()
    for i, (param, values) in enumerate(parameters.items()):
        results = parallel_sim_param(param, values, n_trials=200000)
        # summary stats per parameter value (no bootstrapping)
        means_upper, means_lower = [], []
        stdev_upper, stdev_lower = [], []
        for upper_rts, lower_rts in results:
            means_upper.append(np.mean(upper_rts) if upper_rts else np.nan)
            means_lower.append(np.mean(lower_rts) if lower_rts else np.nan)
            stdev_upper.append(np.std(upper_rts) if upper_rts else np.nan)
            stdev_lower.append(np.std(lower_rts) if lower_rts else np.nan)
        # means
        ax_mean = axes[2 * i]
        ax_mean.plot(values, means_upper, 'o-', label='Upper Boundary Mean RT')
        ax_mean.plot(values, means_lower, 's-', label='Lower Boundary Mean RT')
        ax_mean.plot(values, np.subtract(means_upper, means_lower),
                     'd-', label='Difference', color='red')
        ax_mean.set_xlabel(param)
        ax_mean.set_ylabel('Response Time (s)')
        ax_mean.set_title(f'Effect of {param} on RT Means')
        ax_mean.legend()
        ax_mean.grid(True)
        # standard deviations
        ax_std = axes[2 * i + 1]
        ax_std.plot(values, stdev_upper, 'o-', label='Upper Boundary Std RT')
        ax_std.plot(values, stdev_lower, 's-', label='Lower Boundary Std RT')
        ax_std.set_xlabel(param)
        ax_std.set_ylabel('Standard Deviation (s)')
        ax_std.set_title(f'Effect of {param} on RT Std Devs')
        ax_std.legend()
        ax_std.grid(True)
        # debugging output
        print(f"\nVARYING {param.upper()}:\n")
        print(f"Means (Upper): {np.round(means_upper, 5)}")
        print(f"Means (Lower): {np.round(means_lower, 5)}")
        print(f"Std (Upper): {np.round(stdev_upper, 5)}")
        print(f"Std (Lower): {np.round(stdev_lower, 5)}")
    plt.tight_layout()
    plt.savefig('part2.png')
+421
@@ -0,0 +1,421 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import stan\n",
"import arviz as az\n",
"\n",
"# stupid stan problems\n",
"import nest_asyncio\n",
"nest_asyncio.apply()\n",
"\n",
"# true param\n",
"alpha_true = 2.3,\n",
"beta_true = 4.0,\n",
"sigma_true = 2.0,\n",
"N = 100\n",
"\n",
"# simulation\n",
"np.random.seed(42)\n",
"x = np.random.normal(size=N)\n",
"y = alpha_true + beta_true * x + sigma_true * np.random.normal(size=N)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"stanCode = \"\"\"\n",
"data {\n",
" int<lower=0> N;\n",
" vector[N] x;\n",
" vector[N] y;\n",
"}\n",
"parameters {\n",
" real alpha;\n",
" real beta;\n",
" real<lower=0> sigma_sq;\n",
"}\n",
"transformed parameters {\n",
" real<lower=0> sigma = sqrt(sigma_sq);\n",
"}\n",
"model {\n",
" sigma_sq ~ inv_gamma(1, 1); // prior on variance\n",
" alpha ~ normal(0, 10);\n",
" beta ~ normal(0, 10);\n",
" y ~ normal(alpha + beta * x, sigma); // likelihood\n",
"}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Building...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Building: found in cache, done.Sampling: 0%\n",
"Sampling: 25% (3000/12000)\n",
"Sampling: 50% (6000/12000)\n",
"Sampling: 75% (9000/12000)\n",
"Sampling: 100% (12000/12000)\n",
"Sampling: 100% (12000/12000), done.\n",
"Messages received during sampling:\n",
" Gradient evaluation took 1.7e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 2.7e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.\n",
" Adjust your expectations accordingly!\n",
" Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
" Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
" If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
" but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
" Gradient evaluation took 2e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.2 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 1e-05 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.\n",
" Adjust your expectations accordingly!\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>sd</th>\n",
" <th>hdi_3%</th>\n",
" <th>hdi_97%</th>\n",
" <th>mcse_mean</th>\n",
" <th>mcse_sd</th>\n",
" <th>ess_bulk</th>\n",
" <th>ess_tail</th>\n",
" <th>r_hat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>alpha</th>\n",
" <td>2.317</td>\n",
" <td>0.192</td>\n",
" <td>1.959</td>\n",
" <td>2.683</td>\n",
" <td>0.002</td>\n",
" <td>0.002</td>\n",
" <td>6909.0</td>\n",
" <td>5804.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>beta</th>\n",
" <td>3.713</td>\n",
" <td>0.208</td>\n",
" <td>3.327</td>\n",
" <td>4.117</td>\n",
" <td>0.002</td>\n",
" <td>0.002</td>\n",
" <td>7805.0</td>\n",
" <td>5904.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma_sq</th>\n",
" <td>3.615</td>\n",
" <td>0.511</td>\n",
" <td>2.716</td>\n",
" <td>4.584</td>\n",
" <td>0.006</td>\n",
" <td>0.006</td>\n",
" <td>7166.0</td>\n",
" <td>5819.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma</th>\n",
" <td>1.897</td>\n",
" <td>0.133</td>\n",
" <td>1.648</td>\n",
" <td>2.141</td>\n",
" <td>0.002</td>\n",
" <td>0.001</td>\n",
" <td>7166.0</td>\n",
" <td>5819.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk \\\n",
"alpha 2.317 0.192 1.959 2.683 0.002 0.002 6909.0 \n",
"beta 3.713 0.208 3.327 4.117 0.002 0.002 7805.0 \n",
"sigma_sq 3.615 0.511 2.716 4.584 0.006 0.006 7166.0 \n",
"sigma 1.897 0.133 1.648 2.141 0.002 0.001 7166.0 \n",
"\n",
" ess_tail r_hat \n",
"alpha 5804.0 1.0 \n",
"beta 5904.0 1.0 \n",
"sigma_sq 5819.0 1.0 \n",
"sigma 5819.0 1.0 "
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Define data first\n",
"data = {\"N\": N, \"x\": x, \"y\": y}\n",
"\n",
"# Build the model with data\n",
"model = stan.build(stanCode, data=data)\n",
"\n",
"# Sample\n",
"fit = model.sample(num_chains=4, num_samples=2000)\n",
"\n",
"az.summary(az.from_pystan(fit))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 4: Analyze Results for N=100\n",
"\n",
"Posterior summaries should be close to the true values:\n",
"\n",
"- **α**: approximately 2.3\n",
"- **β**: approximately 4.0\n",
"- **σ**: approximately 2.0\n",
"\n",
"Also compute the 95% credible intervals."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step 5: Repeat with N=1000\n",
"\n",
"Increase the sample size and rerun the simulation and model fitting."
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Building...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Building: found in cache, done.Sampling: 0%\n",
"Sampling: 25% (3000/12000)\n",
"Sampling: 50% (6000/12000)\n",
"Sampling: 75% (9000/12000)\n",
"Sampling: 100% (12000/12000)\n",
"Sampling: 100% (12000/12000), done.\n",
"Messages received during sampling:\n",
" Gradient evaluation took 0.000146 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.46 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 0.000126 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.26 seconds.\n",
" Adjust your expectations accordingly!\n",
" Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
" Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
" If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
" but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
" Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:\n",
" Exception: normal_lpdf: Scale parameter is 0, but must be positive! (in '/tmp/httpstan__2qigylb/model_74j73ceb.stan', line 19, column 2 to column 38)\n",
" If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,\n",
" but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.\n",
" Gradient evaluation took 0.000123 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.23 seconds.\n",
" Adjust your expectations accordingly!\n",
" Gradient evaluation took 0.000135 seconds\n",
" 1000 transitions using 10 leapfrog steps per transition would take 1.35 seconds.\n",
" Adjust your expectations accordingly!\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>sd</th>\n",
" <th>hdi_3%</th>\n",
" <th>hdi_97%</th>\n",
" <th>mcse_mean</th>\n",
" <th>mcse_sd</th>\n",
" <th>ess_bulk</th>\n",
" <th>ess_tail</th>\n",
" <th>r_hat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>alpha</th>\n",
" <td>2.366</td>\n",
" <td>0.062</td>\n",
" <td>2.253</td>\n",
" <td>2.484</td>\n",
" <td>0.001</td>\n",
" <td>0.001</td>\n",
" <td>7563.0</td>\n",
" <td>5508.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>beta</th>\n",
" <td>3.929</td>\n",
" <td>0.063</td>\n",
" <td>3.814</td>\n",
" <td>4.048</td>\n",
" <td>0.001</td>\n",
" <td>0.001</td>\n",
" <td>8352.0</td>\n",
" <td>5934.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma_sq</th>\n",
" <td>3.895</td>\n",
" <td>0.174</td>\n",
" <td>3.588</td>\n",
" <td>4.236</td>\n",
" <td>0.002</td>\n",
" <td>0.002</td>\n",
" <td>8354.0</td>\n",
" <td>6044.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sigma</th>\n",
" <td>1.973</td>\n",
" <td>0.044</td>\n",
" <td>1.894</td>\n",
" <td>2.058</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>8354.0</td>\n",
" <td>6044.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk \\\n",
"alpha 2.366 0.062 2.253 2.484 0.001 0.001 7563.0 \n",
"beta 3.929 0.063 3.814 4.048 0.001 0.001 8352.0 \n",
"sigma_sq 3.895 0.174 3.588 4.236 0.002 0.002 8354.0 \n",
"sigma 1.973 0.044 1.894 2.058 0.000 0.000 8354.0 \n",
"\n",
" ess_tail r_hat \n",
"alpha 5508.0 1.0 \n",
"beta 5934.0 1.0 \n",
"sigma_sq 6044.0 1.0 \n",
"sigma 6044.0 1.0 "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"N_large = 1000;\n",
"x_large = np.random.normal(size=N_large);\n",
"y_large = alpha_true + beta_true * x_large + sigma_true * np.random.normal(size=N_large);\n",
"\n",
"# create new data dictionary\n",
"data_large = {\"N\": N_large, \"x\": x_large, \"y\": y_large};\n",
"model_large = stan.build(stanCode, data=data_large)\n",
"\n",
"# fit the model again\n",
"fit_large = model_large.sample(num_chains=4, num_samples=2000);\n",
"\n",
"# check diagnostics for larger data\n",
"az.summary(az.from_pystan(fit_large))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.x"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
+71
@@ -0,0 +1,71 @@
To solve Problem 5, follow these steps:
### Step 1: Simulate Data
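For example, mirroring the simulation in the accompanying notebook (seed included for reproducibility):
```python
import numpy as np

# true parameters
alpha_true, beta_true, sigma_true = 2.3, 4.0, 2.0
N = 100

# simulate predictor and response
np.random.seed(42)
x = np.random.normal(size=N)
y = alpha_true + beta_true * x + sigma_true * np.random.normal(size=N)
```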
### Step 2: Stan Model Code
Write the Stan model (`bayesian_regression.stan`):
```stan
data {
int<lower=0> N;
vector[N] x;
vector[N] y;
}
parameters {
real alpha;
real beta;
real<lower=0> sigma_sq;
}
transformed parameters {
real<lower=0> sigma = sqrt(sigma_sq);
}
model {
sigma_sq ~ inv_gamma(1, 1); // Prior on variance
alpha ~ normal(0, 10);
beta ~ normal(0, 10);
y ~ normal(alpha + beta * x, sigma); // Likelihood
}
```
### Step 3: Fit the Model and Check Diagnostics
Use `pystan` or `cmdstanpy` to run the model. Check Rhat (≈1) and ESS (sufficiently large). For example:
```python
import cmdstanpy
model = cmdstanpy.CmdStanModel(stan_file="bayesian_regression.stan")
data = {"N": N, "x": x, "y": y}
fit = model.sample(data=data, chains=4, iter_sampling=2000)
# Check diagnostics
print(fit.diagnose())
```
### Step 4: Analyze Results for N=100
Posterior summaries:
- **Posterior means** should be close to true values (α=2.3, β=4.0, σ=2.0).
- **Uncertainty**: Compute 95% credible intervals (see the ArviZ sketch after this list). Example output:
- α: 2.1 ± 0.4 (1.7 to 2.5)
- β: 3.8 ± 0.5 (3.3 to 4.3)
- σ: 1.9 ± 0.2 (1.7 to 2.1)
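A minimal sketch of computing these summaries with ArviZ (assuming the `fit` object from Step 3; `az.from_cmdstanpy` converts it for `az.summary`):
```python
import arviz as az

idata = az.from_cmdstanpy(posterior=fit)
# posterior means, sds, and 95% highest-density intervals
print(az.summary(idata, var_names=["alpha", "beta", "sigma"], hdi_prob=0.95))
```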
### Step 5: Repeat with N=1000
Increase sample size and rerun:
```python
N_large = 1000
x_large = np.random.normal(size=N_large)
y_large = alpha_true + beta_true * x_large + sigma_true * np.random.normal(size=N_large)
```
Fit the model again (see the refit sketch below). Results will show:
- **Tighter credible intervals** (e.g., β: 3.95 ± 0.1).
- Reduced posterior variance, indicating higher precision.
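Refitting only requires a new data dictionary passed to the already-compiled model (a sketch reusing `model` from Step 3):
```python
data_large = {"N": N_large, "x": x_large, "y": y_large}
fit_large = model.sample(data=data_large, chains=4, iter_sampling=2000)
print(fit_large.diagnose())
```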
### Key Observations:
1. **Accuracy**: Posterior means align closely with true parameters.
2. **Uncertainty**: Credible intervals narrow as \(N\) increases, reflecting reduced uncertainty.
3. **Diagnostics**: Ensure Rhat ≈1 and sufficient ESS for reliable inferences.
**Visualization**: Plot prior vs. posterior histograms for parameters (using tools like `arviz` or `seaborn`), showing posterior concentration around true values, especially for \(N=1000\).
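One way to draw these comparisons with ArviZ (a sketch; `ref_val` marks the true parameter values on each posterior):
```python
az.plot_posterior(idata, var_names=["alpha", "beta", "sigma"],
                  ref_val=[2.3, 4.0, 2.0])
```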
---
**Answer for LMS Submission**
Implement the steps above, ensuring your write-up includes code snippets, diagnostic results, and graphical comparisons. Highlight the reduction in posterior variance when increasing \(N\), demonstrating the influence of data quantity on Bayesian inference.
+46
@@ -0,0 +1,46 @@
import matplotlib.pyplot as plt
import numpy as np

# H Y P E R P A R A M E T E R S
mu_prior = 0             # prior mean (mu_0)
sigma2_prior = 2         # prior variance (omega_0^2)
sigma2_likelihood = 1    # likelihood variance (omega^2)
n_samples = 1000000      # number of Monte Carlo samples

# simulate θ ~ N(mu_0, omega_0^2) and y ~ N(θ, omega^2)
theta = np.random.normal(mu_prior, np.sqrt(sigma2_prior), n_samples)
y = np.random.normal(theta, np.sqrt(sigma2_likelihood))

# conjugate-normal posterior parameters for each y
sigma2_posterior = 1 / (1 / sigma2_prior + 1 / sigma2_likelihood)
mu_posterior = (mu_prior / sigma2_prior + y / sigma2_likelihood) * \
    sigma2_posterior  # posterior mean

# law of total variance: Var[θ] = 𝔼[Var[θ|y]] + Var[𝔼[θ|y]]
expected_posterior_var = sigma2_posterior  # 𝔼[Var[θ|y]] (constant here)
var_posterior_mean = np.var(mu_posterior)  # Var[𝔼[θ|y]]
prior_var = sigma2_prior                   # Var[θ]

# verify the identity (tolerance sized for Monte Carlo error at n = 1e6)
sum_terms = expected_posterior_var + var_posterior_mean
print(f"Prior Variance (Var[θ]): {prior_var:.4f}")
print(
    f"Expected Posterior Variance (𝔼[Var[θ|y]]): {expected_posterior_var:.4f}")
print(f"Variance of Posterior Mean (Var[𝔼[θ|y]]): {var_posterior_mean:.4f}")
print(f"Sum of Terms: {sum_terms:.4f}")
print(f"Identity Holds: {np.isclose(prior_var, sum_terms, atol=1e-2)}")

# plot the distribution of posterior means against the prior mean
plt.figure(figsize=(10, 6))
plt.hist(mu_posterior, bins=50, density=True,
         alpha=0.6, label="Posterior Means")
plt.axvline(mu_prior, color='r', linestyle='--', label="Prior Mean")
plt.xlabel("Posterior Mean (𝔼[θ|y])")
plt.ylabel("Density")
plt.title("Distribution of Posterior Means vs. Prior")
plt.legend()
plt.grid(True)
plt.savefig('part3.png')
BIN: binary image added (not shown; 30 KiB)
+275
File diff suppressed because one or more lines are too long
BIN: binary image added (not shown; 20 KiB)
+76
@@ -0,0 +1,76 @@
As a culinary data scientist, you investigate how cooking time (\(x\)) affects the length of "massive ramen noodles" (\(y\)). Using Bayesian linear regression, you model the relationship to quantify expansion rates and uncertainty.
\subsection*{Methods}
\subsubsection*{Model Specification}
The regression model is:
\[
y_n = \alpha + \beta x_n + \epsilon_n, \quad \epsilon_n \sim \mathcal{N}(0, \sigma^2)
\]
\begin{itemize}
\item \textbf{Priors}:
\begin{align*}
\alpha &\sim \mathcal{N}(0, 10) \quad \text{(Intercept)} \\
\beta &\sim \mathcal{N}(0, 10) \quad \text{(Slope)} \\
\sigma^2 &\sim \text{Inv-Gamma}(1, 1) \quad \text{(Noise)}
\end{align*}
\end{itemize}
\subsubsection*{Data Simulation}
Data was generated with:
\begin{itemize}
\item True parameters: \(\alpha = 2.3\), \(\beta = 4.0\), \(\sigma = 2.0\)
\item \(N = 100\) observations, \(x \sim \mathcal{N}(0, 1)\), \(y = \alpha + \beta x + \mathcal{N}(0, \sigma^2)\)
\end{itemize}
\subsection*{Results}
\subsubsection*{Posterior Estimates (\(N = 100\))}
\begin{table}[h]
\centering
\begin{tabular}{@{}lccc@{}}
\toprule
Parameter & Posterior Mean & 95\% HDI & True Value \\
\midrule
\(\alpha\) (Intercept) & 2.31 & [1.94, 2.65] & 2.3 \\
\(\beta\) (Slope) & 3.71 & [3.32, 4.13] & 4.0 \\
\(\sigma\) (Noise) & 1.91 & [1.67, 2.18] & 2.0 \\
\bottomrule
\end{tabular}
\caption{Posterior summaries vs. true values. HDI = Highest Density Interval.}
\end{table}
\subsubsection*{Convergence Diagnostics}
\begin{itemize}
\item \textbf{R-hat}: 1.0 for all parameters (ideal: \(\leq 1.01\)).
\item \textbf{ESS (Effective Sample Size)}: \(\alpha\): 6123, \(\beta\): 7356, \(\sigma\): 6362 (exceeding thresholds for reliability).
\end{itemize}
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{posterior_plots.png}
\caption{Posterior distributions for \(\alpha\), \(\beta\), and \(\sigma\). Dashed lines indicate true values.}
\end{figure}
\subsubsection*{Effect of Increased Data (\(N = 1000\), Hypothetical)}
\begin{itemize}
  \item Expected uncertainty reduction: credible interval widths scale as \(1/\sqrt{N}\), so a tenfold increase in \(N\) shrinks them by a factor of \(\sqrt{100/1000} \approx 0.32\), i.e.\ roughly \(68\%\) narrower (see the sketch below).
\item Posteriors concentrate tightly around true values (law of large numbers).
\end{itemize}
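A short check of this scaling, assuming the flat-prior (OLS-like) approximation to the posterior standard deviation of the slope:
\[
\operatorname{sd}(\beta \mid y) \approx \frac{\sigma}{\sqrt{N \operatorname{Var}(x)}}
\quad\Longrightarrow\quad
\frac{\operatorname{sd}_{N=1000}}{\operatorname{sd}_{N=100}} \approx \sqrt{\tfrac{100}{1000}} \approx 0.32,
\]
which matches the posterior standard deviations for \(\beta\) reported in the notebook (\(0.208\) at \(N=100\) vs.\ \(0.063\) at \(N=1000\)).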
\subsection*{Discussion}
\subsubsection*{Accuracy and Uncertainty}
\begin{itemize}
\item With \(N = 100\), estimates align closely with ground truth (e.g., \(\beta = 3.71\) vs. true \(4.0\)), but credible intervals reflect residual uncertainty.
\item Noise (\(\sigma\)) slightly underestimated but within plausible range.
\end{itemize}
\subsubsection*{Model Insights}
\begin{itemize}
\item Noodles expand by \(\sim 3.7\) units per second (\(\beta\)), validating the hypothesis.
\item Stan's MCMC sampler achieved excellent convergence (R-hat = 1.0, ESS > 5000).
\end{itemize}
\subsubsection*{Limitations}
\begin{itemize}
\item Assumes linearity and normality; real-world noodle expansion may exhibit nonlinear dynamics.
\item Hyperparameters (e.g., \(\mathcal{N}(0, 10)\)) chosen for demonstration, not domain knowledge.
\end{itemize}
+308
File diff suppressed because one or more lines are too long
+37
@@ -0,0 +1,37 @@
data {
  int<lower=1> N;
  array[N] real<lower=0> y;
  array[N] int<lower=1, upper=2> condition;
  array[N] int<lower=0, upper=1> choice;
}
parameters {
  // Your code here
}
model {
  // Priors
  // Your code here

  // Likelihood
  for (n in 1:N) {
    // Condition 1
    if (condition[n] == 1) {
      if (choice[n] == 1) {
        // Your code here
      }
      else {
        // Your code here
      }
    }
    // Condition 2
    if (condition[n] == 2) {
      if (choice[n] == 1) {
        // Your code here
      }
      else {
        // Your code here
      }
    }
  }
}
BIN: binary image added (not shown; 20 KiB)
+301
@@ -0,0 +1,301 @@
rt;choice;condition
0.477;1.0;1.0
0.6;1.0;1.0
0.5;0.0;1.0
0.416;1.0;1.0
0.435;1.0;1.0
0.499;1.0;1.0
0.531;1.0;1.0
0.616;1.0;1.0
0.492;1.0;1.0
0.682;1.0;1.0
0.525;1.0;1.0
0.714;1.0;1.0
0.467;0.0;1.0
1.106;1.0;1.0
0.427;1.0;1.0
0.681;1.0;1.0
0.438;1.0;1.0
0.584;0.0;1.0
0.461;1.0;1.0
0.466;1.0;1.0
0.488;1.0;1.0
0.431;1.0;1.0
0.501;1.0;1.0
0.444;1.0;1.0
0.496;1.0;1.0
0.5;1.0;1.0
0.716;1.0;1.0
0.449;1.0;1.0
0.45;1.0;1.0
0.552;1.0;1.0
0.479;1.0;1.0
0.497;1.0;1.0
0.463;1.0;1.0
0.54;0.0;1.0
0.44;1.0;1.0
0.425;1.0;1.0
0.554;1.0;1.0
0.663;1.0;1.0
0.434;1.0;1.0
0.463;1.0;1.0
0.423;1.0;1.0
0.423;1.0;1.0
0.45;1.0;1.0
0.687;1.0;1.0
0.587;1.0;1.0
0.584;1.0;1.0
0.531;1.0;1.0
0.718;1.0;1.0
0.534;1.0;1.0
0.565;1.0;1.0
0.43;1.0;1.0
0.505;0.0;1.0
0.456;1.0;1.0
0.668;1.0;1.0
0.459;1.0;1.0
0.509;1.0;1.0
0.506;1.0;1.0
0.741;1.0;1.0
0.633;1.0;1.0
0.475;1.0;1.0
0.635;1.0;1.0
0.456;1.0;1.0
0.466;1.0;1.0
0.567;1.0;1.0
0.449;1.0;1.0
0.451;1.0;1.0
0.464;1.0;1.0
0.467;1.0;1.0
0.559;1.0;1.0
0.425;1.0;1.0
0.452;1.0;1.0
0.411;1.0;1.0
0.528;1.0;1.0
0.429;1.0;1.0
0.521;1.0;1.0
0.54;0.0;1.0
0.652;1.0;1.0
0.687;1.0;1.0
0.57;1.0;1.0
0.484;0.0;1.0
0.545;1.0;1.0
0.479;1.0;1.0
0.68;1.0;1.0
0.434;1.0;1.0
0.458;1.0;1.0
0.501;1.0;1.0
0.509;1.0;1.0
0.462;1.0;1.0
0.452;1.0;1.0
0.522;1.0;1.0
0.431;1.0;1.0
0.43;1.0;1.0
0.49;1.0;1.0
0.697;1.0;1.0
0.633;1.0;1.0
0.539;1.0;1.0
0.483;1.0;1.0
1.11;1.0;1.0
0.472;1.0;1.0
0.757;1.0;1.0
0.854;1.0;1.0
0.653;1.0;1.0
0.45;1.0;1.0
0.516;1.0;1.0
0.547;0.0;1.0
0.432;1.0;1.0
0.483;1.0;1.0
0.501;1.0;1.0
0.444;1.0;1.0
0.515;1.0;1.0
0.534;1.0;1.0
0.441;1.0;1.0
0.474;1.0;1.0
0.513;1.0;1.0
0.589;0.0;1.0
0.446;1.0;1.0
0.642;0.0;1.0
0.591;1.0;1.0
0.64;1.0;1.0
0.449;1.0;1.0
0.418;1.0;1.0
0.615;1.0;1.0
0.585;1.0;1.0
0.459;1.0;1.0
0.479;1.0;1.0
0.477;1.0;1.0
0.559;1.0;1.0
0.419;1.0;1.0
0.522;1.0;1.0
0.429;1.0;1.0
0.528;1.0;1.0
0.467;1.0;1.0
0.58;0.0;1.0
0.487;1.0;1.0
0.451;1.0;1.0
0.527;1.0;1.0
0.451;1.0;1.0
0.49;1.0;1.0
0.514;1.0;1.0
0.455;1.0;1.0
0.507;1.0;1.0
0.474;1.0;1.0
0.458;1.0;1.0
0.454;1.0;1.0
0.518;1.0;1.0
0.429;1.0;1.0
0.96;1.0;1.0
0.427;1.0;1.0
0.802;1.0;1.0
0.446;1.0;1.0
0.439;0.0;2.0
0.471;0.0;2.0
0.917;0.0;2.0
0.562;1.0;2.0
0.678;0.0;2.0
0.671;1.0;2.0
0.599;0.0;2.0
0.638;0.0;2.0
0.494;0.0;2.0
0.498;1.0;2.0
0.582;0.0;2.0
0.672;1.0;2.0
0.449;1.0;2.0
0.585;0.0;2.0
0.514;1.0;2.0
0.493;1.0;2.0
0.437;0.0;2.0
0.452;1.0;2.0
0.727;0.0;2.0
0.523;1.0;2.0
0.485;1.0;2.0
0.439;1.0;2.0
0.683;0.0;2.0
0.578;1.0;2.0
0.431;1.0;2.0
0.562;0.0;2.0
0.471;1.0;2.0
0.786;1.0;2.0
0.434;1.0;2.0
0.441;1.0;2.0
0.745;1.0;2.0
0.533;1.0;2.0
0.756;0.0;2.0
0.678;1.0;2.0
0.494;1.0;2.0
1.028;1.0;2.0
0.475;0.0;2.0
0.563;0.0;2.0
0.483;1.0;2.0
0.566;0.0;2.0
0.466;1.0;2.0
1.086;1.0;2.0
0.573;1.0;2.0
0.597;1.0;2.0
0.597;0.0;2.0
0.446;1.0;2.0
0.437;1.0;2.0
0.515;1.0;2.0
0.524;0.0;2.0
0.513;1.0;2.0
0.465;1.0;2.0
0.704;1.0;2.0
0.801;1.0;2.0
0.484;0.0;2.0
0.459;0.0;2.0
0.576;0.0;2.0
0.462;1.0;2.0
0.471;0.0;2.0
0.595;1.0;2.0
0.464;1.0;2.0
0.644;1.0;2.0
0.42;0.0;2.0
0.452;1.0;2.0
0.488;0.0;2.0
0.568;1.0;2.0
0.481;0.0;2.0
0.5;1.0;2.0
0.54;1.0;2.0
0.447;0.0;2.0
0.463;1.0;2.0
0.507;1.0;2.0
0.522;1.0;2.0
0.58;1.0;2.0
0.464;0.0;2.0
0.507;0.0;2.0
0.727;1.0;2.0
0.452;1.0;2.0
0.636;0.0;2.0
0.552;1.0;2.0
0.739;1.0;2.0
0.468;1.0;2.0
0.563;1.0;2.0
0.443;1.0;2.0
1.023;1.0;2.0
0.571;1.0;2.0
0.44;0.0;2.0
0.717;1.0;2.0
0.751;1.0;2.0
0.491;0.0;2.0
0.456;1.0;2.0
0.569;1.0;2.0
0.456;1.0;2.0
0.517;1.0;2.0
0.492;1.0;2.0
0.527;1.0;2.0
0.501;0.0;2.0
0.499;0.0;2.0
0.428;0.0;2.0
0.529;0.0;2.0
0.43;0.0;2.0
0.453;0.0;2.0
0.484;0.0;2.0
0.541;1.0;2.0
0.707;0.0;2.0
0.712;1.0;2.0
0.53;0.0;2.0
0.871;1.0;2.0
0.896;1.0;2.0
0.548;0.0;2.0
0.484;1.0;2.0
0.779;1.0;2.0
0.503;0.0;2.0
0.696;0.0;2.0
0.522;0.0;2.0
0.93;1.0;2.0
0.535;0.0;2.0
0.615;1.0;2.0
0.624;1.0;2.0
0.742;0.0;2.0
0.528;1.0;2.0
0.441;1.0;2.0
0.514;0.0;2.0
0.445;0.0;2.0
0.625;0.0;2.0
0.578;1.0;2.0
0.55;1.0;2.0
0.686;1.0;2.0
0.505;1.0;2.0
0.872;1.0;2.0
0.548;1.0;2.0
0.487;0.0;2.0
0.733;0.0;2.0
0.46;0.0;2.0
0.764;1.0;2.0
0.589;0.0;2.0
0.482;0.0;2.0
0.449;0.0;2.0
0.428;0.0;2.0
0.604;1.0;2.0
0.505;1.0;2.0
0.649;1.0;2.0
0.484;1.0;2.0
0.535;0.0;2.0
0.471;0.0;2.0
0.441;0.0;2.0
0.528;0.0;2.0
0.621;0.0;2.0
0.48;1.0;2.0
0.693;1.0;2.0
0.493;1.0;2.0
BIN: binary image added (not shown; 363 KiB)
BIN: binary image added (not shown; 50 KiB)
BIN: binary image added (not shown; 48 KiB)
BIN: binary image added (not shown; 55 KiB)
+15
@@ -0,0 +1,15 @@
contourpy==1.3.1
cycler==0.12.1
fonttools==4.56.0
kiwisolver==1.4.8
matplotlib==3.10.1
numpy==2.2.3
packaging==24.2
pandas==2.2.3
paranoid-scientist==0.2.3
pillow==11.1.0
pyparsing==3.2.1
python-dateutil==2.9.0.post0
pytz==2025.1
six==1.17.0
tzdata==2025.1