added lab 4
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"data": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/epi_results_2024_pop_gdp_v2.csv",
|
||||
"region_col": "region",
|
||||
"response": "EPI.new",
|
||||
"region_a": "Sub-Saharan Africa",
|
||||
"region_b": "Latin America & Caribbean",
|
||||
"predictors": [
|
||||
"gdp",
|
||||
["gdp", "population"]
|
||||
],
|
||||
"knn1": ["AGR.new", "AIR.new", "APO.new"],
|
||||
"knn2": ["BCA.new", "BDH.new", "CBP.new"],
|
||||
"k": 5,
|
||||
"fig_dir": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures",
|
||||
"stats_dir": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/stats",
|
||||
"box_a": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/box_Sub-Saharan_Africa_EPI.new.png",
|
||||
"box_b": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/box_Latin_America_Caribbean_EPI.new.png",
|
||||
"hist_a": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/hist_Sub-Saharan_Africa_EPI.new.png",
|
||||
"hist_b": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/hist_Latin_America_Caribbean_EPI.new.png",
|
||||
"qq_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/qq_EPI.new_Sub-Saharan_Africa_vs_Latin_America_Caribbean.png",
|
||||
"ols": [
|
||||
{
|
||||
"name": "full: EPI.new ~ gdp",
|
||||
"rsq": 0.5224,
|
||||
"aic": 1257.4369,
|
||||
"bic": 1266.999,
|
||||
"nobs": 179,
|
||||
"summary_file": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/stats/ols_full_EPI.new_gdp.txt",
|
||||
"residuals_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/residuals_full_EPI.new_gdp.png",
|
||||
"scatter_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/scatter_full_EPI.new_gdp_gdp.png"
|
||||
},
|
||||
{
|
||||
"name": "full: EPI.new ~ gdp + population",
|
||||
"rsq": 0.5392,
|
||||
"aic": 1246.1592,
|
||||
"bic": 1258.8864,
|
||||
"nobs": 178,
|
||||
"summary_file": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/stats/ols_full_EPI.new_gdp_population.txt",
|
||||
"residuals_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/residuals_full_EPI.new_gdp_population.png",
|
||||
"scatter_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/scatter_full_EPI.new_gdp_population_gdp.png"
|
||||
}
|
||||
],
|
||||
"best_region_note": "on region `Sub-Saharan Africa`, the better model is **region Sub-Saharan Africa: EPI.new ~ gdp + population** (r²=0.361, aic=265.4, bic=272.7).",
|
||||
"knn": [
|
||||
{
|
||||
"tag": "model A",
|
||||
"k": 5,
|
||||
"vars": ["AGR.new", "AIR.new", "APO.new"],
|
||||
"accuracy": 0.5581,
|
||||
"confusion_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/knn_confusion_model_A.png",
|
||||
"n_test": 43
|
||||
},
|
||||
{
|
||||
"tag": "model B",
|
||||
"k": 5,
|
||||
"vars": ["BCA.new", "BDH.new", "CBP.new"],
|
||||
"accuracy": 0.5116,
|
||||
"confusion_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/knn_confusion_model_B.png",
|
||||
"n_test": 43
|
||||
}
|
||||
]
|
||||
}
|
||||
|
After Width: | Height: | Size: 24 KiB |
|
After Width: | Height: | Size: 25 KiB |
|
After Width: | Height: | Size: 36 KiB |
|
After Width: | Height: | Size: 42 KiB |
|
After Width: | Height: | Size: 63 KiB |
|
After Width: | Height: | Size: 62 KiB |
|
After Width: | Height: | Size: 46 KiB |
|
After Width: | Height: | Size: 63 KiB |
|
After Width: | Height: | Size: 62 KiB |
|
After Width: | Height: | Size: 34 KiB |
|
After Width: | Height: | Size: 34 KiB |
|
After Width: | Height: | Size: 64 KiB |
|
After Width: | Height: | Size: 63 KiB |
|
After Width: | Height: | Size: 32 KiB |
|
After Width: | Height: | Size: 32 KiB |
@@ -0,0 +1,38 @@
|
||||
# exploratory data analysis and models on the epi dataset
|
||||
date: 2025-10-13
|
||||
|
||||
## dataset and choices
|
||||
- **file**: `epi_results_2024_pop_gdp_v2.csv`
|
||||
- **region column**: `region`
|
||||
- **response var**: `EPI.new`
|
||||
- **regions**: `Sub-Saharan Africa` vs `Latin America & Caribbean`
|
||||
|
||||
## 1) variable distributions
|
||||
### 1.1 boxplots and histograms (with density!)
|
||||

|
||||

|
||||

|
||||

|
||||
|
||||
### 1.2 qq plot (two-sample)
|
||||

|
||||
|
||||
## 2) linear models
|
||||
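### full: EPI.new ~ gdp

- **r²**: 0.5224 | **aic**: 1257.4369 | **bic**: 1266.999 | **n**: 179

![residuals](figures/residuals_full_EPI.new_gdp.png)

![scatter](figures/scatter_full_EPI.new_gdp_gdp.png)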
|
||||
|
||||
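### full: EPI.new ~ gdp + population

- **r²**: 0.5392 | **aic**: 1246.1592 | **bic**: 1258.8864 | **n**: 178

![residuals](figures/residuals_full_EPI.new_gdp_population.png)

![scatter](figures/scatter_full_EPI.new_gdp_population_gdp.png)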
|
||||
|
||||
### 2.2 same models on one region (comparison)
|
||||
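on region `Sub-Saharan Africa`, the better model is **region Sub-Saharan Africa: EPI.new ~ gdp + population** (r²=0.361, aic=265.4, bic=272.7). caveats: the gain over the gdp-only fit is marginal (r² 0.361 vs 0.352), adjusted r² is slightly lower (0.331 vs 0.337), and `population` is not significant in this fit (p=0.678); the two fits also use different numbers of observations (45 vs 46), so aic/bic are not strictly comparable.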
|
||||
|
||||
## 3) classification (knn, label = region)
|
||||
### model A
|
||||
- **k**: 5 | **accuracy**: 0.5581 | **test n**: 43
|
||||
variables: `AGR.new`, `AIR.new`, `APO.new`
|
||||

|
||||
|
||||
### model B
|
||||
- **k**: 5 | **accuracy**: 0.5116 | **test n**: 43
|
||||
variables: `BCA.new`, `BDH.new`, `CBP.new`
|
||||

|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
|
||||
Call:
|
||||
lm(formula = f, data = d)
|
||||
|
||||
Residuals:
|
||||
Min 1Q Median 3Q Max
|
||||
-22.432 -4.915 0.043 6.222 20.899
|
||||
|
||||
Coefficients:
|
||||
Estimate Std. Error t value Pr(>|t|)
|
||||
(Intercept) -22.3482 5.0070 -4.463 1.43e-05 ***
|
||||
gdp 7.0974 0.5101 13.913 < 2e-16 ***
|
||||
---
|
||||
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
|
||||
|
||||
Residual standard error: 8.023 on 177 degrees of freedom
|
||||
Multiple R-squared: 0.5224, Adjusted R-squared: 0.5197
|
||||
F-statistic: 193.6 on 1 and 177 DF, p-value: < 2.2e-16
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
|
||||
Call:
|
||||
lm(formula = f, data = d)
|
||||
|
||||
Residuals:
|
||||
Min 1Q Median 3Q Max
|
||||
-21.810 -5.068 -0.027 6.014 19.567
|
||||
|
||||
Coefficients:
|
||||
Estimate Std. Error t value Pr(>|t|)
|
||||
(Intercept) -8.6847 7.0829 -1.226 0.2218
|
||||
gdp 6.9983 0.5047 13.867 <2e-16 ***
|
||||
population -0.7970 0.2995 -2.662 0.0085 **
|
||||
---
|
||||
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
|
||||
|
||||
Residual standard error: 7.905 on 175 degrees of freedom
|
||||
Multiple R-squared: 0.5392, Adjusted R-squared: 0.5339
|
||||
F-statistic: 102.4 on 2 and 175 DF, p-value: < 2.2e-16
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
|
||||
Call:
|
||||
lm(formula = f, data = d)
|
||||
|
||||
Residuals:
|
||||
Min 1Q Median 3Q Max
|
||||
-8.7004 -2.6329 -0.6432 3.1700 12.7231
|
||||
|
||||
Coefficients:
|
||||
Estimate Std. Error t value Pr(>|t|)
|
||||
(Intercept) 7.9123 6.3944 1.237 0.223
|
||||
gdp 3.6412 0.7453 4.885 1.41e-05 ***
|
||||
---
|
||||
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
|
||||
|
||||
Residual standard error: 4.322 on 44 degrees of freedom
|
||||
Multiple R-squared: 0.3517, Adjusted R-squared: 0.3369
|
||||
F-statistic: 23.87 on 1 and 44 DF, p-value: 1.407e-05
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
|
||||
Call:
|
||||
lm(formula = f, data = d)
|
||||
|
||||
Residuals:
|
||||
Min 1Q Median 3Q Max
|
||||
-8.4641 -2.8896 -0.5014 3.3435 12.5429
|
||||
|
||||
Coefficients:
|
||||
Estimate Std. Error t value Pr(>|t|)
|
||||
(Intercept) 3.4874 12.1785 0.286 0.776
|
||||
gdp 3.8097 0.8314 4.583 4.08e-05 ***
|
||||
population 0.1907 0.4558 0.418 0.678
|
||||
---
|
||||
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
|
||||
|
||||
Residual standard error: 4.375 on 42 degrees of freedom
|
||||
Multiple R-squared: 0.3611, Adjusted R-squared: 0.3307
|
||||
F-statistic: 11.87 on 2 and 42 DF, p-value: 8.205e-05
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"data": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/epi_results_2024_pop_gdp_v2.csv",
|
||||
"region_col": "region",
|
||||
"response": "EPI.new",
|
||||
"region_a": "Sub-Saharan Africa",
|
||||
"region_b": "Latin America & Caribbean",
|
||||
"predictors": [
|
||||
"gdp",
|
||||
["gdp", "population"]
|
||||
],
|
||||
"knn1": ["AGR.new", "AIR.new", "APO.new"],
|
||||
"knn2": ["BCA.new", "BDH.new", "CBP.new"],
|
||||
"k": 5,
|
||||
"fig_dir": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures",
|
||||
"stats_dir": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/stats",
|
||||
"box_a": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/box_Sub-Saharan_Africa_EPI.new.png",
|
||||
"box_b": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/box_Latin_America_Caribbean_EPI.new.png",
|
||||
"hist_a": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/hist_Sub-Saharan_Africa_EPI.new.png",
|
||||
"hist_b": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/hist_Latin_America_Caribbean_EPI.new.png",
|
||||
"qq_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/qq_EPI.new_Sub-Saharan_Africa_vs_Latin_America_Caribbean.png",
|
||||
"ols": [
|
||||
{
|
||||
"name": "full: EPI.new ~ gdp",
|
||||
"rsq": 0.5224,
|
||||
"aic": 1257.4369,
|
||||
"bic": 1266.999,
|
||||
"nobs": 179,
|
||||
"summary_file": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/stats/ols_full_EPI.new_gdp.txt",
|
||||
"residuals_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/residuals_full_EPI.new_gdp.png",
|
||||
"scatter_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/scatter_full_EPI.new_gdp_gdp.png"
|
||||
},
|
||||
{
|
||||
"name": "full: EPI.new ~ gdp + population",
|
||||
"rsq": 0.5392,
|
||||
"aic": 1246.1592,
|
||||
"bic": 1258.8864,
|
||||
"nobs": 178,
|
||||
"summary_file": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/stats/ols_full_EPI.new_gdp_population.txt",
|
||||
"residuals_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/residuals_full_EPI.new_gdp_population.png",
|
||||
"scatter_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/scatter_full_EPI.new_gdp_population_gdp.png"
|
||||
}
|
||||
],
|
||||
"best_region_note": "on region `Sub-Saharan Africa`, the better model is **region Sub-Saharan Africa: EPI.new ~ gdp + population** (r²=0.361, aic=265.4, bic=272.7).",
|
||||
"knn": [
|
||||
{
|
||||
"tag": "model A",
|
||||
"k": 5,
|
||||
"vars": ["AGR.new", "AIR.new", "APO.new"],
|
||||
"accuracy": 0.5581,
|
||||
"confusion_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/knn_confusion_model_A.png",
|
||||
"n_test": 43
|
||||
},
|
||||
{
|
||||
"tag": "model B",
|
||||
"k": 5,
|
||||
"vars": ["BCA.new", "BDH.new", "CBP.new"],
|
||||
"accuracy": 0.5116,
|
||||
"confusion_fig": "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/figures/knn_confusion_model_B.png",
|
||||
"n_test": 43
|
||||
}
|
||||
]
|
||||
}
|
||||