From 18a911f9d3eeb865183b91ff344cf3c66a3794c3 Mon Sep 17 00:00:00 2001 From: ION606 Date: Tue, 4 Nov 2025 21:00:48 -0500 Subject: [PATCH] added lab 5 --- Lab 5/Rplots.pdf | Bin 0 -> 4584 bytes Lab 5/lab5.r | 117 +++++++++++++++++------------- Lab 5/lab5_confusion_matrices.txt | 95 ++++++++++++++++++++++++ Lab 5/lab5_performance_table.txt | 4 + 4 files changed, 166 insertions(+), 50 deletions(-) create mode 100644 Lab 5/Rplots.pdf create mode 100644 Lab 5/lab5_confusion_matrices.txt create mode 100644 Lab 5/lab5_performance_table.txt diff --git a/Lab 5/Rplots.pdf b/Lab 5/Rplots.pdf new file mode 100644 index 0000000000000000000000000000000000000000..bf95203b9b3ef2de605ea9e3558909eb6c28a5f1 GIT binary patch literal 4584 zcmZ`-2{@E%8@7Z@WerWlM@S3b%-FYV6C(S*W*URZm|>Pe#-5#HFJ)hg;~+Z`g_7(N zrHw*l2_aPfjC1Oo|MdUgb=FN2_AySWH-=s4u`X$S`IO0NR(vl;-j z^z+65pbm919RYS2gP3;=yr5#A_hdC#gqG2Ji5V;cAUfD-N_z+f}8>X>SA$Z+6F*f9IZD<44UAIp_fe}(^}>Q@y#r^ zG9tWy?Qfa4S)LVo9KBUp=ow4V3^_1%(Iv7@>iGzx{Rq1oo8bBqTQ?C|;&3xp^?KJn zjWy&de2;+U@*a&2oArQ0tyGn(?}5EVscj3Dj_w=d@6S(Mb3Z>>!4Vi;CYZPk?3LNm zo&rRh-*-=P?zY8_l@VAniTp0&teiJsT!-cd4#T?8uz~vJBa*^BTKfA980(X!J-;wn z6P$GNpT0j#Tsn;QNoLIoF~0t0u#{aRwnr$}Rde1@vr9rVMXP}R-)5@$-axYp?(%9D zK5=5C6!}}F<bV_S z%B<47?@=PlA@kWa89;|0FuTbx(Pd^jl%F?oz+BjqQ`cWS&0S0IHqsnndLj3XmAl3DPsdXjY zX`g2UjO+BWfA@^9ecZkHD8*=Dupw1QLRF$)am+50E$vP$wK0Fawp{&$MQR6`?-*{4 zU)0BmFKCean$;%(sE-w2kQujt)tZQk-O@#^^kRs<&&HGRo^X{f?oAk#tR30$6~nuv zov_$>$L1~Q80H92VDnqE9*stLb1T>s#b4|8v)=u}ahNyTk9`JIpW9Fb=W zPC>KbbEYLd{RV3rWnV?r1Hg{&;&}IX(Ho>gRd*x}hEa*pI`w`YIU7(RAIM4SePi+X z=V3#Kon4kan=pGGKhkP7qL+ z#x+1q4b(*daC(qkwt{DT$$A!K`njT}Mh{0zi6am#yiw>~1SFa3X#?^<%>U*$9ir!R z0MF{^Xro9NS3rSo<@Dpkps#-HQH#z|JUt1B#9YFnG4!CIG3^up|Kc)2Uip8MnO@UR zGH0bQNWBRz z&`GKii@C~ZRyDr|iX8FrJ}8)e*0mh^`mWzn@6FdD8-boVE&VUum#Cvn42e5oEcegZ zvIp4TasM&{GmhbncXhQk|E4Ze22;RT94>=*Q=p}Z1NgFiVpt=t zo#A^DjlbGdq6UV0Fm3dm@l3btyGkZ515v`KwF!I?5%`U>bDrkVQ0)tfYgv>O_ z!WActPL@K1N>BMQywUGIzdQumKI!M};q2^tVlTrZUJ?bBbhL? z%<()9&Ms}`a0Ka`59l{0q4?d9Uk#Uv1DjRXl~}**yMcht#G_CJ^J}3_dvQWQ<3Ol%|KxW zsT14)`xV!d154C>U3$!4dSeGOu4irchkm2lG@SePh+jM?0j-BbH1L@9eY~+_&m;#h zoY7>S(-fR#=55Kswh)bw67FCwZk7C47&rfYUF{@aadvLe zvP7OjCamW3MBze_#u)vil1B~)SSTU;o^_r=L8%#=Y1XaNBUU3) z7a^ayjdVTpdk6bw8OWzppjojpx;D8t2ae2E&vLIRZSk>&<)P!?35SBt2}$x*GgU*Y z1&0%quR^=xP{mcYzpIQ2&Bq+*BvxL)!RsJbxUTSAVNp$>h8>+Gd)FzQcT_WuI@w|M z#@_pww~aS%1N1J>E3TzD&d$~4o==!f-2l$Oy&4HQqEftZ?0ue2y~i@Xilzh_F4YhEy{Vl4zAM({~eBI36M{>Lt2xEKBUASPPKSt?SB3RI@wx zN*gbkBl`N-J+s^swkP&TEg!T#IU+?q;b`Vt9AzVCxn}A3`}GRjJYS@b{-@hK&fWUa zpX!dhx-C0G9T^^}zkT`k>tRjob!@ojyNe$O zmC8PmIlK~jgcdWCl_O^+M#JTM<&MhPcq|SJ4P=fEjjjwo9x5GDDfn!8^N)qRtkM>p z?eO(N4y%Vodb!m)GlhNV2*`t!q`q6OYw%*SJmo7zqUm!}Sd*-Om%o|+RsRM5jWzbI zQ(wiu9@%QyvS3wV6=h{%EoZGi@G0WdgM$y8BECjc#K*;7h*LO3fvO{{WjSDdig@`} zPi+MsMLV}47h>;0^kamP>`O2I@%;+biiYqqxF903_sA2L>E_-@=>dLYe(hM+Siy5V z!zmdLaF_)9A?YybebUXRa_peCoeo~rbM;#uUW6?COAqS9hzq1TCa z>(3om9A7t<&0OlI;@$BN>tCQR4`*K#xp1$&_1U+uu!68hdA+Ei!8Z6{)#xYI*e(d# z%Ps4&wyK|U`6ZR~5Ujzh-4R096_7wPat!}O!o{4XqYa}|(I~C#g*CXp& z@aC7DE+MT~lQSl7tzHYF7oY3*2zq#YeK;$Iv zdy56qYny(>@%pEiANRi^Tz!2!s3s`!yTW%tMtjDI;OXGonh)F3La17Wxhz^wwKzk2 zL(jCUi+bz3JX%j|UOh0gZ|3sV(1GBAg}jaXaSzh3QbiKjIrmkt2OUE3nsE)@3vP1h z4ozpN5Ni?Z5nDoLAkXIe7w{GE=HDzh|Iqhg>O*~W`30LU=a}Ir-VVw`^i;vr&B+ph zty2??-fHcq^OW9hiA|mHiTW zz%9xw@@BIU*a*o>kHHxe@kYtCW9kN%j9z7{WlUz9C#_hv@muc8F4-y>EowF1XY?Xx z{kXbhi#qeQr0YJG`rO6r+lX4(RYYX`2Mvd)EEH1l}NYM`{J z%-?3dLdIUbwTsXjQX5{2^$e1#kvRnhG|IR+nIu^c-pH@|960X~%NKK|Q&FJ-ck4ye zbEz?<2JB*Ssga1GuY`bMx#3BJ*Mw<&>t3iWH9&4!XIy@cDsO z)K(uT;ex;4r1z3({t z{pft&9OLPn%+qOxUBWVMV0?8Zpg-%w#zzgmSD$yP_Vo!Q zIJ-K1YX%0|7J*Td+?9v{l zik_%Crh)ms7I=47^b4vpybrm0^o8C-N$N3B!+Fzp=j>=oMoFi}$?uNaPu`7RzURAx zfbprye{)>h8Lqf_{?@&*??a22CGyzXR~u-BoAb51Wfy^7>tJ}Yf+R8u2P{?1x^$eaS60vxia;1;0f3Iw?cr82$`vZW0 zNUWP1hNf2Oa?}BklLx)9cnS$nRsek{1TqGPaU(HSK0g#NvxzZpn6q-oD1KZTzcFRn?Zdw?1Wf_`g2~c! z(|_B+5b$4oVTfPj$tuhKVkZYz`bRBfA_|Mc5NX#j0BC{r$I$i*fM@Y;1e!Xd_X9L1 e5Xb=inq;>(Br=Lf-tCMWLP-H4A#uh;7xEuJp~Lb3 literal 0 HcmV?d00001 diff --git a/Lab 5/lab5.r b/Lab 5/lab5.r index a1d5067..91b20a6 100644 --- a/Lab 5/lab5.r +++ b/Lab 5/lab5.r @@ -1,22 +1,28 @@ -install.packages(c("e1071","caret","randomForest","ggplot2","pROC"), dependencies = TRUE) +install.packages( + c("e1071", "caret", "randomForest", "ggplot2", "pROC"), + repos = c("https://cloud.r-project.org/"), + dependencies = TRUE +) suppressPackageStartupMessages({ - library(e1071) # for svm/tune.svm - library(caret) # for metrics - library(randomForest) # alternative classifier - library(ggplot2) + library(e1071) # for svm/tune.svm + library(caret) # for metrics + library(randomForest) # alternative classifier + library(ggplot2) }) set.seed(42) read_wine <- function() { - df <- read.csv("wine.data", header = FALSE) - colnames(df) <- c("Class", - "Alcohol","Malic.acid","Ash","Alcalinity.of.ash","Magnesium", - "Total.phenols","Flavanoids","Nonflavanoid.phenols","Proanthocyanins", - "Color.intensity","Hue","OD280.OD315","Proline") - df$Class <- factor(df$Class) - df + df <- read.csv("wine.data", header = FALSE) + colnames(df) <- c( + "Class", + "Alcohol", "Malic.acid", "Ash", "Alcalinity.of.ash", "Magnesium", + "Total.phenols", "Flavanoids", "Nonflavanoid.phenols", "Proanthocyanins", + "Color.intensity", "Hue", "OD280.OD315", "Proline" + ) + df$Class <- factor(df$Class) + df } df <- read_wine() @@ -24,56 +30,62 @@ df <- read_wine() # split into train/test idx <- createDataPartition(df$Class, p = 0.8, list = FALSE) train <- df[idx, ] -test <- df[-idx, ] +test <- df[-idx, ] # choose a subset of features based on ANOVA F-test # I picked this sbuset before the runs: # alcohol, flavanoids, color intensity, od280/od315, proline, total phenols -features <- c("Alcohol","Flavanoids","Color.intensity","OD280.OD315","Proline","Total.phenols") +features <- c("Alcohol", "Flavanoids", "Color.intensity", "OD280.OD315", "Proline", "Total.phenols") x_train <- train[, features] y_train <- train$Class -x_test <- test[, features] -y_test <- test$Class +x_test <- test[, features] +y_test <- test$Class # scale features -pp <- preProcess(x_train, method = c("center","scale")) +pp <- preProcess(x_train, method = c("center", "scale")) x_train_s <- predict(pp, x_train) -x_test_s <- predict(pp, x_test) +x_test_s <- predict(pp, x_test) -# 1) linear kernel svm with hyperparameter tuning (C) +# linear kernel svm with hyperparameter tuning (C) set.seed(42) lin_grid <- data.frame(cost = c(0.1, 1, 10, 100)) -tune_lin <- tune.svm(x = x_train_s, y = y_train, - kernel = "linear", - cost = lin_grid$cost, - tunecontrol = tune.control(cross = 5)) +tune_lin <- tune.svm( + x = x_train_s, y = y_train, + kernel = "linear", + cost = lin_grid$cost, + tunecontrol = tune.control(cross = 5) +) lin_best <- tune_lin$best.model -# 2) rbf kernel svm with tuning (C, gamma) +# rbf kernel svm with tuning (C, gamma) set.seed(42) rbf_grid_cost <- c(0.1, 1, 10, 100, 1000) rbf_grid_gamma <- c(0.001, 0.01, 0.1, 1) -tune_rbf <- tune.svm(x = x_train_s, y = y_train, - kernel = "radial", - cost = rbf_grid_cost, - gamma = rbf_grid_gamma, - tunecontrol = tune.control(cross = 5)) +tune_rbf <- tune.svm( + x = x_train_s, y = y_train, + kernel = "radial", + cost = rbf_grid_cost, + gamma = rbf_grid_gamma, + tunecontrol = tune.control(cross = 5) +) rbf_best <- tune_rbf$best.model -# 3) alternative classifier: random forest (same features) +# alt classifier: random forest (same features) set.seed(42) rf_fit <- randomForest(x = x_train, y = y_train, ntree = 500, mtry = 2, importance = TRUE) # evaluation helper eval_model <- function(model, x_test_s, y_test, name) { - pred <- predict(model, x_test_s) - cm <- confusionMatrix(pred, y_test) - pr <- data.frame(model = name, - accuracy = cm$overall["Accuracy"], - precision_macro = mean(cm$byClass[,"Precision"], na.rm=TRUE), - recall_macro = mean(cm$byClass[,"Recall"], na.rm=TRUE), - f1_macro = mean(cm$byClass[,"F1"], na.rm=TRUE)) - list(cm = cm, pr = pr) + pred <- predict(model, x_test_s) + cm <- confusionMatrix(pred, y_test) + pr <- data.frame( + model = name, + accuracy = cm$overall["Accuracy"], + precision_macro = mean(cm$byClass[, "Precision"], na.rm = TRUE), + recall_macro = mean(cm$byClass[, "Recall"], na.rm = TRUE), + f1_macro = mean(cm$byClass[, "F1"], na.rm = TRUE) + ) + list(cm = cm, pr = pr) } # eval svm models (use scaled features) @@ -84,11 +96,13 @@ rbf_eval <- eval_model(rbf_best, x_test_s, y_test, "svm_rbf") rf_pred <- predict(rf_fit, x_test) rf_cm <- confusionMatrix(rf_pred, y_test) -rf_pr <- data.frame(model = "random_forest", - accuracy = rf_cm$overall["Accuracy"], - precision_macro = mean(rf_cm$byClass[,"Precision"], na.rm=TRUE), - recall_macro = mean(rf_cm$byClass[,"Recall"], na.rm=TRUE), - f1_macro = mean(rf_cm$byClass[,"F1"], na.rm=TRUE)) +rf_pr <- data.frame( + model = "random_forest", + accuracy = rf_cm$overall["Accuracy"], + precision_macro = mean(rf_cm$byClass[, "Precision"], na.rm = TRUE), + recall_macro = mean(rf_cm$byClass[, "Recall"], na.rm = TRUE), + f1_macro = mean(rf_cm$byClass[, "F1"], na.rm = TRUE) +) perf <- rbind(lin_eval$pr, rbf_eval$pr, rf_pr) @@ -98,14 +112,17 @@ cat("best params (rbf svm): C =", rbf_best$cost, " gamma =", rbf_best$gamma, "\n print(perf) # macro-f1 comparison -ggplot(perf, aes(x = model, y = f1_macro)) + - geom_col() + - labs(title = "macro-F1 by model (wine test set)") +ggplot(perf, aes(x = model, y = f1_macro)) + + geom_col() + + labs(title = "macro-F1 by model (wine test set)") # save outputs write.table(perf, file = "lab5_performance_table.txt", sep = "\t", row.names = FALSE, quote = FALSE) sink("lab5_confusion_matrices.txt") -cat("=== svm linear ===\n"); print(lin_eval$cm) -cat("\n=== svm rbf ===\n"); print(rbf_eval$cm) -cat("\n=== random forest ===\n"); print(rf_cm) -sink() \ No newline at end of file +cat("=== svm linear ===\n") +print(lin_eval$cm) +cat("\n=== svm rbf ===\n") +print(rbf_eval$cm) +cat("\n=== random forest ===\n") +print(rf_cm) +sink() diff --git a/Lab 5/lab5_confusion_matrices.txt b/Lab 5/lab5_confusion_matrices.txt new file mode 100644 index 0000000..065a670 --- /dev/null +++ b/Lab 5/lab5_confusion_matrices.txt @@ -0,0 +1,95 @@ +=== svm linear === +Confusion Matrix and Statistics + + Reference +Prediction 1 2 3 + 1 11 1 0 + 2 0 13 0 + 3 0 0 9 + +Overall Statistics + + Accuracy : 0.9706 + 95% CI : (0.8467, 0.9993) + No Information Rate : 0.4118 + P-Value [Acc > NIR] : 3.92e-12 + + Kappa : 0.9553 + + Mcnemar's Test P-Value : NA + +Statistics by Class: + + Class: 1 Class: 2 Class: 3 +Sensitivity 1.0000 0.9286 1.0000 +Specificity 0.9565 1.0000 1.0000 +Pos Pred Value 0.9167 1.0000 1.0000 +Neg Pred Value 1.0000 0.9524 1.0000 +Prevalence 0.3235 0.4118 0.2647 +Detection Rate 0.3235 0.3824 0.2647 +Detection Prevalence 0.3529 0.3824 0.2647 +Balanced Accuracy 0.9783 0.9643 1.0000 + +=== svm rbf === +Confusion Matrix and Statistics + + Reference +Prediction 1 2 3 + 1 11 1 0 + 2 0 13 0 + 3 0 0 9 + +Overall Statistics + + Accuracy : 0.9706 + 95% CI : (0.8467, 0.9993) + No Information Rate : 0.4118 + P-Value [Acc > NIR] : 3.92e-12 + + Kappa : 0.9553 + + Mcnemar's Test P-Value : NA + +Statistics by Class: + + Class: 1 Class: 2 Class: 3 +Sensitivity 1.0000 0.9286 1.0000 +Specificity 0.9565 1.0000 1.0000 +Pos Pred Value 0.9167 1.0000 1.0000 +Neg Pred Value 1.0000 0.9524 1.0000 +Prevalence 0.3235 0.4118 0.2647 +Detection Rate 0.3235 0.3824 0.2647 +Detection Prevalence 0.3529 0.3824 0.2647 +Balanced Accuracy 0.9783 0.9643 1.0000 + +=== random forest === +Confusion Matrix and Statistics + + Reference +Prediction 1 2 3 + 1 11 1 0 + 2 0 13 0 + 3 0 0 9 + +Overall Statistics + + Accuracy : 0.9706 + 95% CI : (0.8467, 0.9993) + No Information Rate : 0.4118 + P-Value [Acc > NIR] : 3.92e-12 + + Kappa : 0.9553 + + Mcnemar's Test P-Value : NA + +Statistics by Class: + + Class: 1 Class: 2 Class: 3 +Sensitivity 1.0000 0.9286 1.0000 +Specificity 0.9565 1.0000 1.0000 +Pos Pred Value 0.9167 1.0000 1.0000 +Neg Pred Value 1.0000 0.9524 1.0000 +Prevalence 0.3235 0.4118 0.2647 +Detection Rate 0.3235 0.3824 0.2647 +Detection Prevalence 0.3529 0.3824 0.2647 +Balanced Accuracy 0.9783 0.9643 1.0000 diff --git a/Lab 5/lab5_performance_table.txt b/Lab 5/lab5_performance_table.txt new file mode 100644 index 0000000..9b74dfc --- /dev/null +++ b/Lab 5/lab5_performance_table.txt @@ -0,0 +1,4 @@ +model accuracy precision_macro recall_macro f1_macro +svm_linear 0.970588235294118 0.972222222222222 0.976190476190476 0.973161567364466 +svm_rbf 0.970588235294118 0.972222222222222 0.976190476190476 0.973161567364466 +random_forest 0.970588235294118 0.972222222222222 0.976190476190476 0.973161567364466