5 Commits

Author SHA1 Message Date
ION606 18a911f9d3 added lab 5 2025-11-04 21:00:48 -05:00
ION606 414a4ac5a3 god I am DUMB 2025-11-04 17:43:39 -05:00
ION606 4eff5a6378 merge 2025-11-04 17:34:25 -05:00
ION606 5adb4119f5 Merge branch 'transfer' of https://git.ion606.com/ION606/Data-Analytics into transfer 2025-11-04 17:34:00 -05:00
ION606 cd3ababd59 added lab 5 2025-11-04 17:33:38 -05:00
10 changed files with 555 additions and 0 deletions
+9
View File
@@ -0,0 +1,9 @@
{
"[r]": {
// generated automatically? What even....
"editor.wordSeparators": "`~!@#$%^&*()-=+[{]}\\|;:'\",<>/",
"editor.indentSize": "tabSize",
"editor.useTabStops": true,
}
}
+41
View File
@@ -0,0 +1,41 @@
##########################################
### Principal Component Analysis (PCA) ###
##########################################
## load libraries
library(ggplot2)
library(ggfortify)
library(GGally)
library(e1071)
library(class)
library(psych)
library(readr)
## set working directory so that files can be referenced without the full path
setwd("~/Courses/Data Analytics/Fall25/labs/lab 4/")
## read dataset
wine <- read_csv("wine.data", col_names = FALSE)
## set column names
names(wine) <- c("Type","Alcohol","Malic acid","Ash","Alcalinity of ash","Magnesium","Total phenols","Flavanoids","Nonflavanoid Phenols","Proanthocyanins","Color Intensity","Hue","Od280/od315 of diluted wines","Proline")
## inspect data frame
head(wine)
## change the data type of the "Type" column from character to factor
####
# Factors look like regular strings (characters) but with factors R knows
# that the column is a categorical variable with finite possible values
# e.g. "Type" in the Wine dataset can only be 1, 2, or 3
####
wine$Type <- as.factor(wine$Type)
## visualize variables
pairs.panels(wine[,-1],gap = 0,bg = c("red", "yellow", "blue")[wine$Type],pch=21)
ggpairs(wine, ggplot2::aes(colour = Type))
###
Binary file not shown.
BIN
View File
Binary file not shown.
+128
View File
@@ -0,0 +1,128 @@
install.packages(
c("e1071", "caret", "randomForest", "ggplot2", "pROC"),
repos = c("https://cloud.r-project.org/"),
dependencies = TRUE
)
suppressPackageStartupMessages({
library(e1071) # for svm/tune.svm
library(caret) # for metrics
library(randomForest) # alternative classifier
library(ggplot2)
})
set.seed(42)
read_wine <- function() {
df <- read.csv("wine.data", header = FALSE)
colnames(df) <- c(
"Class",
"Alcohol", "Malic.acid", "Ash", "Alcalinity.of.ash", "Magnesium",
"Total.phenols", "Flavanoids", "Nonflavanoid.phenols", "Proanthocyanins",
"Color.intensity", "Hue", "OD280.OD315", "Proline"
)
df$Class <- factor(df$Class)
df
}
df <- read_wine()
# split into train/test
idx <- createDataPartition(df$Class, p = 0.8, list = FALSE)
train <- df[idx, ]
test <- df[-idx, ]
# choose a subset of features based on ANOVA F-test
# I picked this sbuset before the runs:
# alcohol, flavanoids, color intensity, od280/od315, proline, total phenols
features <- c("Alcohol", "Flavanoids", "Color.intensity", "OD280.OD315", "Proline", "Total.phenols")
x_train <- train[, features]
y_train <- train$Class
x_test <- test[, features]
y_test <- test$Class
# scale features
pp <- preProcess(x_train, method = c("center", "scale"))
x_train_s <- predict(pp, x_train)
x_test_s <- predict(pp, x_test)
# linear kernel svm with hyperparameter tuning (C)
set.seed(42)
lin_grid <- data.frame(cost = c(0.1, 1, 10, 100))
tune_lin <- tune.svm(
x = x_train_s, y = y_train,
kernel = "linear",
cost = lin_grid$cost,
tunecontrol = tune.control(cross = 5)
)
lin_best <- tune_lin$best.model
# rbf kernel svm with tuning (C, gamma)
set.seed(42)
rbf_grid_cost <- c(0.1, 1, 10, 100, 1000)
rbf_grid_gamma <- c(0.001, 0.01, 0.1, 1)
tune_rbf <- tune.svm(
x = x_train_s, y = y_train,
kernel = "radial",
cost = rbf_grid_cost,
gamma = rbf_grid_gamma,
tunecontrol = tune.control(cross = 5)
)
rbf_best <- tune_rbf$best.model
# alt classifier: random forest (same features)
set.seed(42)
rf_fit <- randomForest(x = x_train, y = y_train, ntree = 500, mtry = 2, importance = TRUE)
# evaluation helper
eval_model <- function(model, x_test_s, y_test, name) {
pred <- predict(model, x_test_s)
cm <- confusionMatrix(pred, y_test)
pr <- data.frame(
model = name,
accuracy = cm$overall["Accuracy"],
precision_macro = mean(cm$byClass[, "Precision"], na.rm = TRUE),
recall_macro = mean(cm$byClass[, "Recall"], na.rm = TRUE),
f1_macro = mean(cm$byClass[, "F1"], na.rm = TRUE)
)
list(cm = cm, pr = pr)
}
# eval svm models (use scaled features)
lin_eval <- eval_model(lin_best, x_test_s, y_test, "svm_linear")
rbf_eval <- eval_model(rbf_best, x_test_s, y_test, "svm_rbf")
# evaluate random forest (no scaling)
rf_pred <- predict(rf_fit, x_test)
rf_cm <- confusionMatrix(rf_pred, y_test)
rf_pr <- data.frame(
model = "random_forest",
accuracy = rf_cm$overall["Accuracy"],
precision_macro = mean(rf_cm$byClass[, "Precision"], na.rm = TRUE),
recall_macro = mean(rf_cm$byClass[, "Recall"], na.rm = TRUE),
f1_macro = mean(rf_cm$byClass[, "F1"], na.rm = TRUE)
)
perf <- rbind(lin_eval$pr, rbf_eval$pr, rf_pr)
# print
cat("best params (linear svm): C =", lin_best$cost, "\n")
cat("best params (rbf svm): C =", rbf_best$cost, " gamma =", rbf_best$gamma, "\n\n")
print(perf)
# macro-f1 comparison
ggplot(perf, aes(x = model, y = f1_macro)) +
geom_col() +
labs(title = "macro-F1 by model (wine test set)")
# save outputs
write.table(perf, file = "lab5_performance_table.txt", sep = "\t", row.names = FALSE, quote = FALSE)
sink("lab5_confusion_matrices.txt")
cat("=== svm linear ===\n")
print(lin_eval$cm)
cat("\n=== svm rbf ===\n")
print(rbf_eval$cm)
cat("\n=== random forest ===\n")
print(rf_cm)
sink()
+95
View File
@@ -0,0 +1,95 @@
=== svm linear ===
Confusion Matrix and Statistics
Reference
Prediction 1 2 3
1 11 1 0
2 0 13 0
3 0 0 9
Overall Statistics
Accuracy : 0.9706
95% CI : (0.8467, 0.9993)
No Information Rate : 0.4118
P-Value [Acc > NIR] : 3.92e-12
Kappa : 0.9553
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: 1 Class: 2 Class: 3
Sensitivity 1.0000 0.9286 1.0000
Specificity 0.9565 1.0000 1.0000
Pos Pred Value 0.9167 1.0000 1.0000
Neg Pred Value 1.0000 0.9524 1.0000
Prevalence 0.3235 0.4118 0.2647
Detection Rate 0.3235 0.3824 0.2647
Detection Prevalence 0.3529 0.3824 0.2647
Balanced Accuracy 0.9783 0.9643 1.0000
=== svm rbf ===
Confusion Matrix and Statistics
Reference
Prediction 1 2 3
1 11 1 0
2 0 13 0
3 0 0 9
Overall Statistics
Accuracy : 0.9706
95% CI : (0.8467, 0.9993)
No Information Rate : 0.4118
P-Value [Acc > NIR] : 3.92e-12
Kappa : 0.9553
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: 1 Class: 2 Class: 3
Sensitivity 1.0000 0.9286 1.0000
Specificity 0.9565 1.0000 1.0000
Pos Pred Value 0.9167 1.0000 1.0000
Neg Pred Value 1.0000 0.9524 1.0000
Prevalence 0.3235 0.4118 0.2647
Detection Rate 0.3235 0.3824 0.2647
Detection Prevalence 0.3529 0.3824 0.2647
Balanced Accuracy 0.9783 0.9643 1.0000
=== random forest ===
Confusion Matrix and Statistics
Reference
Prediction 1 2 3
1 11 1 0
2 0 13 0
3 0 0 9
Overall Statistics
Accuracy : 0.9706
95% CI : (0.8467, 0.9993)
No Information Rate : 0.4118
P-Value [Acc > NIR] : 3.92e-12
Kappa : 0.9553
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: 1 Class: 2 Class: 3
Sensitivity 1.0000 0.9286 1.0000
Specificity 0.9565 1.0000 1.0000
Pos Pred Value 0.9167 1.0000 1.0000
Neg Pred Value 1.0000 0.9524 1.0000
Prevalence 0.3235 0.4118 0.2647
Detection Rate 0.3235 0.3824 0.2647
Detection Prevalence 0.3529 0.3824 0.2647
Balanced Accuracy 0.9783 0.9643 1.0000
+4
View File
@@ -0,0 +1,4 @@
model accuracy precision_macro recall_macro f1_macro
svm_linear 0.970588235294118 0.972222222222222 0.976190476190476 0.973161567364466
svm_rbf 0.970588235294118 0.972222222222222 0.976190476190476 0.973161567364466
random_forest 0.970588235294118 0.972222222222222 0.976190476190476 0.973161567364466
+178
View File
@@ -0,0 +1,178 @@
1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050
1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185
1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480
1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735
1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450
1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290
1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295
1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045
1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045
1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510
1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280
1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320
1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150
1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547
1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310
1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280
1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130
1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680
1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845
1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780
1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770
1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035
1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015
1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845
1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830
1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195
1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285
1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915
1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035
1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285
1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515
1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990
1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235
1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095
1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920
1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880
1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105
1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020
1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760
1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795
1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035
1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095
1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680
1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885
1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080
1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065
1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985
1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060
1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260
1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150
1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265
1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190
1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375
1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060
1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120
1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970
1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270
1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285
2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520
2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680
2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450
2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630
2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420
2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355
2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678
2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502
2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510
2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750
2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718
2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870
2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410
2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472
2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985
2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886
2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428
2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392
2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500
2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750
2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463
2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278
2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714
2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630
2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515
2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520
2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450
2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495
2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562
2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680
2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625
2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480
2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450
2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495
2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290
2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345
2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937
2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625
2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428
2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660
2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406
2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710
2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562
2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438
2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415
2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672
2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315
2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510
2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488
2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312
2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680
2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562
2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325
2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607
2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434
2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385
2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407
2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495
2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345
2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372
2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564
2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625
2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465
2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365
2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380
2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380
2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378
2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352
2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466
2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342
2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580
3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630
3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530
3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560
3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600
3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650
3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695
3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720
3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515
3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580
3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590
3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600
3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780
3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520
3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550
3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855
3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830
3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415
3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625
3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650
3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550
3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500
3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480
3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425
3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675
3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640
3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725
3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
+100
View File
@@ -0,0 +1,100 @@
1. Title of Database: Wine recognition data
Updated Sept 21, 1998 by C.Blake : Added attribute information
2. Sources:
(a) Forina, M. et al, PARVUS - An Extendible Package for Data
Exploration, Classification and Correlation. Institute of Pharmaceutical
and Food Analysis and Technologies, Via Brigata Salerno,
16147 Genoa, Italy.
(b) Stefan Aeberhard, email: stefan@coral.cs.jcu.edu.au
(c) July 1991
3. Past Usage:
(1)
S. Aeberhard, D. Coomans and O. de Vel,
Comparison of Classifiers in High Dimensional Settings,
Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of
Mathematics and Statistics, James Cook University of North Queensland.
(Also submitted to Technometrics).
The data was used with many others for comparing various
classifiers. The classes are separable, though only RDA
has achieved 100% correct classification.
(RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data))
(All results using the leave-one-out technique)
In a classification context, this is a well posed problem
with "well behaved" class structures. A good data set
for first testing of a new classifier, but not very
challenging.
(2)
S. Aeberhard, D. Coomans and O. de Vel,
"THE CLASSIFICATION PERFORMANCE OF RDA"
Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of
Mathematics and Statistics, James Cook University of North Queensland.
(Also submitted to Journal of Chemometrics).
Here, the data was used to illustrate the superior performance of
the use of a new appreciation function with RDA.
4. Relevant Information:
-- These data are the results of a chemical analysis of
wines grown in the same region in Italy but derived from three
different cultivars.
The analysis determined the quantities of 13 constituents
found in each of the three types of wines.
-- I think that the initial data set had around 30 variables, but
for some reason I only have the 13 dimensional version.
I had a list of what the 30 or so variables were, but a.)
I lost it, and b.), I would not know which 13 variables
are included in the set.
-- The attributes are (dontated by Riccardo Leardi,
riclea@anchem.unige.it )
1) Alcohol
2) Malic acid
3) Ash
4) Alcalinity of ash
5) Magnesium
6) Total phenols
7) Flavanoids
8) Nonflavanoid phenols
9) Proanthocyanins
10)Color intensity
11)Hue
12)OD280/OD315 of diluted wines
13)Proline
5. Number of Instances
class 1 59
class 2 71
class 3 48
6. Number of Attributes
13
7. For Each Attribute:
All attributes are continuous
No statistics available, but suggest to standardise
variables for certain uses (e.g. for us with classifiers
which are NOT scale invariant)
NOTE: 1st attribute is class identifier (1-3)
8. Missing Attribute Values:
None
9. Class Distribution: number of instances per class
class 1 59
class 2 71
class 3 48