Compare commits
2 Commits
9abd1a6df6
...
88f2975b86
| Author | SHA1 | Date | |
|---|---|---|---|
| 88f2975b86 | |||
| dc2ceac7de |
|
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
|
Before Width: | Height: | Size: 42 KiB After Width: | Height: | Size: 42 KiB |
|
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 63 KiB |
|
Before Width: | Height: | Size: 62 KiB After Width: | Height: | Size: 62 KiB |
|
Before Width: | Height: | Size: 46 KiB After Width: | Height: | Size: 46 KiB |
|
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 63 KiB |
|
Before Width: | Height: | Size: 62 KiB After Width: | Height: | Size: 62 KiB |
|
Before Width: | Height: | Size: 34 KiB After Width: | Height: | Size: 34 KiB |
|
Before Width: | Height: | Size: 34 KiB After Width: | Height: | Size: 34 KiB |
|
Before Width: | Height: | Size: 64 KiB After Width: | Height: | Size: 64 KiB |
|
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 63 KiB |
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
@@ -1,5 +0,0 @@
|
||||
node_modules
|
||||
.venv
|
||||
.vscode
|
||||
Assignment III
|
||||
tmp/
|
||||
@@ -0,0 +1,41 @@
|
||||
##########################################
### Principal Component Analysis (PCA) ###
##########################################

## Load libraries: plotting (ggplot2 / ggfortify / GGally / psych),
## classifiers (e1071 / class) and data import (readr).
library(ggplot2)
library(ggfortify)
library(GGally)
library(e1071)
library(class)
library(psych)
library(readr)

## Set working directory so that files can be referenced without the full path.
## FIX: the original called setwd() unconditionally with a machine-specific
## absolute path, which aborts the whole script on any other machine.
## Guard it so the script still runs from the current directory elsewhere.
lab_dir <- "/home/ion606/Desktop/Data Analytics/Lab 4"
if (dir.exists(lab_dir)) {
  setwd(lab_dir)
} else {
  message("lab directory not found; reading wine.data from the current directory")
}

## Read dataset (wine.data has no header row).
wine <- read_csv("wine.data", col_names = FALSE)

## Set column names (column 1 is the class label).
names(wine) <- c("Type","Alcohol","Malic acid","Ash","Alcalinity of ash","Magnesium","Total phenols","Flavanoids","Nonflavanoid Phenols","Proanthocyanins","Color Intensity","Hue","Od280/od315 of diluted wines","Proline")

## Inspect data frame.
head(wine)

## Change the data type of the "Type" column from character to factor.
####
# Factors look like regular strings (characters) but with factors R knows
# that the column is a categorical variable with finite possible values
# e.g. "Type" in the Wine dataset can only be 1, 2, or 3
####
wine$Type <- as.factor(wine$Type)

## Visualize variables: scatterplot matrix colored by class.
pairs.panels(wine[,-1], gap = 0, bg = c("red", "yellow", "blue")[wine$Type], pch = 21)

ggpairs(wine, ggplot2::aes(colour = Type))

###
|
||||
@@ -0,0 +1,366 @@
|
||||
# Return TRUE when a package can be loaded, without attaching it.
has_pkg <- function(name) requireNamespace(name, quietly = TRUE)

# Probe every optional dependency once, up front.
has_ggplot2 <- has_pkg("ggplot2")
has_GGally <- has_pkg("GGally")
has_e1071 <- has_pkg("e1071")
has_class <- has_pkg("class")
has_psych <- has_pkg("psych")
has_readr <- has_pkg("readr")

# Attach whatever is available; degrade gracefully when optional packages
# are missing, and stop outright only for the one hard requirement (class).
if (has_ggplot2) {
  library(ggplot2)
} else {
  warning("ggplot2 not available; plots will be skipped")
}
if (has_GGally) {
  library(GGally)
} else {
  message("GGally not available; skipping ggpairs plot")
}
if (has_e1071) {
  library(e1071)
}
if (has_class) {
  library(class)
} else {
  stop("class package not available for kNN")
}
if (!has_psych) {
  message("psych not available; skipping pairs.panels plot")
}
if (has_readr) {
  library(readr)
}
library(grid) # unit() for arrows in plots

# Use the pre-3.6.0 sampling algorithm so old seeds reproduce old results.
suppressWarnings(RNGkind(sample.kind = "Rounding"))

# Set a reproducible seed.
set.seed(4600)
|
||||
|
||||
# wine.data: 178 rows; column 1 is the class label (1, 2, 3) and the
# remaining 13 columns are continuous predictors.

# Candidate locations for the data file, tried in order.
possible_paths <- c(
  "wine.data",
  "./wine.data",
  "../wine.data",
  "DAN/wine.data",
  "./DAN/wine.data"
)

# file.exists() is vectorised: keep the candidates that exist and take the
# first one — same result as the original first-match break loop.
existing <- possible_paths[file.exists(possible_paths)]
data_path <- if (length(existing) > 0) existing[1] else NA
if (is.na(data_path)) stop("could not find wine.data; place this script in the DAN folder or given/ and re-run")

# Prefer readr when available (quiet, type-stable); otherwise fall back
# to base read.csv.
if (has_readr) {
  wine <- readr::read_csv(
    file = data_path,
    col_names = FALSE,
    show_col_types = FALSE,
    progress = FALSE
  )
} else {
  wine <- read.csv(file = data_path, header = FALSE)
}

# Syntactic column names (no spaces) keep downstream code simple.
colnames(wine) <- c(
  "Type",
  "Alcohol",
  "Malic_acid",
  "Ash",
  "Alcalinity_of_ash",
  "Magnesium",
  "Total_phenols",
  "Flavanoids",
  "Nonflavanoid_phenols",
  "Proanthocyanins",
  "Color_intensity",
  "Hue",
  "OD280_OD315",
  "Proline"
)

# The class label is categorical, not numeric.
wine$Type <- as.factor(wine$Type)

# Sanity check: guards against accidentally reading the wrong file
# (left in deliberately — it makes the script more robust).
stopifnot(nrow(wine) == 178, ncol(wine) == 14)
print(summary(wine$Type))
|
||||
|
||||
# Exploratory plots (each section is skipped when its package is missing).

if (has_psych) {
  # pairs panel (psych) – colors by class
  psych::pairs.panels(
    wine[,-1],
    gap = 0,
    bg = c("red","gold","royalblue")[wine$Type],
    pch = 21,
    main = "wine (uci) – scatterplot matrix by class"
  )
}

if (has_GGally && has_ggplot2) {
  # ggpairs for nice matrix <3
  GGally::ggpairs(wine, ggplot2::aes(colour = Type), columns = 2:ncol(wine))
}

# Split into train/test BEFORE any preprocessing to avoid leakage.
set.seed(4600)
n <- nrow(wine)
train_idx <- sample.int(n, size = floor(0.7 * n))
wine_train <- wine[train_idx, , drop = FALSE]
wine_test <- wine[-train_idx, , drop = FALSE]

X_train <- wine_train[, -1]
y_train <- wine_train$Type
X_test <- wine_test[, -1]
y_test <- wine_test$Type

# Guard against degenerate predictors before standardizing.
# FIX: vapply() is type-stable, unlike sapply() which can silently change
# its return type on unusual input.
zero_var <- vapply(X_train, function(x) var(x, na.rm = TRUE) == 0, logical(1))
if (any(zero_var)) {
  warning("one or more predictors have zero variance in the training set; scale() would fail")
}
# FIX: scalar `if` condition — use the short-circuit `||` rather than the
# elementwise `|` (both operands are scalar here, but `||` is the correct
# operator for control flow).
if (anyNA(X_train) || anyNA(X_test)) {
  stop("found NA values in predictors; handle missingness before PCA")
}
|
||||
|
||||
# Fit PCA on the training predictors only; both train and test are later
# projected with this train-fitted rotation (prevents test-set leakage).
pca_tr <- prcomp(X_train, center = TRUE, scale. = TRUE)

# Proportion of variance explained (PVE) per component.
pve_tr <- (pca_tr$sdev^2) / sum(pca_tr$sdev^2)
pve_df <- data.frame(
  PC = paste0("PC", seq_along(pve_tr)),
  PVE = pve_tr,
  CumPVE = cumsum(pve_tr)
)

print("variance explained (training pca):")
print(pve_df)

# Scree plot from training pca.
# FIX: seq_along() instead of the 1:length() anti-pattern for axis breaks.
p_scree <- ggplot(pve_df, aes(x = seq_along(PVE), y = PVE)) +
  geom_line() + geom_point() +
  scale_x_continuous(breaks = seq_along(pve_df$PC), labels = pve_df$PC) +
  labs(title = "scree plot – variance explained by principal components (training pca)",
       x = "principal component", y = "proportion of variance explained") +
  theme_minimal()

# Cumulative variance plot from training pca.
p_cumvar <- ggplot(pve_df, aes(x = seq_along(CumPVE), y = CumPVE)) +
  geom_line() + geom_point() +
  scale_x_continuous(breaks = seq_along(pve_df$PC), labels = pve_df$PC) +
  labs(title = "cumulative variance explained (training pca)",
       x = "principal component", y = "cumulative proportion of variance") +
  theme_minimal()

# ========================================================================================================

# Choose number of PCs: smallest k with >= thresh cumulative variance.
# Change thresh to 0.90 or 0.99 if you prefer.
pc_variance_threshold <- 0.95
k_pcs <- which(cumsum(pve_tr) >= pc_variance_threshold)[1]
if (is.na(k_pcs)) k_pcs <- ncol(X_train) # fall back to all components
cat("chosen number of pcs (threshold =", pc_variance_threshold, "):", k_pcs, "\n")

# Project train/test into the PCA space.
Z_train_full <- as.data.frame(predict(pca_tr, newdata = X_train))
Z_test_full <- as.data.frame(predict(pca_tr, newdata = X_test))

# Retained components for downstream modeling.
Z_train <- Z_train_full[, seq_len(k_pcs), drop = FALSE]
Z_test <- Z_test_full[, seq_len(k_pcs), drop = FALSE]

# Scores for ALL rows — used for plotting only, never for model fitting.
scores_all <- as.data.frame(predict(pca_tr, newdata = wine[,-1]))
scores_all$Type <- wine$Type

# Loadings from the training PCA and the strongest contributors to PC1/PC2.
loadings <- as.data.frame(pca_tr$rotation)
loadings$Variable <- rownames(loadings)
top_pc1 <- loadings[order(abs(loadings$PC1), decreasing = TRUE), c("Variable","PC1")][1:5, ]
top_pc2 <- loadings[order(abs(loadings$PC2), decreasing = TRUE), c("Variable","PC2")][1:5, ]
print("top contributors to pc1 (training pca):"); print(top_pc1)
print("top contributors to pc2 (training pca):"); print(top_pc2)
|
||||
|
||||
|
||||
# PC1/PC2 scatter with per-class convex hulls, plus a biplot with
# loading arrows.
scores <- scores_all

# One hull polygon per class: take the convex-hull vertices of each
# class's (PC1, PC2) cloud and stack them back into a single data frame.
per_class_hull <- function(grp) {
  vertices <- grp[chull(grp$PC1, grp$PC2), c("PC1", "PC2")]
  vertices$Type <- unique(grp$Type)
  vertices
}
hull_df <- do.call(rbind, lapply(split(scores, scores$Type), per_class_hull))

p_pc12 <- ggplot(scores, aes(PC1, PC2, color = Type)) +
  geom_point(size = 2, alpha = 0.85) +
  geom_polygon(data = hull_df, aes(fill = Type, group = Type), color = NA, alpha = 0.15) +
  guides(fill = "none") +
  theme_minimal() +
  labs(title = "pc1 vs pc2 by class (projected with training pca)")

# Scale the loading vectors so the arrows are visible against the scores.
loading_scalefactor <- 3 * max(abs(scores$PC1), abs(scores$PC2)) # heuristic
load_plot_df <- loadings
load_plot_df$PC1s <- load_plot_df$PC1 * loading_scalefactor
load_plot_df$PC2s <- load_plot_df$PC2 * loading_scalefactor

p_biplot <- ggplot(scores, aes(PC1, PC2, color = Type)) +
  geom_point(size = 2, alpha = 0.85) +
  geom_segment(
    data = load_plot_df,
    mapping = aes(x = 0, y = 0, xend = PC1s, yend = PC2s),
    inherit.aes = FALSE,
    arrow = arrow(length = unit(0.02, "npc")),
    color = "black",
    alpha = 0.8
  ) +
  geom_text(
    data = load_plot_df,
    mapping = aes(x = PC1s, y = PC2s, label = Variable),
    inherit.aes = FALSE,
    hjust = 0,
    vjust = 0
  ) +
  theme_minimal() +
  labs(title = "pc1 vs pc2 with variable loadings (training pca projection)")
|
||||
|
||||
# 1) kNN on original variables with standardization
# 2) kNN on first 2 principal components only

# Compute classification metrics from a confusion matrix
# (rows = true class, cols = predicted class).
# Returns a list with:
#   per_class — data frame of precision / recall / f1 per class
#   summary   — one-row data frame: accuracy + macro-averaged metrics
compute_metrics <- function(cm) {
  lv <- rownames(cm)
  if (is.null(lv)) lv <- as.character(1:nrow(cm))

  tp <- diag(cm)          # correctly classified counts, one per class
  fp <- colSums(cm) - tp  # predicted as this class but actually another
  fn <- rowSums(cm) - tp  # members of this class the model missed

  prec <- tp / (tp + fp)
  rec <- tp / (tp + fn)
  f1_score <- 2 * prec * rec / (prec + rec)

  # Macro averages ignore NaN classes (e.g. a class never predicted).
  list(
    per_class = data.frame(
      class = lv,
      precision = prec,
      recall = rec,
      f1 = f1_score,
      row.names = NULL
    ),
    summary = data.frame(
      accuracy = sum(tp) / sum(cm),
      macro_precision = mean(prec, na.rm = TRUE),
      macro_recall = mean(rec, na.rm = TRUE),
      macro_f1 = mean(f1_score, na.rm = TRUE)
    )
  )
}
|
||||
|
||||
# Model fitting. NOTE: the order of RNG-consuming calls below (the two
# sample() calls and the knn() calls inside CV) is preserved exactly so
# that results stay reproducible under set.seed(4600).
set.seed(4600)
ks <- seq(1, 15, by = 2) # odd k only — avoids many voting ties
Kfolds <- 5

# ---- kNN on the original variables -------------------------------------
# Standardize with TRAINING statistics only, then apply those same
# statistics to the test set (no leakage).
X_train_scaled <- scale(X_train, center = TRUE, scale = TRUE)
scale_center <- attr(X_train_scaled, "scaled:center")
scale_scale <- attr(X_train_scaled, "scaled:scale")
X_test_scaled <- scale(X_test, center = scale_center, scale = scale_scale)

n_train_orig <- nrow(X_train_scaled)
folds_orig <- sample(rep(1:Kfolds, length.out = n_train_orig))

# 5-fold CV accuracy for every candidate k.
# FIX: vapply() (type-stable) replaces sapply(), whose return type can
# silently change with unusual input.
cv_acc_orig <- vapply(ks, function(k) {
  fold_acc <- vapply(1:Kfolds, function(f) {
    in_fold <- folds_orig == f
    pred_cv <- knn(train = X_train_scaled[!in_fold, , drop = FALSE],
                   test = X_train_scaled[in_fold, , drop = FALSE],
                   cl = y_train[!in_fold], k = k)
    mean(pred_cv == y_train[in_fold])
  }, numeric(1))
  mean(fold_acc)
}, numeric(1))

best_k_orig <- ks[which.max(cv_acc_orig)]
cat("[Original vars] best k:", best_k_orig, "cv acc:", max(cv_acc_orig), "\n")

# Refit on the full training set with the CV-chosen k; evaluate held out.
pred_orig <- knn(train = X_train_scaled, test = X_test_scaled, cl = y_train, k = best_k_orig)
acc_orig <- mean(pred_orig == y_test)
cm_orig <- table(truth = y_test, pred = pred_orig)

cat("[Original vars] held-out accuracy:", round(acc_orig, 4), "\n")
print(cm_orig)

metrics_orig <- compute_metrics(cm_orig)
print(metrics_orig$summary)
print(metrics_orig$per_class)

# ---- kNN on the first 2 principal components only ----------------------
Z2_train <- Z_train_full[, 1:2, drop = FALSE]
Z2_test <- Z_test_full[, 1:2, drop = FALSE]
n_train_2pc <- nrow(Z2_train)

folds_2pc <- sample(rep(1:Kfolds, length.out = n_train_2pc))
cv_acc_2pc <- vapply(ks, function(k) {
  fold_acc <- vapply(1:Kfolds, function(f) {
    in_fold <- folds_2pc == f
    pred_cv <- knn(train = Z2_train[!in_fold, , drop = FALSE],
                   test = Z2_train[in_fold, , drop = FALSE],
                   cl = y_train[!in_fold], k = k)
    mean(pred_cv == y_train[in_fold])
  }, numeric(1))
  mean(fold_acc)
}, numeric(1))

best_k_2pc <- ks[which.max(cv_acc_2pc)]
cat("[First 2 PCs] best k:", best_k_2pc, "cv acc:", max(cv_acc_2pc), "\n")

pred_2pc <- knn(train = Z2_train, test = Z2_test, cl = y_train, k = best_k_2pc)
acc_2pc <- mean(pred_2pc == y_test)
cm_2pc <- table(truth = y_test, pred = pred_2pc)

cat("[First 2 PCs] held-out accuracy:", round(acc_2pc, 4), "\n")
print(cm_2pc)

metrics_2pc <- compute_metrics(cm_2pc)
print(metrics_2pc$summary)
print(metrics_2pc$per_class)
|
||||
|
||||
# ===========================================================================
# Persist plots, tables and metrics under outputs/ so results survive
# the R session.
outputs_dir <- "outputs"
if (!dir.exists(outputs_dir)) dir.create(outputs_dir, recursive = TRUE, showWarnings = FALSE)

# Save a ggplot (looked up by name) only when it exists and really is a
# ggplot — plots may be absent when optional packages were missing.
save_plot_if_any <- function(obj_name, file_name) {
  if (exists(obj_name) && inherits(get(obj_name), "ggplot")) {
    ggsave(filename = file.path(outputs_dir, file_name), plot = get(obj_name),
           width = 8, height = 6, dpi = 300)
  }
}
save_plot_if_any("p_pc12", "pc12_scatter.png")
save_plot_if_any("p_biplot", "pc12_biplot.png")
save_plot_if_any("p_scree", "pca_scree.png")
save_plot_if_any("p_cumvar", "pca_cumvar.png")

# Top contributors/vars to PC1 and PC2.
write.csv(top_pc1, file = file.path(outputs_dir, "top_contributors_pc1.csv"), row.names = FALSE)
write.csv(top_pc2, file = file.path(outputs_dir, "top_contributors_pc2.csv"), row.names = FALSE)

# Confusion matrices: wide CSV plus the pretty printed text form.
write.csv(as.matrix(cm_orig), file = file.path(outputs_dir, "confusion_original_wide.csv"))
writeLines(capture.output(cm_orig), con = file.path(outputs_dir, "confusion_original.txt"))

write.csv(as.matrix(cm_2pc), file = file.path(outputs_dir, "confusion_2pc_wide.csv"))
writeLines(capture.output(cm_2pc), con = file.path(outputs_dir, "confusion_2pc.txt"))

# Per-class and macro metrics for both models.
write.csv(metrics_orig$per_class, file = file.path(outputs_dir, "metrics_original_per_class.csv"), row.names = FALSE)
write.csv(metrics_orig$summary, file = file.path(outputs_dir, "metrics_original_summary.csv"), row.names = FALSE)
write.csv(metrics_2pc$per_class, file = file.path(outputs_dir, "metrics_2pc_per_class.csv"), row.names = FALSE)
write.csv(metrics_2pc$summary, file = file.path(outputs_dir, "metrics_2pc_summary.csv"), row.names = FALSE)

# Side-by-side comparison of both models.
metrics_compare <- data.frame(
  model = c("original_variables", "first_2_pcs"),
  accuracy = c(metrics_orig$summary$accuracy, metrics_2pc$summary$accuracy),
  macro_precision = c(metrics_orig$summary$macro_precision, metrics_2pc$summary$macro_precision),
  macro_recall = c(metrics_orig$summary$macro_recall, metrics_2pc$summary$macro_recall),
  macro_f1 = c(metrics_orig$summary$macro_f1, metrics_2pc$summary$macro_f1)
)
write.csv(metrics_compare, file = file.path(outputs_dir, "metrics_comparison.csv"), row.names = FALSE)

# Non-interactive runs: also bundle the key plots into a single PDF.
# (The below was made with help from ChatGPT because the psych package
# is confusing.)
if (!interactive() && has_ggplot2) {
  pdf("Rplots_pca_fixed.pdf", width = 8, height = 6)
  if (has_psych) {
    psych::pairs.panels(
      wine[,-1],
      gap = 0,
      bg = c("red","gold","royalblue")[wine$Type],
      pch = 21,
      main = "wine (uci) – scatterplot matrix by class"
    )
  }

  if (exists("p_scree") && inherits(p_scree, "ggplot")) print(p_scree)
  if (exists("p_pc12") && inherits(p_pc12, "ggplot")) print(p_pc12)
  dev.off()
}
|
||||
@@ -0,0 +1,5 @@
|
||||
pred
|
||||
truth 1 2 3
|
||||
1 15 2 0
|
||||
2 1 19 1
|
||||
3 0 1 15
|
||||
@@ -0,0 +1,4 @@
|
||||
"","1","2","3"
|
||||
"1",15,2,0
|
||||
"2",1,19,1
|
||||
"3",0,1,15
|
||||
|
@@ -0,0 +1,5 @@
|
||||
pred
|
||||
truth 1 2 3
|
||||
1 17 0 0
|
||||
2 1 18 2
|
||||
3 0 0 16
|
||||
@@ -0,0 +1,4 @@
|
||||
"","1","2","3"
|
||||
"1",17,0,0
|
||||
"2",1,18,2
|
||||
"3",0,0,16
|
||||
|
@@ -0,0 +1,4 @@
|
||||
"class","precision","recall","f1"
|
||||
"1",0.9375,0.882352941176471,0.909090909090909
|
||||
"2",0.863636363636364,0.904761904761905,0.883720930232558
|
||||
"3",0.9375,0.9375,0.9375
|
||||
|
@@ -0,0 +1,2 @@
|
||||
"accuracy","macro_precision","macro_recall","macro_f1"
|
||||
0.907407407407407,0.912878787878788,0.908204948646125,0.910103946441156
|
||||
|
@@ -0,0 +1,3 @@
|
||||
"model","accuracy","macro_precision","macro_recall","macro_f1"
|
||||
"original_variables",0.944444444444444,0.944444444444444,0.952380952380952,0.94522732169791
|
||||
"first_2_pcs",0.907407407407407,0.912878787878788,0.908204948646125,0.910103946441156
|
||||
|
@@ -0,0 +1,4 @@
|
||||
"class","precision","recall","f1"
|
||||
"1",0.944444444444444,1,0.971428571428571
|
||||
"2",1,0.857142857142857,0.923076923076923
|
||||
"3",0.888888888888889,1,0.941176470588235
|
||||
|
@@ -0,0 +1,2 @@
|
||||
"accuracy","macro_precision","macro_recall","macro_f1"
|
||||
0.944444444444444,0.944444444444444,0.952380952380952,0.94522732169791
|
||||
|
|
After Width: | Height: | Size: 344 KiB |
|
After Width: | Height: | Size: 227 KiB |
|
After Width: | Height: | Size: 101 KiB |
|
After Width: | Height: | Size: 105 KiB |
@@ -0,0 +1,6 @@
|
||||
"Variable","PC1"
|
||||
"Flavanoids",0.430570697054093
|
||||
"Total_phenols",0.388556731445086
|
||||
"OD280_OD315",0.379238757892512
|
||||
"Proanthocyanins",0.318149910146199
|
||||
"Nonflavanoid_phenols",-0.292569052362651
|
||||
|
@@ -0,0 +1,6 @@
|
||||
"Variable","PC2"
|
||||
"Color_intensity",-0.504116493512561
|
||||
"Alcohol",-0.480328824227057
|
||||
"Ash",-0.369020648548877
|
||||
"Proline",-0.3555672525193
|
||||
"Hue",0.300324646690879
|
||||
|
@@ -0,0 +1,178 @@
|
||||
1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
|
||||
1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050
|
||||
1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185
|
||||
1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480
|
||||
1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735
|
||||
1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450
|
||||
1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290
|
||||
1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295
|
||||
1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045
|
||||
1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045
|
||||
1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510
|
||||
1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280
|
||||
1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320
|
||||
1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150
|
||||
1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547
|
||||
1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310
|
||||
1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280
|
||||
1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130
|
||||
1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680
|
||||
1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845
|
||||
1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780
|
||||
1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770
|
||||
1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035
|
||||
1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015
|
||||
1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845
|
||||
1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830
|
||||
1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195
|
||||
1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285
|
||||
1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915
|
||||
1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035
|
||||
1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285
|
||||
1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515
|
||||
1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990
|
||||
1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235
|
||||
1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095
|
||||
1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920
|
||||
1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880
|
||||
1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105
|
||||
1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020
|
||||
1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760
|
||||
1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795
|
||||
1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035
|
||||
1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095
|
||||
1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680
|
||||
1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885
|
||||
1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080
|
||||
1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065
|
||||
1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985
|
||||
1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060
|
||||
1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260
|
||||
1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150
|
||||
1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265
|
||||
1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190
|
||||
1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375
|
||||
1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060
|
||||
1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120
|
||||
1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970
|
||||
1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270
|
||||
1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285
|
||||
2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520
|
||||
2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680
|
||||
2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450
|
||||
2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630
|
||||
2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420
|
||||
2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355
|
||||
2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678
|
||||
2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502
|
||||
2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510
|
||||
2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750
|
||||
2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718
|
||||
2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870
|
||||
2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410
|
||||
2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472
|
||||
2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985
|
||||
2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886
|
||||
2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428
|
||||
2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392
|
||||
2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500
|
||||
2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750
|
||||
2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463
|
||||
2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278
|
||||
2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714
|
||||
2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630
|
||||
2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515
|
||||
2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520
|
||||
2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450
|
||||
2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495
|
||||
2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562
|
||||
2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680
|
||||
2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625
|
||||
2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480
|
||||
2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450
|
||||
2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495
|
||||
2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290
|
||||
2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345
|
||||
2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937
|
||||
2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625
|
||||
2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428
|
||||
2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660
|
||||
2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406
|
||||
2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710
|
||||
2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562
|
||||
2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438
|
||||
2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415
|
||||
2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672
|
||||
2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315
|
||||
2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510
|
||||
2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488
|
||||
2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312
|
||||
2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680
|
||||
2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562
|
||||
2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325
|
||||
2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607
|
||||
2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434
|
||||
2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385
|
||||
2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407
|
||||
2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495
|
||||
2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345
|
||||
2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372
|
||||
2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564
|
||||
2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625
|
||||
2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465
|
||||
2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365
|
||||
2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380
|
||||
2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380
|
||||
2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378
|
||||
2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352
|
||||
2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466
|
||||
2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342
|
||||
2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580
|
||||
3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630
|
||||
3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530
|
||||
3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560
|
||||
3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600
|
||||
3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650
|
||||
3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695
|
||||
3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720
|
||||
3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515
|
||||
3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580
|
||||
3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590
|
||||
3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600
|
||||
3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780
|
||||
3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520
|
||||
3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550
|
||||
3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855
|
||||
3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830
|
||||
3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415
|
||||
3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625
|
||||
3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650
|
||||
3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550
|
||||
3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500
|
||||
3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480
|
||||
3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425
|
||||
3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675
|
||||
3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640
|
||||
3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725
|
||||
3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
|
||||
3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
|
||||
3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
|
||||
3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
|
||||
3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
|
||||
3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
|
||||
3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
|
||||
3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
|
||||
3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
|
||||
3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
|
||||
3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
|
||||
3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
|
||||
3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
|
||||
3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
|
||||
3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
|
||||
3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
|
||||
3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
|
||||
3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
|
||||
3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
|
||||
3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
|
||||
3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
|
||||
3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
|
||||
@@ -0,0 +1,100 @@
|
||||
1. Title of Database: Wine recognition data
|
||||
Updated Sept 21, 1998 by C.Blake : Added attribute information
|
||||
|
||||
2. Sources:
|
||||
(a) Forina, M. et al, PARVUS - An Extendible Package for Data
|
||||
Exploration, Classification and Correlation. Institute of Pharmaceutical
|
||||
and Food Analysis and Technologies, Via Brigata Salerno,
|
||||
16147 Genoa, Italy.
|
||||
|
||||
(b) Stefan Aeberhard, email: stefan@coral.cs.jcu.edu.au
|
||||
(c) July 1991
|
||||
3. Past Usage:
|
||||
|
||||
(1)
|
||||
S. Aeberhard, D. Coomans and O. de Vel,
|
||||
Comparison of Classifiers in High Dimensional Settings,
|
||||
Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of
|
||||
Mathematics and Statistics, James Cook University of North Queensland.
|
||||
(Also submitted to Technometrics).
|
||||
|
||||
The data was used with many others for comparing various
|
||||
classifiers. The classes are separable, though only RDA
|
||||
has achieved 100% correct classification.
|
||||
(RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data))
|
||||
(All results using the leave-one-out technique)
|
||||
|
||||
In a classification context, this is a well posed problem
|
||||
with "well behaved" class structures. A good data set
|
||||
for first testing of a new classifier, but not very
|
||||
challenging.
|
||||
|
||||
(2)
|
||||
S. Aeberhard, D. Coomans and O. de Vel,
|
||||
"THE CLASSIFICATION PERFORMANCE OF RDA"
|
||||
Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of
|
||||
Mathematics and Statistics, James Cook University of North Queensland.
|
||||
(Also submitted to Journal of Chemometrics).
|
||||
|
||||
Here, the data was used to illustrate the superior performance of
|
||||
the use of a new appreciation function with RDA.
|
||||
|
||||
4. Relevant Information:
|
||||
|
||||
-- These data are the results of a chemical analysis of
|
||||
wines grown in the same region in Italy but derived from three
|
||||
different cultivars.
|
||||
The analysis determined the quantities of 13 constituents
|
||||
found in each of the three types of wines.
|
||||
|
||||
-- I think that the initial data set had around 30 variables, but
|
||||
for some reason I only have the 13 dimensional version.
|
||||
I had a list of what the 30 or so variables were, but a.)
|
||||
I lost it, and b.), I would not know which 13 variables
|
||||
are included in the set.
|
||||
|
||||
-- The attributes are (donated by Riccardo Leardi,
|
||||
riclea@anchem.unige.it )
|
||||
1) Alcohol
|
||||
2) Malic acid
|
||||
3) Ash
|
||||
4) Alcalinity of ash
|
||||
5) Magnesium
|
||||
6) Total phenols
|
||||
7) Flavanoids
|
||||
8) Nonflavanoid phenols
|
||||
9) Proanthocyanins
|
||||
10)Color intensity
|
||||
11)Hue
|
||||
12)OD280/OD315 of diluted wines
|
||||
13)Proline
|
||||
|
||||
5. Number of Instances
|
||||
|
||||
class 1 59
|
||||
class 2 71
|
||||
class 3 48
|
||||
|
||||
6. Number of Attributes
|
||||
|
||||
13
|
||||
|
||||
7. For Each Attribute:
|
||||
|
||||
All attributes are continuous
|
||||
|
||||
No statistics available, but suggest to standardise
|
||||
variables for certain uses (e.g. for use with classifiers
|
||||
which are NOT scale invariant)
|
||||
|
||||
NOTE: 1st attribute is class identifier (1-3)
|
||||
|
||||
8. Missing Attribute Values:
|
||||
|
||||
None
|
||||
|
||||
9. Class Distribution: number of instances per class
|
||||
|
||||
class 1 59
|
||||
class 2 71
|
||||
class 3 48
|
||||