Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4eff5a6378 | |||
| 5adb4119f5 | |||
| cd3ababd59 | |||
| 88f2975b86 | |||
| dc2ceac7de | |||
| 9abd1a6df6 | |||
| 555650ac3c |
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
|
|
||||||
suppressPackageStartupMessages({
|
suppressPackageStartupMessages({
|
||||||
pkgs <- c("tidyverse", "readr", "readxl", "broom", "jsonlite", "ggplot2", "class", "optparse")
|
pkgs <- c("tidyverse", "readr", "readxl", "broom", "jsonlite", "ggplot2", "class", "optparse", "markdown")
|
||||||
to_install <- pkgs[!pkgs %in% rownames(installed.packages())]
|
to_install <- pkgs[!pkgs %in% rownames(installed.packages())]
|
||||||
if (length(to_install)) install.packages(to_install, repos = "https://cloud.r-project.org")
|
if (length(to_install)) install.packages(to_install, repos = "https://cloud.r-project.org")
|
||||||
lapply(pkgs, library, character.only = TRUE)
|
lapply(pkgs, library, character.only = TRUE)
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
|
library(markdown)
|
||||||
|
|
||||||
source("/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/R/00_utils.R")
|
source("/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/R/00_utils.R")
|
||||||
ctx <- jsonlite::fromJSON("/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/ctx.json")
|
ctx <- jsonlite::fromJSON("/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/ctx.json")
|
||||||
|
|
||||||
@@ -70,15 +72,18 @@ if (!is.null(ctx$knn) && length(ctx$knn)) {
|
|||||||
# I hate markdown sometimes man
|
# I hate markdown sometimes man
|
||||||
md <- gsub("/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/", "", md)
|
md <- gsub("/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/", "", md)
|
||||||
|
|
||||||
writeLines(md, "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/report.md")
|
# writeLines(md, "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/report.md")
|
||||||
writeLines(jsonlite::toJSON(ctx, pretty = TRUE, auto_unbox = TRUE),
|
# writeLines(jsonlite::toJSON(ctx, pretty = TRUE, auto_unbox = TRUE),
|
||||||
file.path(ctx$stats_dir, "summary.json"))
|
# file.path(ctx$stats_dir, "summary.json"))
|
||||||
|
|
||||||
# rmarkdown::render(
|
md_file <- "output/report.md"
|
||||||
# "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output/report.md",
|
html_file <- "output/report.html"
|
||||||
# output_format = "pdf_document",
|
pdf_file <- "output/report.pdf"
|
||||||
# output_file = "report.pdf",
|
|
||||||
# output_dir = "/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/output"
|
setwd("/home/ion606/Desktop/Homework/Data Analytics/Assignments/Assignment II/")
|
||||||
# )
|
markdownToHTML(
|
||||||
|
md_file,
|
||||||
|
html_file
|
||||||
|
)
|
||||||
|
|
||||||
message("done")
|
message("done")
|
||||||
|
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
|
Before Width: | Height: | Size: 42 KiB After Width: | Height: | Size: 42 KiB |
|
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 63 KiB |
|
Before Width: | Height: | Size: 62 KiB After Width: | Height: | Size: 62 KiB |
|
Before Width: | Height: | Size: 46 KiB After Width: | Height: | Size: 46 KiB |
|
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 63 KiB |
|
Before Width: | Height: | Size: 62 KiB After Width: | Height: | Size: 62 KiB |
|
Before Width: | Height: | Size: 34 KiB After Width: | Height: | Size: 34 KiB |
|
Before Width: | Height: | Size: 34 KiB After Width: | Height: | Size: 34 KiB |
|
Before Width: | Height: | Size: 64 KiB After Width: | Height: | Size: 64 KiB |
|
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 63 KiB |
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
@@ -1,4 +0,0 @@
|
|||||||
node_modules
|
|
||||||
.venv
|
|
||||||
.vscode
|
|
||||||
Assignment III
|
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
##########################################
|
||||||
|
### Principal Component Analysis (PCA) ###
|
||||||
|
##########################################
|
||||||
|
|
||||||
|
## load libraries
|
||||||
|
library(ggplot2)
|
||||||
|
library(ggfortify)
|
||||||
|
library(GGally)
|
||||||
|
library(e1071)
|
||||||
|
library(class)
|
||||||
|
library(psych)
|
||||||
|
library(readr)
|
||||||
|
|
||||||
|
## locate the dataset
# The path is built explicitly instead of calling setwd(): changing the working
# directory is a process-wide side effect that outlives this script.
data_dir <- "/home/ion606/Desktop/Data Analytics/Lab 4"

## read dataset
# col_names = FALSE: the first row of wine.data is data, not a header
wine <- read_csv(file.path(data_dir, "wine.data"), col_names = FALSE)

## set column names
names(wine) <- c(
  "Type", "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium",
  "Total phenols", "Flavanoids", "Nonflavanoid Phenols", "Proanthocyanins",
  "Color Intensity", "Hue", "Od280/od315 of diluted wines", "Proline"
)

## inspect data frame
head(wine)

## convert the "Type" column to a factor
####
# With a factor R knows that the column is a categorical variable with a
# finite set of possible values,
# e.g. "Type" in the Wine dataset can only be 1, 2, or 3
####

wine$Type <- as.factor(wine$Type)


## visualize variables
# scatterplot matrix of the 13 predictors; point fill is chosen by wine Type
pairs.panels(wine[, -1], gap = 0, bg = c("red", "yellow", "blue")[wine$Type], pch = 21)

# pairwise plot matrix of every column, coloured by Type
ggpairs(wine, ggplot2::aes(colour = Type))
|
||||||
|
|
||||||
|
###
|
||||||
@@ -0,0 +1,366 @@
|
|||||||
|
# Report whether the package named `pkg` can be used.
#   pkg: package name as a single string
# Returns TRUE when the package's namespace can be loaded, FALSE otherwise.
# The package is not attached to the search path.
has_pkg <- function(pkg) {
  requireNamespace(pkg, quietly = TRUE)
}
|
||||||
|
|
||||||
|
has_ggplot2 <- has_pkg("ggplot2")
|
||||||
|
has_GGally <- has_pkg("GGally")
|
||||||
|
has_e1071 <- has_pkg("e1071")
|
||||||
|
has_class <- has_pkg("class")
|
||||||
|
has_psych <- has_pkg("psych")
|
||||||
|
has_readr <- has_pkg("readr")
|
||||||
|
|
||||||
|
# WHY IS THIS HERE YOU MIGHT ASK???? WELL LET ME TELL YOU I SPENT TWO HOURS ON STUPID PACKAGE IMPORTS
|
||||||
|
# OOOOOOHHH PSYCH IS IN A DIFFERENT REPO??? OH IT ISN'T??? I have a fever of 103 I DO NOT CARE
|
||||||
|
if (has_ggplot2) { library(ggplot2) } else { warning("ggplot2 not available; plots will be skipped") }
|
||||||
|
if (has_GGally) { library(GGally) } else { message("GGally not available; skipping ggpairs plot") }
|
||||||
|
if (has_e1071) { library(e1071) }
|
||||||
|
if (has_class) { library(class) } else { stop("class package not available for kNN") }
|
||||||
|
if (!has_psych) { message("psych not available; skipping pairs.panels plot") }
|
||||||
|
if (has_readr) { library(readr) }
|
||||||
|
library(grid) # unit() for arrows in plots
|
||||||
|
# Select the "Rounding" sampling algorithm for sample()/sample.int(), i.e. the
# one R used before 3.6.0. R warns when this sampler is selected, which is why
# the call is wrapped in suppressWarnings().
# NOTE(review): presumably chosen so the seeded split matches results produced
# with an older R version - confirm it is still wanted.
suppressWarnings(RNGkind(sample.kind = "Rounding"))
|
||||||
|
|
||||||
|
# set a reproducible seed
|
||||||
|
set.seed(4600)
|
||||||
|
|
||||||
|
# 178 rows
|
||||||
|
# col 1 is class label (1,2,3)
|
||||||
|
# other 13 columns continuous predictors
|
||||||
|
|
||||||
|
# Candidate locations for the data file, relative to the current working
# directory; the first one that exists is used.
possible_paths <- c(
  "wine.data",
  "./wine.data",
  "../wine.data",
  "DAN/wine.data",
  "./DAN/wine.data"
)

# file.exists() is vectorised, so one call tests every candidate in order
existing_paths <- possible_paths[file.exists(possible_paths)]
data_path <- if (length(existing_paths) > 0) existing_paths[[1]] else NA

if (is.na(data_path)) {
  stop("could not find wine.data; place this script in the DAN folder or given/ and re-run")
}
|
||||||
|
|
||||||
|
# Read the raw file, which has no header row. readr is used when it is
# installed, otherwise base read.csv().
if (has_readr) {
  wine <- readr::read_csv(
    file = data_path,
    col_names = FALSE,
    show_col_types = FALSE,
    progress = FALSE
  )
} else {
  wine <- read.csv(file = data_path, header = FALSE)
}

# Shape check, added after repeatedly reading the wrong file by accident.
# It runs BEFORE the columns are renamed: assigning 14 names to a table of a
# different width either fails with an unrelated error or mislabels columns,
# so checking afterwards is too late to give a useful message.
stopifnot(nrow(wine) == 178, ncol(wine) == 14)

# column 1 is the class label, the remaining 13 are the predictors
colnames(wine) <- c(
  "Type",
  "Alcohol",
  "Malic_acid",
  "Ash",
  "Alcalinity_of_ash",
  "Magnesium",
  "Total_phenols",
  "Flavanoids",
  "Nonflavanoid_phenols",
  "Proanthocyanins",
  "Color_intensity",
  "Hue",
  "OD280_OD315",
  "Proline"
)

wine$Type <- as.factor(wine$Type)

# class counts, as a quick visual confirmation of what was loaded
print(summary(wine$Type))
|
||||||
|
|
||||||
|
# exploratory plots (because I went down a rabbit hole and by god I'm using it)
|
||||||
|
|
||||||
|
if (has_psych) {
  # pairs panel (psych) – point fill is chosen by class
  psych::pairs.panels(
    wine[, -1],
    gap = 0,
    bg = c("red", "gold", "royalblue")[wine$Type],
    pch = 21,
    main = "wine (uci) – scatterplot matrix by class"
  )
}

if (has_GGally && has_ggplot2) {
  # ggpairs matrix of the predictors (columns 2..ncol), coloured by Type.
  # The object is printed explicitly: a plot object that is merely the value
  # of an `if` block is only drawn by top-level auto-printing, so nothing
  # would appear when this file is run through source().
  print(GGally::ggpairs(wine, ggplot2::aes(colour = Type), columns = 2:ncol(wine)))
}
|
||||||
|
|
||||||
|
# split into train/test BEFORE!!!!!!!!!!!!!!!!!!!!!! any preprocessing to avoid leakage
|
||||||
|
|
||||||
|
# 70/30 train/test split, seeded so the partition is reproducible
set.seed(4600)
n <- nrow(wine)
train_idx <- sample.int(n, size = floor(0.7 * n))
wine_train <- wine[train_idx, , drop = FALSE]
wine_test <- wine[-train_idx, , drop = FALSE]

# column 1 (Type) is the label; everything else is a predictor
X_train <- wine_train[, -1]
y_train <- wine_train$Type
X_test <- wine_test[, -1]
y_test <- wine_test$Type

# Flag constant predictors in the training set. vapply() pins the result to
# one logical per column; na.rm = TRUE in any() lets an undefined variance
# fall through to the explicit NA check below instead of aborting the `if`.
zero_variance <- vapply(X_train, function(x) var(x, na.rm = TRUE) == 0, logical(1))
if (any(zero_variance, na.rm = TRUE)) {
  warning("one or more predictors have zero variance in the training set; scale() would fail")
}

# scalar condition, so the short-circuit `||` is the right operator
if (anyNA(X_train) || anyNA(X_test)) {
  stop("found NA values in predictors; handle missingness before PCA")
}
|
||||||
|
|
||||||
|
# Fit the PCA on the training predictors only (centred and scaled to unit
# variance); train and test are both projected with this fit further down.
pca_tr <- prcomp(X_train, center = TRUE, scale. = TRUE)

# proportion of variance explained (PVE) by each component
pc_variances <- pca_tr$sdev^2
pve_tr <- pc_variances / sum(pc_variances)

# one row per component: label, its PVE, and the running total
pve_df <- data.frame(
  PC = paste0("PC", seq_along(pve_tr)),
  PVE = pve_tr,
  CumPVE = cumsum(pve_tr)
)

print("variance explained (training pca):")
print(pve_df)
|
||||||
|
|
||||||
|
# Variance plots from the training PCA. They are built only when ggplot2 is
# attached: the script warns that plots are skipped without it, and the code
# that saves them already checks exists("p_scree") / exists("p_cumvar").
if (has_ggplot2) {
  # one axis break per component, labelled PC1, PC2, ...
  pc_breaks <- seq_len(nrow(pve_df))

  # scree plot: proportion of variance explained by each component
  p_scree <- ggplot(pve_df, aes(x = seq_along(PVE), y = PVE)) +
    geom_line() +
    geom_point() +
    scale_x_continuous(breaks = pc_breaks, labels = pve_df$PC) +
    labs(
      title = "scree plot – variance explained by principal components (training pca)",
      x = "principal component",
      y = "proportion of variance explained"
    ) +
    theme_minimal()

  # cumulative variance explained
  p_cumvar <- ggplot(pve_df, aes(x = seq_along(CumPVE), y = CumPVE)) +
    geom_line() +
    geom_point() +
    scale_x_continuous(breaks = pc_breaks, labels = pve_df$PC) +
    labs(
      title = "cumulative variance explained (training pca)",
      x = "principal component",
      y = "cumulative proportion of variance"
    ) +
    theme_minimal()
}
|
||||||
|
|
||||||
|
# ========================================================================================================
|
||||||
|
|
||||||
|
# Number of components to keep: the smallest count whose cumulative variance
# reaches the threshold (0.90 or 0.99 are reasonable alternatives).
pc_variance_threshold <- 0.95
pcs_reaching_threshold <- which(cumsum(pve_tr) >= pc_variance_threshold)
k_pcs <- if (length(pcs_reaching_threshold) > 0) {
  pcs_reaching_threshold[[1]]
} else {
  ncol(X_train) # fallback: keep every component when none reaches the threshold
}
cat("chosen number of pcs (threshold =", pc_variance_threshold, "):", k_pcs, "\n")

# project a predictor table into the space of the training-fitted PCA
project_onto_pcs <- function(x) {
  as.data.frame(predict(pca_tr, newdata = x))
}

Z_train_full <- project_onto_pcs(X_train)
Z_test_full <- project_onto_pcs(X_test)

# first k_pcs components only, for downstream modeling
kept_pcs <- seq_len(k_pcs)
Z_train <- Z_train_full[, kept_pcs, drop = FALSE]
Z_test <- Z_test_full[, kept_pcs, drop = FALSE]

# scores for every row of the dataset (train and test), with the class label attached
scores_all <- project_onto_pcs(wine[, -1])
scores_all$Type <- wine$Type
|
||||||
|
|
||||||
|
# Loadings (rotation matrix) of the training PCA, one row per original
# variable, with the variable name kept as a regular column.
loadings <- as.data.frame(pca_tr$rotation)
loadings$Variable <- rownames(loadings)

# rank variables by absolute loading and keep the five strongest per component
pc1_order <- order(abs(loadings$PC1), decreasing = TRUE)
pc2_order <- order(abs(loadings$PC2), decreasing = TRUE)
top_pc1 <- loadings[pc1_order[1:5], c("Variable", "PC1")]
top_pc2 <- loadings[pc2_order[1:5], c("Variable", "PC2")]

print("top contributors to pc1 (training pca):")
print(top_pc1)
print("top contributors to pc2 (training pca):")
print(top_pc2)
|
||||||
|
|
||||||
|
|
||||||
|
# Convex hull outline per class: for each Type keep only the (PC1, PC2)
# points that lie on the hull of that class's scores.
scores <- scores_all
hull_df <- do.call(rbind, lapply(split(scores, scores$Type), function(df) {
  pts <- df[chull(df$PC1, df$PC2), c("PC1", "PC2")]
  pts$Type <- unique(df$Type)
  pts
}))

# PC1 vs PC2 scatter with a shaded hull per class. Built only when ggplot2 is
# attached: the script warns that plots are skipped without it, and the code
# that saves the plot already checks exists("p_pc12").
if (has_ggplot2) {
  p_pc12 <- ggplot(scores, aes(PC1, PC2, color = Type)) +
    geom_point(size = 2, alpha = 0.85) +
    geom_polygon(data = hull_df, aes(fill = Type, group = Type), color = NA, alpha = 0.15) +
    guides(fill = "none") + # the colour legend already identifies the classes
    theme_minimal() +
    labs(title = "pc1 vs pc2 by class (projected with training pca)")
}
|
||||||
|
|
||||||
|
# Loading arrows for the biplot: the PC1/PC2 loadings are stretched by a
# single factor so they are visible on the scale of the scores.
loading_scalefactor <- 3 * max(abs(scores$PC1), abs(scores$PC2)) # heuristic
load_plot_df <- loadings
load_plot_df$PC1s <- load_plot_df$PC1 * loading_scalefactor
load_plot_df$PC2s <- load_plot_df$PC2 * loading_scalefactor

# Biplot: scores coloured by class plus one labelled arrow per variable.
# Built only when ggplot2 is attached: the script warns that plots are
# skipped without it, and the code that saves the plot already checks
# exists("p_biplot").
if (has_ggplot2) {
  p_biplot <- ggplot(scores, aes(PC1, PC2, color = Type)) +
    geom_point(size = 2, alpha = 0.85) +
    geom_segment(
      data = load_plot_df,
      mapping = aes(x = 0, y = 0, xend = PC1s, yend = PC2s),
      inherit.aes = FALSE, # the arrow data has no Type column
      arrow = arrow(length = unit(0.02, "npc")),
      color = "black",
      alpha = 0.8
    ) +
    geom_text(
      data = load_plot_df,
      mapping = aes(x = PC1s, y = PC2s, label = Variable),
      inherit.aes = FALSE,
      hjust = 0,
      vjust = 0
    ) +
    theme_minimal() +
    labs(title = "pc1 vs pc2 with variable loadings (training pca projection)")
}
|
||||||
|
|
||||||
|
# 1) kNN on original variables with standardization
|
||||||
|
# 2) kNN on first 2 principal components only
|
||||||
|
|
||||||
|
# Classification metrics from a confusion matrix.
#
#   cm: square confusion matrix or table of counts; rows are the true class,
#       columns the predicted class, both in the same class order.
#
# Returns a list with two data frames:
#   per_class: class, precision, recall, f1 (one row per class)
#   summary:   accuracy, macro_precision, macro_recall, macro_f1 (one row)
#
# A ratio whose denominator is zero (a class that was never predicted, has no
# true samples, or has precision + recall of 0) is reported as 0. Leaving it
# as NaN and averaging with na.rm = TRUE would silently drop that class from
# the macro averages and inflate them.
#
# Stops if `cm` is not a non-empty, square, numeric matrix.
compute_metrics <- function(cm) {
  cm <- as.matrix(cm)
  stopifnot(is.numeric(cm), nrow(cm) > 0, nrow(cm) == ncol(cm))

  # elementwise num / den, with 0 wherever the denominator is 0
  safe_ratio <- function(num, den) {
    ifelse(den > 0, num / den, 0)
  }

  lv <- rownames(cm)
  if (is.null(lv)) lv <- as.character(seq_len(nrow(cm)))

  TP <- diag(cm)
  FP <- colSums(cm) - TP # predicted as the class, but truly another one
  FN <- rowSums(cm) - TP # truly the class, but predicted as another one

  precision <- safe_ratio(TP, TP + FP)
  recall <- safe_ratio(TP, TP + FN)
  f1 <- safe_ratio(2 * precision * recall, precision + recall)
  acc <- sum(TP) / sum(cm)

  per_class <- data.frame(
    class = lv,
    precision = precision,
    recall = recall,
    f1 = f1,
    row.names = NULL
  )
  # named `overall` locally so the base summary() function is not shadowed
  overall <- data.frame(
    accuracy = acc,
    macro_precision = mean(precision),
    macro_recall = mean(recall),
    macro_f1 = mean(f1)
  )
  list(per_class = per_class, summary = overall)
}
|
||||||
|
|
||||||
|
# Shared settings for both kNN experiments: candidate neighbour counts
# (odd values 1..15) and the number of cross-validation folds.
set.seed(4600)
ks <- seq(1, 15, by = 2)
Kfolds <- 5

# kNN on original vars
# Standardise with the TRAINING mean/sd and apply the same transform to the
# test set, so no test-set information enters the preprocessing.
X_train_scaled <- scale(X_train, center = TRUE, scale = TRUE)
scale_center <- attr(X_train_scaled, "scaled:center")
scale_scale <- attr(X_train_scaled, "scaled:scale")
X_test_scaled <- scale(X_test, center = scale_center, scale = scale_scale)

# random fold label (1..Kfolds) for every training row
n_train_orig <- nrow(X_train_scaled)
folds_orig <- sample(rep(seq_len(Kfolds), length.out = n_train_orig))

# Mean cross-validated accuracy for each candidate k. vapply() pins every
# result to a single number, unlike sapply(), whose return type depends on
# its input.
cv_acc_orig <- vapply(ks, function(k) {
  fold_acc <- vapply(seq_len(Kfolds), function(f) {
    tr <- which(folds_orig != f)
    va <- which(folds_orig == f)
    pred_cv <- knn(
      train = X_train_scaled[tr, , drop = FALSE],
      test = X_train_scaled[va, , drop = FALSE],
      cl = y_train[tr], k = k
    )
    mean(pred_cv == y_train[va])
  }, numeric(1))
  mean(fold_acc)
}, numeric(1))

# which.max() returns the first maximum, so ties go to the smallest k
best_k_orig <- ks[which.max(cv_acc_orig)]
cat("[Original vars] best k:", best_k_orig, "cv acc:", max(cv_acc_orig), "\n")

# refit on the full training set and evaluate once on the held-out test set
pred_orig <- knn(train = X_train_scaled, test = X_test_scaled, cl = y_train, k = best_k_orig)
acc_orig <- mean(pred_orig == y_test)
cm_orig <- table(truth = y_test, pred = pred_orig)

cat("[Original vars] held-out accuracy:", round(acc_orig, 4), "\n")
print(cm_orig)

metrics_orig <- compute_metrics(cm_orig)
print(metrics_orig$summary)
print(metrics_orig$per_class)
|
||||||
|
|
||||||
|
# kNN on first 2 PCs only
Z2_train <- Z_train_full[, 1:2, drop = FALSE]
Z2_test <- Z_test_full[, 1:2, drop = FALSE]
n_train_2pc <- nrow(Z2_train)

# random fold label (1..Kfolds) for every training row
folds_2pc <- sample(rep(seq_len(Kfolds), length.out = n_train_2pc))

# Mean cross-validated accuracy for each candidate k. vapply() pins every
# result to a single number, unlike sapply(), whose return type depends on
# its input.
cv_acc_2pc <- vapply(ks, function(k) {
  fold_acc <- vapply(seq_len(Kfolds), function(f) {
    tr <- which(folds_2pc != f)
    va <- which(folds_2pc == f)
    pred_cv <- knn(
      train = Z2_train[tr, , drop = FALSE],
      test = Z2_train[va, , drop = FALSE],
      cl = y_train[tr], k = k
    )
    mean(pred_cv == y_train[va])
  }, numeric(1))
  mean(fold_acc)
}, numeric(1))

# which.max() returns the first maximum, so ties go to the smallest k
best_k_2pc <- ks[which.max(cv_acc_2pc)]
cat("[First 2 PCs] best k:", best_k_2pc, "cv acc:", max(cv_acc_2pc), "\n")

# refit on the full training set and evaluate once on the held-out test set
pred_2pc <- knn(train = Z2_train, test = Z2_test, cl = y_train, k = best_k_2pc)
acc_2pc <- mean(pred_2pc == y_test)
cm_2pc <- table(truth = y_test, pred = pred_2pc)

cat("[First 2 PCs] held-out accuracy:", round(acc_2pc, 4), "\n")
print(cm_2pc)

metrics_2pc <- compute_metrics(cm_2pc)
print(metrics_2pc$summary)
print(metrics_2pc$per_class)
|
||||||
|
|
||||||
|
# ===========================================================================================
|
||||||
|
# Everything below is written into this directory (created on first use).
outputs_dir <- "outputs"
if (!dir.exists(outputs_dir)) {
  dir.create(outputs_dir, recursive = TRUE, showWarnings = FALSE)
}

# path of a file inside the outputs directory
out_file <- function(name) {
  file.path(outputs_dir, name)
}

# plots: variable name -> file name; a plot is saved only if that variable
# exists and holds a ggplot object
plot_targets <- c(
  p_pc12 = "pc12_scatter.png",
  p_biplot = "pc12_biplot.png",
  p_scree = "pca_scree.png",
  p_cumvar = "pca_cumvar.png"
)
for (plot_var in names(plot_targets)) {
  if (!exists(plot_var)) next
  plot_obj <- get(plot_var)
  if (inherits(plot_obj, "ggplot")) {
    ggsave(
      filename = out_file(plot_targets[[plot_var]]),
      plot = plot_obj, width = 8, height = 6, dpi = 300
    )
  }
}

# top contributors/vars to PC1 and PC2
write.csv(top_pc1, file = out_file("top_contributors_pc1.csv"), row.names = FALSE)
write.csv(top_pc2, file = out_file("top_contributors_pc2.csv"), row.names = FALSE)

# confusion matrices: a wide CSV plus the printed table as plain text
write.csv(as.matrix(cm_orig), file = out_file("confusion_original_wide.csv"))
writeLines(capture.output(cm_orig), con = out_file("confusion_original.txt"))

write.csv(as.matrix(cm_2pc), file = out_file("confusion_2pc_wide.csv"))
writeLines(capture.output(cm_2pc), con = out_file("confusion_2pc.txt"))

# metrics, per model: per-class table and one-row summary
write.csv(metrics_orig$per_class, file = out_file("metrics_original_per_class.csv"), row.names = FALSE)
write.csv(metrics_orig$summary, file = out_file("metrics_original_summary.csv"), row.names = FALSE)
write.csv(metrics_2pc$per_class, file = out_file("metrics_2pc_per_class.csv"), row.names = FALSE)
write.csv(metrics_2pc$summary, file = out_file("metrics_2pc_summary.csv"), row.names = FALSE)

# side-by-side summary of both models, one row per model
both_models <- function(metric) {
  c(metrics_orig$summary[[metric]], metrics_2pc$summary[[metric]])
}
metrics_compare <- data.frame(
  model = c("original_variables", "first_2_pcs"),
  accuracy = both_models("accuracy"),
  macro_precision = both_models("macro_precision"),
  macro_recall = both_models("macro_recall"),
  macro_f1 = both_models("macro_f1")
)
write.csv(metrics_compare, file = out_file("metrics_comparison.csv"), row.names = FALSE)
|
||||||
|
|
||||||
|
# The below was made with help from ChatGPT because the psych package is confusing
# In non-interactive runs, collect the scatterplot matrix and the main PCA
# plots into a single PDF.
if (!interactive() && has_ggplot2) {
  pdf("Rplots_pca_fixed.pdf", width = 8, height = 6)
  # `finally` closes the PDF device even when a plotting call fails, so an
  # error cannot leave the device open and the file unfinished.
  tryCatch(
    {
      if (has_psych) {
        psych::pairs.panels(
          wine[, -1],
          gap = 0,
          bg = c("red", "gold", "royalblue")[wine$Type],
          pch = 21,
          main = "wine (uci) – scatterplot matrix by class"
        )
      }

      if (exists("p_scree") && inherits(p_scree, "ggplot")) print(p_scree)
      if (exists("p_pc12") && inherits(p_pc12, "ggplot")) print(p_pc12)
    },
    finally = dev.off()
  )
  # keep the block's value invisible so nothing is auto-printed once the
  # device has been closed
  invisible(NULL)
}
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
pred
|
||||||
|
truth 1 2 3
|
||||||
|
1 15 2 0
|
||||||
|
2 1 19 1
|
||||||
|
3 0 1 15
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
"","1","2","3"
|
||||||
|
"1",15,2,0
|
||||||
|
"2",1,19,1
|
||||||
|
"3",0,1,15
|
||||||
|
@@ -0,0 +1,5 @@
|
|||||||
|
pred
|
||||||
|
truth 1 2 3
|
||||||
|
1 17 0 0
|
||||||
|
2 1 18 2
|
||||||
|
3 0 0 16
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
"","1","2","3"
|
||||||
|
"1",17,0,0
|
||||||
|
"2",1,18,2
|
||||||
|
"3",0,0,16
|
||||||
|
@@ -0,0 +1,4 @@
|
|||||||
|
"class","precision","recall","f1"
|
||||||
|
"1",0.9375,0.882352941176471,0.909090909090909
|
||||||
|
"2",0.863636363636364,0.904761904761905,0.883720930232558
|
||||||
|
"3",0.9375,0.9375,0.9375
|
||||||
|
@@ -0,0 +1,2 @@
|
|||||||
|
"accuracy","macro_precision","macro_recall","macro_f1"
|
||||||
|
0.907407407407407,0.912878787878788,0.908204948646125,0.910103946441156
|
||||||
|
@@ -0,0 +1,3 @@
|
|||||||
|
"model","accuracy","macro_precision","macro_recall","macro_f1"
|
||||||
|
"original_variables",0.944444444444444,0.944444444444444,0.952380952380952,0.94522732169791
|
||||||
|
"first_2_pcs",0.907407407407407,0.912878787878788,0.908204948646125,0.910103946441156
|
||||||
|
@@ -0,0 +1,4 @@
|
|||||||
|
"class","precision","recall","f1"
|
||||||
|
"1",0.944444444444444,1,0.971428571428571
|
||||||
|
"2",1,0.857142857142857,0.923076923076923
|
||||||
|
"3",0.888888888888889,1,0.941176470588235
|
||||||
|
@@ -0,0 +1,2 @@
|
|||||||
|
"accuracy","macro_precision","macro_recall","macro_f1"
|
||||||
|
0.944444444444444,0.944444444444444,0.952380952380952,0.94522732169791
|
||||||
|
|
After Width: | Height: | Size: 344 KiB |
|
After Width: | Height: | Size: 227 KiB |
|
After Width: | Height: | Size: 101 KiB |
|
After Width: | Height: | Size: 105 KiB |
@@ -0,0 +1,6 @@
|
|||||||
|
"Variable","PC1"
|
||||||
|
"Flavanoids",0.430570697054093
|
||||||
|
"Total_phenols",0.388556731445086
|
||||||
|
"OD280_OD315",0.379238757892512
|
||||||
|
"Proanthocyanins",0.318149910146199
|
||||||
|
"Nonflavanoid_phenols",-0.292569052362651
|
||||||
|
@@ -0,0 +1,6 @@
|
|||||||
|
"Variable","PC2"
|
||||||
|
"Color_intensity",-0.504116493512561
|
||||||
|
"Alcohol",-0.480328824227057
|
||||||
|
"Ash",-0.369020648548877
|
||||||
|
"Proline",-0.3555672525193
|
||||||
|
"Hue",0.300324646690879
|
||||||
|
@@ -0,0 +1,178 @@
|
|||||||
|
1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
|
||||||
|
1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050
|
||||||
|
1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185
|
||||||
|
1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480
|
||||||
|
1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735
|
||||||
|
1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450
|
||||||
|
1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290
|
||||||
|
1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295
|
||||||
|
1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045
|
||||||
|
1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045
|
||||||
|
1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510
|
||||||
|
1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280
|
||||||
|
1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320
|
||||||
|
1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150
|
||||||
|
1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547
|
||||||
|
1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310
|
||||||
|
1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280
|
||||||
|
1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130
|
||||||
|
1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680
|
||||||
|
1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845
|
||||||
|
1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780
|
||||||
|
1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770
|
||||||
|
1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035
|
||||||
|
1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015
|
||||||
|
1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845
|
||||||
|
1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830
|
||||||
|
1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195
|
||||||
|
1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285
|
||||||
|
1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915
|
||||||
|
1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035
|
||||||
|
1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285
|
||||||
|
1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515
|
||||||
|
1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990
|
||||||
|
1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235
|
||||||
|
1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095
|
||||||
|
1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920
|
||||||
|
1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880
|
||||||
|
1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105
|
||||||
|
1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020
|
||||||
|
1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760
|
||||||
|
1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795
|
||||||
|
1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035
|
||||||
|
1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095
|
||||||
|
1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680
|
||||||
|
1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885
|
||||||
|
1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080
|
||||||
|
1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065
|
||||||
|
1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985
|
||||||
|
1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060
|
||||||
|
1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260
|
||||||
|
1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150
|
||||||
|
1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265
|
||||||
|
1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190
|
||||||
|
1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375
|
||||||
|
1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060
|
||||||
|
1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120
|
||||||
|
1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970
|
||||||
|
1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270
|
||||||
|
1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285
|
||||||
|
2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520
|
||||||
|
2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680
|
||||||
|
2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450
|
||||||
|
2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630
|
||||||
|
2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420
|
||||||
|
2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355
|
||||||
|
2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678
|
||||||
|
2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502
|
||||||
|
2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510
|
||||||
|
2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750
|
||||||
|
2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718
|
||||||
|
2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870
|
||||||
|
2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410
|
||||||
|
2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472
|
||||||
|
2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985
|
||||||
|
2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886
|
||||||
|
2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428
|
||||||
|
2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392
|
||||||
|
2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500
|
||||||
|
2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750
|
||||||
|
2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463
|
||||||
|
2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278
|
||||||
|
2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714
|
||||||
|
2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630
|
||||||
|
2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515
|
||||||
|
2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520
|
||||||
|
2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450
|
||||||
|
2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495
|
||||||
|
2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562
|
||||||
|
2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680
|
||||||
|
2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625
|
||||||
|
2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480
|
||||||
|
2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450
|
||||||
|
2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495
|
||||||
|
2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290
|
||||||
|
2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345
|
||||||
|
2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937
|
||||||
|
2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625
|
||||||
|
2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428
|
||||||
|
2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660
|
||||||
|
2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406
|
||||||
|
2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710
|
||||||
|
2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562
|
||||||
|
2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438
|
||||||
|
2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415
|
||||||
|
2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672
|
||||||
|
2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315
|
||||||
|
2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510
|
||||||
|
2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488
|
||||||
|
2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312
|
||||||
|
2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680
|
||||||
|
2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562
|
||||||
|
2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325
|
||||||
|
2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607
|
||||||
|
2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434
|
||||||
|
2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385
|
||||||
|
2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407
|
||||||
|
2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495
|
||||||
|
2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345
|
||||||
|
2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372
|
||||||
|
2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564
|
||||||
|
2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625
|
||||||
|
2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465
|
||||||
|
2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365
|
||||||
|
2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380
|
||||||
|
2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380
|
||||||
|
2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378
|
||||||
|
2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352
|
||||||
|
2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466
|
||||||
|
2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342
|
||||||
|
2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580
|
||||||
|
3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630
|
||||||
|
3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530
|
||||||
|
3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560
|
||||||
|
3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600
|
||||||
|
3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650
|
||||||
|
3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695
|
||||||
|
3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720
|
||||||
|
3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515
|
||||||
|
3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580
|
||||||
|
3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590
|
||||||
|
3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600
|
||||||
|
3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780
|
||||||
|
3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520
|
||||||
|
3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550
|
||||||
|
3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855
|
||||||
|
3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830
|
||||||
|
3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415
|
||||||
|
3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625
|
||||||
|
3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650
|
||||||
|
3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550
|
||||||
|
3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500
|
||||||
|
3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480
|
||||||
|
3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425
|
||||||
|
3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675
|
||||||
|
3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640
|
||||||
|
3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725
|
||||||
|
3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
|
||||||
|
3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
|
||||||
|
3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
|
||||||
|
3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
|
||||||
|
3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
|
||||||
|
3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
|
||||||
|
3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
|
||||||
|
3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
|
||||||
|
3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
|
||||||
|
3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
|
||||||
|
3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
|
||||||
|
3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
|
||||||
|
3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
|
||||||
|
3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
|
||||||
|
3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
|
||||||
|
3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
|
||||||
|
3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
|
||||||
|
3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
|
||||||
|
3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
|
||||||
|
3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
|
||||||
|
3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
|
||||||
|
3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
|
||||||
@@ -0,0 +1,100 @@
|
|||||||
|
1. Title of Database: Wine recognition data
|
||||||
|
Updated Sept 21, 1998 by C.Blake : Added attribute information
|
||||||
|
|
||||||
|
2. Sources:
|
||||||
|
(a) Forina, M. et al, PARVUS - An Extendible Package for Data
|
||||||
|
Exploration, Classification and Correlation. Institute of Pharmaceutical
|
||||||
|
and Food Analysis and Technologies, Via Brigata Salerno,
|
||||||
|
16147 Genoa, Italy.
|
||||||
|
|
||||||
|
(b) Stefan Aeberhard, email: stefan@coral.cs.jcu.edu.au
|
||||||
|
(c) July 1991
|
||||||
|
3. Past Usage:
|
||||||
|
|
||||||
|
(1)
|
||||||
|
S. Aeberhard, D. Coomans and O. de Vel,
|
||||||
|
Comparison of Classifiers in High Dimensional Settings,
|
||||||
|
Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of
|
||||||
|
Mathematics and Statistics, James Cook University of North Queensland.
|
||||||
|
(Also submitted to Technometrics).
|
||||||
|
|
||||||
|
The data was used with many others for comparing various
|
||||||
|
classifiers. The classes are separable, though only RDA
|
||||||
|
has achieved 100% correct classification.
|
||||||
|
(RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data))
|
||||||
|
(All results using the leave-one-out technique)
|
||||||
|
|
||||||
|
In a classification context, this is a well posed problem
|
||||||
|
with "well behaved" class structures. A good data set
|
||||||
|
for first testing of a new classifier, but not very
|
||||||
|
challenging.
|
||||||
|
|
||||||
|
(2)
|
||||||
|
S. Aeberhard, D. Coomans and O. de Vel,
|
||||||
|
"THE CLASSIFICATION PERFORMANCE OF RDA"
|
||||||
|
Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of
|
||||||
|
Mathematics and Statistics, James Cook University of North Queensland.
|
||||||
|
(Also submitted to Journal of Chemometrics).
|
||||||
|
|
||||||
|
Here, the data was used to illustrate the superior performance of
|
||||||
|
the use of a new appreciation function with RDA.
|
||||||
|
|
||||||
|
4. Relevant Information:
|
||||||
|
|
||||||
|
-- These data are the results of a chemical analysis of
|
||||||
|
wines grown in the same region in Italy but derived from three
|
||||||
|
different cultivars.
|
||||||
|
The analysis determined the quantities of 13 constituents
|
||||||
|
found in each of the three types of wines.
|
||||||
|
|
||||||
|
-- I think that the initial data set had around 30 variables, but
|
||||||
|
for some reason I only have the 13 dimensional version.
|
||||||
|
I had a list of what the 30 or so variables were, but a.)
|
||||||
|
I lost it, and b.), I would not know which 13 variables
|
||||||
|
are included in the set.
|
||||||
|
|
||||||
|
-- The attributes are (donated by Riccardo Leardi,
|
||||||
|
riclea@anchem.unige.it )
|
||||||
|
1) Alcohol
|
||||||
|
2) Malic acid
|
||||||
|
3) Ash
|
||||||
|
4) Alcalinity of ash
|
||||||
|
5) Magnesium
|
||||||
|
6) Total phenols
|
||||||
|
7) Flavanoids
|
||||||
|
8) Nonflavanoid phenols
|
||||||
|
9) Proanthocyanins
|
||||||
|
10)Color intensity
|
||||||
|
11)Hue
|
||||||
|
12)OD280/OD315 of diluted wines
|
||||||
|
13)Proline
|
||||||
|
|
||||||
|
5. Number of Instances
|
||||||
|
|
||||||
|
class 1 59
|
||||||
|
class 2 71
|
||||||
|
class 3 48
|
||||||
|
|
||||||
|
6. Number of Attributes
|
||||||
|
|
||||||
|
13
|
||||||
|
|
||||||
|
7. For Each Attribute:
|
||||||
|
|
||||||
|
All attributes are continuous
|
||||||
|
|
||||||
|
No statistics available, but suggest to standardise
|
||||||
|
variables for certain uses (e.g. for use with classifiers
|
||||||
|
which are NOT scale invariant)
|
||||||
|
|
||||||
|
NOTE: 1st attribute is class identifier (1-3)
|
||||||
|
|
||||||
|
8. Missing Attribute Values:
|
||||||
|
|
||||||
|
None
|
||||||
|
|
||||||
|
9. Class Distribution: number of instances per class
|
||||||
|
|
||||||
|
class 1 59
|
||||||
|
class 2 71
|
||||||
|
class 3 48
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"[r]": {
|
||||||
|
// generated automatically? What even....
|
||||||
|
"editor.wordSeparators": "`~!@#$%^&*()-=+[{]}\\|;:'\",<>/",
|
||||||
|
"editor.indentSize": "tabSize",
|
||||||
|
"editor.useTabStops": true,
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
##########################################
### Principal Component Analysis (PCA) ###
##########################################

## packages used by this lab (attach order kept as-is: it decides masking)
library(ggplot2)
library(ggfortify)
library(GGally)
library(e1071)
library(class)
library(psych)
library(readr)

## work from the lab folder so the data file can be opened by bare file name
setwd("~/Courses/Data Analytics/Fall25/labs/lab 4/")

## load the raw table; the file carries no header row
wine <- read_csv(file = "wine.data", col_names = FALSE)

## label the columns: class label first, then the 13 measurements
colnames(wine) <- c(
  "Type",
  "Alcohol",
  "Malic acid",
  "Ash",
  "Alcalinity of ash",
  "Magnesium",
  "Total phenols",
  "Flavanoids",
  "Nonflavanoid Phenols",
  "Proanthocyanins",
  "Color Intensity",
  "Hue",
  "Od280/od315 of diluted wines",
  "Proline"
)

## peek at the first rows
head(wine)

## turn the "Type" column into a factor
####
# A factor tells R that the column is a categorical variable with a finite
# set of possible values, rather than free-form text or a measured quantity
# e.g. "Type" in the Wine dataset can only be 1, 2, or 3
# NOTE(review): read_csv presumably parses this column as a number, not as
# character - confirm with str(wine) before the conversion
####

wine[["Type"]] <- as.factor(wine[["Type"]])


## scatterplot matrix of the 13 measurements, points filled by wine type
pairs.panels(
  wine[, -1],
  gap = 0,
  bg = c("red", "yellow", "blue")[wine[["Type"]]],
  pch = 21
)

## pairwise plots of every column, coloured by wine type
ggpairs(data = wine, mapping = ggplot2::aes(colour = Type))

###
|
||||||
@@ -0,0 +1,111 @@
|
|||||||
|
# Packages this script depends on. Only the ones that are not installed yet
# are fetched, so re-running the script does not reinstall everything.
pkgs <- c("e1071", "caret", "randomForest", "ggplot2", "pROC")
to_install <- pkgs[!pkgs %in% rownames(installed.packages())]
if (length(to_install) > 0) {
  # An explicit mirror is needed for non-interactive runs (Rscript), where
  # install.packages() has no way to ask the user to pick one.
  install.packages(
    to_install,
    dependencies = TRUE,
    repos = "https://cloud.r-project.org"
  )
}

suppressPackageStartupMessages({
  library(e1071)        # svm / tune.svm
  library(caret)        # createDataPartition, preProcess, confusionMatrix
  library(randomForest) # alternative classifier
  library(ggplot2)      # plotting
})

# Seed the RNG so the train/test split below is reproducible.
set.seed(42)
|
||||||
|
|
||||||
|
#' Read the wine data file into a data frame.
#'
#' @param path Path to the headerless, comma-separated data file. Defaults to
#'   "wine.data" in the current working directory, which is what the original
#'   zero-argument call read.
#' @return A data.frame with 14 columns: `Class` (a factor) followed by the 13
#'   measurement columns, named with syntactic (dot-separated) names.
read_wine <- function(path = "wine.data") {
  wine_cols <- c(
    "Class",
    "Alcohol", "Malic.acid", "Ash", "Alcalinity.of.ash", "Magnesium",
    "Total.phenols", "Flavanoids", "Nonflavanoid.phenols", "Proanthocyanins",
    "Color.intensity", "Hue", "OD280.OD315", "Proline"
  )

  df <- read.csv(path, header = FALSE)

  # Fail with a readable message instead of mislabelling columns when the
  # file does not have the expected layout.
  if (ncol(df) != length(wine_cols)) {
    stop(
      "expected ", length(wine_cols), " columns in '", path,
      "' but found ", ncol(df),
      call. = FALSE
    )
  }

  colnames(df) <- wine_cols
  # The first column is a class identifier, not a quantity.
  df$Class <- factor(df$Class)
  df
}
|
||||||
|
|
||||||
|
# load the wine table
df <- read_wine()

# hold out part of the rows for testing (p = 0.8 of them go to training)
idx <- createDataPartition(df[["Class"]], p = 0.8, list = FALSE)
train <- df[idx, ]
test <- df[-idx, ]

# feature subset, chosen up front (before any model was run) with an ANOVA
# F-test in mind:
# alcohol, flavanoids, color intensity, od280/od315, proline, total phenols
features <- c(
  "Alcohol",
  "Flavanoids",
  "Color.intensity",
  "OD280.OD315",
  "Proline",
  "Total.phenols"
)

# class labels for each partition
y_train <- train[["Class"]]
y_test <- test[["Class"]]

# predictor columns for each partition
x_train <- train[, features, drop = FALSE]
x_test <- test[, features, drop = FALSE]

# centre and scale: the transform is estimated on the training rows only and
# then applied unchanged to both partitions
pp <- preProcess(x_train, method = c("center", "scale"))
x_train_s <- predict(pp, x_train)
x_test_s <- predict(pp, x_test)
|
||||||
|
|
||||||
|
# 1) SVM, linear kernel: grid search over the cost C with 5-fold CV
set.seed(42)
lin_grid <- data.frame(cost = c(0.1, 1, 10, 100))
tune_lin <- tune.svm(
  x = x_train_s,
  y = y_train,
  kernel = "linear",
  cost = lin_grid[["cost"]],
  tunecontrol = tune.control(cross = 5)
)
lin_best <- tune_lin[["best.model"]]

# 2) SVM, radial (rbf) kernel: grid search over C and gamma with 5-fold CV
set.seed(42)
rbf_grid_cost <- c(0.1, 1, 10, 100, 1000)
rbf_grid_gamma <- c(0.001, 0.01, 0.1, 1)
tune_rbf <- tune.svm(
  x = x_train_s,
  y = y_train,
  kernel = "radial",
  cost = rbf_grid_cost,
  gamma = rbf_grid_gamma,
  tunecontrol = tune.control(cross = 5)
)
rbf_best <- tune_rbf[["best.model"]]

# 3) comparison model: random forest on the same features, left unscaled
set.seed(42)
rf_fit <- randomForest(
  x = x_train,
  y = y_train,
  ntree = 500,
  mtry = 2,
  importance = TRUE
)
|
||||||
|
|
||||||
|
# evaluation helper
|
||||||
|
#' Score a fitted classifier on held-out data.
#'
#' @param model A fitted model that `predict()` can be called on.
#' @param x_test_s Predictor data passed to `predict()`. It must be
#'   preprocessed the same way as the data the model was fitted on.
#' @param y_test Factor of true class labels for the rows of `x_test_s`.
#' @param name Label stored in the `model` column of the result row.
#' @return A list with `cm`, the confusion matrix object, and `pr`, a one-row
#'   data.frame with columns `model`, `accuracy`, `precision_macro`,
#'   `recall_macro` and `f1_macro`.
eval_model <- function(model, x_test_s, y_test, name) {
  pred <- predict(model, x_test_s)
  cm <- confusionMatrix(pred, y_test)

  # With three or more classes byClass is a matrix (one row per class); with
  # two classes it is a plain named vector. Handle both, so the helper does
  # not fail with "incorrect number of dimensions" on a binary problem.
  by_class <- cm$byClass
  macro_mean <- function(metric) {
    per_class <- if (is.matrix(by_class)) {
      by_class[, metric]
    } else {
      by_class[[metric]]
    }
    # Classes with an undefined metric (NA) are left out of the average.
    mean(per_class, na.rm = TRUE)
  }

  pr <- data.frame(
    model = name,
    # unname(): otherwise the element name "Accuracy" becomes the row name,
    # and rbind()-ing several results yields "Accuracy", "Accuracy1", ...
    accuracy = unname(cm$overall["Accuracy"]),
    precision_macro = macro_mean("Precision"),
    recall_macro = macro_mean("Recall"),
    f1_macro = macro_mean("F1")
  )

  list(cm = cm, pr = pr)
}
|
||||||
|
|
||||||
|
# Score every model on the held-out rows with the same helper, so all three
# rows of the performance table are computed the same way.
# The SVMs were fitted on the centred/scaled features, so they are scored on
# the scaled test set.
lin_eval <- eval_model(lin_best, x_test_s, y_test, "svm_linear")
rbf_eval <- eval_model(rbf_best, x_test_s, y_test, "svm_rbf")

# The random forest was fitted on the raw features, so it is scored on the
# unscaled test set.
rf_eval <- eval_model(rf_fit, x_test, y_test, "random_forest")

# Keep the names the rest of the script uses for the random-forest results.
rf_cm <- rf_eval$cm
rf_pr <- rf_eval$pr

# One row per model.
perf <- rbind(lin_eval$pr, rbf_eval$pr, rf_pr)
|
||||||
|
|
||||||
|
# Report the hyperparameters picked by cross-validation and the metric table.
cat("best params (linear svm): C =", lin_best$cost, "\n")
cat(
  "best params (rbf svm): C =", rbf_best$cost,
  " gamma =", rbf_best$gamma, "\n\n"
)
print(perf)

# macro-F1 comparison.
# The plot is print()ed explicitly: a bare ggplot object is only drawn by
# top-level auto-printing, which does not happen when this script is run
# through source().
print(
  ggplot(perf, aes(x = model, y = f1_macro)) +
    geom_col() +
    labs(title = "macro-F1 by model (wine test set)")
)
|
||||||
|
|
||||||
|
# Save the performance table as a tab-separated text file.
write.table(
  perf,
  file = "lab5_performance_table.txt",
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)

# Write the three confusion matrices to a text file by diverting console
# output. `finally` removes the diversion even if one of the print() calls
# fails; without it, all later console output would keep going into the file.
sink("lab5_confusion_matrices.txt")
tryCatch(
  {
    cat("=== svm linear ===\n")
    print(lin_eval$cm)
    cat("\n=== svm rbf ===\n")
    print(rbf_eval$cm)
    cat("\n=== random forest ===\n")
    print(rf_cm)
  },
  finally = sink()
)
|
||||||
@@ -0,0 +1,178 @@
|
|||||||
|
1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
|
||||||
|
1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050
|
||||||
|
1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185
|
||||||
|
1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480
|
||||||
|
1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735
|
||||||
|
1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450
|
||||||
|
1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290
|
||||||
|
1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295
|
||||||
|
1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045
|
||||||
|
1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045
|
||||||
|
1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510
|
||||||
|
1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280
|
||||||
|
1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320
|
||||||
|
1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150
|
||||||
|
1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547
|
||||||
|
1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310
|
||||||
|
1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280
|
||||||
|
1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130
|
||||||
|
1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680
|
||||||
|
1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845
|
||||||
|
1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780
|
||||||
|
1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770
|
||||||
|
1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035
|
||||||
|
1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015
|
||||||
|
1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845
|
||||||
|
1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830
|
||||||
|
1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195
|
||||||
|
1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285
|
||||||
|
1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915
|
||||||
|
1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035
|
||||||
|
1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285
|
||||||
|
1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515
|
||||||
|
1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990
|
||||||
|
1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235
|
||||||
|
1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095
|
||||||
|
1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920
|
||||||
|
1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880
|
||||||
|
1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105
|
||||||
|
1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020
|
||||||
|
1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760
|
||||||
|
1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795
|
||||||
|
1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035
|
||||||
|
1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095
|
||||||
|
1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680
|
||||||
|
1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885
|
||||||
|
1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080
|
||||||
|
1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065
|
||||||
|
1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985
|
||||||
|
1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060
|
||||||
|
1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260
|
||||||
|
1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150
|
||||||
|
1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265
|
||||||
|
1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190
|
||||||
|
1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375
|
||||||
|
1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060
|
||||||
|
1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120
|
||||||
|
1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970
|
||||||
|
1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270
|
||||||
|
1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285
|
||||||
|
2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520
|
||||||
|
2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680
|
||||||
|
2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450
|
||||||
|
2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630
|
||||||
|
2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420
|
||||||
|
2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355
|
||||||
|
2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678
|
||||||
|
2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502
|
||||||
|
2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510
|
||||||
|
2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750
|
||||||
|
2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718
|
||||||
|
2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870
|
||||||
|
2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410
|
||||||
|
2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472
|
||||||
|
2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985
|
||||||
|
2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886
|
||||||
|
2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428
|
||||||
|
2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392
|
||||||
|
2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500
|
||||||
|
2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750
|
||||||
|
2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463
|
||||||
|
2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278
|
||||||
|
2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714
|
||||||
|
2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630
|
||||||
|
2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515
|
||||||
|
2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520
|
||||||
|
2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450
|
||||||
|
2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495
|
||||||
|
2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562
|
||||||
|
2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680
|
||||||
|
2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625
|
||||||
|
2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480
|
||||||
|
2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450
|
||||||
|
2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495
|
||||||
|
2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290
|
||||||
|
2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345
|
||||||
|
2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937
|
||||||
|
2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625
|
||||||
|
2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428
|
||||||
|
2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660
|
||||||
|
2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406
|
||||||
|
2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710
|
||||||
|
2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562
|
||||||
|
2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438
|
||||||
|
2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415
|
||||||
|
2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672
|
||||||
|
2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315
|
||||||
|
2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510
|
||||||
|
2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488
|
||||||
|
2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312
|
||||||
|
2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680
|
||||||
|
2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562
|
||||||
|
2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325
|
||||||
|
2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607
|
||||||
|
2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434
|
||||||
|
2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385
|
||||||
|
2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407
|
||||||
|
2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495
|
||||||
|
2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345
|
||||||
|
2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372
|
||||||
|
2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564
|
||||||
|
2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625
|
||||||
|
2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465
|
||||||
|
2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365
|
||||||
|
2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380
|
||||||
|
2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380
|
||||||
|
2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378
|
||||||
|
2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352
|
||||||
|
2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466
|
||||||
|
2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342
|
||||||
|
2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580
|
||||||
|
3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630
|
||||||
|
3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530
|
||||||
|
3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560
|
||||||
|
3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600
|
||||||
|
3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650
|
||||||
|
3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695
|
||||||
|
3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720
|
||||||
|
3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515
|
||||||
|
3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580
|
||||||
|
3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590
|
||||||
|
3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600
|
||||||
|
3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780
|
||||||
|
3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520
|
||||||
|
3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550
|
||||||
|
3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855
|
||||||
|
3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830
|
||||||
|
3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415
|
||||||
|
3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625
|
||||||
|
3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650
|
||||||
|
3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550
|
||||||
|
3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500
|
||||||
|
3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480
|
||||||
|
3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425
|
||||||
|
3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675
|
||||||
|
3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640
|
||||||
|
3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725
|
||||||
|
3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
|
||||||
|
3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
|
||||||
|
3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
|
||||||
|
3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
|
||||||
|
3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
|
||||||
|
3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
|
||||||
|
3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
|
||||||
|
3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
|
||||||
|
3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
|
||||||
|
3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
|
||||||
|
3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
|
||||||
|
3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
|
||||||
|
3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
|
||||||
|
3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
|
||||||
|
3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
|
||||||
|
3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
|
||||||
|
3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
|
||||||
|
3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
|
||||||
|
3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
|
||||||
|
3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
|
||||||
|
3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
|
||||||
|
3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
|
||||||
@@ -0,0 +1,100 @@
|
|||||||
|
1. Title of Database: Wine recognition data
|
||||||
|
Updated Sept 21, 1998 by C.Blake : Added attribute information
|
||||||
|
|
||||||
|
2. Sources:
|
||||||
|
(a) Forina, M. et al, PARVUS - An Extendible Package for Data
|
||||||
|
Exploration, Classification and Correlation. Institute of Pharmaceutical
|
||||||
|
and Food Analysis and Technologies, Via Brigata Salerno,
|
||||||
|
16147 Genoa, Italy.
|
||||||
|
|
||||||
|
(b) Stefan Aeberhard, email: stefan@coral.cs.jcu.edu.au
|
||||||
|
(c) July 1991
|
||||||
|
3. Past Usage:
|
||||||
|
|
||||||
|
(1)
|
||||||
|
S. Aeberhard, D. Coomans and O. de Vel,
|
||||||
|
Comparison of Classifiers in High Dimensional Settings,
|
||||||
|
Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of
|
||||||
|
Mathematics and Statistics, James Cook University of North Queensland.
|
||||||
|
(Also submitted to Technometrics).
|
||||||
|
|
||||||
|
The data was used with many others for comparing various
|
||||||
|
classifiers. The classes are separable, though only RDA
|
||||||
|
has achieved 100% correct classification.
|
||||||
|
(RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data))
|
||||||
|
(All results using the leave-one-out technique)
|
||||||
|
|
||||||
|
In a classification context, this is a well posed problem
|
||||||
|
with "well behaved" class structures. A good data set
|
||||||
|
for first testing of a new classifier, but not very
|
||||||
|
challenging.
|
||||||
|
|
||||||
|
(2)
|
||||||
|
S. Aeberhard, D. Coomans and O. de Vel,
|
||||||
|
"THE CLASSIFICATION PERFORMANCE OF RDA"
|
||||||
|
Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of
|
||||||
|
Mathematics and Statistics, James Cook University of North Queensland.
|
||||||
|
(Also submitted to Journal of Chemometrics).
|
||||||
|
|
||||||
|
Here, the data was used to illustrate the superior performance of
|
||||||
|
the use of a new appreciation function with RDA.
|
||||||
|
|
||||||
|
4. Relevant Information:
|
||||||
|
|
||||||
|
-- These data are the results of a chemical analysis of
|
||||||
|
wines grown in the same region in Italy but derived from three
|
||||||
|
different cultivars.
|
||||||
|
The analysis determined the quantities of 13 constituents
|
||||||
|
found in each of the three types of wines.
|
||||||
|
|
||||||
|
-- I think that the initial data set had around 30 variables, but
|
||||||
|
for some reason I only have the 13 dimensional version.
|
||||||
|
I had a list of what the 30 or so variables were, but a.)
|
||||||
|
I lost it, and b.), I would not know which 13 variables
|
||||||
|
are included in the set.
|
||||||
|
|
||||||
|
-- The attributes are (donated by Riccardo Leardi,
|
||||||
|
riclea@anchem.unige.it )
|
||||||
|
1) Alcohol
|
||||||
|
2) Malic acid
|
||||||
|
3) Ash
|
||||||
|
4) Alcalinity of ash
|
||||||
|
5) Magnesium
|
||||||
|
6) Total phenols
|
||||||
|
7) Flavanoids
|
||||||
|
8) Nonflavanoid phenols
|
||||||
|
9) Proanthocyanins
|
||||||
|
10)Color intensity
|
||||||
|
11)Hue
|
||||||
|
12)OD280/OD315 of diluted wines
|
||||||
|
13)Proline
|
||||||
|
|
||||||
|
5. Number of Instances
|
||||||
|
|
||||||
|
class 1 59
|
||||||
|
class 2 71
|
||||||
|
class 3 48
|
||||||
|
|
||||||
|
6. Number of Attributes
|
||||||
|
|
||||||
|
13
|
||||||
|
|
||||||
|
7. For Each Attribute:
|
||||||
|
|
||||||
|
All attributes are continuous
|
||||||
|
|
||||||
|
No statistics available, but suggest to standardise
|
||||||
|
variables for certain uses (e.g. for use with classifiers
|
||||||
|
which are NOT scale invariant)
|
||||||
|
|
||||||
|
NOTE: 1st attribute is class identifier (1-3)
|
||||||
|
|
||||||
|
8. Missing Attribute Values:
|
||||||
|
|
||||||
|
None
|
||||||
|
|
||||||
|
9. Class Distribution: number of instances per class
|
||||||
|
|
||||||
|
class 1 59
|
||||||
|
class 2 71
|
||||||
|
class 3 48
|
||||||