##########################################
### Principal Component Analysis (PCA) ###
##########################################

## load libraries
library(ggplot2)
library(ggfortify)
library(GGally)
library(e1071)
library(class)
library(psych)
library(readr)

## set working directory so that files can be referenced without the full path
# NOTE(review): this path is machine-specific; adjust it to wherever
# "wine.data" lives before running the script.
setwd("~/Courses/Data Analytics/Fall25/labs/lab 4/")

## read dataset
# The file is read without a header row (col_names = FALSE), so the real
# column names are assigned in the next step.
wine <- read_csv("wine.data", col_names = FALSE)

## set column names
wine_columns <- c(
  "Type", "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium",
  "Total phenols", "Flavanoids", "Nonflavanoid Phenols", "Proanthocyanins",
  "Color Intensity", "Hue", "Od280/od315 of diluted wines", "Proline"
)
# Fail early with a clear message if the file does not have the expected
# number of columns, instead of silently mislabelling the data.
stopifnot(ncol(wine) == length(wine_columns))
names(wine) <- wine_columns

## inspect data frame
head(wine)

## change the data type of the "Type" column to factor
####
# read_csv guesses the column type from the values, so a column holding
# 1, 2, 3 is presumably read as numeric rather than as a category.
# Converting it to a factor tells R that the column is a categorical
# variable with a finite set of possible values,
# e.g. "Type" in the Wine dataset can only be 1, 2, or 3
####
wine$Type <- as.factor(wine$Type)

## visualize variables
# Scatter-plot matrix of every column except "Type" (the first column);
# the point fill colour is picked by indexing the palette with the factor.
pairs.panels(
  wine[, -1],
  gap = 0,
  bg = c("red", "yellow", "blue")[wine$Type],
  pch = 21
)

# Pairwise plots of all columns, coloured by wine type.
ggpairs(wine, ggplot2::aes(colour = Type))

###