##########################################
### Principal Component Analysis (PCA) ###
##########################################

## load libraries
## (library() stops with an error if a package is not installed)
library(ggplot2)
library(ggfortify)
library(GGally)
library(e1071)
library(class)
library(psych)
library(readr)

## set working directory so that files can be referenced without the full path
# NOTE(review): this path is specific to one machine; adjust it (or run the
# script from the lab folder) before sourcing
setwd("~/Courses/Data Analytics/Fall25/labs/lab 4/")

## read dataset
# col_names = FALSE: the first row of the file is read as data, not as a header
wine <- read_csv("wine.data", col_names = FALSE)

## set column names
names(wine) <- c(
  "Type", "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium",
  "Total phenols", "Flavanoids", "Nonflavanoid Phenols", "Proanthocyanins",
  "Color Intensity", "Hue", "Od280/od315 of diluted wines", "Proline"
)

## inspect data frame
head(wine)

## convert the "Type" column to a factor
####
# Factors look like regular strings (characters), but with a factor R knows
# that the column is a categorical variable with a finite set of possible
# values, e.g. "Type" in the Wine dataset can only be 1, 2, or 3
#
# NOTE(review): read_csv() guesses column types, so "Type" is presumably
# numeric rather than character before this step - confirm with head(wine)
####

wine$Type <- as.factor(wine$Type)


## visualize variables

# pairs.panels() on every column except the first ("Type").
# Indexing the colour vector by the factor wine$Type uses the factor's integer
# codes, so each level picks one fill colour (assumes at most three levels).
pairs.panels(
  wine[, -1],
  gap = 0,
  bg = c("red", "yellow", "blue")[wine$Type],
  pch = 21
)

# ggpairs() on all columns, with the colour aesthetic mapped to Type
ggpairs(wine, ggplot2::aes(colour = Type))

###