This repository has been archived on 2026-05-09. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Data-Analytics/Lab 5/Data_Analytics2025Fall_Lab4_code.R
2025-11-04 17:33:38 -05:00

42 lines
1.2 KiB
R

##########################################
### Principal Component Analysis (PCA) ###
##########################################
## load libraries
library(ggplot2)
library(ggfortify)
library(GGally)
library(e1071)
library(class)
library(psych)
library(readr)
## locate the data directory
# The lab path below only exists on the original author's machine. Switch to it
# when it is present (same behaviour as before); otherwise stay in the current
# working directory instead of aborting, so "wine.data" is looked up relative
# to wherever the script is run from.
data_dir <- "~/Courses/Data Analytics/Fall25/labs/lab 4/"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  message(
    "Directory '", data_dir, "' not found; reading wine.data from ", getwd()
  )
}

## column names for the wine dataset (the raw file has no header row)
wine_col_names <- c(
  "Type", "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium",
  "Total phenols", "Flavanoids", "Nonflavanoid Phenols", "Proanthocyanins",
  "Color Intensity", "Hue", "Od280/od315 of diluted wines", "Proline"
)

## read dataset, naming the columns as they are read
wine <- read_csv("wine.data", col_names = wine_col_names)

## inspect data frame
head(wine)

## change the data type of the "Type" column from numeric to factor
####
# read_csv() reads "Type" as a number because its values are 1, 2 and 3.
# Converting it to a factor tells R that the column is a categorical variable
# with a finite set of possible values (here: 1, 2, or 3) rather than a
# quantity, which is what the plots below need for colouring by class.
####
wine$Type <- as.factor(wine$Type)

## visualize variables
# One fill colour per wine type; indexing the colour vector with the factor
# picks the colour by the factor's integer level code.
type_colours <- c("red", "yellow", "blue")

# Scatterplot matrix of every variable except the class label (column 1).
pairs.panels(
  wine[, -1],
  gap = 0,
  bg = type_colours[wine$Type],
  pch = 21
)

# Pairwise plots of all columns, coloured by wine type.
ggpairs(wine, ggplot2::aes(colour = Type))
###