diff --git a/Lab 1/Lab1_code_sample.R b/Lab 1/Lab1_code_sample.R index fb291ee..b5017f1 100755 --- a/Lab 1/Lab1_code_sample.R +++ b/Lab 1/Lab1_code_sample.R @@ -1,9 +1,12 @@ library(readr) library(EnvStats) +library(nortest) # set working directory (relative path) -setwd("~/Desktop/R/Lab 1/") +setwd("~/Desktop/Data Analytics/Lab 1") + +pdf("all_plots.pdf", width = 8, height = 6) # read data epi.data <- read_csv("epi_results_2024_pop_gdp.csv") @@ -17,9 +20,7 @@ summary(epi.data$RLI.new) # print values in variable epi.data$RLI.new - -######## Optional ######## -## If you want to reference the variable without using the dataframe: +# AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA # attach dataframe attach(epi.data) @@ -43,6 +44,15 @@ PHL <- epi.data$PHL.new PHL +# no NAs +RLI_noNA <- epi.data$RLI.new[!is.na(epi.data$RLI.new)]; +PHL_noNA <- epi.data$PHL.new[!is.na(epi.data$PHL.new)]; + +set.seed(1); +RLI_sub <- sample(RLI_noNA, size = min(180, length(RLI_noNA))); +RLI_new_sub <- RLI_sub; # only if you truly need a second alias + + # find NAs inv variavle - outputs vector of logical values, true if NA, false otherwise NAs <- is.na(PHL) @@ -72,7 +82,7 @@ boxplot(RLI, PHL.above30, names = c("RHI","PHL")) hist(RLI) # define sequence of values over which to plot histogram -x <- seq(20., 80., 10) +x <- seq(0., 100., 10) # histogram (frequency distribution) over range hist(RLI, x, prob=TRUE) @@ -83,7 +93,7 @@ lines(density(RLI,na.rm=TRUE,bw=1.)) # or try bw=“SJ” # print rug rug(RLI) -x <- seq(20., 80., 5) +x <- seq(0., 100., 5) # histogram (frequency distribution) over rabge hist(RLI, breaks = "FD", prob=TRUE) @@ -136,8 +146,8 @@ qqnorm(x); qqline(x) # print quantile-quantile plot for variable with any theoretical distribution -qqplot(rnorm(180), RLI.new.sub, xlab = "Q-Q plot for norm dsn") -qqline(RLI.new.sub) +qqplot(rnorm(180), RLI_sub, xlab = "Q-Q plot for norm dsn") +qqline(RLI_sub) # print quantile-quantile plot for 2 variables qqplot(RLI, PHL, xlab = "Q-Q plot for RHI vs PHL") @@ -171,3 +181,5 @@ wilcox.test(x,y) var.test(x,y) t.test(x,y) + +dev.off() diff --git a/Lab 1/Rplots.pdf b/Lab 1/Rplots.pdf new file mode 100644 index 0000000..7b8e24e Binary files /dev/null and b/Lab 1/Rplots.pdf differ diff --git a/Lab 1/all_plots.pdf b/Lab 1/all_plots.pdf new file mode 100644 index 0000000..4297a1a Binary files /dev/null and b/Lab 1/all_plots.pdf differ