diff --git a/Lab 1/Lab1_code_sample.R b/Lab 1/Lab1_code_sample.R index fb291ee..a4ba08b 100755 --- a/Lab 1/Lab1_code_sample.R +++ b/Lab 1/Lab1_code_sample.R @@ -1,9 +1,12 @@ library(readr) library(EnvStats) +# install.packages(c("readr", "EnvStats")) # set working directory (relative path) -setwd("~/Desktop/R/Lab 1/") + +# paste function my beloved <3 (only descend when not already inside "Lab 1", so the script can be re-run in the same session) +if (basename(getwd()) != "Lab 1") setwd(paste(getwd(), "Lab 1", sep="/")) # read data epi.data <- read_csv("epi_results_2024_pop_gdp.csv") @@ -12,7 +15,7 @@ epi.data <- read_csv("epi_results_2024_pop_gdp.csv") View(epi.data) # print summary of variables in dataframe -summary(epi.data$RLI.new) +summary(epi.data$RLI.new) # print values in variable epi.data$RLI.new @@ -58,7 +61,7 @@ PHL.noNA PHL.above30 <- PHL.noNA[PHL.noNA>30] PHL.above30 - + # stats summary(PHL.above30) @@ -69,21 +72,38 @@ boxplot(RLI, PHL.above30, names = c("RHI","PHL")) ### Histograms ### # histogram (frequency distribution) -hist(RLI) +# hist(RLI) # define sequence of values over which to plot histogram -x <- seq(20., 80., 10) - +# derive the breaks from the observed range of RLI, rounded outward to multiples of 5, so they always span the data +rng <- range(RLI, na.rm = TRUE) +lo <- floor(rng[1] / 5) * 5 +hi <- ceiling(rng[2] / 5) * 5 +brks <- seq(lo, hi, by = 1) + +# NOTE: the error quoted below is what hist() raises when a vector is passed as its second positional argument while breaks= is also named; the vector is then matched to freq, which must be a single TRUE/FALSE.
+# [1] "range 0 lo 0 hi 100 brks 50" "range 97.7 lo 0 hi 100 brks 50" +# Error in freq && !equidist : 'length = 15' in coercion to 'logical(1)' +# Calls: hist -> hist.default -> plot -> plot.histogram +# Execution halted +print(paste("range", rng, "lo", lo, "hi", hi, "brks", brks)) + +hist(RLI, + breaks = brks, + prob = TRUE) + +x <- seq(20, 90, by = 5) + # histogram (frequency distribution) over range -hist(RLI, x, prob=TRUE) +hist(RLI, breaks=brks, prob=TRUE) # brks spans the data; an extra positional vector would be matched to freq # print estimated density curve for variable -lines(density(RLI,na.rm=TRUE,bw=1.)) # or try bw=“SJ” +lines(density(RLI, na.rm=TRUE)) # or try bw="SJ" # print rug rug(RLI) -x <- seq(20., 80., 5) +x <- seq(5., 95., 5) # histogram (frequency distribution) over rabge hist(RLI, breaks = "FD", prob=TRUE) @@ -99,7 +119,7 @@ rug(RLI) hist(RLI.new, breaks = "FD", prob=TRUE) # range -x1<-seq(20,80,1) +x1<-seq(5,95,1) # generate probability density values for a normal distribution with given mean and sd d1 <- dnorm(x1,mean=45, sd=11,log=FALSE) @@ -136,8 +156,8 @@ qqnorm(x); qqline(x) # print quantile-quantile plot for variable with any theoretical distribution -qqplot(rnorm(180), RLI.new.sub, xlab = "Q-Q plot for norm dsn") -qqline(RLI.new.sub) +qqplot(rnorm(180), RLI.sub, xlab = "Q-Q plot for norm dsn") +qqline(RLI.sub) # print quantile-quantile plot for 2 variables qqplot(RLI, PHL, xlab = "Q-Q plot for RHI vs PHL") diff --git a/Lab 1/Rplots.pdf b/Lab 1/Rplots.pdf new file mode 100644 index 0000000..7ea2119 Binary files /dev/null and b/Lab 1/Rplots.pdf differ diff --git a/Lab 1/findMinMax.sh b/Lab 1/findMinMax.sh new file mode 100644 index 0000000..3252eec --- /dev/null +++ b/Lab 1/findMinMax.sh @@ -0,0 +1,42 @@ +awk -F, ' +NR==1{ + for(i=1;i<=NF;i++){ + h=$i; gsub(/[\r"]/,"",h) # strip CR and quotes so quoted header names still match + if(h=="RLI.new") rli=i + if(h=="PHL.new") phl=i + if(h=="country") country=i + if(h=="iso") iso=i + } + next +} +{ + # RLI.new + if(rli){ + v=$rli; gsub(/"/,"",v) + if(v ~ /^[+-]?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$/){ + v+=0 + if(!rli_min_set || v<rli_min){ rli_min=v; rli_min_country=$(country); rli_min_iso=$(iso); rli_min_set=1 }
+ if(!rli_max_set || v>rli_max){ rli_max=v; rli_max_country=$(country); rli_max_iso=$(iso); rli_max_set=1 } + } + } + # PHL.new + if(phl){ + w=$phl; gsub(/"/,"",w) + if(w ~ /^[+-]?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$/){ + w+=0 + if(!phl_min_set || w<phl_min){ phl_min=w; phl_min_country=$(country); phl_min_iso=$(iso); phl_min_set=1 } + if(!phl_max_set || w>phl_max){ phl_max=w; phl_max_country=$(country); phl_max_iso=$(iso); phl_max_set=1 } + } + } +} +END{ + if(rli){ + print "RLI.new min:", rli_min, " (", rli_min_iso, "-", rli_min_country, ")" + print "RLI.new max:", rli_max, " (", rli_max_iso, "-", rli_max_country, ")" + } else { print "naur col RLI.new" } + if(phl){ + print "PHL.new min:", phl_min, " (", phl_min_iso, "-", phl_min_country, ")" + print "PHL.new max:", phl_max, " (", phl_max_iso, "-", phl_max_country, ")" + } else { print "naur col PHL.new" } +} +' epi_results_2024_pop_gdp.csv \ No newline at end of file