diff --git a/Lab 1/Lab1_code_sample.R b/Lab 1/Lab1_code_sample.R
index b5017f1..007f3be 100755
--- a/Lab 1/Lab1_code_sample.R
+++ b/Lab 1/Lab1_code_sample.R
@@ -2,6 +2,7 @@
 library(readr)
 library(EnvStats)
 library(nortest)
+# install.packages(c("readr", "EnvStats"))
 
 # set working directory (relative path)
 setwd("~/Desktop/Data Analytics/Lab 1")
@@ -79,16 +80,16 @@ boxplot(RLI, PHL.above30, names = c("RHI","PHL"))
 ### Histograms ###
 
 # histogram (frequency distribution)
-hist(RLI)
+# hist(RLI)
 
 # define sequence of values over which to plot histogram
 x <- seq(0., 100., 10)
 
 # histogram (frequency distribution) over range
-hist(RLI, x, prob=TRUE)
+hist(RLI, breaks=x, prob=TRUE)
 
 # print estimated density curve for variable
-lines(density(RLI,na.rm=TRUE,bw=1.)) # or try bw=“SJ”
+lines(density(RLI, na.rm=TRUE)) # or try bw="SJ"
 
 # print rug
 rug(RLI)
@@ -109,7 +110,7 @@ rug(RLI)
 hist(RLI.new, breaks = "FD", prob=TRUE)
 
 # range
-x1<-seq(20,80,1)
+x1<-seq(5,95,1)
 
 # generate probability density values for a normal distribution with given mean and sd
 d1 <- dnorm(x1,mean=45, sd=11,log=FALSE)
diff --git a/Lab 1/findMinMax.sh b/Lab 1/findMinMax.sh
new file mode 100644
index 0000000..3252eec
--- /dev/null
+++ b/Lab 1/findMinMax.sh
@@ -0,0 +1,50 @@
+# Print the minimum and maximum of the RLI.new and PHL.new columns of
+# epi_results_2024_pop_gdp.csv, each with the iso and country fields of the
+# row it came from. Run from the directory that contains the CSV.
+awk -F, '
+# Header row: remember the field index of every column used below.
+NR==1{
+  for(i=1;i<=NF;i++){
+    h=$i; gsub(/\r/,"",h)  # drop any carriage return (e.g. CRLF line endings)
+    if(h=="RLI.new") rli=i
+    if(h=="PHL.new") phl=i
+    if(h=="country") country=i
+    if(h=="iso") iso=i
+  }
+  next
+}
+# Data rows. Fields are split on bare commas (-F,), so a quoted field that
+# itself contains a comma would shift the columns.
+{
+  # RLI.new
+  if(rli){
+    v=$rli; gsub(/["\r]/,"",v)
+    # values that do not look like a number are skipped
+    if(v ~ /^[+-]?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$/){
+      v+=0  # make the comparisons below numeric rather than string
+      if(!rli_min_set || v < rli_min){ rli_min=v; rli_min_country=$(country); rli_min_iso=$(iso); rli_min_set=1 }
+      if(!rli_max_set || v > rli_max){ rli_max=v; rli_max_country=$(country); rli_max_iso=$(iso); rli_max_set=1 }
+    }
+  }
+  # PHL.new
+  if(phl){
+    w=$phl; gsub(/["\r]/,"",w)
+    if(w ~ /^[+-]?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$/){
+      w+=0
+      if(!phl_min_set || w < phl_min){ phl_min=w; phl_min_country=$(country); phl_min_iso=$(iso); phl_min_set=1 }
+      if(!phl_max_set || w > phl_max){ phl_max=w; phl_max_country=$(country); phl_max_iso=$(iso); phl_max_set=1 }
+    }
+  }
+}
+# After the last row: report the extremes, or that a column was not found.
+END{
+  if(rli){
+    print "RLI.new min:", rli_min, " (", rli_min_iso, "-", rli_min_country, ")"
+    print "RLI.new max:", rli_max, " (", rli_max_iso, "-", rli_max_country, ")"
+  } else { print "naur col RLI.new" }
+  if(phl){
+    print "PHL.new min:", phl_min, " (", phl_min_iso, "-", phl_min_country, ")"
+    print "PHL.new max:", phl_max, " (", phl_max_iso, "-", phl_max_country, ")"
+  } else { print "naur col PHL.new" }
+}
+' epi_results_2024_pop_gdp.csv
\ No newline at end of file