246 lines
12 KiB
Plaintext
246 lines
12 KiB
Plaintext
[Running] Rscript "/home/ion606/Desktop/Homework/Data Analytics/Assignment IV/analysis.r"
|
||
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
|
||
✔ dplyr 1.1.4 ✔ readr 2.1.5
|
||
✔ forcats 1.0.1 ✔ stringr 1.6.0
|
||
✔ ggplot2 4.0.0 ✔ tibble 3.3.0
|
||
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
|
||
✔ purrr 1.1.0
|
||
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
|
||
✖ dplyr::filter() masks stats::filter()
|
||
✖ dplyr::lag() masks stats::lag()
|
||
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
|
||
Registered S3 method overwritten by 'quantmod':
|
||
method from
|
||
as.zoo.data.frame zoo
|
||
[1] "month" "status" "new_answers"
|
||
# A tibble: 6 × 3
|
||
month status new_answers
|
||
<date> <chr> <dbl>
|
||
1 2018-01-01 deleted 26
|
||
2 2018-01-01 non-deleted 159
|
||
3 2018-02-01 deleted 20
|
||
4 2018-02-01 non-deleted 175
|
||
5 2018-03-01 deleted 18
|
||
6 2018-03-01 non-deleted 193
|
||
Rows: 95
|
||
Columns: 11
|
||
$ month <date> 2018-01-01, 2018-02-01, 2018-03-01, 2018-04-01, 2…
|
||
$ answers_total <dbl> 185, 195, 211, 221, 227, 189, 149, 179, 198, 232, …
|
||
$ answers_non_deleted <dbl> 159, 175, 193, 191, 203, 172, 133, 154, 170, 198, …
|
||
$ answers_deleted <dbl> 26, 20, 18, 30, 24, 17, 16, 25, 28, 34, 20, 45, 33…
|
||
$ year <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 20…
|
||
$ month_num <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4,…
|
||
$ time_index <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
|
||
$ post_chatgpt <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
|
||
$ post_ai_policy <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
|
||
$ during_mod_strike <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
|
||
$ period <chr> "pre_chatgpt", "pre_chatgpt", "pre_chatgpt", "pre_…
|
||
file already exists: data/stack-overflow-developer-survey-2023.zip
|
||
file already exists: data/stack-overflow-developer-survey-2024.zip
|
||
[1] "ResponseId" "Q120"
|
||
[3] "MainBranch" "Age"
|
||
[5] "Employment" "RemoteWork"
|
||
[7] "CodingActivities" "EdLevel"
|
||
[9] "LearnCode" "LearnCodeOnline"
|
||
[11] "LearnCodeCoursesCert" "YearsCode"
|
||
[13] "YearsCodePro" "DevType"
|
||
[15] "OrgSize" "PurchaseInfluence"
|
||
[17] "TechList" "BuyNewTool"
|
||
[19] "Country" "Currency"
|
||
[21] "CompTotal" "LanguageHaveWorkedWith"
|
||
[23] "LanguageWantToWorkWith" "DatabaseHaveWorkedWith"
|
||
[25] "DatabaseWantToWorkWith" "PlatformHaveWorkedWith"
|
||
[27] "PlatformWantToWorkWith" "WebframeHaveWorkedWith"
|
||
[29] "WebframeWantToWorkWith" "MiscTechHaveWorkedWith"
|
||
[31] "MiscTechWantToWorkWith" "ToolsTechHaveWorkedWith"
|
||
[33] "ToolsTechWantToWorkWith" "NEWCollabToolsHaveWorkedWith"
|
||
[35] "NEWCollabToolsWantToWorkWith" "OpSysPersonal use"
|
||
[37] "OpSysProfessional use" "OfficeStackAsyncHaveWorkedWith"
|
||
[39] "OfficeStackAsyncWantToWorkWith" "OfficeStackSyncHaveWorkedWith"
|
||
[41] "OfficeStackSyncWantToWorkWith" "AISearchHaveWorkedWith"
|
||
[43] "AISearchWantToWorkWith" "AIDevHaveWorkedWith"
|
||
[45] "AIDevWantToWorkWith" "NEWSOSites"
|
||
[47] "SOVisitFreq" "SOAccount"
|
||
[49] "SOPartFreq" "SOComm"
|
||
[51] "SOAI" "AISelect"
|
||
[53] "AISent" "AIAcc"
|
||
[55] "AIBen" "AIToolInterested in Using"
|
||
[57] "AIToolCurrently Using" "AIToolNot interested in Using"
|
||
[59] "AINextVery different" "AINextNeither different nor similar"
|
||
[61] "AINextSomewhat similar" "AINextVery similar"
|
||
[63] "AINextSomewhat different" "TBranch"
|
||
[65] "ICorPM" "WorkExp"
|
||
[67] "Knowledge_1" "Knowledge_2"
|
||
[69] "Knowledge_3" "Knowledge_4"
|
||
[71] "Knowledge_5" "Knowledge_6"
|
||
[73] "Knowledge_7" "Knowledge_8"
|
||
[75] "Frequency_1" "Frequency_2"
|
||
[77] "Frequency_3" "TimeSearching"
|
||
[79] "TimeAnswering" "ProfessionalTech"
|
||
2023 so visit col: SOVisitFreq
|
||
2023 ai col : SOAI
|
||
2024 so visit col: SOVisitFreq
|
||
2024 ai col : AISelect
|
||
Rows: 146,676
|
||
Columns: 10
|
||
$ year <fct> 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 202…
|
||
$ main_branch <chr> "I am a developer by profession", "I am a developer by pr…
|
||
$ country <chr> "United States of America", "United States of America", "…
|
||
$ age <dbl> 25, 45, 25, 25, 35, 35, 25, 45, 25, 25, 25, 25, 35, 25, 3…
|
||
$ gender <fct> Unknown, Unknown, Unknown, Unknown, Unknown, Unknown, Unk…
|
||
$ so_visit <chr> "Daily or almost daily", "A few times per month or weekly…
|
||
$ ai_select <chr> "I don't think it's super necessary, but I think improvin…
|
||
$ frequent_so <int> 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, …
|
||
$ uses_chatgpt <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, …
|
||
$ age_group <fct> 25-34, 45+, 25-34, 25-34, 35-44, 35-44, 25-34, 45+, 25-34…
|
||
# A tibble: 2 × 7
|
||
period n_months mean_answers median_answers sd_answers min_answers max_answers
|
||
<chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
|
||
1 post_… 36 90.5 88 38.0 11 157
|
||
2 pre_c… 59 193. 185 44.7 122 313
|
||
Warning message:
|
||
Removed 2 rows containing missing values or values outside the scale range
|
||
(`geom_line()`).
|
||
Warning message:
|
||
Removed 2 rows containing missing values or values outside the scale range
|
||
(`geom_line()`).
|
||
[1] -10.02227
|
||
|
||
Call:
|
||
lm(formula = answers_total ~ time + post_chatgpt + chatgpt_time,
|
||
data = its_data)
|
||
|
||
Residuals:
|
||
Min 1Q Median 3Q Max
|
||
-76.623 -22.914 -3.868 13.431 123.402
|
||
|
||
Coefficients:
|
||
Estimate Std. Error t value Pr(>|t|)
|
||
(Intercept) 218.8013 9.3214 23.473 < 2e-16 ***
|
||
time -0.8589 0.2702 -3.179 0.002022 **
|
||
post_chatgptTRUE -17.9635 15.0779 -1.191 0.236601
|
||
chatgpt_time -2.3661 0.6282 -3.767 0.000293 ***
|
||
---
|
||
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
|
||
|
||
Residual standard error: 35.35 on 91 degrees of freedom
|
||
Multiple R-squared: 0.717, Adjusted R-squared: 0.7077
|
||
F-statistic: 76.86 on 3 and 91 DF, p-value: < 2.2e-16
|
||
|
||
# A tibble: 4 × 5
|
||
term estimate std.error statistic p.value
|
||
<chr> <dbl> <dbl> <dbl> <dbl>
|
||
1 (Intercept) 219. 9.32 23.5 2.23e-40
|
||
2 time -0.859 0.270 -3.18 2.02e- 3
|
||
3 post_chatgptTRUE -18.0 15.1 -1.19 2.37e- 1
|
||
4 chatgpt_time -2.37 0.628 -3.77 2.93e- 4
|
||
# A tibble: 1 × 12
|
||
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
|
||
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
|
||
1 0.717 0.708 35.3 76.9 7.39e-25 3 -471. 953. 966.
|
||
# ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
|
||
|
||
Call:
|
||
glm(formula = answers_total ~ time + post_chatgpt + chatgpt_time,
|
||
family = poisson(link = "log"), data = its_data)
|
||
|
||
Coefficients:
|
||
Estimate Std. Error z value Pr(>|z|)
|
||
(Intercept) 5.3936301 0.0183909 293.277 < 2e-16 ***
|
||
time -0.0044547 0.0005512 -8.082 6.38e-16 ***
|
||
post_chatgptTRUE -0.0187737 0.0365851 -0.513 0.608
|
||
chatgpt_time -0.0322028 0.0018440 -17.464 < 2e-16 ***
|
||
---
|
||
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
|
||
|
||
(Dispersion parameter for poisson family taken to be 1)
|
||
|
||
Null deviance: 2879.9 on 94 degrees of freedom
|
||
Residual deviance: 713.8 on 91 degrees of freedom
|
||
AIC: 1363
|
||
|
||
Number of Fisher Scoring iterations: 4
|
||
|
||
# A tibble: 4 × 5
|
||
term estimate std.error statistic p.value
|
||
<chr> <dbl> <dbl> <dbl> <dbl>
|
||
1 (Intercept) 220. 0.0184 293. 0
|
||
2 time 0.996 0.000551 -8.08 6.38e-16
|
||
3 post_chatgptTRUE 0.981 0.0366 -0.513 6.08e- 1
|
||
4 chatgpt_time 0.968 0.00184 -17.5 2.71e-68
|
||
Series: train_ts
|
||
ARIMA(1,1,0)(1,0,0)[12]
|
||
|
||
Coefficients:
|
||
ar1 sar1
|
||
-0.3956 0.3016
|
||
s.e. 0.1360 0.1381
|
||
|
||
sigma^2 = 1142: log likelihood = -281.17
|
||
AIC=568.34 AICc=568.8 BIC=574.47
|
||
|
||
Training set error measures:
|
||
ME RMSE MAE MPE MAPE MASE
|
||
Training set -0.1691686 32.90678 26.65938 -1.989033 14.30025 0.5170032
|
||
ACF1
|
||
Training set 0.03124461
|
||
ME RMSE MAE MPE MAPE MASE
|
||
Training set -0.1691686 32.90678 26.65938 -1.989033 14.30025 0.5170032
|
||
Test set -78.4100374 89.26691 79.26493 -171.518981 171.98870 1.5371782
|
||
ACF1 Theil's U
|
||
Training set 0.03124461 NA
|
||
Test set 0.73383075 7.11443
|
||
dropping predictors with <2 levels: gender
|
||
classification threshold (training frequent_so share): 0.384
|
||
|
||
Call:
|
||
glm(formula = logit_formula, family = binomial(link = "logit"),
|
||
data = survey_train)
|
||
|
||
Coefficients:
|
||
Estimate Std. Error z value Pr(>|z|)
|
||
(Intercept) -0.358743 0.013009 -27.577 < 2e-16 ***
|
||
uses_chatgpt -0.006783 0.066977 -0.101 0.91933
|
||
age_group25-34 0.040677 0.015439 2.635 0.00842 **
|
||
age_group35-44 -0.207571 0.017478 -11.876 < 2e-16 ***
|
||
age_group45+ -0.345739 0.020289 -17.041 < 2e-16 ***
|
||
age_groupunknown -0.222739 0.096177 -2.316 0.02056 *
|
||
year2024 -0.082452 0.012319 -6.693 2.18e-11 ***
|
||
---
|
||
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
|
||
|
||
(Dispersion parameter for binomial family taken to be 1)
|
||
|
||
Null deviance: 156271 on 117339 degrees of freedom
|
||
Residual deviance: 155647 on 117333 degrees of freedom
|
||
AIC: 155661
|
||
|
||
Number of Fisher Scoring iterations: 4
|
||
|
||
# A tibble: 7 × 7
|
||
term estimate std.error statistic p.value conf.low conf.high
|
||
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
|
||
1 (Intercept) 0.699 0.0130 -27.6 2.10e-167 0.681 0.717
|
||
2 uses_chatgpt 0.993 0.0670 -0.101 9.19e- 1 0.870 1.13
|
||
3 age_group25-34 1.04 0.0154 2.63 8.42e- 3 1.01 1.07
|
||
4 age_group35-44 0.813 0.0175 -11.9 1.57e- 32 0.785 0.841
|
||
5 age_group45+ 0.708 0.0203 -17.0 4.09e- 65 0.680 0.736
|
||
6 age_groupunknown 0.800 0.0962 -2.32 2.06e- 2 0.662 0.965
|
||
7 year2024 0.921 0.0123 -6.69 2.18e- 11 0.899 0.943
|
||
pred
|
||
truth 0 1
|
||
0 7560 10549
|
||
1 4009 7218
|
||
$accuracy
|
||
[1] 0.5037497
|
||
|
||
$precision
|
||
[1] 0.4062588
|
||
|
||
$recall
|
||
[1] 0.6429144
|
||
|
||
|
||
[Done] exited with code=0 in 12.272 seconds
|
||
|