https://github.com/tlaplus/survey
Results TLA+ Surveys
https://github.com/tlaplus/survey
Last synced: 3 months ago
JSON representation
Results TLA+ Surveys
- Host: GitHub
- URL: https://github.com/tlaplus/survey
- Owner: tlaplus
- License: cc0-1.0
- Created: 2022-05-16T02:55:03.000Z (about 4 years ago)
- Default Branch: main
- Last Pushed: 2022-05-19T17:35:00.000Z (about 4 years ago)
- Last Synced: 2025-12-05T13:58:08.966Z (6 months ago)
- Size: 771 KB
- Stars: 2
- Watchers: 4
- Forks: 0
- Open Issues: 0
-
Metadata Files:
- Readme: README.Rmd
- License: LICENSE
Awesome Lists containing this project
README
---
title: "TLA+ Surveys"
date: "(`r Sys.Date()`)"
output:
  github_document:
    toc: true
    toc_depth: 2
    html_preview: false
---
[2022 survey](https://docs.google.com/forms/d/1GQnqZgfJKTZgZd-TydtdqT8axvRrTdWH7M87VUt0_Wk/edit?usp=sharing)
```{r, echo=FALSE, warning=FALSE, message=FALSE}
## Load the survey responses from the CSV file exported from Google Forms.
data <- read.csv(file = "2022.csv", header = TRUE, sep = ",")
## Give the columns used below short names, addressed by position:
## 2 to 4, 8, 10 to 11, and the eight improvement-area columns 19 to 26
## (https://www.tutorialkart.com/r-tutorial/r-dataframe-change-column-name/).
IA <- c("Spec2Code", "Doc", "Library", "Performance", "Lang", "CI/CD", "Extensibility", "IDE")
names(data)[c(2:4, 8, 10:11, 19:26)] <- c(
  "Frequency", "Expertise", "Learned",
  "Welcome",
  "Country", "Underrepresented",
  IA
)
## Optional filters (disabled): uncomment one to restrict every analysis below
## to the matching subset of respondents.
#data <- data %>% filter(Welcome == "No")
#data <- data %>% filter(Underrepresented == "Yes")
########### Geography
## Label empty country answers (column 10) as "Unknown" before the lookup, to
## prevent countrycode from converting them into Namibia.
data[[10]] <- gsub(pattern = "^$", replacement = "Unknown", x = data[[10]], ignore.case = TRUE)
## Translate the free-text country answers into iso2c codes and into
## countrycode's canonical country names.
library(countrycode)
data$iso2c <- countrycode(data$Country, origin = "country.name", destination = "iso2c")
data$countryname <- countrycode(data$Country, origin = "country.name", destination = "country.name")
library(dplyr)
library(ggplot2)
## Bar chart of the number of responses per country, largest count first.
ggplot(count(data, countryname), aes(x = reorder(countryname, -n), y = n, fill = countryname)) +
  geom_col() +
  xlab("Country") +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1),
    legend.position = "none"
  )
## World map shaded by the number of responses per country.
## Map human-provided countries to iso2c codes (idempotent; repeated here so
## the map code sits next to the column it depends on).
library(countrycode)
data$iso2c <- countrycode(data$Country, origin = "country.name", destination = "iso2c")
## (https://stackoverflow.com/a/64769847/6291195)
library(maps)
world_map <- map_data("world")
world_map <- subset(world_map, region != "Antarctica")
## The following line does the trick that iso2c from above works with the map:
## it replaces the region names of the map with their ISO codes.
world_map$region <- iso.alpha(world_map$region)
## Responses per country code; rows without a recognised country are dropped.
C <- data %>%
  filter(!is.na(iso2c)) %>%
  count(iso2c)
## First layer: blank outline of every region. Second layer: regions that have
## responses, filled by count. expand_limits() makes the axes span the whole
## map because neither layer maps x/y itself.
## NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for outlines;
## `size` is kept so older ggplot2 versions keep working -- confirm target version.
ggplot(C) +
  geom_map(
    data = world_map, map = world_map, aes(map_id = region),
    fill = "white", color = "#7f7f7f", size = 0.25
  ) +
  geom_map(map = world_map, aes(map_id = iso2c, fill = n), size = 0.25) +
  scale_fill_gradient(low = "#fff7bc", high = "#cc4c02", name = "Respondents") +
  expand_limits(x = world_map$long, y = world_map$lat)
########### Improvement Areas
## Columns 19 to 26 hold one answer per improvement area.
nm1 <- c(19:26)
## Turn each answer into points, one column at a time
## (https://stackoverflow.com/a/56963488/6291195,
##  https://stackoverflow.com/a/2290107/6291195):
##   1. strip the textual decorations so only the number remains; the
##      "N/A" answer becomes the empty string,
##   2. convert to numeric (anything non-numeric, including "", becomes NA),
##   3. treat NA as 6 so that it ends up with 0 points,
##   4. invert the scale: points = 6 - answer.
## lapply() is used instead of sapply() so the result is always a list of
## columns, whatever the number of rows.
data[nm1] <- lapply(data[nm1], function(answer) {
  for (label in c(" - Most Important", " - Least Important", "N/A / I do not use this")) {
    ## fixed = TRUE: the labels are literal text, not regular expressions.
    answer <- gsub(label, "", answer, fixed = TRUE)
  }
  rank <- as.numeric(answer)
  rank[is.na(rank)] <- 6
  6 - rank
})
library(tidyr)
library(ggplot2)
## Bar chart of the total points per improvement area, highest total first.
##   responses: the survey data frame, or a row subset of it; columns 19 to 26
##              must hold the numeric per-area points.
##   title:     plot title.
## Returns the ggplot object; calling the function at top level prints it.
plot_improvement_areas <- function(responses, title) {
  ## One-row frame with the column sums, then one row per area.
  totals <- responses %>% summarise_at(19:26, sum, na.rm = TRUE)
  points <- pivot_longer(totals, cols = 1:8, names_to = "Area", values_to = "Points")
  ggplot(points, aes(x = reorder(Area, -Points), y = Points, fill = Area)) +
    geom_bar(stat = "identity") +
    ggtitle(title) +
    xlab("Improvement Area") +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
    theme(legend.position = "none")
}
## All respondents, then one chart per answer to the Frequency question.
plot_improvement_areas(data, "All Demographics")
plot_improvement_areas(data %>% filter(Frequency == "Rarely"), "Rarely Demographics")
plot_improvement_areas(data %>% filter(Frequency == "Monthly"), "Monthly Demographics")
plot_improvement_areas(data %>% filter(Frequency == "Weekly"), "Weekly Demographics")
plot_improvement_areas(data %>% filter(Frequency == "Daily"), "Daily Demographics")
########### Experience
## Convert semantically ordered categorical answers into numeric codes for
## correlation analysis: the position of the answer in `levels` (1 = first
## level); answers not listed in `levels` become NA.
data$FrequencyNum <- as.numeric(factor(data$Frequency, levels = c("Rarely", "Monthly", "Weekly", "Daily")))
## NOTE(review): "useful specifications for work, but it's a struggle" is ranked
## below "simple specifications" here -- confirm this is the intended order.
data$ExpertiseNum <- as.numeric(factor(data$Expertise, levels = c("I can neither read nor write TLA+", "I can write useful specifications for work, but it's a struggle", "I can write simple specifications", "I am productive writing TLA+", "I am an expert")))
data$LearnedNum <- as.numeric(factor(data$Learned, levels = c("Recently", "Within last 12 months", "Within last 5 years", "Longer than 5 years ago")))
nm2 <- c("FrequencyNum", "ExpertiseNum", "LearnedNum")
## Replace NA codes with 0. The replacement must read the same three columns it
## writes; reading any other column set would overwrite the codes computed above.
data[nm2] <- lapply(data[nm2], function(x) replace(x, is.na(x), 0))
library(ggcorrplot)
my_data <- data[, nm2]
## p-values and rounded correlation matrix of the three codings.
p.mat <- cor_pmat(my_data)
corr <- round(cor(my_data), 3)
## Lower-triangle correlation plot with coefficients as labels; the p-value
## matrix computed above is reused instead of being recomputed.
ggcorrplot(corr,
  p.mat = p.mat,
  hc.order = TRUE, type = "lower",
  colors = c("#FC4E07", "white", "#00AFBB"),
  outline.color = "white", lab = TRUE
)
```
#### README.md is generated from README.Rmd on a host with all libraries installed via:
```shell
Rscript -e "rmarkdown::render('README.Rmd')"
```
### Install required libraries and R packages (on macOS) with:
```shell
# Install pandoc and R via Homebrew, then every R package that README.Rmd
# loads with library() (plus rmarkdown for rendering).
brew install pandoc r
Rscript -e "install.packages(c('rmarkdown', 'ggplot2','dplyr', 'here', 'tidyr', 'countrycode', 'maps', 'ggcorrplot'), repos='http://cran.us.r-project.org')"
```