An open API service indexing awesome lists of open source software.

https://github.com/hrbrmstr/fbi-2018-ic3

Data wrangling the 2018 FBI IC3 report
https://github.com/hrbrmstr/fbi-2018-ic3

fbi ic3 r rstats

Last synced: 3 months ago
JSON representation

Data wrangling the 2018 FBI IC3 report

Awesome Lists containing this project

README

        

2018 FBI IC3 PDF Data Wrangling
================

``` r
library(readxl)
library(ggbeeswarm)
library(pdftools)
library(stringi)
library(hrbrthemes)
library(ggrepel)
library(tidyverse)
```

``` r
tibble(
year = 2014:2018,
complaints = c(269422, 288012, 298728, 301580, 351937),
losses = c(800.5, 1070.7, 1450.7, 1418.7, 2706.4)
) -> ic3_summary

write_csv(ic3_summary, here::here("data/2018-fbi-ic3-annual-summary.csv"))

ggplot(ic3_summary, aes(complaints, losses)) +
geom_path(
arrow = arrow(type = "closed", length = unit(12, "pt")),
color = "#31739C"
) +
geom_point(color = "#31739C") +
geom_label_repel(
aes(label = year), family = font_rc, size = c(rep(3, 4), 4),
color = c(rep("#3B454A", 4), "black"),
fontface = c(rep("plain", 4), "bold")
) +
scale_x_comma(limits = c(0, NA)) +
scale_y_continuous(label = scales::dollar, limits = c(0, NA)) +
labs(
x = "Number of Complaints", y = "Losses (USD, millions)",
title = "Both Incident Count and Total Losses Related to Cybercrime\nSkyrocketed in the 2018 Edition of the FBI IC3 Report",
subtitle = "Zero baseline; Point labels denote IC3 summary year data",
caption = "Source: 2018 FBI IC3; Page 5 'IC3 Complaint Statistics 2014-2018'"
) +
theme_ipsum_rc()
```

``` r
ic3 <- pdf_text(here::here("raw/2018_IC3Report.pdf"))

ic3[[16]] %>%
stri_split_lines() %>%
unlist() %>%
stri_trim_both() -> l

l[which(stri_detect_regex(l, "^Under")):which(stri_detect_regex(l, "^Over 6"))] %>%
stri_split_regex("[[:space:]]{3,}", simplify = TRUE) %>%
as.data.frame(stringsAsFactors = FALSE) %>%
set_names("age_group", "incidents", "losses") %>%
as_tibble() %>%
mutate(losses = stri_replace_first_fixed(losses, "$", "")) %>%
type_convert(
col_types = cols(
age_group = col_character(),
incidents = col_number(),
losses = col_number()
)
) -> loss

write_csv(loss, here::here("data/2018-fbi-ic3-loss-by-age.csv"))

mutate(loss, `Loss Ratio (USD, milions)` = losses/incidents) %>%
rename(
`Total Losses (USD, milions)` = losses,
`Total Incidents` = incidents
) %>%
mutate(age_group = stri_replace_first_fixed(age_group, " ", "\n")) %>%
mutate(age_group = factor(age_group, age_group)) %>%
gather(measure, value, -age_group) %>%
mutate(
measure = factor(measure, levels = c(
"Total Incidents", "Total Losses (USD, milions)", "Loss Ratio (USD, milions)"))
) %>%
ggplot(aes(age_group, value)) +
geom_col(width=0.45, fill = "#31739C") +
scale_x_discrete() +
scale_y_comma() +
facet_wrap(~measure, scales = "free") +
labs(
x = NULL, y = "Loss Ratio (total losses/victim count)",
title = "In 2018, Older Victims Generally Lost More Overall and Per-Incident Than Younger Victims",
subtitle = "Note that 40-49 age group had more incients than older groups but fewer overall losses.",
caption = "NOTE: Free Y Scale\nSource: 2018 FBI IC3; Page 16 '2018 Victims by Age Group'"
) +
theme_ipsum_rc(grid="Y")
```

``` r
ic3[[19]] %>%
stri_split_lines() %>%
unlist() %>%
keep(stri_detect_regex, "^[[:upper:]]") %>%
keep(stri_detect_regex, "[[:digit:]]") %>%
stri_replace_first_regex("([[:digit:]]) ([[:alpha:]\\*])", "$1\t$2") %>%
stri_split_fixed("\t") %>%
unlist() %>%
keep(stri_detect_regex, " [[:digit:]]") %>%
stri_match_first_regex("([^[:digit:]]+)([[:digit:],]+)$") %>%
.[,2:3] %>%
as.data.frame(stringsAsFactors=FALSE) %>%
as_tibble() %>%
mutate_all(.funs = stri_trim_both) %>%
type_convert(
col_types = cols(
V1 = col_character(),
V2 = col_number()
)
) %>%
mutate(V1 = case_when(
stri_detect_fixed(V1, "IPR/Copy") ~ "IPR/Copyright and Counterfeit",
TRUE ~ V1
)) %>%
set_names(c("crime", "victim_count")) %>%
head(-2) %>%
arrange(desc(victim_count)) -> victims

ic3[[20]] %>%
stri_split_lines() %>%
unlist() %>%
keep(stri_detect_regex, "^ [[:upper:]]") %>%
keep(stri_detect_regex, "[[:digit:]]") %>%
stri_trim_both() %>%
stri_replace_first_regex("([[:digit:]]) ([[:alpha:]\\*])", "$1\t$2") %>%
stri_split_fixed("\t") %>%
unlist() %>%
stri_replace_first_regex("([[:digit:]])[[:space:]]+([[:alpha:]\\*])", "$1\t$2") %>%
stri_split_fixed("\t") %>%
unlist() %>%
keep(stri_detect_regex, "\\$[[:digit:]]") %>%
stri_match_first_regex("([^\\$]+)([\\$[:digit:],\\.]+)$") %>%
.[,2:3] %>%
as.data.frame(stringsAsFactors=FALSE) %>%
as_tibble() %>%
mutate_all(.funs = stri_trim_both) %>%
mutate(V2 = stri_replace_first_fixed(V2, "$", "")) %>%
type_convert(
col_types = cols(
V1 = col_character(),
V2 = col_number()
)
) %>%
mutate(V1 = case_when(
stri_detect_fixed(V1, "IPR/Copy") ~ "IPR/Copyright and Counterfeit",
stri_detect_fixed(V1, "Malware/Sca") ~ "Malware/Scareware/Virus",
stri_detect_fixed(V1, "Harassment/T") ~ "Harassment/Threats of Violence",
stri_detect_fixed(V1, "Ransomware") ~ "Ransomware",
stri_detect_fixed(V1, "Denial of Service") ~ "Denial of Service/TDoS",
stri_detect_fixed(V1, "Re-Shipping") ~ "Re-shipping",
TRUE ~ V1
)) %>%
set_names(c("crime", "loss")) %>%
head(-2) %>%
left_join(victims, "crime") %>%
mutate(loss = loss / 1000000) -> crime_types

write_csv(crime_types, here::here("data/2018-fbi-ic3-loss-by-crime-type.csv"))

ggplot() +
geom_point(
data = mutate(crime_types, color = case_when(
(loss >= 100) | (victim_count >= 20000) ~ "#E85E26",
TRUE ~ "#31739C"
)),
aes(victim_count, loss, color = I(color))
) +
geom_label_repel(
data = filter(crime_types, (loss >= 100) | (victim_count >= 20000)),
aes(victim_count, loss, label = crime),
size = 3, family = font_rc
) +
scale_x_comma() +
scale_y_continuous(label = scales::dollar) +
labs(
x = "Victim count", y = "Loss (USD, millions)",
title = "[Business] E-mail Account Compromise was the Most Profitable\nIC3 Crime in 2018 with over $1.2 billion (USD) in Losses",
subtitle = "Markers only on IC3 crimes with ≥$100m (USD) losses or ≥20,000 victims ",
caption = "Source: 2018 FBI IC3; Pages 19-20 '2018 Crime Types'"
) +
theme_ipsum_rc(grid="XY")
```

``` r
arrange(crime_types, desc(loss)) %>%
select(`Crime` = 1, `Loss (USD, millions)` = 2, `Victim Count` = 3) %>%
gt::gt() %>%
gt::fmt_number("Victim Count", decimals = 0) %>%
gt::fmt_currency("Loss (USD, millions)", decimals = 2)
```

html {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans', 'Helvetica Neue', Arial, sans-serif;
}

#jemiiwsncq .gt_table {
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
color: #000000;
font-size: 16px;
background-color: #FFFFFF;
/* table.background.color */
width: auto;
/* table.width */
border-top-style: solid;
/* table.border.top.style */
border-top-width: 2px;
/* table.border.top.width */
border-top-color: #A8A8A8;
/* table.border.top.color */
}

#jemiiwsncq .gt_heading {
background-color: #FFFFFF;
/* heading.background.color */
border-bottom-color: #FFFFFF;
}

#jemiiwsncq .gt_title {
color: #000000;
font-size: 125%;
/* heading.title.font.size */
padding-top: 4px;
/* heading.top.padding */
padding-bottom: 1px;
border-bottom-color: #FFFFFF;
border-bottom-width: 0;
}

#jemiiwsncq .gt_subtitle {
color: #000000;
font-size: 85%;
/* heading.subtitle.font.size */
padding-top: 1px;
padding-bottom: 4px;
/* heading.bottom.padding */
border-top-color: #FFFFFF;
border-top-width: 0;
}

#jemiiwsncq .gt_bottom_border {
border-bottom-style: solid;
/* heading.border.bottom.style */
border-bottom-width: 2px;
/* heading.border.bottom.width */
border-bottom-color: #A8A8A8;
/* heading.border.bottom.color */
}

#jemiiwsncq .gt_column_spanner {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #A8A8A8;
padding-top: 4px;
padding-bottom: 4px;
}

#jemiiwsncq .gt_col_heading {
color: #000000;
background-color: #FFFFFF;
/* column_labels.background.color */
font-size: 16px;
/* column_labels.font.size */
font-weight: initial;
/* column_labels.font.weight */
padding: 10px;
margin: 10px;
}

#jemiiwsncq .gt_sep_right {
border-right: 5px solid #FFFFFF;
}

#jemiiwsncq .gt_group_heading {
padding: 8px;
color: #000000;
background-color: #FFFFFF;
/* stub_group.background.color */
font-size: 16px;
/* stub_group.font.size */
font-weight: initial;
/* stub_group.font.weight */
border-top-style: solid;
/* stub_group.border.top.style */
border-top-width: 2px;
/* stub_group.border.top.width */
border-top-color: #A8A8A8;
/* stub_group.border.top.color */
border-bottom-style: solid;
/* stub_group.border.bottom .style */
border-bottom-width: 2px;
/* stub_group.border.bottom .width */
border-bottom-color: #A8A8A8;
/* stub_group.border.bottom .color */
}

#jemiiwsncq .gt_empty_group_heading {
padding: 0.5px;
color: #000000;
background-color: #FFFFFF;
/* stub_group.background.color */
font-size: 16px;
/* stub_group.font.size */
font-weight: initial;
/* stub_group.font.weight */
border-top-style: solid;
/* stub_group.border.top.style */
border-top-width: 2px;
/* stub_group.border.top.width */
border-top-color: #A8A8A8;
/* stub_group.border.top.color */
border-bottom-style: solid;
/* stub_group.border.bottom .style */
border-bottom-width: 2px;
/* stub_group.border.bottom .width */
border-bottom-color: #A8A8A8;
/* stub_group.border.bottom .color */
}

#jemiiwsncq .gt_striped tr:nth-child(even) {
background-color: #f2f2f2;
}

#jemiiwsncq .gt_row {
padding: 10px;
/* row.padding */
margin: 10px;
}

#jemiiwsncq .gt_stub {
border-right-style: solid;
border-right-width: 2px;
border-right-color: #A8A8A8;
text-indent: 5px;
}

#jemiiwsncq .gt_stub.gt_row {
background-color: #FFFFFF;
}

#jemiiwsncq .gt_summary_row {
background-color: #FFFFFF;
/* summary_row.background.color */
padding: 6px;
/* summary_row.padding */
text-transform: inherit;
/* summary_row.text_transform */
}

#jemiiwsncq .gt_first_summary_row {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #A8A8A8;
}

#jemiiwsncq .gt_table_body {
border-top-style: solid;
/* field.border.top.style */
border-top-width: 2px;
/* field.border.top.width */
border-top-color: #A8A8A8;
/* field.border.top.color */
border-bottom-style: solid;
/* field.border.bottom.style */
border-bottom-width: 2px;
/* field.border.bottom.width */
border-bottom-color: #A8A8A8;
/* field.border.bottom.color */
}

#jemiiwsncq .gt_footnote {
font-size: 90%;
/* footnote.font.size */
padding: 4px;
/* footnote.padding */
}

#jemiiwsncq .gt_sourcenote {
font-size: 90%;
/* sourcenote.font.size */
padding: 4px;
/* sourcenote.padding */
}

#jemiiwsncq .gt_center {
text-align: center;
}

#jemiiwsncq .gt_left {
text-align: left;
}

#jemiiwsncq .gt_right {
text-align: right;
font-variant-numeric: tabular-nums;
}

#jemiiwsncq .gt_font_normal {
font-weight: normal;
}

#jemiiwsncq .gt_font_bold {
font-weight: bold;
}

#jemiiwsncq .gt_font_italic {
font-style: italic;
}

#jemiiwsncq .gt_super {
font-size: 65%;
}

#jemiiwsncq .gt_footnote_glyph {
font-style: italic;
font-size: 65%;
}

Crime

Loss (USD, millions)

Victim Count

BEC/EAC

$1,297.80

20,373

Confidence Fraud/Romance

$362.50

18,493

Investment

$252.96

3,693

Non-Payment/Non-Delivery

$183.83

65,116

Real Estate/Rental

$149.46

11,300

Personal Data Breach

$148.89

50,642

Corporate Data Breach

$117.71

2,480

Identity Theft

$100.43

16,128

Advanced Fee

$92.27

16,362

Credit Card Fraud

$88.99

15,210

Extortion

$83.36

51,146

Spoofing

$70.00

15,569

Government Impersonation

$64.21

10,978

Other

$63.13

10,826

Lottery/Sweepstakes

$60.21

7,146

Overpayment

$53.23

15,512

Phishing/Vishing/Smishing/Pharming

$48.24

26,379

Employment

$45.49

14,979

Tech Support

$38.70

14,408

Harassment/Threats of Violence

$21.90

18,415

Misrepresentation

$20.00

5,959

IPR/Copyright and Counterfeit

$15.80

2,249

Civil Matter

$15.17

768

Malware/Scareware/Virus

$7.41

2,811

Health Care Related

$4.47

337

Ransomware

$3.62

1,493

Denial of Service/TDoS

$2.05

1,799

Re-shipping

$1.68

907

Charity

$1.01

493

Gambling

$0.93

181

Crimes Against Children

$0.27

1,394

Hacktivist

$0.08

77

Terrorism

$0.01

120

No Lead Value

$0.00

36,936

``` r
ic3[[21]] %>%
stri_split_lines() %>%
unlist() %>%
keep(stri_detect_regex, "^[[:digit:]]") %>%
stri_replace_first_regex("([[:digit:],]+)[[:space:]]+([[:digit:]])", "$1\t$2") %>%
stri_split_fixed("\t") %>%
unlist() %>%
stri_replace_first_regex("^[[:digit:] ]+", "") %>%
stri_replace_first_regex("[[:space:]]+([[:digit:]])", "\t$1") %>%
stri_split_fixed("\t", simplify = TRUE) %>%
as.data.frame(stringsAsFactors=FALSE) %>%
as_tibble() %>%
mutate_all(.funs = stri_trim_both) %>%
mutate(V1 = case_when(
stri_detect_fixed(V1, "Northern Marina") ~ "Northern Mariana Islands",
TRUE ~ V1
)) %>%
type_convert(
col_types = cols(
V1 = col_character(),
V2 = col_number()
)
) %>%
set_names(c("state", "victim_count")) -> state_vics

ic3[[23]] %>%
stri_split_lines() %>%
unlist() %>%
keep(stri_detect_regex, "^[[:space:]]+[[:digit:]]") %>%
stri_replace_first_regex("([[:digit:],]+)[[:space:]]+([[:digit:]])", "$1\t$2") %>%
stri_split_fixed("\t") %>%
unlist() %>%
stri_replace_first_regex("^[[:digit:] ]+", "") %>%
stri_replace_first_regex("[[:space:]]+([[:digit:]])", "\t$1") %>%
stri_split_fixed("\t", simplify = TRUE) %>%
as.data.frame(stringsAsFactors=FALSE) %>%
as_tibble() %>%
mutate_all(.funs = stri_trim_both) %>%
mutate(V1 = case_when(
stri_detect_fixed(V1, "Northern Marina") ~ "Northern Mariana Islands",
TRUE ~ V1
)) %>%
type_convert(
col_types = cols(
V1 = col_character(),
V2 = col_number()
)
) %>%
set_names(c("state", "subject_earnings")) -> subj_earnings

ic3[[22]] %>%
stri_split_lines() %>%
unlist() %>%
keep(stri_detect_regex, "^ [[:digit:]]") %>%
stri_trim_both() %>%
stri_replace_first_regex("([[:digit:],]+)[[:space:]]+([[:digit:]])", "$1\t$2") %>%
stri_split_fixed("\t") %>%
unlist() %>%
stri_replace_first_regex("^[[:digit:] ]+", "") %>%
stri_replace_first_regex("[[:space:]]+\\$([[:digit:]])", "\t$1") %>%
stri_split_fixed("\t", simplify = TRUE) %>%
as.data.frame(stringsAsFactors=FALSE) %>%
as_tibble() %>%
mutate_all(.funs = stri_trim_both) %>%
type_convert(
col_types = cols(
V1 = col_character(),
V2 = col_number()
)
) %>%
set_names(c("state", "loss")) %>%
left_join(subj_earnings, "state") %>%
left_join(state_vics, "state") %>%
mutate(subject_earnings = subject_earnings / 1000000) %>%
mutate(loss = loss / 1000000) -> by_state

if (!file.exists(here::here("raw/2018-pop-est.xlsx"))) {
download.file(
url = "https://www2.census.gov/programs-surveys/popest/tables/2010-2018/national/totals/nst-est2018-01.xlsx",
destfile = here::here("raw/2018-pop-est.xlsx")
)
}

read_excel(here::here("raw/2018-pop-est.xlsx"), skip=9, col_names = FALSE) %>%
select(state = 1, pop_2018 = 12) %>%
mutate(state = stri_replace_first_fixed(state, ".", "")) %>%
filter(!is.na(state), !is.na(pop_2018)) %>%
add_row(state = "U.S. Virgin Islands", pop_2018 = 104914) %>%
add_row(state = "Guam", pop_2018 = 165718) %>%
add_row(state = "U.S. Minor Outlying Islands", pop_2018 = 270) %>%
add_row(state = "American Samoa", pop_2018 = 55679) %>%
add_row(state = "Northern Mariana Islands", pop_2018 = 55194) -> pops

left_join(by_state, pops, "state") %>%
mutate(
loss_per_vic = loss/victim_count,
frac = victim_count / pop_2018
) -> by_state

write_csv(by_state, here::here("data/2018-fbi-ic3-loss-by-state.csv"))
```

``` r
arrange(by_state, desc(frac)) %>%
mutate(loss_per_vic = loss_per_vic * 1000000) %>%
select(
`State` = 1,
`Victim Count` = 4,
`Loss (USD, millions)` = 3,
`Loss per Victim (USD)` = 6,
`% Population Impacted` = 7,
`Subject Earnings (USD, millions)` = 2
) %>%
gt::gt() %>%
gt::fmt_number("Victim Count", decimals = 0) %>%
gt::fmt_currency("Loss (USD, millions)", decimals = 2) %>%
gt::fmt_currency("Loss per Victim (USD)", decimals = 2) %>%
gt::fmt_percent("% Population Impacted", decimals = 4) %>%
gt::fmt_currency("Subject Earnings (USD, millions)", decimals = 2)
```

html {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans', 'Helvetica Neue', Arial, sans-serif;
}

#pvgcvpomyy .gt_table {
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
color: #000000;
font-size: 16px;
background-color: #FFFFFF;
/* table.background.color */
width: auto;
/* table.width */
border-top-style: solid;
/* table.border.top.style */
border-top-width: 2px;
/* table.border.top.width */
border-top-color: #A8A8A8;
/* table.border.top.color */
}

#pvgcvpomyy .gt_heading {
background-color: #FFFFFF;
/* heading.background.color */
border-bottom-color: #FFFFFF;
}

#pvgcvpomyy .gt_title {
color: #000000;
font-size: 125%;
/* heading.title.font.size */
padding-top: 4px;
/* heading.top.padding */
padding-bottom: 1px;
border-bottom-color: #FFFFFF;
border-bottom-width: 0;
}

#pvgcvpomyy .gt_subtitle {
color: #000000;
font-size: 85%;
/* heading.subtitle.font.size */
padding-top: 1px;
padding-bottom: 4px;
/* heading.bottom.padding */
border-top-color: #FFFFFF;
border-top-width: 0;
}

#pvgcvpomyy .gt_bottom_border {
border-bottom-style: solid;
/* heading.border.bottom.style */
border-bottom-width: 2px;
/* heading.border.bottom.width */
border-bottom-color: #A8A8A8;
/* heading.border.bottom.color */
}

#pvgcvpomyy .gt_column_spanner {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #A8A8A8;
padding-top: 4px;
padding-bottom: 4px;
}

#pvgcvpomyy .gt_col_heading {
color: #000000;
background-color: #FFFFFF;
/* column_labels.background.color */
font-size: 16px;
/* column_labels.font.size */
font-weight: initial;
/* column_labels.font.weight */
padding: 10px;
margin: 10px;
}

#pvgcvpomyy .gt_sep_right {
border-right: 5px solid #FFFFFF;
}

#pvgcvpomyy .gt_group_heading {
padding: 8px;
color: #000000;
background-color: #FFFFFF;
/* stub_group.background.color */
font-size: 16px;
/* stub_group.font.size */
font-weight: initial;
/* stub_group.font.weight */
border-top-style: solid;
/* stub_group.border.top.style */
border-top-width: 2px;
/* stub_group.border.top.width */
border-top-color: #A8A8A8;
/* stub_group.border.top.color */
border-bottom-style: solid;
/* stub_group.border.bottom .style */
border-bottom-width: 2px;
/* stub_group.border.bottom .width */
border-bottom-color: #A8A8A8;
/* stub_group.border.bottom .color */
}

#pvgcvpomyy .gt_empty_group_heading {
padding: 0.5px;
color: #000000;
background-color: #FFFFFF;
/* stub_group.background.color */
font-size: 16px;
/* stub_group.font.size */
font-weight: initial;
/* stub_group.font.weight */
border-top-style: solid;
/* stub_group.border.top.style */
border-top-width: 2px;
/* stub_group.border.top.width */
border-top-color: #A8A8A8;
/* stub_group.border.top.color */
border-bottom-style: solid;
/* stub_group.border.bottom .style */
border-bottom-width: 2px;
/* stub_group.border.bottom .width */
border-bottom-color: #A8A8A8;
/* stub_group.border.bottom .color */
}

#pvgcvpomyy .gt_striped tr:nth-child(even) {
background-color: #f2f2f2;
}

#pvgcvpomyy .gt_row {
padding: 10px;
/* row.padding */
margin: 10px;
}

#pvgcvpomyy .gt_stub {
border-right-style: solid;
border-right-width: 2px;
border-right-color: #A8A8A8;
text-indent: 5px;
}

#pvgcvpomyy .gt_stub.gt_row {
background-color: #FFFFFF;
}

#pvgcvpomyy .gt_summary_row {
background-color: #FFFFFF;
/* summary_row.background.color */
padding: 6px;
/* summary_row.padding */
text-transform: inherit;
/* summary_row.text_transform */
}

#pvgcvpomyy .gt_first_summary_row {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #A8A8A8;
}

#pvgcvpomyy .gt_table_body {
border-top-style: solid;
/* field.border.top.style */
border-top-width: 2px;
/* field.border.top.width */
border-top-color: #A8A8A8;
/* field.border.top.color */
border-bottom-style: solid;
/* field.border.bottom.style */
border-bottom-width: 2px;
/* field.border.bottom.width */
border-bottom-color: #A8A8A8;
/* field.border.bottom.color */
}

#pvgcvpomyy .gt_footnote {
font-size: 90%;
/* footnote.font.size */
padding: 4px;
/* footnote.padding */
}

#pvgcvpomyy .gt_sourcenote {
font-size: 90%;
/* sourcenote.font.size */
padding: 4px;
/* sourcenote.padding */
}

#pvgcvpomyy .gt_center {
text-align: center;
}

#pvgcvpomyy .gt_left {
text-align: left;
}

#pvgcvpomyy .gt_right {
text-align: right;
font-variant-numeric: tabular-nums;
}

#pvgcvpomyy .gt_font_normal {
font-weight: normal;
}

#pvgcvpomyy .gt_font_bold {
font-weight: bold;
}

#pvgcvpomyy .gt_font_italic {
font-style: italic;
}

#pvgcvpomyy .gt_super {
font-size: 65%;
}

#pvgcvpomyy .gt_footnote_glyph {
font-style: italic;
font-size: 65%;
}

State

Victim Count

Loss (USD, millions)

Loss per Victim (USD)

% Population Impacted

Subject Earnings (USD, millions)

U.S. Minor Outlying Islands

47

$0.00

$2,049.91

17.4074%

$0.10

Alaska

1,603

$0.00

$2,256.30

0.2174%

$3.62

District of Columbia

1,364

$0.00

$6,524.80

0.1942%

$8.90

Virginia

14,800

$0.01

$2,958.95

0.1738%

$43.79

Nevada

5,228

$0.00

$5,531.93

0.1723%

$28.92

Colorado

9,328

$0.00

$3,653.82

0.1638%

$34.08

Maryland

8,777

$0.00

$5,375.44

0.1452%

$47.18

Washington

10,775

$0.00

$5,616.07

0.1430%

$60.51

California

49,031

$0.02

$9,187.70

0.1240%

$450.48

Wisconsin

6,621

$0.00

$3,722.89

0.1139%

$24.65

Florida

23,984

$0.01

$7,427.51

0.1126%

$178.14

Arizona

8,027

$0.00

$5,626.77

0.1119%

$45.17

Oregon

4,511

$0.00

$6,340.05

0.1076%

$28.60

New Mexico

2,127

$0.00

$4,051.61

0.1015%

$8.62

Rhode Island

1,028

$0.00

$6,740.27

0.0972%

$6.93

Utah

3,041

$0.00

$6,779.82

0.0962%

$20.62

New Jersey

8,440

$0.00

$9,444.52

0.0947%

$79.71

Alabama

4,585

$0.00

$3,688.35

0.0938%

$16.91

Delaware

897

$0.00

$3,502.11

0.0927%

$3.14

New York

18,124

$0.01

$11,095.24

0.0927%

$201.09

Missouri

5,508

$0.00

$4,643.74

0.0899%

$25.58

Massachusetts

6,173

$0.00

$11,054.95

0.0894%

$68.24

Texas

25,589

$0.01

$7,644.34

0.0892%

$195.61

Connecticut

3,134

$0.00

$12,080.38

0.0877%

$37.86

Georgia

9,095

$0.00

$6,758.33

0.0865%

$61.47

Idaho

1,513

$0.00

$4,529.54

0.0862%

$6.85

Wyoming

497

$0.00

$9,088.79

0.0860%

$4.52

Vermont

525

$0.00

$4,052.03

0.0838%

$2.13

Tennessee

5,584

$0.00

$5,120.06

0.0825%

$28.59

Pennsylvania

10,554

$0.00

$5,940.19

0.0824%

$62.69

Illinois

10,087

$0.00

$8,213.52

0.0792%

$82.85

New Hampshire

1,056

$0.00

$5,761.96

0.0778%

$6.08

Hawaii

1,100

$0.00

$5,873.44

0.0774%

$6.46

Minnesota

4,304

$0.00

$11,341.56

0.0767%

$48.81

Michigan

7,533

$0.00

$10,743.37

0.0754%

$80.93

Louisiana

3,469

$0.00

$4,726.51

0.0744%

$16.40

Montana

787

$0.00

$8,401.60

0.0741%

$6.61

North Carolina

7,523

$0.00

$18,241.52

0.0725%

$137.23

Kansas

2,098

$0.00

$8,329.25

0.0721%

$17.47

South Carolina

3,575

$0.00

$5,473.54

0.0703%

$19.57

Indiana

4,676

$0.00

$6,325.43

0.0699%

$29.58

Oklahoma

2,644

$0.00

$4,382.72

0.0671%

$11.59

Ohio

7,812

$0.00

$12,510.25

0.0668%

$97.73

Mississippi

1,882

$0.00

$3,041.99

0.0630%

$5.73

Kentucky

2,813

$0.00

$3,324.84

0.0630%

$9.35

Iowa

1,983

$0.00

$7,734.73

0.0628%

$15.34

Nebraska

1,205

$0.00

$7,822.97

0.0625%

$9.43

Maine

832

$0.00

$3,244.89

0.0622%

$2.70

U.S. Virgin Islands

65

$0.00

$41,904.46

0.0620%

$2.72

West Virginia

1,109

$0.00

$7,483.10

0.0614%

$8.30

Arkansas

1,849

$0.00

$3,770.43

0.0614%

$6.97

North Dakota

459

$0.00

$5,003.90

0.0604%

$2.30

South Dakota

465

$0.00

$3,728.66

0.0527%

$1.73

Guam

52

$0.00

$2,981.83

0.0314%

$0.16

American Samoa

16

$0.00

$1,158.56

0.0287%

$0.02

Northern Mariana Islands

15

$0.00

$924.33

0.0272%

$0.01

Puerto Rico

704

$0.00

$7,413.48

0.0220%

$5.22

``` r
by_state <- mutate(by_state, loss_per_vic = loss_per_vic * 1000000)

ggplot(by_state) +
geom_quasirandom(aes(x="", loss_per_vic)) -> gg

gb <- ggplot_build(gg)

as_tibble(gb$data[[1]]) %>%
select(x, y) %>%
left_join(by_state, c("y" = "loss_per_vic")) %>%
rename(loss_per_vic = y) -> gd

ggplot() +
geom_blank(data = gd, aes(x, loss_per_vic)) +
geom_hline(
yintercept = round(median(gd$loss_per_vic)),
linetype = "dotted", color = "#3B454A"
) +
geom_label(
data = data.frame(),
aes(
x = 0.5, y = round(median(gd$loss_per_vic)),
label = sprintf(
"2018 IC3 Median\nPer In-State\nVictim Loss\n($%s USD)",
scales::comma(round(median(gd$loss_per_vic)))
)
), size = 3, family = font_rc, hjust = 0, vjust = 0,
label.size = 0, lineheight = 0.875
) +
geom_point(
data = mutate(gd, color = case_when(
(loss_per_vic >= 10000) ~ "#E85E26",
TRUE ~ "#31739C"
)),
aes(x=x, loss_per_vic, color = I(color))
) +
geom_label_repel(nudge_y = 2500,
data = filter(gd, loss_per_vic >= 10000),
aes(x=x, loss_per_vic, label = state),
size = 3, family = font_rc
) +
scale_x_continuous(expand = c(0,0.125)) +
scale_y_continuous(label = scales::dollar) +
labs(
x = "Victim count", y = "Loss (USD, millions)",
title = "U.S. Virgin Islands Residents Were Hit Hardest\nin IC3 2018 Catalogued Incidents",
subtitle = "Markers only on IC3 states with ≥$10K (USD) losses per in-state victim",
caption = "Source: 2018 FBI IC3; Pages 21-22 '2018 Overall State Statistics'"
) +
theme_ipsum_rc(grid="XY")
```