Ecosyste.ms: Awesome

An open API service indexing awesome lists of open source software.

Awesome Lists | Featured Topics | Projects

https://github.com/evamaerey/tidypivot

Declarative group-wise count and compute, describing your target table
https://github.com/evamaerey/tidypivot

Last synced: 11 days ago
JSON representation

Declarative group-wise count and compute, describing your target table

Awesome Lists containing this project

README

        

---
output:
github_document:
toc: TRUE
---

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "man/figures/README-",
out.width = "100%"
)
knitr::opts_chunk$set(message = F, warning = F)
```

# {tidypivot} allows you to create tables by describing them (like ggplot2 plot description/declaration)

note: see original discussion here: https://evamaerey.github.io/mytidytuesday/2022-02-14-tables/tables.html and thoughtful contributions from @shannonpileggi and @brshallow https://github.com/EvaMaeRey/mytidytuesday/issues/3

> And, you know, I'd get a dataset. And, *in my head I could very clearly kind of picture*, I want to put this on the x-axis. Let's put this on the y-axis, draw a line, put some points here, break it up by this variable. And then, like, getting that vision out of my head, and into reality, it's just really, really hard. Just, like, felt harder than it should be. Like, there's a lot of custom programming involved, where I just felt, like, to me, I just wanted to say, like, you know, *this is what I'm thinking, this is how I'm picturing this plot. Like you're the computer 'Go and do it'.* ... and I'd also been reading about the Grammar of Graphics by Leland Wilkinson, I got to meet him a couple of times and ... I was, like, this book has been, like, written for me. - https://www.trifacta.com/podcast/tidy-data-with-hadley-wickham/

# declarative table creation with ggplot2

```{r}
library(ggplot2)
StatSum$default_aes <- aes(label = after_stat(n))

# I want to put this on the x-axis (cols)
tidytitanic::tidy_titanic |>
ggplot(
# I want to put this on the x-axis (cols)
aes(x = sex,
# I want to put this on the y- axis (rows)
y = survived)
)

# grouping and computation happen in one step, filling in 'table'
last_plot() +
stat_sum(geom = "text")
```

## Status quo table creation: Harder than it should be?

- 1. grouping
- 2. compute
- 3. pivot

### pivotr function: toward declarative table generation

Under the hood:

- group by rows and columns
- value in data to consider (1 if not specified)
- wt, weight the value (1 if not specified)
- fun - do an operation (on value) within group

But API:

- describe layout of table and compute

```{r tp, eval = T}
#' Title
#'
#' @param data
#' @param rows
#' @param cols
#' @param value
#' @param wt
#' @param fun
#' @param prop
#' @param percent
#' @param round
#' @param within
#' @param withinfun
#' @param pivot
#' @param wrap
#' @param totals_within
#'
#' @return
#' @export
#'
#' @examples
pivotr <- function(data,
rows = NULL,
cols = NULL,

value = NULL,
wt = NULL,

fun = NULL,

prop = FALSE,
percent = FALSE,
round = NULL,

within = NULL,
withinfun = NULL,

pivot = NULL,
wrap = NULL,
totals_within = NULL
){


cols_quo <- rlang::enquo(cols)
value_quo <- rlang::enquo(value)
wt_quo <- rlang::enquo(wt)
within_quo <- rlang::enquo(within)
totals_within_quo <- rlang::enquo(totals_within)

if(is.null(prop)) {prop <- FALSE}
if(is.null(pivot)){pivot <- TRUE}
if(is.null(wrap)) {wrap <- FALSE}

if(is.null(fun)) {fun <- sum}

## adding a value as 1 if there is none

if(rlang::quo_is_null(value_quo) ){

data <- data |>
dplyr::mutate(value = 1)

}else{

data <- data |>
dplyr::mutate(value = {{value}})

}

#### weighting ####

if(!rlang::quo_is_null(wt_quo) ){

data <- data |>
dplyr::mutate(value = value * {{wt}})
}


### grouping by tabulation vars col and row
grouped <- data |>
dplyr::group_by(dplyr::across(c({{cols}}, {{rows}})),
.drop = FALSE)

### summarizing ####

summarized <- grouped |>
dplyr::summarise(value = fun(value))

# proportion case or percent
if(prop|percent){

mult <- ifelse(percent, 100, 1)
if(is.null(round)){round <- ifelse(percent, 1, 3)}

# prop is across all data
if(rlang::quo_is_null(within_quo) ){

summarized <- summarized |>
dplyr::ungroup() |>
dplyr::mutate(value = round(value*mult/sum(value), round))

# prop is within categories specified by within variable
}else{

summarized <- summarized |>
dplyr::ungroup() |>
dplyr::group_by(dplyr::across(c({{within}})),
.drop = FALSE) |>
dplyr::mutate(value = round(value*mult/sum(value), round))

}
}

arranged <- summarized

ungrouped <- arranged |>
dplyr::ungroup()

tidy <- ungrouped

# do not pivot if argument pivot false or if no columns specified
if(pivot == F | rlang::quo_is_null(cols_quo)){

tidy
# tidy |>
# dplyr::rename(count = .data$value)

# otherwise pivot by columns
}else{

tidy |>
tidyr::pivot_wider(names_from = {{cols}})

}

}

```

```{r}
data_define_value <- function(data, value = NULL, wt = NULL){

value_quo <- rlang::enquo(value)
wt_quo <- rlang::enquo(wt)

if(rlang::quo_is_null(value_quo) ){

## adding a value as 1 if there is none
data <- data |>
dplyr::mutate(value = 1)

}else{

data <- data |>
dplyr::mutate(value = {{value}})

}

#### weighting ####

if(!rlang::quo_is_null(wt_quo) ){

data <- data |>
dplyr::mutate(value = .data$value * {{wt}})
}

data

}

data_to_grouped <- function(data, cols, rows){

### grouping by tabulation vars col and row
data |>
dplyr::group_by(dplyr::across(c({{cols}}, {{rows}})),
.drop = FALSE)


}

data_grouped_to_summarized <- function(data, fun = NULL){

if(is.null(fun)) {fun <- sum}

## adding a value as 1 if there is none

### summarizing ####

data |>
dplyr::summarise(value = fun(.data$value))


}

data_summarized_to_proportioned <- function(data, prop = F, percent = F, within = NULL, round = 2){
# proportion case or percent

within_quo <- rlang::enquo(within)
# totals_within_quo <- rlang::enquo(totals_within)

if(is.null(prop)) {prop <- FALSE}

if(prop|percent){

mult <- ifelse(percent, 100, 1)
if(is.null(round)){round <- ifelse(percent, 1, 3)}

# prop is across all data
if(rlang::quo_is_null(within_quo) ){

data <- data |>
dplyr::ungroup() |>
dplyr::mutate(value = round(.data$value*mult/sum(.data$value), round))

# prop is within categories specified by within variable
}else{

data <- data |>
dplyr::ungroup() |>
dplyr::group_by(dplyr::across(c({{within}})),
.drop = FALSE) |>
dplyr::mutate(value = round(.data$value*mult/sum(.data$value), round))

}
}

data

}

data_proportioned_to_pivoted <- function(data, pivot = T, cols = NULL){

cols_quo <- rlang::enquo(cols)
if(is.null(pivot)){pivot <- TRUE}

tidy <- data |>
dplyr::ungroup()

# do not pivot if argument pivot false or if no columns specified
if(pivot == F | rlang::quo_is_null(cols_quo)){

tidy

# otherwise pivot by columns
}else{

tidy |>
tidyr::pivot_wider(names_from = {{cols}})

}

}
```

```{r}
tidytitanic::flat_titanic |>
data_define_value(value = freq) |>
data_to_grouped(rows = survived, cols = sex) |>
data_grouped_to_summarized() |>
data_summarized_to_proportioned(percent = T, within = survived) |>
data_proportioned_to_pivoted(cols = sex)
```

```{r}


pivotr <- function(data,
rows = NULL,
cols = NULL,

value = NULL,
wt = NULL,

fun = NULL,

prop = FALSE,
percent = FALSE,
round = NULL,

within = NULL,

pivot = NULL
){


data |>
data_define_value(value = {{value}}, wt = {{wt}}) |>
data_to_grouped(rows = {{rows}}, cols = {{cols}}) |>
data_grouped_to_summarized(fun = fun) |>
data_summarized_to_proportioned(prop = prop, percent = percent, within = {{within}}, round = round) |>
data_proportioned_to_pivoted(pivot = pivot, cols = {{cols}})

}

tidytitanic::flat_titanic |>
pivotr(value = freq, rows = survived, cols = sex, percent = T, within = survived)

```

```{r}
library(tidytitanic)

tidy_titanic |> pivotr()

tidy_titanic |> pivotr(rows = sex, cols = survived)

tidy_titanic |> pivotr(rows = c(sex, age), cols = survived)

tidy_titanic |> pivotr(rows = sex, cols = survived, pivot = F)

flat_titanic |> pivotr(rows = sex, value = freq, prop = TRUE)

flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE)

flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE, within = sex)
```

# examples/derivative

Here are some examples where you might have derivative functions

```{r pivotr_countsummean}
pivot_count <- function(...){

# maybe a wt version...
pivotr(fun = length, ...)

}

pivot_average <- function(...){

mean_na_rm <- function(x){mean(x, na.rm = T)}

pivotr(fun = mean_na_rm, ...)

}

pivot_sum <- function(...){

pivotr(fun = sum, ...)

}

pivot_empty <- function(...){

nar <- function(x) return(NA)

pivotr(fun = nar, ...)

}
```

```{r, eval = T}
library(magrittr)
library(tidytitanic)

passengers <- readr::read_csv("https://raw.githubusercontent.com/clauswilke/dviz.supp/master/data-raw/titanic/Titanic.csv")

head(passengers)

tidy_titanic |> pivot_count(rows = sex)
tidy_titanic |> pivot_count(rows = sex, col = survived)
flat_titanic |> pivot_sum(rows = survived, value = freq)
flat_titanic |> pivot_sum(rows = sex, cols = survived, value = freq)

flat_titanic |> pivot_average(rows = sex, cols = survived, value = freq)
flat_titanic |> pivot_empty(rows = survived, cols = age)

passengers |> pivot_average(rows = c(Sex, PClass), cols = Survived, value = Age)

```

# filling cells with examples from data.

```{r}
pivot_example <- function(...){

sample1 <- function(x) sample(x, 1)
pivotr(fun = sample1, ...)

}

pivot_samplen <- function(..., n = 3, sep = "; "){

samplen <- function(x) paste(sample(x, n, replace = F), collapse = sep)

pivotr(fun = samplen, ...)

}

pivot_list <- function(..., sep = "; "){

paste_collapse <- function(x) paste (x, collapse = sep)
pivotr(fun = paste_collapse, ...)

}
```

```{r, eval = T}
flat_titanic |> pivot_example(rows = sex, value = freq)

flat_titanic |> pivot_samplen(rows = sex, value = freq)

flat_titanic |> pivot_list(rows = sex, cols = survived, value = freq)

set.seed(12345)
passengers |> pivot_example(rows = Survived, cols = Sex, value = Name)
passengers |> pivot_samplen(rows = Survived, cols = Sex, value = Name, n = 2, sep = "; ")

passengers |> pivot_samplen(rows = Survived, cols = Sex, value = Age, n = 7)

passengers |> dplyr::sample_n(20) |> pivot_list(rows = Sex, cols = Survived, value = Age)

```

## proportions helpers

```{r}
library(tidytitanic)
# pivot_prop
flat_titanic |> pivotr(rows = sex, value = freq, prop = TRUE) # pivot_prop
flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE)
flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE, within = sex)

# pivot_percent
flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, percent = TRUE, within = sex)
```

```{r}
knitr::knit_exit()

```

---

```{r Tidypivot}
# library(R6)

# library(tidytitanic)
# pivot_helper(data = passengers, rows = sex, cols = survived)

Tidypivot <- R6::R6Class("Tidypivot",
public = list(

# objects
data = NULL,
rows = NULL,
cols = NULL,
fun = NULL,
value = NULL,
wt = NULL,
within = NULL,
withinfun = NULL,
pivot = NULL,
wrap = NULL,
out = NULL
,

# functions
# initialize = function(){
#
# self$data <- NULL
# self$rows = NULL
# self$cols = NULL
# self$fun = NULL
# self$value = NULL
# self$wt = NULL
# self$within = NULL
# self$withinfun = NULL
# self$pivot = NULL
# self$wrap = NULL
# self$out = NULL
#
# },
update = function(data = NULL, rows = NULL, cols = NULL,
fun = NULL, value = NULL, wt = NULL, pivot_logical = NULL){ # a method

# updating

if(!is.null(data)){self$data <- data}
if(!is.null(rows)){self$rows <- rows}
if(!is.null(cols)){self$cols <- cols}
if(!is.null(fun)){self$fun <- fun}
if(!is.null(wt)){self$wt <- wt}
if(!is.null(value)){self$value <- value}
if(!is.null(pivot_logical)){self$pivot <- pivot_logical}

# displaying
self$out <- 'tidypivot::pivot_helper(data, rows, cols, fun, value, wt, pivot_logical)'

if(!is.null(self$data)) {self$out <- self$out |> stringr::str_replace("data", self$data) }
if(!is.null(self$rows)) {self$out <- self$out |> stringr::str_replace("rows", self$rows) }
if(!is.null(self$cols)) {self$out <- self$out |> stringr::str_replace("cols", self$cols) }
if(!is.null(self$fun)) {self$out <- self$out |> stringr::str_replace("fun", self$fun) }
if(!is.null(self$value)){self$out <- self$out |> stringr::str_replace("value", self$value) }
if(!is.null(self$wt)) {self$out <- self$out |> stringr::str_replace("wt", self$wt) }
if(!is.null(self$pivot)){self$out <- self$out |> stringr::str_replace("pivot_logical", self$pivot) }

if(is.null(self$data)) {self$out <- self$out |> stringr::str_replace("data", "NULL")}
if(is.null(self$rows)) {self$out <- self$out |> stringr::str_replace("rows", "NULL")}
if(is.null(self$cols)) {self$out <- self$out |> stringr::str_replace("cols", "NULL")}
if(is.null(self$fun)) {self$out <- self$out |> stringr::str_replace("fun", "NULL")}
if(is.null(self$value)){self$out <- self$out |> stringr::str_replace("value", "NULL")}
if(is.null(self$wt)) {self$out <- self$out |> stringr::str_replace("wt", "NULL")}
if(is.null(self$pivot)){self$out <- self$out |> stringr::str_replace("pivot_logical", "NULL")}

invisible(self) #returns

},

print = function() { # print method; default is to print everything

# str(self)
self$out

}
)
)
```

```{r}
my_tp <- Tidypivot$new()
eval(parse(text = my_tp$out))
```

```{r}

#' Title
#'
#' @param data
#' @param rows
#' @param cols
#' @param fun
#' @param value
#' @param wt
#' @param pivot_logical
#'
#' @return
#' @export
#'
#' @examples
#' tp_init(data = "tidytitanic::tidy_titanic")
tp_init <- function(data = NULL, quietly = FALSE){

if(exists("my_tp")){rm(my_tp)}

# my_tp <- Tidypivot$new()
my_tp <<- Tidypivot$new()

my_tp$update(data)

if(!quietly){
eval(parse(text = my_tp$out))
}

}

tp_init_pipe <- function(data = NULL, quietly = FALSE){

# if(exists("my_tp")){rm(my_tp)}

my_tp_pipe <- Tidypivot$new()
# my_tp <<- Tidypivot$new()

my_tp_pipe$update(data)

if(!quietly){
eval(parse(text = my_tp_pipe$out))
}

}
#
#
#' Title
#'
#' @param nothing
#' @param data
#' @param rows
#' @param cols
#' @param fun
#' @param value
#' @param wt
#' @param pivot_logical
#'
#' @return
#' @export
#'
#' @examples
#'tp_init(data = "tidytitanic::tidy_titanic")
#' tp_add(rows = "sex")
#' tp_add(cols = "class")
#'
#'tp_init(data = "tidytitanic::tidy_titanic") |>
#' tp_add(rows = "sex") |>
#' tp_add(cols = "class")
#'
tp_add <- function(nothing = NULL, data = NULL, rows = NULL, cols = NULL,
fun = NULL, value = NULL, wt = NULL,
pivot_logical = NULL, quietly = FALSE){

my_tp$update(data,
rows,
cols,
fun,
value,
wt,
pivot_logical)

if(!quietly){
eval(parse(text = my_tp$out))
}

}

tp_add_pipe <- function(nothing = NULL, data = NULL, rows = NULL, cols = NULL,
fun = NULL, value = NULL, wt = NULL,
pivot_logical = NULL, quietly = FALSE){

my_tp_pipe$update(data,
rows,
cols,
fun,
value,
wt,
pivot_logical)

if(!quietly){
eval(parse(text = my_tp_pipe$out))
}

}

#
#
#' Title
#'
#' @param vars
#'
#' @return
#' @export
#'
#' @examples
#' tp_init(data = "tidytitanic::tidy_titanic")
#'
#' set_data(data = tidytitanic::tidy_titanic) |>
#' set_rows(age)
set_rows <- function(first, vars){

var_names = deparse(substitute(vars))

var_names
tp_add(rows = deparse(substitute(vars)))

}

#
#
#' Title
#'
#' @param vars
#'
#' @return
#' @export
#'
#' @examples
#' tp_init(data = "tidytitanic::tidy_titanic")
#'
#' set_data(data = tidytitanic::tidy_titanic) |>
#' set_rows(age) |>
#' set_cols(c(survived, sex))
set_cols <- function(first, vars){

var_names = deparse(substitute(vars))

var_names
tp_add(cols = var_names)

}

#' Title
#'
#' @param vars
#'
#' @return
#' @export
#'
#' @examples
#' set_data(tidytitanic::tidy_titanic)
#'
set_data <- function(data){

my_tp <<- Tidypivot$new()
return(my_tp)

my_tp$update(data = data,
rows,
cols,
fun,
value,
wt,
pivot_logical)

eval(parse(text = my_tp$out))

# tp_add(data = deparse(substitute(data)))

}

```

# Piping

```{r}
tp_init(data = "tidytitanic::tidy_titanic")

set_data(tidytitanic::tidy_titanic) |>
set_rows(sex) |>
set_cols(survived) |>
set_cols(c(survived, age))
```

```{r}
knitr::knit_exit()

```

```{r}
library(tidypivot)

tp_init(data = "tidytitanic::tidy_titanic"); str(my_tp)
tp_add(rows = "sex"); str(my_tp)
tp_add(cols = "survived"); str(my_tp)
tp_add(cols = "c(survived, class)"); str(my_tp)
```

```{r}
tp_init_pipe(data = "tidytitanic::tidy_titanic") |>
tp_add(cols = "class") ; str(my_tp)
```

# Alternate interface

---

# Step 0. Some observations

## ggplot2: user needs to describe layout of table

you can make a visual pivot table in ggplot2; analyst job is to describe the form. How will it look

specify 3 things - start with visual layout

- specify x
- specify y
- specify count type geom

```{r}
tidy_titanic |>
ggplot() +
aes(x = Sex, y = Survived) +
geom_jitter() +
geom_count(color = "blue")
```

---

## With existing pivot tools, description isn't so visual

- specify vars
- specify aggregation
- specify visual arrangement (names from?) <- this comes last

```{r slow}
tidy_titanic |>
group_by(Sex, Survived) |>
summarize(count = n()) |>
pivot_wider(names_from = Survived,
values_from = count)
```

---

# Step 1a. Make Functions to allow description of final table, pivot_count and pivot_calc

## x argument is horizontal elements (columns) and y is vertical elements (rows)

```{r}
pivot_count_script <- readLines("./R/pivot_count.R")
```

```{r count, code = pivot_count_script}

```

```{r}
pivot_calc_script <- readLines("./R/pivot_calc.R")
```

```{r calc, code = pivot_calc_script}

```

# Step 1b. Using those functions

```{r examples}
# rows and cols
tidy_titanic |>
pivot_count(rows = Survived, cols = Sex)

# cols only
tidy_titanic |>
pivot_count(cols = Sex)

# rows only
tidy_titanic |>
pivot_count(rows = Survived)

# two rows and col
tidy_titanic |>
pivot_count(rows = c(Survived, Class), cols = Sex)

# two rows and col and contains zero counts
tidy_titanic |>
pivot_count(rows = c(Survived, Class), cols = c(Sex, Age))

# two rows and col and contains zero counts
tidy_titanic |>
pivot_count(rows = c(Survived, Class), cols = c(Sex, Age), pivot = F)

# count all
tidy_titanic |>
pivot_count()

# for fun organize like it will appear visually in code
tidy_titanic |>
pivot_count( cols = Sex,
rows = c(Survived, Class) )
```

## After examining your table you might actually want to have the calculation in long form (for use in something like ggplot2). This is what pivot = F is for!

```{r}
tidy_titanic |>
pivot_count(cols = Sex, rows = Survived, pivot = F)
```

## 1b. pivot_calc using pivot calc function for non count aggregation

### just for fun arrange the code how the table will look

```{r calc2, eval = F}
library(tidytitanic)
flat_titanic |>
pivot_calc( cols = Sex,
rows = Survived, value = Freq, fun = sum)

flat_titanic |>
pivot_count(cols = Sex,
rows = Survived, wt = Freq)

```

Issue: For this case, we should probably use pivot_count and allow for a wt argument.

## 1b style. use another tool to style

### goal of functions is not to style - just to make calculation faster by using a visually driven API

```{r style}
tidy_titanic |>
pivot_count(cols = Sex, rows = c(Survived, Class)) |>
group_by(Class) |>
gt::gt()
```

```{r style2}
tidy_titanic |>
pivot_count(cols = Sex, rows = c(Survived, Class, Age)) |>
group_by(Age) |>
gt::gt()
```

# Back to Step 0, Observations: use existing tools to calculate *proportions* is many step process

### feels like lots of gymnastics... a vis first approach is what we are after

```{r proportionslow}
tidy_titanic |>
group_by(Sex, Survived) |>
summarize(value = n()) |>
group_by(Sex) |>
mutate(prop = value/sum(value)) |>
select(-value) |>
pivot_wider(values_from = prop, names_from = Sex)
```

# Step 2a. build a function where visual arrangement leads.

```{r}
pivot_prop_script <- readLines("./R/pivot_prop.R")
```

```{r propfunction, code = pivot_prop_script}

```

# Step 2b. using the pivot_prop

```{r propusage}
tidy_titanic |>
pivot_prop(rows = Survived, cols = Class, within = Class)

tidy_titanic |>
pivot_prop(rows = c(Survived, Sex),
cols = Class,
within = Survived)

tidy_titanic |>
pivot_prop(rows = c(Survived, Sex),
cols = Class,
within = c(Survived, Sex))
```

```{r propgraph}
tidy_titanic |>
pivot_prop(rows = c(Survived, Sex),
cols = Class,
within = Survived, pivot = F) |>
ggplot() +
aes(x = Class, y = Sex) +
facet_grid(rows = vars(Survived)) +
geom_tile() +
aes(fill = prop) +
geom_text(aes(label = prop %>% round(3)))
```

```{r propgraph2}
tidy_titanic %>%
pivot_prop(rows = c(Survived, Sex),
cols = Class,
within = c(Survived, Sex), pivot = F) %>%
ggplot() +
aes(x = Class, y = 1) +
facet_grid(rows = vars(Survived, Sex)) +
geom_tile() +
aes(fill = prop) +
geom_text(aes(label = prop %>% round(3)))

```

# Reflections, questions, issues

1. Does this already exist?
2. ~~How can API improve? possibly rows = vars(y00, y0, y), cols = vars(x). and within = vars(?, ?) This requires more digging into tidy eval. What about multiple x vars?~~ These changes implemented thanks to Brian and Shannon
3. ~~How can internals improve? Also tidy eval is old I think. defaults for missing data.~~ Using new {{}} tidy eval within and across, and rlang::quo_is_null() thanks to Brian
4. What about summary columns, rows. Column totals, etc. Maybe not very tidy... maybe allowed w/ error message.
5. Ideas about different API - more like ggplot2, where you would specify new dimensions of your table after piping. Would require function to create non-data frame type object. Not sure about consistency dplyr/tidyr tools. These just return dataframes/tibble. I think being consistent with that expectation might be a good thing. Also less challenging to implement.

```{r, echo = F}
library(ggstamp)
set.seed(128790)
ggcanvas() +
stamp_tile(x = rnorm(500)/2.5,
y= rnorm(500)/2.5,
fill = "darkslateblue",
alpha = runif(500, .25, 1),
width = rnorm(500)/3,
height = rnorm(500)/3,
color = alpha("black", .75)) +
stamp_point(size = 8, color = "grey85") +
stamp_spoke(angle = pi, size = 1.7,
radius = .65, color = "grey85") +
stamp_point(size = 5, color = "grey85") +
stamp_point(size = 3, color = "grey55") +

stamp_polygon_holes(y0 = .25) +
stamp_text(label = "tidy", hjust = 1,
color = "violetred",
vjust = -.2, size = 17) +
stamp_text(label = "pivot", hjust = -.1,
angle = 0:25, color = "violetred",
vjust = -.2, size = 17, alpha = c(0:25/400)) +
stamp_spoke(angle = .45*0:25/25, size = 1.7,
radius = .85, color = "grey55", alpha = c(0:25/400)) +
stamp_spoke(angle = .45, size = 1.7,
radius = .85, color = "grey55") +
stamp_text(label = "pivot", hjust = -.1,
angle = 25, color = "violetred",
vjust = -.2, size = 17) +
ggstamp::theme_void_fill(fill = "darkslateblue") +
stamp_polygon(color = "lightslateblue",
alpha = 0, y0 = .25, size = 4)
```

# Other work in this space

- janitor::tably
- pivottabler

---

# And back again...

How to create a scatterplot with a pivot table

```{r}
library(ggplot2)
anscombe %>%
ggplot() +
aes(x = rank(x1), y = rank(y1)) +
geom_point() +
scale_y_continuous(breaks = 1:12 -.5) +
scale_x_continuous(breaks = 1:12 -.5) +
theme(panel.grid.minor = element_blank())
# or

anscombe %>%
pivot_count(cols = x1, rows = y1) %>%
dplyr::arrange(-y1) %>%
knitr::kable()
```

```{r}
"tidy_titanic %>%
group_by(Sex, Survived) %>%
summarize(count = n()) %>%
pivot_wider(names_from = Survived,
values_from = count)" %>%
writeLines(con = tempfile())

ggstamp::ggdraft() +
ggstamp::stamp_text(label = "tidy_titanic %>%
group_by(Sex, Survived) %>%
summarize(count = n()) %>%
pivot_wider(names_from = Survived,
values_from = count)",
hjust = 0, size = 8, x = .01, y = .4)

ggstamp::ggcanvas() +
ggstamp::stamp_circle(x0y0 = ggstamp::pos_polygon(n = 3),
fill = "green", alpha = .2, radius = 1.3) +
ggstamp::stamp_text(xy = ggstamp::pos_polygon(n = 3, radius = 1.5),
label = c("ggplot2", "tidyr", "dplyr"))

```

```{r}
pivot_helper <- function(data,

rows = NULL,
cols = NULL,

value = NULL,
fun = NULL,
wt = NULL,

prop = FALSE,
percent = FALSE,
round = NULL,
within = NULL,
withinfun = NULL,

pivot = NULL,
wrap = NULL,
totals_within = NULL){

cols_quo <- rlang::enquo(cols)
value_quo <- rlang::enquo(value)
wt_quo <- rlang::enquo(wt)
within_quo <- rlang::enquo(within)
totals_within_quo <- rlang::enquo(totals_within)

if(is.null(prop)) {prop <- FALSE}
if(is.null(pivot)){pivot <- TRUE}
if(is.null(wrap)) {wrap <- FALSE}
if(is.null(fun)) {fun <- sum }

########## group data by all row and col vars #############
grouped <- data %>%
dplyr::group_by(dplyr::across(c({{cols}}, {{rows}})),
.drop = FALSE)

######### define value on which to summarize ###############
if(rlang::quo_is_null(value_quo) ){

valued <- grouped %>%
dplyr::mutate(value = 1)

}else{

valued <- grouped %>%
dplyr::mutate(value = {{value}})

}

######### weighting value if required ###################
if(!rlang::quo_is_null(wt_quo)){

valued <- valued %>%
dplyr::mutate(value = value*{{wt}})

}

######### summarizing by group according to function ########

summarized <- valued %>%
dplyr::summarise(summary = fun(value))

######## custom summaries props and percent, where within is defined ####

## proportion ####
if(prop|percent){

if(is.null(round)&prop){round = 3}
if(is.null(round)&percent){round = 1}
multiplier <- ifelse(percent, 100, 1)

# proportion across all data, when 'within' is not defined
if(rlang::quo_is_null(within_quo) ){

summarized <- valued %>%
dplyr::ungroup() %>%
dplyr::summarise(summary = round(value*multiplier/sum(value), round))

}else{

summarized <- valued %>%
dplyr::ungroup() %>%
dplyr::group_by(dplyr::across(c({{within}})),
.drop = FALSE) %>%
dplyr::summarise(summary = round(value*multiplier/sum(value), round))

}
}

### Just saving a place for adding rearrangement variable

arranged <- summarized

##### Ungrouping
ungrouped <- arranged |>
dplyr::ungroup()

############# yields tidy data
tidy <- ungrouped

####### should we actually pivot the data ####
# do not pivot if argument pivot false or if no columns specified
if(pivot == F | rlang::quo_is_null(cols_quo)){

out <- tidy # for unpivoted value should be renamed conditionally

}else{

out <- tidy |>
tidyr::pivot_wider(names_from = {{cols}})

}

return(out)

}

```