https://github.com/tjmahr/fitness

dumping ground for some of my fitness data
https://github.com/tjmahr/fitness
Last synced: 3 months ago
JSON representation
dumping ground for some of my fitness data
Host: GitHub
URL: https://github.com/tjmahr/fitness
Owner: tjmahr
License: gpl-3.0
Created: 2022-08-16T18:11:05.000Z (almost 3 years ago)
Default Branch: main
Last Pushed: 2023-06-22T19:28:05.000Z (almost 2 years ago)
Last Synced: 2025-02-24T13:56:36.895Z (3 months ago)
Language: R
Size: 27.6 MB
Stars: 0
Watchers: 2
Forks: 0
Open Issues: 0
Metadata Files:
- Readme: README.Rmd
- License: LICENSE
Awesome Lists containing this project

README

---

output: github_document

---

```{r}

#| include: false

library(tidyverse)

# Chunk defaults for the rendered README: collapse source and output into one
# block, prefix output lines with "#>", and render figures at 300 dpi with the
# ragg PNG device
knitr::opts_chunk$set(
  dev = "ragg_png",
  dpi = 300,
  comment = "#>",
  collapse = TRUE
)

# These are fake-ish targets: loading them here forces a dependence between
# the data and the README
targets::tar_load(data_bodyweight)
targets::tar_load(data_miles_to_schools)
targets::tar_load(data_intervals)
targets::tar_load(data_exercise)

```

# fitness

A dumping ground for some of my fitness data.

## exercise-minutes

I exported all of my Apple Health data. The export process provides a .zip file.

I opened the `export.xml` file in the .zip archive and extracted my daily

exercise minutes (the green ring) activity using the following:

```{r}

#| eval: false

library(tidyverse)

# Read the Apple Health export and keep every <ActivitySummary> node
export_xml <- xml2::read_xml("C:/Users/trist/Desktop/export.xml")
nodes_summary <- xml2::xml_find_all(export_xml, ".//ActivitySummary")

# Turn the attributes of each node into a one-row data frame
summary_rows <- nodes_summary |>
  xml2::xml_attrs() |>
  lapply(function(attrs) as.data.frame(as.list(attrs)))

# Stack the rows, keep the date and exercise-minute columns, parse them into
# proper types, and save the result
summary_rows |>
  bind_rows() |>
  as_tibble() |>
  # I got my first Apple Watch around Christmas 2018
  filter(dateComponents > "2018-12-19") |>
  select(date = dateComponents, exercise_minutes = appleExerciseTime) |>
  type_convert(
    col_types = cols(
      date = col_date(format = ""),
      exercise_minutes = col_double()
    )
  ) |>
  write_csv("data/exercise-minutes.csv")

```

Now, I have my exercise minutes for each day.

I should disclose that I turn on an Apple workout on my watch when I mow the

lawn, shovel snow, or do something mildly active like going for a walk with my

family. 

```{r exercise-minutes, fig.height = 7, fig.width = 10}

library(tidyverse)

# Daily exercise minutes, one row per date
data_minutes <- read_csv(
  "data/exercise-minutes.csv",
  # I added entries with Excel and it mangled the dates
  col_types = cols(
    date = col_date("%m/%d/%Y"),
    exercise_minutes = col_integer()
  )
)

data_minutes <- data_minutes |>
  # skip days with no recorded minutes
  filter(!is.na(exercise_minutes)) |>
  # calendar columns, in case I want to aggregate by months within years
  mutate(
    year = lubridate::year(date),
    month = lubridate::month(date),
    year_month = paste0(year, month)
  ) |>
  # earliest recorded date within each year-month
  group_by(year_month) |>
  mutate(year_month_start = min(date)) |>
  ungroup()

# Daily exercise minutes, one panel per year, with a per-panel loess trend
ggplot(
  data = filter(data_minutes, year > 2018),
  mapping = aes(x = date, y = exercise_minutes)
) +
  # reference line at 30 minutes
  geom_hline(yintercept = 30, linewidth = 1, color = "#92E82A") +
  # days under 125 minutes are drawn at their actual value
  geom_point(data = \(df) filter(df, exercise_minutes < 125), alpha = .4) +
  # days with 125+ minutes are pinned to y = 125 and drawn in orange
  geom_point(
    aes(y = 125),
    data = \(df) filter(df, exercise_minutes >= 125),
    color = "orange",
    alpha = .4
  ) +
  # invisible point on 2023-12-31 so the 2023 panel's x-axis reaches the end
  # of the year
  geom_blank(
    data = tibble::tibble(
      date = as.Date("2023-12-31"),
      year = 2023,
      exercise_minutes = NA_real_
    )
  ) +
  stat_smooth(method = "loess", formula = y ~ x) +
  facet_wrap("year", scales = "free_x") +
  scale_x_date(date_minor_breaks = "1 months", date_labels = "%b") +
  scale_y_continuous(breaks = c(0, 30, 60, 90, 120)) +
  labs(
    caption = "Outliers (125+ minutes) replaced with orange points.",
    x = "month",
    y = "exercise activity minutes ⌚"
  ) +
  theme_light(base_size = 16)

```

I think that the smoothing lines should match up from facet to facet. Let's

figure out how to do that.

```{r exercise-minutes-2, fig.height = 7, fig.width = 10}

# Fit a single smooth over the whole date range (cubic regression spline on
# the numeric date) so the trend is continuous across the yearly panels
gam <- mgcv::gam(
  exercise_minutes ~ s(as.numeric(date), bs = "cr"),
  data = data_minutes
)

# Attach the model's fitted values and interval bounds to the original rows.
# NOTE(review): the plot below expects columns named `fitted`, `lower` and
# `upper` -- confirm these match the installed gratia version.
data_gam <- gratia::fitted_values(gam, data = data_minutes)

ggplot(
  data = filter(data_gam, year > 2018),
  mapping = aes(x = date, y = exercise_minutes)
) +
  # reference line at 30 minutes
  geom_hline(yintercept = 30, linewidth = 1, color = "#92E82A") +
  # days under 125 minutes are drawn at their actual value
  geom_point(data = \(df) filter(df, exercise_minutes < 125), alpha = .4) +
  # days with 125+ minutes are pinned to y = 125 and drawn in orange
  geom_point(
    aes(y = 125),
    data = \(df) filter(df, exercise_minutes >= 125),
    color = "orange",
    alpha = .4
  ) +
  # invisible point on 2023-12-31 so the 2023 panel's x-axis reaches the end
  # of the year
  geom_blank(
    data = tibble::tibble(
      date = as.Date("2023-12-31"),
      year = 2023,
      exercise_minutes = NA_real_
    )
  ) +
  # draw the precomputed fit and ribbon rather than smoothing per panel
  geom_smooth(
    aes(y = fitted, ymin = lower, ymax = upper),
    stat = "identity"
  ) +
  facet_wrap("year", scales = "free_x") +
  scale_x_date(date_minor_breaks = "1 months", date_labels = "%b") +
  scale_y_continuous(breaks = c(0, 30, 60, 90, 120)) +
  labs(
    caption = "Outliers (125+ minutes) replaced with orange points.",
    x = "month",
    y = "exercise activity minutes ⌚"
  ) +
  theme_light(base_size = 16)

```

What is my exercise debt (unmet goal minutes)?

```{r}

# Summarise exercise minutes for whatever grouping `data` carries.
# `debt` is the total shortfall against 30 minutes per recorded day; it is
# negative when the total exceeds that.
summarise_exercise <- function(data) {
  summarise(
    data,
    n_days = n(),
    mean_exercise = mean(exercise_minutes),
    exercise = sum(exercise_minutes),
    debt = sum(30 - exercise_minutes)
  )
}

# Year by year
data_minutes |>
  group_by(year) |>
  summarise_exercise()

# All recorded days together
data_minutes |>
  summarise_exercise()

```

My stupid Apple Fitness said my September 2022 challenge is 64 minutes of

exercise activity per day on average, so let's make a query to check on that.

```{r}

# Monthly totals for 2022, newest month first, compared against the
# September 2022 challenge (64 minutes per day over 30 days)
data_minutes |>
  filter(year == 2022) |>
  group_by(year, month) |>
  summarise(
    num_days = n(),
    sum_minutes = sum(exercise_minutes),
    # formatted to one decimal place; this makes the column character
    mean_minutes = scales::label_number(.1)(mean(exercise_minutes)),
    prop_of_sept_22_goal = (sum(exercise_minutes) / (64 * 30)) |> round(2),
    # the result is already ungrouped, so no separate ungroup() is needed
    .groups = "drop"
  ) |>
  arrange(desc(year), desc(month)) |>
  # one alignment letter per column: the table has six columns
  knitr::kable(align = "lrrrrr")

```

## bodyweight

In April, I took up a new hobby: Weekly boxing classes. This new activity

motivated me to be a little more active (closing my watch's daily exercise goal

more often or trying exercises that help for boxing conditioning). I started

measuring my bodyweight around this time. I haven't changed my diet or anything

like that, and I don't have any weight goals besides getting back to my

pre-pandemic bodyweight (250 lbs).

```{r bodyweight, fig.width = 6, fig.height = 4}

# Bodyweight log; dates are written as month/day/year
data_weight <- read_csv(
  "data/bodyweight.csv",
  col_types = cols(date = col_date("%m/%d/%Y"))
)

# Plain version: loess trend underneath the individual weigh-ins
p <- ggplot(data_weight, aes(x = date, y = weight)) +
  stat_smooth(method = "loess", formula = y ~ x) +
  geom_point() +
  ylim(225, 270) +
  labs(y = "bodyweight [lb]") +
  theme_grey(base_size = 16) +
  theme(panel.grid.minor.y = element_blank())

p

library(ggrepel)

# Annotated version: adds a line at 250 lb, a dashed marker on 2022-04-24,
# and a text label for each
ggplot(data_weight, aes(x = date, y = weight)) +
  geom_hline(yintercept = 250, color = "grey60", linewidth = 1.5) +
  geom_vline(
    xintercept = as.Date("2022-04-24"),
    linetype = "dashed",
    linewidth = 1
  ) +
  stat_smooth(method = "loess", formula = y ~ x) +
  geom_point() +
  ylim(225, 270) +
  labs(y = "bodyweight [lb]") +
  theme_grey(base_size = 16) +
  # (minor y gridlines are kept here; the option below is switched off)
  # theme(panel.grid.minor.y = element_blank()) +
  # label anchored near the top of the dashed line, kept to the right of it
  geom_text_repel(
    data = data.frame(date = as.Date("2022-04-25"), weight = 269),
    label = "Started taking boxing classes in late April 🥊",
    size = 4.5,
    nudge_y = .5,
    point.padding = 0.2,
    segment.curvature = 1e-20,
    xlim = c(as.Date("2022-04-30"), NA)
  ) +
  # label anchored on the 250 lb line, kept to the right of the dashed line
  geom_text_repel(
    data = data.frame(date = as.Date("2022-04-26"), weight = 250),
    label = "Pre-pandemic weight\naround 250 lbs",
    size = 4.5,
    nudge_y = 0,
    point.padding = 0.5,
    segment.curvature = 1e-20,
    xlim = c(as.Date("2022-04-30"), NA)
  ) +
  # invisible layer: stretches the x-axis to the latest date in either the
  # exercise or the bodyweight data
  geom_blank(
    aes(x = max(c(data_minutes$date, data_weight$date)), y = 260)
  )

# A version with only interesting jumps in weight: keep the weigh-ins that
# differ from the next one by at least 1 lb
data_weight_jumps <- filter(data_weight, abs(weight - lead(weight)) >= 1)

# Adding the data frame (wrapped in a list) to the previous plot presumably
# swaps it in as that plot's data -- verify against ggplot2's behaviour
last_plot() + list(data_weight_jumps)

# last_plot() + 

#   geom_vline(

#     linetype = "dashed",

#     xintercept = as.Date("2023-02-16"),

#     linewidth = 1,

#   ) +

#   geom_text_repel(

#     data = data.frame(

#       date = as.Date("2023-02-20"),

#       weight = 257

#     ),

#     nudge_y = .5,

#     point.padding = 0.2,

#     segment.curvature = 1e-20,

#     xlim = c(as.Date("2023-02-20"), NA),

#     label = "Intermittent fasting\non weekdays",

#     size = 3.5

#   ) 

```

The last time I was on a months-long fitness kick, I did a Nike+ Run Club

training program for a half-marathon. In a one-month period (July 2015), I ran a

half-marathon distance three times. My weight at that time was 234 lbs, so

that's the benchmark for Tristan, The Cardio Machine.

## other notes

11/25/2022 - got the cross-rope "get lean" weighted jump rope set

12/5/2022 - increased my apple move goal from 860 to 940

01/10/2023 - decreased my apple move goal to 500 because of a family vacation 

and procedure. started bumping it back up in 02/2023.

02/20/2023 - started doing intermittent fasting on weekdays. basically, this 

kept me from eating peanut butter before bed or in the middle of the night.

### fast miles

In September 2022, I started jogging my kid to preschool in a running stroller.

School is just a little more than a mile away. After a few weeks, I decided to

compete with myself for how fast I could run the first mile split on these jogs.

```{r fast-miles, fig.width = 6, fig.height = 3}

# Convert "minutes:seconds" strings (e.g. "7:45") to a number of seconds.
#
# xs: character vector of durations; pieces after the second one are ignored.
# Returns a numeric vector the same length as `xs`. Elements that are NA or
# that lack a seconds part come back as NA. Empty input gives numeric(0).
convert_min_sec_to_sec <- function(xs) {
  # split each string on the literal colon
  parts <- strsplit(xs, split = ":", fixed = TRUE)

  # vapply (rather than lapply + unlist) guarantees a numeric result even
  # when `xs` has length zero
  vapply(
    parts,
    function(x) {
      x <- as.numeric(x)
      60 * x[1] + x[2]
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}

# First-mile splits from the jogs to school
data_miles <- "data/miles-to-school.csv" |>
  # I added entries with Excel and it mangled the dates
  read_csv(
    col_types = cols(
      date = col_date("%m/%d/%Y"),
      first_mile_split = "c",
      comment = "c",
      season = "n"
    )
  ) |>
  # splits are written as "m:ss"; convert them to seconds
  mutate(
    time = convert_min_sec_to_sec(first_mile_split)
  ) |>
  # Leave out runs flagged as injured or under the weather. A missing comment
  # is treated as an empty string: str_detect() returns NA for NA input and
  # filter() drops NA rows, which would silently discard uncommented runs.
  filter(
    str_detect(tolower(coalesce(comment, "")), "injured", negate = TRUE),
    str_detect(
      tolower(coalesce(comment, "")),
      "under the weather",
      negate = TRUE
    )
  )

# First-mile time by date, one panel per season
ggplot(data_miles, aes(x = date, y = time)) +
  geom_point(size = 3) +
  # add some extra spacing in the season 2 panel as we wait for data
  geom_blank(
    data = data.frame(
      date = as.Date("2023-06-15"),
      time = 450,
      season = 2
    )
  ) +
  scale_x_date(date_labels = "%b %d", date_breaks = "2 weeks") +
  scale_y_continuous(
    # a break every 30 seconds from 7:00 to 11:00
    breaks = seq(7, 11, by = 0.5) * 60,
    # show seconds as mm:ss
    labels = \(secs) sprintf("%02d:%02d", secs %/% 60, secs %% 60)
  ) +
  labs(y = "pace [min per mi]", x = NULL) +
  facet_wrap("season", labeller = label_both, scales = "free_x") +
  theme_light(base_size = 11) +
  ggtitle("🏃‍♂️ How fast I ran 1 mile, running my kid to school 🎒")

```

## archived/iced things

### intervals

  - Run some distance as fast as you can (time it), stop and recover until

    heartrate is 140 bpm (time how long the recovery takes).

  - Do this for the following distances (m): 800, 800, 600, 600, 400, 400, 200.

I injured my knee doing the 200 m sprint on 2022-09-06 and now I'm kind of

scared of this workout.

```{r intervals, fig.width = 8, fig.height = 4}

library(tidyverse)

# Interval workout log. `time` and `distance` are read as text; `time` is
# then converted from "m:ss" to seconds.
d <- read_csv(
  "data/intervals.csv",
  col_types = cols(
    date = col_date("%m/%d/%Y"),
    interval_number = col_integer(),
    time = col_character(),
    distance = col_character()
  )
)

d <- mutate(d, time = convert_min_sec_to_sec(time))

# Per-workout totals for each interval type: number of intervals, total time
# in seconds, and total distance
d_date_margins <- summarise(
  group_by(d, date, interval_type),
  intervals = n(),
  total_time = sum(time),
  distance = sum(as.numeric(distance)),
  .groups = "drop"
)

d_date_margins

# Running pace (minutes per km) for each run interval, one line per workout
ggplot(
  data = filter(d, interval_type == "run"),
  mapping = aes(
    # interval_number is halved so that numbers 1, 3, 5, ... plot at 1, 2, 3, ...
    x = (interval_number + 1) / 2,
    y = (time / 60) / (as.numeric(distance) / 1000)
  )
) +
  geom_line(aes(color = factor(date), group = date)) +
  geom_point(aes(color = factor(date))) +
  # distance label for each interval position, drawn once per unique value
  geom_label(
    aes(y = 3.5, label = paste0(distance, " m")),
    stat = "unique",
    hjust = .5
  ) +
  # label for the extra column (interval_number 15) holding workout totals
  geom_label(
    aes(y = 3.5, label = label),
    data = data.frame(label = "(all)", interval_number = 15),
    hjust = .5
  ) +
  # overall pace per workout, computed from summed time and distance
  geom_point(
    aes(color = factor(date)),
    data = d_date_margins |>
      filter(interval_type == "run") |>
      rename(time = total_time) |>
      mutate(interval_number = 15)
  ) +
  labs(
    color = "date",
    x = "running interval number",
    y = "pace [min per km] (lower: better)"
  )

# Same plot, but with pace expressed as seconds per 200 m
ggplot(
  data = filter(d, interval_type == "run"),
  mapping = aes(
    # interval_number is halved so that numbers 1, 3, 5, ... plot at 1, 2, 3, ...
    x = (interval_number + 1) / 2,
    y = (time) / (as.numeric(distance) / 200)
  )
) +
  geom_line(aes(color = factor(date), group = date)) +
  geom_point(aes(color = factor(date))) +
  # distance label for each interval position, drawn once per unique value
  geom_label(
    aes(y = 30, label = paste0(distance, " m")),
    stat = "unique",
    hjust = .5
  ) +
  # label for the extra column (interval_number 15) holding workout totals
  geom_label(
    aes(y = 30, label = label),
    data = data.frame(label = "(all)", interval_number = 15),
    hjust = .5
  ) +
  # overall pace per workout, computed from summed time and distance
  geom_point(
    aes(color = factor(date)),
    data = d_date_margins |>
      filter(interval_type == "run") |>
      rename(time = total_time) |>
      mutate(interval_number = 15)
  ) +
  labs(
    color = "date",
    x = "running interval number",
    y = "seconds per 200 m (lower: better)"
  )

# Recovery duration (seconds) for each recovery interval, one line per workout
ggplot(
  data = filter(d, interval_type == "recover"),
  mapping = aes(x = (interval_number + 1) / 2, y = time)
) +
  geom_line(aes(color = factor(date), group = date)) +
  geom_point(aes(color = factor(date))) +
  # distance label for each interval position, drawn once per unique value
  geom_label(
    aes(y = 45, label = paste0(distance, " m")),
    stat = "unique",
    hjust = .5
  ) +
  labs(
    caption = "I used a different workout app on 2022-08-16 🤷‍♀️",
    color = "date",
    x = "running interval number",
    y = "recovery duration [s] (lower: better)"
  )

```
ecosyste.ms

Data

Tools

Indexes

Applications

Experiments

Awesome

https://github.com/tjmahr/fitness

Awesome Lists containing this project

README