# Load the TidyTuesday (2023-04-25) winners table straight from GitHub.
winners <- read_csv(
    file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-04-25/winners.csv"
)
## Rows: 163 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): Category, Athlete, Nationality
## dbl  (1): Year
## time (1): Time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the TidyTuesday (2023-04-25) per-year marathon table and keep only the
# rows whose Year is before 2020.
london_marathon <- 
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-04-25/london_marathon.csv" %>% 
    read_csv() %>% 
    filter(Year < 2020)
## Rows: 42 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): Official charity
## dbl  (6): Year, Applicants, Accepted, Starters, Finishers, Raised
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Participants of London Marathons

# Overlay three semi-transparent bar layers (Accepted, Starters, Finishers)
# on a shared Year axis.
ggplot(data = london_marathon, mapping = aes(x = Year)) +
    geom_col(mapping = aes(y = Accepted), alpha = 0.5) +
    geom_col(mapping = aes(y = Starters), alpha = 0.5, fill = "green") +
    geom_col(mapping = aes(y = Finishers), alpha = 0.5, fill = "red")

# Reshape the three count columns into long format (one row per Year and
# status) so that status can drive both the fill colour and the facets,
# instead of stacking hand-coloured layers on top of each other.
london_marathon %>% 
    select(Year, Accepted, Starters, Finishers) %>% 
    pivot_longer(-Year, names_to = "status", values_to = "n") %>% 
    # Fix the level order so facets and legend follow Accepted -> Starters -> Finishers
    # rather than alphabetical order.
    mutate(status = fct_relevel(status, c("Accepted", "Starters", "Finishers"))) %>% 
    ggplot() +
    aes(x = Year, y = n, fill = status) +
    geom_col() +
    scale_y_continuous(labels = scales::comma_format()) +
    facet_wrap(~status) +
    # Label text corrected: "Particiapnts" / "Mararthon" were misspelled.
    labs(x = NULL, y = "Participants",
         title = "Participants of the London Marathon over time",
         fill = "Status")

Summary stats of London Marathon

# Mean, standard deviation and median of Raised, ignoring missing values.
london_marathon %>% 
    summarise(
        mean_donation = mean(Raised, na.rm = TRUE),
        sd_donation   = sd(Raised, na.rm = TRUE),
        med_donation  = median(Raised, na.rm = TRUE)
    )
## # A tibble: 1 × 3
##   mean_donation sd_donation med_donation
##           <dbl>       <dbl>        <dbl>
## 1          54.4        6.50           53
# Base-R five-number summary (plus mean) of the non-missing Raised values.
london_marathon %>% 
    pull(Raised) %>% 
    na.omit() %>% 
    summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   46.50   50.60   53.00   54.38   59.40   66.40

Proportion of finishers

# Line chart of the share of starters who finished, per year.
london_marathon %>% 
    mutate(finish_prop = Finishers/Starters) %>% 
    ggplot() +
    aes(x = Year, y = finish_prop) +
    geom_line(size = 1.2) +
    scale_y_continuous(labels = scales::percent_format()) +
    # annotate() draws the note exactly once. geom_text() with constant
    # x/y/label would inherit the plot data and redraw the same label for
    # every row, overplotting it.
    annotate("text", x = 1995, y = .935, label = "Something happened here", 
             color = "red", hjust = 0) +
    labs(title = "The proportion of finishers over time",
        x = NULL, y = NULL)

Winning time by year and category

# Number of rows in winners for each Category.
count(winners, Category)
## # A tibble: 4 × 2
##   Category             n
##   <chr>            <int>
## 1 Men                 43
## 2 Wheelchair Men      39
## 3 Wheelchair Women    39
## 4 Women               42
# Winning Time per Year, one coloured line per Category; the y axis is asked
# to include 0.
ggplot(data = winners,
       mapping = aes(x = Year, y = Time, colour = Category)) +
    geom_line(size = 1.2) +
    expand_limits(y = c(0, Inf))

# Same axes as the line chart, but showing a smoothed trend per Category
# without the confidence ribbon (se = FALSE).
ggplot(data = winners,
       mapping = aes(x = Year, y = Time, colour = Category)) +
    geom_smooth(se = FALSE, size = 1.2) +
    expand_limits(y = c(0, Inf))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Winners by nationality

# Horizontal bar chart of the number of winners per Nationality, with the
# count printed at the end of each bar.
winners %>% 
    count(Nationality, sort = TRUE) %>% 
    # Order the factor by count so the bars are sorted on the y axis.
    mutate(Nationality = fct_reorder(Nationality, n)) %>% 
    ggplot(mapping = aes(x = n, y = Nationality, label = n)) +
    geom_col() +
    geom_text(hjust = 0) +
    labs(
        title = "Winners of each Nationality",
        x = "Number of winners",
        y = NULL
    )

Athletes who won more than one marathon

# Tile chart of athletes with more than one win: one tile per winning Year,
# faceted by Category.
winners %>% 
    # Strip the " (Tie)" suffix so that such rows count under the plain
    # athlete name.
    mutate(Athlete = str_remove(Athlete, pattern = " \\(Tie\\)")) %>% 
    add_count(Athlete, name = "wins") %>%
    filter(wins > 1) %>%
    # Order athletes by the year of their first win.
    mutate(Athlete = fct_reorder(Athlete, Year, min)) %>% 
    ggplot() +
    aes(x = Year, y = Athlete, fill = Category) +
    # The facets already identify the category, so the fill legend is dropped.
    geom_tile(show.legend = FALSE) +
    # Label text corrected for grammar ("are show", "1 competitions", "the year").
    labs(y = NULL, x = NULL,
         title = "Winners of London Marathons over the years by category",
         subtitle = "Only athletes who won more than one competition are shown") +
    # free_y lets each facet list only its own athletes.
    facet_wrap(~Category, ncol = 1, scales = "free_y")