# Load the winners table of the TidyTuesday 2023-04-25 dataset from GitHub.
winners <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-04-25/winners.csv"
)
## Rows: 163 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Category, Athlete, Nationality
## dbl (1): Year
## time (1): Time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the per-year London Marathon table and keep only editions held
# before 2020.
london_marathon <- filter(
  read_csv(
    file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-04-25/london_marathon.csv"
  ),
  Year < 2020
)
## Rows: 42 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Official charity
## dbl (6): Year, Applicants, Accepted, Starters, Finishers, Raised
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Participants of London Marathons
# Overlay accepted, starting and finishing runner counts per year as three
# semi-transparent column layers sharing the same x axis.
ggplot(london_marathon, aes(x = Year)) +
  geom_col(aes(y = Accepted), alpha = 0.5) +
  geom_col(aes(y = Starters), fill = "green", alpha = 0.5) +
  geom_col(aes(y = Finishers), fill = "red", alpha = 0.5)
# Reshape to long format (one row per Year and status) so that the status can
# be mapped to fill and to facets instead of stacking three separate layers.
london_marathon %>%
  select(Year, Accepted, Starters, Finishers) %>%
  pivot_longer(-Year, names_to = "status", values_to = "n") %>%
  # Put the levels in funnel order: accepted, then starters, then finishers.
  mutate(status = fct_relevel(status, c("Accepted", "Starters", "Finishers"))) %>%
  ggplot() +
  aes(x = Year, y = n, fill = status) +
  geom_col() +
  scale_y_continuous(labels = scales::comma_format()) +
  facet_wrap(~status) +
  labs(x = NULL, y = "Participants",
       title = "Participants of the London Marathon over time",
       fill = "Status")
Summary stats of London Marathon
# Mean, standard deviation and median of Raised, ignoring missing values.
london_marathon %>%
  summarise(
    mean_donation = mean(Raised, na.rm = TRUE),
    sd_donation = sd(Raised, na.rm = TRUE),
    med_donation = median(Raised, na.rm = TRUE)
  )
## # A tibble: 1 × 3
## mean_donation sd_donation med_donation
## <dbl> <dbl> <dbl>
## 1 54.4 6.50 53
# Base-R six-number summary of Raised after dropping missing values.
london_marathon[["Raised"]] %>%
  na.omit() %>%
  summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 46.50 50.60 53.00 54.38 59.40 66.40
Proportion of finishers
# Share of starters who finished the race, plotted per year.
london_marathon %>%
  mutate(finish_prop = Finishers / Starters) %>%
  ggplot() +
  aes(x = Year, y = finish_prop) +
  geom_line(size = 1.2) +
  scale_y_continuous(labels = scales::percent_format()) +
  # annotate() draws the note exactly once; geom_text() with constant x, y and
  # label would redraw it for every row of the data, overplotting the text.
  annotate("text", x = 1995, y = .935, label = "Something happened here",
           color = "red", hjust = 0) +
  labs(title = "The proportion of finishers over time",
       x = NULL, y = NULL)
Winning time by year and category
# Number of rows in winners for each Category.
count(winners, Category)
## # A tibble: 4 × 2
## Category n
## <chr> <int>
## 1 Men 43
## 2 Wheelchair Men 39
## 3 Wheelchair Women 39
## 4 Women 42
# Winning time per year, one line per category; expand_limits() asks the
# y scale to include 0.
ggplot(winners, aes(x = Year, y = Time, color = Category)) +
  geom_line(size = 1.2) +
  expand_limits(y = c(0, Inf))
# Smoothed trend of the winning time per category, drawn without the
# confidence ribbon; expand_limits() asks the y scale to include 0.
ggplot(winners, aes(x = Year, y = Time, color = Category)) +
  geom_smooth(size = 1.2, se = FALSE) +
  expand_limits(y = c(0, Inf))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Winners by nationality
# Horizontal bar chart of the number of winner rows per nationality, with
# bars ordered by count and the count printed at the end of each bar.
winners %>%
  count(Nationality, sort = TRUE) %>%
  mutate(Nationality = fct_reorder(Nationality, n)) %>%
  ggplot(aes(x = n, y = Nationality, label = n)) +
  geom_col() +
  geom_text(hjust = 0) +
  labs(
    title = "Winners of each Nationality",
    x = "Number of winners",
    y = NULL
  )
Winners who won more than one marathon
# Tile chart of athletes with more than one win: one tile per winning year,
# one facet per category.
winners %>%
  # Strip the " (Tie)" suffix so tied wins are counted under the plain name.
  mutate(Athlete = str_remove(Athlete, pattern = " \\(Tie\\)")) %>%
  add_count(Athlete, name = "wins") %>%
  filter(wins > 1) %>%
  # Order athletes on the y axis by the year of their first win.
  mutate(Athlete = fct_reorder(Athlete, Year, min)) %>%
  ggplot() +
  aes(x = Year, y = Athlete, fill = Category) +
  geom_tile(show.legend = FALSE) +
  labs(y = NULL, x = NULL,
       title = "Repeat winners of the London Marathon over the years, by category",
       subtitle = "Only athletes who won more than one race are shown") +
  facet_wrap(~Category, ncol = 1, scales = "free_y")