museums_raw <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-22/museums.csv')
## Rows: 4191 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (24): museum_id, Name_of_museum, Address_line_1, Address_line_2, Village...
## dbl (11): Latitude, Longitude, DOMUS_identifier, Area_Deprivation_index, Are...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
museums <-
museums_raw %>%
mutate(Subject_Matter = str_replace_all(Subject_Matter,
pattern = "_|-",
replacement = " "),
Year_opened = parse_number(Year_opened),
Year_closed = parse_number(Year_closed) %>%
if_else(. == 9999, true = 2023, false = .))
museums %>% glimpse()
## Rows: 4,191
## Columns: 35
## $ museum_id <chr> "mm.New.1", "mm.aim.1230", "mm.…
## $ Name_of_museum <chr> "Titanic Belfast", "The Woodlan…
## $ Address_line_1 <chr> "1 Olympic Way", NA, "Warwick C…
## $ Address_line_2 <chr> NA, "Brokerswood", "Horticultur…
## $ `Village,_Town_or_City` <chr> "Belfast", "nr Westbury", "More…
## $ Postcode <chr> "BT3 9EP", "BA13 4EH", "CV35 9B…
## $ Latitude <dbl> 54.60808, 51.27090, 52.19715, 4…
## $ Longitude <dbl> -5.909915, -2.231863, -1.555528…
## $ Admin_area <chr> "/Northern Ireland/Belfast (NI …
## $ Accreditation <chr> "Unaccredited", "Unaccredited",…
## $ Governance <chr> "Independent-Not_for_profit", "…
## $ Size <chr> "large", "small", "medium", "sm…
## $ Size_provenance <chr> NA, "aim_size_designation", "mm…
## $ Subject_Matter <chr> "Sea and seafaring Boats and sh…
## $ Year_opened <dbl> 2012, 1971, 1984, 1971, 2013, 1…
## $ Year_closed <dbl> 2023, 2007, 2023, 2012, 2023, 2…
## $ DOMUS_Subject_Matter <chr> NA, NA, NA, NA, NA, NA, NA, "sc…
## $ DOMUS_identifier <dbl> NA, NA, 1218, NA, NA, NA, 1528,…
## $ Primary_provenance_of_data <chr> "New", "aim", "domus", "aim", "…
## $ Identifier_used_in_primary_data_source <chr> NA, NA, "WM000019", NA, NA, NA,…
## $ Area_Deprivation_index <dbl> 2, 9, 8, NA, 8, 2, 6, 6, 5, 6, …
## $ Area_Deprivation_index_crime <dbl> 3, 8, 9, NA, 10, 1, 10, 3, 1, 1…
## $ Area_Deprivation_index_education <dbl> 1, 9, 8, NA, 7, 6, 8, 7, 7, 6, …
## $ Area_Deprivation_index_employment <dbl> 2, 8, 10, NA, 7, 3, 7, 6, 6, 7,…
## $ Area_Deprivation_index_health <dbl> 1, 8, 8, NA, 8, 2, 7, 8, 5, 7, …
## $ Area_Deprivation_index_housing <dbl> 4, 6, 5, NA, 7, 1, 8, 9, 1, 7, …
## $ Area_Deprivation_index_income <dbl> 5, 8, 8, NA, 8, 3, 5, 5, 7, 5, …
## $ Area_Deprivation_index_services <dbl> 5, 4, 1, NA, 4, 4, 2, 3, 9, 1, …
## $ Area_Geodemographic_group <chr> "Larger Towns and Cities", "Cou…
## $ Area_Geodemographic_group_code <chr> "2ar", "7ar", "3ar", NA, "7ar",…
## $ Area_Geodemographic_subgroup <chr> "Larger Towns and Cities", "Cou…
## $ Area_Geodemographic_subgroup_code <chr> "2a1r", "7a1r", "3a1r", NA, "7a…
## $ Area_Geodemographic_supergroup <chr> "Business Education and Heritag…
## $ Area_Geodemographic_supergroup_code <chr> "2r", "7r", "3r", NA, "7r", "5r…
## $ Notes <chr> NA, "Previously known as Philli…
museums %>%
count(Subject_Matter, sort = TRUE)
## # A tibble: 114 × 2
## Subject_Matter n
## <chr> <int>
## 1 Local Histories 887
## 2 Buildings Houses Large houses 414
## 3 Arts Fine and decorative arts 195
## 4 Transport Trains and railways 139
## 5 War and conflict Regiment 132
## 6 Mixed Encyclopaedic 119
## 7 Mixed Other 85
## 8 Personality Literary 83
## 9 Other 79
## 10 Buildings Houses Medium houses 73
## # ℹ 104 more rows
museums %>%
mutate(Subject_Matter = fct_lump(Subject_Matter, n = 20) %>%
fct_infreq() %>%
fct_rev()) %>%
ggplot() +
aes(y = Subject_Matter, fill = Subject_Matter) +
geom_bar(show.legend = FALSE) +
labs(y = NULL,
x = "Nr. of museums",
title = "22 most frequent museum topics in the UK")
museums %>%
ggplot() +
aes(x = Year_opened) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
museums %>%
ggplot() +
aes(x = Year_closed) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
set.seed(123)
museums %>%
filter(Year_closed < 2023) %>%
sample_n(15) %>%
mutate(Name_of_museum = fct_reorder(Name_of_museum, Year_opened),
duration = Year_closed - Year_opened) %>%
ggplot() +
aes(y = Name_of_museum) +
geom_segment(aes(x = Year_opened, xend = Year_closed, yend = Name_of_museum),
alpha = .7) +
geom_point(aes(x = Year_opened), color = "green", size = 2) +
geom_point(aes(x = Year_closed), color = "red", size = 2) +
geom_text(x = 1840, aes(label = paste0("Duration:", duration))) +
theme_light() +
xlim(c(1800, 2023)) +
labs(x = NULL, y = NULL,
title = "Opening and closing times for 15 random closed museums")