Read data

museums_raw <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-22/museums.csv')
## Rows: 4191 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (24): museum_id, Name_of_museum, Address_line_1, Address_line_2, Village...
## dbl (11): Latitude, Longitude, DOMUS_identifier, Area_Deprivation_index, Are...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Preprocess data

museums <-
    museums_raw %>% 
    mutate(Subject_Matter = str_replace_all(Subject_Matter, 
                                            pattern = "_|-", 
                                            replacement = " "),
           Year_opened = parse_number(Year_opened),
           Year_closed = parse_number(Year_closed) %>% 
                         if_else(. == 9999, true = 2023, false = .))
    
museums %>% glimpse()
## Rows: 4,191
## Columns: 35
## $ museum_id                              <chr> "mm.New.1", "mm.aim.1230", "mm.…
## $ Name_of_museum                         <chr> "Titanic Belfast", "The Woodlan…
## $ Address_line_1                         <chr> "1 Olympic Way", NA, "Warwick C…
## $ Address_line_2                         <chr> NA, "Brokerswood", "Horticultur…
## $ `Village,_Town_or_City`                <chr> "Belfast", "nr Westbury", "More…
## $ Postcode                               <chr> "BT3 9EP", "BA13 4EH", "CV35 9B…
## $ Latitude                               <dbl> 54.60808, 51.27090, 52.19715, 4…
## $ Longitude                              <dbl> -5.909915, -2.231863, -1.555528…
## $ Admin_area                             <chr> "/Northern Ireland/Belfast (NI …
## $ Accreditation                          <chr> "Unaccredited", "Unaccredited",…
## $ Governance                             <chr> "Independent-Not_for_profit", "…
## $ Size                                   <chr> "large", "small", "medium", "sm…
## $ Size_provenance                        <chr> NA, "aim_size_designation", "mm…
## $ Subject_Matter                         <chr> "Sea and seafaring Boats and sh…
## $ Year_opened                            <dbl> 2012, 1971, 1984, 1971, 2013, 1…
## $ Year_closed                            <dbl> 2023, 2007, 2023, 2012, 2023, 2…
## $ DOMUS_Subject_Matter                   <chr> NA, NA, NA, NA, NA, NA, NA, "sc…
## $ DOMUS_identifier                       <dbl> NA, NA, 1218, NA, NA, NA, 1528,…
## $ Primary_provenance_of_data             <chr> "New", "aim", "domus", "aim", "…
## $ Identifier_used_in_primary_data_source <chr> NA, NA, "WM000019", NA, NA, NA,…
## $ Area_Deprivation_index                 <dbl> 2, 9, 8, NA, 8, 2, 6, 6, 5, 6, …
## $ Area_Deprivation_index_crime           <dbl> 3, 8, 9, NA, 10, 1, 10, 3, 1, 1…
## $ Area_Deprivation_index_education       <dbl> 1, 9, 8, NA, 7, 6, 8, 7, 7, 6, …
## $ Area_Deprivation_index_employment      <dbl> 2, 8, 10, NA, 7, 3, 7, 6, 6, 7,…
## $ Area_Deprivation_index_health          <dbl> 1, 8, 8, NA, 8, 2, 7, 8, 5, 7, …
## $ Area_Deprivation_index_housing         <dbl> 4, 6, 5, NA, 7, 1, 8, 9, 1, 7, …
## $ Area_Deprivation_index_income          <dbl> 5, 8, 8, NA, 8, 3, 5, 5, 7, 5, …
## $ Area_Deprivation_index_services        <dbl> 5, 4, 1, NA, 4, 4, 2, 3, 9, 1, …
## $ Area_Geodemographic_group              <chr> "Larger Towns and Cities", "Cou…
## $ Area_Geodemographic_group_code         <chr> "2ar", "7ar", "3ar", NA, "7ar",…
## $ Area_Geodemographic_subgroup           <chr> "Larger Towns and Cities", "Cou…
## $ Area_Geodemographic_subgroup_code      <chr> "2a1r", "7a1r", "3a1r", NA, "7a…
## $ Area_Geodemographic_supergroup         <chr> "Business Education and Heritag…
## $ Area_Geodemographic_supergroup_code    <chr> "2r", "7r", "3r", NA, "7r", "5r…
## $ Notes                                  <chr> NA, "Previously known as Philli…

Start to exploring the data

museums %>% 
    count(Subject_Matter, sort = TRUE)
## # A tibble: 114 × 2
##    Subject_Matter                     n
##    <chr>                          <int>
##  1 Local Histories                  887
##  2 Buildings Houses Large houses    414
##  3 Arts Fine and decorative arts    195
##  4 Transport Trains and railways    139
##  5 War and conflict Regiment        132
##  6 Mixed Encyclopaedic              119
##  7 Mixed Other                       85
##  8 Personality Literary              83
##  9 Other                             79
## 10 Buildings Houses Medium houses    73
## # ℹ 104 more rows
museums %>% 
    mutate(Subject_Matter = fct_lump(Subject_Matter, n = 20) %>% 
                            fct_infreq() %>% 
                            fct_rev()) %>% 
    ggplot() +
    aes(y = Subject_Matter, fill = Subject_Matter) +
    geom_bar(show.legend = FALSE) +
    labs(y = NULL, 
         x = "Nr. of museums",
         title = "22 most frequent museum topics in the UK")

Check opening and closing times

museums %>% 
    ggplot() +
    aes(x = Year_opened) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

museums %>% 
    ggplot() +
    aes(x = Year_closed) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Plot opening and closing times and duration

set.seed(123)

museums %>% 
    filter(Year_closed < 2023) %>% 
    sample_n(15) %>% 
    mutate(Name_of_museum = fct_reorder(Name_of_museum, Year_opened),
           duration = Year_closed - Year_opened) %>% 
    ggplot() +
    aes(y = Name_of_museum) +
    geom_segment(aes(x = Year_opened, xend = Year_closed, yend = Name_of_museum),
                 alpha = .7) +
    geom_point(aes(x = Year_opened), color = "green", size = 2) +
    geom_point(aes(x = Year_closed), color = "red", size = 2) +
    geom_text(x = 1840, aes(label = paste0("Duration:", duration))) +
    theme_light() +
    xlim(c(1800, 2023)) +
    labs(x = NULL, y = NULL,
         title = "Opening and closing times for 15 random closed museums")