/2020-week18/deprecated/broadway-scrape.R

https://github.com/gkaramanis/tidytuesday · R · 29 lines · 23 code · 6 blank · 0 comment · 0 complexity · f9f8ff97511e166c0dd7b35187e90847 MD5 · raw file

  library(rvest)
  library(tidyverse)

  # Scrape the theatre listing from broadway.org and save one row per theatre
  # (name, address, longitude, latitude) as a CSV file.
  url <- "https://www.broadway.org/broadway-theatres"
  webpage <- read_html(url)

  # Query the <input> nodes once; html_attrs() yields one attribute vector
  # per node.
  input_attrs <- webpage %>%
    html_nodes(".theatre-descr div input") %>%
    html_attrs()

  # Extract the 2nd and 3rd attribute of EVERY node. A bare pluck(2) would not
  # do this: library(tidyverse) attaches purrr after rvest, so pluck() resolves
  # to purrr::pluck(), which returns the whole attribute vector of the second
  # node only. map_chr() with an integer index extracts by position per node
  # and fails loudly if a node has too few attributes.
  # The 2nd attribute supplies the field label ("name", "address1",
  # "longitude", "latitude" are the ones used below); the 3rd its value.
  broadway_type <- map_chr(input_attrs, 2)
  broadway_value <- map_chr(input_attrs, 3)

  # Each theatre is expected to contribute a fixed number of consecutive
  # inputs. Check that up front instead of hard-coding the theatre count.
  fields_per_theatre <- 4L
  stopifnot(
    length(broadway_type) > 0,
    length(broadway_type) %% fields_per_theatre == 0
  )

  broadway_theatres <- tibble(type = broadway_type, value = broadway_value) %>%
    # Running theatre index: rows 1-4 -> 1, rows 5-8 -> 2, ... It keeps the
    # fields of one theatre together when pivoting to wide format.
    mutate(theatre_id = (row_number() - 1L) %/% fields_per_theatre + 1L) %>%
    pivot_wider(names_from = type, values_from = value) %>%
    select(theatre = name, address = address1, longitude, latitude) %>%
    mutate(
      theatre = as.character(theatre),
      address = as.character(address),
      # Attribute values are scraped as text; coordinates must be numeric.
      longitude = as.numeric(longitude),
      latitude = as.numeric(latitude)
    )

  write_csv(
    broadway_theatres,
    here::here("2020-week18", "data", "broadway-theatres.csv")
  )