broadway-scrape.R | searchcode

/2020-week18/deprecated/broadway-scrape.R

https://github.com/gkaramanis/tidytuesday · R · 29 lines · 23 code · 6 blank · 0 comment · 0 complexity · f9f8ff97511e166c0dd7b35187e90847 MD5 · raw file


library(rvest)
library(tidyverse)

# Address of the page to scrape.
url <- "https://www.broadway.org/broadway-theatres"

# Fetch the page and parse it into a document that rvest can query.
webpage <- url %>%
  read_html()

# Select every <input> nested in a <div> under `.theatre-descr` once, and
# reuse the result for both vectors below instead of querying the page twice.
theatre_inputs <- html_nodes(webpage, ".theatre-descr div input")

# An empty match means the selector no longer fits the page; stop here rather
# than letting the reshaping step fail with a less helpful message.
if (length(theatre_inputs) == 0) {
  stop(
    "No nodes matched '.theatre-descr div input'; the page layout may have changed.",
    call. = FALSE
  )
}

# One named character vector of attributes per <input> node.
input_attrs <- html_attrs(theatre_inputs)

# Take the 2nd and 3rd attribute of *every* node.
# `pluck(2)` is not used: with tidyverse attached after rvest it resolves to
# purrr::pluck(), which returns the attributes of the second *node* instead of
# the second attribute of each node.
# Passing an integer to map_chr() extracts by position from each element and
# fails loudly if a node has fewer attributes than expected.
#
# Later code pivots on `broadway_type` and expects the values `name`,
# `address1`, `longitude` and `latitude`, so the 2nd attribute is presumably
# the field name and the 3rd its value -- TODO confirm against the page markup.
broadway_type <- map_chr(input_attrs, 2)
broadway_value <- map_chr(input_attrs, 3)

# Number of consecutive (type, value) rows that describe a single theatre.
# The reshaping below relies on the scraped fields arriving in fixed-size runs,
# one run per theatre.
fields_per_theatre <- 4L

# Guard against a partial run: a length that is not a whole number of theatres
# would silently misalign fields (or fail inside tibble with an opaque error).
if (length(broadway_type) %% fields_per_theatre != 0) {
  stop(
    "Expected the scraped fields to come in groups of ", fields_per_theatre,
    " per theatre, but got ", length(broadway_type), " values.",
    call. = FALSE
  )
}

# Running theatre id: 1, 1, 1, 1, 2, 2, 2, 2, ...
# Derived from the data length instead of a hard-coded count of theatres, so
# the script keeps working when the page adds or removes a venue.
theatre_id <- rep(
  seq_len(length(broadway_type) %/% fields_per_theatre),
  each = fields_per_theatre
)

# Long (type, value) pairs -> one row per theatre with one column per type,
# then keep and rename the four columns of interest and fix their types.
# `theatre_id` only serves as the pivot key and is dropped by select().
broadway_theatres <- tibble(
  theatre_id = theatre_id,
  type = broadway_type,
  value = broadway_value
) %>%
  pivot_wider(names_from = type, values_from = value) %>%
  select(theatre = name, address = address1, longitude, latitude) %>%
  mutate(
    theatre = as.character(theatre),
    address = as.character(address),
    longitude = as.numeric(longitude),
    latitude = as.numeric(latitude)
  )

# Write the tidied theatre table to the week's data folder; here::here()
# builds the path relative to the project root.
csv_path <- here::here("2020-week18", "data", "broadway-theatres.csv")
write_csv(broadway_theatres, csv_path)