/course-materials/application-exercises/ae-09-uoeart-functions/02-functionalize.R

https://github.com/rstudio-education/datascience-box · R · 39 lines · 19 code · 12 blank · 8 comment · 0 complexity · 8ceed7cca81a46919b37cd4f7389cadb MD5 · raw file

  1. # load packages ----------------------------------------------------------------
  2. library(tidyverse)
  3. library(rvest)
  4. # function: scrape_art_info() --------------------------------------------------
  5. scrape_art_info <- function(x){
  6. # read page at url ----
  7. page <- read_html(x)
  8. # scrape headers ----
  9. headers <- page %>%
  10. html_nodes("th") %>%
  11. html_text()
  12. # scrape values ----
  13. values <- page %>%
  14. html_nodes("td") %>%
  15. html_text() %>%
  16. str_squish()
  17. # put together in a tibble and add link to help keep track ----
  18. tibble(headers, values) %>%
  19. pivot_wider(names_from = headers, values_from = values) %>%
  20. add_column(link = x)
  21. }
  22. # load data to get links -------------------------------------------------------
  23. uoe_art <- read_csv("data/uoe_art.csv")
  24. # apply function ---------------------------------------------------------------
  25. scrape_art_info(uoe_art$link[1])
  26. scrape_art_info(___)
  27. scrape_art_info(___)