/tidytuesday_201913_seattle_pets.r

https://github.com/spren9er/tidytuesday · R · 73 lines · 67 code · 6 blank · 0 comment · 0 complexity · f657c742dc2fe828bbe61ca57df8bd44 MD5 · raw file

  1. library(tidyverse)
  2. library(lubridate)
  3. library(broom)
  4. library(ggrepel)
  # Download the TidyTuesday (2019-03-26) Seattle pets CSV straight from
  # GitHub. The URL is assembled from two pieces only to keep each source
  # line short; joined with "/" they form the full raw-file address.
  raw_data <- c(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday",
    "master/data/2019/2019-03-26/seattle_pets.csv"
  ) %>%
    paste(collapse = "/") %>%
    read_csv()
  # Count licences per pet name, split into one column for dogs and one for
  # cats.
  #   * names are title-cased first so that case variants are tallied together
  #   * only cats and dogs with a non-missing name, licensed in 2015 or later,
  #     are kept
  #   * a name that occurs for only one species gets 0 for the other
  # Result columns: animal_name, dog, cat.
  pets_totals <- raw_data %>%
    mutate(
      animals_name = str_to_title(animals_name),
      year = year(mdy(license_issue_date))
    ) %>%
    filter(
      year >= 2015,
      !is.na(animals_name),
      species %in% c("Cat", "Dog")
    ) %>%
    count(species, animals_name) %>%
    pivot_wider(
      names_from = species,
      values_from = n,
      values_fill = list(n = 0)
    ) %>%
    select(animal_name = animals_name, dog = Dog, cat = Cat)
  # Linear regression of the per-name cat count on the per-name dog count.
  # The two fitted coefficients are pulled out separately because the plot
  # draws the regression line from them via geom_abline().
  model <- lm(cat ~ dog, data = pets_totals)
  intercept <- coef(model)[1]
  slope <- coef(model)[2]
  # Add the per-row model output from augment() (including `.resid`) to the
  # name counts, then derive the columns that drive the plot labels:
  #   total        - dogs plus cats carrying the name
  #   most_popular - TRUE when total is at least 230
  #   popular      - TRUE when total is at least 70
  #   sign         - negated sign of the residual, as a factor: 1 when the cat
  #                  count lies below the fitted line, -1 when it lies above
  pets_popular <- model %>%
    augment(data = pets_totals) %>%
    mutate(
      total = dog + cat,
      most_popular = total >= 230,
      popular = total >= 70,
      sign = factor(-sign(.resid))
    )
  # Text scatter plot: each name is placed at (number of dogs, number of cats).
  # Layers, in drawing order:
  #   1. dashed regression line built from `intercept` and `slope`
  #   2. repelled labels for names that are popular but not most popular
  #   3. fixed (non-repelled) labels for the most popular names
  #   4. small repelled boxes under the most popular names showing
  #      "total (dogs/cats)"
  # Label size scales with `total`; label colour follows `sign`.
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines; `size` is kept here so behaviour is unchanged - confirm the target
  # ggplot2 version before switching.
  ggplot(pets_popular, aes(x = dog, y = cat)) +
    geom_abline(
      slope = slope, intercept = intercept, linetype = 2, size = 0.25
    ) +
    geom_text_repel(
      mapping = aes(label = animal_name, size = total, color = sign),
      data = filter(pets_popular, popular, !most_popular),
      fontface = "bold",
      segment.size = 0.25,
      segment.alpha = 0.35,
      seed = 6,
      show.legend = FALSE
    ) +
    geom_text(
      mapping = aes(label = animal_name, size = total, color = sign),
      data = filter(pets_popular, most_popular),
      fontface = "bold",
      show.legend = FALSE
    ) +
    geom_label_repel(
      mapping = aes(label = paste0(total, " (", dog, "/", cat, ")")),
      data = filter(pets_popular, most_popular),
      fontface = "bold",
      label.padding = 0.2,
      size = 2,
      nudge_y = -3.5,
      show.legend = FALSE
    ) +
    # Make sure both axes include the origin.
    expand_limits(x = 0, y = 0) +
    scale_y_continuous(breaks = seq(0, 100, by = 25)) +
    scale_size_continuous(range = c(1.5, 4.25)) +
    labs(
      title = "Seattle's Most Popular Dog and Cat Names 2015-2018",
      subtitle = "#tidytuesday 13|2019",
      caption = "© 2019 spren9er",
      x = "Number of Dogs",
      y = "Number of Cats"
    )
  # Save the plot as a 600 dpi PNG with a transparent background. No `plot`
  # argument is passed, so ggsave() falls back to its default, the last plot.
  # ggsave() does not create a missing target directory by default, so make
  # sure it exists first; dir.create() is a silent no-op when the directory
  # is already there.
  output_file <- "images/tidytuesday_201913_seattle_pets.png"
  dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE)
  ggsave(output_file, dpi = 600, bg = "transparent")