/2021/week11/R/analysis.R

https://github.com/jkaupp/tidytuesdays · R · 61 lines · 50 code · 10 blank · 1 comment · 5 complexity · a3ba8b113b524f6a060400fc3cb7d7ae MD5 · raw file

  1. library(tidyverse)
  2. library(jkmisc)
  3. library(glue)
  4. library(here)
  5. movies <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-09/movies.csv')
  6. test_counts <- movies %>%
  7. separate_rows(writer, sep = ",") %>%
  8. mutate(across(writer, str_trim)) %>%
  9. filter(!str_detect(writer, "novel"),
  10. !str_detect(writer, "characters")) %>%
  11. mutate(writer = str_remove_all(writer, "\\s*\\(.*?\\)\\s*")) %>%
  12. filter(!is.na(writer), writer != "N/A") %>%
  13. count(writer, binary, sort = TRUE)
  14. overall_tests <- test_counts %>%
  15. group_by(writer) %>%
  16. summarize(total = sum(n))
  17. writer_percentages <- test_counts %>%
  18. left_join(overall_tests) %>%
  19. mutate(percentage = n/total) %>%
  20. filter(total > 4) %>%
  21. ungroup() %>%
  22. complete(nesting(writer, total), binary, fill = list(n = 0, percentage = 0))
  23. order <- writer_percentages %>%
  24. filter(binary == "PASS") %>%
  25. #pivot_wider(names_from = binary, values_from = percentage, values_fill = 0) %>%
  26. arrange(percentage) %>%
  27. pull(writer)
  28. plot_data <- writer_percentages %>%
  29. mutate(writer = factor(writer, order)) %>%
  30. filter(!(binary == "PASS" & n == 0))
  31. labels <- writer_percentages %>%
  32. mutate(writer = factor(writer, order)) %>%
  33. group_by(writer) %>%
  34. slice_max(percentage, with_ties = FALSE) %>%
  35. mutate(label_x = if_else(binary == 'FAIL', 0.995, 0.005))
  36. plot <- ggplot(plot_data, aes(x = percentage, y = writer, fill = binary, color = binary)) +
  37. geom_col(show.legend = FALSE) +
  38. geom_text(data = labels, aes(x = label_x, y = writer, label = writer), hjust = "inward", color = "white", family = "Atkinson Hyperlegible", size = 3) +
  39. geom_vline(xintercept = 0.5, linetype = "dashed") +
  40. scale_fill_manual(values = rev(c("#009C99", "#3A023B"))) +
  41. scale_color_manual(values = rev(c("#009C99", "#3A023B"))) +
  42. scale_x_continuous(labels = scales::percent, expand = c(0,0.01)) +
  43. labs(x = NULL,
  44. y = NULL,
  45. title = "Gender Bias in Script and Screenplay Writers",
  46. subtitle = glue("Illustrated below is a stacked bar chart of the percentage of works that {highlight_text('pass', '#009C99', 'b')} or {highlight_text('fail', '#3A023B', 'b')}<br>the Bechdel Test for writers with five or greater writing credits in evaluated films."),
  47. caption = "**Data**: FiveThirtyEight | **Graphic**: @jakekaupp") +
  48. theme_jk(grid = FALSE,
  49. markdown = TRUE,subtitle_family = "Atkinson Hyperlegible") +
  50. theme(axis.text.y = element_blank())
  51. ggsave(here("2021", "week11", "tw11_plot.png"), plot, width = 8, height = 16, device = ragg::agg_png())