analysis.R | searchcode

/2021/week11/R/analysis.R

https://github.com/jkaupp/tidytuesdays · R · 61 lines · 50 code · 10 blank · 1 comment · 5 complexity · a3ba8b113b524f6a060400fc3cb7d7ae MD5 · raw file

library(tidyverse)
library(jkmisc)
library(glue)
library(here)

movies <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-09/movies.csv')

test_counts <- movies %>% 
  separate_rows(writer, sep = ",") %>% 
  mutate(across(writer, str_trim)) %>% 
  filter(!str_detect(writer, "novel"),
         !str_detect(writer, "characters")) %>% 
  mutate(writer = str_remove_all(writer, "\\s*\\(.*?\\)\\s*")) %>% 
  filter(!is.na(writer), writer != "N/A") %>% 
  count(writer, binary, sort = TRUE)

overall_tests <- test_counts %>% 
  group_by(writer) %>% 
  summarize(total = sum(n))

writer_percentages <- test_counts %>% 
  left_join(overall_tests) %>% 
  mutate(percentage = n/total) %>% 
  filter(total > 4) %>% 
  ungroup() %>% 
  complete(nesting(writer, total), binary, fill = list(n = 0, percentage = 0)) 
  

order <- writer_percentages %>% 
  filter(binary == "PASS") %>% 
  #pivot_wider(names_from = binary, values_from = percentage, values_fill = 0) %>% 
  arrange(percentage) %>% 
  pull(writer) 

plot_data <- writer_percentages %>% 
  mutate(writer = factor(writer, order)) %>% 
  filter(!(binary == "PASS" & n == 0))

labels <- writer_percentages %>% 
  mutate(writer = factor(writer, order)) %>% 
  group_by(writer) %>% 
  slice_max(percentage, with_ties = FALSE) %>% 
  mutate(label_x = if_else(binary == 'FAIL', 0.995, 0.005))

plot <- ggplot(plot_data, aes(x = percentage, y = writer, fill = binary, color = binary)) +
  geom_col(show.legend = FALSE) +
  geom_text(data = labels, aes(x = label_x, y = writer, label = writer), hjust = "inward", color = "white", family = "Atkinson Hyperlegible", size = 3) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  scale_fill_manual(values = rev(c("#009C99", "#3A023B"))) +
  scale_color_manual(values = rev(c("#009C99", "#3A023B"))) +
  scale_x_continuous(labels = scales::percent, expand = c(0,0.01)) +
  labs(x = NULL,
       y = NULL,
       title = "Gender Bias in Script and Screenplay Writers",
       subtitle = glue("Illustrated below is a stacked bar chart of the percentage of works that {highlight_text('pass', '#009C99', 'b')} or {highlight_text('fail', '#3A023B', 'b')}<br>the Bechdel Test for writers with five or greater writing credits in evaluated films."),
       caption = "**Data**: FiveThirtyEight | **Graphic**: @jakekaupp") +
  theme_jk(grid = FALSE,
           markdown = TRUE,subtitle_family = "Atkinson Hyperlegible") +
  theme(axis.text.y = element_blank())

ggsave(here("2021", "week11", "tw11_plot.png"), plot, width = 8, height = 16, device = ragg::agg_png())