/day1/tidy-exercises.R

https://github.com/ikashnitsky/dataviz-mpidr · R · 56 lines · 5 code · 23 blank · 28 comment · 0 complexity · 748aeb2d68ecdfd2217a921989c1b7e1 MD5 · raw file

  1. #===============================================================================
  2. # 2021-06-14 -- MPIDR dataviz
  3. # Tidy exercises
  4. # Ilya Kashnitsky, ilya.kashnitsky@gmail.com
  5. #===============================================================================
  6. # attach the core tidyverse packages
  7. library(tidyverse)
  8. # Import the Excel workbook with readxl -----------------------------------
  9. library(readxl) # attached explicitly: library(tidyverse) does not attach it
  10. # list the worksheets contained in the workbook
  11. excel_sheets('data/data-denmark.xlsx')
  12. deaths <- read_excel('data/data-denmark.xlsx', sheet = 'deaths')
  13. pop <- read_excel('data/data-denmark.xlsx', sheet = 'pop')
  14. # Ex 1. deaths dataframe --------------------------------------------------
  15. # - subset only total number of deaths among men in year 2003 (filter)
  16. # Q: which region had the largest number of deaths?
  17. # Ex 2. pop dataframe -----------------------------------------------------
  18. # - subset only the year 2004
  19. # - transform to wide format using the column "sex" (pivot_wider)
  20. # - get rid of the column for both sexes combined
  21. # - calculate the sex ratio (males to females)
  22. # Q: in which region is the sex ratio (SR) highest at ages 15, 45, and over 75 (coded as "open")?
  23. # Ex 3. joined dataframe --------------------------------------------------
  24. # - join the two dataframes (left_join OR inner_join)
  25. # - calculate age-specific death rates (ASDR)
  26. # - subset only the ages 15-59 and year 2001
  27. # Q: what is the average ratio of male ASDR to female ASDR in each region?
  28. # Tip: use summarize
  29. # Ex 4. joined dataframe (df) ---------------------------------------------
  30. # - subset only the rows for both sexes combined
  31. # - transform to wide format using the column "year" (pivot_wider)
  32. # - calculate the growth of ASDR between 2001 and 2005
  33. # Q: in which region was the average growth/decrease in ASDR largest?