/day5/pyramids.R

https://github.com/ikashnitsky/dataviz-mpidr · R · 168 lines · 120 code · 33 blank · 15 comment · 10 complexity · 0477834320bb74d951b1c9cce54f3a5d MD5 · raw file

  1. #===============================================================================
  2. # 2021-06-18 -- MPIDR dataviz
  3. # Population pyramid example
  4. # Ilya Kashnitsky, ilya.kashnitsky@gmail.com
  5. #===============================================================================
  6. # load the package
  7. library(tidyverse)
  8. library(magrittr)
  9. # download eurostat data
  10. library(eurostat)
  11. eu_pop <- get_eurostat("demo_pjan")
  12. # clean the dataset
  13. df_dk <- eu_pop %>%
  14. filter(
  15. !age %in% c("TOTAL", "UNK", "Y_OPEN"),
  16. geo == "DK"
  17. ) %>%
  18. mutate(
  19. year = time %>% lubridate::year(),
  20. age = age %>%
  21. paste %>%
  22. str_replace("Y_LT1", "Y_0") %>%
  23. str_remove("_") %>%
  24. str_remove("Y") %>%
  25. as.numeric()
  26. ) %>%
  27. arrange(time, sex, age)
  28. save(df_dk, file = "data/df_dk.rda")
  29. df_dk %>%
  30. filter(
  31. year == 2018, sex == "T"
  32. ) %>%
  33. ggplot(aes(age, values))+
  34. geom_col()
  35. # both sex, coord_flip
  36. df_dk %>%
  37. filter(
  38. year == 2018, !sex == "T"
  39. ) %>%
  40. pivot_wider(names_from = sex, values_from = values) %>%
  41. ggplot(aes(age))+
  42. geom_step(aes(y = `F`), color = "purple")+
  43. geom_step(aes(y = -M), color = "orange")+
  44. coord_flip()+
  45. scale_y_continuous(
  46. breaks = seq(-40000, 40000, 10000),
  47. labels = seq(-40000, 40000, 10000) %>% abs %>% paste %>%
  48. str_replace("0000", "0K")
  49. )
  50. # two years and annotations
  51. df_dk %>%
  52. filter(
  53. year %in% c(1960, 2018), !sex == "T"
  54. ) %>%
  55. spread(sex, values) %>%
  56. ggplot(aes(age))+
  57. geom_hline(yintercept = 0, size = .5, color = "gray20")+
  58. geom_path(aes(y = `F`, linetype = year %>% factor), color = "purple")+
  59. geom_path(aes(y = -M, linetype = year %>% factor), color = "orange")+
  60. coord_flip()+
  61. scale_y_continuous(
  62. breaks = seq(-40000, 40000, 10000),
  63. labels = seq(-40000, 40000, 10000) %>%
  64. abs %>% divide_by(1000) %>% as.character() %>% paste0(., "K")
  65. )+
  66. annotate(
  67. geom = "text", x = 100, y = -4e4, label = "MALES", hjust = 0, vjust = 1
  68. )+
  69. annotate(
  70. geom = "text", x = 100, y = 4e4, label = "FEMALES", hjust = 1, vjust = 1
  71. )
  72. # compare two countries ---------------------------------------------------
  73. df_two <- eu_pop %>%
  74. filter(
  75. !age %in% c("TOTAL", "UNK", "Y_OPEN"),
  76. geo %in% c("IT", "BG")
  77. ) %>%
  78. mutate(
  79. year = time %>% lubridate::year(),
  80. age = age %>%
  81. paste %>%
  82. str_replace("Y_LT1", "Y_0") %>%
  83. str_remove("_") %>%
  84. str_remove("Y") %>%
  85. as.numeric()
  86. ) %>%
  87. arrange(time, sex, age) %>%
  88. group_by(sex, geo, time) %>%
  89. mutate(values = values / sum(values))
  90. df_two %>%
  91. filter(
  92. year == 2018, sex == "T"
  93. ) %>%
  94. ggplot(aes(age, values, color = geo))+
  95. geom_step()+
  96. coord_cartesian(expand = F)+
  97. scale_y_continuous(labels = scales::percent)+ # hrbrthemes::scale_y_percent()+
  98. theme_minimal()+
  99. theme(legend.position = c(.9,.9))
  100. # wrap the comparison as a function
  101. compare_pop <- function(cntr = c("IT", "BG")) {
  102. df_sub <- eu_pop %>%
  103. filter(
  104. !age %in% c("TOTAL", "UNK", "Y_OPEN"),
  105. geo %in% cntr
  106. ) %>%
  107. mutate(
  108. year = time %>% lubridate::year(),
  109. age = age %>%
  110. paste %>%
  111. str_replace("Y_LT1", "Y_0") %>%
  112. str_remove("_") %>%
  113. str_remove("Y") %>%
  114. as.numeric()
  115. ) %>%
  116. arrange(time, sex, age) %>%
  117. group_by(sex, geo, time) %>%
  118. mutate(values = values / sum(values))
  119. df_sub %>%
  120. filter(
  121. year == 2018, sex == "T"
  122. ) %>%
  123. ggplot(aes(age, values, color = geo))+
  124. geom_step()+
  125. coord_cartesian(expand = F)+
  126. scale_y_continuous(labels = scales::percent)+
  127. theme_minimal()+
  128. theme(legend.position = c(.9,.8))
  129. }
  130. c("UK", "ES", "IT", "DE", "FR") %>% compare_pop()
  131. # a glance at interactive plotly magic
  132. gg <- ggplot2::last_plot()
  133. plotly::ggplotly(gg)
  134. # plotly book
  135. # https://plotly-r.com/introduction.html