# /scripts/tema2/03-tidy-data.R
#
# https://github.com/joanby/tidyverse-data-science · R · 148 lines · 91 code · 43 blank · 14 comment · 2 complexity · 6a9c4221c04515d8dfece924aebeef7c MD5 · raw file

  # Attach the tidyverse packages used by the rest of the script.
  library(tidyverse)

  # Read the population data; the path is relative to the working directory.
  table <- read_csv("data/population.csv")

  # View() opens a data viewer, which is only available in interactive
  # sessions; skip it when the script runs in batch mode.
  if (interactive()) {
    View(table)
  }

  # Rate of cases per 10,000 units of `population`.
  table %>%
    mutate(rate = cases / population * 10000)

  # Total cases per year (each row weighted by its `cases` value).
  table %>%
    count(year, wt = cases)
  # Cases over time: one grey line per country, points coloured by country.
  ggplot(table1, aes(x = year, y = cases)) +
    geom_line(aes(group = country), color = "grey") +
    geom_point(aes(color = country))
  # Gather the two year columns of table4a into (year, cases) pairs.
  tidy4a <- table4a %>%
    gather(`1999`, `2000`, key = "year", value = "cases")

  # Same reshaping for table4b, whose values are populations.
  tidy4b <- table4b %>%
    gather(`1999`, `2000`, key = "year", value = "population")

  # Combine both long tables; the join keys are spelled out so the join does
  # not depend on whichever column names happen to be shared.
  tidy4 <- left_join(tidy4a, tidy4b, by = c("country", "year"))
  # Spread table2 so every distinct `type` becomes a column holding `count`.
  spread(table2, key = type, value = count)
  # Exercise 4
  # Quarterly returns: four quarters for 2016 and 2017, one for 2018.
  roi <- tibble(
    year = c(rep(c(2016, 2017), each = 4), 2018),
    quarter = rep(c(1, 2, 3, 4), length.out = 9),
    return = rnorm(9, mean = 0.5, sd = 1)
  )

  # Round trip: spread the years into columns, then gather them back.
  roi %>%
    spread(key = year, value = return) %>%
    gather(key = "year", value = "return", `2016`:`2018`)
  # Exercise 5
  # Open the help page for spread(); its `convert` argument is described as:
  help("spread")
  #   convert: If TRUE, type.convert() with asis = TRUE will be run on each
  #   of the new columns. This is useful if the value column was a mix of
  #   variables that was coerced to a string. If the class of the value
  #   column was factor or date, note that will not be true of the new
  #   columns that are produced, which are coerced to character before type
  #   conversion.
  # Exercise 7
  # Key/value data in which the same (name, key) pair appears more than once.
  people <- tibble(
    name = c(rep("Juan Gabriel", 4), rep("Ricardo", 2)),
    key = c("age", "weight", "age", "weight", "age", "age"),
    value = c(18, 58, 30, 71, 55, 75)
  )
  # Exercise 8
  # Counts by pregnancy status, with one column per sex.
  pregnancy <- tibble(
    pregnant = c("yes", "no"),
    male = c(NA, 85),
    female = c(32, 43)
  )

  # Gather the sex columns into rows, then recode both categorical variables
  # as logicals and drop the now-redundant `sex` column.
  pregnancy %>%
    gather(key = sex, value = count, "male", "female") %>%
    mutate(
      pregnant = pregnant == "yes",
      female = sex == "female"
    ) %>%
    select(-sex)
  ## PIVOTING
  # pivot_longer() version of the table4a / table4b reshaping.
  tidy4a <- table4a %>%
    pivot_longer(c(`1999`, `2000`), names_to = "year", values_to = "cases")
  tidy4b <- table4b %>%
    pivot_longer(c(`1999`, `2000`), names_to = "year", values_to = "population")

  # Join on explicit keys rather than on whichever column names are shared.
  left_join(tidy4a, tidy4b, by = c("country", "year"))
  # Widen table2: one column per `type`, filled with the matching `count`.
  pivot_wider(table2, names_from = type, values_from = count)
  ## SEPARATE and UNITE
  # Split `rate` at the slash into cases and population, then split `year`
  # after its second character into century and year.
  table3 %>%
    separate(
      col = rate,
      into = c("cases", "population"),
      sep = "/",
      convert = TRUE
    ) %>%
    separate(
      col = year,
      into = c("century", "year"),
      sep = 2,
      convert = TRUE
    )
  # Paste century and year back together, with no separator, as `new_year`.
  unite(table5, col = new_year, century, year, sep = "")
  # One row has more pieces than target columns: extra = "drop".
  tibble(x = c("a,b,c", "d,e,f,g", "h,i,j")) %>%
    separate(col = x, into = c("x", "y", "z"), extra = "drop")

  # One row has fewer pieces than target columns: fill = "right".
  tibble(x = c("a,b,c", "d,e", "f,g,h")) %>%
    separate(col = x, into = c("x", "y", "z"), fill = "right")
  # Quarterly returns: four quarters for 2016 and 2017, one for 2018.
  roi <- tibble(
    year = c(rep(2016, 4), rep(2017, 4), 2018),
    quarter = c(rep(c(1, 2, 3, 4), 2), 1),
    return = rnorm(9, mean = 0.5, sd = 1)
  )

  # Make the 7th observation (2017, quarter 3) explicitly missing.
  roi$return[7] <- NA

  # Spread and gather again, this time dropping rows whose value is NA.
  roi %>%
    spread(year, return) %>%
    gather(year, return, `2016`:`2018`, na.rm = TRUE)

  # Expand to every year/quarter combination.
  roi %>%
    complete(year, quarter)
  # Treatment log where `name` is recorded only on a person's first row.
  treatments <- tibble(
    name = c("Juan Gabriel", NA, NA, "Ricardo", NA),
    treatment = c(1, 2, 3, 1, 2),
    response = c(8, 10, 4, 7, 9)
  )

  # Fill the missing names in the `name` column.
  fill(treatments, name)
  # Tidy the WHO data set shipped with tidyr.
  tidyr::who %>%
    # Stack the count columns into key/cases rows, dropping missing counts.
    gather(
      key = "key",
      value = "cases",
      new_sp_m014:newrel_f65,
      na.rm = TRUE
    ) %>%
    # Rewrite "newrel" as "new_rel" so every key splits on "_" into 3 parts.
    mutate(key = stringr::str_replace(key, "newrel", "new_rel")) %>%
    separate(col = key, into = c("new", "type", "sexage"), sep = "_") %>%
    select(-c(new, iso2, iso3)) %>%
    # The first character of `sexage` is the sex, the remainder the age code.
    separate(col = sexage, into = c("sex", "age"), sep = 1)