/inst/doc/tidy-data.R

https://github.com/cran/tidyr · R · 128 lines · 77 code · 24 blank · 27 comment · 0 complexity · ba24166c812a2a242f1df3ef6dab9d7c MD5 · raw file

  1. ## ---- echo = FALSE------------------------------------------------------------
  2. knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
  3. set.seed(1014)
  4. options(dplyr.print_max = 10)
  5. ## -----------------------------------------------------------------------------
  6. classroom <- read.csv("classroom.csv", stringsAsFactors = FALSE)
  7. classroom
  8. ## -----------------------------------------------------------------------------
  9. read.csv("classroom2.csv", stringsAsFactors = FALSE)
  10. ## ----setup, message = FALSE---------------------------------------------------
  11. library(tidyr)
  12. library(dplyr)
  13. ## -----------------------------------------------------------------------------
  14. classroom2 <- classroom %>%
  15. pivot_longer(quiz1:test1, names_to = "assessment", values_to = "grade") %>%
  16. arrange(name, assessment)
  17. classroom2
  18. ## -----------------------------------------------------------------------------
  19. relig_income
  20. ## -----------------------------------------------------------------------------
  21. relig_income %>%
  22. pivot_longer(-religion, names_to = "income", values_to = "frequency")
  23. ## -----------------------------------------------------------------------------
  24. billboard
  25. ## -----------------------------------------------------------------------------
  26. billboard2 <- billboard %>%
  27. pivot_longer(
  28. wk1:wk76,
  29. names_to = "week",
  30. values_to = "rank",
  31. values_drop_na = TRUE
  32. )
  33. billboard2
  34. ## -----------------------------------------------------------------------------
  35. billboard3 <- billboard2 %>%
  36. mutate(
  37. week = as.integer(gsub("wk", "", week)),
  38. date = as.Date(date.entered) + 7 * (week - 1),
  39. date.entered = NULL
  40. )
  41. billboard3
  42. ## -----------------------------------------------------------------------------
  43. billboard3 %>% arrange(artist, track, week)
  44. ## -----------------------------------------------------------------------------
  45. billboard3 %>% arrange(date, rank)
  46. ## -----------------------------------------------------------------------------
  47. tb <- as_tibble(read.csv("tb.csv", stringsAsFactors = FALSE))
  48. tb
  49. ## -----------------------------------------------------------------------------
  50. tb2 <- tb %>%
  51. pivot_longer(
  52. c(-iso2, -year),
  53. names_to = "demo",
  54. values_to = "n",
  55. values_drop_na = TRUE
  56. )
  57. tb2
  58. ## -----------------------------------------------------------------------------
  59. tb3 <- tb2 %>%
  60. separate(demo, c("sex", "age"), 1)
  61. tb3
  62. ## -----------------------------------------------------------------------------
  63. tb %>% pivot_longer(
  64. c(-iso2, -year),
  65. names_to = c("sex", "age"),
  66. names_pattern = "(.)(.+)",
  67. values_to = "n",
  68. values_drop_na = TRUE
  69. )
  70. ## -----------------------------------------------------------------------------
  71. weather <- as_tibble(read.csv("weather.csv", stringsAsFactors = FALSE))
  72. weather
  73. ## -----------------------------------------------------------------------------
  74. weather2 <- weather %>%
  75. pivot_longer(
  76. d1:d31,
  77. names_to = "day",
  78. values_to = "value",
  79. values_drop_na = TRUE
  80. )
  81. weather2
  82. ## -----------------------------------------------------------------------------
  83. weather3 <- weather2 %>%
  84. mutate(day = as.integer(gsub("d", "", day))) %>%
  85. select(id, year, month, day, element, value)
  86. weather3
  87. ## -----------------------------------------------------------------------------
  88. weather3 %>%
  89. pivot_wider(names_from = element, values_from = value)
  90. ## -----------------------------------------------------------------------------
  91. song <- billboard3 %>%
  92. distinct(artist, track) %>%
  93. mutate(song_id = row_number())
  94. song
  95. ## -----------------------------------------------------------------------------
  96. rank <- billboard3 %>%
  97. left_join(song, c("artist", "track")) %>%
  98. select(song_id, date, week, rank)
  99. rank
## ---- eval = FALSE------------------------------------------------------------
# This chunk is marked eval = FALSE, so its code is left commented out and is
# not run as part of this script.
# library(purrr)
# paths <- dir("data", pattern = "\\.csv$", full.names = TRUE)
# names(paths) <- basename(paths)
# map_dfr(paths, read.csv, stringsAsFactors = FALSE, .id = "filename")