/R/pivot_wider.R

https://github.com/markfairbanks/tidytable · R · 169 lines · 105 code · 22 blank · 42 comment · 16 complexity · 62d73ed1920ea21be0fcb7f66d2979d3 MD5 · raw file

  1. #' Pivot data from long to wide
  2. #'
  3. #' @description "Widens" data, increasing the number of columns and
  4. #' decreasing the number of rows.
  5. #'
  6. #' @param .df A data.frame or data.table
  7. #' @param id_cols A set of columns that uniquely identifies each observation.
  8. #' Defaults to all columns in the data table except for the columns specified in `names_from` and `values_from`.
  9. #' Typically used when you have additional variables that is directly related.
  10. #' `tidyselect` compatible.
  11. #' @param names_from A pair of arguments describing which column (or columns) to get the name of the output column `name_from`,
  12. #' and which column (or columns) to get the cell values from `values_from`).
  13. #' `tidyselect` compatible.
  14. #' @param values_from A pair of arguments describing which column (or columns) to get the name of the output column `name_from`,
  15. #' and which column (or columns) to get the cell values from `values_from`.
  16. #' `tidyselect` compatible.
  17. #' @param names_sep the separator between the names of the columns
  18. #' @param names_prefix prefix to add to the names of the new columns
  19. #' @param names_glue Instead of using `names_sep` and `names_prefix`, you can supply a
  20. #' glue specification that uses the `names_from` columns (and special `.value`) to create custom column names
  21. #' @param names_sort Should the resulting new columns be sorted
  22. #' @param names_repair Treatment of duplicate names. See `?vctrs::vec_as_names` for options/details.
  23. #' @param values_fn Should the data be aggregated before casting? If the formula doesn't identify a single observation for each cell, then aggregation defaults to length with a message.
  24. #' @param values_fill If values are missing, what value should be filled in
  25. #'
  26. #' @examples
  27. #' test_df <- data.table(
  28. #' a = rep(c("a", "b", "c"), 2),
  29. #' b = c(rep("x", 3), rep("y", 3)),
  30. #' vals = 1:6
  31. #' )
  32. #'
  33. #' test_df %>%
  34. #' pivot_wider.(names_from = b, values_from = vals)
  35. #'
  36. #' test_df %>%
  37. #' pivot_wider.(
  38. #' names_from = b, values_from = vals, names_prefix = "new_"
  39. #' )
  40. #' @export
  41. pivot_wider. <- function(.df,
  42. names_from = name,
  43. values_from = value,
  44. id_cols = NULL,
  45. names_sep = "_",
  46. names_prefix = "",
  47. names_glue = NULL,
  48. names_sort = FALSE,
  49. names_repair = "check_unique",
  50. values_fill = NULL,
  51. values_fn = NULL) {
  52. UseMethod("pivot_wider.")
  53. }
  54. #' @export
  55. pivot_wider..tidytable <- function(.df,
  56. names_from = name,
  57. values_from = value,
  58. id_cols = NULL,
  59. names_sep = "_",
  60. names_prefix = "",
  61. names_glue = NULL,
  62. names_sort = FALSE,
  63. names_repair = "check_unique",
  64. values_fill = NULL,
  65. values_fn = NULL) {
  66. id_cols <- enquo(id_cols)
  67. values_fn <- enquo(values_fn)
  68. names_from <- tidyselect_names(.df, {{ names_from }})
  69. values_from <- tidyselect_names(.df, {{ values_from }})
  70. uses_dot_value <- FALSE
  71. if (!is.null(names_glue)) {
  72. if (str_detect.(names_glue, ".value")) {
  73. uses_dot_value <- TRUE
  74. }
  75. }
  76. if (quo_is_null(id_cols)) {
  77. data_names <- names(.df)
  78. id_cols <- data_names[!data_names %in% c(names_from, values_from)]
  79. } else {
  80. id_cols <- tidyselect_names(.df, !!id_cols)
  81. }
  82. if (names_sort) {
  83. .df <- arrange.(.df, !!!syms(names_from))
  84. }
  85. if (nchar(names_prefix) > 0 && is.null(names_glue)) {
  86. .first_name <- sym(names_from[[1]])
  87. .df <- mutate.(.df, !!.first_name := paste0(!!names_prefix, !!.first_name))
  88. } else if (uses_dot_value) {
  89. glue_df <- distinct.(.df, !!!syms(names_from))
  90. values_from_reps <- nrow(glue_df)
  91. glue_df <- vec_rep(glue_df, length(values_from))
  92. glue_df$.value <- vec_rep_each(values_from, values_from_reps)
  93. glue_vars <- as.character(glue_data(glue_df, names_glue))
  94. } else if (!is.null(names_glue)) {
  95. .df <- mutate.(.df, .names_from = glue(names_glue))
  96. .df <- relocate.(.df, .names_from, .before = !!sym(names_from[[1]]))
  97. .df <- .df[, -..names_from]
  98. names_from <- ".names_from"
  99. }
  100. no_id <- length(id_cols) == 0
  101. if (no_id) {
  102. lhs <- "..."
  103. } else {
  104. lhs <- paste(id_cols, collapse = " + ")
  105. }
  106. rhs <- paste(names_from, collapse = " + ")
  107. dcast_form <- paste(lhs, rhs, sep = " ~ ")
  108. dcast_call <- call2_dt(
  109. "dcast",
  110. .df,
  111. formula = dcast_form,
  112. value.var = values_from,
  113. fun.aggregate = expr(!!values_fn),
  114. sep = names_sep,
  115. fill = values_fill
  116. )
  117. .df <- eval_tidy(dcast_call)
  118. if (no_id) .df[, . := NULL]
  119. if (uses_dot_value) {
  120. new_vars <- setdiff(names(.df), id_cols)
  121. setnames(.df, new_vars, glue_vars)
  122. }
  123. .df <- df_name_repair(.df, .name_repair = names_repair)
  124. as_tidytable(.df)
  125. }
  126. #' @export
  127. pivot_wider..data.frame <- function(.df,
  128. names_from = name,
  129. values_from = value,
  130. id_cols = NULL,
  131. names_sep = "_",
  132. names_prefix = "",
  133. names_glue = NULL,
  134. names_sort = FALSE,
  135. names_repair = "check_unique",
  136. values_fill = NULL,
  137. values_fn = NULL) {
  138. .df <- as_tidytable(.df)
  139. pivot_wider.(
  140. .df, names_from = {{ names_from }}, values_from = {{ values_from }},
  141. id_cols = {{ id_cols }}, names_sep = names_sep,
  142. names_prefix = names_prefix, names_glue = names_glue,
  143. names_sort = names_sort, names_repair = names_repair,
  144. values_fill = values_fill, values_fn = {{ values_fn }}
  145. )
  146. }
  147. globalVariables(c(".", ".names_from", "..names_from", "name", "value"))