/R/pivot_wider.R
https://github.com/markfairbanks/tidytable · R · 169 lines · 105 code · 22 blank · 42 comment · 16 complexity · 62d73ed1920ea21be0fcb7f66d2979d3 MD5 · raw file
- #' Pivot data from long to wide
- #'
- #' @description "Widens" data, increasing the number of columns and
- #' decreasing the number of rows.
- #'
- #' @param .df A data.frame or data.table
- #' @param id_cols A set of columns that uniquely identifies each observation.
- #' Defaults to all columns in the data table except for the columns specified in `names_from` and `values_from`.
- #' Typically used when you have additional variables that is directly related.
- #' `tidyselect` compatible.
- #' @param names_from A pair of arguments describing which column (or columns) to get the name of the output column `name_from`,
- #' and which column (or columns) to get the cell values from `values_from`).
- #' `tidyselect` compatible.
- #' @param values_from A pair of arguments describing which column (or columns) to get the name of the output column `name_from`,
- #' and which column (or columns) to get the cell values from `values_from`.
- #' `tidyselect` compatible.
- #' @param names_sep the separator between the names of the columns
- #' @param names_prefix prefix to add to the names of the new columns
- #' @param names_glue Instead of using `names_sep` and `names_prefix`, you can supply a
- #' glue specification that uses the `names_from` columns (and special `.value`) to create custom column names
- #' @param names_sort Should the resulting new columns be sorted
- #' @param names_repair Treatment of duplicate names. See `?vctrs::vec_as_names` for options/details.
- #' @param values_fn Should the data be aggregated before casting? If the formula doesn't identify a single observation for each cell, then aggregation defaults to length with a message.
- #' @param values_fill If values are missing, what value should be filled in
- #'
- #' @examples
- #' test_df <- data.table(
- #' a = rep(c("a", "b", "c"), 2),
- #' b = c(rep("x", 3), rep("y", 3)),
- #' vals = 1:6
- #' )
- #'
- #' test_df %>%
- #' pivot_wider.(names_from = b, values_from = vals)
- #'
- #' test_df %>%
- #' pivot_wider.(
- #' names_from = b, values_from = vals, names_prefix = "new_"
- #' )
- #' @export
- pivot_wider. <- function(.df,
- names_from = name,
- values_from = value,
- id_cols = NULL,
- names_sep = "_",
- names_prefix = "",
- names_glue = NULL,
- names_sort = FALSE,
- names_repair = "check_unique",
- values_fill = NULL,
- values_fn = NULL) {
- UseMethod("pivot_wider.")
- }
- #' @export
- pivot_wider..tidytable <- function(.df,
- names_from = name,
- values_from = value,
- id_cols = NULL,
- names_sep = "_",
- names_prefix = "",
- names_glue = NULL,
- names_sort = FALSE,
- names_repair = "check_unique",
- values_fill = NULL,
- values_fn = NULL) {
- id_cols <- enquo(id_cols)
- values_fn <- enquo(values_fn)
- names_from <- tidyselect_names(.df, {{ names_from }})
- values_from <- tidyselect_names(.df, {{ values_from }})
- uses_dot_value <- FALSE
- if (!is.null(names_glue)) {
- if (str_detect.(names_glue, ".value")) {
- uses_dot_value <- TRUE
- }
- }
- if (quo_is_null(id_cols)) {
- data_names <- names(.df)
- id_cols <- data_names[!data_names %in% c(names_from, values_from)]
- } else {
- id_cols <- tidyselect_names(.df, !!id_cols)
- }
- if (names_sort) {
- .df <- arrange.(.df, !!!syms(names_from))
- }
- if (nchar(names_prefix) > 0 && is.null(names_glue)) {
- .first_name <- sym(names_from[[1]])
- .df <- mutate.(.df, !!.first_name := paste0(!!names_prefix, !!.first_name))
- } else if (uses_dot_value) {
- glue_df <- distinct.(.df, !!!syms(names_from))
- values_from_reps <- nrow(glue_df)
- glue_df <- vec_rep(glue_df, length(values_from))
- glue_df$.value <- vec_rep_each(values_from, values_from_reps)
- glue_vars <- as.character(glue_data(glue_df, names_glue))
- } else if (!is.null(names_glue)) {
- .df <- mutate.(.df, .names_from = glue(names_glue))
- .df <- relocate.(.df, .names_from, .before = !!sym(names_from[[1]]))
- .df <- .df[, -..names_from]
- names_from <- ".names_from"
- }
- no_id <- length(id_cols) == 0
- if (no_id) {
- lhs <- "..."
- } else {
- lhs <- paste(id_cols, collapse = " + ")
- }
- rhs <- paste(names_from, collapse = " + ")
- dcast_form <- paste(lhs, rhs, sep = " ~ ")
- dcast_call <- call2_dt(
- "dcast",
- .df,
- formula = dcast_form,
- value.var = values_from,
- fun.aggregate = expr(!!values_fn),
- sep = names_sep,
- fill = values_fill
- )
- .df <- eval_tidy(dcast_call)
- if (no_id) .df[, . := NULL]
- if (uses_dot_value) {
- new_vars <- setdiff(names(.df), id_cols)
- setnames(.df, new_vars, glue_vars)
- }
- .df <- df_name_repair(.df, .name_repair = names_repair)
- as_tidytable(.df)
- }
- #' @export
- pivot_wider..data.frame <- function(.df,
- names_from = name,
- values_from = value,
- id_cols = NULL,
- names_sep = "_",
- names_prefix = "",
- names_glue = NULL,
- names_sort = FALSE,
- names_repair = "check_unique",
- values_fill = NULL,
- values_fn = NULL) {
- .df <- as_tidytable(.df)
- pivot_wider.(
- .df, names_from = {{ names_from }}, values_from = {{ values_from }},
- id_cols = {{ id_cols }}, names_sep = names_sep,
- names_prefix = names_prefix, names_glue = names_glue,
- names_sort = names_sort, names_repair = names_repair,
- values_fill = values_fill, values_fn = {{ values_fn }}
- )
- }
- globalVariables(c(".", ".names_from", "..names_from", "name", "value"))