/R/get_tableone.R

https://github.com/openpharma/visR · R · 129 lines · 38 code · 14 blank · 77 comment · 3 complexity · 265b822325f72c046448b85801841ccc MD5 · raw file

  1. #' @title Calculate summary statistics
  2. #'
  3. #' @description S3 method for creating a table of summary statistics.
  4. #' The summary statistics can be used for presentation in tables such as table one or baseline and demography tables.
  5. #'
  6. #' The summary statistics estimated are conditional on the variable type: continuous, binary, categorical, etc.
  7. #'
  8. #' By default the following summary stats are calculated:
  9. #' * Numeric variables: mean, min, 25th-percentile, median, 75th-percentile, maximum, standard deviation
  10. #' * Factor variables: proportion of each factor level in the overall dataset
  11. #' * Default: number of unique values and number of missing values
  12. #'
  13. #' @param data The dataset to summarize as dataframe or tibble
  14. #' @param strata Stratifying/Grouping variable name(s) as character vector. If NULL, only overall results are returned
  15. #' @param overall If TRUE, the summary statistics for the overall dataset are also calculated
  16. #' @param summary_function A function defining summary statistics for numeric and categorical values
  17. #' @return A list of data specified summaries for all input variables.
  18. #' @details It is possible to provide your own summary function. Please have a look at summary for inspiration.
  19. #'
  20. #' @note All columns in the table will be summarized. If only some columns shall be used, please select only those
  21. #' variables prior to creating the summary table by using dplyr::select()
  22. #'
  23. #' @examples
  24. #'
  25. #' # Example using the ovarian data set
  26. #'
  27. #' survival::ovarian %>%
  28. #' dplyr::select(-fustat) %>%
  29. #' dplyr::mutate(
  30. #' age_group = factor(
  31. #' dplyr::case_when(
  32. #' age <= 50 ~ "<= 50 years",
  33. #' age <= 60 ~ "<= 60 years",
  34. #' age <= 70 ~ "<= 70 years",
  35. #' TRUE ~ "> 70 years"
  36. #' )
  37. #' ),
  38. #' rx = factor(rx),
  39. #' ecog.ps = factor(ecog.ps)
  40. #' ) %>%
  41. #' dplyr::select(age, age_group, everything()) %>%
  42. #' visR::get_tableone()
  43. #'
  44. #' # Examples using ADaM data
  45. #'
  46. #' # display patients in an analysis set
  47. #' adtte %>%
  48. #' dplyr::filter(SAFFL == "Y") %>%
  49. #' dplyr::select(TRTA) %>%
  50. #' visR::get_tableone()
  51. #'
  52. #' ## display overall summaries for demog
  53. #' adtte %>%
  54. #' dplyr::filter(SAFFL == "Y") %>%
  55. #' dplyr::select(AGE, AGEGR1, SEX, RACE) %>%
  56. #' visR::get_tableone()
  57. #'
  58. #' ## By actual treatment
  59. #' adtte %>%
  60. #' dplyr::filter(SAFFL == "Y") %>%
  61. #' dplyr::select(AGE, AGEGR1, SEX, RACE, TRTA ) %>%
  62. #' visR::get_tableone(strata = "TRTA")
  63. #'
  64. #' ## By actual treatment, without overall
  65. #' adtte %>%
  66. #' dplyr::filter(SAFFL == "Y") %>%
  67. #' dplyr::select(AGE, AGEGR1, SEX, EVNTDESC, TRTA ) %>%
  68. #' visR::get_tableone(strata = "TRTA", overall = FALSE)
  69. #'
  70. #' @rdname get_tableone
  71. #'
  72. #' @export
  get_tableone <- function(data,
                           strata = NULL,
                           overall = TRUE,
                           summary_function = summarize_short) {
    # S3 generic: the method is selected from the class of `data`; all
    # arguments are handed to the selected method unchanged.
    UseMethod("get_tableone", data)
  }
  #' @rdname get_tableone
  #' @method get_tableone default
  #' @return object of class tableone. That is a list of data specified summaries
  #'   for all input variables. The result has the columns `variable` and
  #'   `statistic`, followed by one column per level of `strata` (a single
  #'   `Total` column when `strata` is `NULL`). When `overall` is `TRUE` and
  #'   `strata` is given, the `Total` column precedes the strata columns.
  #' @export
  get_tableone.default <- function(data,
                                   strata = NULL,
                                   overall = TRUE,
                                   summary_function = summarize_short) {
    summary_FUN <- match.fun(summary_function)

    # Fail early with a readable message instead of an error raised from
    # inside the grouping step.
    if (!is.null(strata)) {
      unknown_strata <- setdiff(strata, names(data))
      if (length(unknown_strata) > 0) {
        stop(
          "Strata variable(s) not found in `data`: ",
          paste(unknown_strata, collapse = ", "),
          call. = FALSE
        )
      }
    }

    # A separate overall table is only needed next to stratified columns;
    # without strata the single "Total" column already is the overall result.
    add_overall <- overall && !is.null(strata)
    if (add_overall) {
      # Computed on the untouched input, before any grouping is applied.
      overall_table1 <- get_tableone(
        data,
        strata = NULL,
        overall = FALSE,
        summary_function = summary_function
      )
    }

    if (is.null(strata)) {
      # Constant helper column so the unstratified case runs through the same
      # grouped pipeline. NOTE: an existing column named `all` is overwritten.
      data <- dplyr::mutate(data, all = "Total")
      strata <- "all"
    }

    grouped <- dplyr::group_by(data, !!!dplyr::syms(strata))

    # Sample size per stratum, one column per stratum level. summarise() only
    # drops the last grouping level, so ungroup explicitly (as is done for the
    # summaries below) before reshaping.
    data_ns <- grouped %>%
      dplyr::summarise(summary = dplyr::n()) %>%
      dplyr::ungroup() %>%
      tidyr::pivot_wider(
        names_from = tidyselect::any_of(strata),
        values_from = "summary"
      ) %>%
      dplyr::mutate(variable = "Sample", summary_id = "N")

    # Apply the summary function to every non-grouping column.
    summarised <- grouped %>%
      dplyr::summarise_all(summary_FUN) %>%
      dplyr::ungroup()
    value_cols <- setdiff(names(summarised), strata)

    # Long format (one row per variable and statistic), then one column per
    # stratum level. unnest_longer() creates the `summary_id` column holding
    # the names of the individual statistics.
    data_summary <- summarised %>%
      tidyr::pivot_longer(
        cols = tidyselect::all_of(value_cols),
        names_to = "variable",
        values_to = "summary"
      ) %>%
      tidyr::unnest_longer(summary) %>%
      tidyr::pivot_wider(
        names_from = tidyselect::any_of(strata),
        values_from = "summary"
      )

    # rbind() is kept on purpose: it matches columns by name and coerces
    # types, and the count columns are integers while the type of the summary
    # columns depends on `summary_function`.
    data_table1 <- rbind(data_ns, data_summary) %>%
      dplyr::rename(statistic = "summary_id") %>%
      dplyr::select("variable", "statistic", tidyselect::everything())

    if (add_overall) {
      data_table1 <- dplyr::left_join(
        overall_table1,
        data_table1,
        by = c("variable", "statistic")
      )
    }

    # The overall table comes from a recursive call and may already carry the
    # "tableone" class through the join; unique() avoids listing it twice.
    class(data_table1) <- unique(c("tableone", class(data_table1)))
    data_table1
  }