/R/utils_table.R

https://github.com/openpharma/visR · R · 140 lines · 66 code · 12 blank · 62 comment · 0 complexity · 51665f51eaa2b1ed581785d9af2998d0 MD5 · raw file

  #' Summarize a vector in long (detailed) form
  #'
  #' S3 generic. Which summary statistics are calculated depends on the class
  #' of the vector that is passed in.
  #'
  #' @param x an object
  #' @return A summarized version of the input.
  #' @export
  summarize_long <- function(x) {
    UseMethod("summarize_long")
  }
  #' Create variable summary for factors
  #'
  #' Counts the occurrences of every level of `x` and the share of each level
  #' in percent (rounded to three decimals). Missing values are tallied under
  #' an explicit "Missing" level.
  #'
  #' @param x an object of class "factor"
  #' @return Long list of summary statistics for the input factors: a list of
  #'   length one holding a named list with one "N <level>" and one
  #'   "% <level>" element per level.
  #' @export
  summarize_long.factor <- function(x) {
    # Turn NA into a regular level so that it is counted like any other value.
    # NOTE(review): fct_explicit_na() appears to be deprecated in newer forcats
    # releases (fct_na_value_to_level() is the successor) -- confirm against
    # the forcats version this package requires before switching.
    with_missing <- forcats::fct_explicit_na(x, na_level = "Missing")

    # One row per level with its frequency.
    level_counts <- dplyr::summarise(
      dplyr::group_by(tibble::enframe(with_missing), value),
      N = dplyr::n()
    )
    level_stats <- dplyr::mutate(
      level_counts,
      `%` = round(100 * N / sum(N), 3)
    )

    # Spread to a single row: columns "N <level>" and "% <level>".
    wide <- tidyr::pivot_wider(
      level_stats,
      names_from = value,
      values_from = c("N", "%"),
      names_sep = " "
    )

    list(as.list(wide))
  }
  #' Create variable summary for integer variables
  #'
  #' Integer vectors are summarized by handing them to the numeric method.
  #'
  #' @param x an object of class "integer"
  #' @return Long list of summary statistics for the input.
  #' @export
  summarize_long.integer <- function(x) {
    numeric_summary <- summarize_long.numeric(x)
    numeric_summary
  }
  #' Create variable summary for numeric variables
  #'
  #' Computes mean, minimum, first quartile, median, third quartile, maximum
  #' and standard deviation of `x`, ignoring missing values.
  #'
  #' @param x an object of class "numeric"
  #' @return Long list of summary statistics for the input: a list of length
  #'   one holding a named list with the elements `mean`, `min`, `Q1`,
  #'   `median`, `Q3`, `max` and `sd`. When `x` has no non-missing values,
  #'   `min` and `max` are `NA`.
  #' @export
  summarize_long.numeric <- function(x) {
    # min()/max() warn and return Inf/-Inf when nothing is left after dropping
    # missing values; report NA instead so they agree with the other
    # statistics and no warning is raised.
    has_obs <- any(!is.na(x))

    dat <- list(
      mean = mean(x, na.rm = TRUE),
      min = if (has_obs) min(x, na.rm = TRUE) else NA_real_,
      # names = FALSE keeps the "25%"/"75%" labels off the returned values,
      # so every statistic is a plain unnamed scalar.
      Q1 = stats::quantile(x, probs = 0.25, na.rm = TRUE, names = FALSE),
      median = stats::median(x, na.rm = TRUE),
      Q3 = stats::quantile(x, probs = 0.75, na.rm = TRUE, names = FALSE),
      max = if (has_obs) max(x, na.rm = TRUE) else NA_real_,
      sd = stats::sd(x, na.rm = TRUE)
    )
    list(dat)
  }
  #' Create variable summary for all other variable types
  #'
  #' Fallback for classes without a dedicated method. Reports how many
  #' distinct values `x` holds (as returned by `unique()`, so `NA` counts as
  #' one distinct value) and how many entries are missing.
  #'
  #' @param x an object of any other class
  #' @return List of counts for unique and missing values in `x`.
  #' @export
  summarize_long.default <- function(x) {
    n_unique <- length(unique(x))
    n_missing <- sum(is.na(x))
    list(list(unique_values = n_unique, nmiss = n_missing))
  }
  #' Create abbreviated variable summary for table1
  #'
  #' S3 generic that creates summaries in which multiple summary measures are
  #' combined into a single formatted string.
  #'
  #' @param x a vector to be summarized
  #' @return A summarized less detailed version of the input.
  #' @export
  summarize_short <- function(x) {
    UseMethod("summarize_short")
  }
  #' Create variable summary for factors
  #'
  #' Calculates N and % of occurrence for each factor value and combines both
  #' into one "N (p%)" string per level. Missing values are tallied under an
  #' explicit "Missing" level.
  #'
  #' @param x an object of class "factor"
  #' @return Short list of summary statistics for the input factors: a list
  #'   of length one holding a named list with one string per level.
  #' @export
  summarize_short.factor <- function(x) {
    # Turn NA into a regular level so that it is counted like any other value.
    # NOTE(review): fct_explicit_na() appears to be deprecated in newer forcats
    # releases (fct_na_value_to_level() is the successor) -- confirm against
    # the forcats version this package requires before switching.
    with_missing <- forcats::fct_explicit_na(x, na_level = "Missing")

    # One row per level with its frequency.
    level_counts <- dplyr::summarise(
      dplyr::group_by(tibble::enframe(with_missing), value),
      N = dplyr::n()
    )

    # Percentages are formatted jointly, to three significant digits.
    labelled <- dplyr::mutate(
      level_counts,
      `n (%)` = paste0(
        N,
        " (",
        format(100 * N / sum(N), digits = 3, trim = TRUE),
        "%)"
      )
    )

    # Spread to a single row with one column per level.
    wide <- tidyr::pivot_wider(
      dplyr::select(labelled, -N),
      names_from = value,
      values_from = c("n (%)"),
      names_sep = " "
    )

    list(as.list(wide))
  }
  #' Create variable summary for numeric variables
  #'
  #' Calculates mean (standard deviation), median (IQR), min-max range and
  #' N/% missing elements for a numeric vector. Each measure is returned as
  #' one formatted string; numbers are shown with three significant digits.
  #'
  #' @param x an object of class "numeric"
  #' @return Short list of summary statistics for the input: a list of length
  #'   one holding a named list with the elements `Mean (SD)`,
  #'   `Median (IQR)`, `Min-max` and `Missing`. When `x` has no non-missing
  #'   values, `Min-max` is "NA-NA".
  #' @export
  summarize_short.numeric <- function(x) {
    # Every statistic is formatted on its own with three significant digits.
    fmt <- function(value) format(value, digits = 3)

    n_missing <- sum(is.na(x))

    # min()/max() warn and return Inf/-Inf when nothing is left after dropping
    # missing values, which would print as "Inf--Inf"; fall back to NA.
    limits <- if (n_missing < length(x)) {
      range(x, na.rm = TRUE)
    } else {
      c(NA_real_, NA_real_)
    }

    quartiles <- stats::quantile(
      x,
      probs = c(0.25, 0.75),
      na.rm = TRUE,
      names = FALSE
    )

    dat <- list(
      `Mean (SD)` = paste0(
        fmt(mean(x, na.rm = TRUE)),
        " (", fmt(stats::sd(x, na.rm = TRUE)), ")"
      ),
      `Median (IQR)` = paste0(
        fmt(stats::median(x, na.rm = TRUE)),
        " (", fmt(quartiles[1]), "-", fmt(quartiles[2]), ")"
      ),
      `Min-max` = paste0(fmt(limits[1]), "-", fmt(limits[2])),
      Missing = paste0(
        fmt(n_missing),
        " (",
        format(100 * n_missing / length(x), trim = TRUE, digits = 3),
        "%)"
      )
    )
    list(dat)
  }
  #' Create variable summary for integer variables
  #'
  #' Calculates mean (standard deviation), median (IQR), min-max range and
  #' N/% missing elements for an integer vector by handing it to the numeric
  #' method.
  #'
  #' @param x an object of class "integer"
  #' @return Short list of summary statistics for the input.
  #' @export
  summarize_short.integer <- function(x) {
    numeric_summary <- summarize_short.numeric(x)
    numeric_summary
  }
  #' Create variable summary for all other variable types
  #'
  #' Fallback for classes without a dedicated method. Reports the number of
  #' distinct values (as returned by `unique()`, so `NA` counts as one
  #' distinct value) and the number and percentage of missing entries, each
  #' as a formatted string.
  #'
  #' @param x an object of any other class
  #' @return List of counts for unique and missing values in `x`: a list of
  #'   length one holding a named list with the elements `Unique values` and
  #'   `Missing (%)`.
  #' @export
  summarize_short.default <- function(x) {
    n_missing <- sum(is.na(x))
    # Three significant digits, matching the percentages produced by the
    # numeric and factor methods (e.g. "33.3%" rather than "33.33333%").
    pct_missing <- format(
      100 * n_missing / length(x),
      trim = TRUE,
      digits = 3
    )

    dat <- list(
      `Unique values` = format(length(unique(x))),
      `Missing (%)` = paste0(format(n_missing), " (", pct_missing, "%)")
    )
    list(dat)
  }