/inst/bench/bench.R

https://github.com/cderv/dplyr · R · 240 lines · 163 code · 70 blank · 7 comment · 2 complexity · f334ac637a3fd05a6305d11b1b3a1e42 MD5 · raw file

  1. library(dplyr)
  2. library(rlang)
  3. library(purrr)
  4. library(tibble)
  5. library(tidyr)
  6. if (!dir.exists("../bench-libs")) dir.create("../bench-libs")
  7. if (!dir.exists("../bench-libs/0.8.3")) {
  8. dir.create("../bench-libs/0.8.3")
  9. pak::pkg_install("dplyr", lib = "../bench-libs/0.8.3", ask = FALSE)
  10. }
  11. libs <- list.files("../bench-libs", full.names = TRUE)
  12. names(libs) <- basename(libs)
  13. libs <- c(libs, "master" = .libPaths())
  14. benchs <- function(libs, setup, ..., iterations = NULL){
  15. dots <- rlang::exprs(...)
  16. setup <- substitute(setup)
  17. f <- function(){}
  18. body(f) <- rlang::expr({
  19. library(dplyr, warn.conflicts = FALSE)
  20. !!setup
  21. bench::mark(!!!dots, check = FALSE, iterations = !!iterations) %>%
  22. mutate(expression = purrr::map_chr(expression, deparse))
  23. })
  24. results <- purrr::imap(libs, ~callr::r(f, libpath = .x) %>% mutate(version = .y))
  25. as_tibble(vctrs::vec_rbind(!!!results))
  26. # %>%
  27. # select(expression, version, median) %>%
  28. # pivot_wider(names_from = version, values_from = median)
  29. }
  30. summarise_hybrid <- benchs(
  31. iterations = 10,
  32. libs = libs,
  33. setup = {
  34. df <- tibble(x = rnorm(1e5), g = sample(rep(1:1e3, 100))) %>% group_by(g)
  35. },
  36. summarise(df, n = n()),
  37. summarise(df, x = n_distinct(x)),
  38. summarise(df, x = first(x)),
  39. summarise(df, x = last(x)),
  40. summarise(df, x = nth(x, n = 1L)),
  41. summarise(df, x = first(x, default = 42)),
  42. summarise(df, x = last(x, default = 42)),
  43. summarise(df, x = nth(x, n = 1L, default = 42)),
  44. summarise(df, x = mean(x)),
  45. summarise(df, x = mean(x, na.rm = TRUE)),
  46. summarise(df, x = sd(x)),
  47. summarise(df, x = sd(x, na.rm = TRUE)),
  48. summarise(df, x = var(x)),
  49. summarise(df, x = var(x, na.rm = TRUE)),
  50. summarise(df, x = min(x)),
  51. summarise(df, x = min(x, na.rm = TRUE)),
  52. summarise(df, x = max(x)),
  53. summarise(df, x = max(x, na.rm = TRUE)),
  54. summarise(df, x = sum(x)),
  55. summarise(df, x = sum(x, na.rm = TRUE)),
  56. ) %>%
  57. mutate(hybrid = TRUE)
  58. summarise_non_hybrid <- benchs(
  59. iterations = 10,
  60. libs = libs,
  61. setup = {
  62. df <- tibble(x = rnorm(1e4), g = sample(rep(1:1e2, 100))) %>% group_by(g)
  63. },
  64. summarise(df, n = 0 + 0),
  65. summarise(df, n = 0+n()),
  66. summarise(df, x = 0+n_distinct(x)),
  67. summarise(df, x = 0+first(x)),
  68. summarise(df, x = 0+last(x)),
  69. summarise(df, x = 0+nth(x, n = 1L)),
  70. summarise(df, x = 0+first(x, default = 42)),
  71. summarise(df, x = 0+last(x, default = 42)),
  72. summarise(df, x = 0+nth(x, n = 1L, default = 42)),
  73. summarise(df, x = 0+mean(x)),
  74. summarise(df, x = 0+mean(x, na.rm = TRUE)),
  75. summarise(df, x = 0+sd(x)),
  76. summarise(df, x = 0+sd(x, na.rm = TRUE)),
  77. summarise(df, x = 0+var(x)),
  78. summarise(df, x = 0+var(x, na.rm = TRUE)),
  79. summarise(df, x = 0+min(x)),
  80. summarise(df, x = 0+min(x, na.rm = TRUE)),
  81. summarise(df, x = 0+max(x)),
  82. summarise(df, x = 0+max(x, na.rm = TRUE)),
  83. summarise(df, x = 0+sum(x)),
  84. summarise(df, x = 0+sum(x, na.rm = TRUE)),
  85. ) %>%
  86. mutate(hybrid = FALSE)
  87. mutate_hybrid <- benchs(
  88. iterations = 10,
  89. libs = libs,
  90. setup = {
  91. df <- tibble(x = rnorm(1e4), g = sample(rep(1:1e2, 100))) %>% group_by(g)
  92. },
  93. # window
  94. mutate(df, x = lead(x)),
  95. mutate(df, x = lead(x, n = 1L)),
  96. mutate(df, x = lag(x)),
  97. mutate(df, x = lag(x, n = 1L)),
  98. mutate(df, x = ntile(n = 2)),
  99. mutate(df, x = ntile(x, n = 2)),
  100. mutate(df, x = min_rank(x)),
  101. mutate(df, x = dense_rank(x)),
  102. mutate(df, x = percent_rank(x)),
  103. mutate(df, x = cume_dist(x)),
  104. mutate(df, x = row_number(x)),
  105. # same as summarise() + recycling to match size
  106. mutate(df, n = n()),
  107. mutate(df, x = n_distinct(x)),
  108. mutate(df, x = first(x)),
  109. mutate(df, x = last(x)),
  110. mutate(df, x = nth(x, n = 1L)),
  111. mutate(df, x = first(x, default = 42)),
  112. mutate(df, x = last(x, default = 42)),
  113. mutate(df, x = nth(x, n = 1L, default = 42)),
  114. mutate(df, x = mean(x)),
  115. mutate(df, x = mean(x, na.rm = TRUE)),
  116. mutate(df, x = sd(x)),
  117. mutate(df, x = sd(x, na.rm = TRUE)),
  118. mutate(df, x = var(x)),
  119. mutate(df, x = var(x, na.rm = TRUE)),
  120. mutate(df, x = min(x)),
  121. mutate(df, x = min(x, na.rm = TRUE)),
  122. mutate(df, x = max(x)),
  123. mutate(df, x = max(x, na.rm = TRUE)),
  124. mutate(df, x = sum(x)),
  125. mutate(df, x = sum(x, na.rm = TRUE))
  126. ) %>%
  127. mutate(hybrid = TRUE)
  128. mutate_non_hybrid <- benchs(
  129. iterations = 10,
  130. libs = libs,
  131. setup = {
  132. df <- tibble(x = rnorm(1e4), g = sample(rep(1:1e2, 100))) %>% group_by(g)
  133. },
  134. # window
  135. mutate(df, x = 0),
  136. mutate(df, x = 0 + lead(x)),
  137. mutate(df, x = 0 + lead(x, n = 1L)),
  138. mutate(df, x = 0 + lag(x)),
  139. mutate(df, x = 0 + lag(x, n = 1L)),
  140. mutate(df, x = 0 + ntile(n = 2)),
  141. mutate(df, x = 0 + ntile(x, n = 2)),
  142. mutate(df, x = 0 + min_rank(x)),
  143. mutate(df, x = 0 + dense_rank(x)),
  144. mutate(df, x = 0 + percent_rank(x)),
  145. mutate(df, x = 0 + cume_dist(x)),
  146. mutate(df, x = 0 + row_number(x)),
  147. # same as summarise() + recycling to match size
  148. mutate(df, n = 0 + n()),
  149. mutate(df, x = 0 + n_distinct(x)),
  150. mutate(df, x = 0 + first(x)),
  151. mutate(df, x = 0 + last(x)),
  152. mutate(df, x = 0 + nth(x, n = 1L)),
  153. mutate(df, x = 0 + first(x, default = 42)),
  154. mutate(df, x = 0 + last(x, default = 42)),
  155. mutate(df, x = 0 + nth(x, n = 1L, default = 42)),
  156. mutate(df, x = 0 + mean(x)),
  157. mutate(df, x = 0 + mean(x, na.rm = TRUE)),
  158. mutate(df, x = 0 + sd(x)),
  159. mutate(df, x = 0 + sd(x, na.rm = TRUE)),
  160. mutate(df, x = 0 + var(x)),
  161. mutate(df, x = 0 + var(x, na.rm = TRUE)),
  162. mutate(df, x = 0 + min(x)),
  163. mutate(df, x = 0 + min(x, na.rm = TRUE)),
  164. mutate(df, x = 0 + max(x)),
  165. mutate(df, x = 0 + max(x, na.rm = TRUE)),
  166. mutate(df, x = 0 + sum(x)),
  167. mutate(df, x = 0 + sum(x, na.rm = TRUE))
  168. ) %>%
  169. mutate(hybrid = FALSE)
  170. results <- as_tibble(vctrs::vec_rbind(summarise_hybrid, summarise_non_hybrid, mutate_hybrid, mutate_non_hybrid))