/tests/testthat/test-tidyr-pivot-wider.R

https://github.com/rstudio/sparklyr · R · 262 lines · 212 code · 50 blank · 0 comment · 0 complexity · 9dc1db95169bc3f46c11ff279ef3a01a MD5 · raw file

  # Name the testthat context for the tests in this file.
  context("tidyr-pivot-wider")

  # Spark connection used by every test below when copying fixtures to Spark.
  sc <- testthat_spark_connection()
  # With only a key and a value column, the three input rows are expected to
  # collapse into a single wide row with one column per key.
  test_that("can pivot all cols to wide", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(sc, tibble::tibble(key = c("x", "y", "z"), val = 1:3))
    wide <- spark_tbl %>%
      tidyr::pivot_wider(
        names_from = key, values_from = val, names_sort = TRUE
      ) %>%
      collect()

    expected <- tibble::tibble(x = 1, y = 2, z = 3)
    expect_equivalent(wide, expected)
  })
  # Column `a` is neither a name nor a value source, so it is expected to
  # remain in the pivoted output.
  test_that("non-pivoted cols are preserved", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(sc, tibble::tibble(a = 1, key = c("x", "y"), val = 1:2))
    wide <- spark_tbl %>%
      tidyr::pivot_wider(
        names_from = key, values_from = val, names_sort = TRUE
      ) %>%
      collect()

    expected <- tibble::tibble(a = 1, x = 1, y = 2)
    expect_equivalent(wide, expected)
  })
  # Each `a` value has a row for only one of the two keys; the absent
  # combinations are expected to show up as NaN cells after collecting.
  test_that("implicit missings turn into explicit missings", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(
      sc,
      tibble::tibble(a = 1:2, key = c("x", "y"), val = 1:2)
    )
    collected <- spark_tbl %>%
      tidyr::pivot_wider(
        names_from = key, values_from = val, names_sort = TRUE
      ) %>%
      collect()
    # Sort locally so the positional comparison below sees a fixed row order.
    wide <- dplyr::arrange(collected, a)

    expected <- tibble::tibble(a = 1:2, x = c(1, NaN), y = c(NaN, 2))
    expect_equivalent(wide, expected)
  })
  # The key value "a" would become an output column named `a`, which already
  # exists in the input; the test expects a classed duplicate-name error.
  test_that("error when overwriting existing column", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(sc, tibble::tibble(a = 1, key = c("a", "b"), val = 1:2))

    expect_error(
      spark_tbl %>% tidyr::pivot_wider(names_from = key, values_from = val),
      class = "tibble_error_column_names_must_be_unique"
    )
  })
  # Pivoting a table grouped by `g` is expected to return a table that is
  # still grouped by `g`.
  test_that("grouping is preserved", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(sc, tibble::tibble(g = 1, k = "x", v = 2))
    grouped <- dplyr::group_by(spark_tbl, g)
    wide <- tidyr::pivot_wider(grouped, names_from = k, values_from = v)

    # Only the grouping metadata is checked; the result is never collected.
    expect_equal(dplyr::group_vars(wide), "g")
  })
  # The value column `d` holds one named list per row; after pivoting on `g`
  # each output cell is expected to carry the matching list unchanged.
  test_that("nested list column pivots correctly", {
    test_requires_version("2.4.0")

    spark_tbl <- copy_to(
      sc,
      tibble::tibble(
        i = c(1, 2, 1, 2),
        g = c("a", "a", "b", "b"),
        d = list(
          list(x = 1, y = 5),
          list(x = 2, y = 6),
          list(x = 3, y = 7),
          list(x = 4, y = 8)
        )
      )
    )

    wide <- spark_tbl %>%
      tidyr::pivot_wider(names_from = g, values_from = d, names_sort = TRUE) %>%
      collect() %>%
      dplyr::arrange(i)

    expected <- tibble::tibble(
      i = 1:2,
      a = list(list(x = 1, y = 5), list(x = 2, y = 6)),
      b = list(list(x = 3, y = 7), list(x = 4, y = 8))
    )
    expect_equivalent(wide, expected)
  })
  # Output names are built from the glue template "{x}{y}_{.value}", combining
  # both name columns with the originating value column.
  test_that("can specify output column names using names_glue", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(
      sc,
      tibble::tibble(x = c("X", "Y"), y = 1:2, a = 1:2, b = 1:2)
    )

    wide <- spark_tbl %>%
      tidyr::pivot_wider(
        names_from = x:y,
        values_from = a:b,
        names_glue = "{x}{y}_{.value}",
        names_sort = TRUE
      ) %>%
      collect()

    expected <- tibble::tibble(X1_a = 1, Y2_a = 2, X1_b = 1, Y2_b = 2)
    expect_equivalent(wide, expected)
  })
  # With `names_sort = TRUE` the day names become the output columns, each
  # holding the `int` value from its source row.
  test_that("can sort column names", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(
      sc,
      tibble::tibble(int = c(1, 3, 2), days = c("Mon", "Tues", "Wed"))
    )

    wide <- spark_tbl %>%
      tidyr::pivot_wider(
        names_from = days, values_from = int, names_sort = TRUE
      ) %>%
      collect()

    expected <- tibble::tibble(Mon = 1, Tues = 3, Wed = 2)
    expect_equivalent(wide, expected)
  })
  # Passing `id_cols = name` keeps `row` out of the identifying columns, so the
  # expected output has one row per name with Sam's two measurements side by
  # side and Bob's missing height reported as NaN.
  test_that("can override default keys", {
    test_requires_version("2.3.0")
    skip_databricks_connect()

    sdf <- copy_to(
      sc,
      tibble::tribble(
        ~row, ~name, ~var, ~value,
        1, "Sam", "age", 10,
        2, "Sam", "height", 1.5,
        3, "Bob", "age", 20,
      )
    )

    # Sort after collecting: nothing else in this pipeline fixes the order in
    # which the two result rows come back, and the comparison below is
    # positional. The other multi-row tests in this file arrange the same way.
    pv <- sdf %>%
      tidyr::pivot_wider(id_cols = name, names_from = var, values_from = value) %>%
      collect() %>%
      dplyr::arrange(name)

    # Expected rows are listed in the sorted (`name` ascending) order.
    expect_equivalent(
      pv,
      tibble::tribble(
        ~name, ~age, ~height,
        "Bob", 20, NaN,
        "Sam", 10, 1.5,
      )
    )
  })
  # `a = 1` has two values for key "x" (1 and 10); with `values_fn = sum` the
  # expected cell is their total, 11.
  test_that("values_fn can be a single function", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(
      sc,
      tibble::tibble(a = c(1, 1, 2), key = c("x", "x", "x"), val = c(1, 10, 100))
    )

    summed <- spark_tbl %>%
      tidyr::pivot_wider(names_from = key, values_from = val, values_fn = sum) %>%
      collect() %>%
      dplyr::arrange(a)

    expected <- tibble::tibble(a = 1:2, x = c(11, 100))
    expect_equivalent(summed, expected)
  })
  # Every (a, key) pair occurs once, yet the `collect_list` summary is still
  # expected to be applied, giving a list column of single values.
  test_that("values_summarize applied even when no-duplicates", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(
      sc,
      tibble::tibble(a = c(1, 2), key = c("x", "x"), val = 1:2)
    )

    collected <- spark_tbl %>%
      tidyr::pivot_wider(
        names_from = key,
        values_from = val,
        values_fn = list(val = rlang::expr(collect_list))
      ) %>%
      collect()
    listed <- dplyr::arrange(collected, a)

    expect_equal(listed$a, c(1, 2))
    expect_equivalent(listed, tibble::tibble(a = 1:2, x = list(1, 2)))
  })
  # Checks the unfilled default (NaN) and both spellings of `values_fill`:
  # a bare scalar and a list keyed by the value column.
  test_that("can fill in missing cells", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(
      sc,
      tibble::tibble(g = 1:2, var = c("x", "y"), val = 1:2)
    )

    # Pivot with any extra arguments forwarded to `pivot_wider()`, then collect
    # and sort by `g` so rows compare positionally.
    pivot_with <- function(...) {
      wide <- tidyr::pivot_wider(
        spark_tbl,
        names_from = var, values_from = val, ...
      )
      dplyr::arrange(collect(wide), g)
    }

    unfilled <- tibble::tibble(g = 1:2, x = c(1, NaN), y = c(NaN, 2))
    expect_equivalent(pivot_with(), unfilled)

    expect_equivalent(
      pivot_with(values_fill = 0),
      tibble::tibble(g = 1:2, x = c(1, 0), y = c(0, 2))
    )

    expect_equivalent(
      pivot_with(values_fill = list(val = 0)),
      tibble::tibble(g = 1:2, x = c(1, 0), y = c(0, 2))
    )
  })
  # The input already contains an NA value for (g = 2, "y"); that cell is
  # expected to stay NaN while cells with no source row are filled with 0.
  test_that("values_fill only affects missing cells", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(
      sc,
      tibble::tibble(g = c(1, 2), names = c("x", "y"), value = c(1, NA))
    )

    filled <- tidyr::pivot_wider(
      spark_tbl,
      names_from = names, values_from = value, values_fill = 0
    )
    wide <- dplyr::arrange(collect(filled), g)

    expected <- tibble::tibble(g = 1:2, x = c(1, 0), y = c(0, NaN))
    expect_equivalent(wide, expected)
  })
  # Two value columns (`a`, `b`) pivoted on `var` are expected to give one
  # output column per value/key pair, alongside the untouched `row` column.
  test_that("can pivot from multiple measure cols", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(
      sc, tibble::tibble(row = 1, var = c("x", "y"), a = 1:2, b = 3:4)
    )

    wide <- spark_tbl %>%
      tidyr::pivot_wider(names_from = var, values_from = c(a, b)) %>%
      collect()

    expected <- tibble::tibble(row = 1, a_x = 1, a_y = 2, b_x = 3, b_y = 4)
    expect_equivalent(wide, expected)
  })
  # Same as the multi-measure case but with no column left over to identify
  # rows, so the expected result is a single row of value/key columns.
  test_that("can pivot from multiple measure cols using all keys", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(sc, tibble::tibble(var = c("x", "y"), a = 1:2, b = 3:4))

    wide <- spark_tbl %>%
      tidyr::pivot_wider(names_from = var, values_from = c(a, b)) %>%
      collect()

    expected <- tibble::tibble(a_x = 1, a_y = 2, b_x = 3, b_y = 4)
    expect_equivalent(wide, expected)
  })
  # `pivot_wider()` is called with no arguments on a table whose columns are
  # literally `name` and `value`; the expected output uses those as defaults.
  test_that("default `names_from` and `values_from` works as expected", {
    test_requires_version("2.3.0")

    spark_tbl <- copy_to(
      sc,
      tibble::tibble(name = c("x", "y"), value = c(1, 2))
    )

    wide <- collect(tidyr::pivot_wider(spark_tbl))

    expected <- tibble::tibble(x = 1, y = 2)
    expect_equivalent(wide, expected)
  })