/tests/testthat/test-tidyr-pivot-wider.R

https://github.com/sparklyr/sparklyr · R · 270 lines · 220 code · 50 blank · 0 comment · 0 complexity · 417ec42274bde3e2f1a6624606344178 MD5 · raw file

  1. context("tidyr-pivot-wider")
  2. sc <- testthat_spark_connection()
  3. test_that("can pivot all cols to wide", {
  4. test_requires_version("2.3.0")
  5. sdf <- copy_to(sc, tibble::tibble(key = c("x", "y", "z"), val = 1:3))
  6. pv <- tidyr::pivot_wider(
  7. sdf,
  8. names_from = key, values_from = val, names_sort = TRUE
  9. ) %>%
  10. collect()
  11. expect_equivalent(pv, tibble::tibble(x = 1, y = 2, z = 3))
  12. })
  13. test_that("non-pivoted cols are preserved", {
  14. test_requires_version("2.3.0")
  15. sdf <- copy_to(sc, tibble::tibble(a = 1, key = c("x", "y"), val = 1:2))
  16. pv <- tidyr::pivot_wider(
  17. sdf,
  18. names_from = key, values_from = val, names_sort = TRUE
  19. ) %>%
  20. collect()
  21. expect_equivalent(pv, tibble::tibble(a = 1, x = 1, y = 2))
  22. })
  23. test_that("implicit missings turn into explicit missings", {
  24. test_requires_version("2.3.0")
  25. sdf <- copy_to(sc, tibble::tibble(a = 1:2, key = c("x", "y"), val = 1:2))
  26. pv <- tidyr::pivot_wider(
  27. sdf,
  28. names_from = key, values_from = val, names_sort = TRUE
  29. ) %>%
  30. collect() %>%
  31. dplyr::arrange(a)
  32. expect_equivalent(pv, tibble::tibble(a = 1:2, x = c(1, NaN), y = c(NaN, 2)))
  33. })
  34. test_that("error when overwriting existing column", {
  35. test_requires_version("2.3.0")
  36. sdf <- copy_to(sc, tibble::tibble(a = 1, key = c("a", "b"), val = 1:2))
  37. expect_error(
  38. tidyr::pivot_wider(sdf, names_from = key, values_from = val),
  39. class = "tibble_error_column_names_must_be_unique"
  40. )
  41. })
  42. test_that("grouping is preserved", {
  43. test_requires_version("2.3.0")
  44. sdf <- copy_to(sc, tibble::tibble(g = 1, k = "x", v = 2))
  45. out <- sdf %>%
  46. dplyr::group_by(g) %>%
  47. tidyr::pivot_wider(names_from = k, values_from = v)
  48. expect_equal(dplyr::group_vars(out), "g")
  49. })
  50. test_that("nested list column pivots correctly", {
  51. test_requires_version("2.4.0")
  52. sdf <- copy_to(
  53. sc,
  54. tibble::tibble(
  55. i = c(1, 2, 1, 2),
  56. g = c("a", "a", "b", "b"),
  57. d = list(
  58. list(x = 1, y = 5), list(x = 2, y = 6), list(x = 3, y = 7), list(x = 4, y = 8)
  59. )
  60. )
  61. )
  62. out <- tidyr::pivot_wider(sdf, names_from = g, values_from = d, names_sort = TRUE) %>%
  63. collect() %>%
  64. dplyr::arrange(i)
  65. expect_equivalent(
  66. out,
  67. tibble::tibble(
  68. i = 1:2,
  69. a = list(list(x = 1, y = 5), list(x = 2, y = 6)),
  70. b = list(list(x = 3, y = 7), list(x = 4, y = 8))
  71. )
  72. )
  73. })
  74. test_that("can specify output column names using names_glue", {
  75. test_requires_version("2.3.0")
  76. sdf <- copy_to(
  77. sc,
  78. tibble::tibble(x = c("X", "Y"), y = 1:2, a = 1:2, b = 1:2)
  79. )
  80. expect_equivalent(
  81. tidyr::pivot_wider(
  82. sdf,
  83. names_from = x:y,
  84. values_from = a:b,
  85. names_glue = "{x}{y}_{.value}",
  86. names_sort = TRUE
  87. ) %>%
  88. collect(),
  89. tibble::tibble(X1_a = 1, Y2_a = 2, X1_b = 1, Y2_b = 2)
  90. )
  91. })
  92. test_that("can sort column names", {
  93. test_requires_version("2.3.0")
  94. sdf <- copy_to(
  95. sc,
  96. tibble::tibble(int = c(1, 3, 2), days = c("Mon", "Tues", "Wed"))
  97. )
  98. expect_equivalent(
  99. tidyr::pivot_wider(
  100. sdf,
  101. names_from = days, values_from = int, names_sort = TRUE
  102. ) %>%
  103. collect(),
  104. tibble::tibble(Mon = 1, Tues = 3, Wed = 2)
  105. )
  106. })
  107. test_that("can override default keys", {
  108. test_requires_version("2.3.0")
  109. skip_databricks_connect()
  110. sdf <- copy_to(
  111. sc,
  112. tibble::tribble(
  113. ~row, ~name, ~var, ~value,
  114. 1, "Sam", "age", 10,
  115. 2, "Sam", "height", 1.5,
  116. 3, "Bob", "age", 20,
  117. )
  118. )
  119. expect_equivalent(
  120. sdf %>%
  121. tidyr::pivot_wider(id_cols = name, names_from = var, values_from = value) %>%
  122. collect() %>%
  123. dplyr::arrange(name),
  124. tibble::tribble(
  125. ~name, ~age, ~height,
  126. "Bob", 20, NaN,
  127. "Sam", 10, 1.5,
  128. )
  129. )
  130. })
  131. test_that("values_fn can be a single function", {
  132. test_requires_version("2.3.0")
  133. sdf <- copy_to(
  134. sc,
  135. tibble::tibble(a = c(1, 1, 2), key = c("x", "x", "x"), val = c(1, 10, 100))
  136. )
  137. pv <- tidyr::pivot_wider(
  138. sdf,
  139. names_from = key, values_from = val, values_fn = sum
  140. ) %>%
  141. collect() %>%
  142. dplyr::arrange(a)
  143. expect_equivalent(pv, tibble::tibble(a = 1:2, x = c(11, 100)))
  144. })
  145. test_that("values_summarize applied even when no-duplicates", {
  146. test_requires_version("2.3.0")
  147. sdf <- copy_to(sc, tibble::tibble(a = c(1, 2), key = c("x", "x"), val = 1:2))
  148. pv <- tidyr::pivot_wider(
  149. sdf,
  150. names_from = key,
  151. values_from = val,
  152. values_fn = list(val = rlang::expr(collect_list))
  153. ) %>%
  154. collect() %>%
  155. dplyr::arrange(a)
  156. expect_equal(pv$a, c(1, 2))
  157. expect_equivalent(pv, tibble::tibble(a = 1:2, x = list(1, 2)))
  158. })
  159. test_that("can fill in missing cells", {
  160. test_requires_version("2.3.0")
  161. sdf <- copy_to(sc, tibble::tibble(g = 1:2, var = c("x", "y"), val = 1:2))
  162. widen <- function(...) {
  163. sdf %>%
  164. tidyr::pivot_wider(names_from = var, values_from = val, ...) %>%
  165. collect() %>%
  166. dplyr::arrange(g)
  167. }
  168. expect_equivalent(
  169. widen(), tibble::tibble(g = 1:2, x = c(1, NaN), y = c(NaN, 2))
  170. )
  171. expect_equivalent(
  172. widen(values_fill = 0), tibble::tibble(g = 1:2, x = c(1, 0), y = c(0, 2))
  173. )
  174. expect_equivalent(
  175. widen(values_fill = list(val = 0)),
  176. tibble::tibble(g = 1:2, x = c(1, 0), y = c(0, 2))
  177. )
  178. })
  179. test_that("values_fill only affects missing cells", {
  180. test_requires_version("2.3.0")
  181. sdf <- copy_to(
  182. sc,
  183. tibble::tibble(g = c(1, 2), names = c("x", "y"), value = c(1, NA))
  184. )
  185. out <- sdf %>%
  186. tidyr::pivot_wider(names_from = names, values_from = value, values_fill = 0) %>%
  187. collect() %>%
  188. dplyr::arrange(g)
  189. expect_equivalent(out, tibble::tibble(g = 1:2, x = c(1, 0), y = c(0, NaN)))
  190. })
  191. test_that("can pivot from multiple measure cols", {
  192. test_requires_version("2.3.0")
  193. sdf <- copy_to(
  194. sc, tibble::tibble(row = 1, var = c("x", "y"), a = 1:2, b = 3:4)
  195. )
  196. pv <- tidyr::pivot_wider(sdf, names_from = var, values_from = c(a, b)) %>%
  197. collect()
  198. expect_equivalent(
  199. pv,
  200. tibble::tibble(row = 1, a_x = 1, a_y = 2, b_x = 3, b_y = 4)
  201. )
  202. })
  203. test_that("can pivot from multiple measure cols using all keys", {
  204. test_requires_version("2.3.0")
  205. sdf <- copy_to(sc, tibble::tibble(var = c("x", "y"), a = 1:2, b = 3:4))
  206. pv <- tidyr::pivot_wider(sdf, names_from = var, values_from = c(a, b)) %>%
  207. collect()
  208. expect_equivalent(pv, tibble::tibble(a_x = 1, a_y = 2, b_x = 3, b_y = 4))
  209. })
  210. test_that("default `names_from` and `values_from` works as expected", {
  211. test_requires_version("2.3.0")
  212. sdf <- copy_to(
  213. sc,
  214. tibble::tibble(name = c("x", "y"), value = c(1, 2))
  215. )
  216. pv <- sdf %>%
  217. tidyr::pivot_wider() %>%
  218. collect()
  219. expect_equivalent(pv, tibble::tibble(x = 1, y = 2))
  220. })