/tests/testthat/test-locate.R

https://github.com/qinwf/re2r · R · 296 lines · 230 code · 43 blank · 23 comment · 3 complexity · 63e35acaa0b2651fa0b41b6999dd28d5 MD5 · raw file

  # Test file preamble: label this group of tests and load the shared helper
  # script. The tests below call eq_with_class(); presumably it is defined in
  # helper.R -- confirm against that file.
  context("test locate")
  source("helper.R")
  test_that("re2_locate", {
    # Build an expected location matrix with "start"/"end" columns from
    # column-major integer values (all starts first, then all ends).
    loc <- function(values) {
      structure(
        values,
        .Dim = c(length(values) %/% 2L, 2L),
        .Dimnames = list(NULL, c("start", "end"))
      )
    }

    # Check re2_locate_all() against `res` in three modes: default, parallel,
    # and parallel with grain_size = 1. When `parallel_rep` is TRUE the input
    # is additionally repeated 1000 times and compared with `rep_res`.
    tta <- function(string,
                    pattern,
                    res,
                    rep_res = rep(res, 1000),
                    parallel_rep = FALSE) {
      eq_with_class(re2_locate_all(string, pattern), res)
      eq_with_class(re2_locate_all(string, pattern, parallel = TRUE), res)
      eq_with_class(
        re2_locate_all(string, pattern, parallel = TRUE, grain_size = 1),
        res
      )
      if (parallel_rep) {
        eq_with_class(
          re2_locate_all(
            rep(string, 1000),
            pattern,
            parallel = TRUE,
            grain_size = 1
          ),
          rep_res
        )
      }
    }

    # Same three-mode check for re2_locate().
    # NOTE(review): `rep_res` defaults to NULL here, so a caller that sets
    # parallel_rep = TRUE must also pass `rep_res`; no call in this test does.
    tt <- function(string,
                   pattern,
                   res,
                   rep_res = NULL,
                   parallel_rep = FALSE) {
      eq_with_class(re2_locate(string, pattern), res)
      eq_with_class(re2_locate(string, pattern, parallel = TRUE), res)
      eq_with_class(
        re2_locate(string, pattern, parallel = TRUE, grain_size = 1),
        res
      )
      if (parallel_rep) {
        eq_with_class(
          re2_locate(
            rep(string, 1000),
            pattern,
            parallel = TRUE,
            grain_size = 1
          ),
          rep_res
        )
      }
    }

    # NA input next to a matching string.
    res <- list(
      loc(c(NA_integer_, NA_integer_)),
      loc(1:2)
    )
    tta(c(NA, "sd"), "sd", res)

    res <- loc(c(NA, 1L, NA, 2L))
    tt(c(NA, "sd"), "sd", res)

    # Located positions can be fed back into sub_string().
    res_2 <- re2_locate_all(c(NA, "sds"), "sd")
    expect_equal(sub_string("sds", res_2[[2]]), "sd")
    expect_equal(sub_string("sds", 1, 2), "sd")

    # NULL
    res <- loc(integer(0))
    tt(NULL, pattern = "sd", res)

    # re2_locate_all
    res <- list(
      loc(integer(0)),
      loc(integer(0)),
      loc(c(NA_integer_, NA_integer_))
    )
    tta(c("as", "as", NA), pattern = "sd", res)

    # re2_locate
    res <- loc(c(
      NA_integer_,
      NA_integer_,
      NA_integer_,
      NA_integer_,
      NA_integer_,
      NA_integer_
    ))
    tt(c("as", "as", NA), pattern = "sd", res)

    # test locate empty string
    # NULL
    res <- loc(integer(0))
    tt(NULL, pattern = "", res)

    # re2_locate_all
    res <- list(
      loc(c(1L, 2L, 3L, 0L, 1L, 2L)),
      loc(c(1L, 2L, 3L, 0L, 1L, 2L)),
      loc(c(NA_integer_, NA_integer_))
    )
    tta(c("as", "as", NA), pattern = "", res)

    # re2_locate
    res <- loc(c(1L, 1L, NA, 0L, 0L, NA))
    tt(c("as", "as", NA), pattern = "", res)

    # "\\P{M}" word bound: the expected result has one single-position row per
    # character of each input (6 rows for "asasd%", 5 for the literal text
    # "\P{M}"). An earlier `res` assignment that was immediately overwritten
    # here has been removed.
    res <- list(
      loc(c(1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L)),
      loc(c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L)),
      loc(c(NA_integer_, NA_integer_))
    )
    tta(c("asasd%", "\\P{M}", NA), pattern = "\\P{M}", res)

    # test locate $
    # NULL
    res <- loc(integer(0))
    tt(NULL, pattern = "$", res)

    # re2_locate_all
    res <- list(
      loc(c(3L, 2L)),
      loc(c(3L, 2L)),
      loc(c(NA_integer_, NA_integer_))
    )
    tta(c("as", "as", NA), pattern = "$", res)

    # re2_locate
    res <- loc(c(3L, 3L, NA, 2L, 2L, NA))
    tt(c("as", "as", NA), pattern = "$", res)

    # test locate ^
    res <- loc(c(1L, 1L, NA, 0L, 0L, NA))
    tt(c("as", "as", NA), pattern = "^", res)
  })
  test_that("test sub_string", {
    # Replacement addressed by scalar from/to positions. The original test
    # performed this assignment but reset `sds` before checking the result, so
    # this form was never actually verified.
    sds <- "sds"
    sub_string(sds, 1, 2) <- "ab"
    expect_identical(sds, "abs")

    # Replacement addressed by a matrix whose two columns give start and end.
    sds <- "sds"
    sub_string(sds, from = matrix(c(1, 2), ncol = 2)) <- "ab"
    expect_identical(sds, "abs")
  })
  test_that("vectorize locate", {
    # Each case is list(input string, vector of patterns, expected matrix).
    cases <- list(
      list(
        "bar",
        c("b", "a"),
        structure(
          c(1L, 2L, 1L, 2L),
          .Dim = c(2L, 2L),
          .Dimnames = list(NULL, c("start", "end"))
        )
      )
    )

    for (i in seq_along(cases)) {
      input <- cases[[i]][[1]]
      patterns <- cases[[i]][[2]]
      expected <- cases[[i]][[3]]
      eq_with_class(re2_locate(input, patterns), expected)
    }
  })
  test_that("stringi tests", {
    # Inputs and patterns that several cases below share.
    accented <- "1a\u0105a"
    supplementary <- "X\U00024B62\U00024B63\U00024B64X"
    phrase <- "ala ola ela ula"
    group_pat <- "(a+b)+"
    optional_in <- c("\u0105\u0106\u0107", "\u0105\u0107")
    optional_pat <- "\u0106*"

    # ---- locate all ----
    expect_is(re2_locate_all(character(0), "a"), "list")

    # differences for empty string
    # expect_equivalent(re2_locate_all(NA, character(0)), list())
    # expect_equivalent(re2_locate_all(LETTERS, character(0)), list())
    # suppressWarnings(expect_equivalent(re2_locate_all("abc", ""), list(matrix(c(NA_integer_,NA_integer_)))))
    # suppressWarnings(expect_equivalent(re2_locate_all("", ""), list(matrix(c(NA_integer_,NA_integer_)))))
    # suppressWarnings(expect_equivalent(re2_locate_all("", "abc"), list(matrix(c(NA_integer_,NA_integer_)))))

    eq_with_class(re2_locate_all(character(0), NA), list())
    expect_equal(
      as.integer(re2_locate_all(NA, "[a-z]")[[1]]),
      c(NA_integer_, NA_integer_)
    )
    expect_equal(
      as.integer(re2_locate_all("?", "[a-z]")[[1]]),
      integer(0)
    )

    # Non-ASCII input; the expected positions count each code point as one.
    eq_with_class(
      re2_locate_all(accented, "\u0105"),
      list(matrix(c(3, 3)))
    )
    eq_with_class(
      re2_locate_all(
        supplementary,
        c("\U00024B62", "\U00024B63", "\U00024B64")
      ),
      list(matrix(c(2L, 2L)), matrix(c(3L, 3L)), matrix(c(4L, 4L)))
    )

    eq_with_class(re2_locate_all("aaa", "aa"), list(matrix(c(1, 2))))
    eq_with_class(
      re2_locate_all(c("", " "), "^.*$"),
      list(matrix(c(1, 0)), matrix(c(1, 1)))
    )
    eq_with_class(re2_locate_all(accented, "a.a"), list(matrix(c(2, 4))))

    # Several matches in one string.
    eq_with_class(
      re2_locate_all(phrase, ".la"),
      list(matrix(c(1, 5, 9, 13, 3, 7, 11, 15), ncol = 2))
    )
    eq_with_class(
      re2_locate_all(phrase, "(a|u|z)la"),
      list(matrix(c(1, 13, 3, 15), ncol = 2))
    )

    # Repeated group.
    eq_with_class(
      re2_locate_all("aabaaaba", group_pat),
      list(matrix(c(1, 7)))
    )
    eq_with_class(
      re2_locate_all("aabaacba", group_pat),
      list(matrix(c(1, 3)))
    )
    eq_with_class(
      re2_locate_all("ababacba", group_pat),
      list(matrix(c(1, 4)))
    )
    eq_with_class(
      re2_locate_all("aabdaaaba", group_pat),
      list(matrix(c(1, 5, 3, 8), ncol = 2))
    )
    eq_with_class(
      re2_locate_all("aabdaacba", group_pat),
      list(matrix(c(1, 3)))
    )
    eq_with_class(
      re2_locate_all("ababdacba", group_pat),
      list(matrix(c(1, 4)))
    )

    # match of zero length
    eq_with_class(
      re2_locate_all(optional_in, optional_pat),
      list(
        matrix(c(1, 2, 3, 4, 0, 2, 2, 3), ncol = 2),
        matrix(c(1, 2, 3, 0, 1, 2), ncol = 2)
      )
    )

    # ---- locate first ----
    expect_is(re2_locate(character(0), "a"), "re2_matrix")
    # expect_equivalent(nrow(re2_locate(NA, character(0))), 0)
    expect_equal(nrow(re2_locate(character(0), NA)), 0)
    # expect_equivalent(nrow(re2_locate(LETTERS, character(0))), 0)
    # suppressWarnings(expect_equivalent(re2_locate("abc", ""), matrix(c(NA_integer_,NA_integer_))))
    suppressWarnings(
      eq_with_class(
        re2_locate("", "abc"),
        matrix(c(NA_integer_, NA_integer_))
      )
    )
    # suppressWarnings(expect_equivalent(re2_locate("", ""), matrix(c(NA_integer_,NA_integer_))))

    eq_with_class(
      re2_locate(c("", " "), "^.*$"),
      matrix(c(1, 0, 1, 1), ncol = 2, byrow = TRUE)
    )

    # Non-ASCII input.
    eq_with_class(
      re2_locate("X\u0104\u0105\u106X", "\u0105"),
      matrix(c(3L, 3L))
    )
    eq_with_class(
      re2_locate("X\u9999\u9998\u9997X", "\u9998"),
      matrix(c(3L, 3L))
    )
    eq_with_class(
      re2_locate(supplementary, "\U00024B63"),
      matrix(c(3L, 3L))
    )

    eq_with_class(re2_locate("aaa", "aa"), matrix(c(1L, 2L)))
    eq_with_class(re2_locate(accented, "a.a"), matrix(c(2, 4)))

    # Only the first match is expected back.
    eq_with_class(re2_locate(phrase, ".la"), matrix(c(1, 3)))
    eq_with_class(re2_locate(phrase, "(e|u|z)la"), matrix(c(9, 11)))

    # Repeated group.
    eq_with_class(re2_locate("aabaaaba", group_pat), matrix(c(1, 7)))
    eq_with_class(re2_locate("aabaacba", group_pat), matrix(c(1, 3)))
    eq_with_class(re2_locate("ababacba", group_pat), matrix(c(1, 4)))
    eq_with_class(re2_locate("aabdaaaba", group_pat), matrix(c(1, 3)))
    eq_with_class(re2_locate("aabdaacba", group_pat), matrix(c(1, 3)))
    eq_with_class(re2_locate("ababdacba", group_pat), matrix(c(1, 4)))

    # match of zero length
    eq_with_class(
      re2_locate(optional_in, optional_pat),
      matrix(c(1, 1, 0, 0), ncol = 2)
    )
  })