PageRenderTime 29ms CodeModel.GetById 6ms RepoModel.GetById 1ms app.codeStats 0ms

/lib_test/test_re.ml

http://github.com/avsm/ocaml-re
OCaml | 397 lines | 328 code | 59 blank | 10 comment | 0 complexity | 102f3ff3fbdbca1a79579e55a66db1d3 MD5 | raw file
Possible License(s): LGPL-2.1
  1. open Re
  (* [re_match ?pos ?len r s res] hands [expect_equal_app] the expected
     group offsets (as [id res]) together with a thunk that compiles [r],
     executes it on [s] (forwarding the optional [pos] / [len]) and collects
     the offsets of every group.  [s], rendered by [str_printer], is used as
     the message. *)
  let re_match ?pos ?len r s res =
    let all_offsets () = get_all_ofs (exec ?pos ?len (compile r) s) in
    expect_equal_app
      ~msg:(str_printer s)
      ~printer:arr_ofs_printer
      id res
      all_offsets ()
  ;;
  (* [re_fail ?pos ?len r s] is the counterpart of a successful match check:
     the reference computation handed to [expect_equal_app] is
     [not_found ()], and it is compared against a thunk that compiles [r],
     executes it on [s] (forwarding the optional [pos] / [len]) and collects
     all group offsets. *)
  let re_fail ?pos ?len r s =
    let all_offsets () = get_all_ofs (exec ?pos ?len (compile r) s) in
    expect_equal_app
      ~msg:(str_printer s)
      ~printer:arr_ofs_printer
      not_found ()
      all_offsets ()
  ;;
  (* Substring Extraction *)

  (* Runs one pattern with three groups against "ab" once, then checks each
     accessor ([get], [get_ofs], [get_all], [get_all_ofs], [test]) on the
     same match result.  The expectations below treat group 2 (the optional
     one) as unmatched and index 4 as out of range. *)
  let _ =
    let pattern =
      seq [ group (char 'a');
            opt (group (char 'a'));
            group (char 'b') ]
    in
    let subs = exec (compile pattern) "ab" in

    expect_pass "get" (fun () ->
      expect_eq_str id        "ab" (get subs) 0;
      expect_eq_str id        "a"  (get subs) 1;
      expect_eq_str not_found ()   (get subs) 2;
      expect_eq_str id        "b"  (get subs) 3;
      expect_eq_str not_found ()   (get subs) 4;
    );

    expect_pass "get_ofs" (fun () ->
      expect_eq_ofs id        (0,2) (get_ofs subs) 0;
      expect_eq_ofs id        (0,1) (get_ofs subs) 1;
      expect_eq_ofs not_found ()    (get_ofs subs) 2;
      expect_eq_ofs id        (1,2) (get_ofs subs) 3;
      expect_eq_ofs not_found ()    (get_ofs subs) 4;
    );

    expect_pass "get_all" (fun () ->
      expect_eq_arr_str id [|"ab";"a";"";"b"|] get_all subs
    );

    expect_pass "get_all_ofs" (fun () ->
      expect_eq_arr_ofs id [|(0,2);(0,1);(-1,-1);(1,2)|] get_all_ofs subs
    );

    expect_pass "test" (fun () ->
      expect_eq_bool id true  (test subs) 0;
      expect_eq_bool id true  (test subs) 1;
      expect_eq_bool id false (test subs) 2;
      expect_eq_bool id true  (test subs) 3;
      expect_eq_bool id false (test subs) 4;
    );
  ;;
  (* Literal Match *)

  (* A literal string pattern: one positive and one negative input. *)
  expect_pass "str" (fun () ->
    let lit = str "a" in
    re_match lit "a" [|(0,1)|];
    re_fail  lit "b";
  );

  (* A single-character pattern: one positive and one negative input. *)
  expect_pass "char" (fun () ->
    let lit = char 'a' in
    re_match lit "a" [|(0,1)|];
    re_fail  lit "b";
  );
  (* Basic Operations *)

  expect_pass "alt" (fun () ->
    let a_or_b = alt [char 'a'; char 'b'] in
    re_match a_or_b "a" [|(0,1)|];
    re_match a_or_b "b" [|(0,1)|];
    re_fail  a_or_b "c";
  );

  expect_pass "seq" (fun () ->
    let a_then_b = seq [char 'a'; char 'b'] in
    re_match a_then_b "ab" [|(0,2)|];
    re_fail  a_then_b "ac";
  );

  (* [empty] is expected to match nothing, not even the empty string. *)
  expect_pass "empty" (fun () ->
    re_fail empty "";
    re_fail empty "a";
  );

  (* [epsilon] is expected to match the empty prefix of any input. *)
  expect_pass "epsilon" (fun () ->
    re_match epsilon ""  [|(0,0)|];
    re_match epsilon "a" [|(0,0)|];
  );

  expect_pass "rep" (fun () ->
    let a_star = rep (char 'a') in
    re_match a_star ""   [|(0,0)|];
    re_match a_star "a"  [|(0,1)|];
    re_match a_star "aa" [|(0,2)|];
    re_match a_star "b"  [|(0,0)|];
  );

  expect_pass "rep1" (fun () ->
    let a_plus = rep1 (char 'a') in
    re_match a_plus "a"  [|(0,1)|];
    re_match a_plus "aa" [|(0,2)|];
    re_fail  a_plus "";
    re_fail  a_plus "b";
  );

  expect_pass "repn" (fun () ->
    re_match (repn (char 'a') 0 None)     "" [|(0,0)|];
    re_match (repn (char 'a') 0 (Some 0)) "" [|(0,0)|];
    let one_or_two = repn (char 'a') 1 (Some 2) in
    re_match one_or_two "a"   [|(0,1)|];
    re_match one_or_two "aa"  [|(0,2)|];
    re_fail  one_or_two "";
    re_match one_or_two "aaa" [|(0,2)|];
    (* Invalid bounds: the outcome of building the pattern is compared with
       the outcome of [invalid_arg "Re.repn"]. *)
    expect_equal_app
      invalid_arg "Re.repn"
      (fun () -> repn empty (-1) None) ();
    expect_equal_app
      invalid_arg "Re.repn"
      (fun () -> repn empty 1 (Some 0)) ();
  );

  expect_pass "opt" (fun () ->
    let maybe_a = opt (char 'a') in
    re_match maybe_a ""  [|(0,0)|];
    re_match maybe_a "a" [|(0,1)|];
  );
  (* String, line, word *)

  expect_pass "bol" (fun () ->
    let bol_a = seq [bol; char 'a'] in
    re_match bol_a "ab"   [|(0,1)|];
    re_match bol_a "b\na" [|(2,3)|];
    re_fail  bol_a "ba";
  );

  expect_pass "eol" (fun () ->
    let a_eol = seq [char 'a'; eol] in
    re_match a_eol "ba"   [|(1,2)|];
    re_match a_eol "a\nb" [|(0,1)|];
    re_match a_eol "ba\n" [|(1,2)|];
    re_fail  a_eol "ab";
  );

  expect_pass "bow" (fun () ->
    let bow_a = seq [bow; char 'a'] in
    re_match bow_a "a"     [|(0,1)|];
    re_match bow_a "bb aa" [|(3,4)|];
    re_fail  bow_a "ba ba";
  );

  expect_pass "eow" (fun () ->
    let a_eow = seq [char 'a'; eow] in
    re_match a_eow "a"     [|(0,1)|];
    re_match a_eow "bb aa" [|(4,5)|];
    re_fail  a_eow "ab ab";
  );

  expect_pass "bos" (fun () ->
    let bos_a = seq [bos; char 'a'] in
    re_match bos_a "ab" [|(0,1)|];
    re_fail  bos_a "b\na";
    re_fail  bos_a "ba";
  );

  expect_pass "eos" (fun () ->
    let a_eos = seq [char 'a'; eos] in
    re_match a_eos "ba" [|(1,2)|];
    re_fail  a_eos "a\nb";
    re_fail  a_eos "ba\n";
    re_fail  a_eos "ab";
  );

  expect_pass "leol" (fun () ->
    let a_leol = seq [char 'a'; leol] in
    re_match a_leol "ba"   [|(1,2)|];
    re_fail  a_leol "a\nb";
    re_match a_leol "ba\n" [|(1,2)|];
    re_fail  a_leol "ab";
    re_match (alt [str "b\n"; a_leol]) "ab\n" [|(1,3)|];
  );

  (* [start] is exercised with an explicit search position. *)
  expect_pass "start" (fun () ->
    let start_a = seq [start; char 'a'] in
    re_match ~pos:1 start_a "xab" [|(1,2)|];
    re_fail  ~pos:1 start_a "xb\na";
    re_fail  ~pos:1 start_a "xba";
  );

  (* [stop] is exercised with an explicit search length. *)
  expect_pass "stop" (fun () ->
    let a_stop = seq [char 'a'; stop] in
    re_match ~len:2 a_stop "bax" [|(1,2)|];
    re_fail  ~len:3 a_stop "a\nbx";
    re_fail  ~len:3 a_stop "ba\nx";
    re_fail  ~len:2 a_stop "abx";
  );

  expect_pass "word" (fun () ->
    let aa_word = word (str "aa") in
    re_match aa_word "aa"    [|(0,2)|];
    re_match aa_word "bb aa" [|(3,5)|];
    re_fail  aa_word "aaa";
  );

  expect_pass "not_boundary" (fun () ->
    re_match (seq [not_boundary; char 'b'; not_boundary]) "abc" [|(1,2)|];
    re_fail  (seq [not_boundary; char 'a']) "abc";
    re_fail  (seq [char 'c'; not_boundary]) "abc";
  );
  (* Match semantics *)

  (* The next four tests run the same three patterns on the same inputs
     under different semantics wrappers; only the expected offsets differ. *)
  expect_pass "default match semantics" (fun () ->
    re_match (seq [rep (alt [char 'a'; char 'b']); char 'b'])
      "aabaab" [|(0,6)|];
    re_match (alt [str "aa"; str "aaa"]) "aaaa" [|(0, 2)|];
    re_match (alt [str "aaa"; str "aa"]) "aaaa" [|(0, 3)|];
  );

  expect_pass "shortest match" (fun () ->
    re_match (shortest (seq [rep (alt [char 'a'; char 'b']); char 'b']))
      "aabaab" [|(0,3)|];
    re_match (shortest (alt [str "aa"; str "aaa"])) "aaaa" [|(0, 2)|];
    re_match (shortest (alt [str "aaa"; str "aa"])) "aaaa" [|(0, 2)|];
  );

  expect_pass "longest match" (fun () ->
    re_match (longest (seq [rep (alt [char 'a'; char 'b']); char 'b']))
      "aabaab" [|(0,6)|];
    re_match (longest (alt [str "aa"; str "aaa"])) "aaaa" [|(0, 3)|];
    re_match (longest (alt [str "aaa"; str "aa"])) "aaaa" [|(0, 3)|];
  );

  expect_pass "first match" (fun () ->
    re_match (first (seq [rep (alt [char 'a'; char 'b']); char 'b']))
      "aabaab" [|(0,6)|];
    re_match (first (alt [str "aa"; str "aaa"])) "aaaa" [|(0, 2)|];
    re_match (first (alt [str "aaa"; str "aa"])) "aaaa" [|(0, 3)|];
  );

  expect_pass "greedy" (fun () ->
    re_match (greedy (seq [rep (alt [char 'a'; char 'b']); char 'b']))
      "aabaab" [|(0,6)|];
    re_match (greedy (rep (group (opt (char 'a')))))
      "aa" [|(0,2); (2,2)|];
  );

  expect_pass "non_greedy" (fun () ->
    let ab_star_b = seq [rep (alt [char 'a'; char 'b']); char 'b'] in
    re_match (non_greedy (longest ab_star_b)) "aabaab" [|(0,6)|];
    re_match (non_greedy (first ab_star_b))   "aabaab" [|(0,3)|];
    re_match (non_greedy (longest (rep (group (opt (char 'a'))))))
      "aa" [|(0,2); (1,2)|];
  );

  (* A repeated group over an alternation, under each semantics wrapper;
     the second pattern additionally wraps the alternation in [shortest]. *)
  expect_pass "match semantics" (fun () ->
    let subject = "aaaaaaa" in
    let plain = rep (group (alt [str "aaa"; str "aa"])) in
    re_match (longest plain)            subject [|(0,7); (5, 7)|];
    re_match (first plain)              subject [|(0,6); (3, 6)|];
    re_match (first (non_greedy plain)) subject [|(0,0); (-1, -1)|];
    re_match (shortest plain)           subject [|(0,0); (-1, -1)|];
    let inner_shortest = rep (group (shortest (alt [str "aaa"; str "aa"]))) in
    re_match (longest inner_shortest) subject [|(0,7); (4, 7)|];
    re_match (first inner_shortest)   subject [|(0,6); (4, 6)|];
  );
  (* Group (or submatch) *)

  (* Three groups, the middle one optional; its expected offset on "ab"
     is (-1,-1). *)
  expect_pass "group" (fun () ->
    let pattern =
      seq [ group (char 'a');
            opt (group (char 'a'));
            group (char 'b') ]
    in
    let offsets () = get_all_ofs (exec (compile pattern) "ab") in
    expect_eq_arr_ofs id [|(0,2);(0,1);(-1,-1);(1,2)|] offsets ()
  );

  (* Same pattern wrapped in [no_group]: only the whole-match offset is
     expected. *)
  expect_pass "no_group" (fun () ->
    let pattern =
      no_group
        (seq [ group (char 'a');
               opt (group (char 'a'));
               group (char 'b') ])
    in
    let offsets () = get_all_ofs (exec (compile pattern) "ab") in
    expect_eq_arr_ofs id [|(0,2)|] offsets ()
  );

  (* With [nest] inside [rep], the group is expected to be reported only
     when the last iteration went through it. *)
  expect_pass "nest" (fun () ->
    let pattern = rep (nest (alt [group (char 'a'); char 'b'])) in
    re_match pattern "ab" [|(0,2); (-1, -1)|];
    re_match pattern "ba" [|(0,2); (1, 2)|];
  );
  (* Character set *)

  expect_pass "set" (fun () ->
    re_match (rep1 (set "abcd")) "bcbadbabcdba" [|(0,12)|];
    re_fail  (set "abcd") "e";
  );

  expect_pass "rg" (fun () ->
    let digits = rep1 (rg '0' '9') in
    re_match digits "0123456789" [|(0,10)|];
    re_fail  digits "a";
  );

  expect_pass "inter" (fun () ->
    let four_to_six = rep1 (inter [rg '0' '9'; rg '4' '6']) in
    re_match four_to_six "456" [|(0,3)|];
    re_fail  four_to_six "7";
    re_match (inter [alt [char 'a'; char 'b']; char 'b']) "b" [|(0,1)|];
  );

  expect_pass "diff" (fun () ->
    let not_four_to_six = rep1 (diff (rg '0' '9') (rg '4' '6')) in
    re_match not_four_to_six "0123789" [|(0,7)|];
    re_fail  not_four_to_six "4";
  );

  expect_pass "compl" (fun () ->
    let neither = rep1 (compl [rg '0' '9'; rg 'a' 'z']) in
    re_match neither "A:Z+" [|(0,4)|];
    re_fail  neither "0";
    re_fail  neither "a";
  );
  (* Predefined character sets - should these be tested exhaustively? *)

  (* Case modifiers *)

  (* The expectations show the innermost modifier winning: [case] nested
     inside [no_case] still rejects "ABC". *)
  expect_pass "case" (fun () ->
    let sensitive = case (str "abc") in
    re_match sensitive "abc" [|(0,3)|];
    let wrapped = no_case (case (str "abc")) in
    re_match wrapped "abc" [|(0,3)|];
    re_fail  sensitive "ABC";
    re_fail  wrapped   "ABC";
  );

  (* Conversely, [no_case] nested inside [case] still accepts "ABC". *)
  expect_pass "no_case" (fun () ->
    let insensitive = no_case (str "abc") in
    re_match insensitive "abc" [|(0,3)|];
    re_match insensitive "ABC" [|(0,3)|];
    let wrapped = case (no_case (str "abc")) in
    re_match wrapped "abc" [|(0,3)|];
    re_match wrapped "ABC" [|(0,3)|];
  );
  (* Fixed bugs *)

  (* Regression check: parsing this Perl-syntax pattern and compiling it
     must not raise.  Any exception is turned into a test failure. *)
  expect_pass "bugs" (fun () ->
    let perl_pattern = "(.*?)(\\WPl|\\Bpl)(.*)" in
    try ignore (Re.compile (Re_perl.re perl_pattern))
    with _ -> fail "bug in Re.handle_case"
  );