PageRenderTime 114ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/third_party/rust/regex/v1/crate/tests/regression.rs

https://github.com/chromium/chromium
Rust | 219 lines | 156 code | 28 blank | 35 comment | 0 complexity | d67ae8338b0132dd964e06d94b3244fe MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, Apache-2.0, BSD-3-Clause
  // See: https://github.com/rust-lang/regex/issues/48
  //
  // Every pattern here is syntactically invalid. Compilation must hand back an
  // `Err` for each one instead of crashing.
  #[test]
  fn invalid_regexes_no_crash() {
      // Repetition operator with nothing to repeat, inside a group.
      let star_in_group = regex_new!("(*)");
      assert!(star_in_group.is_err());

      // Non-capturing group whose body is a lone `?`.
      let question_in_non_capture = regex_new!("(?:?)");
      assert!(question_in_non_capture.is_err());

      // `(?` that is closed immediately.
      let bare_question_group = regex_new!("(?)");
      assert!(bare_question_group.is_err());

      // Repetition operator as the entire pattern.
      let lone_star = regex_new!("*");
      assert!(lone_star.is_err());
  }
  // See: https://github.com/rust-lang/regex/issues/98
  //
  // A bounded repetition with a large upper limit (2500) must build and run.
  // On the one-byte haystack "a" the only match is the span (0, 1).
  #[test]
  fn regression_many_repeat_stack_overflow() {
      let bounded_repeat = regex!("^.{1,2500}");
      let spans = findall!(bounded_repeat, "a");
      assert_eq!(vec![(0, 1)], spans);
  }
  // See: https://github.com/rust-lang/regex/issues/555
  //
  // A counted repetition applied directly to a flag group (`(?m){1,1}`) has
  // nothing to repeat, so compilation must report an error.
  #[test]
  fn regression_invalid_repetition_expr() {
      let compiled = regex_new!("(?m){1,1}");
      assert!(compiled.is_err());
  }
  // See: https://github.com/rust-lang/regex/issues/527
  //
  // A flag group nested inside several otherwise-empty capture groups must be
  // accepted by the compiler.
  #[test]
  fn regression_invalid_flags_expression() {
      let compiled = regex_new!("(((?x)))");
      assert!(compiled.is_ok());
  }
  25. // See: https://github.com/rust-lang/regex/issues/75
      //
      // Both cases use a case-insensitive, non-Unicode (`(?i-u)`) class and a
      // haystack whose letter has the opposite case from the one written in the
      // class. The expected span (0, 2) covers the letter and the underscore.
      // NOTE(review): `mat!` is defined outside this file; its last argument is
      // presumably the expected overall match span in bytes — confirm.
  26. mat!(regression_unsorted_binary_search_1, r"(?i-u)[a_]+", "A_", Some((0, 2)));
  27. mat!(regression_unsorted_binary_search_2, r"(?i-u)[A_]+", "a_", Some((0, 2)));
  28. // See: https://github.com/rust-lang/regex/issues/99
      //
      // With `(?i)` in effect, the negated class `[^x]` is expected to reject
      // both "x" and "X" (expected result `None`). Each case is compiled only
      // when the `unicode-case` feature is enabled.
      // NOTE(review): `mat!` is defined outside this file; `None` presumably
      // means "no match expected" — confirm.
  29. #[cfg(feature = "unicode-case")]
  30. mat!(regression_negated_char_class_1, r"(?i)[^x]", "x", None);
  31. #[cfg(feature = "unicode-case")]
  32. mat!(regression_negated_char_class_2, r"(?i)[^x]", "X", None);
  33. // See: https://github.com/rust-lang/regex/issues/101
      //
      // The `[[:word:]]` class is expected to match the underscore: the
      // one-byte haystack "_" yields the span (0, 1).
      // NOTE(review): `mat!` is defined outside this file; the last argument is
      // presumably the expected match span — confirm.
  34. mat!(regression_ascii_word_underscore, r"[[:word:]]", "_", Some((0, 1)));
  // See: https://github.com/rust-lang/regex/issues/129
  //
  // A repeated capture group followed by a named group: after matching "abx",
  // the group named `foo` must hold the text "x".
  #[test]
  fn regression_captures_rep() {
      let pattern = regex!(r"([a-f]){2}(?P<foo>[x-z])");
      let groups = pattern.captures(text!("abx")).unwrap();
      let foo = groups.name("foo").unwrap();
      assert_eq!(match_text!(foo), text!("x"));
  }
  42. // See: https://github.com/rust-lang/regex/issues/153
      //
      // Case 1: on "az" the expected span is (0, 1), i.e. the `ab?` branch
      // matching just "a" rather than the empty `$` branch at the end.
      // Case 2: on "ab\rcd" the expected span is (0, 3), i.e. "ab" followed by
      // the lone "\r". Only the overall span is listed, although the pattern
      // has two capture groups.
      // NOTE(review): `mat!` is defined outside this file; the last argument is
      // presumably the expected overall match span — confirm.
  43. mat!(regression_alt_in_alt1, r"ab?|$", "az", Some((0, 1)));
  44. mat!(regression_alt_in_alt2, r"^(.*?)(\n|\r\n?|$)", "ab\rcd", Some((0, 3)));
  45. // See: https://github.com/rust-lang/regex/issues/169
      //
      // The pattern starts with an optional run of `z` that is absent from the
      // haystack; the whole three-byte haystack "azb" is the expected match.
      // NOTE(review): `mat!` is defined outside this file; the last argument is
      // presumably the expected match span — confirm.
  46. mat!(regression_leftmost_first_prefix, r"z*azb", "azb", Some((0, 3)));
  47. // See: https://github.com/rust-lang/regex/issues/76
      //
      // The haystack is four uppercase Greek letters followed by one lowercase
      // one. With `(?i)`, the lowercase-letter class `\p{Ll}` is expected to
      // cover all five. Each letter is two bytes in UTF-8, hence the end
      // offset of 10. Compiled only when both `unicode-case` and
      // `unicode-gencat` are enabled.
      // NOTE(review): `mat!` is defined outside this file; the last argument is
      // presumably the expected match span in bytes — confirm.
  48. #[cfg(all(feature = "unicode-case", feature = "unicode-gencat"))]
  49. mat!(uni_case_lower_nocase_flag, r"(?i)\p{Ll}+", "ΛΘΓΔα", Some((0, 10)));
  50. // See: https://github.com/rust-lang/regex/issues/191
      //
      // Eleven literal alternatives; only the last one ("int") occurs in the
      // haystack, and it is expected to match the whole of it, span (0, 3).
      // NOTE(review): `mat!` is defined outside this file; the last argument is
      // presumably the expected match span — confirm.
  51. mat!(many_alternates, r"1|2|3|4|5|6|7|8|9|10|int", "int", Some((0, 3)));
  52. // burntsushi was bad and didn't create an issue for this bug.
      //
      // All three patterns are anchored with `^` and all three are expected
      // not to match (`None`):
      //   1. "a " starts with `a`, but the next byte is a space, which
      //      `[[:^space:]]` excludes.
      //   2. "foo boo a " does not start with `a`.
      //   3. "r-f" contains "-f" only at offset 1, not at the start.
      // NOTE(review): `mat!` is defined outside this file; `None` presumably
      // means "no match expected" — confirm.
  53. mat!(anchored_prefix1, r"^a[[:^space:]]", "a ", None);
  54. mat!(anchored_prefix2, r"^a[[:^space:]]", "foo boo a ", None);
  55. mat!(anchored_prefix3, r"^-[a-z]", "r-f", None);
  56. // See: https://github.com/rust-lang/regex/issues/204
      //
      // Splits "Should this (work?)" on the word-boundary assertion `\b`. The
      // expected pieces are an empty leading piece followed by alternating
      // word and non-word runs. Compiled only with the `unicode-perl` feature.
      // NOTE(review): `split!` and `t!` are defined outside this file; the
      // slice is presumably the full expected sequence of pieces — confirm.
  57. #[cfg(feature = "unicode-perl")]
  58. split!(
  59. split_on_word_boundary,
  60. r"\b",
  61. r"Should this (work?)",
  62. &[
  63. t!(""),
  64. t!("Should"),
  65. t!(" "),
  66. t!("this"),
  67. t!(" ("),
  68. t!("work"),
  69. t!("?)")
  70. ]
  71. );
  72. #[cfg(feature = "unicode-perl")]
      // Iterates over matches of `\b` in the five-byte haystack "a b c". Six
      // empty matches are expected, one at every offset from 0 through 5,
      // since each position sits between a letter and a space or at an edge
      // next to a letter. Compiled only with the `unicode-perl` feature.
      // NOTE(review): `matiter!` is defined outside this file; the trailing
      // tuples are presumably the complete expected list of spans — confirm.
  73. matiter!(
  74. word_boundary_dfa,
  75. r"\b",
  76. "a b c",
  77. (0, 0),
  78. (1, 1),
  79. (2, 2),
  80. (3, 3),
  81. (4, 4),
  82. (5, 5)
  83. );
  84. // See: https://github.com/rust-lang/regex/issues/268
      //
      // Only the first alternative is anchored. On "ba" the single expected
      // match is "b" at (0, 1); the "a" at offset 1 is not at the start of
      // the haystack, so it is absent from the expected list.
      // NOTE(review): `matiter!` is defined outside this file; the trailing
      // tuple is presumably the complete expected list of spans — confirm.
  85. matiter!(partial_anchor, r"^a|b", "ba", (0, 1));
  86. // See: https://github.com/rust-lang/regex/issues/280
      //
      // One alternative is anchored and the other (`z`) never occurs in the
      // haystack. In the first case "a" appears only at the end, so `^a`
      // cannot apply; in the second it appears only at the start, so `a$`
      // cannot apply. Both are expected not to match.
      // NOTE(review): `ismatch!` is defined outside this file; the boolean is
      // presumably the expected "is there a match" result — confirm.
  87. ismatch!(partial_anchor_alternate_begin, r"^a|z", "yyyyya", false);
  88. ismatch!(partial_anchor_alternate_end, r"a$|z", "ayyyyy", false);
  89. // See: https://github.com/rust-lang/regex/issues/289
      //
      // Three overlapping literal alternatives followed by `X`. On "CDAX" the
      // expected match is the whole haystack, span (0, 4). Only the overall
      // span is listed, although the pattern has one capture group.
      // NOTE(review): `mat!` is defined outside this file; the last argument is
      // presumably the expected overall match span — confirm.
  90. mat!(lits_unambiguous1, r"(ABC|CDA|BC)X", "CDAX", Some((0, 4)));
  91. // See: https://github.com/rust-lang/regex/issues/291
      //
      // The pattern has four capture groups and five expected values follow
      // the haystack. NOTE(review): `mat!` is defined outside this file; the
      // values are presumably the overall span followed by one entry per
      // group, in order — confirm. Read that way:
      //   overall                (0, 8)  "CIMG2341"
      //   group 1 (outer prefix) (0, 4)  "CIMG"
      //   group 2 (`..._` branch) None   (branch not taken)
      //   group 3 (DSCN|CIMG)    (0, 4)  "CIMG"
      //   group `n` (digits)     (4, 8)  "2341"
  92. mat!(
  93. lits_unambiguous2,
  94. r"((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$",
  95. "CIMG2341",
  96. Some((0, 8)),
  97. Some((0, 4)),
  98. None,
  99. Some((0, 4)),
  100. Some((4, 8))
  101. );
  102. // See: https://github.com/rust-lang/regex/issues/271
       //
       // In the first three cases the haystack is a single code point above
       // U+FFFF, which is four bytes in UTF-8. The expected span (4, 4) is an
       // empty match at the very end of the haystack, not at an offset inside
       // the encoded code point.
       // The last case expects "X" to match at (0, 1) when a Unicode word
       // boundary and `^` precede a non-Unicode literal; it is compiled only
       // with the `unicode-perl` feature.
       // NOTE(review): `mat!` is defined outside this file; the last argument
       // is presumably the expected match span in bytes — confirm.
  103. mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4)));
  104. mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4)));
  105. mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4)));
  106. #[cfg(feature = "unicode-perl")]
  107. mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1)));
  108. // See: https://github.com/rust-lang/regex/issues/321
       //
       // A counted repetition is applied to an anchor (`^{2}` / `${2}`) next to
       // the literal `a`. Against the empty haystack both patterns are
       // expected not to match, since the literal can never be satisfied.
       // NOTE(review): `ismatch!` is defined outside this file; the boolean is
       // presumably the expected "is there a match" result — confirm.
  109. ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false);
  110. ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false);
  111. // See: https://github.com/BurntSushi/ripgrep/issues/1203
       //
       // Each pattern is three digit classes followed by the literal suffix
       // "000". In "153.230000" the digits "230" followed by "000" occupy
       // bytes 4..10 (after the four-byte prefix "153."), which is the span
       // the third case expects. The second and third haystacks carry a
       // trailing newline.
       // NOTE(review): `ismatch!` / `matiter!` are defined outside this file;
       // the boolean is presumably the expected match result and the tuple the
       // complete expected list of spans — confirm.
  112. ismatch!(reverse_suffix1, r"[0-4][0-4][0-4]000", "153.230000", true);
  113. ismatch!(reverse_suffix2, r"[0-9][0-9][0-9]000", "153.230000\n", true);
  114. matiter!(reverse_suffix3, r"[0-9][0-9][0-9]000", "153.230000\n", (4, 10));
  115. // See: https://github.com/rust-lang/regex/issues/334
  116. // See: https://github.com/rust-lang/regex/issues/557
       //
       // In the first two cases the whole parenthesised tail is optional. On
       // "abcbX" the expected overall match is just "a" at (0, 1), with both
       // capture groups unset (`None`): the run of `b`/`c` after "a" is not
       // followed directly by `X` or the end of the haystack.
       // In the third case `(aa$)?` on "aaz" is expected to give the empty
       // match (0, 0), because "aa" is not at the end of the haystack.
       // NOTE(review): `mat!` is defined outside this file; the values after
       // the haystack are presumably the overall span followed by one entry
       // per capture group — confirm.
  117. mat!(
  118. captures_after_dfa_premature_end1,
  119. r"a(b*(X|$))?",
  120. "abcbX",
  121. Some((0, 1)),
  122. None,
  123. None
  124. );
  125. mat!(
  126. captures_after_dfa_premature_end2,
  127. r"a(bc*(X|$))?",
  128. "abcbX",
  129. Some((0, 1)),
  130. None,
  131. None
  132. );
  133. mat!(captures_after_dfa_premature_end3, r"(aa$)?", "aaz", Some((0, 0)));
  134. // See: https://github.com/rust-lang/regex/issues/437
       //
       // A pattern made mostly of literal text with escaped hyphens is run
       // against the unrelated haystack "test"; the expected result is "no
       // match". The test name indicates the original failure was a panic.
       // NOTE(review): `ismatch!` is defined outside this file; the boolean is
       // presumably the expected "is there a match" result — confirm.
  135. ismatch!(
  136. literal_panic,
  137. r"typename type\-parameter\-[0-9]+\-[0-9]+::.+",
  138. "test",
  139. false
  140. );
  141. // See: https://github.com/rust-lang/regex/issues/533
       //
       // Both haystacks are the same 22 code points, U+000A through U+001F,
       // i.e. everything strictly between tab (U+0009) and space (U+0020).
       // The trailing `\` on the string lines is a line continuation, so each
       // haystack is one contiguous string.
       // First case: `[[:blank:]]` is expected to match none of them.
       // NOTE(review): `ismatch!` is defined outside this file; the boolean is
       // presumably the expected "is there a match" result — confirm.
  142. ismatch!(
  143. blank_matches_nothing_between_space_and_tab,
  144. r"[[:blank:]]",
  145. "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\
  146. \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\
  147. \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}",
  148. false
  149. );
       // Second case: the negated class, anchored at both ends, is expected to
       // match the entire haystack.
  150. ismatch!(
  151. inverted_blank_matches_everything_between_space_and_tab,
  152. r"^[[:^blank:]]+$",
  153. "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\
  154. \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\
  155. \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}",
  156. true
  157. );
  158. // Tests that our Aho-Corasick optimization works correctly. It only
  159. // kicks in when we have >32 literals. By "works correctly," we mean that
  160. // leftmost-first match semantics are properly respected. That is, samwise
  161. // should match, not sam.
       //
       // The alternation below holds 54 literals: "samwise", "sam", the 26
       // lowercase letters and the 26 uppercase letters. "samwise" is listed
       // first, and the expected span (0, 7) is the whole haystack.
       // NOTE(review): `mat!` is defined outside this file; the last argument
       // is presumably the expected match span — confirm.
  162. mat!(
  163. ahocorasick1,
  164. "samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|\
  165. A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z",
  166. "samwise",
  167. Some((0, 7))
  168. );
  // See: https://github.com/BurntSushi/ripgrep/issues/1247
  //
  // Uses the byte-oriented regex API directly. The haystack is `s` followed by
  // the single byte 0xE4, which is neither `a` nor `b`, so the search must
  // report zero matches. Compiled only with the `unicode-perl` feature.
  #[test]
  #[cfg(feature = "unicode-perl")]
  fn regression_nfa_stops1() {
      let word_start_s = ::regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap();
      let haystack: &[u8] = b"s\xE4";
      let hits = word_start_s.find_iter(haystack).count();
      assert_eq!(0, hits);
  }
  176. // See: https://github.com/rust-lang/regex/issues/640
       //
       // The `(?i)` flag is set inside the first group only. The expected
       // matches are "foo" (0, 3), "Foo" (4, 7) and "Bar" (12, 15). The
       // lowercase "bar" at bytes 8..11 is deliberately absent, so the flag
       // must not carry over to the `Bar` alternative. Compiled only with the
       // `unicode-case` feature.
       // NOTE(review): `matiter!` is defined outside this file; the trailing
       // tuples are presumably the complete expected list of spans — confirm.
  177. #[cfg(feature = "unicode-case")]
  178. matiter!(
  179. flags_are_unset,
  180. r"((?i)foo)|Bar",
  181. "foo Foo bar Bar",
  182. (0, 3),
  183. (4, 7),
  184. (12, 15)
  185. );
  186. // See: https://github.com/rust-lang/regex/issues/659
  187. //
  188. // Note that 'Ј' is not 'j', but cyrillic Je
  189. // https://en.wikipedia.org/wiki/Je_(Cyrillic)
       //
       // The pattern is an empty capture group followed by a literal. In the
       // haystack, "z" takes one byte and the Cyrillic letter takes two, so
       // the literal occupies bytes 1..5, which is the span the second case
       // expects.
       // NOTE(review): `ismatch!` / `matiter!` are defined outside this file;
       // the boolean is presumably the expected match result and the tuple the
       // complete expected list of spans — confirm.
  190. ismatch!(empty_group_match, r"()Ј01", "zЈ01", true);
  191. matiter!(empty_group_find, r"()Ј01", "zЈ01", (1, 5));