PageRenderTime 48ms CodeModel.GetById 7ms RepoModel.GetById 0ms app.codeStats 0ms

/test/ruby/test_regexp.rb

http://github.com/ruby/ruby
Ruby | 1309 lines | 1258 code | 48 blank | 3 comment | 0 complexity | 5dfbadab43ed63bd0d3b5c9f7de87ac6 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, AGPL-3.0
  1. # coding: US-ASCII
  2. # frozen_string_literal: false
  3. require 'test/unit'
  4. class TestRegexp < Test::Unit::TestCase
  # Stashes the current warning level in @verbose and then silences warnings
  # by setting $VERBOSE to nil.
  def setup
    @verbose, $VERBOSE = $VERBOSE, nil
    nil
  end
  # Puts back the warning level held in @verbose.
  def teardown
    saved_level = @verbose
    $VERBOSE = saved_level
  end
  # Regexp::NOENCODING is truthy and equals the options of an empty literal
  # compiled with the /n flag.
  def test_has_NOENCODING
    assert(Regexp::NOENCODING)
    noenc_re = //n
    assert_equal(Regexp::NOENCODING, noenc_re.options)
  end
  # Look-behind followed by a greedy ".*" must match, for both an ASCII and a
  # \u-escaped look-behind character.
  def test_ruby_dev_999
    [
      [/(?<=a).*b/, "aab"],
      [/(?<=\u3042).*b/, "\u3042ab"],
    ].each do |re, subject|
      assert_match(re, subject)
    end
  end
  # A counted repetition of a capture group, (a){2}, followed by a literal.
  def test_ruby_core_27247
    pattern = /(a){2}z/
    assert_match(pattern, "aaz")
  end
  # Matching this nested optional/star pattern must not raise.
  def test_ruby_dev_24643
    ticket = "[ruby-dev:24643]"
    assert_nothing_raised(ticket) do
      /(?:(?:[a]*[a])?b)*a*$/ =~ "aabaaca"
    end
  end
  # An /n pattern whose character class holds \x byte escapes matches a plain
  # ASCII subject.
  def test_ruby_talk_116455
    two_words = /^(\w{2,}).* ([A-Za-z\xa2\xc0-\xff]{2,}?)$/n
    assert_match(two_words, "Hallo Welt")
  end
  # gsub with a \Z-anchored pattern removes the only character of "a".
  #
  # The expected value is passed first, as everywhere else in this file, so a
  # failure message labels expected and actual correctly.
  def test_ruby_dev_24887
    assert_equal("", "a".gsub(/a\Z/, ""))
  end
  # A character-class range written as raw bytes and compiled with "u" as the
  # third Regexp.new argument matches its upper bound.
  def test_yoshidam_net_20041111_1
    char_class = "[\xC2\xA0-\xC3\xBE]"
    utf8_re = Regexp.new(char_class, nil, "u")
    assert_match(utf8_re, "\xC3\xBE")
  end
  # A range built from the byte 0xFF, force-tagged as UTF-8, must be rejected
  # with RegexpError.
  def test_yoshidam_net_20041111_2
    assert_raise(RegexpError) {
      invalid_range = "[\xFF-\xFF]".force_encoding("utf-8")
      Regexp.new(invalid_range, nil, "u")
    }
  end
  # With /i, the negated class [^a-z] must not match the uppercase "R", so the
  # string is returned unchanged.
  def test_ruby_dev_31309
    word = 'Ruby'
    assert_equal('Ruby', word.sub(/[^a-z]/i, '-'))
  end
  # Moved here from the known-bug list: this call used to dump core. The test
  # passes as long as the call returns.
  def test_assert_normal_exit
    first, second = "a", "a"
    Regexp.union(first, second)
  end
  # Regexp#to_s renders NUL as \x00, and for UTF-16/UTF-32 sources returns a
  # string in the same encoding as the source.
  def test_to_s
    assert_equal('(?-mix:\x00)', Regexp.new("\0").to_s)

    source = "abcd\u3042"
    %i[UTF_16BE UTF_16LE UTF_32BE UTF_32LE].each do |enc_name|
      encoding = Encoding.const_get(enc_name)
      rendered = Regexp.new(source.encode(encoding)).to_s
      assert_equal("(?-mix:abcd\u3042)".encode(encoding), rendered)
      assert_equal(encoding, rendered.encoding)
    end
  end
  # Embedding an /x regexp whose comment contains "\g" into another regexp and
  # calling to_s must not emit a warning.
  def test_to_s_extended_subexp
    inner = /#\g#{"\n"}/x
    outer = /#{inner}/
    assert_warn('', '[ruby-core:82328] [Bug #13798]') { outer.to_s }
  end
  # Regexp.union raises ArgumentError when its members have incompatible
  # encodings, and a union containing an escaped slash survives an
  # inspect/eval round trip.
  def test_union
    outcome =
      begin
        Regexp.union(
          "a",
          Regexp.new("\xc2\xa1".force_encoding("euc-jp")),
          Regexp.new("\xc2\xa1".force_encoding("utf-8")))
        :ng
      rescue ArgumentError
        :ok
      end
    assert_equal(:ok, outcome)

    original = Regexp.union(/\//, "")
    reloaded = eval(original.inspect)
    assert_equal(original.to_s, reloaded.to_s)
    assert_equal(original.source, reloaded.source)
    assert_equal(original, reloaded)
  end
  # \b after U+3042: there is a boundary before a space but none before "a".
  def test_word_boundary
    before_space = /\u3042\b /
    before_letter = /\u3042\ba/
    assert_match(before_space, "\u3042 ")
    assert_not_match(before_letter, "\u3042a")
  end
  # Named capture groups end to end: MatchData lookup by String and Symbol,
  # offsets, \k<name> in a replacement, MatchData#inspect, Regexp.last_match,
  # Regexp#names / MatchData#names, Regexp#named_captures, and String#[] /
  # String#[]= with a group name.
  def test_named_capture
    entity = "aaa &amp; yyy"

    md = /&(?<foo>.*?);/.match(entity)
    assert_equal("amp", md["foo"])
    assert_equal("amp", md[:foo])
    assert_equal(5, md.begin(:foo))
    assert_equal(8, md.end(:foo))
    assert_equal([5,8], md.offset(:foo))

    assert_equal("aaa [amp] yyy",
                 entity.sub(/&(?<foo>.*?);/, '[\k<foo>]'))

    # inspect labels groups by name when the pattern has names, by index otherwise
    [
      ['#<MatchData "&amp; y" foo:"amp">', /&(?<foo>.*?); (y)/],
      ['#<MatchData "&amp; y" 1:"amp" 2:"y">', /&(.*?); (y)/],
      ['#<MatchData "&amp; y" foo:"amp" bar:"y">', /&(?<foo>.*?); (?<bar>y)/],
      ['#<MatchData "&amp; y" foo:"amp" foo:"y">', /&(?<foo>.*?); (?<foo>y)/],
    ].each do |shown, re|
      assert_equal(shown, re.match(entity).inspect)
    end

    /(?<_id>[A-Za-z_]+)/ =~ "!abc"
    assert_not_nil(Regexp.last_match)
    assert_equal("abc", Regexp.last_match(1))
    assert_equal("abc", Regexp.last_match(:_id))

    /a/ =~ "b" # doesn't match.
    assert_equal(nil, Regexp.last_match)
    assert_equal(nil, Regexp.last_match(1))
    assert_equal(nil, Regexp.last_match(:foo))

    bug11825_name = "\u{5b9d 77f3}"
    bug11825_str = "\u{30eb 30d3 30fc}"
    bug11825_re = /(?<#{bug11825_name}>)#{bug11825_str}/

    name_cases = [
      [["foo", "bar"], /(?<foo>.)(?<bar>.)/, "ab"],
      [["foo"], /(?<foo>.)(?<foo>.)/, "ab"],
      [[], /(.)(.)/, "ab"],
      [[bug11825_name], bug11825_re, bug11825_str],
    ]
    name_cases.each do |names, re, _subject|
      assert_equal(names, re.names)
    end
    name_cases.each do |names, re, subject|
      assert_equal(names, re.match(subject).names)
    end

    assert_equal({"foo"=>[1], "bar"=>[2]},
                 /(?<foo>.)(?<bar>.)/.named_captures)
    assert_equal({"foo"=>[1, 2]},
                 /(?<foo>.)(?<foo>.)/.named_captures)
    assert_equal({}, /(.)(.)/.named_captures)

    assert_equal("a[b]c", "abc".sub(/(?<x>[bc])/, "[\\k<x>]"))

    assert_equal("o", "foo"[/(?<bar>o)/, "bar"])
    assert_equal("o", "foo"[/(?<@bar>o)/, "@bar"])
    assert_equal("o", "foo"[/(?<@bar>.)\g<@bar>\k<@bar>/, "@bar"])

    s = "foo"
    s[/(?<bar>o)/, "bar"] = "baz"
    assert_equal("fbazo", s)

    # After a String-pattern sub, the last match has no group named _id.
    /.*/ =~ "abc"
    "a".sub("a", "")
    assert_raise(IndexError) { Regexp.last_match(:_id) }
  end
  # Group names are compared in full, including an embedded NUL: a key that
  # only shares the prefix before the NUL raises IndexError, and the full name
  # finds the group.
  def test_named_capture_with_nul
    bug9902 = '[ruby-dev:48275] [Bug #9902]'

    plain = /(?<a>.*)/.match("foo")
    ["a\0foo", "a\0foo".to_sym].each do |key|
      assert_raise(IndexError, bug9902) { plain[key] }
    end

    nul_named = Regexp.new("(?<foo\0bar>.*)").match("xxx")
    ["foo", "foo".to_sym].each do |key|
      assert_raise(IndexError, bug9902) { nul_named[key] }
    end

    assert_nothing_raised(IndexError, bug9902) do
      assert_equal("xxx", nul_named["foo\0bar"], bug9902)
      assert_equal("xxx", nul_named["foo\0bar".to_sym], bug9902)
    end
  end
  # A group named with EUC-JP bytes is found by that key, but the same bytes
  # tagged as Shift_JIS raise IndexError.
  def test_named_capture_nonascii
    bug9903 = '[ruby-dev:48278] [Bug #9903]'
    euc_key = "\xb1\xb2".force_encoding(Encoding::EUC_JP)
    md = /(?<#{euc_key}>.*)/.match("xxx")
    assert_equal("xxx", md[euc_key])
    assert_raise(IndexError, bug9903) do
      md[euc_key.dup.force_encoding(Encoding::Shift_JIS)]
    end
  end
  # MatchData#named_captures: unmatched optional groups map to nil, a repeated
  # name reports a single value, unnamed groups are left out, and a pattern
  # without names gives an empty Hash.
  def test_match_data_named_captures
    [
      [{'a' => '1', 'b' => '2', 'c' => nil}, /^(?<a>.)(?<b>.)(?<c>.)?/, '12'],
      [{'a' => '1', 'b' => '2', 'c' => '3'}, /^(?<a>.)(?<b>.)(?<c>.)?/, '123'],
      [{'a' => '1', 'b' => '2', 'c' => ''}, /^(?<a>.)(?<b>.)(?<c>.?)/, '12'],
      [{'a' => 'x'}, /(?<a>x)|(?<a>y)/, 'x'],
      [{'a' => 'y'}, /(?<a>x)|(?<a>y)/, 'y'],
      [{'a' => '1', 'b' => '2'}, /^(.)(?<a>.)(?<b>.)/, '012'],
      [{'a' => '2'}, /^(?<a>.)(?<a>.)/, '12'],
      [{}, /^(.)/, '123'],
    ].each do |captures, re, input|
      assert_equal(captures, re.match(input).named_captures)
    end
  end
  # `regexp_literal =~ string` assigns named groups to local variables. Names
  # that are not valid local names (@foo, Foo) are not assigned and must not
  # raise.
  def test_assign_named_capture
    [
      ["a", '/(?<foo>.)/ =~ "a"; foo'],
      [nil, '/(?<@foo>.)/ =~ "a"; defined?(@foo)'],
      ["a", 'foo = 1; /(?<foo>.)/ =~ "a"; foo'],
      ["a", '1.times {|foo| /(?<foo>.)/ =~ "a"; break foo }'],
    ].each do |expected, code|
      assert_equal(expected, eval(code))
    end

    assert_nothing_raised { eval('/(?<Foo>.)/ =~ "a"') }
    assert_nil(eval('/(?<Foo>.)/ =~ "a"; defined? Foo'))
  end
  # A group named after a keyword must not create a local variable called nil.
  def test_assign_named_capture_to_reserved_word
    /(?<nil>.)/ =~ "a"
    visible_locals = local_variables
    assert_not_include(visible_locals, :nil, "[ruby-dev:32675]")
  end
  # A group whose name is a constant name (ASCII "C" or U+1D402) must leave
  # that constant untouched when the regexp matches.
  def test_assign_named_capture_to_const
    ["C", "\u{1d402}"].each do |const_name|
      code = "#{const_name} = :ok; /(?<#{const_name}>.*)/ =~ 'ng'; #{const_name}"
      assert_equal(:ok, Class.new.class_eval(code))
    end
  end
  # Passes a script to assert_normal_exit in which a named-capture match is
  # the left operand of a "&&" continued on the next line.
  #
  # The script is assembled from two heredocs: the first ("begin;") is empty,
  # the second runs up to "end;" and carries the code.
  def test_assign_named_capture_trace
    ticket = '[ruby-core:79940] [Bug #13287]'
    assert_normal_exit("#{<<-"begin;"}\n#{<<-"end;"}", ticket)
    begin;
      / (?<foo>.*)/ =~ "bar" &&
        true
    end;
  end
  # MatchData#regexp returns a regexp equal to the one that produced the match.
  def test_match_regexp
    [
      [/./, "a"],
      [/foo/, "foo"],
    ].each do |re, subject|
      md = re.match(subject)
      assert_equal(re, md.regexp)
    end
  end
  # Regexp#source: empty for //, keeps the backslash in /\:/, and drops it
  # when ":" is the %r delimiter.
  def test_source
    ticket = '[ruby-core:40364]'
    assert_equal('', //.source)
    assert_equal('\:', /\:/.source, ticket)
    assert_equal(':', %r:\::.source, ticket)
  end
  # For each regexp metacharacter used as its own %r delimiter, the escaped
  # delimiter must stay escaped in #source and match the literal character.
  # A SyntaxError from eval is recorded in the result row instead of raised,
  # so it shows up in the final comparison.
  def test_source_escaped
    rows = "$*+.?^|".each_char.map {|ch|
      wanted = ["\\#{ch}", "\\#{ch}", 1]
      actual =
        begin
          re = eval("%r#{ch}\\#{ch}#{ch}", nil, __FILE__, __LINE__)
          t = eval("/\\#{ch}/", nil, __FILE__, __LINE__).source
        rescue SyntaxError => e
          [e, t, nil]
        else
          [re.source, t, re =~ "a#{ch}a"]
        end
      [wanted, actual]
    }
    expected, result = rows.transpose
    assert_equal(expected, result)
  end
  # With a bracket pair as the %r delimiter, escaped brackets inside must give
  # the same regexp as the equivalent dynamically built pattern.
  def test_source_escaped_paren
    bug7610 = '[ruby-core:51088] [Bug #7610]'
    bug8133 = '[ruby-core:53578] [Bug #8133]'
    delimiter_pairs = [
      ["(", ")", bug7610], ["[", "]", bug8133],
      ["{", "}", bug8133], ["<", ">", bug8133],
    ]
    delimiter_pairs.each do |open, close, ticket|
      body = "\\#{open}a\\#{close}"
      assert_equal(/#{body}/, eval("%r#{open}#{body}#{close}"), ticket)
    end
  end
  # For each non-meta punctuation character used as its own %r delimiter, the
  # escaping backslash must be dropped from #source, while the same escape
  # inside %r{...} keeps it. A SyntaxError from eval is recorded in the result
  # row instead of raised.
  def test_source_unescaped
    rows = "!\"#%&',-/:;=@_`~".each_char.map {|ch|
      wanted = ["#{ch}", "\\#{ch}", 1]
      actual =
        begin
          re = eval("%r#{ch}\\#{ch}#{ch}", nil, __FILE__, __LINE__)
          t = eval("%r{\\#{ch}}", nil, __FILE__, __LINE__).source
        rescue SyntaxError => e
          [e, t, nil]
        else
          [re.source, t, re =~ "a#{ch}a"]
        end
      [wanted, actual]
    }
    expected, result = rows.transpose
    assert_equal(expected, result)
  end
  # Regexp#inspect: escapes "/" and NUL, keeps a raw newline, and renders
  # non-ASCII bytes as \xNN.
  def test_inspect
    high_bytes = [0xf1, 0xf2, 0xf3].pack("C*")
    [
      ['//', //],
      ['//i', //i],
      ['/\//i', /\//i],
      ['/\//i', %r"#{'/'}"i],
      ['/\/x/i', /\/x/i],
      ['/\x00/i', /#{"\0"}/i],
      ["/\n/i", /#{"\n"}/i],
      ['/\/\xF1\xF2\xF3/i', /\/#{high_bytes}/i],
    ].each do |shown, re|
      assert_equal(shown, re.inspect)
    end
  end
  # The i, x and m literal flags take effect; an unknown flag is a SyntaxError.
  def test_char_to_option
    [
      ["BAR", "FOOBARBAZ", /b../i],
      ["bar", "foobarbaz", / b . . /x],
      ["bar\n", "foo\nbar\nbaz", /b.../m],
    ].each do |slice, subject, re|
      assert_equal(slice, subject[re])
    end
    assert_raise(SyntaxError) { eval('//z') }
  end
  # The s, e and u literal flags are accepted and do not change an ASCII match.
  def test_char_to_option_kcode
    [/b../s, /b../e, /b../u].each do |re|
      assert_equal("bar", "foobarbaz"[re])
    end
  end
  # Regexp#to_s folds an outer (?:...) or (?flags:...) group into the option
  # prefix, but leaves the group in place when anything follows it.
  def test_to_s2
    [
      ['(?-mix:foo)', /(?:foo)/],
      ['(?m-ix:foo)', /(?:foo)/m],
      ['(?mi-x:foo)', /(?:foo)/mi],
      ['(?mix:foo)', /(?:foo)/mix],
      ['(?m-ix:foo)', /(?m-ix:foo)/],
      ['(?mi-x:foo)', /(?mi-x:foo)/],
      ['(?mix:foo)', /(?mix:foo)/],
      ['(?mix:)', /(?mix)/],
      ['(?-mix:(?mix:foo) )', /(?mix:foo) /],
    ].each do |rendered, re|
      assert_equal(rendered, re.to_s)
    end
  end
  # casefold? reflects only the literal's /i flag, not an inline (?i:...) group.
  def test_casefold_p
    [
      [false, /a/],
      [true, /a/i],
      [false, /(?i:a)/],
    ].each do |folds, re|
      assert_equal(folds, re.casefold?)
    end
  end
  # Regexp#options reports the constant that corresponds to each literal flag.
  def test_options
    [
      [Regexp::IGNORECASE, /a/i],
      [Regexp::EXTENDED, /a/x],
      [Regexp::MULTILINE, /a/m],
    ].each do |flag, re|
      assert_equal(flag, re.options)
    end
  end
  # MatchData#dup keeps the regexp; initialize_copy(nil) raises TypeError and
  # leaves the receiver usable afterwards.
  def test_match_init_copy
    md = /foo/.match("foo")
    assert_equal(/foo/, md.dup.regexp)
    assert_raise(TypeError) {
      md.instance_eval { initialize_copy(nil) }
    }
    assert_equal([0, 3], md.offset(0))
    assert_equal(/foo/, md.dup.regexp)
  end
  # MatchData#size counts the whole match plus each of the four groups.
  def test_match_size
    md = /(.)(.)(\d+)(\d)/.match("THX1138.")
    assert_equal(5, md.size)
  end
  # MatchData#offset / #begin / #end: by group name, IndexError for unknown
  # groups, nil for unmatched optional groups, and character (not byte)
  # offsets after a multibyte prefix.
  def test_match_offset_begin_end
    named = /(?<x>b..)/.match("foobarbaz")
    assert_equal([3, 6], named.offset("x"))
    assert_equal(3, named.begin("x"))
    assert_equal(6, named.end("x"))
    assert_raise(IndexError) { named.offset("y") }
    [:offset, :begin, :end].each do |reader|
      assert_raise(IndexError) { named.public_send(reader, 2) }
    end

    unmatched = /(?<x>q..)?/.match("foobarbaz")
    assert_equal([nil, nil], unmatched.offset("x"))
    assert_equal(nil, unmatched.begin("x"))
    assert_equal(nil, unmatched.end("x"))

    multibyte = /\A\u3042(.)(.)?(.)\z/.match("\u3042\u3043\u3044")
    assert_equal([1, 2], multibyte.offset(1))
    assert_equal([nil, nil], multibyte.offset(2))
    assert_equal([2, 3], multibyte.offset(3))
  end
  # MatchData#to_s is the matched text.
  def test_match_to_s
    md = /(?<x>b..)/.match("foobarbaz")
    matched_text = md.to_s
    assert_equal("bar", matched_text)
  end
  # pre_match / post_match return the text before and after the match.
  def test_match_pre_post
    md = /(?<x>b..)/.match("foobarbaz")
    before, after = md.pre_match, md.post_match
    assert_equal("foo", before)
    assert_equal("baz", after)
  end
  # MatchData#to_a lists the whole match, then each group; the unmatched
  # optional group is nil.
  def test_match_array
    md = /(...)(...)(...)(...)?/.match("foobarbaz")
    expected = ["foobarbaz", "foo", "bar", "baz", nil]
    assert_equal(expected, md.to_a)
  end
  # MatchData#captures lists the groups only, without the whole match.
  def test_match_captures
    md = /(...)(...)(...)(...)?/.match("foobarbaz")
    expected = ["foo", "bar", "baz", nil]
    assert_equal(expected, md.captures)
  end
  # MatchData#[] with integer, negative, range and (start, length) arguments,
  # plus the error cases for an unknown name and nil.
  def test_match_aref
    md = /(...)(...)(...)(...)?/.match("foobarbaz")

    # single index, including negative and out-of-range positions
    assert_equal("foobarbaz", md[0])
    assert_equal("foo", md[1])
    assert_equal("foo", md[-4])
    assert_nil(md[-1])
    assert_nil(md[-11])
    assert_nil(md[-11, 1])
    assert_nil(md[-11..1])
    assert_nil(md[5])
    assert_nil(md[9])

    # ranges and (start, length) pairs
    assert_equal(["foo", "bar", "baz"], md[1..3])
    assert_equal(["foo", "bar", "baz"], md[1, 3])
    assert_equal([], md[3..1])
    assert_equal([], md[3, 0])
    assert_equal(nil, md[3, -1])
    assert_equal(nil, md[9, 1])
    assert_equal(["baz"], md[3, 1])
    assert_equal(["baz", nil], md[3, 5])
    assert_nil(md[5])

    assert_raise(IndexError) { md[:foo] }
    assert_raise(TypeError) { md[nil] }
  end
  # MatchData#values_at with integers, ranges, an object that responds to
  # to_int, and (for a pattern with names) Symbol and String group names.
  def test_match_values_at
    two = Object.new
    two.define_singleton_method(:to_int) { 2 }

    numbered = /(...)(...)(...)(...)?/.match("foobarbaz")
    assert_equal(["foo", "bar", "baz"], numbered.values_at(1, 2, 3))
    assert_equal(["foo", "bar", "baz"], numbered.values_at(1..3))
    assert_equal(["foo", "bar", "baz", nil, nil], numbered.values_at(1..5))
    assert_equal([], numbered.values_at(3..1))
    assert_equal([nil, nil, nil, nil, nil], numbered.values_at(5..9))
    assert_equal(["bar"], numbered.values_at(two))
    assert_raise(RangeError) { numbered.values_at(-11..1) }
    assert_raise(TypeError) { numbered.values_at(nil) }

    named = /(?<a>\d+) *(?<op>[+\-*\/]) *(?<b>\d+)/.match("1 + 2")
    assert_equal(["1", "2", "+"], named.values_at(:a, 'b', :op))
    assert_equal(["+"], named.values_at(two))
    assert_raise(TypeError) { named.values_at(nil) }
    assert_raise(IndexError) { named.values_at(:foo) }
  end
  # MatchData#string returns the text that was matched against.
  def test_match_string
    subject = "foobarbaz"
    md = /(?<x>b..)/.match(subject)
    assert_equal("foobarbaz", md.string)
  end
  # MatchData#inspect shows every numbered group, including an unmatched one
  # as nil.
  def test_match_inspect
    md = /(...)(...)(...)(...)?/.match("foobarbaz")
    shown = '#<MatchData "foobarbaz" 1:"foo" 2:"bar" 3:"baz" 4:nil>'
    assert_equal(shown, md.inspect)
  end
  # Regexp.new: arity error, copying an existing regexp, the "n"/"N" encoding
  # argument, and rejection of malformed patterns.
  def test_initialize
    assert_raise(ArgumentError) { Regexp.new }
    assert_equal(/foo/, Regexp.new(/foo/, Regexp::IGNORECASE))

    assert_equal(Encoding.find("US-ASCII"), Regexp.new("b..", nil, "n").encoding)
    assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")])
    assert_equal(//n, Regexp.new("", nil, "n"))

    arg_encoding_none = 32 # ARG_ENCODING_NONE is implementation defined value
    ["n", "N"].each do |kcode|
      assert_equal(arg_encoding_none, Regexp.new("", nil, kcode).options)
    end

    [
      ")(",
      '[\\40000000000',
      '[\\600000000000.',
      "((?<v>))\\g<0>",
    ].each do |malformed|
      assert_raise(RegexpError) { Regexp.new(malformed) }
    end
  end
  # Escape handling when a regexp is built from interpolated text: truncated
  # or out-of-range \u, \x, \M- and \C- escapes are rejected, valid ones match
  # the expected bytes, and mixing incompatible encodings raises.
  #
  # The local must stay named `s` (and `s1`/`s2`): several eval'd snippets
  # refer to those names.
  def test_unescape
    assert_raise(ArgumentError) { s = '\\'; /#{ s }/ }
    assert_equal(/\xFF/n, /#{ s="\\xFF" }/n)
    assert_equal(/\177/, (s = '\177'; /#{ s }/))

    # \u escapes
    assert_raise(ArgumentError) { s = '\u'; /#{ s }/ }
    assert_raise(ArgumentError) { s = '\u{ ffffffff }'; /#{ s }/ }
    assert_raise(ArgumentError) { s = '\u{ ffffff }'; /#{ s }/ }
    assert_raise(ArgumentError) { s = '\u{ ffff X }'; /#{ s }/ }
    assert_raise(ArgumentError) { s = '\u{ }'; /#{ s }/ }
    assert_equal("b", "abc"[(s = '\u{0062}'; /#{ s }/)])
    assert_equal("b", "abc"[(s = '\u0062'; /#{ s }/)])
    assert_raise(ArgumentError) { s = '\u0'; /#{ s }/ }
    assert_raise(ArgumentError) { s = '\u000X'; /#{ s }/ }
    assert_raise(ArgumentError) { s = "\xff" + '\u3042'; /#{ s }/ }
    assert_raise(ArgumentError) { s = '\u3042' + [0xff].pack("C"); /#{ s }/ }
    assert_raise(SyntaxError) { s = ''; eval(%q(/\u#{ s }/)) }

    # /n combined with interpolated text
    assert_equal(/a/, eval(%q(s="\u0061";/#{s}/n)))
    assert_raise(RegexpError) { s = "\u3042"; eval(%q(/#{s}/n)) }
    assert_raise(RegexpError) { s = "\u0061"; eval(%q(/\u3042#{s}/n)) }
    assert_raise(RegexpError) { s1=[0xff].pack("C"); s2="\u3042"; eval(%q(/#{s1}#{s2}/)); [s1, s2] }

    assert_raise(ArgumentError) { s = '\x'; /#{ s }/ }

    # \M- and \C- escapes select the expected middle byte of a 3-byte string
    [
      ["\xe1", 0xe1, /\M-a/],
      ["\xdc", 0xdc, /\M-\\/],
      ["\x8a", 0x8a, /\M-\n/],
      ["\x89", 0x89, /\M-\t/],
      ["\x8d", 0x8d, /\M-\r/],
      ["\x8c", 0x8c, /\M-\f/],
      ["\x8b", 0x8b, /\M-\v/],
      ["\x87", 0x87, /\M-\a/],
      ["\x9b", 0x9b, /\M-\e/],
      ["\x01", 0x01, /\C-a/],
    ].each do |wanted, byte, re|
      assert_equal(wanted, [0x00, byte, 0xff].pack("C*")[re])
    end

    # malformed \M / \C / \c sequences
    [
      '\M', '\M-\M-a', '\M-\\', '\C', '\c', '\C-\C-a', '\M-\z', '\M-\777',
    ].each do |malformed|
      assert_raise(ArgumentError) { /#{ malformed }/ }
    end

    assert_equal("\u3042\u3042", "\u3042\u3042"[(s = "\u3042" + %q(\xe3\x81\x82); /#{s}/)])
    assert_raise(ArgumentError) { s = "\u3042" + %q(\xe3); /#{s}/ }
    assert_raise(ArgumentError) { s = "\u3042" + %q(\xe3\xe3); /#{s}/ }
    assert_raise(ArgumentError) { s = '\u3042' + [0xff].pack("C"); /#{s}/ }

    assert_raise(SyntaxError) { eval("/\u3042/n") }

    s = ".........."
    5.times { s.sub!(".", "") }
    assert_equal(".....", s)

    assert_equal("\\\u{3042}", Regexp.new("\\\u{3042}").source)
  end
  # Regexp#== compares both source and options; an escaped "/" in a literal
  # equals the unescaped "/" passed to Regexp.new.
  def test_equal
    ticket = '[ruby-core:40364]'
    base = /abc/
    assert_equal(base, /abc/)
    assert_not_equal(base, /abc/m)
    assert_not_equal(base, /abd/)
    assert_equal(/\/foo/, Regexp.new('/foo'), ticket)
  end
  # Regexp#match with nil, a Symbol, a non-string object, a start position
  # (positive, negative, out of range) and a block; then Regexp#~ against $_.
  def test_match
    assert_nil(//.match(nil))
    assert_equal("abc", /.../.match(:abc)[0])
    assert_raise(TypeError) { /.../.match(Object.new)[0] }

    # explicit start positions
    assert_equal("bc", /../.match('abc', 1)[0])
    assert_equal("bc", /../.match('abc', -2)[0])
    assert_nil(/../.match("abc", -4))
    assert_nil(/../.match("abc", 4))
    assert_equal('\x', /../n.match("\u3042" + '\x', 1)[0])

    # block form receives the MatchData
    yielded = nil
    /.../.match("abc") {|md| yielded = md[0] }
    assert_equal("abc", yielded)

    # unary ~ matches against $_
    $_ = "abc"
    assert_equal(1, ~/bc/)
    $_ = "abc"
    assert_nil(~/d/)
    $_ = nil
    assert_nil(~/./)
  end
  # Regexp#match? returns a strict boolean and must leave the last-match
  # state untouched.
  def test_match_p
    /backref/ =~ 'backref'
    # The match above has to happen in this method rather than in a helper
    # (e.g. assert_send), so that the final $& check can tell whether
    # match? disturbed $~.

    assert_equal(false, //.match?(nil))
    assert_equal(true, //.match?(""))
    assert_equal(true, /.../.match?(:abc))
    assert_raise(TypeError) { /.../.match?(Object.new) }

    # with and without a start position
    assert_equal(true, /b/.match?('abc'))
    assert_equal(true, /b/.match?('abc', 1))
    assert_equal(true, /../.match?('abc', 1))
    assert_equal(true, /../.match?('abc', -2))
    assert_equal(false, /../.match?("abc", -4))
    assert_equal(false, /../.match?("abc", 4))
    assert_equal(true, /../.match?("\u3042xx", 1))
    assert_equal(false, /../.match?("\u3042x", 1))

    assert_equal(true, /\z/.match?(""))
    assert_equal(true, /\z/.match?("abc"))
    assert_equal(true, /R.../.match?("Ruby"))
    assert_equal(false, /R.../.match?("Ruby", 1))
    assert_equal(false, /P.../.match?("Ruby"))

    assert_equal('backref', $&)
  end
  # Regexp#=== with nil is false rather than an error.
  def test_eqq
    verdict = (/../ === nil)
    assert_equal(false, verdict)
  end
  # Regexp.quote escapes space and control whitespace, passes high bytes and
  # non-ASCII characters through, and returns a UTF-16BE NUL unchanged.
  def test_quote
    [
      ["\xff", [0xff].pack("C")],
      ["\\ ", "\ "],
      ["\\t", "\t"],
      ["\\n", "\n"],
      ["\\r", "\r"],
      ["\\f", "\f"],
      ["\\v", "\v"],
      ["\u3042\\t", "\u3042\t"],
      ["\\t\xff", "\t" + [0xff].pack("C")],
    ].each do |quoted, raw|
      assert_equal(quoted, Regexp.quote(raw))
    end

    bug13034 = '[ruby-core:78646] [Bug #13034]'
    wide_nul = "\x00".force_encoding("UTF-16BE")
    assert_equal(wide_nul, Regexp.quote(wide_nul), bug13034)
  end
  # Regexp.try_convert returns a Regexp as is, nil for objects without
  # to_regexp, and the to_regexp result otherwise.
  def test_try_convert
    assert_equal(/re/, Regexp.try_convert(/re/))
    assert_nil(Regexp.try_convert("re"))

    convertible = Object.new
    assert_nil(Regexp.try_convert(convertible))
    convertible.define_singleton_method(:to_regexp) { /foo/ }
    assert_equal(/foo/, Regexp.try_convert(convertible))
  end
  # Regexp.union with no arguments, a single regexp, an array, a string that
  # needs escaping, and \u-escaped members.
  def test_union2
    [
      [/(?!)/, Regexp.union],
      [/foo/, Regexp.union(/foo/)],
      [/foo/, Regexp.union([/foo/])],
      [/\t/, Regexp.union("\t")],
      [/(?-mix:\u3042)|(?-mix:\u3042)/, Regexp.union(/\u3042/, /\u3042/)],
    ].each do |expected, combined|
      assert_equal(expected, combined)
    end
    assert_equal("\u3041", "\u3041"[Regexp.union(/\u3042/, "\u3041")])
  end
  # Regexp#dup gives an equal regexp; initialize_copy(nil) raises TypeError.
  def test_dup
    assert_equal(//, //.dup)
    assert_raise(TypeError) do
      //.dup.instance_eval { initialize_copy(nil) }
    end
  end
  # Replacement-string handling in String#sub!: plain text, raw high bytes,
  # an incomplete \k reference, and the \0 \` \' \+ \\ back-references.
  def test_regsub
    assert_equal("fooXXXbaz", "foobarbaz".sub!(/bar/, "XXX"))

    high_byte = [0xff].pack("C")
    assert_equal(high_byte, "X".sub!(/./, high_byte))
    assert_equal('\\' + high_byte, "X".sub!(/./, '\\' + high_byte))

    assert_equal('\k', "foo".sub!(/.../, '\k'))
    assert_raise(RuntimeError) { "foo".sub!(/(?<x>o)/, '\k<x') }

    [
      ['foo[bar]baz', /(b..)/, '[\0]'],
      ['foo[foo]baz', /(b..)/, '[\`]'],
      ['foo[baz]baz', /(b..)/, '[\\\']'],
      ['foo[r]baz', /(b)(.)(.)/, '[\+]'],
      ['foo[\\]baz', /(b..)/, '[\\\\]'],
      ['foo[\z]baz', /(b..)/, '[\z]'],
    ].each do |expected, re, replacement|
      assert_equal(expected, "foobarbaz".sub!(re, replacement))
    end
  end
  # With \K, gsub removes only the text after the keep point, at every
  # occurrence.
  def test_regsub_K
    bug8856 = '[ruby-dev:47694] [Bug #8856]'
    subject = "foobarbazquux/foobarbazquux"
    stripped = subject.gsub(/foo\Kbar/, "")
    assert_equal('foobazquux/foobazquux', stripped, bug8856)
  end
  # $KCODE reads as nil and $= as false; assigning to either must not raise.
  def test_KCODE
    assert_nil($KCODE)
    assert_nothing_raised do
      $KCODE = nil
    end
    assert_equal(false, $=)
    assert_nothing_raised do
      $= = nil
    end
  end
  # Both accesses to $KCODE must warn: assignment ("...; ignored") and a
  # plain read.
  #
  # The second block reads the variable, mirroring test_ignorecase_warning's
  # write-then-read pair. Previously it assigned again, so the read path was
  # never exercised.
  def test_KCODE_warning
    assert_warning(/variable \$KCODE is no longer effective; ignored/) { $KCODE = nil }
    assert_warning(/variable \$KCODE is no longer effective/) { $KCODE }
  end
  # Both accesses to $= must warn: assignment ("...; ignored") and a plain read.
  def test_ignorecase_warning
    on_write = /variable \$= is no longer effective; ignored/
    on_read = /variable \$= is no longer effective/
    assert_warning(on_write) { $= = nil }
    assert_warning(on_read) { $= }
  end
  # Assigning a saved MatchData to $~ makes $& reflect it again.
  def test_match_setter
    /foo/ =~ "foo"
    saved = $~
    /bar/ =~ "bar"
    $~ = saved
    assert_equal("foo", $&)
  end
  # A MatchData produced by String#sub with a String pattern must behave like
  # one produced by the equivalent regexp.
  def test_match_without_regexp
    # Each assertion gets its own MatchData because using one may change its
    # internal state.
    with_fresh_match = proc {|&blk| "abc".sub("a", ""); blk.call($~) }

    bug10877 = '[ruby-core:68209] [Bug #10877]'
    with_fresh_match.call {|m| assert_raise_with_message(IndexError, /foo/, bug10877) { m["foo"] } }

    key = "\u{3042}"
    [Encoding::UTF_8, Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
      idx = key.encode(enc)
      with_fresh_match.call {|m| assert_raise_with_message(IndexError, /#{idx}/, bug10877) { m[idx] } }
    end

    [
      ->(m) { assert_equal(/a/, m.regexp) },
      ->(m) { assert_equal("abc", m.string) },
      ->(m) { assert_equal(1, m.size) },
      ->(m) { assert_equal(0, m.begin(0)) },
      ->(m) { assert_equal(1, m.end(0)) },
      ->(m) { assert_equal([0, 1], m.offset(0)) },
      ->(m) { assert_equal([], m.captures) },
      ->(m) { assert_equal([], m.names) },
      ->(m) { assert_equal({}, m.named_captures) },
      ->(m) { assert_equal(/a/.match("abc"), m) },
      ->(m) { assert_equal(/a/.match("abc").hash, m.hash) },
      ->(m) { assert_equal("bc", m.post_match) },
      ->(m) { assert_equal("", m.pre_match) },
      ->(m) { assert_equal(["a", nil], m.values_at(0, 1)) },
    ].each do |assertion|
      with_fresh_match.call(&assertion)
    end
  end
  # Regexp.last_match(n) indexes into the most recent match; an out-of-range
  # index and the unmatched last group both give nil.
  def test_last_match
    /(...)(...)(...)(...)?/.match("foobarbaz")
    whole = Regexp.last_match(0)
    first = Regexp.last_match(1)
    assert_equal("foobarbaz", whole)
    assert_equal("foo", first)
    assert_nil(Regexp.last_match(5))
    assert_nil(Regexp.last_match(-1))
  end
  # The match-related special variables ($& $` $' $+) must still work when
  # read through global aliases.
  def test_getter
    alias $__REGEXP_TEST_LASTMATCH__ $&
    alias $__REGEXP_TEST_PREMATCH__ $`
    alias $__REGEXP_TEST_POSTMATCH__ $'
    alias $__REGEXP_TEST_LASTPARENMATCH__ $+

    /(b)(.)(.)/.match("foobarbaz")
    [
      ["bar", $__REGEXP_TEST_LASTMATCH__],
      ["foo", $__REGEXP_TEST_PREMATCH__],
      ["baz", $__REGEXP_TEST_POSTMATCH__],
      ["r", $__REGEXP_TEST_LASTPARENMATCH__],
    ].each do |expected, observed|
      assert_equal(expected, observed)
    end

    /(...)(...)(...)/.match("foobarbaz")
    assert_equal("baz", $+)
  end
  # String#rindex with an /n regexp on a string that ends in a multibyte
  # character.
  def test_rindex_regexp
    subject = "foobarbaz\u3042"
    assert_equal(3, subject.rindex(/b../n, 5))
  end
  # Asserts that +re+ matches every entry of +ss+ and none of +fs+.
  #
  # re::  a Regexp, or anything Regexp.new accepts
  # ss::  one subject or an Array of them; an entry may be a pair
  #       [expected_match_text, subject], otherwise the whole subject is
  #       expected to be the matched text
  # fs::  one subject or an Array of subjects that must not match
  # msg:: failure message passed to each assertion
  def assert_regexp(re, ss, fs = [], msg = nil)
    pattern = re.is_a?(Regexp) ? re : Regexp.new(re)
    positives = ss.is_a?(Array) ? ss : [ss]
    negatives = fs.is_a?(Array) ? fs : [fs]

    positives.each do |expected, subject|
      subject ||= expected
      assert_match(pattern, subject, msg)
      assert_equal(expected, pattern.match(subject)[0], msg)
    end
    negatives.each {|subject| assert_no_match(pattern, subject, msg) }
  end
  alias check assert_regexp
  # Asserts that compiling +re+ as a regexp raises RegexpError.
  def assert_fail(re)
    assert_raise(RegexpError) do
      %r"#{ re }"
    end
  end
  alias failcheck assert_fail
  # Parser coverage: escaped punctuation, character-type and anchor escapes,
  # back-references versus octal escapes, possessive quantifiers, /x comments,
  # \p{...} properties, control-character escapes, and patterns that are
  # rejected.
  def test_parse
    check(/\*\+\?\{\}\|\(\)\<\>\`\'/, "*+?{}|()<>`'")

    # character types and anchors
    check(/\A\w\W\z/, %w(a. b!), %w(.. ab))
    check(/\A.\b.\b.\B.\B.\z/, %w(a.aaa .a...), %w(aaaaa .....))
    check(/\A\s\S\z/, [' a', "\n."], [' ', "\n\n", 'a '])
    check(/\A\d\D\z/, '0a', %w(00 aa))
    check(/\A\h\H\z/, %w(0g ag BH), %w(a0 af GG))
    check(/\Afoo\Z\s\z/, "foo\n", ["foo", "foo\nbar"])
    assert_equal(%w(a b c), "abc def".scan(/\G\w/))
    check(/\A\u3042\z/, "\u3042", ["", "\u3043", "a"])

    # back-references and octal-looking escapes
    check(/\A(..)\1\z/, %w(abab ....), %w(abba aba))
    failcheck('\1')
    check(/\A\80\z/, "80", ["\100", ""])
    check(/\A\77\z/, "?")
    check(/\A\78\z/, "\7" + '8', ["\100", ""])

    check(eval('/\A\Qfoo\E\z/'), "QfooE")
    check(/\Aa++\z/, "aaa")
    check('\Ax]\z', "x]")

    # extended-mode comments
    check(/x#foo/x, "x", "#foo")
    check(/\Ax#foo#{ "\n" }x\z/x, "xx", ["x", "x#foo\nx"])

    # properties
    check(/\A\p{Alpha}\z/, ["a", "z"], [".", "", ".."])
    check(/\A\p{^Alpha}\z/, [".", "!"], ["!a", ""])

    # control-character escapes
    [
      [/\A\n\z/, "\n"],
      [/\A\t\z/, "\t"],
      [/\A\r\z/, "\r"],
      [/\A\f\z/, "\f"],
      [/\A\a\z/, "\007"],
      [/\A\e\z/, "\033"],
      [/\A\v\z/, "\v"],
    ].each do |re, char|
      check(re, char)
    end

    # patterns that must be rejected
    [
      '(',
      '(?foo)',
      '/\p{foobarbazqux}/',
      '/\p{foobarbazqux' + 'a' * 1000 + '}/',
      '/[1-\w]/',
    ].each do |rejected|
      failcheck(rejected)
    end
  end
  # Matcher coverage: star loops followed by various character types,
  # case-insensitive alternation, multibyte literals and classes, \B, atomic
  # groups with subexpression calls, and nested or lazy quantifiers.
  def test_exec
    # star loops followed by a literal or a character type
    check(/A*B/, %w(B AB AAB AAAB), %w(A))
    check(/\w*!/, %w(! a! ab! abc!), %w(abc))
    check(/\w*\W/, %w(! a" ab# abc$), %w(abc))
    check(/\w*\w/, %w(z az abz abcz), %w(!))
    check(/[a-z]*\w/, %w(z az abz abcz), %w(!))
    check(/[a-z]*\W/, %w(! a" ab# abc$), %w(A))

    check(/((a|bb|ccc|dddd)(1|22|333|4444))/i, %w(a1 bb1 a22), %w(a2 b1))

    # one to four repetitions of U+0080 inside longer runs
    check(/\u0080/, (1..4).map {|n| ["\u0080", "\u0080" * n] }, ["\u0081"])
    check(/\u0080\u0080/, (2..4).map {|n| ["\u0080" * 2, "\u0080" * n] }, ["\u0081"])
    check(/\u0080\u0080\u0080/, (3..4).map {|n| ["\u0080" * 3, "\u0080" * n] }, ["\u0081"])
    check(/\u0080\u0080\u0080\u0080/, (4..4).map {|n| ["\u0080" * 4, "\u0080" * n] }, ["\u0081"])

    check(/[^\u3042\u3043\u3044]/, %W(a b \u0080 \u3041 \u3045), %W(\u3042 \u3043 \u3044))
    check(/a.+/m, %W(a\u0080 a\u0080\u0080 a\u0080\u0080\u0080), %W(a))
    check(/a.+z/m, %W(a\u0080z a\u0080\u0080z a\u0080\u0080\u0080z), %W(az))

    # non-word-boundary
    check(/abc\B.\Bxyz/, %w(abcXxyz abc0xyz), %w(abc|xyz abc-xyz))
    check(/\Bxyz/, [%w(xyz abcXxyz), %w(xyz abc0xyz)], %w(abc xyz abc-xyz))
    check(/abc\B/, [%w(abc abcXxyz), %w(abc abc0xyz)], %w(abc xyz abc-xyz))

    failcheck('(?<foo>abc)\1')

    # atomic groups and subexpression calls
    check(/^(A+|B+)(?>\g<1>)*[BC]$/, %w(AC BC ABC BAC AABBC), %w(AABB))
    check(/^(A+|B(?>\g<1>)*)[AC]$/, %w(AAAC BBBAAAAC), %w(BBBAAA))
    check(/^()(?>\g<1>)*$/, "", "a")
    check(/^(?>(?=a)(#{ "a" * 1000 }|))++$/, ["a" * 1000, "a" * 2000, "a" * 3000], ["", "a" * 500, "b" * 1000])

    # nested and lazy quantifiers
    check(eval('/^(?:a?)?$/'), ["", "a"], ["aa"])
    check(eval('/^(?:a+)?$/'), ["", "a", "aa"], ["ab"])
    check(/^(?:a?)+?$/, ["", "a", "aa"], ["ab"])
    check(/^a??[ab]/, [["a", "a"], ["a", "aa"], ["b", "b"], ["a", "ab"]], ["c"])
    check(/^(?:a*){3,5}$/, ["", "a", "aa", "aaa", "aaaa", "aaaaa", "aaaaaa"], ["b"])
    check(/^(?:a+){3,5}$/, ["aaa", "aaaa", "aaaaa", "aaaaaa"], ["", "a", "aa", "b"])
  end
  # Look-behind: positive and negative forms, rejection of an unbounded
  # look-behind body, alternatives of different lengths, and /i handling
  # inside a negative look-behind.
  def test_parse_look_behind
    check(/(?<=A)B(?=C)/, [%w(B ABC)], %w(aBC ABc aBc))
    check(/(?<!A)B(?!C)/, [%w(B aBc)], %w(ABC aBC ABc))

    ['(?<=.*)', '(?<!.*)'].each do |unbounded|
      failcheck(unbounded)
    end

    check(/(?<=A|B.)C/, [%w(C AC), %w(C BXC)], %w(C BC))
    check(/(?<!A|B.)C/, [%w(C C), %w(C BC)], %w(AC BXC))

    [/(?<!aa|b)c/i, /(?<!b|aa)c/i].each do |re|
      assert_not_match(re, "Aac")
    end
  end
  # \k back-references and \g subexpression calls: numbered, relative, named,
  # with a nest level (\k<n+0>), recursive, and the malformed forms that must
  # be rejected.
  def test_parse_kg
    check(/\A(.)(.)\k<1>(.)\z/, %w(abac abab ....), %w(abcd aaba xxx))
    check(/\A(.)(.)\k<-1>(.)\z/, %w(abbc abba ....), %w(abcd aaba xxx))
    check(/\A(?<n>.)(?<x>\g<n>){0}(?<y>\k<n+0>){0}\g<x>\g<y>\z/, "aba", "abb")
    check(/\A(?<n>.)(?<x>\g<n>){0}(?<y>\k<n+1>){0}\g<x>\g<y>\z/, "abb", "aba")
    check(/\A(?<x>..)\k<x>\z/, %w(abab ....), %w(abac abba xxx))
    check(/\A(.)(..)\g<-1>\z/, "abcde", %w(.... ......))

    # malformed or dangling references
    [
      '\k<x>',
      '\k<',
      '\k<>',
      '\k<.>',
      '\k<x.>',
      '\k<1.>',
      '\k<x',
      '\k<x+',
      '()\k<-2>',
      '()\g<-2>',
    ].each do |rejected|
      failcheck(rejected)
    end

    check(/\A(?<x>.)(?<x>.)\k<x>\z/, %w(aba abb), %w(abc .. ....))
    check(/\A(?<x>.)(?<x>.)\k<x>\z/i, %w(aba ABa abb ABb), %w(abc .. ....))
    check('\k\g', "kg")

    # calls that can never terminate or that name a missing group
    [
      '(.\g<1>)',
      '(.\g<2>)',
      '(?=\g<1>)',
      '((?=\g<1>))',
      '(\g<1>|.)',
      '(.|\g<1>)',
    ].each do |rejected|
      failcheck(rejected)
    end

    check(/(!)(?<=(a)|\g<1>)/, ["!"], %w(a))
    check(/^(a|b\g<1>c)$/, %w(a bac bbacc bbbaccc), %w(bbac bacc))
    check(/^(a|b\g<2>c)(B\g<1>C){0}$/, %w(a bBaCc bBbBaCcCc bBbBbBaCcCcCc), %w(bBbBaCcC BbBaCcCc))
    check(/\A(?<n>.|X\g<n>)(?<x>\g<n>){0}(?<y>\k<n+0>){0}\g<x>\g<y>\z/, "XXaXbXXa", %w(XXabXa abb))
    check(/\A(?<n>.|X\g<n>)(?<x>\g<n>){0}(?<y>\k<n+1>){0}\g<x>\g<y>\z/, "XaXXbXXb", %w(aXXbXb aba))
    failcheck('(?<x>)(?<x>)(\g<x>)')
    check(/^(?<x>foo)(bar)\k<x>/, %w(foobarfoo), %w(foobar barfoo))
    check(/^(?<a>f)(?<a>o)(?<a>o)(?<a>b)(?<a>a)(?<a>r)(?<a>b)(?<a>a)(?<a>z)\k<a>{9}$/, %w(foobarbazfoobarbaz foobarbazbazbarfoo foobarbazzabraboof), %w(foobar barfoo))
  end
  # Curly braces: "{" sequences that do not form an interval, interval
  # quantifiers on characters and on zero-width constructs, and interval
  # sources passed to failcheck.
  def test_parse_curly_brace
    # Braces that are not a complete {n,m} interval.
    check(/\A{/, ["{", ["{", "{x"]])
    check(/\A{ /, ["{ ", ["{ ", "{ x"]])
    check(/\A{,}\z/, "{,}")
    check(/\A{}\z/, "{}")

    # Intervals applied to a character.
    check(/\Aa{0}+\z/, "", %w[a aa aab])
    check(/\Aa{1}+\z/, %w[a aa], ["", "aab"])
    check(/\Aa{1,2}b{1,2}\z/, %w[ab aab abb aabb], ["", "aaabb", "abbb"])

    # Intervals applied to a look-ahead and to anchors.
    check(/(?!x){0,1}/, [["", "ab"], ["", ""]])
    check(/c\z{0,1}/, [["c", "abc"], ["c", "cab"]], ["abd"])
    check(/\A{0,1}a/, [["a", "abc"], ["a", "____abc"]], ["bcd"])

    # Oversized bounds, reversed bounds, and an interval with no target.
    ['.{100001}', '.{0,100001}', '.{1,0}', '{0}'].each { |src| failcheck(src) }
  end
  # Inline comment groups, (?#...): one containing an escaped ")" and one
  # that is never closed.
  def test_parse_comment
    with_escaped_paren = /\A(?#foo\)bar)\z/
    check(with_escaped_paren, "", "a")

    unterminated = '(?#'
    failcheck(unterminated)
  end
  # \d following a non-ASCII literal character.
  def test_char_type
    first = "\u30421"
    second = "\u30422"
    check(/\u3042\d/, [first, second])

    # CClassTable cache test
    # (two separate literals are kept on purpose so each is compiled on its own)
    assert_match(/\u3042\d/, first)
    assert_match(/\u3042\d/, second)
  end
  # Bracket expressions: "]" and "-" in special positions, escapes inside a
  # class, ranges, && intersections, and the ASCII-only behaviour of \d, \w
  # and \s asserted at the end.
  def test_char_class
    # Empty and unterminated classes.
    ['[]', '[x'].each { |src| failcheck(src) }

    # "]" as the first member, and plain / non-ASCII members.
    check('\A[]]\z', "]", "")
    check('\A[]\.]+\z', %w(] . ]..]), ["", "["])
    check(/\A[\u3042]\z/, "\u3042", "\u3042aa")
    check(/\A[\u3042\x61]+\z/, ["aa\u3042aa", "\u3042\u3042", "a"], ["", "b"])
    check(/\A[\u3042\x61\x62]+\z/, "abab\u3042abab\u3042")
    check(/\A[abc]+\z/, "abcba", ["", "ada"])

    # Character-type escapes and properties used inside a class.
    check(/\A[\w][\W]\z/, %w[a. b!], %w[.. ab])
    check(/\A[\s][\S]\z/, [" a", "\n."], [" ", "\n\n", "a "])
    check(/\A[\d][\D]\z/, "0a", %w[00 aa])
    check(/\A[\h][\H]\z/, %w[0g ag BH], %w[a0 af GG])
    check(/\A[\p{Alpha}]\z/, ["a", "z"], [".", "", ".."])
    check(/\A[\p{^Alpha}]\z/, [".", "!"], ["!a", ""])

    # Hex, octal-looking and NUL escapes.
    check(/\A[\xff]\z/, "\xff", ["", "\xfe"])
    check(/\A[\80]+\z/, "8008", ["\\80", "\100", "\1000"])
    check(/\A[\77]+\z/, "???")
    check(/\A[\78]+\z/, "\788\7")
    check(/\A[\0]\z/, "\0")

    # "[:" that is not a POSIX bracket, and "-" at range boundaries.
    check(/\A[[:0]]\z/, [":", "0"], ["", ":0"])
    check(/\A[0-]\z/, ["0", "-"], "0-")
    check('\A[a-&&\w]\z', "a", "-")
    check('\A[--0]\z', ["-", "/", "0"], ["", "1"])
    check('\A[\'--0]\z', %w[* + ( ) 0 ,], ["", ".", "1"])
    check(/\A[a-b-]\z/, %w[a b -], ["", "c"])
    check('\A[a-b-&&\w]\z', %w[a b], ["", "-"])
    check('\A[a-b-&&\W]\z', "-", ["", "a", "b"])
    check('\A[a-c-e]\z', %w[a b c e -], %w[d])

    # Intersections with negated nested classes, in both operand orders.
    check(/\A[a-f&&[^b-c]&&[^e]]\z/, %w[a d f], %w[b c e g 0])
    check(/\A[[^b-c]&&[^e]&&a-f]\z/, %w[a d f], %w[b c e g 0])

    check(/\A[\n\r\t]\z/, ["\n", "\r", "\t"])

    # Reversed range.
    failcheck('[9-1]')

    # \d, \w and \s match the ASCII samples but none of the non-ASCII ones.
    assert_match(/\A\d+\z/, "0123456789")
    assert_no_match(/\d/, "\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19")
    assert_match(/\A\w+\z/, "09azAZ_")
    assert_no_match(/\w/, "\uff10\uff19\uff41\uff5a\uff21\uff3a")
    assert_match(/\A\s+\z/, "\r\n\v\f\r\s")
    assert_no_match(/\s/, "\u0085")
  end
  # POSIX bracket expressions ([:alpha:] and friends): well-formed, negated,
  # malformed, and their matching of non-ASCII text.
  def test_posix_bracket
    check(/\A[[:alpha:]0]\z/, %w[0 a], %w[1 .])

    # These literals go through eval, so they are parsed only when this
    # test runs rather than when the file is loaded.
    negated = eval('/\A[[:^alpha:]0]\z/')
    check(negated, %w[0 1 .], "a")
    escaped_colon = eval('/\A[[:alpha\:]]\z/')
    check(escaped_colon, %w[a l p h a :], %w[b 0 1 .])
    trailing_text = eval('/\A[[:alpha:foo]0]\z/')
    check(trailing_text, %w[0 a], %w[1 .])

    check(/\A[[:xdigit:]&&[:alpha:]]\z/, "a", %w[g 0])
    check('\A[[:abcdefghijklmnopqrstu:]]+\z', "[]")

    # Unterminated brackets and an unknown class name.
    ['[[:alpha', '[[:alpha:', '[[:alp:]]'].each { |src| failcheck(src) }

    # Unlike \d, \w and \s, the POSIX brackets match these non-ASCII samples.
    assert_match(/\A[[:digit:]]+\z/, "\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19")
    assert_match(/\A[[:alnum:]]+\z/, "\uff10\uff19\uff41\uff5a\uff21\uff3a")
    assert_match(/\A[[:space:]]+\z/, "\r\n\v\f\r\s\u0085")

    # [:ascii:] covers 0x00..0x7F only.
    assert_match(/\A[[:ascii:]]+\z/, "\x00\x7F")
    assert_no_match(/[[:ascii:]]/, "\x80\xFF")
  end
  # \R must match each of these line-break sequences as a whole string,
  # including the two-character "\r\n".
  def test_cclass_R
    line_breaks = [
      "\r", "\n", "\f", "\v", "\r\n", "\u0085", "\u2028", "\u2029",
    ]
    line_breaks.each do |nl|
      assert_match(/\A\R\z/, nl)
    end
  end
  # \X (extended grapheme cluster): sequences expected to form exactly one
  # cluster, sequences expected to form two, and a warning-free match.
  def test_cclass_X
    [
      "\u{20 200d}",
      "\u{600 600}",
      "\u{600 20}",
      "\u{261d 1F3FB}",
      "\u{1f600}",
      "\u{20 324}",
    ].each { |cluster| assert_match(/\A\X\z/, cluster) }

    # LF / CR followed by a combining mark: two clusters.
    ["\u{a 324}", "\u{d 324}"].each do |pair|
      assert_match(/\A\X\X\z/, pair)
    end

    # Sequences joined with U+200D, some ending in U+FE0F.
    [
      "\u{1F477 1F3FF 200D 2640 FE0F}",
      "\u{1F468 200D 1F393}",
      "\u{1F46F 200D 2642 FE0F}",
      "\u{1f469 200d 2764 fe0f 200d 1f469}",
    ].each { |cluster| assert_match(/\A\X\z/, cluster) }

    assert_warning('') { /\X/ =~ "\u{a0}" }
  end
  # String#rindex with a Regexp (backward search): a hit, a miss, a very
  # long pattern, and a multi-line subject.
  def test_backward
    assert_equal(3, "foobar".rindex(/b.r/i))
    # A miss returns nil; assert_nil matches how nil is asserted elsewhere
    # in this file.
    assert_nil("foovar".rindex(/b.r/i))
    assert_equal(3, ("foo" + "bar" * 1000).rindex(/#{"bar"*1000}/))
    assert_equal(4, "foo\nbar\nbaz\n".rindex(/bar/i))
  end
  # Behaviour of a Regexp obtained from Regexp.allocate (never initialized):
  # most methods raise TypeError, a few return a fixed value.
  def test_uninitialized
    [
      -> { Regexp.allocate.hash },
      -> { Regexp.allocate.eql? Regexp.allocate },
      -> { Regexp.allocate == Regexp.allocate },
      -> { Regexp.allocate =~ "" },
    ].each { |probe| assert_raise(TypeError) { probe.call } }

    assert_equal(false, Regexp.allocate === Regexp.allocate)
    assert_nil(~Regexp.allocate)

    [
      -> { Regexp.allocate.match("") },
      -> { Regexp.allocate.to_s },
    ].each { |probe| assert_raise(TypeError) { probe.call } }

    assert_match(/^#<Regexp:.*>$/, Regexp.allocate.inspect)

    [
      -> { Regexp.allocate.source },
      -> { Regexp.allocate.casefold? },
      -> { Regexp.allocate.options },
    ].each { |probe| assert_raise(TypeError) { probe.call } }

    assert_equal(Encoding.find("ASCII-8BIT"), Regexp.allocate.encoding)
    assert_equal(false, Regexp.allocate.fixed_encoding?)

    [
      -> { Regexp.allocate.names },
      -> { Regexp.allocate.named_captures },
    ].each { |probe| assert_raise(TypeError) { probe.call } }

    assert_not_respond_to(MatchData, :allocate)

    # Disabled assertions (originally inside =begin/=end); they rely on
    # MatchData.allocate, which the assertion above expects not to exist.
    #
    # assert_raise(TypeError) { MatchData.allocate.hash }
    # assert_raise(TypeError) { MatchData.allocate.regexp }
    # assert_raise(TypeError) { MatchData.allocate.names }
    # assert_raise(TypeError) { MatchData.allocate.size }
    # assert_raise(TypeError) { MatchData.allocate.length }
    # assert_raise(TypeError) { MatchData.allocate.offset(0) }
    # assert_raise(TypeError) { MatchData.allocate.begin(0) }
    # assert_raise(TypeError) { MatchData.allocate.end(0) }
    # assert_raise(TypeError) { MatchData.allocate.to_a }
    # assert_raise(TypeError) { MatchData.allocate[:foo] }
    # assert_raise(TypeError) { MatchData.allocate.captures }
    # assert_raise(TypeError) { MatchData.allocate.values_at }
    # assert_raise(TypeError) { MatchData.allocate.pre_match }
    # assert_raise(TypeError) { MatchData.allocate.post_match }
    # assert_raise(TypeError) { MatchData.allocate.to_s }
    # assert_match(/^#<MatchData:.*>$/, MatchData.allocate.inspect)
    # assert_raise(TypeError) { MatchData.allocate.string }
    # $~ = MatchData.allocate
    # assert_raise(TypeError) { $& }
    # assert_raise(TypeError) { $` }
    # assert_raise(TypeError) { $' }
    # assert_raise(TypeError) { $+ }
  end
  # Unicode properties (\p{Any}, \p{Lo}), unknown property names, and
  # case-insensitive matching of characters whose case folding is a
  # different character or several characters.
  def test_unicode
    any_one = /^\u3042{0}\p{Any}$/
    ["a", "\u3041", "\0"].each { |s| assert_match(any_one, s) }
    assert_match(/^\p{Lo}{4}$/u, "\u3401\u4E01\u{20001}\u{2A701}")
    ["\0\0", ""].each { |s| assert_no_match(any_one, s) }

    # Property names that do not exist: the literal must fail to parse.
    ["\u3042", 'a' * 1000, 'foobarbazqux'].each do |prop|
      assert_raise(SyntaxError) { eval('/^\u3042{0}\p{' + prop + '}$/') }
    end

    # Back-references under /i with U+FF21 / U+FF41.
    assert_match(/^(\uff21)(a)\1\2$/i, "\uff21A\uff41a")
    assert_no_match(/^(\uff21)\1$/i, "\uff21A")
    assert_no_match(/^(\uff41)\1$/i, "\uff41a")

    # U+00DF also matches "ss".
    ["\u00df", "ss"].each { |s| assert_match(/^\u00df$/i, s) }
    #assert_match(/^(\u00df)\1$/i, "\u00dfss") # this must be a bug...
    assert_match(/^\u00df{2}$/i, "\u00dfss")

    # U+00C5 also matches U+00E5 and U+212B.
    ["\u00c5", "\u00e5", "\u212b"].each { |s| assert_match(/^\u00c5$/i, s) }
    assert_match(/^(\u00c5)\1\1$/i, "\u00c5\u00e5\u212b")

    # U+0149 also matches U+02BC followed by "n".
    ["\u0149", "\u02bcn"].each { |s| assert_match(/^\u0149$/i, s) }
    #assert_match(/^(\u0149)\1$/i, "\u0149\u02bcn") # this must be a bug...
    assert_match(/^\u0149{2}$/i, "\u0149\u02bcn")

    # U+0390 also matches the three-character sequence U+03B9 U+0308 U+0301.
    ["\u0390", "\u03b9\u0308\u0301"].each { |s| assert_match(/^\u0390$/i, s) }
    #assert_match(/^(\u0390)\1$/i, "\u0390\u03b9\u0308\u0301") # this must be a bug...
    assert_match(/^\u0390{2}$/i, "\u0390\u03b9\u0308\u0301")

    # U+FB05 also matches U+FB06 and "st".
    ["\ufb05", "\ufb06", "st"].each { |s| assert_match(/^\ufb05$/i, s) }
    #assert_match(/^(\ufb05)\1\1$/i, "\ufb05\ufb06st") # this must be a bug...
    assert_match(/^\ufb05{3}$/i, "\ufb05\ufb06st")

    # ...and the reverse direction: the sequence matches the single character.
    assert_match(/^\u03b9\u0308\u0301$/i, "\u0390")
  end
  # \p{Age=...}: a character matches the listed ages at or after the one
  # it is first asserted for, and none of the earlier ones listed.
  def test_unicode_age
    # U+261C matches both Age=1.1 and Age=6.0; property name is case-insensitive.
    u261c = "\u261c"
    assert_match(/^\p{Age=6.0}$/u, u261c)
    assert_match(/^\p{Age=1.1}$/u, u261c)
    assert_no_match(/^\P{age=6.0}$/u, u261c)

    # U+31F6 matches from age 3.2 on.
    u31f6 = "\u31f6"
    [/^\p{age=6.0}$/u, /^\p{age=3.2}$/u].each do |re|
      assert_match(re, u31f6)
    end
    [/^\p{age=3.1}$/u, /^\p{age=3.0}$/u, /^\p{age=1.1}$/u].each do |re|
      assert_no_match(re, u31f6)
    end

    # U+2754 matches age 6.0 but none of the earlier ages listed.
    u2754 = "\u2754"
    assert_match(/^\p{age=6.0}$/u, u2754)
    [
      /^\p{age=5.0}$/u, /^\p{age=4.0}$/u, /^\p{age=3.0}$/u,
      /^\p{age=2.0}$/u, /^\p{age=1.1}$/u,
    ].each { |re| assert_no_match(re, u2754) }

    # U+32FF matches age 12.1 but not 12.0.
    u32ff = "\u32FF"
    assert_no_match(/^\p{age=12.0}$/u, u32ff)
    assert_match(/^\p{age=12.1}$/u, u32ff)
  end
  # MatchData subclass whose class name ends in a non-ASCII character; built
  # with eval so the \u escape can be used in the name. Referenced only by
  # the disabled assertion in test_matchdata below.
  MatchData_A = eval("class MatchData_\u{3042} < MatchData; self; end")

  # MatchData equality / hashing, and named captures whose name starts
  # with "@".
  def test_matchdata
    ticket = '[ruby-core:24748]'
    first = "haystack".match(/hay/)
    second = "haystack".match(/hay/)
    assert_equal(first, second, ticket)

    # Equal MatchData objects must also work as the same Hash key.
    lookup = {first => 42}
    assert_equal(42, lookup[second], ticket)

    # Disabled assertion (originally inside =begin/=end):
    # assert_match(/#<TestRegexp::MatchData_\u{3042}:/, MatchData_A.allocate.inspect)

    md = /^(?<@time>\d+): (?<body>.*)/.match("123456: hoge fuga")
    assert_equal("123456", md["@time"])
    assert_equal("hoge fuga", md["body"])
  end
  # An interpolated regexp literal whose value is discarded (with and
  # without /o) must evaluate without raising.
  def test_regexp_popped
    [
      "a = 1; /\#{ a }/; a",
      "a = 1; /\#{ a }/o; a",
    ].each do |src|
      assert_nothing_raised { eval(src) }
    end
  end
  # A regexp literal whose interpolated fragment is a lone backslash must
  # be reported as a SyntaxError.
  def test_invalid_fragment
    bug2547 = '[ruby-core:27374]'
    source = '/#{"\\\\"}y/'
    assert_raise(SyntaxError, bug2547) { eval(source) }
  end
  # Warnings for character classes: a "duplicated" warning is expected when
  # a member repeats or falls inside a range in the same class, and no
  # output at all otherwise.
  def test_dup_warn
    dup = /duplicated/
    silent = /\A\z/

    # [expected warning output, class source], in the original order.
    [
      [dup,    '[\u3042\u3043\u3042]'],
      [dup,    '[\u3042\u3043\u3043]'],
      [silent, '[\u3042\u3044\u3043]'],
      [silent, '[\u3042\u3045\u3043]'],
      [silent, '[\u3042\u3045\u3044]'],
      [silent, '[\u3042\u3045\u3043-\u3044]'],
      [dup,    '[\u3042\u3045\u3042-\u3043]'],
      [dup,    '[\u3042\u3045\u3044-\u3045]'],
      [silent, '[\u3042\u3046\u3044]'],
      [dup,    '[\u1000-\u2000\u3042-\u3046\u3044]'],
      [dup,    '[\u3044\u3041-\u3047]'],
      [dup,    '[\u3042\u3044\u3046\u3041-\u3047]'],
    ].each do |expected, src|
      assert_warning(expected) { Regexp.new(src) }
    end

    bug7471 = '[ruby-core:50344]'
    assert_warning('', bug7471) { Regexp.new('[\D]') =~ "\u3042" }

    bug8151 = '[ruby-core:53649]'
    assert_warning(/\A\z/, bug8151) { Regexp.new('(?:[\u{33}])').to_s }

    # Two newlines in one class: the expected output has the form "/.../".
    assert_warning(%r[/.*/\Z]) { Regexp.new("[\n\n]") }
  end
  # Running a script with -w that contains an invalid \p property must
  # print an "invalid Unicode Property" warning quoting the literal.
  def test_property_warn
    script = 'x=/\p%s/'
    expected_stderr = %r"warning: invalid Unicode Property \\p: /\\p%s/"
    assert_in_out_err('-w', script, [], expected_stderr)
  end
  # An incomplete \x escape must be reported as "invalid hex escape", and
  # the message must mention an invalid escape on exactly one line.
  def test_invalid_escape_error
    bug3539 = '[ruby-core:31048]'
    # NOTE(review): bug3539 sits in eval's file-name position here, not in
    # assert_raise's message position — confirm this is intended.
    error = assert_raise(SyntaxError) { eval('/\x/', nil, bug3539) }
    assert_match(/invalid hex escape/, error.message)

    escape_lines = error.message.scan(/.*invalid .*escape.*/i)
    assert_equal(1, escape_lines.size, bug3539)
  end
  # A raw "-" followed by a character-type escape right after a range
  # ("0-1-\s"): the hyphen and whitespace must both be members.
  def test_raw_hyphen_and_tk_char_type_after_range
    bug6853 = '[ruby-core:47115]'
    # use Regexp.new instead of literal to ignore a parser warning.
    re = Regexp.new('[0-1-\\s]')
    check(re, [' ', '-'], ['2', 'a'], bug6853)
  end
  # Regexp#=== with a non-string returns false, while Regexp.quote raises
  # a TypeError with the standard "no implicit conversion" message.
  def test_error_message_on_failed_conversion
    bug7539 = '[ruby-core:50733]'
    assert_equal(false, /x/ === 42)

    expected_message = 'no implicit conversion of Integer into String'
    assert_raise_with_message(TypeError, expected_message, bug7539) do
      Regexp.quote(42)
    end
  end
  # Conditional groups, (?(cond)yes|no), keyed by group number and by
  # group name, plus a pattern that must raise RegexpError.
  def test_conditional_expression
    bug8583 = '[ruby-dev:47480] [Bug #8583]'
    # subject => whether it is expected to match
    conds = {"xy"=>true, "yx"=>true, "xx"=>false, "yy"=>false}
    [
      /\A((x)|(y))(?(2)y|x)\z/,
      /\A((?<x>x)|(?<y>y))(?(<x>)y|x)\z/,
    ].each do |re|
      assert_match_each(re, conds, bug8583)
    end

    # Conditions referring to group 5, which the pattern does not define.
    bug12418 = '[ruby-core:75694] [Bug #12418]'
    assert_raise(RegexpError, bug12418) do
      Regexp.new('(0?0|(?(5)||)|(?(5)||))?')
    end
  end
  # Inline (?i) / (?i:...) options inside look-behind: the option applies
  # to the look-behind text only, not to the "cd" that follows it.
  def test_options_in_look_behind
    assert_nothing_raised do
      # Each of these must match "cd" at offsets 2...4.
      [
        ["(?<=(?i)ab)cd", "ABcd"],
        ["(?<=(?i:ab))cd", "ABcd"],
        ["(?<!(?i)ab)cd", "aacd"],
        ["(?<!(?i:ab))cd", "aacd"],
      ].each do |src, subject|
        assert_match_at(src, subject, [[2, 4]])
      end

      [
        ["(?<=(?i)ab)cd", "ABCD"],
        ["(?<=(?i:ab))cd", "ABCD"],
        ["(?<!(?i)ab)cd", "ABcd"],
        ["(?<!(?i:ab))cd", "ABcd"],
      ].each do |src, subject|
        assert_not_match(src, subject)
      end
    end
  end
  # With /o the interpolation runs only on the first evaluation, so later
  # calls keep returning the regexp built from the first argument.
  def test_once
    build = proc { |value| /#{value}/o }
    3.times do |value|
      assert_equal(/0/, build.call(value))
    end
  end
  # A proc that builds a regexp by interpolating the result of calling
  # itself; the nesting must come out in call order.
  def test_once_recursive
    nest = proc do |depth|
      if depth > 0
        /#{nest.call(depth - 1).to_s}#{depth}/
      else
        //
      end
    end
    expected = /(?-mix:(?-mix:(?-mix:)1)2)3/
    assert_equal(expected, nest.call(3))
  end
  # Two threads reach the same /o literal; both must end up with the
  # regexp produced by the first evaluation.
  def test_once_multithread
    m = Thread::Mutex.new
    # The interpolation releases the mutex and then sleeps, so the second
    # thread can reach the literal while the first evaluation is running.
    slow_once = proc do |i|
      /#{m.unlock; sleep 0.5; i}/o
    end
    results = []
    n = 0
    workers = Array.new(2) do
      Thread.new { m.lock; results << slow_once.call(n += 1) }
    end
    workers.each(&:join)
    assert_equal([/1/, /1/], results)
  end
  # Leaving a /o interpolation via throw must not mark the literal as
  # evaluated: the next call runs the interpolation again.
  def test_once_escape
    escape = proc do |value|
      catch(:xyzzy) do
        /#{throw :xyzzy, value}/o =~ ""
        :ng
      end
    end
    [0, 1].each do |value|
      assert_equal(value, escape.call(value))
    end
  end
  # A user redefinition of Regexp#=~ must be honoured for regexp literals.
  # The script is handed to assert_separately because it redefines a core
  # method.
  def test_eq_tilde_can_be_overridden
    child_script = <<-RUBY
      class Regexp
        undef =~
        def =~(str)
          "foo"
        end
      end
      assert_equal("foo", // =~ "")
    RUBY
    assert_separately([], child_script)
  end
  # Repeatedly hitting the parse depth limit must raise RegexpError each
  # time. Does nothing when the '-test-/regexp' extension cannot be loaded.
  def test_invalid_free_at_parse_depth_limit_over
    # Two heredocs joined by "\n": the first ("begin;") is empty, the second
    # runs up to "end;" and holds the script given to assert_separately.
    child_script = "#{<<-"begin;"}\n#{<<-"end;"}"
    begin;
      begin
        require '-test-/regexp'
      rescue LoadError
      else
        bug = '[ruby-core:79624] [Bug #13234]'
        Bug::Regexp.parse_depth_limit = 10
        src = "[" * 100
        3.times do
          assert_raise_with_message(RegexpError, /parse depth limit over/, bug) do
            Regexp.new(src)
          end
        end
      end
    end;
    assert_separately([], child_script)
  end
  # Absent operator, (?~...): matched text, use inside a C-style comment
  # pattern, and a capture group inside the operator staying unset.
  def test_absent
    assert_equal(0, /(?~(a|c)c)/ =~ "abb")
    assert_equal("abb", Regexp.last_match(0))

    assert_equal(0, /\/\*((?~\*\/))\*\// =~ "/*abc*def/xyz*/ /* */")
    assert_equal("abc*def/xyz", Regexp.last_match(1))

    assert_equal(0, /(?~(a)c)/ =~ "abb")
    assert_nil(Regexp.last_match(1))
  end
  1118. # This assertion is for porting x2() tests in testpy.py of Onigmo.
  # Asserts that +re+ matches +str+ and that the [begin, end] offsets of
  # the whole match and of every group equal +positions+.
  #
  # re        - Regexp, or a source string compiled with Regexp.new.
  # str       - subject string.
  # positions - expected Array of [begin, end] pairs, index 0 first.
  # msg       - optional message prefix for failures.
  def assert_match_at(re, str, positions, msg = nil)
    re = Regexp.new(re) unless Regexp === re
    match = re.match(str)

    no_match_note = message(msg) do
      "Expected #{re.inspect} to match #{str.inspect}"
    end
    assert_not_nil(match, no_match_note)
    return unless match

    actual_positions = Array.new(match.size) { |i| match.offset(i) }
    wrong_place_note = message(msg) do
      "Expected #{re.inspect} to match #{str.inspect} at: #{positions.inspect}"
    end
    assert_equal(positions, actual_positions, wrong_place_note)
  end
  # Asserts, for every subject in +conds+, that +re+ matches it exactly
  # when the associated value is true. All disagreeing subjects are
  # reported together in one failure message.
  #
  # re    - Regexp to test.
  # conds - Hash of subject string => expected match (true / false).
  # msg   - optional message prefix for failures.
  def assert_match_each(re, conds, msg = nil)
    # Keep the entries whose expectation disagrees with the actual result.
    errs = conds.select do |str, expected|
      !expected != !(re =~ str)
    end
    msg = message(msg) do
      details = errs.map do |str, expected|
        negation = expected ? "" : "not "
        "\t#{negation}match #{str.inspect}"
      end
      "Expected #{re.inspect} to\n" + details.join(",\n")
    end
    assert_empty(errs, msg)
  end
  1142. end