PageRenderTime 56ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/Languages/IronPython/Tests/modules/io_related/re_test.py

http://github.com/IronLanguages/main
Python | 826 lines | 752 code | 38 blank | 36 comment | 11 complexity | 13c4b730eaf17e3212223a7e6c2042f3 MD5 | raw file
Possible License(s): CPL-1.0, BSD-3-Clause, ISC, GPL-2.0, MPL-2.0-no-copyleft-exception
  1. #####################################################################################
  2. #
  3. # Copyright (c) Microsoft Corporation. All rights reserved.
  4. #
  5. # This source code is subject to terms and conditions of the Apache License, Version 2.0. A
  6. # copy of the license can be found in the License.html file at the root of this distribution. If
  7. # you cannot locate the Apache License, Version 2.0, please send an email to
  8. # ironpy@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. # by the terms of the Apache License, Version 2.0.
  10. #
  11. # You must not remove this notice, or any other, from this software.
  12. #
  13. #
  14. #####################################################################################
  15. from iptest.assert_util import *
  16. import re
  17. def test_none():
  18. for x in 'compile search match split findall finditer'.split():
  19. y = getattr(re, x)
  20. AssertError(TypeError, y, None)
  21. AssertError(TypeError, y, None, None)
  22. AssertError(TypeError, y, None, 'abc')
  23. AssertError(TypeError, y, 'abc', None)
  24. # Other exceptional input tests
  25. for x in (re.sub, re.subn):
  26. AssertError(TypeError, x, 'abc', None, 'abc')
  27. AssertError(TypeError, x, 'abc', None, None)
  28. AssertError(TypeError, x, None, 'abc', 'abc')
  29. AssertError(TypeError, x, 'abc', 'abc', None)
  30. AssertError(TypeError, re.escape, None)
  31. def test_sanity_re():
  32. '''
  33. Basic sanity tests for the re module. Each module member is
  34. used at least once.
  35. '''
  36. #compile
  37. Assert(hasattr(re.compile("(abc){1}"), "pattern"))
  38. Assert(hasattr(re.compile("(abc){1}", re.L), "pattern"))
  39. Assert(hasattr(re.compile("(abc){1}", flags=re.L), "pattern"))
  40. #I IGNORECASE L LOCAL MMULTILINE S DOTALL U UNICODE X VERBOSE
  41. flags = ["I", "IGNORECASE",
  42. "L", "LOCALE",
  43. "M", "MULTILINE",
  44. "S", "DOTALL",
  45. "U", "UNICODE",
  46. "X", "VERBOSE"]
  47. for f in flags:
  48. Assert(hasattr(re, f))
  49. #search
  50. AreEqual(re.search("(abc){1}", ""), None)
  51. AreEqual(re.search("(abc){1}", "abcxyz").span(), (0,3))
  52. AreEqual(re.search("(abc){1}", "abcxyz", re.L).span(), (0,3))
  53. AreEqual(re.search("(abc){1}", "abcxyz", flags=re.L).span(), (0,3))
  54. AreEqual(re.search("(abc){1}", "xyzabc").span(), (3,6))
  55. AreEqual(re.search("(abc){1}", buffer("")), None)
  56. AreEqual(re.search("(abc){1}", buffer("abcxyz")).span(), (0,3))
  57. AreEqual(re.search("(abc){1}", buffer("abcxyz"), re.L).span(), (0,3))
  58. AreEqual(re.search("(abc){1}", buffer("abcxyz"), flags=re.L).span(), (0,3))
  59. AreEqual(re.search("(abc){1}", buffer("xyzabc")).span(), (3,6))
  60. #match
  61. AreEqual(re.match("(abc){1}", ""), None)
  62. AreEqual(re.match("(abc){1}", "abcxyz").span(), (0,3))
  63. AreEqual(re.match("(abc){1}", "abcxyz", re.L).span(), (0,3))
  64. AreEqual(re.match("(abc){1}", "abcxyz", flags=re.L).span(), (0,3))
  65. #split
  66. AreEqual(re.split("(abc){1}", ""), [''])
  67. AreEqual(re.split("(abc){1}", "abcxyz"), ['', 'abc', 'xyz'])
  68. #maxsplit
  69. AreEqual(re.split("(abc){1}", "abc", 0), ['', 'abc', ''])
  70. for i in xrange(3):
  71. AreEqual(re.split("(abc){1}", "abc", maxsplit=i), ['', 'abc', ''])
  72. AreEqual(re.split("(abc){1}", "", maxsplit=i), [''])
  73. AreEqual(re.split("(abc){1}", "abcxyz", maxsplit=i), ['', 'abc', 'xyz'])
  74. AreEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=0), ['', 'abc', 'xyz', 'abc', ''])
  75. AreEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=1), ['', 'abc', 'xyzabc'])
  76. AreEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=2), ['', 'abc', 'xyz', 'abc', ''])
  77. #findall
  78. AreEqual(re.findall("(abc){1}", ""), [])
  79. AreEqual(re.findall("(abc){1}", "abcxyz"), ['abc'])
  80. AreEqual(re.findall("(abc){1}", "abcxyz", re.L), ['abc'])
  81. AreEqual(re.findall("(abc){1}", "abcxyz", flags=re.L), ['abc'])
  82. AreEqual(re.findall("(abc){1}", "xyzabcabc"), ['abc', 'abc'])
  83. #finditer
  84. AreEqual([x.group() for x in re.finditer("(abc){1}", "")], [])
  85. AreEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz")], ['abc'])
  86. AreEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz", re.L)], ['abc'])
  87. AreEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz", flags=re.L)], ['abc'])
  88. AreEqual([x.group() for x in re.finditer("(abc){1}", "xyzabcabc")], ['abc', 'abc'])
  89. rex = re.compile("foo")
  90. for m in rex.finditer("this is a foo and a foo bar"):
  91. AreEqual((m.pos, m.endpos), (0, 27))
  92. for m in rex.finditer(""):
  93. AreEqual((m.pos, m.endpos), (0, 1))
  94. for m in rex.finditer("abc"):
  95. AreEqual((m.pos, m.endpos), (0, 4))
  96. for m in rex.finditer("foo foo foo foo foo"):
  97. AreEqual((m.pos, m.endpos), (0, 19))
  98. #sub
  99. AreEqual(re.sub("(abc){1}", "9", "abcd"), "9d")
  100. AreEqual(re.sub("(abc){1}", "abcxyz",'abcd'), "abcxyzd")
  101. AreEqual(re.sub("(abc){1}", "1", "abcd", 0), "1d")
  102. AreEqual(re.sub("(abc){1}", "1", "abcd", count=0), "1d")
  103. AreEqual(re.sub("(abc){1}", "1", "abcdabcd", 1), "1dabcd")
  104. AreEqual(re.sub("(abc){1}", "1", "abcdabcd", 2), "1d1d")
  105. AreEqual(re.sub("(abc){1}", "1", "ABCdabcd", 2, flags=re.I), "1d1d")
  106. #subn
  107. AreEqual(re.subn("(abc){1}", "9", "abcd"), ("9d", 1))
  108. AreEqual(re.subn("(abc){1}", "abcxyz",'abcd'), ("abcxyzd",1))
  109. AreEqual(re.subn("(abc){1}", "1", "abcd", 0), ("1d",1))
  110. AreEqual(re.subn("(abc){1}", "1", "abcd", count=0), ("1d",1))
  111. AreEqual(re.subn("(abc){1}", "1", "abcdabcd", 1), ("1dabcd",1))
  112. AreEqual(re.subn("(abc){1}", "1", "abcdabcd", 2), ("1d1d",2))
  113. AreEqual(re.subn("(abc){1}", "1", "ABCdabcd", 2, flags=re.I), ("1d1d",2))
  114. #escape
  115. AreEqual(re.escape("abc"), "abc")
  116. AreEqual(re.escape(""), "")
  117. AreEqual(re.escape("_"), "\\_")
  118. AreEqual(re.escape("a_c"), "a\\_c")
  119. #error
  120. exc = re.error()
  121. exc = re.error("some args")
  122. #purge
  123. re.purge()
  124. def test_sanity_re_pattern():
  125. '''
  126. Basic sanity tests for the re module's Regular Expression
  127. objects (i.e., Pattern in CPython). Each method/member is
  128. utilized at least once.
  129. '''
  130. pattern = re.compile("(abc){1}")
  131. #match
  132. AreEqual(pattern.match(""), None)
  133. AreEqual(pattern.match("abcxyz").span(), (0,3))
  134. AreEqual(pattern.match("abc", 0).span(), (0,3))
  135. AreEqual(pattern.match("abc", 0, 3).span(), (0,3))
  136. AreEqual(pattern.match("abc", pos=0, endpos=3).span(), (0,3))
  137. for i in [-1, -2, -5, -7, -8, -65536]:
  138. for j in [3, 4, 5, 7, 8, 65536]:
  139. AreEqual(pattern.match("abc", i, j).span(), (0,3))
  140. AssertError(OverflowError, lambda: pattern.match("abc", 0, 2**32).span())
  141. AssertError(OverflowError, lambda: pattern.match("abc", -(2**32), 3).span())
  142. #search
  143. AreEqual(pattern.search(""), None)
  144. AreEqual(pattern.search("abcxyz").span(), (0,3))
  145. AreEqual(pattern.search("abc", 0).span(), (0,3))
  146. AreEqual(pattern.search("abc", 0, 3).span(), (0,3))
  147. AreEqual(pattern.search("abc", pos=0, endpos=3).span(), (0,3))
  148. AreEqual(pattern.search("xyzabc").span(), (3,6))
  149. #split
  150. AreEqual(pattern.split(""), [''])
  151. AreEqual(pattern.split("abcxyz"), ['', 'abc', 'xyz'])
  152. AreEqual(pattern.split("abc", 0), ['', 'abc', ''])
  153. AreEqual(pattern.split("abc", maxsplit=0), ['', 'abc', ''])
  154. AreEqual(pattern.split("abcxyzabc", maxsplit=1), ['', 'abc', 'xyzabc'])
  155. #findall
  156. AreEqual(pattern.findall(""), [])
  157. AreEqual(pattern.findall("abcxyz"), ['abc'])
  158. AreEqual(pattern.findall("abc", 0), ['abc'])
  159. AreEqual(pattern.findall("abc", 0, 3), ['abc'])
  160. AreEqual(pattern.findall("abc", pos=0, endpos=3), ['abc'])
  161. AreEqual(pattern.findall("xyzabcabc"), ['abc', 'abc'])
  162. #sub
  163. AreEqual(pattern.sub("9", "abcd"), "9d")
  164. AreEqual(pattern.sub("abcxyz",'abcd'), "abcxyzd")
  165. AreEqual(pattern.sub("1", "abcd", 0), "1d")
  166. AreEqual(pattern.sub("1", "abcd", count=0), "1d")
  167. AreEqual(pattern.sub("1", "abcdabcd", 1), "1dabcd")
  168. AreEqual(pattern.sub("1", "abcdabcd", 2), "1d1d")
  169. #subn
  170. AreEqual(pattern.subn("9", "abcd"), ("9d", 1))
  171. AreEqual(pattern.subn("abcxyz",'abcd'), ("abcxyzd",1))
  172. AreEqual(pattern.subn("1", "abcd", 0), ("1d",1))
  173. AreEqual(pattern.subn("1", "abcd", count=0), ("1d",1))
  174. AreEqual(pattern.subn("1", "abcdabcd", 1), ("1dabcd",1))
  175. AreEqual(pattern.subn("1", "abcdabcd", 2), ("1d1d",2))
  176. #flags
  177. AreEqual(pattern.flags, 0)
  178. AreEqual(re.compile("(abc){1}", re.L).flags, re.L)
  179. #groupindex
  180. AreEqual(pattern.groupindex, {})
  181. AreEqual(re.compile("(?P<abc>)(?P<bcd>)").groupindex, {'bcd': 2, 'abc': 1})
  182. #pattern
  183. AreEqual(pattern.pattern, "(abc){1}")
  184. AreEqual(re.compile("").pattern, "")
  185. def test_groupindex_empty():
  186. test_list = [ ".", "^", "$", "1*", "2+", "3?", "4*?", "5+?", "6??", "7{1}", "8{1,2}",
  187. "9{1,2}?", "[a-z]", "|", "(...)", "(?:abc)",
  188. "\(\?P\<Blah\>abc\)", "(?#...)", "(?=...)", "(?!...)", "(?<=...)",
  189. "(?<!...)", "\1", "\A", "\d"
  190. ]
  191. for x in test_list:
  192. AreEqual(re.compile(x).groupindex, {})
  193. def test_sanity_re_match():
  194. '''
  195. Basic sanity tests for the re module's Match objects. Each method/member
  196. is utilized at least once.
  197. '''
  198. pattern = re.compile("(abc){1}")
  199. match_obj = pattern.match("abcxyzabc123 and some other words...")
  200. #expand
  201. AreEqual(match_obj.expand("\1\g<1>.nt"), '\x01abc.nt')
  202. #group
  203. AreEqual(match_obj.group(), 'abc')
  204. AreEqual(match_obj.group(1), 'abc')
  205. #groups
  206. AreEqual(match_obj.groups(), ('abc',))
  207. AreEqual(match_obj.groups(1), ('abc',))
  208. AreEqual(match_obj.groups(99), ('abc',))
  209. #groupdict
  210. AreEqual(match_obj.groupdict(), {})
  211. AreEqual(match_obj.groupdict(None), {})
  212. AreEqual(re.compile("(abc)").match("abcxyzabc123 and...").groupdict(), {})
  213. #start
  214. AreEqual(match_obj.start(), 0)
  215. AreEqual(match_obj.start(1), 0)
  216. #end
  217. AreEqual(match_obj.end(), 3)
  218. AreEqual(match_obj.end(1), 3)
  219. #span
  220. AreEqual(match_obj.span(), (0,3))
  221. AreEqual(match_obj.span(1), (0,3))
  222. #pos
  223. AreEqual(match_obj.pos, 0)
  224. #endpos
  225. AreEqual(match_obj.endpos, 36)
  226. #lastindex
  227. AreEqual(match_obj.lastindex, 1)
  228. #lastgroup
  229. #CodePlex Work Item 5518
  230. #AreEqual(match_obj.lastgroup, None)
  231. #re
  232. Assert(match_obj.re==pattern)
  233. #string
  234. AreEqual(match_obj.string, "abcxyzabc123 and some other words...")
  235. def test_comment():
  236. '''
  237. (?#...)
  238. '''
  239. pattern = "a(?#foo)bc"
  240. c = re.compile(pattern)
  241. AreEqual(c.findall("abc"), ['abc'])
  242. pattern = "a(?#)bc"
  243. c = re.compile(pattern)
  244. AreEqual(c.findall("abc"), ['abc'])
  245. pattern = "a(?#foo)bdc"
  246. c = re.compile(pattern)
  247. AreEqual(len(c.findall("abc")), 0)
  248. def test_optional_paren():
  249. pattern = r"""\(?\w+\)?"""
  250. c = re.compile(pattern, re.X)
  251. AreEqual(c.findall('abc'), ['abc'])
  252. def test_back_match():
  253. p = re.compile('(?P<grp>.+?)(?P=grp)')
  254. AreEqual(p.match('abcabc').groupdict(), {'grp':'abc'})
  255. p = re.compile(r'(?P<delim>[%$])(?P<escaped>(?P=delim))')
  256. AreEqual(p.match('$$').groupdict(), {'escaped': '$', 'delim': '$'})
  257. AreEqual(p.match('$%'), None)
  258. p = re.compile(r'(?P<grp>ab)(a(?P=grp)b)')
  259. AreEqual(p.match('abaabb').groups(), ('ab', 'aabb'))
  260. def test_expand():
  261. AreEqual(re.match("(a)(b)", "ab").expand("blah\g<1>\g<2>"), "blahab")
  262. AreEqual(re.match("(a)()", "ab").expand("blah\g<1>\g<2>\n\r\t\\\\"),'blaha\n\r\t\\')
  263. AreEqual(re.match("(a)()", "ab").expand(""),'')
  264. def test_sub():
  265. x = '\n #region Generated Foo\nblah\nblah#end region'
  266. a = re.compile("^([ \t]+)#region Generated Foo.*?#end region", re.MULTILINE|re.DOTALL)
  267. AreEqual(a.sub("xx", x), "\nxx") # should match successfully
  268. AreEqual(a.sub("\\x12", x), "\n\\x12") # should match, but shouldn't un-escape for \x
  269. #if optional count arg is 0 then all occurrences should be replaced
  270. AreEqual('bbbb', re.sub("a","b","abab", 0))
  271. AreEqual(re.sub(r'(?P<id>b)', '\g<id>\g<id>yadayada', 'bb'), 'bbyadayadabbyadayada')
  272. AreEqual(re.sub(r'(?P<id>b)', '\g<1>\g<id>yadayada', 'bb'), 'bbyadayadabbyadayada')
  273. AssertError(IndexError, re.sub, r'(?P<id>b)', '\g<1>\g<i2>yadayada', 'bb')
  274. # the native implementation just gives a sre_constants.error instead indicating an invalid
  275. # group reference
  276. if is_cli:
  277. AssertError(IndexError, re.sub, r'(?P<id>b)', '\g<1>\g<30>yadayada', 'bb')
  278. AreEqual(re.sub('x*', '-', 'abc'), '-a-b-c-')
  279. AreEqual(re.subn('x*', '-', 'abc'), ('-a-b-c-', 4))
  280. AreEqual(re.sub('a*', '-', 'abc'), '-b-c-')
  281. AreEqual(re.subn('a*', '-', 'abc'), ('-b-c-', 3))
  282. AreEqual(re.sub('a*', '-', 'a'), '-')
  283. AreEqual(re.subn('a*', '-', 'a'), ('-', 1))
  284. AreEqual(re.sub("a*", "-", "abaabb"), '-b-b-b-')
  285. AreEqual(re.subn("a*", "-", "abaabb"), ('-b-b-b-', 4))
  286. AreEqual(re.sub("(a*)b", "-", "abaabb"), '---')
  287. AreEqual(re.subn("(a*)b", "-", "abaabb"), ('---', 3))
  288. AreEqual(re.subn("(ab)*", "cd", "abababababab", 10), ('cd', 1))
  289. AreEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
  290. AreEqual(re.subn('x*', '-', 'abxd'), ('-a-b-d-', 4))
  291. Assert(re.sub('([^aeiou])y$', r'\lies', 'vacancy') == 'vacan\\lies')
  292. Assert(re.sub('([^aeiou])y$', r'\1ies', 'vacancy') == 'vacancies')
  293. AreEqual(re.sub("a+", "\n\t\\\?\"\b", "abc"), '\n\t\\?"\x08bc')
  294. AreEqual(re.sub("a+", r"\n\t\\\?\"\b", "abc"), '\n\t\\\\?\\"\x08bc')
  295. AreEqual(re.sub("a+", "\n\t\\\\\\?\"\b", "abc"), '\n\t\\\\?"\x08bc')
  296. def test_dot():
  297. a = re.compile('.')
  298. AreEqual(a.groupindex, {})
  299. p = re.compile('.')
  300. z = []
  301. for c in p.finditer('abc'): z.append((c.start(), c.end()))
  302. z.sort()
  303. AreEqual(z, [(0,1), (1,2), (2,3)])
  304. def test_x():
  305. nonmatchingp = re.compile('x')
  306. AreEqual(nonmatchingp.search('ecks', 1, 4), None)
  307. def test_match():
  308. p = re.compile('.')
  309. AreEqual(p.match('bazbar', 1,2).span(), (1,2))
  310. def test_span():
  311. AreEqual(re.match('(baz)(bar)(m)', "bazbarmxyz").span(2),(3, 6))
  312. def test_regs():
  313. AreEqual(re.match('(baz)(bar)(m)', "bazbarmxyz").regs,
  314. ((0, 7), (0, 3), (3, 6), (6, 7)))
  315. AreEqual(re.match('bazbar(mm)+(abc)(xyz)', "bazbarmmmmabcxyz123456abc").regs,
  316. ((0, 16), (8, 10), (10, 13), (13, 16)))
  317. def test_endpos():
  318. AreEqual(re.match('(baz)(bar)(m)', "bazbarmx").endpos, 8)
  319. pass
  320. def test_re():
  321. #Just ensure it's there for now
  322. stuff = re.match('a(baz)(bar)(m)', "abazbarmx")
  323. Assert(hasattr(stuff, "re"))
  324. Assert(hasattr(stuff.re, "sub"))
  325. def test_pos():
  326. AreEqual(re.match('(baz)(bar)(m)', "bazbarmx").pos, 0)
  327. def test_startandend():
  328. m = re.match(r'(a)|(b)', 'b')
  329. AreEqual(m.groups(), (None, 'b'))
  330. AreEqual(m.group(0), "b")
  331. AreEqual(m.start(0), 0)
  332. AreEqual(m.end(0), 1)
  333. AreEqual(m.start(1), -1)
  334. AreEqual(m.end(1), -1)
  335. m = re.match(".*", '')
  336. AreEqual(m.groups(), ())
  337. AreEqual(m.start(0), 0)
  338. AreEqual(m.end(0), 0)
  339. AssertError(IndexError, m.group, "112")
  340. AssertError(IndexError, m.group, 112)
  341. AssertError(IndexError, m.group, "-1")
  342. AssertError(IndexError, m.group, -1)
  343. AssertError(IndexError, m.start, 112)
  344. AssertError(IndexError, m.start, -1)
  345. AssertError(IndexError, m.end, "112")
  346. AssertError(IndexError, m.end, 112)
  347. AssertError(IndexError, m.end, "-1")
  348. AssertError(IndexError, m.end, -1)
  349. match = re.match(r'(?P<test>test)', 'test')
  350. AreEqual(match.start('test'), 0)
  351. AreEqual(match.end('test'), 4)
  352. def test_start_of_str():
  353. startOfStr = re.compile('^')
  354. AreEqual(startOfStr.match('bazbar', 1), None)
  355. AreEqual(startOfStr.match('bazbar', 0,0).span(), (0,0))
  356. AreEqual(startOfStr.match('bazbar', 1,2), None)
  357. AreEqual(startOfStr.match('bazbar', endpos=3).span(), (0,0))
  358. AreEqual(re.sub('^', 'x', ''), 'x')
  359. AreEqual(re.sub('^', 'x', ' '), 'x ')
  360. AreEqual(re.sub('^', 'x', 'abc'), 'xabc')
  361. # check that groups in split RE are added properly
  362. def test_split():
  363. AreEqual(re.split('{(,)?}', '1 {} 2 {,} 3 {} 4'), ['1 ', None, ' 2 ', ',', ' 3 ', None, ' 4'])
  364. pnogrp = ','
  365. ptwogrp = '((,))'
  366. csv = '0,1,1,2,3,5,8,13,21,44'
  367. AreEqual(re.split(pnogrp, csv, 1), ['0', csv[2:]])
  368. AreEqual(re.split(pnogrp, csv, 2), ['0','1', csv[4:]])
  369. AreEqual(re.split(pnogrp, csv, 1000), re.split(pnogrp, csv))
  370. AreEqual(re.split(pnogrp, csv, 0), re.split(pnogrp, csv))
  371. AreEqual(re.split(pnogrp, csv, -1), [csv])
  372. ponegrp = '(,)'
  373. AreEqual(re.split(ponegrp, csv, 1), ['0', ',', csv[2:]])
  374. def test_escape():
  375. compiled = re.compile(re.escape("hi_"))
  376. all = re.compile('(.*)')
  377. AreEqual(all.search('abcdef', 3).group(0), 'def')
  378. AssertError(IndexError, re.match("a[bcd]*b", 'abcbd').group, 1)
  379. AreEqual(re.match('(a[bcd]*b)', 'abcbd').group(1), 'abcb')
  380. s = ''
  381. for i in range(32, 128):
  382. if not chr(i).isalnum():
  383. s = s + chr(i)
  384. x = re.escape(s)
  385. Assert(x == '\\ \\!\\"\\#\\$\\%\\&\\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\<\\=\\>\\?\\@\\[\\\\\\]\\^\\_\\`\\{\\|\\}\\~\\\x7f')
  386. x = re.compile(r'[\\A-Z\.\+]')
  387. Assert(x.search('aaaA\\B\\Caaa'))
  388. # From the docs: "^" matches only at the start of the string, or in MULTILINE mode also immediately
  389. # following a newline.
  390. m = re.compile("a").match("ba", 1) # succeed
  391. AreEqual('a', m.group(0))
  392. # bug 23668
  393. #AreEqual(re.compile("^a").search("ba", 1), None) # fails; 'a' not at start
  394. #AreEqual(re.compile("^a").search("\na", 1), None) # fails; 'a' not at start
  395. m = re.compile("^a", re.M).search("\na", 1) # succeed (multiline)
  396. AreEqual('a', m.group(0))
  397. # bug 938
  398. #AreEqual(re.compile("^a", re.M).search("ba", 1), None) # fails; no preceding \n
  399. # findall
  400. def test_findall():
  401. for (x, y, z) in (
  402. ('\d+', '99 blahblahblah 183 blah 12 blah 7777 yada yada', ['99', '183', '12', '7777']),
  403. ('^\d+', '0blahblahblah blah blah yada yada1', ['0']),
  404. ('^\d+', 'blahblahblah blah blah yada yada1', []),
  405. ("(\d+)|(\w+)", "x = 999y + 23", [('', 'x'), ('999', ''), ('', 'y'), ('23', '')]),
  406. ("(\d)(\d\d)(\d\d\d)", "123456789123456789", [('1', '23', '456'), ('7', '89', '123'), ('4', '56', '789')]),
  407. (r"(?i)(\w+)\s+fish\b", "green fish black fish red fish blue fish", ['green', 'black', 'red', 'blue']),
  408. ('(a)(b)', 'abab', [('a', 'b'), ('a', 'b')]),
  409. ):
  410. AreEqual(re.findall(x, y), z)
  411. AreEqual(re.compile(x).findall(y), z)
  412. def test_match_groups():
  413. m = re.match('(?P<test>a)(b)', 'ab')
  414. Assert(m.groups() == ('a', 'b'))
  415. m = re.match('(u)(?P<test>v)(b)(?P<Named2>w)(x)(y)', 'uvbwxy')
  416. Assert(m.groups() == ('u', 'v', 'b', 'w', 'x', 'y'))
  417. def test_options():
  418. # coverage for ?iLmsux options in re.compile path
  419. tests = [ ("t(?=s)", "atreftsadbeatwttta", ['t']),
  420. ("t(?!s)", "atreftsadbeatststs", ['t']) ]
  421. # native implementation does not handle extensions specified in this way
  422. if is_cli:
  423. tests.extend([
  424. ("(?i:foo)", "fooFoo FOO fOo fo oFO O\n\t\nFo ofO O", ['foo', 'Foo', 'FOO', 'fOo']),
  425. ("(?im:^foo)", "fooFoo FOO fOo\n\t\nFoo\nFOO", ['foo', 'Foo', 'FOO']), # ignorecase, multiline (matches at beginning of string and at each newline)
  426. ("(?s:foo.*bar)", "foo yadayadayada\nyadayadayada bar", ['foo yadayadayada\nyadayadayada bar']), # dotall (make "." match any chr, including a newline)
  427. ("(?x:baz bar)", "bazbar foo bar bazbar \n\n\tbazbar", ['bazbar', 'bazbar', 'bazbar']), #verbose (ignore whitespace)
  428. ])
  429. for (x, y, z) in tests:
  430. AreEqual(re.findall(x, y), z)
  431. AreEqual(re.compile(x).findall(y), z)
  432. def test_bug858():
  433. pattern = r"""\(? #optional paren
  434. \)? #optional paren
  435. \d+ """
  436. c = re.compile(pattern, re.X)
  437. l = c.findall("989")
  438. Assert(l == ['989'])
  439. def test_finditer():
  440. # finditer
  441. matches = re.finditer("baz","barbazbarbazbar")
  442. num = 0
  443. for m in matches:
  444. num = num + 1
  445. AreEqual("baz", m.group(0))
  446. Assert(num == 2)
  447. matches = re.finditer("baz","barbazbarbazbar", re.L)
  448. num = 0
  449. for m in matches:
  450. num = num + 1
  451. AreEqual("baz", m.group(0))
  452. Assert(num == 2)
  453. matches = re.compile("baz").finditer("barbazbarbazbar", 0)
  454. num = 0
  455. for m in matches:
  456. num = num + 1
  457. AreEqual("baz", m.group(0))
  458. Assert(num == 2)
  459. matches = re.compile("baz").finditer("barbazbarbazbar", 14)
  460. num = 0
  461. for m in matches:
  462. num = num + 1
  463. AreEqual("baz", m.group(0))
  464. Assert(num == 0)
  465. matches = re.compile("baz").finditer("barbazbarbazbar", 0, 14)
  466. num = 0
  467. for m in matches:
  468. num = num + 1
  469. AreEqual("baz", m.group(0))
  470. Assert(num == 2)
  471. matches = re.compile("baz").finditer("barbazbarbazbar", 9, 12)
  472. num = 0
  473. for m in matches:
  474. num = num + 1
  475. AreEqual("baz", m.group(0))
  476. AreEqual(num, 1)
  477. matches = re.compile("baz").finditer("barbazbarbazbar", 9, 11)
  478. num = 0
  479. for m in matches:
  480. num = num + 1
  481. AreEqual("baz", m.group(0))
  482. AreEqual(num, 0)
  483. matches = re.compile("baz").finditer("barbazbarbazbar", 10, 12)
  484. num = 0
  485. for m in matches:
  486. num = num + 1
  487. AreEqual("baz", m.group(0))
  488. AreEqual(num, 0)
  489. def test_search():
  490. # search
  491. sp = re.search('super', 'blahsupersuper').span()
  492. Assert(sp == (4, 9))
  493. sp = re.search('super', 'superblahsuper').span()
  494. Assert(sp == (0, 5))
  495. #re.search.group() index error
  496. AreEqual(re.search("z.*z", "az123za").group(),'z123z')
  497. AreEqual(re.search("z.*z", "az12za").group(),'z12z')
  498. AreEqual(re.search("z.*z", "azza").group(),'zz')
  499. AreEqual(re.search("z123p+z", "az123ppppppppppza").group(),'z123ppppppppppz')
  500. AreEqual(re.search("z123p+z", "az123pza").group(),'z123pz')
  501. AreEqual(re.search("z123p?z", "az123pza").group(),'z123pz')
  502. AreEqual(re.search("z123p?z", "az123za").group(),'z123z')
  503. AreEqual(re.search('b', 'abc').string, 'abc')
  504. def test_subn():
  505. # subn
  506. tup = re.subn("ab", "cd", "abababababab")
  507. Assert(tup == ('cdcdcdcdcdcd', 6))
  508. tup = re.subn("ab", "cd", "abababababab", 0)
  509. Assert(tup == ('cdcdcdcdcdcd', 6))
  510. tup = re.subn("ab", "cd", "abababababab", 1)
  511. Assert(tup == ('cdababababab', 1))
  512. tup = re.subn("ab", "cd", "abababababab", 10)
  513. Assert(tup == ('cdcdcdcdcdcd', 6))
  514. tup = re.subn("ababab", "cd", "ab", 10)
  515. Assert(tup == ('ab', 0))
  516. tup = re.subn("ababab", "cd", "ab")
  517. Assert(tup == ('ab', 0))
  518. tup = re.subn("(ab)*", "cd", "abababababab", 10)
  519. Assert(tup == ('cd', 1))
  520. tup = re.subn("(ab)?", "cd", "abababababab", 10)
  521. Assert(tup == ('cdcdcdcdcdcd', 6))
  522. def test_groups():
  523. reg = re.compile("\[(?P<header>.*?)\]")
  524. m = reg.search("[DEFAULT]")
  525. Assert( m.groups() == ('DEFAULT',))
  526. Assert( m.group('header') == 'DEFAULT' )
  527. reg2 = re.compile("(?P<grp>\S+)?")
  528. m2 = reg2.search("")
  529. Assert ( m2.groups() == (None,))
  530. Assert ( m2.groups('Default') == ('Default',))
  531. def test_locale_flags():
  532. AreEqual(re.compile(r"^\#[ \t]*(\w[\d\w]*)[ \t](.*)").flags, 0)
  533. AreEqual(re.compile(r"^\#[ \t]*(\w[\d\w]*)[ \t](.*)", re.L).flags, re.L)
  534. AreEqual(re.compile(r"(?L)^\#[ \t]*(\w[\d\w]*)[ \t](.*)").flags, re.L)
  535. def test_end():
  536. ex = re.compile(r'\s+')
  537. m = ex.match('(object Petal', 7)
  538. Assert (m.end(0) == 8)
  539. def test_lone_hat():
  540. """Single ^ reg-ex shouldn't match w/ a sub-set of a string"""
  541. sol = re.compile('^')
  542. AreEqual(sol.match('bazbar', 1, 2), None)
  543. AreEqual(sol.match('foobar', 1, 2), None)
  544. def test_escape_backslash():
  545. x = re.compile (r"[\\A-Z\.\+]")
  546. AreEqual(x.search('aaaA\\B\\Caaa').span(), (3,4))
  547. def test_eol():
  548. r = re.compile(r'<(/|\Z)')
  549. s = r.search("<", 0)
  550. Assert(s != None)
  551. AreEqual(s.span(), (0, 1))
  552. AreEqual(s.group(0), '<')
  553. AreEqual(r.search("<Z", 0), None)
  554. def test_lastindex():
  555. for (pat, index) in [
  556. ('(a)b', 1), ('((a)(b))', 1), ('((ab))', 1),
  557. ('(a)(b)', 2),
  558. ('(a)?ab', None),
  559. ('(a)?b', 1),
  560. ]:
  561. AreEqual(re.match(pat, 'ab').lastindex, index)
  562. for (pat, index) in [
  563. ('(a)ab', 1),
  564. ('(a)(a)b', 2),
  565. ('(a)(a)(b)', 3),
  566. ('((a)a(b))', 1),
  567. ('((a)(a)(b))', 1),
  568. ('(a(a)(b))', 1),
  569. ('(a(a)?(b))', 1),
  570. ('(aa(a)?(b))', 1),
  571. ('(aa(b))', 1),
  572. ('(a(ab))', 1),
  573. ('(a)?ab', 1),
  574. ('a(a)?ab', None),
  575. ('a(a)?(a)?b', 1),
  576. ('a(a)?(a)?(b)', 3),
  577. ('a(a)b', 1),
  578. ('(a(a))(b)', 3),
  579. ('(a(a))b', 1),
  580. ('((a)(a))(b)', 4),
  581. ('((a)(a))b', 1),
  582. ]:
  583. AreEqual(re.match(pat, 'aab').lastindex, index)
  584. def test_empty_split():
  585. cases =[
  586. ('', ['']),
  587. ('*', ['*']),
  588. (':', ['', '']),
  589. ('::', ['', '']),
  590. ('a::', ['a', '']),
  591. ('::b', ['', 'b']),
  592. (':c:', ['', 'c', '']),
  593. (':\t: ', ['', '\t', ' ']),
  594. ('a:b::c', ['a', 'b', 'c']),
  595. (':a:b::c', ['', 'a', 'b', 'c']),
  596. ('::a:b::c:', ['', 'a', 'b', 'c', '']),
  597. ]
  598. for expr, result in cases:
  599. AreEqual(re.split(":*", expr), result)
  600. @skip("silverlight")
  601. def test_cp15298():
  602. regex = "^" + "\d\.\d\.\d \(IronPython \d\.\d(\.\d)? ((Alpha )|(Beta )|())\(\d\.\d\.\d\.\d{3,4}\) on \.NET \d(\.\d{1,5}){3}\)" * 15 + "$"
  603. match_str = "2.5.0 (IronPython 2.0 Beta (2.0.0.1000) on .NET 2.0.50727.1433)" * 15
  604. compiled_regex = re.compile(regex)
  605. retval = compiled_regex.match(match_str)
  606. Assert(retval != None)
  607. retval = re.match(regex, match_str)
  608. Assert(retval != None)
  609. def test_cp11136():
  610. regex = re.compile(r"^(?P<msg>NMAKE[A-Za-z0-9]*)'\"?(?P<file>[\\A-Za-z0-9/:_\.\+]+)" )
  611. Assert(regex.search(r"NMAKE0119'adirectory\afile.txt")!=None)
  612. def test_cp17111():
  613. test_cases = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789~!@#%&_+-=]{};':,.//<>" + '"'
  614. for x in test_cases:
  615. regex = re.compile(r".*\\%s" % x)
  616. Assert(regex.search(r"\\%s" % x)!=None)
  617. Assert(regex.search(r"")==None)
  618. def test_cp1089():
  619. test_cases = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789~!@#%&_+-=]{};':,.//<>" + '"'
  620. for x in test_cases:
  621. #Just make sure they don't throw
  622. temp = re.compile('\\\\' + x)
  623. def test_cp16657():
  624. Assert(re.compile(r'^bar', re.M).search('foo\nbar') != None)
  625. Assert(re.compile(r'^bar(?m)').search('foo\nbar') != None)
  626. Assert(re.compile(r'^bar', re.M).search('foo\nbaar') == None)
  627. Assert(re.compile(r'^bar(?m)').search('foo\nbaar') == None)
  628. Assert(re.compile(r'^bar', re.U).search('bar') != None)
  629. Assert(re.compile(r'^bar(?u)').search('bar') != None)
  630. Assert(re.compile(r'^bar', re.U).search('baar') == None)
  631. Assert(re.compile(r'^bar(?u)').search('baar') == None)
  632. Assert(re.compile(r' b ar ', re.X).search('bar') != None)
  633. Assert(re.compile(r'b ar(?x)').search('bar') != None)
  634. Assert(re.compile(r' b ar ', re.X).search('baar') == None)
  635. Assert(re.compile(r'b ar(?x)').search('baar') == None)
  636. Assert(re.compile(r'b ar').search('bar') == None)
  637. def test_n_m_quantifier():
  638. AreEqual(re.search('ab{,2}a', 'abba').span(), (0, 4))
  639. AreEqual(re.search('ab{,2}a', 'aba').span(), (0, 3))
  640. AreEqual(re.search('ab{,2}a', 'abbba'), None)
  641. AreEqual(re.search('ab{,2}a', 'abba').span(), re.search('ab{0,2}a', 'abba').span())
  642. AreEqual(re.search('ab{0,2}a', 'abbba'), None)
  643. AreEqual(re.search('ab{2,}a', 'abba').span(), (0,4))
  644. AreEqual(re.search('ab{2,}a', 'abbba').span(), (0,5))
  645. AreEqual(re.search('ab{2,}a', 'aba'), None)
  646. def test_mixed_named_and_unnamed_groups():
  647. example1=r"(?P<one>Blah)"
  648. example2=r"(?P<one>(Blah))"
  649. RegExsToTest=[example1,example2]
  650. for regString in RegExsToTest:
  651. g=re.compile(regString)
  652. AreEqual(g.groupindex, {'one' : 1})
  653. def test__pickle():
  654. '''
  655. TODO: just a sanity test for now. Needs far more testing.
  656. '''
  657. regex = re.compile(r"^(?P<msg>NMAKE[A-Za-z0-9]*)'\"?(?P<file>[\\A-Za-z0-9/:_\.\+]+)" )
  658. pickled_regex = re._pickle(regex)
  659. AreEqual(len(pickled_regex), 2)
  660. AreEqual(pickled_regex[1],
  661. ('^(?P<msg>NMAKE[A-Za-z0-9]*)\'\\"?(?P<file>[\\\\A-Za-z0-9/:_\\.\\+]+)', 0))
  662. def test_conditional():
  663. p = re.compile(r'(a)?(b)((?(1)c))')
  664. AreEqual(p.match('abc').groups(), ('a', 'b', 'c'))
  665. p = re.compile(r'(?P<first>a)?(b)((?(first)c))')
  666. AreEqual(p.match('abc').groups(), ('a', 'b', 'c'))
  667. s = r'((?(a)ab|cd))'
  668. if is_cli or is_silverlight:
  669. p = re.compile(s)
  670. AreEqual(p.match('ab').groups(), ('ab',))
  671. else:
  672. AssertError(re.error, re.compile, s)
  673. def test_cp35146():
  674. # re.compile returns cached instances
  675. AreEqual(re.compile('cp35146'), re.compile('cp35146'))
  676. def test_cp35135():
  677. AreEqual(re.match(r"(?iu)aA", "aa").string, "aa")
  678. AreEqual(re.match(r"(?iu)Aa", "aa").string, "aa")
  679. AreEqual(re.match(r"(?iLmsux)Aa", "aa").string, "aa")
  680. def test_issue506():
  681. AreEqual(re.compile("^a", re.M).search("ba", 1), None)
  682. def test_issue1370():
  683. AreEqual(re.compile("\Z").match("\n"), None)
  684. AreEqual(re.compile("\Z").match("").group(0), "")
  685. #--MAIN------------------------------------------------------------------------
  686. run_test(__name__)