PageRenderTime 48ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/IronPython_1_1/Src/Tests/test_re.py

#
Python | 668 lines | 588 code | 33 blank | 47 comment | 9 complexity | b63208db73a0ec8727deac316f0eb159 MD5 | raw file
Possible License(s): GPL-2.0, MPL-2.0-no-copyleft-exception, CPL-1.0, CC-BY-SA-3.0, BSD-3-Clause, ISC, AGPL-3.0, LGPL-2.1, Apache-2.0
  1. #####################################################################################
  2. #
  3. # Copyright (c) Microsoft Corporation.
  4. #
  5. # This source code is subject to terms and conditions of the Microsoft Public
  6. # License. A copy of the license can be found in the License.html file at the
  7. # root of this distribution. If you cannot locate the Microsoft Public
  8. # License, please send an email to dlr@microsoft.com. By using this source
  9. # code in any fashion, you are agreeing to be bound by the terms of the
  10. # Microsoft Public License.
  11. #
  12. # You must not remove this notice, or any other, from this software.
  13. #
  14. #####################################################################################
  15. from lib.assert_util import *
  16. import re
  17. def test_none():
  18. for x in 'compile search match split findall finditer'.split():
  19. y = getattr(re, x)
  20. AssertError(TypeError, y, None)
  21. AssertError(TypeError, y, None, None)
  22. AssertError(TypeError, y, None, 'abc')
  23. AssertError(TypeError, y, 'abc', None)
  24. # Other exceptional input tests
  25. for x in (re.sub, re.subn):
  26. AssertError(TypeError, x, 'abc', None, 'abc')
  27. AssertError(TypeError, x, 'abc', None, None)
  28. AssertError(TypeError, x, None, 'abc', 'abc')
  29. AssertError(TypeError, x, 'abc', 'abc', None)
  30. AssertError(TypeError, re.escape, None)
  31. def test_sanity_re():
  32. '''
  33. Basic sanity tests for the re module. Each module member is
  34. used at least once.
  35. '''
  36. #compile
  37. Assert(hasattr(re.compile("(abc){1}"), "pattern"))
  38. Assert(hasattr(re.compile("(abc){1}", re.L), "pattern"))
  39. Assert(hasattr(re.compile("(abc){1}", flags=re.L), "pattern"))
  40. #I IGNORECASE L LOCAL MMULTILINE S DOTALL U UNICODE X VERBOSE
  41. flags = ["I", "IGNORECASE",
  42. "L", "LOCALE",
  43. "M", "MULTILINE",
  44. "S", "DOTALL",
  45. "U", "UNICODE",
  46. "X", "VERBOSE"]
  47. for f in flags:
  48. Assert(hasattr(re, f))
  49. #search
  50. AreEqual(re.search("(abc){1}", ""), None)
  51. AreEqual(re.search("(abc){1}", "abcxyz").span(), (0,3))
  52. AreEqual(re.search("(abc){1}", "abcxyz", re.L).span(), (0,3))
  53. AreEqual(re.search("(abc){1}", "abcxyz", flags=re.L).span(), (0,3))
  54. AreEqual(re.search("(abc){1}", "xyzabc").span(), (3,6))
  55. #match
  56. AreEqual(re.match("(abc){1}", ""), None)
  57. AreEqual(re.match("(abc){1}", "abcxyz").span(), (0,3))
  58. AreEqual(re.match("(abc){1}", "abcxyz", re.L).span(), (0,3))
  59. AreEqual(re.match("(abc){1}", "abcxyz", flags=re.L).span(), (0,3))
  60. #split
  61. AreEqual(re.split("(abc){1}", ""), [''])
  62. AreEqual(re.split("(abc){1}", "abcxyz"), ['', 'abc', 'xyz'])
  63. AreEqual(re.split("(abc){1}", "abc", 0), ['', 'abc', ''])
  64. AreEqual(re.split("(abc){1}", "abc", maxsplit=0), ['', 'abc', ''])
  65. #findall
  66. AreEqual(re.findall("(abc){1}", ""), [])
  67. AreEqual(re.findall("(abc){1}", "abcxyz"), ['abc'])
  68. AreEqual(re.findall("(abc){1}", "abcxyz", re.L), ['abc'])
  69. AreEqual(re.findall("(abc){1}", "abcxyz", flags=re.L), ['abc'])
  70. AreEqual(re.findall("(abc){1}", "xyzabcabc"), ['abc', 'abc'])
  71. #finditer
  72. AreEqual([x.group() for x in re.finditer("(abc){1}", "")], [])
  73. AreEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz")], ['abc'])
  74. AreEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz", re.L)], ['abc'])
  75. AreEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz", flags=re.L)], ['abc'])
  76. AreEqual([x.group() for x in re.finditer("(abc){1}", "xyzabcabc")], ['abc', 'abc'])
  77. #sub
  78. AreEqual(re.sub("(abc){1}", "9", "abcd"), "9d")
  79. AreEqual(re.sub("(abc){1}", "abcxyz",'abcd'), "abcxyzd")
  80. AreEqual(re.sub("(abc){1}", "1", "abcd", 0), "1d")
  81. AreEqual(re.sub("(abc){1}", "1", "abcd", count=0), "1d")
  82. AreEqual(re.sub("(abc){1}", "1", "abcdabcd", 1), "1dabcd")
  83. AreEqual(re.sub("(abc){1}", "1", "abcdabcd", 2), "1d1d")
  84. #subn
  85. AreEqual(re.subn("(abc){1}", "9", "abcd"), ("9d", 1))
  86. AreEqual(re.subn("(abc){1}", "abcxyz",'abcd'), ("abcxyzd",1))
  87. AreEqual(re.subn("(abc){1}", "1", "abcd", 0), ("1d",1))
  88. AreEqual(re.subn("(abc){1}", "1", "abcd", count=0), ("1d",1))
  89. AreEqual(re.subn("(abc){1}", "1", "abcdabcd", 1), ("1dabcd",1))
  90. AreEqual(re.subn("(abc){1}", "1", "abcdabcd", 2), ("1d1d",2))
  91. #escape
  92. AreEqual(re.escape("abc"), "abc")
  93. AreEqual(re.escape(""), "")
  94. AreEqual(re.escape("_"), "\\_")
  95. AreEqual(re.escape("a_c"), "a\\_c")
  96. #error
  97. exc = re.error()
  98. exc = re.error("some args")
  99. #purge
  100. #CodePlex Work Item 6277
  101. #re.purge()
  102. def test_sanity_re_pattern():
  103. '''
  104. Basic sanity tests for the re module's Regular Expression
  105. objects (i.e., Pattern in CPython). Each method/member is
  106. utilized at least once.
  107. '''
  108. pattern = re.compile("(abc){1}")
  109. #match
  110. AreEqual(pattern.match(""), None)
  111. AreEqual(pattern.match("abcxyz").span(), (0,3))
  112. AreEqual(pattern.match("abc", 0).span(), (0,3))
  113. AreEqual(pattern.match("abc", 0, 3).span(), (0,3))
  114. AreEqual(pattern.match("abc", pos=0, endpos=3).span(), (0,3))
  115. #CodePlex Work Item 6266
  116. #AreEqual(pattern.match("abc", -5, 5).span(), (0,3))
  117. #search
  118. AreEqual(pattern.search(""), None)
  119. AreEqual(pattern.search("abcxyz").span(), (0,3))
  120. AreEqual(pattern.search("abc", 0).span(), (0,3))
  121. AreEqual(pattern.search("abc", 0, 3).span(), (0,3))
  122. AreEqual(pattern.search("abc", pos=0, endpos=3).span(), (0,3))
  123. AreEqual(pattern.search("xyzabc").span(), (3,6))
  124. #split
  125. AreEqual(pattern.split(""), [''])
  126. AreEqual(pattern.split("abcxyz"), ['', 'abc', 'xyz'])
  127. AreEqual(pattern.split("abc", 0), ['', 'abc', ''])
  128. AreEqual(pattern.split("abc", maxsplit=0), ['', 'abc', ''])
  129. #findall
  130. AreEqual(pattern.findall(""), [])
  131. AreEqual(pattern.findall("abcxyz"), ['abc'])
  132. AreEqual(pattern.findall("abc", 0), ['abc'])
  133. AreEqual(pattern.findall("abc", 0, 3), ['abc'])
  134. AreEqual(pattern.findall("abc", pos=0, endpos=3), ['abc'])
  135. AreEqual(pattern.findall("xyzabcabc"), ['abc', 'abc'])
  136. #sub
  137. AreEqual(pattern.sub("9", "abcd"), "9d")
  138. AreEqual(pattern.sub("abcxyz",'abcd'), "abcxyzd")
  139. AreEqual(pattern.sub("1", "abcd", 0), "1d")
  140. AreEqual(pattern.sub("1", "abcd", count=0), "1d")
  141. AreEqual(pattern.sub("1", "abcdabcd", 1), "1dabcd")
  142. AreEqual(pattern.sub("1", "abcdabcd", 2), "1d1d")
  143. #subn
  144. AreEqual(pattern.subn("9", "abcd"), ("9d", 1))
  145. AreEqual(pattern.subn("abcxyz",'abcd'), ("abcxyzd",1))
  146. AreEqual(pattern.subn("1", "abcd", 0), ("1d",1))
  147. AreEqual(pattern.subn("1", "abcd", count=0), ("1d",1))
  148. AreEqual(pattern.subn("1", "abcdabcd", 1), ("1dabcd",1))
  149. AreEqual(pattern.subn("1", "abcdabcd", 2), ("1d1d",2))
  150. #flags
  151. AreEqual(pattern.flags, 0)
  152. AreEqual(re.compile("(abc){1}", re.L).flags, re.L)
  153. #groupindex
  154. #Merlin Work Item 148105
  155. #AreEqual(pattern.groupindex, {})
  156. AreEqual(re.compile("(?P<abc>)(?P<bcd>)").groupindex, {'bcd': 2, 'abc': 1})
  157. #pattern
  158. AreEqual(pattern.pattern, "(abc){1}")
  159. AreEqual(re.compile("").pattern, "")
  160. def test_sanity_re_match():
  161. '''
  162. Basic sanity tests for the re module's Match objects. Each method/member
  163. is utilized at least once.
  164. '''
  165. pattern = re.compile("(abc){1}")
  166. match_obj = pattern.match("abcxyzabc123 and some other words...")
  167. #expand
  168. AreEqual(match_obj.expand("\1\g<1>.nt"), '\x01abc.nt')
  169. #group
  170. AreEqual(match_obj.group(), 'abc')
  171. AreEqual(match_obj.group(1), 'abc')
  172. #groups
  173. AreEqual(match_obj.groups(), ('abc',))
  174. AreEqual(match_obj.groups(1), ('abc',))
  175. AreEqual(match_obj.groups(99), ('abc',))
  176. #groupdict
  177. #CodePlex Work Item 6271
  178. #AreEqual(match_obj.groupdict(), {})
  179. #CodePlex Work Item 6271
  180. #AreEqual(match_obj.groupdict(None), {})
  181. #start
  182. AreEqual(match_obj.start(), 0)
  183. AreEqual(match_obj.start(1), 0)
  184. #end
  185. AreEqual(match_obj.end(), 3)
  186. AreEqual(match_obj.end(1), 3)
  187. #span
  188. AreEqual(match_obj.span(), (0,3))
  189. AreEqual(match_obj.span(1), (0,3))
  190. #pos
  191. AreEqual(match_obj.pos, 0)
  192. #endpos
  193. #CodePlex Work Item 6272
  194. #AreEqual(match_obj.endpos, 36)
  195. #lastindex
  196. AreEqual(match_obj.lastindex, 1)
  197. #lastgroup
  198. #CodePlex Work Item 5518
  199. #AreEqual(match_obj.lastgroup, None)
  200. #re
  201. Assert(match_obj.re==pattern)
  202. #string
  203. AreEqual(match_obj.string, "abcxyzabc123 and some other words...")
  204. def test_comment():
  205. '''
  206. (?#...)
  207. '''
  208. pattern = "a(?#foo)bc"
  209. c = re.compile(pattern)
  210. AreEqual(c.findall("abc"), ['abc'])
  211. pattern = "a(?#)bc"
  212. c = re.compile(pattern)
  213. AreEqual(c.findall("abc"), ['abc'])
  214. pattern = "a(?#foo)bdc"
  215. c = re.compile(pattern)
  216. AreEqual(len(c.findall("abc")), 0)
  217. def test_optional_paren():
  218. pattern = r"""\(?\w+\)?"""
  219. c = re.compile(pattern, re.X)
  220. AreEqual(c.findall('abc'), ['abc'])
  221. def test_back_match():
  222. p = re.compile('(?P<grp>.+?)(?P=grp)')
  223. AreEqual(p.match('abcabc').groupdict(), {'grp':'abc'})
  224. def test_expand():
  225. AreEqual(re.match("(a)(b)", "ab").expand("blah\g<1>\g<2>"), "blahab")
  226. AreEqual(re.match("(a)()", "ab").expand("blah\g<1>\g<2>\n\r\t\\\\"),'blaha\n\r\t\\')
  227. AreEqual(re.match("(a)()", "ab").expand(""),'')
  228. def test_sub():
  229. x = '\n #region Generated Foo\nblah\nblah#end region'
  230. a = re.compile("^([ \t]+)#region Generated Foo.*?#end region", re.MULTILINE|re.DOTALL)
  231. AreEqual(a.sub("xx", x), "\nxx") # should match successfully
  232. AreEqual(a.sub("\\x12", x), "\n\\x12") # should match, but shouldn't un-escape for \x
  233. #if optional count arg is 0 then all occurrences should be replaced
  234. AreEqual('bbbb', re.sub("a","b","abab", 0))
  235. AreEqual(re.sub(r'(?P<id>b)', '\g<id>\g<id>yadayada', 'bb'), 'bbyadayadabbyadayada')
  236. AreEqual(re.sub(r'(?P<id>b)', '\g<1>\g<id>yadayada', 'bb'), 'bbyadayadabbyadayada')
  237. AssertError(IndexError, re.sub, r'(?P<id>b)', '\g<1>\g<i2>yadayada', 'bb')
  238. # the native implementation just gives a sre_constants.error instead indicating an invalid
  239. # group reference
  240. if is_cli:
  241. AssertError(IndexError, re.sub, r'(?P<id>b)', '\g<1>\g<30>yadayada', 'bb')
  242. AreEqual(re.sub('x*', '-', 'abc'), '-a-b-c-')
  243. AreEqual(re.subn('x*', '-', 'abc'), ('-a-b-c-', 4))
  244. AreEqual(re.sub('a*', '-', 'abc'), '-b-c-')
  245. AreEqual(re.subn('a*', '-', 'abc'), ('-b-c-', 3))
  246. AreEqual(re.sub('a*', '-', 'a'), '-')
  247. AreEqual(re.subn('a*', '-', 'a'), ('-', 1))
  248. AreEqual(re.sub("a*", "-", "abaabb"), '-b-b-b-')
  249. AreEqual(re.subn("a*", "-", "abaabb"), ('-b-b-b-', 4))
  250. AreEqual(re.sub("(a*)b", "-", "abaabb"), '---')
  251. AreEqual(re.subn("(a*)b", "-", "abaabb"), ('---', 3))
  252. AreEqual(re.subn("(ab)*", "cd", "abababababab", 10), ('cd', 1))
  253. AreEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
  254. AreEqual(re.subn('x*', '-', 'abxd'), ('-a-b-d-', 4))
  255. Assert(re.sub('([^aeiou])y$', r'\lies', 'vacancy') == 'vacan\\lies')
  256. Assert(re.sub('([^aeiou])y$', r'\1ies', 'vacancy') == 'vacancies')
  257. AreEqual(re.sub("a+", "\n\t\\\?\"\b", "abc"), '\n\t\\?"\x08bc')
  258. #CodePlex Work Item 6273
  259. #AreEqual(re.sub("a+", r"\n\t\\\?\"\b", "abc"), '\n\t\\\\?\\"\x08bc')
  260. #CodePlex Work Item 6273
  261. #AreEqual(re.sub("a+", "\n\t\\\\\\?\"\b", "abc"), '\n\t\\\\?"\x08bc')
  262. def test_dot():
  263. a = re.compile('.')
  264. AreEqual(a.groupindex, {})
  265. p = re.compile('.')
  266. z = []
  267. for c in p.finditer('abc'): z.append((c.start(), c.end()))
  268. z.sort()
  269. AreEqual(z, [(0,1), (1,2), (2,3)])
  270. def test_x():
  271. nonmatchingp = re.compile('x')
  272. AreEqual(nonmatchingp.search('ecks', 1, 4), None)
  273. def test_match():
  274. p = re.compile('.')
  275. AreEqual(p.match('bazbar', 1,2).span(), (1,2))
  276. def test_span():
  277. AreEqual(re.match('(baz)(bar)(m)', "bazbarmxyz").span(2),(3, 6))
  278. def test_regs():
  279. #CodePlex Work Item 6275
  280. #AreEqual(re.match('(baz)(bar)(m)', "bazbarmxyz").regs,
  281. # ((0, 7), (0, 3), (3, 6), (6, 7)))
  282. pass
  283. def test_endpos():
  284. #CodePlex Work Item 6272
  285. #AreEqual(re.match('(baz)(bar)(m)', "bazbarmx").endpos, 8)
  286. pass
  287. def test_re():
  288. #Just ensure it's there for now
  289. stuff = re.match('a(baz)(bar)(m)', "abazbarmx")
  290. Assert(hasattr(stuff, "re"))
  291. Assert(hasattr(stuff.re, "sub"))
  292. def test_pos():
  293. AreEqual(re.match('(baz)(bar)(m)', "bazbarmx").pos, 0)
  294. def test_startandend():
  295. m = re.match(r'(a)|(b)', 'b')
  296. AreEqual(m.groups(), (None, 'b'))
  297. AreEqual(m.group(0), "b")
  298. AreEqual(m.start(0), 0)
  299. AreEqual(m.end(0), 1)
  300. AreEqual(m.start(1), -1)
  301. AreEqual(m.end(1), -1)
  302. m = re.match(".*", '')
  303. AreEqual(m.groups(), ())
  304. AreEqual(m.start(0), 0)
  305. AreEqual(m.end(0), 0)
  306. AssertError(IndexError, m.group, "112")
  307. AssertError(IndexError, m.group, 112)
  308. AssertError(IndexError, m.group, "-1")
  309. AssertError(IndexError, m.group, -1)
  310. AssertError(IndexError, m.start, 112)
  311. AssertError(IndexError, m.start, -1)
  312. AssertError(IndexError, m.end, "112")
  313. AssertError(IndexError, m.end, 112)
  314. AssertError(IndexError, m.end, "-1")
  315. AssertError(IndexError, m.end, -1)
  316. match = re.match(r'(?P<test>test)', 'test')
  317. AreEqual(match.start('test'), 0)
  318. AreEqual(match.end('test'), 4)
  319. def test_start_of_str():
  320. startOfStr = re.compile('^')
  321. AreEqual(startOfStr.match('bazbar', 1), None)
  322. AreEqual(startOfStr.match('bazbar', 0,0).span(), (0,0))
  323. AreEqual(startOfStr.match('bazbar', 1,2), None)
  324. AreEqual(startOfStr.match('bazbar', endpos=3).span(), (0,0))
  325. # check that groups in split RE are added properly
  326. def test_split():
  327. AreEqual(re.split('{(,)?}', '1 {} 2 {,} 3 {} 4'), ['1 ', None, ' 2 ', ',', ' 3 ', None, ' 4'])
  328. pnogrp = ','
  329. ptwogrp = '((,))'
  330. csv = '0,1,1,2,3,5,8,13,21,44'
  331. AreEqual(re.split(pnogrp, csv, 1), ['0', csv[2:]])
  332. AreEqual(re.split(pnogrp, csv, 2), ['0','1', csv[4:]])
  333. AreEqual(re.split(pnogrp, csv, 1000), re.split(pnogrp, csv))
  334. AreEqual(re.split(pnogrp, csv, 0), re.split(pnogrp, csv))
  335. AreEqual(re.split(pnogrp, csv, -1), [csv])
  336. ponegrp = '(,)'
  337. AreEqual(re.split(ponegrp, csv, 1), ['0', ',', csv[2:]])
  338. def test_escape():
  339. compiled = re.compile(re.escape("hi_"))
  340. all = re.compile('(.*)')
  341. AreEqual(all.search('abcdef', 3).group(0), 'def')
  342. AssertError(IndexError, re.match("a[bcd]*b", 'abcbd').group, 1)
  343. AreEqual(re.match('(a[bcd]*b)', 'abcbd').group(1), 'abcb')
  344. s = ''
  345. for i in range(32, 128):
  346. if not chr(i).isalnum():
  347. s = s + chr(i)
  348. x = re.escape(s)
  349. Assert(x == '\\ \\!\\"\\#\\$\\%\\&\\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\<\\=\\>\\?\\@\\[\\\\\\]\\^\\_\\`\\{\\|\\}\\~\\\x7f')
  350. x = re.compile(r'[\\A-Z\.\+]')
  351. Assert(x.search('aaaA\\B\\Caaa'))
  352. # From the docs: "^" matches only at the start of the string, or in MULTILINE mode also immediately
  353. # following a newline.
  354. # bug 827
  355. #m = re.compile("a").match("ba", 1) # succeed
  356. #AreEqual('a', m.group(0))
  357. #AreEqual(re.compile("^a").search("ba", 1), None) # fails; 'a' not at start
  358. #AreEqual(re.compile("^a").search("\na", 1), None) # fails; 'a' not at start
  359. #m = re.compile("^a", re.M).search("\na", 1) # succeed (multiline)
  360. #AreEqual('a', m.group(0))
  361. # bug 938
  362. #AreEqual(re.compile("^a", re.M).search("ba", 1), None) # fails; no preceding \n
  363. # findall
  364. def test_findall():
  365. for (x, y, z) in (
  366. ('\d+', '99 blahblahblah 183 blah 12 blah 7777 yada yada', ['99', '183', '12', '7777']),
  367. ('^\d+', '0blahblahblah blah blah yada yada1', ['0']),
  368. ('^\d+', 'blahblahblah blah blah yada yada1', []),
  369. ("(\d+)|(\w+)", "x = 999y + 23", [('', 'x'), ('999', ''), ('', 'y'), ('23', '')]),
  370. ("(\d)(\d\d)(\d\d\d)", "123456789123456789", [('1', '23', '456'), ('7', '89', '123'), ('4', '56', '789')]),
  371. (r"(?i)(\w+)\s+fish\b", "green fish black fish red fish blue fish", ['green', 'black', 'red', 'blue']),
  372. ('(a)(b)', 'abab', [('a', 'b'), ('a', 'b')]),
  373. ):
  374. AreEqual(re.findall(x, y), z)
  375. AreEqual(re.compile(x).findall(y), z)
  376. def test_match_groups():
  377. m = re.match('(?P<test>a)(b)', 'ab')
  378. Assert(m.groups() == ('a', 'b'))
  379. m = re.match('(u)(?P<test>v)(b)(?P<Named2>w)(x)(y)', 'uvbwxy')
  380. Assert(m.groups() == ('u', 'v', 'b', 'w', 'x', 'y'))
  381. def test_options():
  382. # coverage for ?iLmsux options in re.compile path
  383. tests = [ ("t(?=s)", "atreftsadbeatwttta", ['t']),
  384. ("t(?!s)", "atreftsadbeatststs", ['t']) ]
  385. # native implementation does not handle extensions specified in this way
  386. if is_cli:
  387. tests.extend([
  388. ("(?i:foo)", "fooFoo FOO fOo fo oFO O\n\t\nFo ofO O", ['foo', 'Foo', 'FOO', 'fOo']),
  389. ("(?im:^foo)", "fooFoo FOO fOo\n\t\nFoo\nFOO", ['foo', 'Foo', 'FOO']), # ignorecase, multiline (matches at beginning of string and at each newline)
  390. ("(?s:foo.*bar)", "foo yadayadayada\nyadayadayada bar", ['foo yadayadayada\nyadayadayada bar']), # dotall (make "." match any chr, including a newline)
  391. ("(?x:baz bar)", "bazbar foo bar bazbar \n\n\tbazbar", ['bazbar', 'bazbar', 'bazbar']), #verbose (ignore whitespace)
  392. ])
  393. for (x, y, z) in tests:
  394. AreEqual(re.findall(x, y), z)
  395. AreEqual(re.compile(x).findall(y), z)
  396. def test_bug858():
  397. pattern = r"""\(? #optional paren
  398. \)? #optional paren
  399. \d+ """
  400. c = re.compile(pattern, re.X)
  401. l = c.findall("989")
  402. Assert(l == ['989'])
  403. def test_finditer():
  404. # finditer
  405. matches = re.finditer("baz","barbazbarbazbar")
  406. num = 0
  407. for m in matches:
  408. num = num + 1
  409. AreEqual("baz", m.group(0))
  410. Assert(num == 2)
  411. matches = re.finditer("baz","barbazbarbazbar", re.L)
  412. num = 0
  413. for m in matches:
  414. num = num + 1
  415. AreEqual("baz", m.group(0))
  416. Assert(num == 2)
  417. matches = re.compile("baz").finditer("barbazbarbazbar", 0)
  418. num = 0
  419. for m in matches:
  420. num = num + 1
  421. AreEqual("baz", m.group(0))
  422. Assert(num == 2)
  423. matches = re.compile("baz").finditer("barbazbarbazbar", 14)
  424. num = 0
  425. for m in matches:
  426. num = num + 1
  427. AreEqual("baz", m.group(0))
  428. Assert(num == 0)
  429. matches = re.compile("baz").finditer("barbazbarbazbar", 0, 14)
  430. num = 0
  431. for m in matches:
  432. num = num + 1
  433. AreEqual("baz", m.group(0))
  434. Assert(num == 2)
  435. matches = re.compile("baz").finditer("barbazbarbazbar", 9, 12)
  436. num = 0
  437. for m in matches:
  438. num = num + 1
  439. AreEqual("baz", m.group(0))
  440. AreEqual(num, 1)
  441. matches = re.compile("baz").finditer("barbazbarbazbar", 9, 11)
  442. num = 0
  443. for m in matches:
  444. num = num + 1
  445. AreEqual("baz", m.group(0))
  446. AreEqual(num, 0)
  447. matches = re.compile("baz").finditer("barbazbarbazbar", 10, 12)
  448. num = 0
  449. for m in matches:
  450. num = num + 1
  451. AreEqual("baz", m.group(0))
  452. AreEqual(num, 0)
  453. def test_search():
  454. # search
  455. sp = re.search('super', 'blahsupersuper').span()
  456. Assert(sp == (4, 9))
  457. sp = re.search('super', 'superblahsuper').span()
  458. Assert(sp == (0, 5))
  459. #re.search.group() index error
  460. AreEqual(re.search("z.*z", "az123za").group(),'z123z')
  461. AreEqual(re.search("z.*z", "az12za").group(),'z12z')
  462. AreEqual(re.search("z.*z", "azza").group(),'zz')
  463. AreEqual(re.search("z123p+z", "az123ppppppppppza").group(),'z123ppppppppppz')
  464. AreEqual(re.search("z123p+z", "az123pza").group(),'z123pz')
  465. AreEqual(re.search("z123p?z", "az123pza").group(),'z123pz')
  466. AreEqual(re.search("z123p?z", "az123za").group(),'z123z')
  467. AreEqual(re.search('b', 'abc').string, 'abc')
  468. def test_subn():
  469. # subn
  470. tup = re.subn("ab", "cd", "abababababab")
  471. Assert(tup == ('cdcdcdcdcdcd', 6))
  472. tup = re.subn("ab", "cd", "abababababab", 0)
  473. Assert(tup == ('cdcdcdcdcdcd', 6))
  474. tup = re.subn("ab", "cd", "abababababab", 1)
  475. Assert(tup == ('cdababababab', 1))
  476. tup = re.subn("ab", "cd", "abababababab", 10)
  477. Assert(tup == ('cdcdcdcdcdcd', 6))
  478. tup = re.subn("ababab", "cd", "ab", 10)
  479. Assert(tup == ('ab', 0))
  480. tup = re.subn("ababab", "cd", "ab")
  481. Assert(tup == ('ab', 0))
  482. tup = re.subn("(ab)*", "cd", "abababababab", 10)
  483. Assert(tup == ('cd', 1))
  484. tup = re.subn("(ab)?", "cd", "abababababab", 10)
  485. Assert(tup == ('cdcdcdcdcdcd', 6))
  486. def test_groups():
  487. reg = re.compile("\[(?P<header>.*?)\]")
  488. m = reg.search("[DEFAULT]")
  489. Assert( m.groups() == ('DEFAULT',))
  490. Assert( m.group('header') == 'DEFAULT' )
  491. reg2 = re.compile("(?P<grp>\S+)?")
  492. m2 = reg2.search("")
  493. Assert ( m2.groups() == (None,))
  494. Assert ( m2.groups('Default') == ('Default',))
  495. def test_end():
  496. ex = re.compile(r'\s+')
  497. m = ex.match('(object Petal', 7)
  498. Assert (m.end(0) == 8)
  499. def test_lone_hat():
  500. """Single ^ reg-ex shouldn't match w/ a sub-set of a string"""
  501. sol = re.compile('^')
  502. AreEqual(sol.match('bazbar', 1, 2), None)
  503. def test_eol():
  504. r = re.compile(r'<(/|\Z)')
  505. s = r.search("<", 0)
  506. Assert(s != None)
  507. AreEqual(s.span(), (0, 1))
  508. AreEqual(s.group(0), '<')
  509. AreEqual(r.search("<Z", 0), None)
  510. def test_lastindex():
  511. for (pat, index) in [
  512. ('(a)b', 1), ('((a)(b))', 1), ('((ab))', 1),
  513. ('(a)(b)', 2),
  514. ('(a)?ab', None),
  515. ('(a)?b', 1),
  516. ]:
  517. AreEqual(re.match(pat, 'ab').lastindex, index)
  518. for (pat, index) in [
  519. ('(a)ab', 1),
  520. ('(a)(a)b', 2),
  521. ('(a)(a)(b)', 3),
  522. ('((a)a(b))', 1),
  523. ('((a)(a)(b))', 1),
  524. ('(a(a)(b))', 1),
  525. ('(a(a)?(b))', 1),
  526. ('(aa(a)?(b))', 1),
  527. ('(aa(b))', 1),
  528. ('(a(ab))', 1),
  529. ('(a)?ab', 1),
  530. ('a(a)?ab', None),
  531. ('a(a)?(a)?b', 1),
  532. ('a(a)?(a)?(b)', 3),
  533. ('a(a)b', 1),
  534. ('(a(a))(b)', 3),
  535. ('(a(a))b', 1),
  536. ('((a)(a))(b)', 4),
  537. ('((a)(a))b', 1),
  538. ]:
  539. AreEqual(re.match(pat, 'aab').lastindex, index)
  540. def test_match_endpos():
  541. AreEqual(re.compile("(abc){1}").match("abc", -5, 5).span(), (0, 3))
  542. AreEqual(re.compile("(abc)").match("abcxyzabc123 and...").endpos, 19)
  543. def test_match_groupdict():
  544. AreEqual(re.compile("(abc)").match("abcxyzabc123 and...").groupdict(), {})
  545. def test_sub_question():
  546. AreEqual(re.sub("a+", "\?", "abc"), '\\?bc')
  547. def test_match_regs():
  548. foo = re.match('bazbar(mm)+(abc)(xyz)', "bazbarmmmmabcxyz123456abc")
  549. AreEqual(foo.regs, ((0, 16), (8, 10), (10, 13), (13, 16)))
  550. def test_empty_split():
  551. AreEqual(re.split(':*', 'a:b::c'), ['a', 'b', 'c'])
# Module entry point: pass this module's name to run_test.
# NOTE(review): run_test is presumably supplied by the star import from
# lib.assert_util; what it does with the name is not visible here - confirm.
run_test(__name__)