PageRenderTime 50ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/lib-python/2.7/sre_parse.py

https://bitbucket.org/dac_io/pypy
Python | 790 lines | 695 code | 45 blank | 50 comment | 137 complexity | 61c13bb5b47b2ce4f8b1f783ce0621f7 MD5 | raw file
  1. #
  2. # Secret Labs' Regular Expression Engine
  3. #
  4. # convert re-style regular expression to sre pattern
  5. #
  6. # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
  7. #
  8. # See the sre.py file for information on usage and redistribution.
  9. #
  10. """Internal support module for sre"""
  11. # XXX: show string offset and offending character for all errors
  12. import sys
  13. from sre_constants import *
# characters that start a pattern construct; a token whose first
# character is not listed here is parsed as a plain literal
SPECIAL_CHARS = ".\\[{()*+?^$|"
# characters that start a repeat operator applied to the previous item
REPEAT_CHARS = "*+?{"

# character sets used when scanning numeric escapes and repeat counts
DIGITS = set("0123456789")

OCTDIGITS = set("01234567")
HEXDIGITS = set("0123456789abcdefABCDEF")

# tokens skipped by the parser when SRE_FLAG_VERBOSE is set
WHITESPACE = set(" \t\n\r\v\f")

# escapes that stand for one literal character
ESCAPES = {
    r"\a": (LITERAL, ord("\a")),
    r"\b": (LITERAL, ord("\b")),
    r"\f": (LITERAL, ord("\f")),
    r"\n": (LITERAL, ord("\n")),
    r"\r": (LITERAL, ord("\r")),
    r"\t": (LITERAL, ord("\t")),
    r"\v": (LITERAL, ord("\v")),
    r"\\": (LITERAL, ord("\\"))
}

# escapes that stand for a position assertion (AT) or a character
# category (IN).  note that r"\b" is also a key of ESCAPES, so the
# order in which the two tables are consulted decides its meaning
CATEGORIES = {
    r"\A": (AT, AT_BEGINNING_STRING), # start of string
    r"\b": (AT, AT_BOUNDARY),
    r"\B": (AT, AT_NON_BOUNDARY),
    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
    r"\Z": (AT, AT_END_STRING), # end of string
}

# letters accepted inside an inline "(?...)" flag group, mapped to the
# flag bit each one switches on
FLAGS = {
    # standard flags
    "i": SRE_FLAG_IGNORECASE,
    "L": SRE_FLAG_LOCALE,
    "m": SRE_FLAG_MULTILINE,
    "s": SRE_FLAG_DOTALL,
    "x": SRE_FLAG_VERBOSE,
    # extensions
    "t": SRE_FLAG_TEMPLATE,
    "u": SRE_FLAG_UNICODE,
}
  53. class Pattern:
  54. # master pattern object. keeps track of global attributes
  55. def __init__(self):
  56. self.flags = 0
  57. self.open = []
  58. self.groups = 1
  59. self.groupdict = {}
  60. def opengroup(self, name=None):
  61. gid = self.groups
  62. self.groups = gid + 1
  63. if name is not None:
  64. ogid = self.groupdict.get(name, None)
  65. if ogid is not None:
  66. raise error, ("redefinition of group name %s as group %d; "
  67. "was group %d" % (repr(name), gid, ogid))
  68. self.groupdict[name] = gid
  69. self.open.append(gid)
  70. return gid
  71. def closegroup(self, gid):
  72. self.open.remove(gid)
  73. def checkgroup(self, gid):
  74. return gid < self.groups and gid not in self.open
  75. class SubPattern:
  76. # a subpattern, in intermediate form
  77. def __init__(self, pattern, data=None):
  78. self.pattern = pattern
  79. if data is None:
  80. data = []
  81. self.data = data
  82. self.width = None
  83. def dump(self, level=0):
  84. nl = 1
  85. seqtypes = type(()), type([])
  86. for op, av in self.data:
  87. print level*" " + op,; nl = 0
  88. if op == "in":
  89. # member sublanguage
  90. print; nl = 1
  91. for op, a in av:
  92. print (level+1)*" " + op, a
  93. elif op == "branch":
  94. print; nl = 1
  95. i = 0
  96. for a in av[1]:
  97. if i > 0:
  98. print level*" " + "or"
  99. a.dump(level+1); nl = 1
  100. i = i + 1
  101. elif type(av) in seqtypes:
  102. for a in av:
  103. if isinstance(a, SubPattern):
  104. if not nl: print
  105. a.dump(level+1); nl = 1
  106. else:
  107. print a, ; nl = 0
  108. else:
  109. print av, ; nl = 0
  110. if not nl: print
  111. def __repr__(self):
  112. return repr(self.data)
  113. def __len__(self):
  114. return len(self.data)
  115. def __delitem__(self, index):
  116. del self.data[index]
  117. def __getitem__(self, index):
  118. if isinstance(index, slice):
  119. return SubPattern(self.pattern, self.data[index])
  120. return self.data[index]
  121. def __setitem__(self, index, code):
  122. self.data[index] = code
  123. def insert(self, index, code):
  124. self.data.insert(index, code)
  125. def append(self, code):
  126. self.data.append(code)
  127. def getwidth(self):
  128. # determine the width (min, max) for this subpattern
  129. if self.width:
  130. return self.width
  131. lo = hi = 0L
  132. UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
  133. REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
  134. for op, av in self.data:
  135. if op is BRANCH:
  136. i = sys.maxint
  137. j = 0
  138. for av in av[1]:
  139. l, h = av.getwidth()
  140. i = min(i, l)
  141. j = max(j, h)
  142. lo = lo + i
  143. hi = hi + j
  144. elif op is CALL:
  145. i, j = av.getwidth()
  146. lo = lo + i
  147. hi = hi + j
  148. elif op is SUBPATTERN:
  149. i, j = av[1].getwidth()
  150. lo = lo + i
  151. hi = hi + j
  152. elif op in REPEATCODES:
  153. i, j = av[2].getwidth()
  154. lo = lo + long(i) * av[0]
  155. hi = hi + long(j) * av[1]
  156. elif op in UNITCODES:
  157. lo = lo + 1
  158. hi = hi + 1
  159. elif op == SUCCESS:
  160. break
  161. self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
  162. return self.width
  163. class Tokenizer:
  164. def __init__(self, string):
  165. self.string = string
  166. self.index = 0
  167. self.__next()
  168. def __next(self):
  169. if self.index >= len(self.string):
  170. self.next = None
  171. return
  172. char = self.string[self.index]
  173. if char[0] == "\\":
  174. try:
  175. c = self.string[self.index + 1]
  176. except IndexError:
  177. raise error, "bogus escape (end of line)"
  178. char = char + c
  179. self.index = self.index + len(char)
  180. self.next = char
  181. def match(self, char, skip=1):
  182. if char == self.next:
  183. if skip:
  184. self.__next()
  185. return 1
  186. return 0
  187. def get(self):
  188. this = self.next
  189. self.__next()
  190. return this
  191. def tell(self):
  192. return self.index, self.next
  193. def seek(self, index):
  194. self.index, self.next = index
  195. def isident(char):
  196. return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
  197. def isdigit(char):
  198. return "0" <= char <= "9"
  199. def isname(name):
  200. # check that group name is a valid string
  201. if not isident(name[0]):
  202. return False
  203. for char in name[1:]:
  204. if not isident(char) and not isdigit(char):
  205. return False
  206. return True
  207. def _class_escape(source, escape):
  208. # handle escape code inside character class
  209. code = ESCAPES.get(escape)
  210. if code:
  211. return code
  212. code = CATEGORIES.get(escape)
  213. if code:
  214. return code
  215. try:
  216. c = escape[1:2]
  217. if c == "x":
  218. # hexadecimal escape (exactly two digits)
  219. while source.next in HEXDIGITS and len(escape) < 4:
  220. escape = escape + source.get()
  221. escape = escape[2:]
  222. if len(escape) != 2:
  223. raise error, "bogus escape: %s" % repr("\\" + escape)
  224. return LITERAL, int(escape, 16) & 0xff
  225. elif c in OCTDIGITS:
  226. # octal escape (up to three digits)
  227. while source.next in OCTDIGITS and len(escape) < 4:
  228. escape = escape + source.get()
  229. escape = escape[1:]
  230. return LITERAL, int(escape, 8) & 0xff
  231. elif c in DIGITS:
  232. raise error, "bogus escape: %s" % repr(escape)
  233. if len(escape) == 2:
  234. return LITERAL, ord(escape[1])
  235. except ValueError:
  236. pass
  237. raise error, "bogus escape: %s" % repr(escape)
  238. def _escape(source, escape, state):
  239. # handle escape code in expression
  240. code = CATEGORIES.get(escape)
  241. if code:
  242. return code
  243. code = ESCAPES.get(escape)
  244. if code:
  245. return code
  246. try:
  247. c = escape[1:2]
  248. if c == "x":
  249. # hexadecimal escape
  250. while source.next in HEXDIGITS and len(escape) < 4:
  251. escape = escape + source.get()
  252. if len(escape) != 4:
  253. raise ValueError
  254. return LITERAL, int(escape[2:], 16) & 0xff
  255. elif c == "0":
  256. # octal escape
  257. while source.next in OCTDIGITS and len(escape) < 4:
  258. escape = escape + source.get()
  259. return LITERAL, int(escape[1:], 8) & 0xff
  260. elif c in DIGITS:
  261. # octal escape *or* decimal group reference (sigh)
  262. if source.next in DIGITS:
  263. escape = escape + source.get()
  264. if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
  265. source.next in OCTDIGITS):
  266. # got three octal digits; this is an octal escape
  267. escape = escape + source.get()
  268. return LITERAL, int(escape[1:], 8) & 0xff
  269. # not an octal escape, so this is a group reference
  270. group = int(escape[1:])
  271. if group < state.groups:
  272. if not state.checkgroup(group):
  273. raise error, "cannot refer to open group"
  274. return GROUPREF, group
  275. raise ValueError
  276. if len(escape) == 2:
  277. return LITERAL, ord(escape[1])
  278. except ValueError:
  279. pass
  280. raise error, "bogus escape: %s" % repr(escape)
  281. def _parse_sub(source, state, nested=1):
  282. # parse an alternation: a|b|c
  283. items = []
  284. itemsappend = items.append
  285. sourcematch = source.match
  286. while 1:
  287. itemsappend(_parse(source, state))
  288. if sourcematch("|"):
  289. continue
  290. if not nested:
  291. break
  292. if not source.next or sourcematch(")", 0):
  293. break
  294. else:
  295. raise error, "pattern not properly closed"
  296. if len(items) == 1:
  297. return items[0]
  298. subpattern = SubPattern(state)
  299. subpatternappend = subpattern.append
  300. # check if all items share a common prefix
  301. while 1:
  302. prefix = None
  303. for item in items:
  304. if not item:
  305. break
  306. if prefix is None:
  307. prefix = item[0]
  308. elif item[0] != prefix:
  309. break
  310. else:
  311. # all subitems start with a common "prefix".
  312. # move it out of the branch
  313. for item in items:
  314. del item[0]
  315. subpatternappend(prefix)
  316. continue # check next one
  317. break
  318. # check if the branch can be replaced by a character set
  319. for item in items:
  320. if len(item) != 1 or item[0][0] != LITERAL:
  321. break
  322. else:
  323. # we can store this as a character set instead of a
  324. # branch (the compiler may optimize this even more)
  325. set = []
  326. setappend = set.append
  327. for item in items:
  328. setappend(item[0])
  329. subpatternappend((IN, set))
  330. return subpattern
  331. subpattern.append((BRANCH, (None, items)))
  332. return subpattern
  333. def _parse_sub_cond(source, state, condgroup):
  334. item_yes = _parse(source, state)
  335. if source.match("|"):
  336. item_no = _parse(source, state)
  337. if source.match("|"):
  338. raise error, "conditional backref with more than two branches"
  339. else:
  340. item_no = None
  341. if source.next and not source.match(")", 0):
  342. raise error, "pattern not properly closed"
  343. subpattern = SubPattern(state)
  344. subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
  345. return subpattern
  346. _PATTERNENDERS = set("|)")
  347. _ASSERTCHARS = set("=!<")
  348. _LOOKBEHINDASSERTCHARS = set("=!")
  349. _REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
  350. def _parse(source, state):
  351. # parse a simple pattern
  352. subpattern = SubPattern(state)
  353. # precompute constants into local variables
  354. subpatternappend = subpattern.append
  355. sourceget = source.get
  356. sourcematch = source.match
  357. _len = len
  358. PATTERNENDERS = _PATTERNENDERS
  359. ASSERTCHARS = _ASSERTCHARS
  360. LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS
  361. REPEATCODES = _REPEATCODES
  362. while 1:
  363. if source.next in PATTERNENDERS:
  364. break # end of subpattern
  365. this = sourceget()
  366. if this is None:
  367. break # end of pattern
  368. if state.flags & SRE_FLAG_VERBOSE:
  369. # skip whitespace and comments
  370. if this in WHITESPACE:
  371. continue
  372. if this == "#":
  373. while 1:
  374. this = sourceget()
  375. if this in (None, "\n"):
  376. break
  377. continue
  378. if this and this[0] not in SPECIAL_CHARS:
  379. subpatternappend((LITERAL, ord(this)))
  380. elif this == "[":
  381. # character set
  382. set = []
  383. setappend = set.append
  384. ## if sourcematch(":"):
  385. ## pass # handle character classes
  386. if sourcematch("^"):
  387. setappend((NEGATE, None))
  388. # check remaining characters
  389. start = set[:]
  390. while 1:
  391. this = sourceget()
  392. if this == "]" and set != start:
  393. break
  394. elif this and this[0] == "\\":
  395. code1 = _class_escape(source, this)
  396. elif this:
  397. code1 = LITERAL, ord(this)
  398. else:
  399. raise error, "unexpected end of regular expression"
  400. if sourcematch("-"):
  401. # potential range
  402. this = sourceget()
  403. if this == "]":
  404. if code1[0] is IN:
  405. code1 = code1[1][0]
  406. setappend(code1)
  407. setappend((LITERAL, ord("-")))
  408. break
  409. elif this:
  410. if this[0] == "\\":
  411. code2 = _class_escape(source, this)
  412. else:
  413. code2 = LITERAL, ord(this)
  414. if code1[0] != LITERAL or code2[0] != LITERAL:
  415. raise error, "bad character range"
  416. lo = code1[1]
  417. hi = code2[1]
  418. if hi < lo:
  419. raise error, "bad character range"
  420. setappend((RANGE, (lo, hi)))
  421. else:
  422. raise error, "unexpected end of regular expression"
  423. else:
  424. if code1[0] is IN:
  425. code1 = code1[1][0]
  426. setappend(code1)
  427. # XXX: <fl> should move set optimization to compiler!
  428. if _len(set)==1 and set[0][0] is LITERAL:
  429. subpatternappend(set[0]) # optimization
  430. elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
  431. subpatternappend((NOT_LITERAL, set[1][1])) # optimization
  432. else:
  433. # XXX: <fl> should add charmap optimization here
  434. subpatternappend((IN, set))
  435. elif this and this[0] in REPEAT_CHARS:
  436. # repeat previous item
  437. if this == "?":
  438. min, max = 0, 1
  439. elif this == "*":
  440. min, max = 0, MAXREPEAT
  441. elif this == "+":
  442. min, max = 1, MAXREPEAT
  443. elif this == "{":
  444. if source.next == "}":
  445. subpatternappend((LITERAL, ord(this)))
  446. continue
  447. here = source.tell()
  448. min, max = 0, MAXREPEAT
  449. lo = hi = ""
  450. while source.next in DIGITS:
  451. lo = lo + source.get()
  452. if sourcematch(","):
  453. while source.next in DIGITS:
  454. hi = hi + sourceget()
  455. else:
  456. hi = lo
  457. if not sourcematch("}"):
  458. subpatternappend((LITERAL, ord(this)))
  459. source.seek(here)
  460. continue
  461. if lo:
  462. min = int(lo)
  463. if hi:
  464. max = int(hi)
  465. if max < min:
  466. raise error, "bad repeat interval"
  467. else:
  468. raise error, "not supported"
  469. # figure out which item to repeat
  470. if subpattern:
  471. item = subpattern[-1:]
  472. else:
  473. item = None
  474. if not item or (_len(item) == 1 and item[0][0] == AT):
  475. raise error, "nothing to repeat"
  476. if item[0][0] in REPEATCODES:
  477. raise error, "multiple repeat"
  478. if sourcematch("?"):
  479. subpattern[-1] = (MIN_REPEAT, (min, max, item))
  480. else:
  481. subpattern[-1] = (MAX_REPEAT, (min, max, item))
  482. elif this == ".":
  483. subpatternappend((ANY, None))
  484. elif this == "(":
  485. group = 1
  486. name = None
  487. condgroup = None
  488. if sourcematch("?"):
  489. group = 0
  490. # options
  491. if sourcematch("P"):
  492. # python extensions
  493. if sourcematch("<"):
  494. # named group: skip forward to end of name
  495. name = ""
  496. while 1:
  497. char = sourceget()
  498. if char is None:
  499. raise error, "unterminated name"
  500. if char == ">":
  501. break
  502. name = name + char
  503. group = 1
  504. if not isname(name):
  505. raise error, "bad character in group name"
  506. elif sourcematch("="):
  507. # named backreference
  508. name = ""
  509. while 1:
  510. char = sourceget()
  511. if char is None:
  512. raise error, "unterminated name"
  513. if char == ")":
  514. break
  515. name = name + char
  516. if not isname(name):
  517. raise error, "bad character in group name"
  518. gid = state.groupdict.get(name)
  519. if gid is None:
  520. raise error, "unknown group name"
  521. subpatternappend((GROUPREF, gid))
  522. continue
  523. else:
  524. char = sourceget()
  525. if char is None:
  526. raise error, "unexpected end of pattern"
  527. raise error, "unknown specifier: ?P%s" % char
  528. elif sourcematch(":"):
  529. # non-capturing group
  530. group = 2
  531. elif sourcematch("#"):
  532. # comment
  533. while 1:
  534. if source.next is None or source.next == ")":
  535. break
  536. sourceget()
  537. if not sourcematch(")"):
  538. raise error, "unbalanced parenthesis"
  539. continue
  540. elif source.next in ASSERTCHARS:
  541. # lookahead assertions
  542. char = sourceget()
  543. dir = 1
  544. if char == "<":
  545. if source.next not in LOOKBEHINDASSERTCHARS:
  546. raise error, "syntax error"
  547. dir = -1 # lookbehind
  548. char = sourceget()
  549. p = _parse_sub(source, state)
  550. if not sourcematch(")"):
  551. raise error, "unbalanced parenthesis"
  552. if char == "=":
  553. subpatternappend((ASSERT, (dir, p)))
  554. else:
  555. subpatternappend((ASSERT_NOT, (dir, p)))
  556. continue
  557. elif sourcematch("("):
  558. # conditional backreference group
  559. condname = ""
  560. while 1:
  561. char = sourceget()
  562. if char is None:
  563. raise error, "unterminated name"
  564. if char == ")":
  565. break
  566. condname = condname + char
  567. group = 2
  568. if isname(condname):
  569. condgroup = state.groupdict.get(condname)
  570. if condgroup is None:
  571. raise error, "unknown group name"
  572. else:
  573. try:
  574. condgroup = int(condname)
  575. except ValueError:
  576. raise error, "bad character in group name"
  577. else:
  578. # flags
  579. if not source.next in FLAGS:
  580. raise error, "unexpected end of pattern"
  581. while source.next in FLAGS:
  582. state.flags = state.flags | FLAGS[sourceget()]
  583. if group:
  584. # parse group contents
  585. if group == 2:
  586. # anonymous group
  587. group = None
  588. else:
  589. group = state.opengroup(name)
  590. if condgroup:
  591. p = _parse_sub_cond(source, state, condgroup)
  592. else:
  593. p = _parse_sub(source, state)
  594. if not sourcematch(")"):
  595. raise error, "unbalanced parenthesis"
  596. if group is not None:
  597. state.closegroup(group)
  598. subpatternappend((SUBPATTERN, (group, p)))
  599. else:
  600. while 1:
  601. char = sourceget()
  602. if char is None:
  603. raise error, "unexpected end of pattern"
  604. if char == ")":
  605. break
  606. raise error, "unknown extension"
  607. elif this == "^":
  608. subpatternappend((AT, AT_BEGINNING))
  609. elif this == "$":
  610. subpattern.append((AT, AT_END))
  611. elif this and this[0] == "\\":
  612. code = _escape(source, this, state)
  613. subpatternappend(code)
  614. else:
  615. raise error, "parser error"
  616. return subpattern
  617. def parse(str, flags=0, pattern=None):
  618. # parse 're' pattern into list of (opcode, argument) tuples
  619. source = Tokenizer(str)
  620. if pattern is None:
  621. pattern = Pattern()
  622. pattern.flags = flags
  623. pattern.str = str
  624. p = _parse_sub(source, pattern, 0)
  625. tail = source.get()
  626. if tail == ")":
  627. raise error, "unbalanced parenthesis"
  628. elif tail:
  629. raise error, "bogus characters at end of regular expression"
  630. if flags & SRE_FLAG_DEBUG:
  631. p.dump()
  632. if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
  633. # the VERBOSE flag was switched on inside the pattern. to be
  634. # on the safe side, we'll parse the whole thing again...
  635. return parse(str, p.pattern.flags)
  636. return p
  637. def parse_template(source, pattern):
  638. # parse 're' replacement string into list of literals and
  639. # group references
  640. s = Tokenizer(source)
  641. sget = s.get
  642. p = []
  643. a = p.append
  644. def literal(literal, p=p, pappend=a):
  645. if p and p[-1][0] is LITERAL:
  646. p[-1] = LITERAL, p[-1][1] + literal
  647. else:
  648. pappend((LITERAL, literal))
  649. sep = source[:0]
  650. if type(sep) is type(""):
  651. makechar = chr
  652. else:
  653. makechar = unichr
  654. while 1:
  655. this = sget()
  656. if this is None:
  657. break # end of replacement string
  658. if this and this[0] == "\\":
  659. # group
  660. c = this[1:2]
  661. if c == "g":
  662. name = ""
  663. if s.match("<"):
  664. while 1:
  665. char = sget()
  666. if char is None:
  667. raise error, "unterminated group name"
  668. if char == ">":
  669. break
  670. name = name + char
  671. if not name:
  672. raise error, "bad group name"
  673. try:
  674. index = int(name)
  675. if index < 0:
  676. raise error, "negative group number"
  677. except ValueError:
  678. if not isname(name):
  679. raise error, "bad character in group name"
  680. try:
  681. index = pattern.groupindex[name]
  682. except KeyError:
  683. raise IndexError, "unknown group name"
  684. a((MARK, index))
  685. elif c == "0":
  686. if s.next in OCTDIGITS:
  687. this = this + sget()
  688. if s.next in OCTDIGITS:
  689. this = this + sget()
  690. literal(makechar(int(this[1:], 8) & 0xff))
  691. elif c in DIGITS:
  692. isoctal = False
  693. if s.next in DIGITS:
  694. this = this + sget()
  695. if (c in OCTDIGITS and this[2] in OCTDIGITS and
  696. s.next in OCTDIGITS):
  697. this = this + sget()
  698. isoctal = True
  699. literal(makechar(int(this[1:], 8) & 0xff))
  700. if not isoctal:
  701. a((MARK, int(this[1:])))
  702. else:
  703. try:
  704. this = makechar(ESCAPES[this][1])
  705. except KeyError:
  706. pass
  707. literal(this)
  708. else:
  709. literal(this)
  710. # convert template to groups and literals lists
  711. i = 0
  712. groups = []
  713. groupsappend = groups.append
  714. literals = [None] * len(p)
  715. for c, s in p:
  716. if c is MARK:
  717. groupsappend((i, s))
  718. # literal[i] is already None
  719. else:
  720. literals[i] = s
  721. i = i + 1
  722. return groups, literals
  723. def expand_template(template, match):
  724. g = match.group
  725. sep = match.string[:0]
  726. groups, literals = template
  727. literals = literals[:]
  728. try:
  729. for index, group in groups:
  730. literals[index] = s = g(group)
  731. if s is None:
  732. raise error, "unmatched group"
  733. except IndexError:
  734. raise error, "invalid group reference"
  735. return sep.join(literals)