/Lib/sre_parse.py

http://unladen-swallow.googlecode.com/ · Python · 796 lines · 675 code · 56 blank · 65 comment · 260 complexity · cb303a35856e78bdc9f7d4420c6b15e3 MD5 · raw file

  1. #
  2. # Secret Labs' Regular Expression Engine
  3. #
  4. # convert re-style regular expression to sre pattern
  5. #
  6. # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
  7. #
  8. # See the sre.py file for information on usage and redistribution.
  9. #
  10. """Internal support module for sre"""
  11. # XXX: show string offset and offending character for all errors
  12. import sys
  13. from sre_constants import *
  14. def set(seq):
  15. s = {}
  16. for elem in seq:
  17. s[elem] = 1
  18. return s
# characters that _parse() does not treat as plain literals
SPECIAL_CHARS = ".\\[{()*+?^$|"
# characters that introduce a repetition operator
REPEAT_CHARS = "*+?{"

# membership tables built with the set() helper defined above
DIGITS = set("0123456789")
OCTDIGITS = set("01234567")
HEXDIGITS = set("0123456789abcdefABCDEF")
# skipped between tokens when the VERBOSE flag is set
WHITESPACE = set(" \t\n\r\v\f")
  25. ESCAPES = {
  26. r"\a": (LITERAL, ord("\a")),
  27. r"\b": (LITERAL, ord("\b")),
  28. r"\f": (LITERAL, ord("\f")),
  29. r"\n": (LITERAL, ord("\n")),
  30. r"\r": (LITERAL, ord("\r")),
  31. r"\t": (LITERAL, ord("\t")),
  32. r"\v": (LITERAL, ord("\v")),
  33. r"\\": (LITERAL, ord("\\"))
  34. }
# Escape sequences that stand for a position assertion (AT, ...) or for a
# character category (wrapped in a one-element IN list).  _escape() looks
# here before ESCAPES, so outside a character class "\b" is a word
# boundary; _class_escape() looks in ESCAPES first.
CATEGORIES = {
    r"\A": (AT, AT_BEGINNING_STRING), # start of string
    r"\b": (AT, AT_BOUNDARY),
    r"\B": (AT, AT_NON_BOUNDARY),
    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
    r"\Z": (AT, AT_END_STRING), # end of string
}
# Maps each letter accepted in an inline "(?...)" flag group to the flag
# bit that _parse() ORs into the pattern state's flags.
FLAGS = {
    # standard flags
    "i": SRE_FLAG_IGNORECASE,
    "L": SRE_FLAG_LOCALE,
    "m": SRE_FLAG_MULTILINE,
    "s": SRE_FLAG_DOTALL,
    "x": SRE_FLAG_VERBOSE,
    # extensions
    "t": SRE_FLAG_TEMPLATE,
    "u": SRE_FLAG_UNICODE,
}
  58. class Pattern:
  59. # master pattern object. keeps track of global attributes
  60. def __init__(self):
  61. self.flags = 0
  62. self.open = []
  63. self.groups = 1
  64. self.groupdict = {}
  65. def opengroup(self, name=None):
  66. gid = self.groups
  67. self.groups = gid + 1
  68. if name is not None:
  69. ogid = self.groupdict.get(name, None)
  70. if ogid is not None:
  71. raise error, ("redefinition of group name %s as group %d; "
  72. "was group %d" % (repr(name), gid, ogid))
  73. self.groupdict[name] = gid
  74. self.open.append(gid)
  75. return gid
  76. def closegroup(self, gid):
  77. self.open.remove(gid)
  78. def checkgroup(self, gid):
  79. return gid < self.groups and gid not in self.open
class SubPattern:
    # a subpattern, in intermediate form
    #
    # pattern: the state object handed in by the parser (the parsing
    #          functions in this module pass their ``state``)
    # data:    list of (opcode, argument) items
    # width:   cached result of getwidth(), None until first computed
    def __init__(self, pattern, data=None):
        self.pattern = pattern
        if data is None:
            # fresh list per instance
            data = []
        self.data = data
        self.width = None
    def dump(self, level=0):
        # print a listing of the items, indented according to ``level``;
        # nested SubPattern arguments are dumped at level+1
        nl = 1  # true while the last thing printed ended the line
        seqtypes = type(()), type([])
        for op, av in self.data:
            print level*" " + op,; nl = 0
            if op == "in":
                # member sublanguage
                print; nl = 1
                for op, a in av:
                    print (level+1)*" " + op, a
            elif op == "branch":
                print; nl = 1
                i = 0
                for a in av[1]:
                    # separate alternatives with an "or" line
                    if i > 0:
                        print level*" " + "or"
                    a.dump(level+1); nl = 1
                    i = i + 1
            elif type(av) in seqtypes:
                for a in av:
                    if isinstance(a, SubPattern):
                        if not nl: print
                        a.dump(level+1); nl = 1
                    else:
                        print a, ; nl = 0
            else:
                print av, ; nl = 0
            # finish the line if the item left it open
            if not nl: print
    def __repr__(self):
        return repr(self.data)
    def __len__(self):
        return len(self.data)
    def __delitem__(self, index):
        del self.data[index]
    def __getitem__(self, index):
        # a slice yields a new SubPattern sharing the same pattern object;
        # a plain index yields the stored item itself
        if isinstance(index, slice):
            return SubPattern(self.pattern, self.data[index])
        return self.data[index]
    def __setitem__(self, index, code):
        self.data[index] = code
    def insert(self, index, code):
        self.data.insert(index, code)
    def append(self, code):
        self.data.append(code)
    def getwidth(self):
        # determine the width (min, max) for this subpattern
        # the result is cached in self.width and capped at sys.maxint
        if self.width:
            return self.width
        lo = hi = 0L
        UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
        REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
        for op, av in self.data:
            if op is BRANCH:
                # narrowest minimum and widest maximum over all alternatives
                i = sys.maxint
                j = 0
                for av in av[1]:
                    l, h = av.getwidth()
                    i = min(i, l)
                    j = max(j, h)
                lo = lo + i
                hi = hi + j
            elif op is CALL:
                i, j = av.getwidth()
                lo = lo + i
                hi = hi + j
            elif op is SUBPATTERN:
                i, j = av[1].getwidth()
                lo = lo + i
                hi = hi + j
            elif op in REPEATCODES:
                # av is (min count, max count, repeated subpattern)
                i, j = av[2].getwidth()
                lo = lo + long(i) * av[0]
                hi = hi + long(j) * av[1]
            elif op in UNITCODES:
                # each of these matches exactly one character
                lo = lo + 1
                hi = hi + 1
            elif op == SUCCESS:
                break
            # any other opcode adds nothing to either bound
        self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
        return self.width
  168. class Tokenizer:
  169. def __init__(self, string):
  170. self.string = string
  171. self.index = 0
  172. self.__next()
  173. def __next(self):
  174. if self.index >= len(self.string):
  175. self.next = None
  176. return
  177. char = self.string[self.index]
  178. if char[0] == "\\":
  179. try:
  180. c = self.string[self.index + 1]
  181. except IndexError:
  182. raise error, "bogus escape (end of line)"
  183. char = char + c
  184. self.index = self.index + len(char)
  185. self.next = char
  186. def match(self, char, skip=1):
  187. if char == self.next:
  188. if skip:
  189. self.__next()
  190. return 1
  191. return 0
  192. def get(self):
  193. this = self.next
  194. self.__next()
  195. return this
  196. def tell(self):
  197. return self.index, self.next
  198. def seek(self, index):
  199. self.index, self.next = index
  200. def isident(char):
  201. return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
  202. def isdigit(char):
  203. return "0" <= char <= "9"
  204. def isname(name):
  205. # check that group name is a valid string
  206. if not isident(name[0]):
  207. return False
  208. for char in name[1:]:
  209. if not isident(char) and not isdigit(char):
  210. return False
  211. return True
  212. def _class_escape(source, escape):
  213. # handle escape code inside character class
  214. code = ESCAPES.get(escape)
  215. if code:
  216. return code
  217. code = CATEGORIES.get(escape)
  218. if code:
  219. return code
  220. try:
  221. c = escape[1:2]
  222. if c == "x":
  223. # hexadecimal escape (exactly two digits)
  224. while source.next in HEXDIGITS and len(escape) < 4:
  225. escape = escape + source.get()
  226. escape = escape[2:]
  227. if len(escape) != 2:
  228. raise error, "bogus escape: %s" % repr("\\" + escape)
  229. return LITERAL, int(escape, 16) & 0xff
  230. elif c in OCTDIGITS:
  231. # octal escape (up to three digits)
  232. while source.next in OCTDIGITS and len(escape) < 4:
  233. escape = escape + source.get()
  234. escape = escape[1:]
  235. return LITERAL, int(escape, 8) & 0xff
  236. elif c in DIGITS:
  237. raise error, "bogus escape: %s" % repr(escape)
  238. if len(escape) == 2:
  239. return LITERAL, ord(escape[1])
  240. except ValueError:
  241. pass
  242. raise error, "bogus escape: %s" % repr(escape)
  243. def _escape(source, escape, state):
  244. # handle escape code in expression
  245. code = CATEGORIES.get(escape)
  246. if code:
  247. return code
  248. code = ESCAPES.get(escape)
  249. if code:
  250. return code
  251. try:
  252. c = escape[1:2]
  253. if c == "x":
  254. # hexadecimal escape
  255. while source.next in HEXDIGITS and len(escape) < 4:
  256. escape = escape + source.get()
  257. if len(escape) != 4:
  258. raise ValueError
  259. return LITERAL, int(escape[2:], 16) & 0xff
  260. elif c == "0":
  261. # octal escape
  262. while source.next in OCTDIGITS and len(escape) < 4:
  263. escape = escape + source.get()
  264. return LITERAL, int(escape[1:], 8) & 0xff
  265. elif c in DIGITS:
  266. # octal escape *or* decimal group reference (sigh)
  267. if source.next in DIGITS:
  268. escape = escape + source.get()
  269. if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
  270. source.next in OCTDIGITS):
  271. # got three octal digits; this is an octal escape
  272. escape = escape + source.get()
  273. return LITERAL, int(escape[1:], 8) & 0xff
  274. # not an octal escape, so this is a group reference
  275. group = int(escape[1:])
  276. if group < state.groups:
  277. if not state.checkgroup(group):
  278. raise error, "cannot refer to open group"
  279. return GROUPREF, group
  280. raise ValueError
  281. if len(escape) == 2:
  282. return LITERAL, ord(escape[1])
  283. except ValueError:
  284. pass
  285. raise error, "bogus escape: %s" % repr(escape)
  286. def _parse_sub(source, state, nested=1):
  287. # parse an alternation: a|b|c
  288. items = []
  289. itemsappend = items.append
  290. sourcematch = source.match
  291. while 1:
  292. itemsappend(_parse(source, state))
  293. if sourcematch("|"):
  294. continue
  295. if not nested:
  296. break
  297. if not source.next or sourcematch(")", 0):
  298. break
  299. else:
  300. raise error, "pattern not properly closed"
  301. if len(items) == 1:
  302. return items[0]
  303. subpattern = SubPattern(state)
  304. subpatternappend = subpattern.append
  305. # check if all items share a common prefix
  306. while 1:
  307. prefix = None
  308. for item in items:
  309. if not item:
  310. break
  311. if prefix is None:
  312. prefix = item[0]
  313. elif item[0] != prefix:
  314. break
  315. else:
  316. # all subitems start with a common "prefix".
  317. # move it out of the branch
  318. for item in items:
  319. del item[0]
  320. subpatternappend(prefix)
  321. continue # check next one
  322. break
  323. # check if the branch can be replaced by a character set
  324. for item in items:
  325. if len(item) != 1 or item[0][0] != LITERAL:
  326. break
  327. else:
  328. # we can store this as a character set instead of a
  329. # branch (the compiler may optimize this even more)
  330. set = []
  331. setappend = set.append
  332. for item in items:
  333. setappend(item[0])
  334. subpatternappend((IN, set))
  335. return subpattern
  336. subpattern.append((BRANCH, (None, items)))
  337. return subpattern
  338. def _parse_sub_cond(source, state, condgroup):
  339. item_yes = _parse(source, state)
  340. if source.match("|"):
  341. item_no = _parse(source, state)
  342. if source.match("|"):
  343. raise error, "conditional backref with more than two branches"
  344. else:
  345. item_no = None
  346. if source.next and not source.match(")", 0):
  347. raise error, "pattern not properly closed"
  348. subpattern = SubPattern(state)
  349. subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
  350. return subpattern
# lookup tables used by _parse()
# tokens that end the alternative currently being parsed
_PATTERNENDERS = set("|)")
# characters after "(?" that start a lookahead/lookbehind assertion
_ASSERTCHARS = set("=!<")
# characters allowed after "(?<"
_LOOKBEHINDASSERTCHARS = set("=!")
# opcodes of items that may not be repeated again
_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
  355. def _parse(source, state):
  356. # parse a simple pattern
  357. subpattern = SubPattern(state)
  358. # precompute constants into local variables
  359. subpatternappend = subpattern.append
  360. sourceget = source.get
  361. sourcematch = source.match
  362. _len = len
  363. PATTERNENDERS = _PATTERNENDERS
  364. ASSERTCHARS = _ASSERTCHARS
  365. LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS
  366. REPEATCODES = _REPEATCODES
  367. while 1:
  368. if source.next in PATTERNENDERS:
  369. break # end of subpattern
  370. this = sourceget()
  371. if this is None:
  372. break # end of pattern
  373. if state.flags & SRE_FLAG_VERBOSE:
  374. # skip whitespace and comments
  375. if this in WHITESPACE:
  376. continue
  377. if this == "#":
  378. while 1:
  379. this = sourceget()
  380. if this in (None, "\n"):
  381. break
  382. continue
  383. if this and this[0] not in SPECIAL_CHARS:
  384. subpatternappend((LITERAL, ord(this)))
  385. elif this == "[":
  386. # character set
  387. set = []
  388. setappend = set.append
  389. ## if sourcematch(":"):
  390. ## pass # handle character classes
  391. if sourcematch("^"):
  392. setappend((NEGATE, None))
  393. # check remaining characters
  394. start = set[:]
  395. while 1:
  396. this = sourceget()
  397. if this == "]" and set != start:
  398. break
  399. elif this and this[0] == "\\":
  400. code1 = _class_escape(source, this)
  401. elif this:
  402. code1 = LITERAL, ord(this)
  403. else:
  404. raise error, "unexpected end of regular expression"
  405. if sourcematch("-"):
  406. # potential range
  407. this = sourceget()
  408. if this == "]":
  409. if code1[0] is IN:
  410. code1 = code1[1][0]
  411. setappend(code1)
  412. setappend((LITERAL, ord("-")))
  413. break
  414. elif this:
  415. if this[0] == "\\":
  416. code2 = _class_escape(source, this)
  417. else:
  418. code2 = LITERAL, ord(this)
  419. if code1[0] != LITERAL or code2[0] != LITERAL:
  420. raise error, "bad character range"
  421. lo = code1[1]
  422. hi = code2[1]
  423. if hi < lo:
  424. raise error, "bad character range"
  425. setappend((RANGE, (lo, hi)))
  426. else:
  427. raise error, "unexpected end of regular expression"
  428. else:
  429. if code1[0] is IN:
  430. code1 = code1[1][0]
  431. setappend(code1)
  432. # XXX: <fl> should move set optimization to compiler!
  433. if _len(set)==1 and set[0][0] is LITERAL:
  434. subpatternappend(set[0]) # optimization
  435. elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
  436. subpatternappend((NOT_LITERAL, set[1][1])) # optimization
  437. else:
  438. # XXX: <fl> should add charmap optimization here
  439. subpatternappend((IN, set))
  440. elif this and this[0] in REPEAT_CHARS:
  441. # repeat previous item
  442. if this == "?":
  443. min, max = 0, 1
  444. elif this == "*":
  445. min, max = 0, MAXREPEAT
  446. elif this == "+":
  447. min, max = 1, MAXREPEAT
  448. elif this == "{":
  449. if source.next == "}":
  450. subpatternappend((LITERAL, ord(this)))
  451. continue
  452. here = source.tell()
  453. min, max = 0, MAXREPEAT
  454. lo = hi = ""
  455. while source.next in DIGITS:
  456. lo = lo + source.get()
  457. if sourcematch(","):
  458. while source.next in DIGITS:
  459. hi = hi + sourceget()
  460. else:
  461. hi = lo
  462. if not sourcematch("}"):
  463. subpatternappend((LITERAL, ord(this)))
  464. source.seek(here)
  465. continue
  466. if lo:
  467. min = int(lo)
  468. if hi:
  469. max = int(hi)
  470. if max < min:
  471. raise error, "bad repeat interval"
  472. else:
  473. raise error, "not supported"
  474. # figure out which item to repeat
  475. if subpattern:
  476. item = subpattern[-1:]
  477. else:
  478. item = None
  479. if not item or (_len(item) == 1 and item[0][0] == AT):
  480. raise error, "nothing to repeat"
  481. if item[0][0] in REPEATCODES:
  482. raise error, "multiple repeat"
  483. if sourcematch("?"):
  484. subpattern[-1] = (MIN_REPEAT, (min, max, item))
  485. else:
  486. subpattern[-1] = (MAX_REPEAT, (min, max, item))
  487. elif this == ".":
  488. subpatternappend((ANY, None))
  489. elif this == "(":
  490. group = 1
  491. name = None
  492. condgroup = None
  493. if sourcematch("?"):
  494. group = 0
  495. # options
  496. if sourcematch("P"):
  497. # python extensions
  498. if sourcematch("<"):
  499. # named group: skip forward to end of name
  500. name = ""
  501. while 1:
  502. char = sourceget()
  503. if char is None:
  504. raise error, "unterminated name"
  505. if char == ">":
  506. break
  507. name = name + char
  508. group = 1
  509. if not isname(name):
  510. raise error, "bad character in group name"
  511. elif sourcematch("="):
  512. # named backreference
  513. name = ""
  514. while 1:
  515. char = sourceget()
  516. if char is None:
  517. raise error, "unterminated name"
  518. if char == ")":
  519. break
  520. name = name + char
  521. if not isname(name):
  522. raise error, "bad character in group name"
  523. gid = state.groupdict.get(name)
  524. if gid is None:
  525. raise error, "unknown group name"
  526. subpatternappend((GROUPREF, gid))
  527. continue
  528. else:
  529. char = sourceget()
  530. if char is None:
  531. raise error, "unexpected end of pattern"
  532. raise error, "unknown specifier: ?P%s" % char
  533. elif sourcematch(":"):
  534. # non-capturing group
  535. group = 2
  536. elif sourcematch("#"):
  537. # comment
  538. while 1:
  539. if source.next is None or source.next == ")":
  540. break
  541. sourceget()
  542. if not sourcematch(")"):
  543. raise error, "unbalanced parenthesis"
  544. continue
  545. elif source.next in ASSERTCHARS:
  546. # lookahead assertions
  547. char = sourceget()
  548. dir = 1
  549. if char == "<":
  550. if source.next not in LOOKBEHINDASSERTCHARS:
  551. raise error, "syntax error"
  552. dir = -1 # lookbehind
  553. char = sourceget()
  554. p = _parse_sub(source, state)
  555. if not sourcematch(")"):
  556. raise error, "unbalanced parenthesis"
  557. if char == "=":
  558. subpatternappend((ASSERT, (dir, p)))
  559. else:
  560. subpatternappend((ASSERT_NOT, (dir, p)))
  561. continue
  562. elif sourcematch("("):
  563. # conditional backreference group
  564. condname = ""
  565. while 1:
  566. char = sourceget()
  567. if char is None:
  568. raise error, "unterminated name"
  569. if char == ")":
  570. break
  571. condname = condname + char
  572. group = 2
  573. if isname(condname):
  574. condgroup = state.groupdict.get(condname)
  575. if condgroup is None:
  576. raise error, "unknown group name"
  577. else:
  578. try:
  579. condgroup = int(condname)
  580. except ValueError:
  581. raise error, "bad character in group name"
  582. else:
  583. # flags
  584. if not source.next in FLAGS:
  585. raise error, "unexpected end of pattern"
  586. while source.next in FLAGS:
  587. state.flags = state.flags | FLAGS[sourceget()]
  588. if group:
  589. # parse group contents
  590. if group == 2:
  591. # anonymous group
  592. group = None
  593. else:
  594. group = state.opengroup(name)
  595. if condgroup:
  596. p = _parse_sub_cond(source, state, condgroup)
  597. else:
  598. p = _parse_sub(source, state)
  599. if not sourcematch(")"):
  600. raise error, "unbalanced parenthesis"
  601. if group is not None:
  602. state.closegroup(group)
  603. subpatternappend((SUBPATTERN, (group, p)))
  604. else:
  605. while 1:
  606. char = sourceget()
  607. if char is None:
  608. raise error, "unexpected end of pattern"
  609. if char == ")":
  610. break
  611. raise error, "unknown extension"
  612. elif this == "^":
  613. subpatternappend((AT, AT_BEGINNING))
  614. elif this == "$":
  615. subpattern.append((AT, AT_END))
  616. elif this and this[0] == "\\":
  617. code = _escape(source, this, state)
  618. subpatternappend(code)
  619. else:
  620. raise error, "parser error"
  621. return subpattern
  622. def parse(str, flags=0, pattern=None):
  623. # parse 're' pattern into list of (opcode, argument) tuples
  624. source = Tokenizer(str)
  625. if pattern is None:
  626. pattern = Pattern()
  627. pattern.flags = flags
  628. pattern.str = str
  629. p = _parse_sub(source, pattern, 0)
  630. tail = source.get()
  631. if tail == ")":
  632. raise error, "unbalanced parenthesis"
  633. elif tail:
  634. raise error, "bogus characters at end of regular expression"
  635. if flags & SRE_FLAG_DEBUG:
  636. p.dump()
  637. if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
  638. # the VERBOSE flag was switched on inside the pattern. to be
  639. # on the safe side, we'll parse the whole thing again...
  640. return parse(str, p.pattern.flags)
  641. return p
  642. def parse_template(source, pattern):
  643. # parse 're' replacement string into list of literals and
  644. # group references
  645. s = Tokenizer(source)
  646. sget = s.get
  647. p = []
  648. a = p.append
  649. def literal(literal, p=p, pappend=a):
  650. if p and p[-1][0] is LITERAL:
  651. p[-1] = LITERAL, p[-1][1] + literal
  652. else:
  653. pappend((LITERAL, literal))
  654. sep = source[:0]
  655. if type(sep) is type(""):
  656. makechar = chr
  657. else:
  658. makechar = unichr
  659. while 1:
  660. this = sget()
  661. if this is None:
  662. break # end of replacement string
  663. if this and this[0] == "\\":
  664. # group
  665. c = this[1:2]
  666. if c == "g":
  667. name = ""
  668. if s.match("<"):
  669. while 1:
  670. char = sget()
  671. if char is None:
  672. raise error, "unterminated group name"
  673. if char == ">":
  674. break
  675. name = name + char
  676. if not name:
  677. raise error, "bad group name"
  678. try:
  679. index = int(name)
  680. if index < 0:
  681. raise error, "negative group number"
  682. except ValueError:
  683. if not isname(name):
  684. raise error, "bad character in group name"
  685. try:
  686. index = pattern.groupindex[name]
  687. except KeyError:
  688. raise IndexError, "unknown group name"
  689. a((MARK, index))
  690. elif c == "0":
  691. if s.next in OCTDIGITS:
  692. this = this + sget()
  693. if s.next in OCTDIGITS:
  694. this = this + sget()
  695. literal(makechar(int(this[1:], 8) & 0xff))
  696. elif c in DIGITS:
  697. isoctal = False
  698. if s.next in DIGITS:
  699. this = this + sget()
  700. if (c in OCTDIGITS and this[2] in OCTDIGITS and
  701. s.next in OCTDIGITS):
  702. this = this + sget()
  703. isoctal = True
  704. literal(makechar(int(this[1:], 8) & 0xff))
  705. if not isoctal:
  706. a((MARK, int(this[1:])))
  707. else:
  708. try:
  709. this = makechar(ESCAPES[this][1])
  710. except KeyError:
  711. pass
  712. literal(this)
  713. else:
  714. literal(this)
  715. # convert template to groups and literals lists
  716. i = 0
  717. groups = []
  718. groupsappend = groups.append
  719. literals = [None] * len(p)
  720. for c, s in p:
  721. if c is MARK:
  722. groupsappend((i, s))
  723. # literal[i] is already None
  724. else:
  725. literals[i] = s
  726. i = i + 1
  727. return groups, literals
  728. def expand_template(template, match):
  729. g = match.group
  730. sep = match.string[:0]
  731. groups, literals = template
  732. literals = literals[:]
  733. try:
  734. for index, group in groups:
  735. literals[index] = s = g(group)
  736. if s is None:
  737. raise error, "unmatched group"
  738. except IndexError:
  739. raise error, "invalid group reference"
  740. return sep.join(literals)