PageRenderTime 52ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/pyconau/lib/reportlab/lib/xmllib.py

https://bitbucket.org/hexdump42/pypy-benchmarks
Python | 773 lines | 703 code | 30 blank | 40 comment | 91 complexity | 7090afbcc6fea8ebe4e2bd3add20f750 MD5 | raw file
Possible License(s): Apache-2.0, GPL-2.0, BSD-3-Clause
  1. # A parser for XML, using the derived class as static DTD.
  2. # Author: Sjoerd Mullender.
  3. # sgmlop support added by fredrik@pythonware.com (May 19, 1998)
  4. __version__=''' $Id: xmllib.py 3660 2010-02-08 18:17:33Z damian $ '''
  5. __doc__='''From before xmllib was in the Python standard library.
  6. Probably ought to be removed'''
  7. import re
  8. import string
  9. try:
  10. import sgmlop # this works for both builtin on the path or relative
  11. except ImportError:
  12. sgmlop = None
  # The five entities predefined by XML, mapping entity name to the
  # character it stands for.  Used as the default ``entitydefs`` table.
  ENTITYDEFS = {
      'lt': '<',
      'gt': '>',
      'amp': '&',
      'quot': '"',
      'apos': "'",
  }
  21. # XML parser base class -- find tags and call handler functions.
  22. # Usage: p = XMLParser(); p.feed(data); ...; p.close().
  23. # The dtd is defined by deriving a class which defines methods with
  24. # special names to handle tags: start_foo and end_foo to handle <foo>
  25. # and </foo>, respectively. The data between tags is passed to the
  26. # parser by calling self.handle_data() with some data as argument (the
  27. # data may be split up in arbitrary chunks). Entity references are
  28. # passed by calling self.handle_entityref() with the entity reference
  29. # as argument.
  30. # --------------------------------------------------------------------
  31. # original re-based XML parser
  # Building blocks shared by the patterns below.
  _S = '[ \t\r\n]+'                       # mandatory white space
  _opS = '[ \t\r\n]*'                     # optional white space
  _Name = '[a-zA-Z_:][-a-zA-Z0-9._:]*'    # element/attribute/entity name

  # Patterns containing a regex escape (\[ \] \?) are written as raw strings
  # so that the backslash reaches the regex engine without relying on
  # Python's handling of unrecognised string escapes.

  # First character that needs special treatment in character data.
  interesting = re.compile('[&<]')
  # A prefix of a reference or of markup that may still be completed by
  # more input.
  incomplete = re.compile('&(' + _Name + '|#[0-9]*|#x[0-9a-fA-F]*)?|'
                          '<([a-zA-Z_:][^<>]*|'
                          '/([a-zA-Z_:][^<>]*)?|'
                          '![^<>]*|'
                          r'\?[^<>]*)?')

  # Entity or character reference inside an attribute value (';' optional
  # here so that its absence can be reported).
  ref = re.compile('&(' + _Name + '|#[0-9]+|#x[0-9a-fA-F]+);?')
  # References in character data; both patterns also consume the single
  # character that follows the reference (normally ';').
  entityref = re.compile('&(?P<name>' + _Name + ')[^-a-zA-Z0-9._:]')
  charref = re.compile('&#(?P<char>[0-9]+[^0-9]|x[0-9a-fA-F]+[^0-9a-fA-F])')

  space = re.compile(_S)
  newline = re.compile('\n')

  # Tags.
  starttagopen = re.compile('<' + _Name)
  endtagopen = re.compile('</')
  starttagend = re.compile(_opS + '(?P<slash>/?)>')
  endbracket = re.compile('>')
  tagfind = re.compile(_Name)

  # CDATA sections, <!...> declarations, processing instructions, comments.
  cdataopen = re.compile(r'<!\[CDATA\[')
  cdataclose = re.compile(r'\]\]>')
  special = re.compile('<!(?P<special>[^<>]*)>')
  procopen = re.compile(r'<\?(?P<proc>' + _Name + ')' + _S)
  procclose = re.compile(r'\?>')
  commentopen = re.compile('<!--')
  commentclose = re.compile('-->')
  doubledash = re.compile('--')

  # One name=value attribute; the value is single-quoted, double-quoted or
  # an unquoted token.
  attrfind = re.compile(
      _opS + '(?P<name>' + _Name + ')'
      '(' + _opS + '=' + _opS +
      '(?P<value>\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9.:+*%?!()_#=~]+))')
  class SlowXMLParser:
      # Regular-expression based XML parser.  A derived class plays the role
      # of the DTD: it defines start_<tag>/end_<tag> methods (and optionally
      # <tag>_attributes) and overrides the handle_*/unknown_* hooks it is
      # interested in.

      # Definition of entities -- derived classes may override
      entitydefs = ENTITYDEFS

      # Interface -- initialize and reset this instance
      def __init__(self, verbose=0):
          self.verbose = verbose
          self.reset()

      # Interface -- reset this instance.  Loses all unprocessed data
      def reset(self):
          self.rawdata = ''       # text fed but not yet consumed
          self.stack = []         # names of the currently open elements
          self.lasttag = '???'
          self.nomoretags = 0
          self.literal = 0
          self.lineno = 1         # line number passed to syntax_error()

      # For derived classes only -- enter literal mode (CDATA) till EOF
      def setnomoretags(self):
          self.nomoretags = self.literal = 1

      # For derived classes only -- enter literal mode (CDATA)
      def setliteral(self, *args):
          self.literal = 1

      # Interface -- feed some data to the parser.  Call this as
      # often as you want, with as little or as much text as you
      # want (may include '\n').  (This just saves the text, all the
      # processing is done by goahead().)
      def feed(self, data):
          self.rawdata = self.rawdata + data
          self.goahead(0)

      # Interface -- handle the remaining data
      def close(self):
          self.goahead(1)

      # Interface -- translate references.  Returns a copy of data in which
      # character references and the entities known to self.entitydefs are
      # replaced by their text; unknown entity references are kept as-is.
      def translate_references(self, data):
          newdata = []
          i = 0
          while 1:
              res = ref.search(data, i)
              if res is None:
                  newdata.append(data[i:])
                  return ''.join(newdata)
              if data[res.end(0) - 1] != ';':
                  self.syntax_error(self.lineno,
                                    '; missing after entity/char reference')
              newdata.append(data[i:res.start(0)])
              name = res.group(1)
              if name[0] == '#':
                  if name[1] == 'x':
                      newdata.append(chr(int(name[2:], 16)))
                  else:
                      newdata.append(chr(int(name[1:])))
              else:
                  try:
                      newdata.append(self.entitydefs[name])
                  except KeyError:
                      # can't do it, so keep the entity ref in
                      newdata.append('&' + name + ';')
              i = res.end(0)

      # Internal -- handle data as far as reasonable.  May leave state
      # and data to be processed by a subsequent call.  If 'end' is
      # true, force handling all data as if followed by EOF marker.
      def goahead(self, end):
          rawdata = self.rawdata
          i = 0
          n = len(rawdata)
          while i < n:
              if self.nomoretags:
                  # Everything that is left is plain character data.
                  data = rawdata[i:n]
                  self.handle_data(data)
                  self.lineno += data.count('\n')
                  i = n
                  break
              res = interesting.search(rawdata, i)
              if res:
                  j = res.start(0)
              else:
                  j = n
              if i < j:
                  data = rawdata[i:j]
                  self.handle_data(data)
                  self.lineno += data.count('\n')
              i = j
              if i == n:
                  break
              if rawdata[i] == '<':
                  if starttagopen.match(rawdata, i):
                      if self.literal:
                          # Literal mode: pass the '<' through as data.  It
                          # holds no newline, so lineno is unaffected.
                          self.handle_data(rawdata[i])
                          i = i+1
                          continue
                      k = self.parse_starttag(i)
                      if k < 0:
                          break
                      self.lineno += rawdata[i:k].count('\n')
                      i = k
                      continue
                  if endtagopen.match(rawdata, i):
                      k = self.parse_endtag(i)
                      if k < 0:
                          break
                      self.lineno += rawdata[i:k].count('\n')
                      i = k
                      self.literal = 0
                      continue
                  if commentopen.match(rawdata, i):
                      if self.literal:
                          self.handle_data(rawdata[i])
                          i = i+1
                          continue
                      k = self.parse_comment(i)
                      if k < 0:
                          break
                      self.lineno += rawdata[i:k].count('\n')
                      i = k
                      continue
                  if cdataopen.match(rawdata, i):
                      k = self.parse_cdata(i)
                      if k < 0:
                          break
                      # Count the newlines of the whole section (i..k); the
                      # slice used to be the empty rawdata[i:i], so line
                      # numbers fell behind after any multi-line CDATA.
                      self.lineno += rawdata[i:k].count('\n')
                      i = k
                      continue
                  res = procopen.match(rawdata, i)
                  if res:
                      k = self.parse_proc(i, res)
                      if k < 0:
                          break
                      self.lineno += rawdata[i:k].count('\n')
                      i = k
                      continue
                  res = special.match(rawdata, i)
                  if res:
                      if self.literal:
                          self.handle_data(rawdata[i])
                          i = i+1
                          continue
                      self.handle_special(res.group('special'))
                      self.lineno += res.group(0).count('\n')
                      i = res.end(0)
                      continue
              elif rawdata[i] == '&':
                  res = charref.match(rawdata, i)
                  if res is not None:
                      i = res.end(0)
                      if rawdata[i-1] != ';':
                          self.syntax_error(self.lineno, '; missing in charref')
                          # The terminator is not part of the reference:
                          # leave it for the next round.
                          i = i-1
                      self.handle_charref(res.group('char')[:-1])
                      # Count only what was really consumed, so a newline
                      # terminator that is re-read as data is not counted
                      # twice.
                      self.lineno += rawdata[res.start(0):i].count('\n')
                      continue
                  res = entityref.match(rawdata, i)
                  if res is not None:
                      i = res.end(0)
                      if rawdata[i-1] != ';':
                          self.syntax_error(self.lineno, '; missing in entityref')
                          i = i-1
                      self.handle_entityref(res.group('name'))
                      self.lineno += rawdata[res.start(0):i].count('\n')
                      continue
              else:
                  raise RuntimeError('neither < nor & ??')
              # We get here only if incomplete matches but
              # nothing else
              res = incomplete.match(rawdata, i)
              if not res:
                  # A lone '<' or '&': treat it as data.
                  self.handle_data(rawdata[i])
                  i = i+1
                  continue
              j = res.end(0)
              if j == n:
                  break # Really incomplete
              self.syntax_error(self.lineno, 'bogus < or &')
              data = res.group(0)
              self.handle_data(data)
              self.lineno += data.count('\n')
              i = j
          # end while
          if end and i < n:
              data = rawdata[i:n]
              self.handle_data(data)
              self.lineno += data.count('\n')
              i = n
          self.rawdata = rawdata[i:]
          # XXX if end: check for empty stack

      # Internal -- parse comment, return index just past the comment or
      # -1 if not terminated
      def parse_comment(self, i):
          rawdata = self.rawdata
          if rawdata[i:i+4] != '<!--':
              raise RuntimeError('unexpected call to handle_comment')
          res = commentclose.search(rawdata, i+4)
          if not res:
              return -1
          # doubledash search will succeed because it's a subset of commentclose
          if doubledash.search(rawdata, i+4).start(0) < res.start(0):
              self.syntax_error(self.lineno, "'--' inside comment")
          self.handle_comment(rawdata[i+4: res.start(0)])
          return res.end(0)

      # Internal -- handle CDATA tag, return index just past the section or
      # -1 if not terminated
      def parse_cdata(self, i):
          rawdata = self.rawdata
          if rawdata[i:i+9] != '<![CDATA[':
              raise RuntimeError('unexpected call to handle_cdata')
          res = cdataclose.search(rawdata, i+9)
          if not res:
              return -1
          self.handle_cdata(rawdata[i+9:res.start(0)])
          return res.end(0)

      # Internal -- handle processing instruction; 'res' is the procopen
      # match at i.  Return index just past '?>' or -1 if not terminated
      def parse_proc(self, i, res):
          rawdata = self.rawdata
          if not res:
              raise RuntimeError('unexpected call to parse_proc')
          name = res.group('proc')
          res = procclose.search(rawdata, res.end(0))
          if not res:
              return -1
          # res.pos is where the search started, i.e. just past the target
          # name and the white space following it.
          self.handle_proc(name, rawdata[res.pos:res.start(0)])
          return res.end(0)

      # Internal -- handle starttag, return index just past the tag or
      # -1 if not terminated
      def parse_starttag(self, i):
          rawdata = self.rawdata
          # i points to start of tag
          end = endbracket.search(rawdata, i+1)
          if not end:
              return -1
          j = end.start(0)
          # Now parse the data between i+1 and j into a tag and attrs
          attrdict = {}
          res = tagfind.match(rawdata, i+1)
          if not res:
              raise RuntimeError('unexpected call to parse_starttag')
          k = res.end(0)
          tag = res.group(0)
          # Optional <tag>_attributes on the derived class lists the
          # attribute names accepted for this element.
          attrlist = getattr(self, tag + '_attributes', None)
          self.lasttag = tag
          while k < j:
              res = attrfind.match(rawdata, k)
              if not res:
                  break
              attrname, attrvalue = res.group('name', 'value')
              if attrvalue is None:
                  self.syntax_error(self.lineno, 'no attribute value specified')
                  attrvalue = attrname
              elif attrvalue[:1] == "'" == attrvalue[-1:] or \
                   attrvalue[:1] == '"' == attrvalue[-1:]:
                  attrvalue = attrvalue[1:-1]
              else:
                  self.syntax_error(self.lineno, 'attribute value not quoted')
              if attrlist is not None and attrname not in attrlist:
                  self.syntax_error(self.lineno,
                                    'unknown attribute %s of element %s' %
                                    (attrname, tag))
              if attrname in attrdict:
                  self.syntax_error(self.lineno, 'attribute specified twice')
              attrdict[attrname] = self.translate_references(attrvalue)
              k = res.end(0)
          res = starttagend.match(rawdata, k)
          if not res:
              self.syntax_error(self.lineno, 'garbage in start tag')
          self.finish_starttag(tag, attrdict)
          if res and res.group('slash') == '/':
              # Empty-element tag (<foo/>): close it right away.
              self.finish_endtag(tag)
          return end.end(0)

      # Internal -- parse endtag, return index just past the tag or -1 if
      # not terminated
      def parse_endtag(self, i):
          rawdata = self.rawdata
          end = endbracket.search(rawdata, i+1)
          if not end:
              return -1
          res = tagfind.match(rawdata, i+2)
          if not res:
              self.syntax_error(self.lineno, 'no name specified in end tag')
              tag = ''
              k = i+2
          else:
              tag = res.group(0)
              k = res.end(0)
          if k != end.start(0):
              # check that there is only white space at end of tag
              res = space.match(rawdata, k)
              if res is None or res.end(0) != end.start(0):
                  self.syntax_error(self.lineno, 'garbage in end tag')
          self.finish_endtag(tag)
          return end.end(0)

      # Internal -- finish processing of start tag
      # Return -1 for unknown tag, 1 for balanced tag
      def finish_starttag(self, tag, attrs):
          self.stack.append(tag)
          try:
              method = getattr(self, 'start_' + tag)
          except AttributeError:
              self.unknown_starttag(tag, attrs)
              return -1
          else:
              self.handle_starttag(tag, method, attrs)
              return 1

      # Internal -- finish processing of end tag.  An empty tag name closes
      # the innermost open element; otherwise every element opened since
      # the last occurrence of 'tag' is closed as well.
      def finish_endtag(self, tag):
          if not tag:
              found = len(self.stack) - 1
              if found < 0:
                  self.unknown_endtag(tag)
                  return
          else:
              if tag not in self.stack:
                  # Stray end tag: report it only when no handler exists.
                  try:
                      getattr(self, 'end_' + tag)
                  except AttributeError:
                      self.unknown_endtag(tag)
                  return
              found = len(self.stack)
              for idx in range(found):
                  if self.stack[idx] == tag:
                      found = idx
          while len(self.stack) > found:
              tag = self.stack[-1]
              try:
                  method = getattr(self, 'end_' + tag)
              except AttributeError:
                  method = None
              if method:
                  self.handle_endtag(tag, method)
              else:
                  self.unknown_endtag(tag)
              del self.stack[-1]

      # Overridable -- handle start tag
      def handle_starttag(self, tag, method, attrs):
          method(attrs)

      # Overridable -- handle end tag
      def handle_endtag(self, tag, method):
          method()

      # Example -- handle character reference, no need to override.
      # 'name' is the text between '&#' and ';' (decimal, or 'x' + hex).
      def handle_charref(self, name):
          try:
              if name[0] == 'x':
                  n = int(name[1:], 16)
              else:
                  n = int(name)
          except ValueError:
              self.unknown_charref(name)
              return
          if not 0 <= n <= 255:
              self.unknown_charref(name)
              return
          self.handle_data(chr(n))

      # Example -- handle entity reference, no need to override
      def handle_entityref(self, name):
          table = self.entitydefs
          if name in table:
              self.handle_data(table[name])
          else:
              self.unknown_entityref(name)
              return

      # Example -- handle data, should be overridden
      def handle_data(self, data):
          pass

      # Example -- handle cdata, could be overridden
      def handle_cdata(self, data):
          pass

      # Example -- handle comment, could be overridden
      def handle_comment(self, data):
          pass

      # Example -- handle processing instructions, could be overridden
      def handle_proc(self, name, data):
          pass

      # Example -- handle special instructions, could be overridden
      def handle_special(self, data):
          pass

      # Example -- handle relatively harmless syntax errors, could be overridden
      def syntax_error(self, lineno, message):
          raise RuntimeError('Syntax error at line %d: %s' % (lineno, message))

      # To be overridden -- handlers for unknown objects
      def unknown_starttag(self, tag, attrs): pass
      def unknown_endtag(self, tag): pass
      def unknown_charref(self, ref): pass
      def unknown_entityref(self, ref): pass
  440. # --------------------------------------------------------------------
  441. # accelerated XML parser
  class FastXMLParser:
      # XML parser that delegates tokenizing to an sgmlop.XMLParser.  This
      # instance is registered with the sgmlop parser and supplies the
      # finish_*/handle_*/unknown_* methods defined below.

      # Definition of entities -- derived classes may override
      entitydefs = ENTITYDEFS

      # Interface -- initialize and reset this instance
      def __init__(self, verbose=0):
          self.verbose = verbose
          self.reset()

      # Interface -- reset this instance.  Loses all unprocessed data
      def reset(self):
          self.rawdata = ''
          self.stack = []         # names of the currently open elements
          self.lasttag = '???'
          self.nomoretags = 0
          self.literal = 0
          self.lineno = 1
          self.parser = sgmlop.XMLParser()
          # Shadow the feed() method with the sgmlop parser's bound method
          # so that feeding data skips one Python-level call.
          self.feed = self.parser.feed
          self.parser.register(self)

      # For derived classes only -- enter literal mode (CDATA) till EOF
      def setnomoretags(self):
          self.nomoretags = self.literal = 1

      # For derived classes only -- enter literal mode (CDATA)
      def setliteral(self, *args):
          self.literal = 1

      # Interface -- feed some data to the parser.  Call this as
      # often as you want, with as little or as much text as you
      # want (may include '\n').  The data is handed straight to the
      # sgmlop parser.
      def feed(self, data): # overridden by reset
          self.parser.feed(data)

      # Interface -- handle the remaining data
      def close(self):
          try:
              self.parser.close()
          finally:
              # Drop the reference to the sgmlop parser even if close() fails.
              self.parser = None

      # Interface -- translate references.  Returns a copy of data in which
      # character references and the entities known to self.entitydefs are
      # replaced by their text; unknown entity references are kept as-is.
      def translate_references(self, data):
          newdata = []
          i = 0
          while 1:
              res = ref.search(data, i)
              if res is None:
                  newdata.append(data[i:])
                  return ''.join(newdata)
              if data[res.end(0) - 1] != ';':
                  self.syntax_error(self.lineno,
                                    '; missing after entity/char reference')
              newdata.append(data[i:res.start(0)])
              name = res.group(1)
              if name[0] == '#':
                  if name[1] == 'x':
                      newdata.append(chr(int(name[2:], 16)))
                  else:
                      newdata.append(chr(int(name[1:])))
              else:
                  try:
                      newdata.append(self.entitydefs[name])
                  except KeyError:
                      # can't do it, so keep the entity ref in
                      newdata.append('&' + name + ';')
              i = res.end(0)

      # Internal -- finish processing of start tag
      # Return -1 for unknown tag, 1 for balanced tag
      def finish_starttag(self, tag, attrs):
          self.stack.append(tag)
          try:
              method = getattr(self, 'start_' + tag)
          except AttributeError:
              self.unknown_starttag(tag, attrs)
              return -1
          else:
              self.handle_starttag(tag, method, attrs)
              return 1

      # Internal -- finish processing of end tag.  An empty tag name closes
      # the innermost open element; otherwise every element opened since
      # the last occurrence of 'tag' is closed as well.
      def finish_endtag(self, tag):
          if not tag:
              found = len(self.stack) - 1
              if found < 0:
                  self.unknown_endtag(tag)
                  return
          else:
              if tag not in self.stack:
                  # Stray end tag: report it only when no handler exists.
                  try:
                      getattr(self, 'end_' + tag)
                  except AttributeError:
                      self.unknown_endtag(tag)
                  return
              found = len(self.stack)
              for idx in range(found):
                  if self.stack[idx] == tag:
                      found = idx
          while len(self.stack) > found:
              tag = self.stack[-1]
              try:
                  method = getattr(self, 'end_' + tag)
              except AttributeError:
                  method = None
              if method:
                  self.handle_endtag(tag, method)
              else:
                  self.unknown_endtag(tag)
              del self.stack[-1]

      # Overridable -- handle start tag
      def handle_starttag(self, tag, method, attrs):
          method(attrs)

      # Overridable -- handle end tag
      def handle_endtag(self, tag, method):
          method()

      # Example -- handle character reference, no need to override.
      # 'name' is the reference text (decimal, or 'x' + hex).
      def handle_charref(self, name):
          try:
              if name[0] == 'x':
                  n = int(name[1:], 16)
              else:
                  n = int(name)
          except ValueError:
              self.unknown_charref(name)
              return
          if not 0 <= n <= 255:
              self.unknown_charref(name)
              return
          self.handle_data(chr(n))

      # Example -- handle entity reference, no need to override
      def handle_entityref(self, name):
          table = self.entitydefs
          if name in table:
              self.handle_data(table[name])
          else:
              self.unknown_entityref(name)
              return

      # Example -- handle data, should be overridden
      def handle_data(self, data):
          pass

      # Example -- handle cdata, could be overridden
      def handle_cdata(self, data):
          pass

      # Example -- handle comment, could be overridden
      def handle_comment(self, data):
          pass

      # Example -- handle processing instructions, could be overridden
      def handle_proc(self, name, data):
          pass

      # Example -- handle special instructions, could be overridden
      def handle_special(self, data):
          pass

      # Example -- handle relatively harmless syntax errors, could be overridden
      def syntax_error(self, lineno, message):
          raise RuntimeError('Syntax error at line %d: %s' % (lineno, message))

      # To be overridden -- handlers for unknown objects
      def unknown_starttag(self, tag, attrs): pass
      def unknown_endtag(self, tag): pass
      def unknown_charref(self, ref): pass
      def unknown_entityref(self, ref): pass
  595. #sgmlop = None
  596. # pick a suitable parser
  # Export the sgmlop-backed parser when the sgmlop import succeeded,
  # otherwise fall back to the regular-expression implementation.
  XMLParser = FastXMLParser if sgmlop else SlowXMLParser
  601. # --------------------------------------------------------------------
  602. # test stuff
  class TestXMLParser(XMLParser):
      # Parser used by test(): prints every event it receives.  Character
      # data is buffered in self.testdata and printed in chunks.  Each line
      # is built as one string and passed to print() as a single argument,
      # which produces the same output on Python 2 and Python 3.

      def __init__(self, verbose=0):
          self.testdata = ""
          XMLParser.__init__(self, verbose)

      # Buffer character data; print it once its repr reaches 70 characters.
      def handle_data(self, data):
          self.testdata = self.testdata + data
          if len(repr(self.testdata)) >= 70:
              self.flush()

      # Print and clear any buffered character data.
      def flush(self):
          data = self.testdata
          if data:
              self.testdata = ""
              print('data: ' + repr(data))

      def handle_cdata(self, data):
          self.flush()
          print('cdata: ' + repr(data))

      def handle_proc(self, name, data):
          self.flush()
          print('processing: ' + name + ' ' + repr(data))

      def handle_special(self, data):
          self.flush()
          print('special: ' + repr(data))

      # Long comments are abbreviated to their first and last 32 characters.
      def handle_comment(self, data):
          self.flush()
          r = repr(data)
          if len(r) > 68:
              r = r[:32] + '...' + r[-32:]
          print('comment: ' + r)

      # Report syntax errors instead of raising, so parsing continues.
      def syntax_error(self, lineno, message):
          print('error at line %d: %s' % (lineno, message))

      def unknown_starttag(self, tag, attrs):
          self.flush()
          if not attrs:
              print('start tag: <' + tag + '>')
          else:
              # Tag, attributes and the closing '>' are separated by single
              # spaces on one line.
              parts = ['start tag: <' + tag]
              for name, value in attrs.items():
                  parts.append(name + '=' + '"' + value + '"')
              parts.append('>')
              print(' '.join(parts))

      def unknown_endtag(self, tag):
          self.flush()
          print('end tag: </' + tag + '>')

      def unknown_entityref(self, ref):
          self.flush()
          print('*** unknown entity ref: &' + ref + ';')

      def unknown_charref(self, ref):
          self.flush()
          print('*** unknown char ref: &#' + ref + ';')

      # Finish parsing, then print whatever data is still buffered.
      def close(self):
          XMLParser.close(self)
          self.flush()
  # Command-line self-test.  'args' defaults to sys.argv[1:].  A leading
  # '-s' selects the plain XMLParser (no output) instead of TestXMLParser;
  # the next argument names the input file ('-' for stdin, default
  # 'test.xml').  Exits with status 1 if the file cannot be opened.
  def test(args = None):
      import sys
      if not args:
          args = sys.argv[1:]
      if args and args[0] == '-s':
          args = args[1:]
          klass = XMLParser
      else:
          klass = TestXMLParser
      if args:
          path = args[0]
      else:
          path = 'test.xml'
      if path == '-':
          f = sys.stdin
      else:
          try:
              f = open(path, 'r')
          except IOError as msg:
              print('%s : %s' % (path, msg))
              sys.exit(1)
      try:
          data = f.read()
      finally:
          # Close the file even if reading fails; stdin is left open.
          if f is not sys.stdin:
              f.close()
      x = klass()
      # Feed one character at a time to exercise the parser's handling of
      # input split at arbitrary points.
      for c in data:
          x.feed(c)
      x.close()
  # Run the command-line self-test when this module is executed as a script.
  if __name__ == '__main__': #NO_REPORTLAB_TEST
      test()