PageRenderTime 56ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/lib-python/2.7/test/test_xml_etree.py

https://bitbucket.org/dac_io/pypy
Python | 1886 lines | 1725 code | 73 blank | 88 comment | 10 complexity | b02753c3451c3955964841db169ab165 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. # xml.etree test. This file contains enough tests to make sure that
  2. # all included components work as they should.
  3. # Large parts are extracted from the upstream test suite.
  4. # IMPORTANT: the same doctests are run from "test_xml_etree_c" in
  5. # order to ensure consistency between the C implementation and the
  6. # Python implementation.
  7. #
  8. # For this purpose, the module-level "ET" symbol is temporarily
  9. # monkey-patched when running the "test_xml_etree_c" test suite.
  10. # Don't re-import "xml.etree.ElementTree" module in the docstring,
  11. # except if the test is specific to the Python implementation.
  12. import sys
  13. import cgi
  14. from test import test_support
  15. from test.test_support import findfile
  16. from xml.etree import ElementTree as ET
  17. SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
  18. SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
  19. SAMPLE_XML = """\
  20. <body>
  21. <tag class='a'>text</tag>
  22. <tag class='b' />
  23. <section>
  24. <tag class='b' id='inner'>subtext</tag>
  25. </section>
  26. </body>
  27. """
  28. SAMPLE_SECTION = """\
  29. <section>
  30. <tag class='b' id='inner'>subtext</tag>
  31. <nexttag />
  32. <nextsection>
  33. <tag />
  34. </nextsection>
  35. </section>
  36. """
  37. SAMPLE_XML_NS = """
  38. <body xmlns="http://effbot.org/ns">
  39. <tag>text</tag>
  40. <tag />
  41. <section>
  42. <tag>subtext</tag>
  43. </section>
  44. </body>
  45. """
  46. def sanity():
  47. """
  48. Import sanity.
  49. >>> from xml.etree import ElementTree
  50. >>> from xml.etree import ElementInclude
  51. >>> from xml.etree import ElementPath
  52. """
  53. def check_method(method):
  54. if not hasattr(method, '__call__'):
  55. print method, "not callable"
  56. def serialize(elem, to_string=True, **options):
  57. import StringIO
  58. file = StringIO.StringIO()
  59. tree = ET.ElementTree(elem)
  60. tree.write(file, **options)
  61. if to_string:
  62. return file.getvalue()
  63. else:
  64. file.seek(0)
  65. return file
  66. def summarize(elem):
  67. if elem.tag == ET.Comment:
  68. return "<Comment>"
  69. return elem.tag
  70. def summarize_list(seq):
  71. return [summarize(elem) for elem in seq]
  72. def normalize_crlf(tree):
  73. for elem in tree.iter():
  74. if elem.text:
  75. elem.text = elem.text.replace("\r\n", "\n")
  76. if elem.tail:
  77. elem.tail = elem.tail.replace("\r\n", "\n")
  78. def check_string(string):
  79. len(string)
  80. for char in string:
  81. if len(char) != 1:
  82. print "expected one-character string, got %r" % char
  83. new_string = string + ""
  84. new_string = string + " "
  85. string[:0]
  86. def check_mapping(mapping):
  87. len(mapping)
  88. keys = mapping.keys()
  89. items = mapping.items()
  90. for key in keys:
  91. item = mapping[key]
  92. mapping["key"] = "value"
  93. if mapping["key"] != "value":
  94. print "expected value string, got %r" % mapping["key"]
  95. def check_element(element):
  96. if not ET.iselement(element):
  97. print "not an element"
  98. if not hasattr(element, "tag"):
  99. print "no tag member"
  100. if not hasattr(element, "attrib"):
  101. print "no attrib member"
  102. if not hasattr(element, "text"):
  103. print "no text member"
  104. if not hasattr(element, "tail"):
  105. print "no tail member"
  106. check_string(element.tag)
  107. check_mapping(element.attrib)
  108. if element.text is not None:
  109. check_string(element.text)
  110. if element.tail is not None:
  111. check_string(element.tail)
  112. for elem in element:
  113. check_element(elem)
  114. # --------------------------------------------------------------------
  115. # element tree tests
  116. def interface():
  117. r"""
  118. Test element tree interface.
  119. >>> element = ET.Element("tag")
  120. >>> check_element(element)
  121. >>> tree = ET.ElementTree(element)
  122. >>> check_element(tree.getroot())
  123. >>> element = ET.Element("t\xe4g", key="value")
  124. >>> tree = ET.ElementTree(element)
  125. >>> repr(element) # doctest: +ELLIPSIS
  126. "<Element 't\\xe4g' at 0x...>"
  127. >>> element = ET.Element("tag", key="value")
  128. Make sure all standard element methods exist.
  129. >>> check_method(element.append)
  130. >>> check_method(element.extend)
  131. >>> check_method(element.insert)
  132. >>> check_method(element.remove)
  133. >>> check_method(element.getchildren)
  134. >>> check_method(element.find)
  135. >>> check_method(element.iterfind)
  136. >>> check_method(element.findall)
  137. >>> check_method(element.findtext)
  138. >>> check_method(element.clear)
  139. >>> check_method(element.get)
  140. >>> check_method(element.set)
  141. >>> check_method(element.keys)
  142. >>> check_method(element.items)
  143. >>> check_method(element.iter)
  144. >>> check_method(element.itertext)
  145. >>> check_method(element.getiterator)
  146. These methods return an iterable. See bug 6472.
  147. >>> check_method(element.iter("tag").next)
  148. >>> check_method(element.iterfind("tag").next)
  149. >>> check_method(element.iterfind("*").next)
  150. >>> check_method(tree.iter("tag").next)
  151. >>> check_method(tree.iterfind("tag").next)
  152. >>> check_method(tree.iterfind("*").next)
  153. These aliases are provided:
  154. >>> assert ET.XML == ET.fromstring
  155. >>> assert ET.PI == ET.ProcessingInstruction
  156. >>> assert ET.XMLParser == ET.XMLTreeBuilder
  157. """
  158. def simpleops():
  159. """
  160. Basic method sanity checks.
  161. >>> elem = ET.XML("<body><tag/></body>")
  162. >>> serialize(elem)
  163. '<body><tag /></body>'
  164. >>> e = ET.Element("tag2")
  165. >>> elem.append(e)
  166. >>> serialize(elem)
  167. '<body><tag /><tag2 /></body>'
  168. >>> elem.remove(e)
  169. >>> serialize(elem)
  170. '<body><tag /></body>'
  171. >>> elem.insert(0, e)
  172. >>> serialize(elem)
  173. '<body><tag2 /><tag /></body>'
  174. >>> elem.remove(e)
  175. >>> elem.extend([e])
  176. >>> serialize(elem)
  177. '<body><tag /><tag2 /></body>'
  178. >>> elem.remove(e)
  179. >>> element = ET.Element("tag", key="value")
  180. >>> serialize(element) # 1
  181. '<tag key="value" />'
  182. >>> subelement = ET.Element("subtag")
  183. >>> element.append(subelement)
  184. >>> serialize(element) # 2
  185. '<tag key="value"><subtag /></tag>'
  186. >>> element.insert(0, subelement)
  187. >>> serialize(element) # 3
  188. '<tag key="value"><subtag /><subtag /></tag>'
  189. >>> element.remove(subelement)
  190. >>> serialize(element) # 4
  191. '<tag key="value"><subtag /></tag>'
  192. >>> element.remove(subelement)
  193. >>> serialize(element) # 5
  194. '<tag key="value" />'
  195. >>> element.remove(subelement)
  196. Traceback (most recent call last):
  197. ValueError: list.remove(x): x not in list
  198. >>> serialize(element) # 6
  199. '<tag key="value" />'
  200. >>> element[0:0] = [subelement, subelement, subelement]
  201. >>> serialize(element[1])
  202. '<subtag />'
  203. >>> element[1:9] == [element[1], element[2]]
  204. True
  205. >>> element[:9:2] == [element[0], element[2]]
  206. True
  207. >>> del element[1:2]
  208. >>> serialize(element)
  209. '<tag key="value"><subtag /><subtag /></tag>'
  210. """
  211. def cdata():
  212. """
  213. Test CDATA handling (etc).
  214. >>> serialize(ET.XML("<tag>hello</tag>"))
  215. '<tag>hello</tag>'
  216. >>> serialize(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"))
  217. '<tag>hello</tag>'
  218. >>> serialize(ET.XML("<tag><![CDATA[hello]]></tag>"))
  219. '<tag>hello</tag>'
  220. """
  221. # Only with Python implementation
  222. def simplefind():
  223. """
  224. Test find methods using the elementpath fallback.
  225. >>> from xml.etree import ElementTree
  226. >>> CurrentElementPath = ElementTree.ElementPath
  227. >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
  228. >>> elem = ElementTree.XML(SAMPLE_XML)
  229. >>> elem.find("tag").tag
  230. 'tag'
  231. >>> ElementTree.ElementTree(elem).find("tag").tag
  232. 'tag'
  233. >>> elem.findtext("tag")
  234. 'text'
  235. >>> elem.findtext("tog")
  236. >>> elem.findtext("tog", "default")
  237. 'default'
  238. >>> ElementTree.ElementTree(elem).findtext("tag")
  239. 'text'
  240. >>> summarize_list(elem.findall("tag"))
  241. ['tag', 'tag']
  242. >>> summarize_list(elem.findall(".//tag"))
  243. ['tag', 'tag', 'tag']
  244. Path syntax doesn't work in this case.
  245. >>> elem.find("section/tag")
  246. >>> elem.findtext("section/tag")
  247. >>> summarize_list(elem.findall("section/tag"))
  248. []
  249. >>> ElementTree.ElementPath = CurrentElementPath
  250. """
  251. def find():
  252. """
  253. Test find methods (including xpath syntax).
  254. >>> elem = ET.XML(SAMPLE_XML)
  255. >>> elem.find("tag").tag
  256. 'tag'
  257. >>> ET.ElementTree(elem).find("tag").tag
  258. 'tag'
  259. >>> elem.find("section/tag").tag
  260. 'tag'
  261. >>> elem.find("./tag").tag
  262. 'tag'
  263. >>> ET.ElementTree(elem).find("./tag").tag
  264. 'tag'
  265. >>> ET.ElementTree(elem).find("/tag").tag
  266. 'tag'
  267. >>> elem[2] = ET.XML(SAMPLE_SECTION)
  268. >>> elem.find("section/nexttag").tag
  269. 'nexttag'
  270. >>> ET.ElementTree(elem).find("section/tag").tag
  271. 'tag'
  272. >>> ET.ElementTree(elem).find("tog")
  273. >>> ET.ElementTree(elem).find("tog/foo")
  274. >>> elem.findtext("tag")
  275. 'text'
  276. >>> elem.findtext("section/nexttag")
  277. ''
  278. >>> elem.findtext("section/nexttag", "default")
  279. ''
  280. >>> elem.findtext("tog")
  281. >>> elem.findtext("tog", "default")
  282. 'default'
  283. >>> ET.ElementTree(elem).findtext("tag")
  284. 'text'
  285. >>> ET.ElementTree(elem).findtext("tog/foo")
  286. >>> ET.ElementTree(elem).findtext("tog/foo", "default")
  287. 'default'
  288. >>> ET.ElementTree(elem).findtext("./tag")
  289. 'text'
  290. >>> ET.ElementTree(elem).findtext("/tag")
  291. 'text'
  292. >>> elem.findtext("section/tag")
  293. 'subtext'
  294. >>> ET.ElementTree(elem).findtext("section/tag")
  295. 'subtext'
  296. >>> summarize_list(elem.findall("."))
  297. ['body']
  298. >>> summarize_list(elem.findall("tag"))
  299. ['tag', 'tag']
  300. >>> summarize_list(elem.findall("tog"))
  301. []
  302. >>> summarize_list(elem.findall("tog/foo"))
  303. []
  304. >>> summarize_list(elem.findall("*"))
  305. ['tag', 'tag', 'section']
  306. >>> summarize_list(elem.findall(".//tag"))
  307. ['tag', 'tag', 'tag', 'tag']
  308. >>> summarize_list(elem.findall("section/tag"))
  309. ['tag']
  310. >>> summarize_list(elem.findall("section//tag"))
  311. ['tag', 'tag']
  312. >>> summarize_list(elem.findall("section/*"))
  313. ['tag', 'nexttag', 'nextsection']
  314. >>> summarize_list(elem.findall("section//*"))
  315. ['tag', 'nexttag', 'nextsection', 'tag']
  316. >>> summarize_list(elem.findall("section/.//*"))
  317. ['tag', 'nexttag', 'nextsection', 'tag']
  318. >>> summarize_list(elem.findall("*/*"))
  319. ['tag', 'nexttag', 'nextsection']
  320. >>> summarize_list(elem.findall("*//*"))
  321. ['tag', 'nexttag', 'nextsection', 'tag']
  322. >>> summarize_list(elem.findall("*/tag"))
  323. ['tag']
  324. >>> summarize_list(elem.findall("*/./tag"))
  325. ['tag']
  326. >>> summarize_list(elem.findall("./tag"))
  327. ['tag', 'tag']
  328. >>> summarize_list(elem.findall(".//tag"))
  329. ['tag', 'tag', 'tag', 'tag']
  330. >>> summarize_list(elem.findall("././tag"))
  331. ['tag', 'tag']
  332. >>> summarize_list(elem.findall(".//tag[@class]"))
  333. ['tag', 'tag', 'tag']
  334. >>> summarize_list(elem.findall(".//tag[@class='a']"))
  335. ['tag']
  336. >>> summarize_list(elem.findall(".//tag[@class='b']"))
  337. ['tag', 'tag']
  338. >>> summarize_list(elem.findall(".//tag[@id]"))
  339. ['tag']
  340. >>> summarize_list(elem.findall(".//section[tag]"))
  341. ['section']
  342. >>> summarize_list(elem.findall(".//section[element]"))
  343. []
  344. >>> summarize_list(elem.findall("../tag"))
  345. []
  346. >>> summarize_list(elem.findall("section/../tag"))
  347. ['tag', 'tag']
  348. >>> summarize_list(ET.ElementTree(elem).findall("./tag"))
  349. ['tag', 'tag']
  350. Following example is invalid in 1.2.
  351. A leading '*' is assumed in 1.3.
  352. >>> elem.findall("section//") == elem.findall("section//*")
  353. True
  354. ET's Path module handles this case incorrectly; this gives
  355. a warning in 1.3, and the behaviour will be modified in 1.4.
  356. >>> summarize_list(ET.ElementTree(elem).findall("/tag"))
  357. ['tag', 'tag']
  358. >>> elem = ET.XML(SAMPLE_XML_NS)
  359. >>> summarize_list(elem.findall("tag"))
  360. []
  361. >>> summarize_list(elem.findall("{http://effbot.org/ns}tag"))
  362. ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
  363. >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag"))
  364. ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
  365. """
  366. def file_init():
  367. """
  368. >>> import StringIO
  369. >>> stringfile = StringIO.StringIO(SAMPLE_XML)
  370. >>> tree = ET.ElementTree(file=stringfile)
  371. >>> tree.find("tag").tag
  372. 'tag'
  373. >>> tree.find("section/tag").tag
  374. 'tag'
  375. >>> tree = ET.ElementTree(file=SIMPLE_XMLFILE)
  376. >>> tree.find("element").tag
  377. 'element'
  378. >>> tree.find("element/../empty-element").tag
  379. 'empty-element'
  380. """
  381. def bad_find():
  382. """
  383. Check bad or unsupported path expressions.
  384. >>> elem = ET.XML(SAMPLE_XML)
  385. >>> elem.findall("/tag")
  386. Traceback (most recent call last):
  387. SyntaxError: cannot use absolute path on element
  388. """
  389. def path_cache():
  390. """
  391. Check that the path cache behaves sanely.
  392. >>> elem = ET.XML(SAMPLE_XML)
  393. >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
  394. >>> cache_len_10 = len(ET.ElementPath._cache)
  395. >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
  396. >>> len(ET.ElementPath._cache) == cache_len_10
  397. True
  398. >>> for i in range(20): ET.ElementTree(elem).find('./'+str(i))
  399. >>> len(ET.ElementPath._cache) > cache_len_10
  400. True
  401. >>> for i in range(600): ET.ElementTree(elem).find('./'+str(i))
  402. >>> len(ET.ElementPath._cache) < 500
  403. True
  404. """
  405. def copy():
  406. """
  407. Test copy handling (etc).
  408. >>> import copy
  409. >>> e1 = ET.XML("<tag>hello<foo/></tag>")
  410. >>> e2 = copy.copy(e1)
  411. >>> e3 = copy.deepcopy(e1)
  412. >>> e1.find("foo").tag = "bar"
  413. >>> serialize(e1)
  414. '<tag>hello<bar /></tag>'
  415. >>> serialize(e2)
  416. '<tag>hello<bar /></tag>'
  417. >>> serialize(e3)
  418. '<tag>hello<foo /></tag>'
  419. """
  420. def attrib():
  421. """
  422. Test attribute handling.
  423. >>> elem = ET.Element("tag")
  424. >>> elem.get("key") # 1.1
  425. >>> elem.get("key", "default") # 1.2
  426. 'default'
  427. >>> elem.set("key", "value")
  428. >>> elem.get("key") # 1.3
  429. 'value'
  430. >>> elem = ET.Element("tag", key="value")
  431. >>> elem.get("key") # 2.1
  432. 'value'
  433. >>> elem.attrib # 2.2
  434. {'key': 'value'}
  435. >>> attrib = {"key": "value"}
  436. >>> elem = ET.Element("tag", attrib)
  437. >>> attrib.clear() # check for aliasing issues
  438. >>> elem.get("key") # 3.1
  439. 'value'
  440. >>> elem.attrib # 3.2
  441. {'key': 'value'}
  442. >>> attrib = {"key": "value"}
  443. >>> elem = ET.Element("tag", **attrib)
  444. >>> attrib.clear() # check for aliasing issues
  445. >>> elem.get("key") # 4.1
  446. 'value'
  447. >>> elem.attrib # 4.2
  448. {'key': 'value'}
  449. >>> elem = ET.Element("tag", {"key": "other"}, key="value")
  450. >>> elem.get("key") # 5.1
  451. 'value'
  452. >>> elem.attrib # 5.2
  453. {'key': 'value'}
  454. >>> elem = ET.Element('test')
  455. >>> elem.text = "aa"
  456. >>> elem.set('testa', 'testval')
  457. >>> elem.set('testb', 'test2')
  458. >>> ET.tostring(elem)
  459. '<test testa="testval" testb="test2">aa</test>'
  460. >>> sorted(elem.keys())
  461. ['testa', 'testb']
  462. >>> sorted(elem.items())
  463. [('testa', 'testval'), ('testb', 'test2')]
  464. >>> elem.attrib['testb']
  465. 'test2'
  466. >>> elem.attrib['testb'] = 'test1'
  467. >>> elem.attrib['testc'] = 'test2'
  468. >>> ET.tostring(elem)
  469. '<test testa="testval" testb="test1" testc="test2">aa</test>'
  470. """
  471. def makeelement():
  472. """
  473. Test makeelement handling.
  474. >>> elem = ET.Element("tag")
  475. >>> attrib = {"key": "value"}
  476. >>> subelem = elem.makeelement("subtag", attrib)
  477. >>> if subelem.attrib is attrib:
  478. ... print "attrib aliasing"
  479. >>> elem.append(subelem)
  480. >>> serialize(elem)
  481. '<tag><subtag key="value" /></tag>'
  482. >>> elem.clear()
  483. >>> serialize(elem)
  484. '<tag />'
  485. >>> elem.append(subelem)
  486. >>> serialize(elem)
  487. '<tag><subtag key="value" /></tag>'
  488. >>> elem.extend([subelem, subelem])
  489. >>> serialize(elem)
  490. '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>'
  491. >>> elem[:] = [subelem]
  492. >>> serialize(elem)
  493. '<tag><subtag key="value" /></tag>'
  494. >>> elem[:] = tuple([subelem])
  495. >>> serialize(elem)
  496. '<tag><subtag key="value" /></tag>'
  497. """
  498. def parsefile():
  499. """
  500. Test parsing from file.
  501. >>> tree = ET.parse(SIMPLE_XMLFILE)
  502. >>> normalize_crlf(tree)
  503. >>> tree.write(sys.stdout)
  504. <root>
  505. <element key="value">text</element>
  506. <element>text</element>tail
  507. <empty-element />
  508. </root>
  509. >>> tree = ET.parse(SIMPLE_NS_XMLFILE)
  510. >>> normalize_crlf(tree)
  511. >>> tree.write(sys.stdout)
  512. <ns0:root xmlns:ns0="namespace">
  513. <ns0:element key="value">text</ns0:element>
  514. <ns0:element>text</ns0:element>tail
  515. <ns0:empty-element />
  516. </ns0:root>
  517. >>> with open(SIMPLE_XMLFILE) as f:
  518. ... data = f.read()
  519. >>> parser = ET.XMLParser()
  520. >>> parser.version # doctest: +ELLIPSIS
  521. 'Expat ...'
  522. >>> parser.feed(data)
  523. >>> print serialize(parser.close())
  524. <root>
  525. <element key="value">text</element>
  526. <element>text</element>tail
  527. <empty-element />
  528. </root>
  529. >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility
  530. >>> parser.feed(data)
  531. >>> print serialize(parser.close())
  532. <root>
  533. <element key="value">text</element>
  534. <element>text</element>tail
  535. <empty-element />
  536. </root>
  537. >>> target = ET.TreeBuilder()
  538. >>> parser = ET.XMLParser(target=target)
  539. >>> parser.feed(data)
  540. >>> print serialize(parser.close())
  541. <root>
  542. <element key="value">text</element>
  543. <element>text</element>tail
  544. <empty-element />
  545. </root>
  546. """
  547. def parseliteral():
  548. """
  549. >>> element = ET.XML("<html><body>text</body></html>")
  550. >>> ET.ElementTree(element).write(sys.stdout)
  551. <html><body>text</body></html>
  552. >>> element = ET.fromstring("<html><body>text</body></html>")
  553. >>> ET.ElementTree(element).write(sys.stdout)
  554. <html><body>text</body></html>
  555. >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
  556. >>> element = ET.fromstringlist(sequence)
  557. >>> print ET.tostring(element)
  558. <html><body>text</body></html>
  559. >>> print "".join(ET.tostringlist(element))
  560. <html><body>text</body></html>
  561. >>> ET.tostring(element, "ascii")
  562. "<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>"
  563. >>> _, ids = ET.XMLID("<html><body>text</body></html>")
  564. >>> len(ids)
  565. 0
  566. >>> _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
  567. >>> len(ids)
  568. 1
  569. >>> ids["body"].tag
  570. 'body'
  571. """
  572. def iterparse():
  573. """
  574. Test iterparse interface.
  575. >>> iterparse = ET.iterparse
  576. >>> context = iterparse(SIMPLE_XMLFILE)
  577. >>> action, elem = next(context)
  578. >>> print action, elem.tag
  579. end element
  580. >>> for action, elem in context:
  581. ... print action, elem.tag
  582. end element
  583. end empty-element
  584. end root
  585. >>> context.root.tag
  586. 'root'
  587. >>> context = iterparse(SIMPLE_NS_XMLFILE)
  588. >>> for action, elem in context:
  589. ... print action, elem.tag
  590. end {namespace}element
  591. end {namespace}element
  592. end {namespace}empty-element
  593. end {namespace}root
  594. >>> events = ()
  595. >>> context = iterparse(SIMPLE_XMLFILE, events)
  596. >>> for action, elem in context:
  597. ... print action, elem.tag
  598. >>> events = ()
  599. >>> context = iterparse(SIMPLE_XMLFILE, events=events)
  600. >>> for action, elem in context:
  601. ... print action, elem.tag
  602. >>> events = ("start", "end")
  603. >>> context = iterparse(SIMPLE_XMLFILE, events)
  604. >>> for action, elem in context:
  605. ... print action, elem.tag
  606. start root
  607. start element
  608. end element
  609. start element
  610. end element
  611. start empty-element
  612. end empty-element
  613. end root
  614. >>> events = ("start", "end", "start-ns", "end-ns")
  615. >>> context = iterparse(SIMPLE_NS_XMLFILE, events)
  616. >>> for action, elem in context:
  617. ... if action in ("start", "end"):
  618. ... print action, elem.tag
  619. ... else:
  620. ... print action, elem
  621. start-ns ('', 'namespace')
  622. start {namespace}root
  623. start {namespace}element
  624. end {namespace}element
  625. start {namespace}element
  626. end {namespace}element
  627. start {namespace}empty-element
  628. end {namespace}empty-element
  629. end {namespace}root
  630. end-ns None
  631. >>> events = ("start", "end", "bogus")
  632. >>> with open(SIMPLE_XMLFILE, "rb") as f:
  633. ... iterparse(f, events)
  634. Traceback (most recent call last):
  635. ValueError: unknown event 'bogus'
  636. >>> import StringIO
  637. >>> source = StringIO.StringIO(
  638. ... "<?xml version='1.0' encoding='iso-8859-1'?>\\n"
  639. ... "<body xmlns='http://&#233;ffbot.org/ns'\\n"
  640. ... " xmlns:cl\\xe9='http://effbot.org/ns'>text</body>\\n")
  641. >>> events = ("start-ns",)
  642. >>> context = iterparse(source, events)
  643. >>> for action, elem in context:
  644. ... print action, elem
  645. start-ns ('', u'http://\\xe9ffbot.org/ns')
  646. start-ns (u'cl\\xe9', 'http://effbot.org/ns')
  647. >>> source = StringIO.StringIO("<document />junk")
  648. >>> try:
  649. ... for action, elem in iterparse(source):
  650. ... print action, elem.tag
  651. ... except ET.ParseError, v:
  652. ... print v
  653. junk after document element: line 1, column 12
  654. """
  655. def writefile():
  656. """
  657. >>> elem = ET.Element("tag")
  658. >>> elem.text = "text"
  659. >>> serialize(elem)
  660. '<tag>text</tag>'
  661. >>> ET.SubElement(elem, "subtag").text = "subtext"
  662. >>> serialize(elem)
  663. '<tag>text<subtag>subtext</subtag></tag>'
  664. Test tag suppression
  665. >>> elem.tag = None
  666. >>> serialize(elem)
  667. 'text<subtag>subtext</subtag>'
  668. >>> elem.insert(0, ET.Comment("comment"))
  669. >>> serialize(elem) # assumes 1.3
  670. 'text<!--comment--><subtag>subtext</subtag>'
  671. >>> elem[0] = ET.PI("key", "value")
  672. >>> serialize(elem)
  673. 'text<?key value?><subtag>subtext</subtag>'
  674. """
  675. def custom_builder():
  676. """
  677. Test parser w. custom builder.
  678. >>> with open(SIMPLE_XMLFILE) as f:
  679. ... data = f.read()
  680. >>> class Builder:
  681. ... def start(self, tag, attrib):
  682. ... print "start", tag
  683. ... def end(self, tag):
  684. ... print "end", tag
  685. ... def data(self, text):
  686. ... pass
  687. >>> builder = Builder()
  688. >>> parser = ET.XMLParser(target=builder)
  689. >>> parser.feed(data)
  690. start root
  691. start element
  692. end element
  693. start element
  694. end element
  695. start empty-element
  696. end empty-element
  697. end root
  698. >>> with open(SIMPLE_NS_XMLFILE) as f:
  699. ... data = f.read()
  700. >>> class Builder:
  701. ... def start(self, tag, attrib):
  702. ... print "start", tag
  703. ... def end(self, tag):
  704. ... print "end", tag
  705. ... def data(self, text):
  706. ... pass
  707. ... def pi(self, target, data):
  708. ... print "pi", target, repr(data)
  709. ... def comment(self, data):
  710. ... print "comment", repr(data)
  711. >>> builder = Builder()
  712. >>> parser = ET.XMLParser(target=builder)
  713. >>> parser.feed(data)
  714. pi pi 'data'
  715. comment ' comment '
  716. start {namespace}root
  717. start {namespace}element
  718. end {namespace}element
  719. start {namespace}element
  720. end {namespace}element
  721. start {namespace}empty-element
  722. end {namespace}empty-element
  723. end {namespace}root
  724. """
  725. def getchildren():
  726. """
  727. Test Element.getchildren()
  728. >>> with open(SIMPLE_XMLFILE, "r") as f:
  729. ... tree = ET.parse(f)
  730. >>> for elem in tree.getroot().iter():
  731. ... summarize_list(elem.getchildren())
  732. ['element', 'element', 'empty-element']
  733. []
  734. []
  735. []
  736. >>> for elem in tree.getiterator():
  737. ... summarize_list(elem.getchildren())
  738. ['element', 'element', 'empty-element']
  739. []
  740. []
  741. []
  742. >>> elem = ET.XML(SAMPLE_XML)
  743. >>> len(elem.getchildren())
  744. 3
  745. >>> len(elem[2].getchildren())
  746. 1
  747. >>> elem[:] == elem.getchildren()
  748. True
  749. >>> child1 = elem[0]
  750. >>> child2 = elem[2]
  751. >>> del elem[1:2]
  752. >>> len(elem.getchildren())
  753. 2
  754. >>> child1 == elem[0]
  755. True
  756. >>> child2 == elem[1]
  757. True
  758. >>> elem[0:2] = [child2, child1]
  759. >>> child2 == elem[0]
  760. True
  761. >>> child1 == elem[1]
  762. True
  763. >>> child1 == elem[0]
  764. False
  765. >>> elem.clear()
  766. >>> elem.getchildren()
  767. []
  768. """
  769. def writestring():
  770. """
  771. >>> elem = ET.XML("<html><body>text</body></html>")
  772. >>> ET.tostring(elem)
  773. '<html><body>text</body></html>'
  774. >>> elem = ET.fromstring("<html><body>text</body></html>")
  775. >>> ET.tostring(elem)
  776. '<html><body>text</body></html>'
  777. """
  778. def check_encoding(encoding):
  779. """
  780. >>> check_encoding("ascii")
  781. >>> check_encoding("us-ascii")
  782. >>> check_encoding("iso-8859-1")
  783. >>> check_encoding("iso-8859-15")
  784. >>> check_encoding("cp437")
  785. >>> check_encoding("mac-roman")
  786. """
  787. ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
  788. def encoding():
  789. r"""
  790. Test encoding issues.
  791. >>> elem = ET.Element("tag")
  792. >>> elem.text = u"abc"
  793. >>> serialize(elem)
  794. '<tag>abc</tag>'
  795. >>> serialize(elem, encoding="utf-8")
  796. '<tag>abc</tag>'
  797. >>> serialize(elem, encoding="us-ascii")
  798. '<tag>abc</tag>'
  799. >>> serialize(elem, encoding="iso-8859-1")
  800. "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"
  801. >>> elem.text = "<&\"\'>"
  802. >>> serialize(elem)
  803. '<tag>&lt;&amp;"\'&gt;</tag>'
  804. >>> serialize(elem, encoding="utf-8")
  805. '<tag>&lt;&amp;"\'&gt;</tag>'
  806. >>> serialize(elem, encoding="us-ascii") # cdata characters
  807. '<tag>&lt;&amp;"\'&gt;</tag>'
  808. >>> serialize(elem, encoding="iso-8859-1")
  809. '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'
  810. >>> elem.attrib["key"] = "<&\"\'>"
  811. >>> elem.text = None
  812. >>> serialize(elem)
  813. '<tag key="&lt;&amp;&quot;\'&gt;" />'
  814. >>> serialize(elem, encoding="utf-8")
  815. '<tag key="&lt;&amp;&quot;\'&gt;" />'
  816. >>> serialize(elem, encoding="us-ascii")
  817. '<tag key="&lt;&amp;&quot;\'&gt;" />'
  818. >>> serialize(elem, encoding="iso-8859-1")
  819. '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;" />'
  820. >>> elem.text = u'\xe5\xf6\xf6<>'
  821. >>> elem.attrib.clear()
  822. >>> serialize(elem)
  823. '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
  824. >>> serialize(elem, encoding="utf-8")
  825. '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
  826. >>> serialize(elem, encoding="us-ascii")
  827. '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
  828. >>> serialize(elem, encoding="iso-8859-1")
  829. "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"
  830. >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
  831. >>> elem.text = None
  832. >>> serialize(elem)
  833. '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
  834. >>> serialize(elem, encoding="utf-8")
  835. '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />'
  836. >>> serialize(elem, encoding="us-ascii")
  837. '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
  838. >>> serialize(elem, encoding="iso-8859-1")
  839. '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'
  840. """
  841. def methods():
  842. r"""
  843. Test serialization methods.
  844. >>> e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
  845. >>> e.tail = "\n"
  846. >>> serialize(e)
  847. '<html><link /><script>1 &lt; 2</script></html>\n'
  848. >>> serialize(e, method=None)
  849. '<html><link /><script>1 &lt; 2</script></html>\n'
  850. >>> serialize(e, method="xml")
  851. '<html><link /><script>1 &lt; 2</script></html>\n'
  852. >>> serialize(e, method="html")
  853. '<html><link><script>1 < 2</script></html>\n'
  854. >>> serialize(e, method="text")
  855. '1 < 2\n'
  856. """
  857. def iterators():
  858. """
  859. Test iterators.
  860. >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
  861. >>> summarize_list(e.iter())
  862. ['html', 'body', 'i']
  863. >>> summarize_list(e.find("body").iter())
  864. ['body', 'i']
  865. >>> summarize(next(e.iter()))
  866. 'html'
  867. >>> "".join(e.itertext())
  868. 'this is a paragraph...'
  869. >>> "".join(e.find("body").itertext())
  870. 'this is a paragraph.'
  871. >>> next(e.itertext())
  872. 'this is a '
  873. Method iterparse should return an iterator. See bug 6472.
  874. >>> sourcefile = serialize(e, to_string=False)
  875. >>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS
  876. ('end', <Element 'i' at 0x...>)
  877. >>> tree = ET.ElementTree(None)
  878. >>> tree.iter()
  879. Traceback (most recent call last):
  880. AttributeError: 'NoneType' object has no attribute 'iter'
  881. """
  882. ENTITY_XML = """\
  883. <!DOCTYPE points [
  884. <!ENTITY % user-entities SYSTEM 'user-entities.xml'>
  885. %user-entities;
  886. ]>
  887. <document>&entity;</document>
  888. """
  889. def entity():
  890. """
  891. Test entity handling.
  892. 1) good entities
  893. >>> e = ET.XML("<document title='&#x8230;'>test</document>")
  894. >>> serialize(e)
  895. '<document title="&#33328;">test</document>'
  896. 2) bad entities
  897. >>> ET.XML("<document>&entity;</document>")
  898. Traceback (most recent call last):
  899. ParseError: undefined entity: line 1, column 10
  900. >>> ET.XML(ENTITY_XML)
  901. Traceback (most recent call last):
  902. ParseError: undefined entity &entity;: line 5, column 10
  903. 3) custom entity
  904. >>> parser = ET.XMLParser()
  905. >>> parser.entity["entity"] = "text"
  906. >>> parser.feed(ENTITY_XML)
  907. >>> root = parser.close()
  908. >>> serialize(root)
  909. '<document>text</document>'
  910. """
  911. def error(xml):
  912. """
  913. Test error handling.
  914. >>> issubclass(ET.ParseError, SyntaxError)
  915. True
  916. >>> error("foo").position
  917. (1, 0)
  918. >>> error("<tag>&foo;</tag>").position
  919. (1, 5)
  920. >>> error("foobar<").position
  921. (1, 6)
  922. """
  923. try:
  924. ET.XML(xml)
  925. except ET.ParseError:
  926. return sys.exc_value
def namespace():
    """
    Test namespace issues.

    1) xml namespace: the reserved "xml" prefix is kept as-is

    >>> elem = ET.XML("<tag xml:lang='en' />")
    >>> serialize(elem) # 1.1
    '<tag xml:lang="en" />'

    2) other "well-known" namespaces: known URIs keep their usual
       prefix, anything else gets a generated nsN prefix

    >>> elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
    >>> serialize(elem) # 2.1
    '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />'

    >>> elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
    >>> serialize(elem) # 2.2
    '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />'

    >>> elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
    >>> serialize(elem) # 2.3
    '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />'

    3) unknown namespaces

    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> print serialize(elem)
    <ns0:body xmlns:ns0="http://effbot.org/ns">
    <ns0:tag>text</ns0:tag>
    <ns0:tag />
    <ns0:section>
    <ns0:tag>subtext</ns0:tag>
    </ns0:section>
    </ns0:body>
    """
def qname():
    """
    Test QName handling.

    1) decorated tags: "{uri}local" strings and QName objects are
       interchangeable as element tags

    >>> elem = ET.Element("{uri}tag")
    >>> serialize(elem) # 1.1
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("{uri}tag"))
    >>> serialize(elem) # 1.2
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("uri", "tag"))
    >>> serialize(elem) # 1.3
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("uri", "tag"))
    >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
    >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
    >>> serialize(elem) # 1.4
    '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>'

    2) decorated attributes

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "value"
    >>> serialize(elem) # 2.1
    '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

    >>> elem.clear()
    >>> elem.attrib[ET.QName("{uri}key")] = "value"
    >>> serialize(elem) # 2.2
    '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

    3) decorated values are not converted by default, but the
       QName wrapper can be used for values

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "{uri}value"
    >>> serialize(elem) # 3.1
    '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />'

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = ET.QName("{uri}value")
    >>> serialize(elem) # 3.2
    '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />'

    >>> elem.clear()
    >>> subelem = ET.Element("tag")
    >>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
    >>> elem.append(subelem)
    >>> elem.append(subelem)
    >>> serialize(elem) # 3.3
    '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>'

    4) Direct QName tests: string conversion and equality, including
       comparison against plain strings

    >>> str(ET.QName('ns', 'tag'))
    '{ns}tag'
    >>> str(ET.QName('{ns}tag'))
    '{ns}tag'
    >>> q1 = ET.QName('ns', 'tag')
    >>> q2 = ET.QName('ns', 'tag')
    >>> q1 == q2
    True
    >>> q2 = ET.QName('ns', 'other-tag')
    >>> q1 == q2
    False
    >>> q1 == 'ns:tag'
    False
    >>> q1 == '{ns}tag'
    True
    """
def doctype_public():
    """
    Test PUBLIC doctype.

    The doctest only checks that a document carrying a PUBLIC doctype
    declaration parses without raising.

    >>> elem = ET.XML('<!DOCTYPE html PUBLIC'
    ...   ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
    ...   ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
    ...   '<html>text</html>')
    """
  1024. def xpath_tokenizer(p):
  1025. """
  1026. Test the XPath tokenizer.
  1027. >>> # tests from the xml specification
  1028. >>> xpath_tokenizer("*")
  1029. ['*']
  1030. >>> xpath_tokenizer("text()")
  1031. ['text', '()']
  1032. >>> xpath_tokenizer("@name")
  1033. ['@', 'name']
  1034. >>> xpath_tokenizer("@*")
  1035. ['@', '*']
  1036. >>> xpath_tokenizer("para[1]")
  1037. ['para', '[', '1', ']']
  1038. >>> xpath_tokenizer("para[last()]")
  1039. ['para', '[', 'last', '()', ']']
  1040. >>> xpath_tokenizer("*/para")
  1041. ['*', '/', 'para']
  1042. >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
  1043. ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
  1044. >>> xpath_tokenizer("chapter//para")
  1045. ['chapter', '//', 'para']
  1046. >>> xpath_tokenizer("//para")
  1047. ['//', 'para']
  1048. >>> xpath_tokenizer("//olist/item")
  1049. ['//', 'olist', '/', 'item']
  1050. >>> xpath_tokenizer(".")
  1051. ['.']
  1052. >>> xpath_tokenizer(".//para")
  1053. ['.', '//', 'para']
  1054. >>> xpath_tokenizer("..")
  1055. ['..']
  1056. >>> xpath_tokenizer("../@lang")
  1057. ['..', '/', '@', 'lang']
  1058. >>> xpath_tokenizer("chapter[title]")
  1059. ['chapter', '[', 'title', ']']
  1060. >>> xpath_tokenizer("employee[@secretary and @assistant]")
  1061. ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']
  1062. >>> # additional tests
  1063. >>> xpath_tokenizer("{http://spam}egg")
  1064. ['{http://spam}egg']
  1065. >>> xpath_tokenizer("./spam.egg")
  1066. ['.', '/', 'spam.egg']
  1067. >>> xpath_tokenizer(".//{http://spam}egg")
  1068. ['.', '//', '{http://spam}egg']
  1069. """
  1070. from xml.etree import ElementPath
  1071. out = []
  1072. for op, tag in ElementPath.xpath_tokenizer(p):
  1073. out.append(op or tag)
  1074. return out
def processinginstruction():
    """
    Test ProcessingInstruction directly

    ET.PI is used here interchangeably with ET.ProcessingInstruction.

    >>> ET.tostring(ET.ProcessingInstruction('test', 'instruction'))
    '<?test instruction?>'
    >>> ET.tostring(ET.PI('test', 'instruction'))
    '<?test instruction?>'

    Issue #2746: the instruction text is written out without escaping
    '<' or '&'.

    >>> ET.tostring(ET.PI('test', '<testing&>'))
    '<?test <testing&>?>'
    >>> ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1')
    "<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
    """
#
# xinclude tests (samples from appendix C of the xinclude specification)

# In-memory "file system" for the XInclude doctests: maps an href to the
# text of the resource.  xinclude_loader() resolves includes against it.
XINCLUDE = {}

# C.1: basic XML inclusion of disclaimer.xml.
XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>120 Mz is adequate for an average home user.</p>
<xi:include href="disclaimer.xml"/>
</document>
"""

XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
<p>The opinions represented herein represent those of the individual
and should not be interpreted as official policy endorsed by this
organization.</p>
</disclaimer>
"""

# C.2: textual inclusion (parse="text") of count.txt.
XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>This document has been accessed
<xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

XINCLUDE["count.txt"] = "324387"

# Variant of C.2 where the include follows a sibling element.
XINCLUDE["C2b.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>This document has been <em>accessed</em>
<xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# C.3: textual inclusion of an XML resource (data.xml).
XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source of the "data.xml" resource:</p>
<example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
<item><![CDATA[Brooks & Shields]]></item>
</data>
"""

# C.5: nested xi:fallback elements; neither referenced .txt resource is
# defined in this table.
XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="example.txt" parse="text">
<xi:fallback>
<xi:include href="fallback-example.txt" parse="text">
<xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
</xi:include>
</xi:fallback>
</xi:include>
</div>
"""

# Includes the on-disk SIMPLE_XMLFILE; the path is escaped (quotes
# included) because it is substituted into an attribute value.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>Example.</p>
<xi:include href="{}"/>
</document>
""".format(cgi.escape(SIMPLE_XMLFILE, True))
  1153. def xinclude_loader(href, parse="xml", encoding=None):
  1154. try:
  1155. data = XINCLUDE[href]
  1156. except KeyError:
  1157. raise IOError("resource not found")
  1158. if parse == "xml":
  1159. from xml.etree.ElementTree import XML
  1160. return XML(data)
  1161. return data
def xinclude():
    r"""
    Test ElementInclude.include() with the xinclude_loader() resources.

    Basic inclusion example (XInclude C.1)

    >>> from xml.etree import ElementTree as ET
    >>> from xml.etree import ElementInclude

    >>> document = xinclude_loader("C1.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C1
    <document>
    <p>120 Mz is adequate for an average home user.</p>
    <disclaimer>
    <p>The opinions represented herein represent those of the individual
    and should not be interpreted as official policy endorsed by this
    organization.</p>
    </disclaimer>
    </document>

    Textual inclusion example (XInclude C.2)

    >>> document = xinclude_loader("C2.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C2
    <document>
    <p>This document has been accessed
    324387 times.</p>
    </document>

    Textual inclusion after sibling element (based on modified XInclude C.2)

    >>> document = xinclude_loader("C2b.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print(serialize(document)) # C2b
    <document>
    <p>This document has been <em>accessed</em>
    324387 times.</p>
    </document>

    Textual inclusion of XML example (XInclude C.3)

    >>> document = xinclude_loader("C3.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C3
    <document>
    <p>The following is the source of the "data.xml" resource:</p>
    <example>&lt;?xml version='1.0'?&gt;
    &lt;data&gt;
    &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;
    &lt;/data&gt;
    </example>
    </document>

    Fallback example (XInclude C.5)
    Note! Fallback support is not yet implemented

    >>> document = xinclude_loader("C5.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    Traceback (most recent call last):
    IOError: resource not found
    >>> # print serialize(document) # C5
    """
def xinclude_default():
    """
    Test ElementInclude.include() called without an explicit loader.

    The "default.xml" document includes SIMPLE_XMLFILE by path.

    >>> from xml.etree import ElementInclude

    >>> document = xinclude_loader("default.xml")
    >>> ElementInclude.include(document)
    >>> print serialize(document) # default
    <document>
    <p>Example.</p>
    <root>
    <element key="value">text</element>
    <element>text</element>tail
    <empty-element />
    </root>
    </document>
    """
#
# badly formatted xi:include tags

# Malformed XInclude documents used by xinclude_failures().
XINCLUDE_BAD = {}

# xi:include with a parse attribute that is neither "xml" nor "text".
XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>120 Mz is adequate for an average home user.</p>
<xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

# xi:fallback that is not a child of xi:include.
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:fallback></xi:fallback>
</div>
"""
def xinclude_failures():
    r"""
    Test the FatalIncludeError cases of ElementInclude.include().

    Test failure to locate included XML file.

    >>> from xml.etree import ElementInclude

    >>> def none_loader(href, parser, encoding=None):
    ...     return None

    >>> document = ET.XML(XINCLUDE["C1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'disclaimer.xml' as 'xml'

    Test failure to locate included text file.

    >>> document = ET.XML(XINCLUDE["C2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'count.txt' as 'text'

    Test bad parse type.

    >>> document = ET.XML(XINCLUDE_BAD["B1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE')

    Test xi:fallback outside xi:include.

    >>> document = ET.XML(XINCLUDE_BAD["B2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback')
    """
  1271. # --------------------------------------------------------------------
  1272. # reported bugs
def bug_xmltoolkit21():
    """
    marshaller gives obscure errors for non-string values

    Each case puts the integer 123 where a string is required (tag,
    text, tail, attribute key, attribute value) and expects the same
    explicit TypeError from the serializer.

    >>> elem = ET.Element(123)
    >>> serialize(elem) # tag
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.text = 123
    >>> serialize(elem) # text
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.tail = 123
    >>> serialize(elem) # tail
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set(123, "123")
    >>> serialize(elem) # attribute key
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set("123", 123)
    >>> serialize(elem) # attribute value
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    """
def bug_xmltoolkit25():
    """
    typo in ElementTree.findtext

    Checks findtext() on an ElementTree wrapper (not on an Element) for
    a direct child and for a nested path.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> tree = ET.ElementTree(elem)
    >>> tree.findtext("tag")
    'text'
    >>> tree.findtext("section/tag")
    'subtext'
    """
def bug_xmltoolkit28():
    """
    .//tag causes exceptions

    A descendant search must return an empty list when nothing matches
    and the matching elements otherwise.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> summarize_list(tree.findall(".//thead"))
    []
    >>> summarize_list(tree.findall(".//tbody"))
    ['tbody']
    """
def bug_xmltoolkitX1():
    """
    dump() doesn't flush the output buffer

    Text written to sys.stdout right after ET.dump() must appear after
    the dumped document.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> ET.dump(tree); sys.stdout.write("tail")
    <doc><table><tbody /></table></doc>
    tail
    """
def bug_xmltoolkit39():
    """
    non-ascii element and attribute names don't work

    Covers names parsed from ISO-8859-1 input as well as names set
    through the API as unicode strings; all are serialized as UTF-8.

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><tag \xe4ttr='v&#228;lue' />")
    >>> tree.attrib
    {u'\\xe4ttr': u'v\\xe4lue'}
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g>text</t\xe4g>")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g>text</t\\xc3\\xa4g>'

    >>> tree = ET.Element(u"t\u00e4g")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.Element("tag")
    >>> tree.set(u"\u00e4ttr", u"v\u00e4lue")
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'
    """
def bug_xmltoolkit54():
    """
    problems handling internally defined entities

    An entity declared in the document's own DTD subset must be
    expanded when it is referenced in the body.

    >>> e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]><doc>&ldots;</doc>")
    >>> serialize(e)
    '<doc>&#33328;</doc>'
    """
def bug_xmltoolkit55():
    """
    make sure we're reporting the first error, not the last

    The document references three undefined entities; the ParseError
    must name the first one (&ldots;).

    >>> e = ET.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'><doc>&ldots;&ndots;&rdots;</doc>")
    Traceback (most recent call last):
    ParseError: undefined entity &ldots;: line 1, column 36
    """
  1364. class ExceptionFile:
  1365. def read(self, x):
  1366. raise IOError
def xmltoolkit60():
    """
    Handle crash in stream source.

    An exception raised by the source's read() method must propagate
    out of ET.parse() unchanged.

    >>> tree = ET.parse(ExceptionFile())
    Traceback (most recent call last):
    IOError
    """
# Input for xmltoolkit62(): references the entities &lsquo; and &rsquo;,
# which are declared nowhere in the document itself (the DTD is external).
XMLTOOLKIT62_DOC = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>"""
  1381. def xmltoolkit62():
  1382. """
  1383. Don't crash when using custom entities.
  1384. >>> xmltoolkit62()
  1385. u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.'
  1386. """
  1387. ENTITIES = {u'rsquo': u'\u2019', u'lsquo': u'\u2018'}
  1388. parser = ET.XMLTreeBuilder()
  1389. parser.entity.update(ENTITIES)
  1390. parser.feed(XMLTOOLKIT62_DOC)
  1391. t = parser.close()
  1392. return t.find('.//paragraph').text
def xmltoolkit63():
    """
    Check reference leak.

    The doctest calls this function 1000 times and verifies that the
    reference count of None is the same before and after.

    >>> xmltoolkit63()
    >>> count = sys.getrefcount(None)
    >>> for i in range(1000):
    ...     xmltoolkit63()
    >>> sys.getrefcount(None) - count
    0
    """
    # Drive a TreeBuilder through one start/data/end cycle.  Nothing is
    # returned: the doctest only looks at the effect on refcounts.
    tree = ET.TreeBuilder()
    tree.start("tag", {})
    tree.data("text")
    tree.end("tag")
  1407. # --------------------------------------------------------------------
def bug_200708_newline():
    r"""
    Preserve newlines in attributes.

    Newlines in attribute values are written as &#10; so that they
    survive a serialize/parse round trip.

    >>> e = ET.Element('SomeTag', text="def _f():\n return 3\n")
    >>> ET.tostring(e)
    '<SomeTag text="def _f():&#10; return 3&#10;" />'
    >>> ET.XML(ET.tostring(e)).get("text")
    'def _f():\n return 3\n'
    >>> ET.tostring(ET.XML(ET.tostring(e)))
    '<SomeTag text="def _f():&#10; return 3&#10;" />'
    """
def bug_200708_close():
    """
    Test that XMLParser.close() returns the result of the builder.

    Test default builder.

    >>> parser = ET.XMLParser() # default
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    Test custom builder.

    >>> class EchoTarget:
    ...     def close(self):
    ...         return ET.Element("element") # simulate root
    >>> parser = ET.XMLParser(EchoTarget())
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'
    """
def bug_200709_default_namespace():
    """
    Test serialization with the default_namespace option.

    Names in the default namespace are written without a prefix, names
    in other namespaces get a generated prefix, and unqualified names
    are rejected.

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> serialize(e, default_namespace="default") # 1
    '<elem xmlns="default"><elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "{not-default}elem")
    >>> serialize(e, default_namespace="default") # 2
    '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "elem") # unprefixed name
    >>> serialize(e, default_namespace="default") # 3
    Traceback (most recent call last):
    ValueError: cannot use non-qualified names with default_namespace option
    """
def bug_200709_register_namespace():
    """
    Test ET.register_namespace().

    Before registration the namespace gets a generated prefix; after
    registration the registered prefix is used.

    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
    >>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'

    And the Dublin Core namespace is in the default list:

    >>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
    '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'
    """
def bug_200709_element_comment():
    """
    Not sure if this can be fixed, really (since the serializer needs
    ET.Comment, not cET.comment).

    Checks that the tag of a comment or processing-instruction node is
    the factory function that created it.

    >>> a = ET.Element('a')
    >>> a.append(ET.Comment('foo'))
    >>> a[0].tag == ET.Comment
    True

    >>> a = ET.Element('a')
    >>> a.append(ET.PI('foo'))
    >>> a[0].tag == ET.PI
    True
    """
def bug_200709_element_insert():
    """
    Test Element.insert() with index 0 and with a negative index.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> c = ET.SubElement(a, 'c')
    >>> d = ET.Element('d')
    >>> a.insert(0, d)
    >>> summarize_list(a)
    ['d', 'b', 'c']
    >>> a.insert(-1, d)
    >>> summarize_list(a)
    ['d', 'b', 'd', 'c']
    """
def bug_200709_iter_comment():
    """
    Test that Element.iter(ET.Comment) finds a nested comment node.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> comment_b = ET.Comment("TEST-b")
    >>> b.append(comment_b)
    >>> summarize_list(a.iter(ET.Comment))
    ['<Comment>']
    """
  1499. # --------------------------------------------------------------------
  1500. # reported on bugs.python.org
def bug_1534630():
    """
    Test TreeBuilder.data() called before the first start().

    The resulting tree serializes as an empty element.

    >>> bob = ET.TreeBuilder()
    >>> e = bob.data("data")
    >>> e = bob.start("tag", {})
    >>> e = bob.end("tag")
    >>> e = bob.close()
    >>> serialize(e)
    '<tag />'
    """
def check_issue6233():
    """
    Test serializing non-ASCII text to ASCII.

    Whether the source document is UTF-8 or ISO-8859-1, the character
    is written as a numeric character reference.

    >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\\xc3\\xa3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
    >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\\xe3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
    """
  1520. def check_issue3151():

Large files files are truncated, but you can click here to view the full file