/vendor/packages/beautifulsoup4/bs4/tests/test_lxml.py

https://github.com/openhatch/oh-mainline · Python · 91 lines · 69 code · 14 blank · 8 comment · 5 complexity · 2b664dd0380f41ad100f3a0789cbbe59 MD5 · raw file

  1. """Tests to ensure that the lxml tree builder generates good trees."""
  2. import re
  3. import warnings
  4. try:
  5. import lxml.etree
  6. LXML_PRESENT = True
  7. LXML_VERSION = lxml.etree.LXML_VERSION
  8. except ImportError, e:
  9. LXML_PRESENT = False
  10. LXML_VERSION = (0,)
  11. if LXML_PRESENT:
  12. from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
  13. from bs4 import (
  14. BeautifulSoup,
  15. BeautifulStoneSoup,
  16. )
  17. from bs4.element import Comment, Doctype, SoupStrainer
  18. from bs4.testing import skipIf
  19. from bs4.tests import test_htmlparser
  20. from bs4.testing import (
  21. HTMLTreeBuilderSmokeTest,
  22. XMLTreeBuilderSmokeTest,
  23. SoupTest,
  24. skipIf,
  25. )
  26. @skipIf(
  27. not LXML_PRESENT,
  28. "lxml seems not to be present, not testing its tree builder.")
  29. class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
  30. """See ``HTMLTreeBuilderSmokeTest``."""
  31. @property
  32. def default_builder(self):
  33. return LXMLTreeBuilder()
  34. def test_out_of_range_entity(self):
  35. self.assertSoupEquals(
  36. "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
  37. self.assertSoupEquals(
  38. "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
  39. self.assertSoupEquals(
  40. "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
  41. # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
  42. # test if an old version of lxml is installed.
  43. @skipIf(
  44. not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
  45. "Skipping doctype test for old version of lxml to avoid segfault.")
  46. def test_empty_doctype(self):
  47. soup = self.soup("<!DOCTYPE>")
  48. doctype = soup.contents[0]
  49. self.assertEqual("", doctype.strip())
  50. def test_beautifulstonesoup_is_xml_parser(self):
  51. # Make sure that the deprecated BSS class uses an xml builder
  52. # if one is installed.
  53. with warnings.catch_warnings(record=True) as w:
  54. soup = BeautifulStoneSoup("<b />")
  55. self.assertEqual(u"<b/>", unicode(soup.b))
  56. self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
  57. def test_real_xhtml_document(self):
  58. """lxml strips the XML definition from an XHTML doc, which is fine."""
  59. markup = b"""<?xml version="1.0" encoding="utf-8"?>
  60. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
  61. <html xmlns="http://www.w3.org/1999/xhtml">
  62. <head><title>Hello.</title></head>
  63. <body>Goodbye.</body>
  64. </html>"""
  65. soup = self.soup(markup)
  66. self.assertEqual(
  67. soup.encode("utf-8").replace(b"\n", b''),
  68. markup.replace(b'\n', b'').replace(
  69. b'<?xml version="1.0" encoding="utf-8"?>', b''))
  70. @skipIf(
  71. not LXML_PRESENT,
  72. "lxml seems not to be present, not testing its XML tree builder.")
  73. class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
  74. """See ``HTMLTreeBuilderSmokeTest``."""
  75. @property
  76. def default_builder(self):
  77. return LXMLTreeBuilderForXML()