/vendor/packages/beautifulsoup4/bs4/tests/test_lxml.py
https://github.com/openhatch/oh-mainline · Python · 91 lines · 69 code · 14 blank · 8 comment · 5 complexity · 2b664dd0380f41ad100f3a0789cbbe59 MD5 · raw file
- """Tests to ensure that the lxml tree builder generates good trees."""
- import re
- import warnings
# lxml is an optional dependency: record whether it can be imported and
# which version is installed, so the test classes below can skip themselves.
try:
    import lxml.etree
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError:
    LXML_PRESENT = False
    # Placeholder that sorts below the (2, 3, 5, 0) threshold checked later.
    LXML_VERSION = (0,)
- if LXML_PRESENT:
- from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
- from bs4 import (
- BeautifulSoup,
- BeautifulStoneSoup,
- )
- from bs4.element import Comment, Doctype, SoupStrainer
- from bs4.testing import skipIf
- from bs4.tests import test_htmlparser
- from bs4.testing import (
- HTMLTreeBuilderSmokeTest,
- XMLTreeBuilderSmokeTest,
- SoupTest,
- skipIf,
- )
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Run the HTML smoke tests against lxml's HTML tree builder.

    See ``HTMLTreeBuilderSmokeTest`` for the inherited tests; the methods
    defined here cover lxml-specific cases.
    """

    @property
    def default_builder(self):
        """Return a fresh ``LXMLTreeBuilder`` instance."""
        return LXMLTreeBuilder()

    def test_out_of_range_entity(self):
        """Numeric character references beyond the Unicode range are dropped."""
        # Decimal reference far above the Unicode range.
        self.assertSoupEquals(
            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
        # Hexadecimal reference far above the Unicode range.
        self.assertSoupEquals(
            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
        # Smaller decimal reference that is still out of range.
        self.assertSoupEquals(
            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.
    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2, 3, 5, 0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        """An empty ``<!DOCTYPE>`` parses to a doctype with no text."""
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        """The deprecated ``BeautifulStoneSoup`` class parses as XML and warns."""
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as w:
            # Ask for every warning to be reported so the deprecation
            # notice is not hidden by the default warning filters.
            warnings.simplefilter("always")
            soup = BeautifulStoneSoup("<b />")
        # Tag.decode() gives the Unicode rendering on both Python 2 and 3.
        self.assertEqual(u"<b/>", soup.b.decode())
        # Search every recorded warning: with the "always" filter active,
        # unrelated warnings may be recorded ahead of the deprecation one.
        messages = [str(item.message) for item in w]
        self.assertTrue(
            any("BeautifulStoneSoup class is deprecated" in text
                for text in messages))

    def test_real_xhtml_document(self):
        """lxml strips the XML definition from an XHTML doc, which is fine."""
        markup = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>"""
        soup = self.soup(markup)
        # Compare with newlines removed; the expected output is the input
        # minus its XML declaration.
        self.assertEqual(
            soup.encode("utf-8").replace(b"\n", b''),
            markup.replace(b'\n', b'').replace(
                b'<?xml version="1.0" encoding="utf-8"?>', b''))
@skipIf(not LXML_PRESENT,
        "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """Run the XML smoke tests against lxml's XML tree builder.

    See ``XMLTreeBuilderSmokeTest`` for the inherited tests.
    """

    @property
    def default_builder(self):
        """Return a fresh ``LXMLTreeBuilderForXML`` instance."""
        xml_builder = LXMLTreeBuilderForXML()
        return xml_builder