/vendor/packages/mechanize/test/test_html.py

https://github.com/openhatch/oh-mainline · Python · 151 lines · 118 code · 30 blank · 3 comment · 9 complexity · 079f5c95370987158921a694de21636d MD5 · raw file

  1. #!/usr/bin/env python
  2. from unittest import TestCase
  3. import mechanize
  4. import mechanize._form
  5. from mechanize._response import test_html_response
  6. class RegressionTests(TestCase):
  7. def test_close_base_tag(self):
  8. # any document containing a </base> tag used to cause an exception
  9. br = mechanize.Browser()
  10. response = test_html_response("</base>")
  11. br.set_response(response)
  12. list(br.links())
  13. def test_bad_base_tag(self):
  14. # a document with a base tag with no href used to cause an exception
  15. for factory in [mechanize.DefaultFactory(), mechanize.RobustFactory()]:
  16. br = mechanize.Browser(factory=factory)
  17. response = test_html_response(
  18. "<BASE TARGET='_main'><a href='http://example.com/'>eg</a>")
  19. br.set_response(response)
  20. list(br.links())
  21. def test_robust_form_parser_uses_beautifulsoup(self):
  22. factory = mechanize.RobustFormsFactory()
  23. self.assertIs(factory.form_parser_class,
  24. mechanize._form.RobustFormParser)
  25. def test_form_parser_does_not_use_beautifulsoup(self):
  26. factory = mechanize.FormsFactory()
  27. self.assertIs(factory.form_parser_class, mechanize._form.FormParser)
  28. def _make_forms_from_bad_html(self, factory):
  29. bad_html = "<! -- : -- >"
  30. factory.set_response(test_html_response(bad_html), "utf-8")
  31. return list(factory.forms())
  32. def test_robust_form_parser_does_not_raise_on_bad_html(self):
  33. self._make_forms_from_bad_html(mechanize.RobustFormsFactory())
  34. def test_form_parser_fails_on_bad_html(self):
  35. self.assertRaises(
  36. mechanize.ParseError,
  37. self._make_forms_from_bad_html, mechanize.FormsFactory())
  38. class CachingGeneratorFunctionTests(TestCase):
  39. def _get_simple_cgenf(self, log):
  40. from mechanize._html import CachingGeneratorFunction
  41. todo = []
  42. for ii in range(2):
  43. def work(ii=ii):
  44. log.append(ii)
  45. return ii
  46. todo.append(work)
  47. def genf():
  48. for a in todo:
  49. yield a()
  50. return CachingGeneratorFunction(genf())
  51. def test_cache(self):
  52. log = []
  53. cgenf = self._get_simple_cgenf(log)
  54. for repeat in range(2):
  55. for ii, jj in zip(cgenf(), range(2)):
  56. self.assertEqual(ii, jj)
  57. self.assertEqual(log, range(2)) # work only done once
  58. def test_interleaved(self):
  59. log = []
  60. cgenf = self._get_simple_cgenf(log)
  61. cgen = cgenf()
  62. self.assertEqual(cgen.next(), 0)
  63. self.assertEqual(log, [0])
  64. cgen2 = cgenf()
  65. self.assertEqual(cgen2.next(), 0)
  66. self.assertEqual(log, [0])
  67. self.assertEqual(cgen.next(), 1)
  68. self.assertEqual(log, [0, 1])
  69. self.assertEqual(cgen2.next(), 1)
  70. self.assertEqual(log, [0, 1])
  71. self.assertRaises(StopIteration, cgen.next)
  72. self.assertRaises(StopIteration, cgen2.next)
  73. class UnescapeTests(TestCase):
  74. def test_unescape_charref(self):
  75. from mechanize._html import unescape_charref
  76. mdash_utf8 = u"\u2014".encode("utf-8")
  77. for ref, codepoint, utf8, latin1 in [
  78. ("38", 38, u"&".encode("utf-8"), "&"),
  79. ("x2014", 0x2014, mdash_utf8, "&#x2014;"),
  80. ("8212", 8212, mdash_utf8, "&#8212;"),
  81. ]:
  82. self.assertEqual(unescape_charref(ref, None), unichr(codepoint))
  83. self.assertEqual(unescape_charref(ref, 'latin-1'), latin1)
  84. self.assertEqual(unescape_charref(ref, 'utf-8'), utf8)
  85. def test_unescape(self):
  86. import htmlentitydefs
  87. from mechanize._html import unescape
  88. data = "&amp; &lt; &mdash; &#8212; &#x2014;"
  89. mdash_utf8 = u"\u2014".encode("utf-8")
  90. ue = unescape(data, htmlentitydefs.name2codepoint, "utf-8")
  91. self.assertEqual("& < %s %s %s" % ((mdash_utf8,)*3), ue)
  92. for text, expect in [
  93. ("&a&amp;", "&a&"),
  94. ("a&amp;", "a&"),
  95. ]:
  96. got = unescape(text, htmlentitydefs.name2codepoint, "latin-1")
  97. self.assertEqual(got, expect)
  98. class EncodingFinderTests(TestCase):
  99. def make_response(self, encodings):
  100. return mechanize._response.test_response(
  101. headers=[("Content-type", "text/html; charset=\"%s\"" % encoding)
  102. for encoding in encodings])
  103. def test_known_encoding(self):
  104. encoding_finder = mechanize._html.EncodingFinder("default")
  105. response = self.make_response(["utf-8"])
  106. self.assertEqual(encoding_finder.encoding(response), "utf-8")
  107. def test_unknown_encoding(self):
  108. encoding_finder = mechanize._html.EncodingFinder("default")
  109. response = self.make_response(["bogus"])
  110. self.assertEqual(encoding_finder.encoding(response), "default")
  111. def test_precedence(self):
  112. encoding_finder = mechanize._html.EncodingFinder("default")
  113. response = self.make_response(["latin-1", "utf-8"])
  114. self.assertEqual(encoding_finder.encoding(response), "latin-1")
  115. def test_fallback(self):
  116. encoding_finder = mechanize._html.EncodingFinder("default")
  117. response = self.make_response(["bogus", "utf-8"])
  118. self.assertEqual(encoding_finder.encoding(response), "utf-8")
  119. if __name__ == "__main__":
  120. import unittest
  121. unittest.main()