/tests/test-broken-html.py

https://github.com/JonathanRRogers/twill · Python · 181 lines · 82 code · 40 blank · 59 comment · 4 complexity · e25d84d3aa8e88e6a313db4af2106ede MD5 · raw file

  1. """
  2. @CTB: still need to find something that only BS (and not the intolerant
  3. parser) can parse.
  4. """
  5. import twilltestlib
  6. from twill import commands
  7. def setup_module():
  8. global url
  9. url = twilltestlib.get_url()
  10. def test_links_parsing():
  11. commands.config('use_tidy', '0')
  12. commands.go('/broken_linktext')
  13. commands.follow('some text')
  14. commands.config('use_tidy', '1')
  15. def test_raw():
  16. """
  17. test parsing of raw, unfixed HTML.
  18. """
  19. b = commands.get_browser()
  20. commands.config('use_tidy', '0')
  21. commands.config('use_BeautifulSoup', '0')
  22. commands.config('allow_parse_errors', '0')
  23. commands.go(url)
  24. ###
  25. # Apparently, mechanize is more tolerant than it used to be.
  26. # commands.go('/tidy_fixable_html')
  27. # forms = [ i for i in b._browser.forms() ]
  28. # logging.info("forms: %s", forms)
  29. # assert len(forms) == 0, "there should be no correct forms on this page"
  30. ###
  31. commands.go('/BS_fixable_html')
  32. forms = [ i for i in b._browser.forms() ]
  33. assert len(forms) == 1, "there should be one mangled form on this page"
  34. ###
  35. # commands.go('/unfixable_html')
  36. # try:
  37. # b._browser.forms()
  38. # assert 0, "this page has a parse error."
  39. # except mechanize.ParseError:
  40. # pass
  41. def test_tidy():
  42. """
  43. test parsing of tidy-processed HTML.
  44. """
  45. b = commands.get_browser()
  46. commands.config('use_tidy', '1')
  47. commands.config('use_BeautifulSoup', '0')
  48. commands.config('allow_parse_errors', '0')
  49. commands.go(url)
  50. ###
  51. commands.go('/tidy_fixable_html')
  52. forms = [ i for i in b._browser.forms() ]
  53. assert len(forms) == 1, \
  54. "you must have 'tidy' installed for this test to pass"
  55. ###
  56. commands.go('/BS_fixable_html')
  57. forms = [ i for i in b._browser.forms() ]
  58. assert len(forms) == 1, \
  59. "there should be one mangled form on this page"
  60. ###
  61. # commands.go('/unfixable_html')
  62. # try:
  63. # b._browser.forms()
  64. # assert 0, "this page has a parse error."
  65. # except mechanize.ParseError:
  66. # pass
  67. def test_BeautifulSoup():
  68. """
  69. test parsing of BS-processed HTML.
  70. """
  71. b = commands.get_browser()
  72. commands.config('use_tidy', '0')
  73. commands.config('use_BeautifulSoup', '1')
  74. commands.config('allow_parse_errors', '0')
  75. commands.go(url)
  76. ###
  77. # Apparently, mechanize is more tolerant than it used to be.
  78. # commands.go('/tidy_fixable_html')
  79. # forms = [ i for i in b._browser.forms() ]
  80. # assert len(forms) == 0, \
  81. # "there should be no correct forms on this page"
  82. ###
  83. commands.go('/BS_fixable_html')
  84. forms = [ i for i in b._browser.forms() ]
  85. assert len(forms) == 1, \
  86. "there should be one mangled form on this page"
  87. ###
  88. # this no longer breaks... @CTB
  89. # commands.go('/unfixable_html')
  90. # try:
  91. # b._browser.forms()
  92. # assert 0, "this page has a parse error."
  93. # except mechanize.ParseError:
  94. # pass
  95. def test_allow_parse_errors():
  96. """
  97. test nice parsing.
  98. """
  99. b = commands.get_browser()
  100. commands.config('use_tidy', '0')
  101. commands.config('use_BeautifulSoup', '1')
  102. commands.config('allow_parse_errors', '1')
  103. commands.go(url)
  104. commands.go('/unfixable_html')
  105. b._browser.forms()
  106. def test_global_form():
  107. """
  108. test the handling of global form elements
  109. """
  110. b = commands.get_browser()
  111. commands.config('use_tidy', '0')
  112. commands.go(url)
  113. commands.go('/effed_up_forms')
  114. forms = list(b._browser.forms())
  115. assert len(forms) == 1
  116. assert b._browser.global_form()
  117. def test_effed_up_forms2():
  118. """
  119. should always succeed; didn't back ~0.7.
  120. """
  121. commands.config('use_tidy', '1')
  122. commands.config('use_BeautifulSoup', '1')
  123. commands.config('allow_parse_errors', '0')
  124. commands.go(url)
  125. commands.go('/effed_up_forms2')
  126. b = commands.get_browser()
  127. forms = [ i for i in b._browser.forms() ]
  128. form = forms[0]
  129. assert len(form.controls) == 3, "you must have 'tidy' installed for this test to pass"
  130. # Apparently, mechanize is more correct than it used to be.
  131. # # with a more correct form parser this would work like the above.
  132. # commands.config('use_tidy', '0')
  133. # commands.reload()
  134. # forms = [ i for i in b._browser.forms() ]
  135. # form = forms[0]
  136. # assert len(form.controls) == 1, \
  137. # "Expected exactly one control, but got: %s" % form.controls