PageRenderTime 45ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/openappdotorg/vendor/plugins/hpricot-0.5.140/test/test_parser.rb

http://openappdotorg.googlecode.com/
Ruby | 347 lines | 299 code | 41 blank | 7 comment | 4 complexity | d05dc7adaabe6edebdbdf235c953170f MD5 | raw file
Possible License(s): LGPL-2.0, BSD-3-Clause, GPL-2.0
  1. #!/usr/bin/env ruby
  2. require 'test/unit'
  3. require 'hpricot'
  4. require 'load_files'
  5. class TestParser < Test::Unit::TestCase
  # Sets class="para" on every <p> found in the BASIC fixture, then checks
  # that the document still yields four paragraphs and that all four carry
  # the new class value.
  def test_set_attr
    doc = Hpricot.parse(TestFiles::BASIC)
    doc.search('//p').set('class', 'para')

    assert_equal(4, doc.search('//p').length)

    tagged = doc.search('//p').find_all { |para| para['class'] == 'para' }
    assert_equal(4, tagged.length)
  end
  12. # Test creating a new element
  # Builds an Elem by hand from a bare 'form' start tag and checks that
  # neither the element nor its attributes come back nil.
  def test_new_element
    form = Hpricot::Elem.new(Hpricot::STag.new('form'))

    assert_not_nil form
    assert_not_nil form.attributes
  end
  # A plain-text fragment passed to Hpricot.make should come back as a first
  # node whose content is that same text.
  def test_scan_text
    first_node = Hpricot.make("FOO").first
    assert_equal('FOO', first_node.content)
  end
  # Looks up an anchor in the BOINGBOING fixture by its exact href value and
  # checks that the href read back from the match equals the query value.
  def test_filter_by_attr
    doc = Hpricot.parse(TestFiles::BOINGBOING)
    # this link is escaped in the doc
    url = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'

    anchor = doc.at("a[@href='#{url}']")
    assert_equal(url, anchor['href'])
  end
  # get_element_by_id is exercised twice: once from the document root and
  # once from an element that was itself found by id ('body1').
  def test_get_element_by_id
    doc = Hpricot.parse(TestFiles::BASIC)

    assert_equal('link1', doc.get_element_by_id('link1')['id'])

    body = doc.get_element_by_id('body1')
    assert_equal('link1', body.get_element_by_id('link1').get_attribute('id'))
  end
  # get_elements_by_tag_name is checked for 'a' (the first hit must be link1)
  # and for 'body' (whose first hit must contain link1 when searched by id).
  def test_get_element_by_tag_name
    doc = Hpricot.parse(TestFiles::BASIC)

    first_anchor = doc.get_elements_by_tag_name('a')[0]
    assert_equal('link1', first_anchor.get_attribute('id'))

    first_body = doc.get_elements_by_tag_name('body')[0]
    assert_equal('link1', first_body.get_element_by_id('link1').get_attribute('id'))
  end
  # Round trip: parse BASIC, serialise it with inner_html, parse that output
  # again, and run the shared scan_basic assertions on the re-parsed document.
  def test_output_basic
    original = Hpricot.parse(TestFiles::BASIC)
    reparsed = Hpricot.parse(original.inner_html)
    scan_basic(reparsed)
  end
  # Runs the shared scan_basic assertions on a freshly parsed BASIC fixture.
  def test_scan_basic
    doc = Hpricot.parse(TestFiles::BASIC)
    scan_basic(doc)
  end
  # Shared assertion helper (not itself a test): given an already-parsed
  # document, checks the node counts and attribute values that the callers
  # in this file expect from the BASIC fixture.
  #
  # doc - the parsed document to inspect.
  def scan_basic(doc)
    # Prologue: the first child is an XML declaration, and the second child
    # must serialise differently from it.
    assert_kind_of(Hpricot::XMLDecl, doc.children.first)
    assert_not_equal(doc.children.first.to_s, doc.children[1].to_s)

    # Several different routes to the same anchor.
    assert_equal('link1', doc.at('#link1')['id'])
    assert_equal('link1', doc.at("p a")['id'])
    assert_equal('link1', (doc/:p/:a).first['id'])
    assert_equal('link1', doc.search('p').at('a').get_attribute('id'))
    assert_equal('link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id'))

    # Positional filters: ':nth(2)' and '[3]' are both expected to select
    # the element at index 2 of the paragraph list.
    assert_equal((doc/'p')[2], (doc/'p').filter(':nth(2)')[0])
    assert_equal((doc/'p')[2], (doc/'p').filter('[3]')[0])
    assert_equal(4, (doc/'p').filter('*').length)
    assert_equal(4, (doc/'p').filter('* *').length)

    # Class filtering and negation.
    ohmy = (doc/'p').filter('.ohmy')
    assert_equal(1, ohmy.length)
    assert_equal('ohmy', ohmy.first.get_attribute('class'))
    assert_equal(3, (doc/'p:not(.ohmy)').length)
    assert_equal(3, (doc/'p').not('.ohmy').length)
    assert_equal(3, (doc/'p').not(ohmy.first).length)

    # Attribute predicates.
    assert_equal(2, (doc/'p').filter('[@class]').length)
    assert_equal('last final', (doc/'p[@class~="final"]').first.get_attribute('class'))
    assert_equal(1, (doc/'p').filter('[@class~="final"]').length)

    # Child and sibling combinators.
    assert_equal(2, (doc/'p > a').length)
    assert_equal(1, (doc/'p.ohmy > a').length)
    assert_equal(2, (doc/'p / a').length)
    assert_equal(2, (doc/'link ~ link').length)
    assert_equal(3, (doc/'title ~ link').length)

    # XPath text() node selection.
    assert_equal(5, (doc/"//p/text()").length)
    assert_equal(6, (doc/"//p[a]//text()").length)
    assert_equal(2, (doc/"//p/a/text()").length)
  end
  # ':eq(0)', ':first' and ':first()' must all pick the first <p> inside the
  # <div>, even though a <br/> precedes it.
  def test_positional
    doc = Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
    first_para = "<p>one</p>"

    assert_equal(first_para, doc.search("//div/p:eq(0)").to_s)
    assert_equal(first_para, doc.search("//div/p:first").to_s)
    assert_equal(first_para, doc.search("//div/p:first()").to_s)
  end
  # Finds a form in the PACE_APPLICATION fixture by an unquoted name
  # predicate and checks its 'method' attribute.
  def test_pace
    doc = Hpricot(TestFiles::PACE_APPLICATION)
    form = doc.at('form[@name=frmSect11]')
    assert_equal('get', form['method'])
    # assert_equal '2', doc.at('#hdnSpouse')['value']
  end
  # Pins the number of matches for a range of selectors against the
  # BOINGBOING fixture: class selectors, attribute predicates, comment()
  # nodes and the text() comparison operators (=, !=, *=, $=).
  def test_scan_boingboing
    doc = Hpricot.parse(TestFiles::BOINGBOING)
    headline = 'College kids reportedly taking more smart drugs'

    assert_equal(60, (doc/'p.posted').length)
    assert_equal(1, doc.search("//a[@name='027906']").length)
    assert_equal(10, doc.search("script comment()").length)
    assert_equal(3, doc.search("a[text()*='Boing']").length)

    # Exact text match hits the full headline only, never a prefix of it.
    assert_equal(1, doc.search("h3[text()='#{headline}']").length)
    assert_equal(0, doc.search("h3[text()='College']").length)
    assert_equal(60, doc.search("h3").length)
    assert_equal(59, doc.search("h3[text()!='#{headline}']").length)
    assert_equal(17, doc.search("h3[text()$='s']").length)

    assert_equal(128, doc.search("p[text()]").length)
    assert_equal(211, doc.search("p").length)
  end
  # Inserts a sibling before the original div, then another sibling before
  # that new one, and checks the original div can still be found by id.
  def test_reparent
    doc = Hpricot(%{<div id="blurb_1"></div>})

    original = doc.search('#blurb_1')
    original.before('<div id="blurb_0"></div>')

    inserted = doc.search('#blurb_0')
    inserted.before('<div id="blurb_a"></div>')

    assert_equal('div', doc.at('#blurb_1').name)
  end
  # Steps from <title> to its next sibling and back again: the next sibling
  # must have href 'test1.css', and that node's previous sibling must be the
  # title.
  def test_siblings
    doc = Hpricot.parse(TestFiles::BASIC)
    title = doc.at(:title)
    following = title.next_sibling

    assert_equal('test1.css', following['href'])
    assert_equal('title', following.previous_sibling.name)
  end
  # ':not(.final)' is expected to leave three <p> elements in the BASIC
  # fixture.
  def test_css_negation
    doc = Hpricot.parse(TestFiles::BASIC)
    non_final = doc/'p:not(.final)'
    assert_equal(3, non_final.length)
  end
  # After removing 'class' from every <p>, no paragraph should match the
  # [@class] predicate any more.
  def test_remove_attribute
    doc = Hpricot.parse(TestFiles::BASIC)
    (doc/:p).each do |para|
      para.remove_attribute('class')
    end
    assert_equal(0, (doc/'p[@class]').length)
  end
  # Absolute and parent-axis XPath expressions against the BOINGBOING
  # fixture, with the expected match counts pinned.
  def test_abs_xpath
    doc = Hpricot.parse(TestFiles::BOINGBOING)

    assert_equal(60, doc.search("/html/body//p[@class='posted']").length)
    assert_equal(60, doc.search("/*/body//p[@class='posted']").length)
    assert_equal(18, doc.search("//script").length)

    # '..' climbs to the script's parent before descending to its div children.
    sibling_divs = doc.search("//script/../div")
    assert_equal(2, sibling_divs.length)
    assert_equal(1, sibling_divs.search('a').length)

    # The single-expression search and the two-step search are expected to
    # return different counts (15 vs 17).
    images = doc.search('//div/p/a/img')
    assert_equal(15, images.length)
    assert_equal(17, doc.search('//div').search('p/a/img').length)
    assert(images.all? { |node| node.name == 'img' })
  end
  # XPath predicates in square brackets: attribute equality, nested path
  # predicates (with and without a leading slash) and attribute presence.
  def test_predicates
    doc = Hpricot.parse(TestFiles::BOINGBOING)

    assert_equal(2, doc.search('//link[@rel="alternate"]').length)

    # Predicate path with a leading slash.
    rooted = doc.search('//div/p[/a/img]')
    assert_equal(15, rooted.length)
    assert(rooted.all? { |node| node.name == 'p' })

    # Same predicate without the leading slash; a different count is expected.
    relative = doc.search('//div/p[a/img]')
    assert_equal(18, relative.length)
    assert(relative.all? { |node| node.name == 'p' })

    assert_equal(1, doc.search('//input[@checked]').length)
  end
  # Colon-style positional predicates (:last, :eq(n), :last-of-type) checked
  # against both the BOINGBOING and BASIC fixtures.
  def test_alt_predicates
    boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal(2, boingboing.search('//table/tr:last').length)

    basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal("<p>The third paragraph</p>", basic.search('p:eq(2)').to_html)
    assert_equal('<p class="last final"><b>THE FINAL PARAGRAPH</b></p>', basic.search('p:last').to_html)

    last_of_type = basic.search('//p:last-of-type').first
    assert_equal('last final', last_of_type.get_attribute('class'))
  end
  # Regression test for ticket #63: calling #after with a fragment holding
  # two sibling <p> elements must place both of them, in order, directly
  # after the target <div>.
  def test_insert_after # ticket #63
    doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
    (doc/'div').each do |element|
      element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
    end

    expected = '<html><body><div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p></body></html>'
    # Expected value goes first so that a failure report labels the expected
    # and actual markup the right way round.
    assert_equal expected, doc.to_html
  end
  # Regression test for ticket #61: calling #before with a fragment holding
  # two sibling <p> elements must place both of them, in order, directly
  # before the target <div>.
  def test_insert_before # ticket #61
    doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
    (doc/'div').each do |element|
      element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
    end

    expected = '<html><body><p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div></body></html>'
    # Expected value goes first so that a failure report labels the expected
    # and actual markup the right way round.
    assert_equal expected, doc.to_html
  end
  # Multiple selectors in one query: a comma-separated CSS list and a
  # '|'-separated XPath union, each with its expected combined match count.
  def test_many_paths
    doc = Hpricot.parse(TestFiles::BOINGBOING)

    css_union = doc.search('p.posted, link[@rel="alternate"]')
    assert_equal(62, css_union.length)

    xpath_union = doc.search('//div/p[a/img]|//link[@rel="alternate"]')
    assert_equal(20, xpath_union.length)
  end
  # Searching within a search result must again yield an Hpricot::Elements.
  def test_stacked_search
    doc = Hpricot.parse(TestFiles::BOINGBOING)
    nested = doc.search('//div/p').search('a img')
    assert_kind_of(Hpricot::Elements, nested)
  end
  # Class selectors against unquoted class attribute values.
  def test_class_search
    # test case sent by Chih-Chao Lam
    # NOTE(review): the stray trailing quote in class=xyz' looks like the
    # malformed input this case was reported with - confirm before "fixing".
    mangled = Hpricot("<div class=xyz'>abc</div>")
    assert_equal(1, mangled.search(".xyz").length)

    pair = Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
    assert_equal(1, pair.search(".xyz").length)
    assert_equal(4, pair.search("*").length)
  end
  # The '*' wildcard combined with class matching: each query below is
  # expected to find exactly two of the four elements.
  def test_kleene_star
    # bug noticed by raja bhatia
    markup = "<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>"
    doc = Hpricot(markup)

    assert_equal(2, doc.search("*[@class*='small']").length)
    assert_equal(2, doc.search("*.small").length)
    assert_equal(2, doc.search(".small").length)
    assert_equal(2, doc.search(".large").length)
  end
  # A comment with no body and a comment holding a single space must both
  # be parsed as comment nodes inside the <p>.
  def test_empty_comment
    empty = Hpricot("<p><!----></p>")
    assert(empty.children[0].children[0].comment?)

    blank = Hpricot("<p><!-- --></p>")
    assert(blank.children[0].children[0].comment?)
  end
  # Checks every attribute expected on the <body> tag of the IMMOB fixture.
  # NOTE(review): going by the test name, the fixture presumably spreads these
  # attributes across several lines - confirm against the fixture file.
  def test_body_newlines
    doc = Hpricot.parse(TestFiles::IMMOB)
    body = doc.at(:body)

    expected_attributes = {
      'background'   => '',
      'bgcolor'      => '#ffffff',
      'text'         => '#000000',
      'marginheight' => '10',
      'marginwidth'  => '10',
      'leftmargin'   => '10',
      'topmargin'    => '10',
      'link'         => '#000066',
      'alink'        => '#ff6600',
      'hlink'        => '#ff6600',
      'vlink'        => '#000000'
    }

    expected_attributes.each do |name, value|
      assert_equal(value, body[name])
    end
  end
  # A div nested in a div: the descendant selector "div div" must match only
  # the inner one.
  def test_nested_twins
    doc = Hpricot("<div>Hi<div>there</div></div>")
    inner_divs = doc/"div div"
    assert_equal(1, inner_divs.length)
  end
  # Wildcard with an attribute-presence predicate, in CSS and XPath form;
  # both are expected to find three elements with an id in BASIC.
  def test_wildcard
    doc = Hpricot.parse(TestFiles::BASIC)
    assert_equal(3, (doc/"*[@id]").length)
    assert_equal(3, (doc/"//*[@id]").length)
  end
  # The first <script> in the IMMOB fixture must keep its inner markup as
  # text: three literal "<LINK" occurrences are expected in its inner_html.
  def test_javascripts
    doc = Hpricot.parse(TestFiles::IMMOB)
    first_script = (doc/:script)[0]
    assert_equal(3, first_script.inner_html.scan(/<LINK/).length)
  end
  # Counts the anchors in the WEEK9 fixture whose inner HTML mentions
  # "GameCenter"; fourteen are expected.
  def test_nested_scripts
    doc = Hpricot.parse(TestFiles::WEEK9)
    game_links = (doc/"a").find_all { |anchor| anchor.inner_html.include? "GameCenter" }
    assert_equal(14, game_links.length)
  end
  # The USWEBGEN fixture is expected to yield 67 anchors.
  def test_uswebgen
    doc = Hpricot.parse(TestFiles::USWEBGEN)
    # sent by brent beardsley, hpricot 0.3 had problems with all the links.
    anchors = doc/:a
    assert_equal(67, anchors.length)
  end
  # Four variants of a <form> tag with stray '?URL=' junk in different
  # positions. Each must still parse to exactly one form with an intact
  # action attribute.
  def test_mangled_tags
    expected_action = '/units/a/login/1,13088,779-1,00.html'
    mangled_forms = [
      %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
      %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
      %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
      %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}
    ]

    mangled_forms.each do |markup|
      doc = Hpricot(markup)
      assert_equal(1, (doc/:form).length)
      assert_equal(expected_action, doc.at("form")['action'])
    end
  end
  # Two processing instructions separated by a newline: child 0 is the PHP
  # one (checked via #target), child 2 is the xml one (checked via #content).
  def test_procins
    doc = Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
    nodes = doc.children

    assert_equal("php", nodes[0].target)
    assert_equal("blah='blah'", nodes[2].content)
  end
  # An <input> whose value attribute is 22 lines of 2000 "X" characters is
  # expected to make the parser raise Hpricot::ParseError.
  # NOTE(review): the string passed after the exception class is presumably
  # treated by test/unit as the failure message, not matched against the
  # raised error's text - confirm for the test/unit version in use.
  def test_buffer_error
    oversized_value = (("X" * 2000) + "\n") * 22
    markup = %{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{oversized_value}" />\n\n</p>}

    assert_raise Hpricot::ParseError, "ran out of buffer space on element <input>, starting on line 3." do
      Hpricot(markup)
    end
  end
  # Two embedded YouTube <object> blocks: a <param> must be locatable by its
  # full URL value, and that value must read back unchanged.
  # NOTE(review): the fixture ends with "</html?" rather than "</html>";
  # presumably deliberate malformed input - confirm before changing it.
  def test_youtube_attr
    # Heredoc text is kept flush-left so the string matches the source as given.
    markup = <<-edoc
<html><body>
Lorem ipsum. Jolly roger, ding-dong sing-a-long
<object width="425" height="350">
<param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
<param name="wmode" value="transparent"></param>
<embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
</embed>
</object>
Check out my posting, I have bright mice in large clown cars.
<object width="425" height="350">
<param name="movie" value="http://www.youtube.com/v/foobar"></param>
<param name="wmode" value="transparent"></param>
<embed src="http://www.youtube.com/v/foobar"
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
</embed>
</object>
</body></html?
    edoc
    doc = Hpricot(markup)

    movie_param = doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")
    assert_equal("http://www.youtube.com/v/NbDQ4M_cuwA", movie_param['value'])
  end
  # The ':parent' and ':empty' pseudo-filters applied to <title> and <p> in
  # the BASIC fixture, with the expected match counts pinned.
  def test_filters
    doc = Hpricot.parse(TestFiles::BASIC)

    assert_equal(0, (doc/"title:parent").size)
    assert_equal(3, (doc/"p:parent").size)

    assert_equal(1, (doc/"title:empty").size)
    assert_equal(1, (doc/"p:empty").size)
  end
  # A <script> whose body is a comment-wrapped CDATA section must serialise
  # back to exactly the markup it was parsed from.
  def test_keep_cdata
    # The line break inside the script is written as \n rather than as a
    # literal newline in the source.
    markup = "<script> /*<![CDATA[*/\n/*]]>*/ </script>"
    assert_equal(markup, Hpricot(markup).to_html)
  end
  # Parses a namespaced XML chunk with Hpricot::XML and checks that the
  # prefixed element can be found by its qualified name.
  def test_namespace
    # Heredoc text is kept flush-left so the string matches the source as given.
    chunk = <<-END
<a xmlns:t="http://www.nexopia.com/dev/template">
<t:sam>hi </t:sam>
</a>
    END
    doc = Hpricot::XML(chunk)

    prefixed = doc/"//t:sam"
    assert(prefixed.size > 0) # at least this should probably work
    # assert (doc/"//sam").size > 0 # this would be nice
  end
  303. end