PageRenderTime 45ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/test/test_feednormalizer.rb

http://github.com/aasmith/feed-normalizer
Ruby | 277 lines | 229 code | 45 blank | 3 comment | 0 complexity | a034b038d1c9283117b82dd319f1b4ba MD5 | raw file
  1. $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), '../lib')))
  2. require 'test/unit'
  3. require 'feed-normalizer'
  4. require 'yaml'
  5. class FeedNormalizerTest < Test::Unit::TestCase
  # Raw contents of every fixture in the data directory, keyed by the file
  # name without its extension as a symbol (data/rss20.xml => :rss20).
  XML_FILES = {}

  # Short alias for the library's top-level namespace.
  Fn = FeedNormalizer

  data_dir = File.join(File.dirname(__FILE__), 'data')

  # Load up the xml files. Dir.foreach closes the directory handle when it is
  # done, whereas a bare Dir.open(...).each leaves it open until GC.
  Dir.foreach(data_dir) do |fn|
    next unless fn =~ /[.]xml$/
    XML_FILES[File.basename(fn, File.extname(fn)).to_sym] = File.read(File.join(data_dir, fn))
  end
  # Parsing the :rss20 fixture with default options returns a Feed.
  def test_basic_parse
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20])
    assert_kind_of Fn::Feed, feed
  end
  # Forcing RubyRssParser with :try_others enabled returns a Feed for the
  # :rss20 fixture.
  def test_force_parser
    feed = Fn::FeedNormalizer.parse(
      XML_FILES[:rss20],
      :force_parser => Fn::RubyRssParser,
      :try_others => true
    )
    assert_kind_of Fn::Feed, feed
  end
  # Forcing RubyRssParser with :try_others disabled still returns a Feed for
  # the :rss20 fixture.
  def test_force_parser_exclusive
    feed = Fn::FeedNormalizer.parse(
      XML_FILES[:rss20],
      :force_parser => Fn::RubyRssParser,
      :try_others => false
    )
    assert_kind_of Fn::Feed, feed
  end
  # RubyRssParser on its own returns a Feed for both the :rss20 and :rdf10
  # fixtures.
  def test_ruby_rss_parser
    [:rss20, :rdf10].each do |fixture|
      feed = Fn::FeedNormalizer.parse(XML_FILES[fixture],
                                      :force_parser => Fn::RubyRssParser, :try_others => false)
      assert_kind_of Fn::Feed, feed
    end
  end
  # SimpleRssParser on its own returns a Feed for both the :rss20 and :atom10
  # fixtures.
  def test_simple_rss_parser
    [:rss20, :atom10].each do |fixture|
      feed = Fn::FeedNormalizer.parse(XML_FILES[fixture],
                                      :force_parser => Fn::SimpleRssParser, :try_others => false)
      assert_kind_of Fn::Feed, feed
    end
  end
  # With RubyRssParser forced but no :try_others given, the :atom10 fixture
  # is reported as parsed by 'SimpleRSS'.
  def test_parser_failover_order
    feed = Fn::FeedNormalizer.parse(XML_FILES[:atom10], :force_parser => Fn::RubyRssParser)
    assert_equal 'SimpleRSS', feed.parser
  end
  # Forcing RubyRssParser on the :atom10 fixture with :try_others disabled
  # yields nil.
  def test_force_parser_fail
    result = Fn::FeedNormalizer.parse(
      XML_FILES[:atom10],
      :force_parser => Fn::RubyRssParser,
      :try_others => false
    )
    assert_nil result
  end
  # Input that is not a feed at all yields nil.
  def test_all_parsers_fail
    not_a_feed = "This isn't RSS or Atom!"
    assert_nil Fn::FeedNormalizer.parse(not_a_feed)
  end
  # With default options the :rss20 fixture reports 'RSS::Parser' and the
  # :atom10 fixture reports 'SimpleRSS' as the parser used.
  def test_correct_parser_used
    [[:rss20, 'RSS::Parser'], [:atom10, 'SimpleRSS']].each do |fixture, parser_name|
      feed = Fn::FeedNormalizer.parse(XML_FILES[fixture])
      assert_equal parser_name, feed.parser
    end
  end
  # Checks feed-level and last-entry attributes of the :rss20 fixture parsed
  # with default options.
  def test_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20])

    # Feed-level attributes.
    assert_equal "BBC News | Technology | UK Edition", feed.title
    assert_equal ["http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm"], feed.urls
    assert_equal 15, feed.ttl
    assert_equal [6, 7, 8, 9, 10, 11], feed.skip_hours
    assert_equal ["Sunday"], feed.skip_days

    # Attributes of the last entry.
    final_entry = feed.entries.last
    assert_equal "MP3 player court order overturned", final_entry.title
    assert_equal "<b>SanDisk</b> puts its MP3 players back on display at a German electronics show after overturning a court injunction.", final_entry.description
    assert_match(/test\d/, final_entry.content)
    assert_instance_of Time, final_entry.date_published
  end
  # Checks feed-level and last-entry attributes of the :atom10 fixture parsed
  # with default options.
  def test_simplerss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:atom10])

    assert_equal "~:caboose", feed.title
    assert_equal "http://habtm.com/xml/atom10/feed.xml", feed.url
    # assert_nil states the intent directly and gives a clearer failure
    # message than comparing against a nil literal.
    assert_nil feed.ttl
    assert_equal [], feed.skip_hours
    assert_equal [], feed.skip_days

    final_entry = feed.entries.last
    assert_equal "Starfish - Easy Distribution of Site Maintenance", final_entry.title
    assert_equal "urn:uuid:6c028f36-f87a-4f53-b7e3-1f943d2341f0", final_entry.id

    # The phrase must appear in the entry's content but not in its description.
    assert !final_entry.description.include?("google fame")
    assert final_entry.content.include?("google fame")
  end
  # Parses every loaded fixture with default options and checks that the
  # parser name, feed title, feed url and first entry url are all Strings.
  def test_sanity_check
    XML_FILES.each do |xml_file, xml|
      feed = Fn::FeedNormalizer.parse(xml)

      # A fixture that cannot be parsed comes back as nil; fail with the
      # fixture name instead of a NoMethodError on nil.
      assert_not_nil feed, "Could not parse #{xml_file}"

      fields = [feed.parser, feed.title, feed.url, feed.entries.first.url]
      assert fields.all? { |field| field.is_a?(String) }, "Not everything was a String in #{xml_file}"
    end
  end
  # Two independent parses of the same fixture compare equal; parses of
  # different fixtures do not.
  def test_feed_equality
    # Every call parses afresh, so each comparison uses separate objects.
    parse = lambda { |fixture| Fn::FeedNormalizer.parse(XML_FILES[fixture]) }

    assert_equal parse.call(:rss20), parse.call(:rss20)
    assert_equal parse.call(:atom10), parse.call(:atom10)

    assert_not_equal parse.call(:atom03), parse.call(:atom10)
    assert_not_equal parse.call(:rss20), parse.call(:atom10)
    assert_not_equal parse.call(:rss20), parse.call(:rss20diff)
  end
  # Diffs the :rss20 feed against :rss20diff, :rss20diff_short and itself,
  # then checks the reported keys and item differences.
  def test_feed_diff
    base = Fn::FeedNormalizer.parse(XML_FILES[:rss20])
    full_diff = base.diff(Fn::FeedNormalizer.parse(XML_FILES[:rss20diff]))
    short_diff = base.diff(Fn::FeedNormalizer.parse(XML_FILES[:rss20diff_short]))
    self_diff = base.diff(base)

    permitted_keys = [:title, :items]

    assert full_diff.keys.all? { |key| permitted_keys.include?(key) }
    assert_equal 3, full_diff[:items].size

    assert short_diff.keys.all? { |key| permitted_keys.include?(key) }
    assert_equal [3,2], short_diff[:items]

    # A feed diffed against itself reports nothing.
    assert self_diff.empty?
  end
  # A parsed feed survives a Marshal dump/load round trip without raising.
  def test_marshal
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20])
    assert_nothing_raised do
      dumped = Marshal.dump(feed)
      Marshal.load(dumped)
    end
  end
  # A parsed feed survives a YAML dump/load round trip without raising.
  def test_yaml
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20])
    assert_nothing_raised do
      yaml = YAML.dump(feed)
      # With Psych 4, YAML.load is the safe loader and refuses arbitrary
      # objects. The document was produced by this test, so it is trusted
      # and the unrestricted loader is used where it exists.
      if YAML.respond_to?(:unsafe_load)
        YAML.unsafe_load(yaml)
      else
        YAML.load(yaml)
      end
    end
  end
  # Calling an unknown method on a parsed feed raises NoMethodError.
  def test_method_missing
    assert_raise(NoMethodError) do
      Fn::FeedNormalizer.parse(XML_FILES[:rss20]).nonexistent
    end

    # Second check of Singular's method_missing: flattening a 2-D array of
    # FeedNormalizer::Entry objects sends :to_ary to each Entry, and that
    # must not raise.
    assert_nothing_raised do
      nested = [[Fn::Entry.new], [Fn::Entry.new]]
      nested.flatten
    end
  end
  # The first :atom10 entry contains <plaintext> in both content and
  # description before clean!, and in neither afterwards.
  def test_clean
    feed = Fn::FeedNormalizer.parse(XML_FILES[:atom10])

    before = feed.entries.first
    assert_match(/<plaintext>/, before.content)
    assert_match(/<plaintext>/, before.description)

    feed.clean!

    # Look the entry up again rather than reusing the pre-clean reference.
    after = feed.entries.first
    assert_no_match(/<plaintext>/, after.content)
    assert_no_match(/<plaintext>/, after.description)
  end
  # Parsing an empty <feed> element does not raise.
  def test_malformed_feed
    empty_feed = '<feed></feed>'
    assert_nothing_raised do
      Fn::FeedNormalizer.parse(empty_feed)
    end
  end
  # With RubyRssParser only, the first :rdf10 entry has a Time as its
  # publication date.
  def test_dublin_core_date_ruby_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rdf10], :force_parser => Fn::RubyRssParser, :try_others => false)
    published = feed.entries.first.date_published
    assert_instance_of Time, published
  end
  # With SimpleRssParser only, the first :rdf10 entry has a Time as its
  # publication date.
  def test_dublin_core_date_simple_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rdf10], :force_parser => Fn::SimpleRssParser, :try_others => false)
    published = feed.entries.first.date_published
    assert_instance_of Time, published
  end
  # With RubyRssParser only, the last :rdf10 entry's author is 'Jeff Hecht'.
  def test_dublin_core_creator_ruby_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rdf10], :force_parser => Fn::RubyRssParser, :try_others => false)
    author = feed.entries.last.author
    assert_equal 'Jeff Hecht', author
  end
  # With SimpleRssParser only, the last :rdf10 entry's author is 'Jeff Hecht'.
  def test_dublin_core_creator_simple_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rdf10], :force_parser => Fn::SimpleRssParser, :try_others => false)
    author = feed.entries.last.author
    assert_equal 'Jeff Hecht', author
  end
  # With RubyRssParser only, the :rss20 items carry the expected categories,
  # in item order.
  def test_entry_categories_ruby_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => Fn::RubyRssParser, :try_others => false)
    per_item = feed.items.collect { |item| item.categories }
    assert_equal [['Click'],['Technology'],[]], per_item
  end
  # With SimpleRssParser only, the :rss20 items carry the expected
  # categories, in item order.
  def test_entry_categories_simple_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => Fn::SimpleRssParser, :try_others => false)
    per_item = feed.items.collect { |item| item.categories }
    assert_equal [['Click'],['Technology'],[]], per_item
  end
  # With RubyRssParser only and :loose enabled, the :rss20 entries have
  # 1, 2 and 0 categories respectively.
  def test_loose_categories_ruby_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => Fn::RubyRssParser, :try_others => false, :loose => true)
    category_counts = feed.entries.collect { |entry| entry.categories.size }
    assert_equal [1,2,0], category_counts
  end
  # With SimpleRssParser only and :loose enabled, the :rss20 entries have
  # 1, 1 and 0 categories respectively.
  def test_loose_categories_simple_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => Fn::SimpleRssParser, :try_others => false, :loose => true)
    category_counts = feed.entries.collect { |entry| entry.categories.size }
    assert_equal [1,1,0], category_counts
  end
  # With SimpleRssParser only, the content of the Nth :rss20 entry (counting
  # from 1) contains "<p>testN</p>".
  def test_content_encoded_simple_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => Fn::SimpleRssParser, :try_others => false)
    feed.entries.each_with_index do |entry, i|
      body = entry.content
      assert_match(/\s*<p>test#{i+1}<\/p>\s*/, body)
    end
  end
  # With RubyRssParser only, the content of the Nth :rss20 entry (counting
  # from 1) contains "<p>testN</p>".
  def test_content_encoded_ruby_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => Fn::RubyRssParser, :try_others => false)
    feed.entries.each_with_index do |entry, i|
      body = entry.content
      assert_match(/\s*<p>test#{i+1}<\/p>\s*/, body)
    end
  end
  # With SimpleRssParser only, the last :atom10 entry's content contains
  # exactly two '+' characters.
  def test_atom_content_contains_pluses
    feed = Fn::FeedNormalizer.parse(XML_FILES[:atom10], :force_parser => Fn::SimpleRssParser, :try_others => false)
    plus_signs = feed.entries.last.content.scan(/\+/)
    assert_equal 2, plus_signs.size
  end
  # http://code.google.com/p/feed-normalizer/issues/detail?id=13
  #
  # Redefines Time#to_s so that it accepts an argument, then checks that the
  # feed's last_updated and the first entry's date_published, rendered via
  # to_s(:foo), equal the expected timestamps rendered via to_s.
  def test_times_are_reparsed
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => Fn::RubyRssParser, :try_others => false)

    Time.class_eval "alias :old_to_s :to_s; def to_s(x=1); old_to_s; end"
    begin
      assert_equal Time.parse("Sat Sep 09 10:57:06 -0400 2006").to_s, feed.last_updated.to_s(:foo)
      assert_equal Time.parse("Sat Sep 09 08:45:35 -0400 2006").to_s, feed.entries.first.date_published.to_s(:foo)
    ensure
      # Undo the patch so it cannot leak into other tests. Applying it a
      # second time on top of itself would make to_s call itself forever.
      Time.class_eval "alias :to_s :old_to_s; remove_method :old_to_s"
    end
  end
  # With :issued removed from SimpleRSS's item tags, the first :atom03 entry
  # has no publication date; with it present, the date is 2006-08-29
  # 02:31:03 UTC.
  def test_atom03_has_issued
    SimpleRSS.class_eval "@@item_tags.delete(:issued)"
    begin
      feed = Fn::FeedNormalizer.parse(XML_FILES[:atom03], :force_parser => Fn::SimpleRssParser, :try_others => false)
      assert_nil feed.entries.first.date_published
    ensure
      # Put the tag back even when the assertion above fails, so the shared
      # SimpleRSS configuration is not left altered for other tests.
      SimpleRSS.class_eval "@@item_tags << :issued"
    end

    feed = Fn::FeedNormalizer.parse(XML_FILES[:atom03], :force_parser => Fn::SimpleRssParser, :try_others => false)
    # Build the expected text with Time#to_s instead of hard-coding it: the
    # default to_s format is not the same in every Ruby version.
    assert_equal Time.utc(2006, 8, 29, 2, 31, 3).to_s, feed.entries.first.date_published.to_s
  end
  # For both parsers, the last :rss20 item's description matches the
  # expected SanDisk markup.
  #
  # NOTE(review): the test name says HTML is escaped, yet the expected text
  # contains literal tags. The entities may have been decoded when this
  # listing was extracted -- confirm against the upstream file.
  def test_html_should_be_escaped_by_default
    [Fn::RubyRssParser, Fn::SimpleRssParser].each do |parser|
      feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => parser, :try_others => false)
      assert_match "<b>SanDisk</b>", feed.items.last.description
    end
  end
  # The first :atom03 item's content contains the link and image markup with
  # absolute cheapstingybargains.com URLs.
  def test_relative_links_and_images_should_be_rewritten_with_url_base
    feed = Fn::FeedNormalizer.parse(XML_FILES[:atom03])

    expected_markup =
      '<a href="http://www.cheapstingybargains.com/link/tplclick?lid=41000000011334249&#038;pubid=21000000000053626"' +
      ' target=_"blank"><img src="http://www.cheapstingybargains.com/assets/images/product/productDetail/9990000058546711.jpg"' +
      ' width="150" height="150" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a>'

    assert_match expected_markup, feed.items.first.content
  end
  # With SimpleRssParser only, the first :atom10 entry's last_updated equals
  # the expected timestamp.
  def test_last_updated_simple_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:atom10], :force_parser => Fn::SimpleRssParser, :try_others => false)
    expected = Time.parse("Wed Aug 16 09:59:44 -0700 2006")
    assert_equal expected, feed.entries.first.last_updated
  end
  # With RubyRssParser only, the first :rss20 entry's last_updated equals its
  # date_published.
  def test_last_updated_ruby_rss
    feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => Fn::RubyRssParser, :try_others => false)
    published = feed.entries.first.date_published
    assert_equal published, feed.entries.first.last_updated
  end
  # For both parsers, the last :rss20 item has a non-nil description.
  def test_skip_empty_items
    [Fn::RubyRssParser, Fn::SimpleRssParser].each do |parser|
      feed = Fn::FeedNormalizer.parse(XML_FILES[:rss20], :force_parser => parser, :try_others => false)
      assert_not_nil feed.items.last.description
    end
  end
  213. end