/vendor/tools/parser/lib/parser/globo_esporte.rb

https://bitbucket.org/tbueno/planetafutebol · Ruby · 44 lines · 31 code · 13 blank · 0 comment · 1 complexity · 304360ed5e8f41f26b487b3ddd12aaf8 MD5 · raw file

  1. require 'open-uri'
  2. require 'action_view'
  3. module Parser
  # Feed entry for globoesporte.com items.
  #
  # Extends the Feedzirra RSS entry so that the entry body is taken from the
  # linked article page and every exposed piece of HTML goes through the
  # ActionView sanitizer.
  class GloboEsporteEntry < Feedzirra::Parser::RSSEntry
    include ActionView::Helpers::SanitizeHelper
    include SAXMachine
    include Feedzirra::FeedEntryUtilities

    # Returns the sanitized HTML body of the article.
    #
    # If @content is already set it is returned untouched. Otherwise the page
    # at `url` is downloaded, the first `div#materia-letra` element is
    # extracted and its inner HTML is sanitized, keeping only the alt, src,
    # title and href attributes. The result is cached in @content.
    #
    # NOTE(review): `url` is presumably provided by the RSSEntry superclass
    # (the entry's link) -- confirm against Feedzirra.
    #
    # @return [String] sanitized article HTML, or '' when the page has no
    #   `div#materia-letra` element
    def content
      return @content if @content

      # URI#open (mixed in by open-uri) only ever performs a network fetch,
      # whereas Kernel#open would run a shell command for a string starting
      # with "|". The block form also closes the handle once parsing is done.
      document = URI.parse(url).open { |io| Nokogiri::HTML(io) }
      article = document.css('div#materia-letra').first

      # Guard on the node itself: `first` returns nil when nothing matches,
      # and calling inner_html on nil would raise NoMethodError.
      @content = article ? sanitize(article.inner_html, :attributes => %w(alt src title href)) : ''
    end

    # Returns @summary sanitized so that only <p> tags are allowed.
    def summary
      sanitize @summary, :tags => %w(p)
    end

    # Every entry from this feed is attributed to the same fixed author.
    #
    # @return [String] always 'globoesporte.com'
    def author
      'globoesporte.com'
    end
  end
  # SAXMachine-based feed parser for the globoesporte.globo.com feed.
  #
  # Maps the feed's <title>, <link> (exposed as `url`) and <item> elements
  # (exposed as `entries`, each parsed as a GloboEsporteEntry).
  class GloboEsporte
    include SAXMachine
    include Feedzirra::FeedUtilities

    element :title
    element :link, :as => :url
    elements :item, :as => :entries, :class => GloboEsporteEntry

    attr_accessor :feed_url

    # Tells whether the given XML mentions the globoesporte.globo.com host.
    #
    # The dots are escaped so that only the literal host name matches; an
    # unescaped "." would accept any character in those positions.
    #
    # @param xml [String, nil] raw feed XML
    # @return [Integer, nil] index of the first match (truthy) or nil when the
    #   host name does not occur
    def self.able_to_parse?(xml)
      /globoesporte\.globo\.com/ =~ xml
    end
  end
  31. end