PageRenderTime 54ms CodeModel.GetById 29ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/simple-rss.rb

https://github.com/knuton/simple-rss
Ruby | 168 lines | 144 code | 21 blank | 3 comment | 15 complexity | a4a7f057ddc1b198561a97aa34d04b45 MD5 | raw file
Possible License(s): LGPL-2.1
  1. require 'cgi'
  2. require 'time'
  # Regexp-based reader for RSS and Atom documents.
  #
  # The constructor reads the whole document into memory and parses it
  # immediately. Feed-level values found in the document become instance
  # variables with reader methods (for example +title+ or +link+); every
  # +item+/+entry+ element becomes a Hash stored in #items.
  #
  # The tag lists are class-wide and may be replaced or extended through
  # SimpleRSS.feed_tags / SimpleRSS.item_tags. An item tag may carry a
  # selector:
  # * <tt>:'link+alternate'</tt> selects the +link+ element whose +rel+
  #   attribute is "alternate"; the value is stored under that same key.
  # * <tt>:'media:content#url'</tt> selects the +url+ attribute of
  #   +media:content+; the value is stored under +:media_content_url+.
  class SimpleRSS
    VERSION = "1.2.3"

    attr_reader :items, :source
    alias :entries :items

    @@feed_tags = [
      :id,
      :title, :subtitle, :link,
      :description,
      :author, :webMaster, :managingEditor, :contributor,
      :pubDate, :lastBuildDate, :updated, :'dc:date',
      :generator, :language, :docs, :cloud,
      :ttl, :skipHours, :skipDays,
      :image, :logo, :icon, :rating,
      :rights, :copyright,
      :textInput, :'feedburner:browserFriendly',
      :'itunes:author', :'itunes:category'
    ]

    @@item_tags = [
      :id,
      :title, :link, :'link+alternate', :'link+self', :'link+edit', :'link+replies',
      :author, :contributor,
      :description, :summary, :content, :'content:encoded', :comments,
      :pubDate, :published, :updated, :expirationDate, :modified, :'dc:date',
      :category, :guid,
      :'trackback:ping', :'trackback:about',
      :'dc:creator', :'dc:title', :'dc:subject', :'dc:rights', :'dc:publisher',
      :'feedburner:origLink',
      :'media:content#url', :'media:content#type', :'media:content#height', :'media:content#width',
      :'media:title', :'media:thumbnail#url', :'media:thumbnail#height', :'media:thumbnail#width',
      :'media:credit', :'media:credit#role',
      :'media:category', :'media:category#scheme'
    ]

    # Mixed into every item Hash so that values can be read as methods
    # (+item.title+ is +item[:title]+). Unknown names return nil, exactly
    # like a missing Hash key.
    module ItemAccessors
      def method_missing(name, *_args)
        self[name]
      end

      # Keeps respond_to? truthful for the keys that are actually present.
      def respond_to_missing?(name, include_private = false)
        key?(name) || super
      end
    end

    # source  - an object responding to +read+ (its result is used), or
    #           anything else, which is converted with +to_s+.
    # options - copied into @options; nothing in this class reads it.
    #
    # Raises SimpleRSSError when no channel/feed element pair is found.
    def initialize(source, options={})
      @source = source.respond_to?(:read) ? source.read : source.to_s
      @items = []
      @options = {}.update(options)
      parse
    end

    # The feed-level values live on the parser object itself.
    def channel
      self
    end
    alias :feed :channel

    class << self
      def feed_tags
        @@feed_tags
      end

      def feed_tags=(ft)
        @@feed_tags = ft
      end

      def item_tags
        @@item_tags
      end

      def item_tags=(it)
        @@item_tags = it
      end

      # Same as SimpleRSS.new; the (source, options) signature is kept for
      # compatibility with Ruby's standard RSS parser.
      def parse(source, options={})
        new source, options
      end
    end

    private

    # Validates the document, then extracts feed-level tags and items.
    def parse
      unless @source =~ %r{<(channel|feed).*?>.*?</(channel|feed)>}mi
        raise SimpleRSSError, "Poorly formatted feed"
      end

      parse_feed_tags
      parse_items
    end

    # Looks every feed tag up in the text preceding the first item and,
    # failing that, in the whole document. A reader method is defined on
    # the class for each tag that was found.
    def parse_feed_tags
      head = @source[%r{(.*?)<(rss:|atom:)?(item|entry).*?>.*?</(rss:|atom:)?(item|entry)>}mi, 1]

      @@feed_tags.each do |tag|
        patterns = feed_tag_patterns(tag)
        found = (head && first_match(head, patterns)) || first_match(@source, patterns)
        next unless found

        reader = clean_tag(tag)
        instance_variable_set("@#{reader}", clean_content(tag, found[2], found[3]))
        self.class.send(:attr_reader, reader)
      end
    end

    # Turns every item/entry element of the document into a Hash in @items.
    def parse_items
      @source.scan(%r{<(rss:|atom:)?(item|entry)([\s][^>]*)?>(.*?)</(rss:|atom:)?(item|entry)>}mi) do |match|
        @items << parse_item(match[3])
      end
    end

    # Builds the Hash for one item from the text between its tags.
    def parse_item(body)
      item = {}

      @@item_tags.each do |tag|
        spec = tag.to_s

        if spec.include?("+")
          name, rel = spec.split("+")
          found = first_match(body, rel_patterns(name, rel))
          next unless found

          # Attributes on both sides of rel= are handed over: href may be
          # written before or after rel.
          item[clean_tag("#{name}+#{rel}")] = clean_content(name, "#{found[2]} #{found[3]}", found[4])
        elsif spec.include?("#")
          name, attrib = spec.split("#")
          found = first_match(body, attribute_patterns(name, attrib))
          next unless found

          item[clean_tag("#{name}_#{attrib}")] = clean_content(name, attrib, found[3])
        else
          found = first_match(body, item_tag_patterns(tag))
          next unless found

          item[clean_tag(tag)] = clean_content(tag, found[2], found[3])
        end
      end

      item.extend(ItemAccessors)
    end

    # Returns the MatchData of the first pattern that matches +text+, or nil.
    def first_match(text, patterns)
      patterns.each do |pattern|
        found = pattern.match(text)
        return found if found
      end
      nil
    end

    # Regexp source for the literal element name +tag+. The look-ahead
    # rejects longer names that merely start with it, so +content+ does not
    # match <content:encoded>.
    def element_name(tag)
      Regexp.escape(tag.to_s) + '(?![\w:.-])'
    end

    # Feed-level patterns in priority order: namespaced pair, plain pair,
    # self-closing element. Group 2 holds the attributes, group 3 the
    # content (absent for the self-closing form).
    def feed_tag_patterns(tag)
      opening = element_name(tag)
      closing = Regexp.escape(tag.to_s)
      [
        %r{<(rss:|atom:)#{opening}(.*?)>(.*?)</(rss:|atom:)#{closing}>}mi,
        %r{<()#{opening}(.*?)>(.*?)</()#{closing}>}mi,
        %r{<(rss:|atom:)?#{opening}(.*?)/\s*>}mi
      ]
    end

    # Item-level patterns for a plain tag: paired form first, then the
    # self-closing form. Groups as in #feed_tag_patterns.
    def item_tag_patterns(tag)
      opening = element_name(tag)
      closing = Regexp.escape(tag.to_s)
      [
        %r{<(rss:|atom:)?#{opening}(.*?)>(.*?)</(rss:|atom:)?#{closing}>}mi,
        %r{<(rss:|atom:)?#{opening}(.*?)/\s*>}mi
      ]
    end

    # Patterns for "tag+rel". Group 2 holds the attributes before rel=,
    # group 3 those after it, group 4 the content (paired form only).
    # Group 2 may not cross a ">" so the rel is looked for inside one
    # element only.
    def rel_patterns(name, rel)
      opening = element_name(name)
      closing = Regexp.escape(name.to_s)
      wanted = Regexp.escape(rel.to_s)
      [
        %r{<(rss:|atom:)?#{opening}([^>]*?)rel=['"]#{wanted}['"](.*?)>(.*?)</(rss:|atom:)?#{closing}>}mi,
        %r{<(rss:|atom:)?#{opening}([^>]*?)rel=['"]#{wanted}['"](.*?)/\s*>}mi
      ]
    end

    # Patterns for "tag#attribute". Group 3 holds the attribute value.
    # Group 2 may not cross a ">" so the attribute is looked for inside one
    # element only.
    def attribute_patterns(name, attrib)
      opening = element_name(name)
      closing = Regexp.escape(name.to_s)
      wanted = Regexp.escape(attrib.to_s)
      [
        %r{<(rss:|atom:)?#{opening}([^>]*?)#{wanted}=['"](.*?)['"](.*?)>(.*?)</(rss:|atom:)?#{closing}>}mi,
        %r{<(rss:|atom:)?#{opening}([^>]*?)#{wanted}=['"](.*?)['"](.*?)/\s*>}mi
      ]
    end

    # Converts the raw text of a tag into its final value.
    #
    # * date tags are parsed with Time.parse; text that cannot be parsed is
    #   returned unescaped instead
    # * person and skip tags have nested markup removed
    # * any other tag without content yields the href found in +attrs+,
    #   if there is one
    def clean_content(tag, attrs, content)
      content = content.to_s

      case tag
      when :pubDate, :lastBuildDate, :published, :updated, :expirationDate, :modified, :'dc:date'
        begin
          Time.parse(content)
        rescue StandardError
          unescape(content)
        end
      when :author, :contributor, :skipHours, :skipDays
        unescape(content.gsub(/<.*?>/, ''))
      else
        if content.empty? && "#{attrs} " =~ /href=['"]?([^'"]*)['" ]/mi
          $1.strip
        else
          unescape(content)
        end
      end
    end

    # Hash key / reader name for a tag: colons become underscores.
    def clean_tag(tag)
      tag.to_s.gsub(':', '_').intern
    end

    # Removes CDATA markers and surrounding whitespace. The text is
    # URL-decoded first when a "%" directly follows a character outside the
    # listed URI-safe set.
    #
    # The patterns carry no encoding flag, so they can be applied to text in
    # any ASCII-compatible encoding, including binary strings.
    def unescape(content)
      text =
        if content =~ /[^-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]]%/
          CGI.unescape(content)
        else
          content
        end
      text.gsub(/<!\[CDATA\[|\]\]>/, '').strip
    end
  end
  # Error type of the SimpleRSS parser. It adds nothing to StandardError, so
  # a plain +rescue+ catches it as well.
  class SimpleRSSError < StandardError; end