PageRenderTime 61ms CodeModel.GetById 29ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/youtube_it/parser.rb

https://github.com/klacointe/youtube_it
Ruby | 617 lines | 511 code | 84 blank | 22 comment | 34 complexity | 1c5b77963bbc15ee297ddabb13e6664b MD5 | raw file
  1. # encoding: UTF-8
  2. class YouTubeIt
  3. module Parser #:nodoc:
  # Base class for the feed parsers in this module. It stores the feed
  # content and exposes entry points that delegate to +parse_content+ and
  # +parse_entry+; neither is defined here, so a subclass must supply them.
  class FeedParser #:nodoc:
    # Stores the content to parse.
    #
    # When +content+ is a String that is, in its entirety, an http(s) URL the
    # document behind it is downloaded and its body stored. Anything else (raw
    # XML text, a response object, an XML node, ...) is stored untouched.
    #
    # Raises OpenURI::HTTPError when the download fails with an HTTP error;
    # the new error is built from the response status code and the original
    # error. Any other StandardError falls back to storing +content+ as given.
    def initialize(content)
      @content = remote_url?(content) ? read_remote(content) : content
    rescue OpenURI::HTTPError => e
      raise OpenURI::HTTPError.new(e.io.status[0], e)
    rescue
      @content = content
    end

    # Hands the stored content to the subclass's +parse_content+.
    def parse
      parse_content @content
    end

    # Parses the stored content as XML and passes its first "entry" element
    # (or the whole document when there is none) to +parse_entry+.
    def parse_single_entry
      doc = Nokogiri::XML(@content)
      parse_entry(doc.at("entry") || doc)
    end

    # Parses the stored content as XML and returns an Array with the result
    # of +parse_entry+ for every "entry" element.
    def parse_videos
      Nokogiri::XML(@content).css("entry").map { |video| parse_entry(video) }
    end

    # Returns a copy of +str+ with every UTF-8 byte order mark removed.
    def remove_bom(str)
      str.gsub(/\xEF\xBB\xBF/, '')
    end

    private

    # True only when +content+ is a String consisting of nothing but an
    # http/https URL. The match is anchored on purpose: feed XML routinely
    # contains URLs (namespace URIs, links) and must not be treated as one.
    def remote_url?(content)
      content.is_a?(String) && !(content =~ /\A#{URI::regexp(%w(http https))}\z/).nil?
    end

    # Downloads +url+ and returns the body, closing the stream afterwards.
    # URI.open is preferred because Kernel#open can also open files and pipes;
    # the Kernel#open branch is only for Rubies whose open-uri does not expose
    # a public URI.open.
    def read_remote(url)
      if URI.respond_to?(:open)
        URI.open(url) { |io| io.read }
      else
        open(url) { |io| io.read }
      end
    end
  end
  # Parses a comments feed into YouTubeIt::Model::Comment objects.
  class CommentsFeedParser < FeedParser #:nodoc:
    # Returns an Array with one comment per "entry" element of the feed found
    # in +content.body+.
    def parse_content(content)
      feed = Nokogiri::XML(content.body).at("feed")
      feed.css("entry").map { |entry| parse_entry(entry) }
    end

    protected

    # Builds a Comment (with its Author) from a single "entry" element.
    # Author name and uri fall back to nil when they cannot be read.
    def parse_entry(entry)
      author_name = begin
        entry.at("author/name").text
      rescue
        nil
      end
      author_uri = begin
        entry.at("author/uri").text
      rescue
        nil
      end
      comment_author = YouTubeIt::Model::Author.new(
        :name => author_name,
        :uri => author_uri
      )

      YouTubeIt::Model::Comment.new(
        :author => comment_author,
        :content => remove_bom(entry.at("content").text),
        :published => entry.at("published").text,
        :title => remove_bom(entry.at("title").text),
        :updated => entry.at("updated").text,
        :url => entry.at("id").text,
        :reply_to => parse_reply(entry)
      )
    end

    # Returns the last path segment of the entry's in-reply-to link with any
    # "?client..." suffix removed, or nil when the entry has no such link.
    def parse_reply(entry)
      reply_link = entry.at_xpath("xmlns:link[@rel='http://gdata.youtube.com/schemas/2007#in-reply-to']")
      return nil unless reply_link

      last_segment = reply_link["href"].split('/').last
      last_segment.gsub(/\?client.*/, '')
    end
  end
  # Parses a single-playlist response into a YouTubeIt::Model::Playlist.
  class PlaylistFeedParser < FeedParser #:nodoc:
    # Reads the first "feed" element (or, failing that, the first "entry")
    # of +content.body+ and returns a Playlist that also carries the
    # response status and the raw XML.
    def parse_content(content)
      xml = Nokogiri::XML(content.body)
      root = xml.at("feed") || xml.at("entry")

      title_node = root.at("title")
      playlist_id = begin
        root.at("id").text[/playlist:([\w\-]+)/, 1]
      rescue
        nil
      end
      published_node = root.at("published")

      YouTubeIt::Model::Playlist.new(
        :title => title_node && title_node.text,
        :summary => summary_of(root),
        :description => summary_of(root),
        :playlist_id => playlist_id,
        :published => published_node ? published_node.text : nil,
        :response_code => content.status,
        :xml => content.body)
    end

    private

    # Text of the "summary" element, else of media:group/media:description;
    # nil when neither can be read.
    def summary_of(root)
      (root.at("summary") || root.at_xpath("media:group").at_xpath("media:description")).text
    rescue
      nil
    end
  end
  # Parses a feed of playlists into YouTubeIt::Model::Playlist objects.
  class PlaylistsFeedParser < FeedParser #:nodoc:
    # Returns an Array with one Playlist per "entry" element of the feed
    # found in +content.body+.
    def parse_content(content)
      feed = Nokogiri::XML(content.body).at("feed")
      feed.css("entry").map { |entry| parse_entry(entry) }
    end

    protected

    # Builds a Playlist from a single "entry" element. Missing elements
    # produce nil attributes instead of raising, in line with
    # PlaylistFeedParser. +response_code+ and +xml+ are always nil here.
    def parse_entry(entry)
      title_node = entry.at("title")
      published_node = entry.at("published")

      YouTubeIt::Model::Playlist.new(
        :title => title_node && title_node.text,
        :summary => summary_text(entry),
        :description => summary_text(entry),
        :playlist_id => playlist_id(entry),
        :published => published_node ? published_node.text : nil,
        :response_code => nil,
        :xml => nil)
    end

    private

    # Text of the entry's "summary" element, else of its
    # media:group/media:description; nil when neither exists.
    def summary_text(entry)
      node = entry.at("summary")
      unless node
        group = entry.at_xpath("media:group")
        node = group && group.at_xpath("media:description")
      end
      node && node.text
    rescue Nokogiri::XML::XPath::SyntaxError
      # NOTE(review): presumably raised when the document does not declare
      # the media: prefix at all -- confirm against Nokogiri's behaviour.
      nil
    end

    # Whatever follows "playlist" in the entry id, minus the first colon;
    # nil when the id is absent or does not contain "playlist".
    def playlist_id(entry)
      id_node = entry.at("id")
      raw = id_node && id_node.text[/playlist([^<]+)/, 1]
      raw && raw.sub(':', '')
    end
  end
  # Returns an array of the user's activity
  class ActivityParser < FeedParser
    # Category scheme whose "term" names the kind of activity.
    USER_EVENTS_SCHEME = "http://gdata.youtube.com/schemas/2007/userevents.cat"
    # rel values of the links read for "video_commented" activities.
    COMMENTS_REL = "http://gdata.youtube.com/schemas/2007#comments"
    VIDEO_REL = "http://gdata.youtube.com/schemas/2007#video"

    # Returns an Array of YouTubeIt::Model::Activity, one per "entry" of the
    # feed in +content.body+ whose activity type is recognised; other
    # entries are skipped.
    def parse_content(content)
      feed = Nokogiri::XML(content.body).at("feed")
      activities = []
      feed.css("entry").each do |entry|
        activity = parse_activity(entry)
        activities << activity if activity
      end
      activities
    end

    protected

    # Parses the user's activity feed.
    #
    # Builds the Activity for one "entry" element, or returns nil when the
    # entry has no user-events category or its type is not handled.
    def parse_activity(entry)
      type = activity_type(entry)

      case type
      when "video_rated", "video_shared", "video_favorited", "video_uploaded"
        YouTubeIt::Model::Activity.new(
          :type => type,
          :time => text_at(entry, "updated"),
          :author => text_at(entry, "author/name"),
          :videos => parse_activity_videos(entry),
          :video_id => xpath_text(entry, "yt:videoid")
        )
      when "video_commented"
        YouTubeIt::Model::Activity.new(
          :type => type,
          :time => text_at(entry, "updated"),
          :author => text_at(entry, "author/name"),
          :videos => parse_activity_videos(entry),
          :video_id => xpath_text(entry, "yt:videoid"),
          :comment_thread_url => link_href(entry, COMMENTS_REL),
          :video_url => link_href(entry, VIDEO_REL)
        )
      when "friend_added", "user_subscription_added"
        YouTubeIt::Model::Activity.new(
          :type => type,
          :time => text_at(entry, "updated"),
          :author => text_at(entry, "author/name"),
          :username => xpath_text(entry, "yt:username")
        )
      end
      # Any other type (or none) falls through the case and yields nil.
    end

    # If a user enabled inline attribute videos may be included in results.
    #
    # Returns the parsed video for every "link" element that contains an
    # "entry", or nil when there is none.
    def parse_activity_videos(entry)
      videos = []
      entry.css("link").each do |link_tag|
        videos << YouTubeIt::Parser::VideoFeedParser.new(link_tag).parse if link_tag.at("entry")
      end
      videos.empty? ? nil : videos
    end

    private

    # The "term" of the last category tagged with the user-events scheme,
    # or nil when the entry has no such category.
    def activity_type(entry)
      type = nil
      entry.css("category").each do |category_tag|
        type = category_tag["term"] if category_tag["scheme"] == USER_EVENTS_SCHEME
      end
      type
    end

    # The href of the last "link" element whose rel equals +rel+, or nil.
    def link_href(entry, rel)
      href = nil
      entry.css("link").each do |link_tag|
        href = link_tag["href"] if link_tag["rel"] == rel
      end
      href
    end

    # Text of the first node matched by +path+ via #at, or nil.
    def text_at(entry, path)
      node = entry.at(path)
      node ? node.text : nil
    end

    # Text of the first node matched by the XPath +path+, or nil.
    def xpath_text(entry, path)
      node = entry.at_xpath(path)
      node ? node.text : nil
    end
  end
  # Returns an array of the user's contacts
  class ContactsParser < FeedParser
    # Builds one YouTubeIt::Model::Contact per "entry" element of the feed in
    # +content.body+; attributes whose element is missing are nil.
    def parse_content(content)
      feed = Nokogiri::XML(content.body).at("feed")

      feed.css("entry").map do |entry|
        title_node = entry.at("title")
        username_node = entry.at_xpath("yt:username")
        status_node = entry.at_xpath("yt:status")

        YouTubeIt::Model::Contact.new(
          :title => title_node && title_node.text,
          :username => username_node && username_node.text,
          :status => status_node && status_node.text
        )
      end
    end
  end
  # Returns an array of the user's messages
  class MessagesParser < FeedParser
    # Builds one YouTubeIt::Model::Message per "entry" element of the feed in
    # +content.body+; attributes whose element is missing are nil. The id is
    # the entry id with everything up to and including ":inbox:" removed.
    def parse_content(content)
      feed = Nokogiri::XML(content.body).at("feed")

      feed.css("entry").map do |entry|
        id_node = entry.at("id")
        title_node = entry.at("title")
        author_node = entry.at("author")
        name_node = author_node && author_node.at("name")
        summary_node = entry.at("summary")
        published_node = entry.at("published")

        YouTubeIt::Model::Message.new(
          :id => id_node && id_node.text.gsub(/.+:inbox:/, ""),
          :title => title_node && title_node.text,
          :name => name_node ? name_node.text : nil,
          :summary => summary_node && summary_node.text,
          :published => published_node && published_node.text
        )
      end
    end
  end
  # Parses a user profile into a YouTubeIt::Model::User.
  class ProfileFeedParser < FeedParser #:nodoc:
    # Parses the first "entry" element (or, failing that, the first "feed")
    # of +content.body+.
    def parse_content(content)
      xml = Nokogiri::XML(content.body)
      entry = xml.at("entry") || xml.at("feed")
      parse_entry(entry)
    end

    # Builds a User from a profile element. Attributes whose source element
    # is missing are nil; this includes the five values read from
    # yt:statistics, which is looked up once and may be absent.
    def parse_entry(entry)
      username_node = entry.at_xpath("yt:username")
      thumbnail = entry.at_xpath("media:thumbnail")
      published = entry.at("published")
      stats = entry.at_xpath("yt:statistics")

      YouTubeIt::Model::User.new(
        :age => xpath_text(entry, "yt:age"),
        :username => username_node ? username_node.text : nil,
        :username_display => (username_node['display'] rescue nil),
        :user_id => (entry.at_xpath("xmlns:author/yt:userId").text rescue nil),
        :last_name => (entry.at_xpath("yt:lastName").text rescue nil),
        :first_name => (entry.at_xpath("yt:firstName").text rescue nil),
        :company => xpath_text(entry, "yt:company"),
        :gender => xpath_text(entry, "yt:gender"),
        :hobbies => xpath_text(entry, "yt:hobbies"),
        :hometown => xpath_text(entry, "yt:hometown"),
        :location => xpath_text(entry, "yt:location"),
        :last_login => stats && stats["lastWebAccess"],
        :join_date => published ? published.text : nil,
        :movies => xpath_text(entry, "yt:movies"),
        :music => xpath_text(entry, "yt:music"),
        :occupation => xpath_text(entry, "yt:occupation"),
        :relationship => xpath_text(entry, "yt:relationship"),
        :school => xpath_text(entry, "yt:school"),
        :avatar => thumbnail ? thumbnail["url"] : nil,
        :upload_count => (entry.at_xpath('gd:feedLink[@rel="http://gdata.youtube.com/schemas/2007#user.uploads"]')['countHint'].to_i rescue nil),
        :max_upload_duration => (entry.at_xpath("yt:maxUploadDuration")['seconds'].to_i rescue nil),
        :subscribers => stats && stats["subscriberCount"],
        :videos_watched => stats && stats["videoWatchCount"],
        :view_count => stats && stats["viewCount"],
        :upload_views => stats && stats["totalUploadViews"],
        :insight_uri => (entry.at_xpath('xmlns:link[@rel="http://gdata.youtube.com/schemas/2007#insight.views"]')['href'] rescue nil)
      )
    end

    private

    # Text of the first node matched by the XPath +path+, or nil.
    def xpath_text(entry, path)
      node = entry.at_xpath(path)
      node ? node.text : nil
    end
  end
  # Parses a batch profile response into a Hash keyed by batch id.
  class BatchProfileFeedParser < ProfileFeedParser
    # Returns a Hash mapping each entry's batch:id text to the parsed user
    # when the entry's batch:status code is in 200...300, and to nil
    # otherwise. A later entry with the same id replaces an earlier one.
    def parse_content(content)
      profiles = {}

      Nokogiri::XML(content.body).xpath("//xmlns:entry").each do |entry|
        # Copy the namespaces visible from the entry onto the document root
        # before parsing it.
        entry.namespaces.each do |name, url|
          entry.document.root.add_namespace name, url
        end

        username = entry.at_xpath('batch:id', entry.namespaces).text
        parsed = catch(:result) do
          status_code = entry.at_xpath('batch:status', entry.namespaces)['code'].to_i
          if (200...300).include?(status_code)
            parse_entry(entry)
          else
            nil
          end
        end

        profiles[username] = parsed
      end

      profiles
    end
  end
  # Parses a subscriptions feed into YouTubeIt::Model::Subscription objects.
  class SubscriptionFeedParser < FeedParser #:nodoc:
    # Returns an Array with one Subscription per "entry" element of the feed
    # found in +content.body+.
    def parse_content(content)
      feed = Nokogiri::XML(content.body).at("feed")
      feed.css("entry").map { |entry| parse_entry(entry) }
    end

    protected

    # Builds a Subscription from one "entry" element. The id is whatever
    # follows "subscription" in the entry id, minus the first colon.
    def parse_entry(entry)
      id_suffix = entry.at("id").text[/subscription([^<]+)/, 1]
      published_node = entry.at("published")

      YouTubeIt::Model::Subscription.new(
        :title => entry.at("title").text,
        :id => id_suffix.sub(':',''),
        :published => published_node && published_node.text
      )
    end
  end
  # Parses a caption entry into a YouTubeIt::Model::Caption.
  class CaptionFeedParser < FeedParser #:nodoc:
    # Accepts an already parsed Nokogiri document or anything Nokogiri::XML
    # can parse, and parses its first "entry" element.
    def parse_content(content)
      doc = content
      doc = Nokogiri::XML(content) unless content.is_a?(Nokogiri::XML::Document)
      parse_entry(doc.at("entry"))
    end

    protected

    # Builds a Caption from one "entry" element. The id is whatever follows
    # "captions" in the entry id, minus the first colon.
    def parse_entry(entry)
      id_suffix = entry.at("id").text[/captions([^<]+)/, 1]
      published_node = entry.at("published")

      YouTubeIt::Model::Caption.new(
        :title => entry.at("title").text,
        :id => id_suffix.sub(':',''),
        :published => published_node && published_node.text
      )
    end
  end
  # Parses a single video entry into a YouTubeIt::Model::Video.
  class VideoFeedParser < FeedParser #:nodoc:
    # Accepts an already parsed Nokogiri document or anything Nokogiri::XML
    # can parse, and parses its first "entry" element.
    def parse_content(content)
      doc = content.is_a?(Nokogiri::XML::Document) ? content : Nokogiri::XML(content)
      parse_entry(doc.at("entry"))
    end

    protected

    # Builds a Video from one "entry" element.
    #
    # Values read from media:group fall back to defaults when the element
    # (or the whole group) is missing: "" for description and player url,
    # 0 for duration, nil for the unique id and the widescreen flag, and
    # empty Arrays for media content and thumbnails. Raises if the entry has
    # no "id" or "title" element.
    def parse_entry(entry)
      media_group = entry.at_xpath('media:group')
      categories, keywords = parse_categories_and_keywords(entry)
      view_count, favorite_count = parse_statistics(entry)
      position, latitude, longitude = parse_location(entry)

      # if content is not available on certain region, there is no
      # media:description, media:player or yt:duration
      duration_node = group_node(media_group, "yt:duration")
      player_node = group_node(media_group, "media:player")
      description_node = group_node(media_group, "media:description")
      videoid_node = group_node(media_group, "yt:videoid")

      YouTubeIt::Model::Video.new(
        :video_id => entry.at("id").text,
        :published_at => parse_time(entry.at("published")),
        :updated_at => parse_time(entry.at("updated")),
        :uploaded_at => parse_time(entry.at_xpath("media:group/yt:uploaded")),
        :recorded_at => parse_time(entry.at_xpath("yt:recorded")),
        :categories => categories,
        :keywords => keywords,
        :title => entry.at("title").text,
        :html_content => nil, # the entry's "content" element is not read
        :author => parse_author(entry.at("author")),
        :description => description_node ? description_node.text : "",
        :duration => duration_node ? duration_node["seconds"].to_i : 0,
        :media_content => parse_media_contents(media_group),
        :player_url => player_node ? player_node["url"] : "",
        :thumbnails => parse_thumbnails(media_group),
        :rating => parse_rating(entry),
        :view_count => view_count,
        :favorite_count => favorite_count,
        :comment_count => parse_comment_count(entry),
        :access_control => parse_access_control(entry),
        :widescreen => parse_widescreen(media_group),
        :noembed => !entry.at_xpath("yt:noembed").nil?,
        :safe_search => !entry.at_xpath("media:rating").nil?,
        :position => position,
        :latitude => latitude,
        :longitude => longitude,
        :state => parse_state(entry),
        :insight_uri => (entry.at_xpath('xmlns:link[@rel="http://gdata.youtube.com/schemas/2007#insight.views"]')['href'] rescue nil),
        :unique_id => videoid_node ? videoid_node.text : nil,
        :perm_private => !group_node(media_group, "yt:private").nil?)
    end

    # Builds a YouTubeIt::Model::Content from one media:content element.
    def parse_media_content(elem)
      format_code = elem["format"].to_i

      YouTubeIt::Model::Content.new(
        :url => elem["url"],
        :format => YouTubeIt::Model::Video::Format.by_code(format_code),
        :duration => elem["duration"].to_i,
        :mime_type => elem["type"],
        :default => (elem["isDefault"] == "true"))
    end

    # First node under +media_group+ matching +xpath+; nil when there is no
    # match or no media group at all.
    def group_node(media_group, xpath)
      media_group && media_group.at_xpath(xpath)
    end

    # Time.parse of the node's text, or nil when +node+ is nil.
    def parse_time(node)
      node ? Time.parse(node.text) : nil
    end

    # Splits the entry's "category" elements by scheme. Returns
    # [categories, keywords]: Category models for schemes ending in
    # "/categories.cat" and plain terms for schemes ending in "/keywords.cat".
    def parse_categories_and_keywords(entry)
      categories = []
      keywords = []
      entry.css("category").each do |category|
        scheme = category["scheme"]
        if scheme =~ /\/categories\.cat$/
          categories << YouTubeIt::Model::Category.new(
            :term => category["term"],
            :label => category["label"])
        elsif scheme =~ /\/keywords\.cat$/
          keywords << category["term"]
        end
      end
      [categories, keywords]
    end

    # Author model for the entry's "author" element; nil when it is missing.
    def parse_author(author_element)
      return nil unless author_element

      YouTubeIt::Model::Author.new(
        :name => author_element.at("name").text,
        :uri => author_element.at("uri").text)
    end

    # true/false depending on whether yt:aspectRatio reads "widescreen";
    # nil when the element is missing.
    def parse_widescreen(media_group)
      ratio = group_node(media_group, "yt:aspectRatio")
      ratio && ratio.text == 'widescreen'
    end

    # One Content model per media:content element of the group.
    def parse_media_contents(media_group)
      return [] unless media_group

      media_group.xpath("media:content").map { |mce| parse_media_content(mce) }
    end

    # One Thumbnail model per media:thumbnail element of the group.
    def parse_thumbnails(media_group)
      return [] unless media_group

      media_group.xpath("media:thumbnail").map do |thumb_element|
        # TODO: convert time HH:MM:ss string to seconds?
        YouTubeIt::Model::Thumbnail.new(
          :url => thumb_element["url"],
          :height => thumb_element["height"].to_i,
          :width => thumb_element["width"].to_i,
          :time => thumb_element["time"],
          :name => thumb_element["name"])
      end
    end

    # Rating model built from gd:rating, extended with the like/dislike
    # counts of yt:rating when present; nil when there is no gd:rating.
    def parse_rating(entry)
      rating_element = entry.at_xpath("gd:rating")
      extended_rating_element = entry.at_xpath("yt:rating")
      return nil unless rating_element

      rating_values = {
        :min => rating_element["min"].to_i,
        :max => rating_element["max"].to_i,
        :rater_count => rating_element["numRaters"].to_i,
        :average => rating_element["average"].to_f
      }
      if extended_rating_element
        rating_values[:likes] = extended_rating_element["numLikes"].to_i
        rating_values[:dislikes] = extended_rating_element["numDislikes"].to_i
      end
      YouTubeIt::Model::Rating.new(rating_values)
    end

    # [view_count, favorite_count] from yt:statistics, or [0, 0].
    def parse_statistics(entry)
      stats = entry.at_xpath("yt:statistics")
      stats ? [stats["viewCount"].to_i, stats["favoriteCount"].to_i] : [0, 0]
    end

    # countHint of the comments feed link, or 0 when there is no such link.
    def parse_comment_count(entry)
      comment_feed = entry.at_xpath('gd:comments/gd:feedLink[@rel="http://gdata.youtube.com/schemas/2007#comments"]')
      comment_feed ? comment_feed['countHint'].to_i : 0
    end

    # Hash of action => permission over all yt:accessControl elements; a
    # later element replaces an earlier one with the same action.
    def parse_access_control(entry)
      controls = {}
      entry.xpath('yt:accessControl').each do |e|
        controls[e['action']] = e['permission']
      end
      controls
    end

    # [position, latitude, longitude] read from georss:where/gml:Point/gml:pos.
    # All three are nil when the georss namespace is not in scope or any of
    # those elements is missing.
    def parse_location(entry)
      return [nil, nil, nil] unless entry.namespaces['xmlns:georss']

      where = entry.at_xpath("georss:where")
      point = where && where.at_xpath("gml:Point")
      pos = point && point.at_xpath("gml:pos")
      return [nil, nil, nil] unless pos

      position = pos.text
      latitude, longitude = position.split.map(&:to_f)
      [position, latitude, longitude]
    end

    # Publication state Hash. nil when the app namespace is not in scope,
    # { :name => "published" } when there is no app:control/yt:state, and
    # otherwise the name, reason code, help url and text of yt:state.
    def parse_state(entry)
      return nil unless entry.namespaces['xmlns:app']

      control = entry.at_xpath("app:control")
      state_node = control && control.at_xpath("yt:state")
      return { :name => "published" } unless state_node

      {
        :name => state_node["name"],
        :reason_code => state_node["reasonCode"],
        :help_url => state_node["helpUrl"],
        :copy => state_node.text
      }
    end
  end
  # Parses a feed of videos into a YouTubeIt::Response::VideoSearch.
  class VideosFeedParser < VideoFeedParser #:nodoc:
    private

    # Parses +content+ as XML. When it has a "feed" element, its id, update
    # time, openSearch paging values and every "entry" are read; without
    # one, the response carries nil metadata and an empty video list.
    def parse_content(content)
      feed = Nokogiri::XML(content).at("feed")

      feed_id = nil
      updated_at = nil
      total_result_count = nil
      offset = nil
      max_result_count = nil
      videos = []

      unless feed.nil?
        feed_id = feed.at("id").text
        updated_at = Time.parse(feed.at("updated").text)
        total_result_count = feed.at_xpath("openSearch:totalResults").text.to_i
        offset = feed.at_xpath("openSearch:startIndex").text.to_i
        max_result_count = feed.at_xpath("openSearch:itemsPerPage").text.to_i
        videos = feed.css("entry").map { |entry| parse_entry(entry) }
      end

      YouTubeIt::Response::VideoSearch.new(
        :feed_id => feed_id,
        :updated_at => updated_at,
        :total_result_count => total_result_count,
        :offset => offset,
        :max_result_count => max_result_count,
        :videos => videos)
    end
  end
  532. end
  533. end