PageRenderTime 45ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/plugins/appendtitle.rb

https://github.com/Epictetus/termtter
Ruby | 95 lines | 84 code | 6 blank | 5 comment | 10 complexity | b1ac630097f2f3c40f9ad99a91b333f2 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. require 'uri'
  3. require 'open-uri'
  4. require 'nokogiri'
  5. require 'timeout'
  6. require 'digest/sha1'
  7. module Termtter::Client
  # Default settings for the appendtitle plugin.
  #   :timeout      - handed to the timeout that guards the worker-thread join
  #                   in the output hook (seconds).
  #   :cache_expire - handed to memory_cache.set as the expiry of fetched
  #                   title data; 3600 * 24 * 7 is presumably one week in
  #                   seconds (confirm against the cache implementation).
  [
    [:timeout, 30],
    [:cache_expire, 3600 * 24 * 7]
  ].each do |name, value|
    config.plugins.appendtitle.set_default(name, value)
  end
  # Fetches +uri+ and returns the final (post-redirect) URI together with a
  # title for the page, caching the result in memory_cache.
  #
  # uri - String URI to fetch; nil is accepted and yields nil.
  #
  # Returns one of:
  #   {:uri => String, :title => String}  page fetched and a title was found
  #   {:uri => String}                    page fetched, no title extracted
  #   {}                                  cached placeholder (another fetch of
  #                                       the same URI is running, or an
  #                                       earlier fetch failed and the
  #                                       placeholder has not expired yet)
  #   nil                                 uri was nil or the fetch failed
  #
  # Never raises for fetch/parse failures; they are logged at debug level.
  def self.fetch_title_data(uri) # returns {:title, :uri} | {:uri} | {} | nil
    return unless uri

    key = %w{ plugins appendtitle title-data}.push(Digest::SHA1.hexdigest(uri)).join('-')
    cached = memory_cache.get(key)
    if cached
      logger.debug "appendtitle: cache hit for #{uri}"
      return cached
    end
    # Store an empty placeholder first so that concurrent callers asking for
    # the same URI get a cache hit instead of starting a duplicate fetch.
    memory_cache.set(key, {}, config.plugins.appendtitle.cache_expire)
    logger.debug "appendtitle: fetching title for #{uri}"

    data = {}
    uri_fetch = uri
    # Upper bound on manual redirect follow-ups, so that two URIs which keep
    # redirecting to each other cannot make this method loop forever.
    redirects_left = 5
    begin
      io = URI.parse(uri_fetch).read
      base_uri = io.base_uri.to_s
      base_uri = uri_fetch if base_uri.length > 1000 # do not display absurdly long URIs
      data[:uri] = base_uri

      # XXX: scan charset from source. Pick the charset name that is declared
      # most often in the raw document. Stays nil when nothing is declared, so
      # the parser is left to detect the encoding by itself.
      declared = io.scan(/charset="?([^\s"]*)/i).flatten.reject { |name| name.empty? }
      counts = declared.inject(Hash.new(0)) { |acc, name| acc[name] += 1; acc }
      most_frequent = counts.to_a.sort_by { |pair| pair[1] }.reverse.first
      charset = most_frequent && most_frequent[0]

      begin # title (best effort: a parse failure must not lose the resolved URI)
        source = Nokogiri(io, base_uri, charset)
        node = source.at('title') || source.at('h1') || source.at('h2')
        title = node && node.text
        title = title.gsub(/\n/, '').gsub(/\s+/, ' ') if title
        data[:title] = title if title
      rescue StandardError => title_error
        logger.debug "appendtitle: no title for #{uri}, #{title_error.class.to_s}: #{title_error.message}"
      end

      memory_cache.set(key, data, config.plugins.appendtitle.cache_expire)
      data
    rescue Timeout::Error, StandardError => error
      # open-uri refuses some redirects and reports them as a RuntimeError, e.g.
      #   redirection forbidden: http://bit.ly/gSarwN -> https://github.com/jugyo/termtter/commit/6e5fa4455a5117fb6c10bdf82bae52cfcf57a91f
      # The target is the last word of the message; follow it by hand.
      forbidden_redirect = error.is_a?(RuntimeError) && error.message =~ /^redirection forbidden/
      if forbidden_redirect && redirects_left > 0
        redirects_left -= 1
        logger.debug "appendtitle: #{error.message}"
        uri_fetch = error.message.split(/\s+/).last
        retry
      end
      logger.debug "appendtitle: error #{uri}, #{error.class.to_s}: #{error.message}"
      nil
    end
  end
  # Output filter: for every status, rewrites each http/https URI in its text
  # to the resolved URI and, when a page title is known and does not already
  # appear in the status text, appends the title in parentheses.
  #
  # One thread is started per status. The hook waits for all of them for at
  # most config.plugins.appendtitle.timeout seconds and then returns the
  # statuses. Threads still running at that point are not stopped here.
  register_hook(
    :name => :appendtitle,
    :point => :filter_for_output,
    :exec_proc => lambda do |statuses, event|
      threads = statuses.map do |status|
        Thread.new do
          begin
            # Whitespace-normalised copy of the text, used only to decide
            # whether the title is already mentioned in the status.
            body_for_compare = status.text.gsub(/\n/, '').gsub(/\s+/, ' ')
            status.text.gsub!(URI.regexp(['http', 'https'])) do |uri_before|
              data = fetch_title_data(uri_before) || {}
              title = data[:title]
              uri_after = data[:uri] || uri_before
              # XXX: heuristic!!! The title counts as already present when the
              # whole title, its first half, or its second half occurs in the
              # status text.
              already_mentioned = title && (
                body_for_compare.include?(title) ||
                body_for_compare.include?(title[0..(title.length / 2)]) ||
                body_for_compare.include?(title[(title.length / 2)..-1])
              )
              if title && !already_mentioned
                "#{uri_after} (#{title})"
              else
                uri_after
              end
            end
          rescue => error
            logger.debug "appendtitle: [ERROR] #{error.class.to_s}: #{error.message}"
          end
        end
      end
      begin
        # wait for join or timeout; Timeout.timeout is used because the bare
        # top-level `timeout` helper is deprecated.
        Timeout.timeout(config.plugins.appendtitle.timeout) do
          threads.each { |t| t.join }
        end
      rescue Timeout::Error
        logger.error 'appendtitle: timeout'
      end
      statuses
    end
  )
  87. end
  88. # appendtitle.rb:
  89. # append title for uri and expand short uri.