PageRenderTime 68ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/data/rbot/plugins/search.rb

https://github.com/jsn/rbot
Ruby | 518 lines | 425 code | 64 blank | 29 comment | 52 complexity | b52d77e3430ff35a5d97c7b086763a96 MD5 | raw file
  1. #-- vim:sw=2:et
  2. #++
  3. #
  4. # :title: Google and Wikipedia search plugin for rbot
  5. #
  6. # Author:: Tom Gilbert (giblet) <tom@linuxbrit.co.uk>
  7. # Author:: Giuseppe "Oblomov" Bilotta <giuseppe.bilotta@gmail.com>
  8. #
  9. # Copyright:: (C) 2002-2005 Tom Gilbert
  10. # Copyright:: (C) 2006 Tom Gilbert, Giuseppe Bilotta
  11. # Copyright:: (C) 2006-2007 Giuseppe Bilotta
  12. # TODO:: use lr=lang_<code> or whatever is most appropriate to let google know
  13. # it shouldn't use the bot's location to find the preferred language
  14. # TODO:: support localized uncyclopedias -- not easy because they have different names
  15. # for most languages
  # URL prefix for a regular Google web search; callers append the
  # CGI-escaped query.
  GOOGLE_SEARCH = "http://www.google.com/search?oe=UTF-8&q=".freeze
  # URL prefix for the Google mobile search page (hl=en); callers append the
  # CGI-escaped query.
  GOOGLE_WAP_SEARCH = "http://www.google.com/m/search?hl=en&q=".freeze
  # Extracts result links from the mobile search page: capture 1 is the href,
  # capture 2 is the markup inside the anchor (the title).
  GOOGLE_WAP_LINK = /"r">(?:<div[^>]*>)?<a href="([^"]+)"[^>]*>(.*?)<\/a>/im
  # Captures the content of a heading (h1-h6) with class "r"; used by gcalc.
  GOOGLE_CALC_RESULT = %r{<h[1-6] class="r" [^>]*>(.+?)</h}
  # Captures the "of about <b>N</b>" total from a results page; used by gcount.
  GOOGLE_COUNT_RESULT = %r{<font size=-1>Results <b>1<\/b> - <b>10<\/b> of about <b>(.*)<\/b> for}
  # Captures the two parts of a definition onebox (text before and after the
  # first <br/>); used by gdef.
  GOOGLE_DEF_RESULT = %r{onebox_result">\s*(.*?)\s*<br/>\s*(.*?)<table}
  # Captures the text following the "Clock" image cell; used by gtime.
  # Capture 2 is only the closing tag alternative and is ignored by callers.
  GOOGLE_TIME_RESULT = %r{alt="Clock"></td><td valign=[^>]+>(.+?)<(br|/td)>}
  # URL prefix for the DuckDuckGo XML API; callers append the CGI-escaped query.
  DDG_API_SEARCH = "http://api.duckduckgo.com/?format=xml&no_html=1&skip_disambig=1&no_redirect=0&q=".freeze
  # Format template for WolframAlpha queries; expects :terms (already
  # CGI-escaped) and :key to be supplied through String#%.
  WOLFRAM_API_SEARCH = ("http://api.wolframalpha.com/v2/query?input=%{terms}&appid=%{key}&format=plaintext" +
    "&scantimeout=3.0&podtimeout=4.0&formattimeout=8.0&parsetimeout=5.0" +
    "&excludepodid=SeriesRepresentations:*").freeze
  # NOTE(review): API credential hard-coded in source; consider moving it to
  # the bot configuration so it can be rotated without a code change.
  WOLFRAM_API_KEY = "4EU37Y-TX9WJG3JH3".freeze
  28. class SearchPlugin < Plugin
  # Integer configuration keys read by the search actions below through
  # @bot.config[...].

  # Maximum number of DuckDuckGo result links to list.
  # This is the only key registered with a :validate proc (value must be > 0).
  Config.register Config::IntegerValue.new('duckduckgo.hits',
    :default => 3, :validate => Proc.new{|v| v > 0},
    :desc => "Number of hits to return from searches")
  # Number of DuckDuckGo hits for which the first paragraph is fetched
  # (0 disables it).
  Config.register Config::IntegerValue.new('duckduckgo.first_par',
    :default => 0,
    :desc => "When set to n > 0, the bot will return the first paragraph from the first n search hits")
  # Maximum number of Google results to list (no validation is registered).
  Config.register Config::IntegerValue.new('google.hits',
    :default => 3,
    :desc => "Number of hits to return from Google searches")
  # Number of Google hits for which the first paragraph is fetched
  # (0 disables it).
  Config.register Config::IntegerValue.new('google.first_par',
    :default => 0,
    :desc => "When set to n > 0, the bot will return the first paragraph from the first n search hits")
  # Maximum number of results for Wikipedia searches; also read by the
  # unpedia action.
  Config.register Config::IntegerValue.new('wikipedia.hits',
    :default => 3,
    :desc => "Number of hits to return from Wikipedia searches")
  # Number of Wikipedia hits for which the first paragraph is fetched; also
  # read by the unpedia action. Unlike the other *.first_par keys it
  # defaults to 1.
  Config.register Config::IntegerValue.new('wikipedia.first_par',
    :default => 1,
    :desc => "When set to n > 0, the bot will return the first paragraph from the first n wikipedia search hits")
  # Returns the help text for one of the plugin's commands.
  #
  # plugin:: unused here
  # topic::  command name to describe; any unknown topic (including the
  #          default "") yields the general overview
  #
  # The "lucky" and "gcount" commands are mapped by this plugin and therefore
  # have their own topics as well.
  def help(plugin, topic="")
    case topic
    when "ddg"
      "Use '#{topic} <string>' to return a search or calculation from " +
        "DuckDuckGo. Use #{topic} define <string> to return a definition."
    when "search", "google"
      "#{topic} <string> => search google for <string>"
    when "lucky"
      "lucky <string> => return only the first google result for <string>"
    when "gcalc"
      "gcalc <equation> => use the google calculator to find the answer to <equation>"
    when "gcount"
      "gcount <string> => return the number of google results for <string>"
    when "gdef"
      "gdef <term(s)> => use the google define mechanism to find a definition of <term(s)>"
    when "gtime"
      "gtime <location> => use the google clock to find the current time at <location>"
    when "wa"
      "wa <string> => searches WolframAlpha for <string>"
    when "wp"
      "wp [<code>] <string> => search for <string> on Wikipedia. You can select a national <code> to only search the national Wikipedia"
    when "unpedia"
      "unpedia <string> => search for <string> on Uncyclopedia"
    else
      "search <string> (or: google <string>) => search google for <string> | ddg <string> to search DuckDuckGo | wp <string> => search for <string> on Wikipedia | wa <string> => search for <string> on WolframAlpha | unpedia <string> => search for <string> on Uncyclopedia"
    end
  end
  # Builds the " -- <source> <url>" suffix appended to an abstract or a
  # definition. Each part is looked up with the given XPath; empty parts are
  # dropped and "" is returned when both are missing.
  def ddg_attribution(xml, source_path, url_path)
    parts = [source_path, url_path].map { |path| xml.elements[path].to_s }
    parts = parts.reject { |part| part.empty? }
    parts.empty? ? "" : " -- #{parts.join(' ')}"
  end
  private :ddg_attribution

  # Handler for the "ddg" command: queries the DuckDuckGo XML API for
  # params[:words] and replies on +m+.
  #
  # m::      message object; only #reply is used
  # params:: route parameters; :words is the query, :firstpar and :lucky
  #          optionally override the configured behaviour
  #
  # Replies, in order: a one-shot <Answer> (and nothing else) when present;
  # otherwise the abstract and/or the definition under the heading, followed
  # by at most 'duckduckgo.hits' result links, numbered from 1.
  def duckduckgo(m, params)
    what = params[:words].to_s
    terms = CGI.escape what
    url = DDG_API_SEARCH + terms

    hits = @bot.config['duckduckgo.hits']
    first_pars = params[:firstpar] || @bot.config['duckduckgo.first_par']
    single = params[:lucky] || (hits == 1 && first_pars == 1)

    begin
      feed = @bot.httputil.get(url)
      raise unless feed
    rescue
      m.reply "error duckduckgoing for #{what}"
      return
    end

    debug feed

    xml = REXML::Document.new feed
    heading = xml.elements['//Heading/text()'].to_s
    # answer is returned for calculations
    answer = xml.elements['//Answer/text()'].to_s
    # abstract is returned for definitions etc
    abstract = xml.elements['//AbstractText/text()'].to_s
    abfrom = ""
    unless abstract.empty?
      abfrom = ddg_attribution(xml, '//AbstractSource/text()', '//AbstractURL/text()')
    end
    # but also definition (yes, you can have both, see e.g. printf)
    definition = xml.elements['//Definition/text()'].to_s
    deffrom = ""
    unless definition.empty?
      deffrom = ddg_attribution(xml, '//Definition/@source/text()', '//Definition/@url/text()')
    end

    if heading.empty? && answer.empty? && abstract.empty? && definition.empty?
      m.reply "no results"
      return
    end

    # if we got a one-shot answer (e.g. a calculation), return it
    unless answer.empty?
      m.reply answer
      return
    end

    # otherwise, return the abstract, followed by as many hits as found
    unless heading.empty? || abstract.empty?
      m.reply "%{bold}%{heading}:%{bold} %{abstract}%{abfrom}" % {
        :bold => Bold, :heading => heading,
        :abstract => abstract, :abfrom => abfrom
      }
    end
    unless heading.empty? || definition.empty?
      m.reply "%{bold}%{heading}:%{bold} %{abstract}%{abfrom}" % {
        :bold => Bold, :heading => heading,
        :abstract => definition, :abfrom => deffrom
      }
    end

    # Collect zeroclick results. URL and text are read from the same <Result>
    # element so that every link keeps its own description, and collection
    # stops once the configured number of hits has been reached.
    links, texts = [], []
    xml.elements.each("//Results/Result") { |result|
      link = result.elements['FirstURL']
      next unless link
      text = result.elements['Text']
      links << link.text
      texts << (text ? " #{text.text}" : nil)
      break if links.size == hits
    }
    return if links.empty?

    # TODO see treatment of `single` in google search
    single ||= (links.length == 1)

    pretty = []
    links.each_with_index do |u, i|
      pretty.push("%{n}%{b}%{t}%{b}%{sep}%{u}" % {
        # hits are numbered from 1, like the google action does
        :n => (single ? "" : "#{i + 1}. "),
        :sep => (single ? " -- " : ": "),
        :b => Bold, :t => texts[i], :u => u
      })
    end
    result_string = pretty.join(" | ")

    # If we return a single, full result, change the output to a more compact representation
    if single
      fp = first_pars > 0 ? " -- #{Utils.get_first_pars(links, first_pars)}" : ""
      m.reply("Result for %{what}: %{string}%{fp}" % {
        :what => what, :string => result_string, :fp => fp
      }, :overlong => :truncate)
      return
    end

    m.reply "Results for #{what}: #{result_string}", :split_at => /\s+\|\s+/
    return unless first_pars > 0
    # the collected URLs live in `links`
    Utils.get_first_pars links, first_pars, :message => m
  end
  170. def google(m, params)
  171. what = params[:words].to_s
  172. if what.match(/^define:/)
  173. return google_define(m, what, params)
  174. end
  175. searchfor = CGI.escape what
  176. # This method is also called by other methods to restrict searching to some sites
  177. if params[:site]
  178. site = "site:#{params[:site]}+"
  179. else
  180. site = ""
  181. end
  182. # It is also possible to choose a filter to remove constant parts from the titles
  183. # e.g.: "Wikipedia, the free encyclopedia" when doing Wikipedia searches
  184. filter = params[:filter] || ""
  185. url = GOOGLE_WAP_SEARCH + site + searchfor
  186. hits = params[:hits] || @bot.config['google.hits']
  187. hits = 1 if params[:lucky]
  188. first_pars = params[:firstpar] || @bot.config['google.first_par']
  189. single = params[:lucky] || (hits == 1 and first_pars == 1)
  190. begin
  191. wml = @bot.httputil.get(url)
  192. raise unless wml
  193. rescue => e
  194. m.reply "error googling for #{what}"
  195. return
  196. end
  197. results = wml.scan(GOOGLE_WAP_LINK)
  198. if results.length == 0
  199. m.reply "no results found for #{what}"
  200. return
  201. end
  202. single ||= (results.length==1)
  203. pretty = []
  204. begin
  205. urls = Array.new
  206. debug results
  207. results.each do |res|
  208. t = res[1].ircify_html(:img => "[%{src} %{alt} %{dimensions}]").strip
  209. u = res[0]
  210. if u.sub!(%r{^http://www.google.com/aclk\?},'')
  211. u = CGI::parse(u)['adurl'].first
  212. debug "skipping ad for #{u}"
  213. next
  214. elsif u.sub!(%r{^http://www.google.com/gwt/x\?},'')
  215. u = CGI::parse(u)['u'].first
  216. elsif u.sub!(%r{^/url\?},'')
  217. u = CGI::parse(u)['q'].first
  218. end
  219. urls.push(u)
  220. pretty.push("%{n}%{b}%{t}%{b}%{sep}%{u}" % {
  221. :n => (single ? "" : "#{urls.length}. "),
  222. :sep => (single ? " -- " : ": "),
  223. :b => Bold, :t => t, :u => u
  224. })
  225. break if urls.length == hits
  226. end
  227. rescue => e
  228. m.reply "failed to understand what google found for #{what}"
  229. error e
  230. debug wml
  231. debug results
  232. return
  233. end
  234. if params[:lucky]
  235. m.reply pretty.first
  236. return
  237. end
  238. result_string = pretty.join(" | ")
  239. # If we return a single, full result, change the output to a more compact representation
  240. if single
  241. m.reply "Result for %s: %s -- %s" % [what, result_string, Utils.get_first_pars(urls, first_pars)], :overlong => :truncate
  242. return
  243. end
  244. m.reply "Results for #{what}: #{result_string}", :split_at => /\s+\|\s+/
  245. return unless first_pars > 0
  246. Utils.get_first_pars urls, first_pars, :message => m
  247. end
  # Looks up "define:" queries on the regular Google results page and replies
  # with the definitions section.
  #
  # m::      message object; only #reply is used
  # what::   the full query text (sent to Google as is, CGI-escaped)
  # params:: unused; kept so the signature matches the other actions
  #
  # The definitions section is the text between "Definitions of <b>" (searched
  # for after "Related phrases:") and the next "<input". If any of the three
  # markers is missing, "no results found" is replied.
  def google_define(m, what, params)
    begin
      wml = @bot.httputil.get(GOOGLE_SEARCH + CGI.escape(what))
      raise unless wml
    rescue
      m.reply "error googling for #{what}"
      return
    end

    defs_index = defs_end = nil
    begin
      related_index = wml.index(/Related phrases:/, 0)
      defs_index = wml.index(/Definitions of <b>/, related_index) if related_index
      defs_end = wml.index(/<input/, defs_index) if defs_index
    rescue
      # a page that cannot be scanned is treated like a page without results
      defs_end = nil
    end

    unless defs_end
      m.reply "no results found for #{what}"
      return
    end

    defs = wml[defs_index...defs_end]

    m.reply defs.ircify_html(:a_href => Underline), :split_at => (Underline + ' ')
  end
  # Handler for the "lucky" command: flags the request as :lucky in +params+
  # (the hash is modified in place) and hands it over to #google.
  def lucky(m, params)
    params[:lucky] = true
    google(m, params)
  end
  # Handler for the "gcalc" command: sends params[:words] to the Google mobile
  # search page and replies with the content of the first heading matched by
  # GOOGLE_CALC_RESULT.
  #
  # m::      message object; only #reply is used
  # params:: route parameters; :words is the expression to evaluate
  def gcalc(m, params)
    what = params[:words].to_s
    searchfor = CGI.escape(what)

    debug "Getting gcalc thing: #{searchfor.inspect}"
    url = GOOGLE_WAP_SEARCH + searchfor

    begin
      html = @bot.httputil.get(url)
      # a missing response is reported like a failed request, as the other
      # actions do, instead of failing on html.size below
      raise unless html
    rescue
      m.reply "error googlecalcing #{what}"
      return
    end

    debug "#{html.size} bytes of html recieved"
    debug html

    candidates = html.match(GOOGLE_CALC_RESULT)
    debug "candidates: #{candidates.inspect}"

    if candidates.nil?
      m.reply "couldn't calculate #{what}"
      return
    end
    result = candidates[1]

    debug "replying with: #{result.inspect}"
    m.reply result.ircify_html
  end
  # Handler for the "gcount" command: replies with the total number of results
  # Google reports for params[:words], as captured by GOOGLE_COUNT_RESULT.
  #
  # m::      message object; only #reply is used
  # params:: route parameters; :words is the query
  def gcount(m, params)
    what = params[:words].to_s
    searchfor = CGI.escape(what)

    debug "Getting gcount thing: #{searchfor.inspect}"
    url = GOOGLE_SEARCH + searchfor

    begin
      html = @bot.httputil.get(url)
      # a missing response is reported like a failed request, as the other
      # actions do, instead of failing on html.size below
      raise unless html
    rescue
      m.reply "error googlecounting #{what}"
      return
    end

    debug "#{html.size} bytes of html recieved"

    results = html.scan(GOOGLE_COUNT_RESULT)
    debug "results: #{results.inspect}"

    # anything but exactly one match means the page was not understood
    if results.length != 1
      m.reply "couldn't count #{what}"
      return
    end

    result = results[0][0].ircify_html
    debug "replying with: #{result.inspect}"
    m.reply "total results: #{result}"
  end
  # Handler for the "gdef" command: searches the Google mobile page for
  # "define <words>" and replies with the two parts captured by
  # GOOGLE_DEF_RESULT, joined by " -- ".
  #
  # m::      message object; only #reply is used
  # params:: route parameters; :words is the term to define
  def gdef(m, params)
    what = params[:words].to_s
    searchfor = CGI.escape("define " + what)

    debug "Getting gdef thing: #{searchfor.inspect}"
    url = GOOGLE_WAP_SEARCH + searchfor

    begin
      html = @bot.httputil.get(url)
      # a missing response is reported like a failed request, as the other
      # actions do, instead of failing on html.scan below
      raise unless html
    rescue
      m.reply "error googledefining #{what}"
      return
    end

    debug html
    results = html.scan(GOOGLE_DEF_RESULT)
    debug "results: #{results.inspect}"

    # anything but exactly one match means the page was not understood
    if results.length != 1
      m.reply "couldn't find a definition for #{what} on Google"
      return
    end

    head = results[0][0].ircify_html
    text = results[0][1].ircify_html
    m.reply "#{head} -- #{text}"
  end
  # Handler for the "wa" command: queries the WolframAlpha API for
  # params[:words] and replies with the title and plain text of one pod.
  #
  # m::      message object; only #reply is used
  # params:: route parameters; :words is the query
  #
  # The first pod is always skipped; the reply uses the first later pod that
  # has plain text. Pods without a subpod/plaintext element are treated as
  # having no text, and a response without any usable pod yields "no results".
  def wolfram(m, params)
    what = params[:words].to_s
    terms = CGI.escape what
    url = WOLFRAM_API_SEARCH % {
      :terms => terms, :key => WOLFRAM_API_KEY
    }

    begin
      feed = @bot.httputil.get(url)
      raise unless feed
    rescue
      m.reply "error asking WolframAlfa about #{what}"
      return
    end

    debug feed

    xml = REXML::Document.new feed
    queryresult = xml.elements['/queryresult']
    # a document without the expected root carries no data at all
    if queryresult.nil?
      m.reply "no data available"
      return
    end
    if queryresult.attributes['error'] == "true"
      m.reply xml.elements['/queryresult/error/text()'].to_s
      return
    end
    unless queryresult.attributes['success'] == "true"
      m.reply "no data available"
      return
    end

    # [title, plain text or nil] for every pod, in document order
    pods = []
    xml.elements.each("//pod") { |pod|
      plaintext = pod.elements['subpod/plaintext']
      pods << [pod.attributes['title'], plaintext && plaintext.text]
    }

    # find the first answer that isn't nil,
    # starting on the second pod in the array
    title, text = pods.drop(1).find { |_title, body| !body.nil? }
    if text.nil?
      m.reply "no results"
      return
    end

    # strip spaces, pipes, and line breaks
    sep = Bold + ' :: ' + Bold
    text = text.gsub(/\n/, sep).gsub(/\t/, " ").gsub(/\s+/, " ").gsub("|", "-")
    m.reply title.to_s + sep + text
  end
  # Handler for the "wp" command: a Google search restricted to Wikipedia.
  #
  # params[:lang], when present, selects the "<lang>.wikipedia.org" site;
  # otherwise plain "wikipedia.org" is used. The site, a title filter and the
  # 'wikipedia.*' limits are stored in +params+ before delegating to #google.
  def wikipedia(m, params)
    lang = params[:lang]
    prefix = lang.nil? ? '' : lang + '.'
    site = "#{prefix}wikipedia.org"
    debug "Looking up things on #{site}"

    params.update(
      :site => site,
      :filter => / - Wikipedia.*$/,
      :hits => @bot.config['wikipedia.hits'],
      :firstpar => @bot.config['wikipedia.first_par']
    )
    google(m, params)
  end
  # Handler for the "unpedia" command: a Google search restricted to
  # uncyclopedia.org. The site, a title filter and the 'wikipedia.*' limits
  # are stored in +params+ before delegating to #google.
  def unpedia(m, params)
    site = "uncyclopedia.org"
    debug "Looking up things on #{site}"

    params.update(
      :site => site,
      :filter => / - Uncyclopedia.*$/,
      :hits => @bot.config['wikipedia.hits'],
      :firstpar => @bot.config['wikipedia.first_par']
    )
    google(m, params)
  end
  # Handler for the "gtime" command: searches Google for "time in <location>"
  # and replies with the text captured by GOOGLE_TIME_RESULT.
  #
  # m::      message object; only #reply is used
  # params:: route parameters; :words is the location, optionally prefixed
  #          with "in" (the prefix is dropped before searching)
  def gtime(m, params)
    # non-destructive sub: params[:words] may be the very string returned by
    # to_s and must not be altered for the caller
    where = params[:words].to_s.sub(/^\s*in\s*/, '')
    searchfor = CGI.escape("time in " + where)
    url = GOOGLE_SEARCH + searchfor

    begin
      html = @bot.httputil.get(url)
      # a missing response is reported like a failed request, as the other
      # actions do, instead of failing on html.scan below
      raise unless html
    rescue
      m.reply "Error googletiming #{where}"
      return
    end

    debug html
    results = html.scan(GOOGLE_TIME_RESULT)
    debug "results: #{results.inspect}"

    # anything but exactly one match means the page was not understood
    if results.length != 1
      m.reply "Couldn't find the time for #{where} on Google"
      return
    end

    time = results[0][0].ircify_html
    m.reply time.to_s
  end
  427. end
  428. plugin = SearchPlugin.new
  429. plugin.map "ddg *words", :action => 'duckduckgo', :threaded => true
  430. plugin.map "search *words", :action => 'google', :threaded => true
  431. plugin.map "google *words", :action => 'google', :threaded => true
  432. plugin.map "lucky *words", :action => 'lucky', :threaded => true
  433. plugin.map "gcount *words", :action => 'gcount', :threaded => true
  434. plugin.map "gcalc *words", :action => 'gcalc', :threaded => true
  435. plugin.map "gdef *words", :action => 'gdef', :threaded => true
  436. plugin.map "gtime *words", :action => 'gtime', :threaded => true
  437. plugin.map "wa *words", :action => 'wolfram', :threaded => true
  438. plugin.map "wp :lang *words", :action => 'wikipedia', :requirements => { :lang => /^\w\w\w?$/ }, :threaded => true
  439. plugin.map "wp *words", :action => 'wikipedia', :threaded => true
  440. plugin.map "unpedia *words", :action => 'unpedia', :threaded => true