
/modules/search.py

https://github.com/asl97/phenny
Python | 265 lines

#!/usr/bin/env python
"""
search.py - Phenny Web Search Module
Copyright 2008-9, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/
"""

import re
import web
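
# Grab is a thin URLopener subclass: `version` becomes the User-Agent string
# ('Mozilla/5.0 (Phenny)'), a Referer header is added, and http_error_default
# is overridden so HTTP error responses are handed back to the caller instead
# of raising.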
class Grab(web.urllib.URLopener):
    def __init__(self, *args):
        self.version = 'Mozilla/5.0 (Phenny)'
        web.urllib.URLopener.__init__(self, *args)
        self.addheader('Referer', 'https://github.com/sbp/phenny')

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        return web.urllib.addinfourl(fp, [headers, errcode], "http:" + url)
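
# google_ajax temporarily installs Grab as urllib's module-level opener so the
# request carries the custom User-Agent/Referer, restores the previous opener,
# and decodes the AJAX API's JSON response.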
def google_ajax(query):
    """Search using AjaxSearch, and return its JSON."""
    if isinstance(query, unicode):
        query = query.encode('utf-8')
    uri = 'http://ajax.googleapis.com/ajax/services/search/web'
    args = '?v=1.0&safe=off&q=' + web.urllib.quote(query)
    handler = web.urllib._urlopener
    web.urllib._urlopener = Grab()
    bytes = web.get(uri + args)
    web.urllib._urlopener = handler
    return web.json(bytes)

def google_search(query):
    results = google_ajax(query)
    try: return results['responseData']['results'][0]['unescapedUrl']
    except IndexError: return None
    except TypeError:
        print results
        return False

def google_count(query):
    results = google_ajax(query)
    if not results.has_key('responseData'): return '0'
    if not results['responseData'].has_key('cursor'): return '0'
    if not results['responseData']['cursor'].has_key('estimatedResultCount'):
        return '0'
    return results['responseData']['cursor']['estimatedResultCount']
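
# formatnumber inserts thousands separators right to left,
# e.g. formatnumber(1234567) == '1,234,567'.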
def formatnumber(n):
    """Format a number with beautiful commas."""
    parts = list(str(n))
    for i in range((len(parts) - 3), 0, -3):
        parts.insert(i, ',')
    return ''.join(parts)

def old_gc(query):
    return formatnumber(google_count(query))
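
# .g replies with the top Google hit and records it in phenny.bot.last_seen_uri,
# keyed by input.sender (the channel or nick), presumably so other modules can
# refer back to the last URL mentioned.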
def g(phenny, input):
    """Queries Google for the specified input."""
    query = input.group(2)
    if not query:
        return phenny.reply('.g what?')
    query = query.encode('utf-8')
    uri = google_search(query)
    if uri:
        phenny.reply(uri)
        if not hasattr(phenny.bot, 'last_seen_uri'):
            phenny.bot.last_seen_uri = {}
        phenny.bot.last_seen_uri[input.sender] = uri
    elif uri is False: phenny.reply("Problem getting data from Google.")
    else: phenny.reply("No results found for '%s'." % query)
g.commands = ['g']
g.priority = 'high'
g.example = '.g swhack'

def oldgc(phenny, input):
    """Returns the number of Google results for the specified input."""
    query = input.group(2)
    if not query:
        return phenny.reply('.gc what?')
    query = query.encode('utf-8')
    num = formatnumber(google_count(query))
    phenny.say(query + ': ' + num)
oldgc.commands = ['ogc', 'oldgc']
oldgc.example = '.oldgc extrapolate'
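
# r_query tokenises the .gcs argument into comparison terms: an optionally
# '+'-prefixed double-quoted phrase, a [bracketed] term, or a bare word.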
r_query = re.compile(
    r'\+?"[^"\\]*(?:\\.[^"\\]*)*"|\[[^]\\]*(?:\\.[^]\\]*)*\]|\S+'
)

def gcs(phenny, input):
    if not input.group(2):
        return phenny.reply("Nothing to compare.")
    queries = r_query.findall(input.group(2))
    if len(queries) > 6:
        return phenny.reply('Sorry, can only compare up to six things.')

    results = []
    for i, query in enumerate(queries):
        query = query.strip('[]')
        query = query.encode('utf-8')
        n = int((formatnumber(google_count(query)) or '0').replace(',', ''))
        results.append((n, query))
        if i >= 2: __import__('time').sleep(0.25)
        if i >= 4: __import__('time').sleep(0.25)

    results = [(term, n) for (n, term) in reversed(sorted(results))]
    reply = ', '.join('%s (%s)' % (t, formatnumber(n)) for (t, n) in results)
    phenny.say(reply)
gcs.commands = ['gcs', 'comp']
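
# Bing support scrapes the HTML results page; results routed through
# r.msn.com (likely sponsored/redirect links) are skipped.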
r_bing = re.compile(r'<h3><a href="([^"]+)"')

def bing_search(query, lang='en-GB'):
    query = web.urllib.quote(query)
    base = 'http://www.bing.com/search?mkt=%s&q=' % lang
    bytes = web.get(base + query)
    for result in r_bing.findall(bytes):
        if "r.msn.com/" in result: continue
        return result

def bing(phenny, input):
    """Queries Bing for the specified input."""
    query = input.group(2)
    if not query:
        return phenny.reply('.bing what?')
    # an optional leading ':<market>' overrides the default en-GB market
    if query.startswith(':'):
        lang, query = query.split(' ', 1)
        lang = lang[1:]
    else: lang = 'en-GB'
    query = query.encode('utf-8')
    uri = bing_search(query, lang)
    if uri:
        phenny.reply(uri)
        if not hasattr(phenny.bot, 'last_seen_uri'):
            phenny.bot.last_seen_uri = {}
        phenny.bot.last_seen_uri[input.sender] = uri
    else: phenny.reply("No results found for '%s'." % query)
bing.commands = ['bing']
bing.example = '.bing swhack'
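
# DuckDuckGo support scrapes the html endpoint; '!' is stripped from the
# query first, presumably so bang syntax can't redirect the search elsewhere.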
r_duck = re.compile(r'nofollow" class="[^"]+" href="(.*?)">')

def duck_search(query):
    query = query.replace('!', '')
    query = web.urllib.quote(query)
    uri = 'http://duckduckgo.com/html/?q=%s&kl=uk-en' % query
    bytes = web.get(uri)
    m = r_duck.search(bytes)
    if m: return web.decode(m.group(1))

def duck(phenny, input):
    query = input.group(2)
    if not query: return phenny.reply('.ddg what?')
    query = query.encode('utf-8')
    uri = duck_search(query)
    if uri:
        phenny.reply(uri)
        if not hasattr(phenny.bot, 'last_seen_uri'):
            phenny.bot.last_seen_uri = {}
        phenny.bot.last_seen_uri[input.sender] = uri
    else: phenny.reply("No results found for '%s'." % query)
duck.commands = ['duck', 'ddg']
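
# .search runs the same query through Google, Bing and DuckDuckGo and reports
# which engines agree on the top result.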
def search(phenny, input):
    if not input.group(2):
        return phenny.reply('.search for what?')
    query = input.group(2).encode('utf-8')
    gu = google_search(query) or '-'
    bu = bing_search(query) or '-'
    du = duck_search(query) or '-'

    if (gu == bu) and (bu == du):
        result = '%s (g, b, d)' % gu
    elif (gu == bu):
        result = '%s (g, b), %s (d)' % (gu, du)
    elif (bu == du):
        result = '%s (b, d), %s (g)' % (bu, gu)
    elif (gu == du):
        result = '%s (g, d), %s (b)' % (gu, bu)
    else:
        if len(gu) > 250: gu = '(extremely long link)'
        if len(bu) > 150: bu = '(extremely long link)'
        if len(du) > 150: du = '(extremely long link)'
        result = '%s (g), %s (b), %s (d)' % (gu, bu, du)

    phenny.reply(result)
search.commands = ['search']

def suggest(phenny, input):
    if not input.group(2):
        return phenny.reply("No query term.")
    query = input.group(2).encode('utf-8')
    uri = 'http://websitedev.de/temp-bin/suggest.pl?q='
    answer = web.get(uri + web.urllib.quote(query).replace('+', '%2B'))
    if answer:
        phenny.say(answer)
    else: phenny.reply('Sorry, no result.')
suggest.commands = ['suggest']
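
# The remaining *_gc helpers scrape result counts from Google's own results
# page: new_gc uses a plain query, newest_gc adds tbs=li:1 (labelled
# "verbatim" by .gc below), and newerest_gc requests start=950 to read the
# count near the end of the result set (labelled "end" by .gc).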
def new_gc(query):
    uri = 'https://www.google.com/search?hl=en&q='
    uri = uri + web.urllib.quote(query).replace('+', '%2B')
    # if '"' in query: uri += '&tbs=li:1'
    bytes = web.get(uri)
    if "did not match any documents" in bytes:
        return "0"
    for result in re.compile(r'(?ims)([0-9,]+) results?').findall(bytes):
        return result
    return None

def newest_gc(query):
    uri = 'https://www.google.com/search?hl=en&q='
    uri = uri + web.urllib.quote(query).replace('+', '%2B')
    bytes = web.get(uri + '&tbs=li:1')
    if "did not match any documents" in bytes:
        return "0"
    for result in re.compile(r'(?ims)([0-9,]+) results?').findall(bytes):
        return result
    return None

def newerest_gc(query):
    uri = 'https://www.google.com/search?hl=en&q='
    uri = uri + web.urllib.quote(query).replace('+', '%2B')
    bytes = web.get(uri + '&prmd=imvns&start=950')
    if "did not match any documents" in bytes:
        return "0"
    for result in re.compile(r'(?ims)([0-9,]+) results?').findall(bytes):
        return result
    return None

def ngc(phenny, input):
    if not input.group(2):
        return phenny.reply("No query term.")
    query = input.group(2).encode('utf-8')
    result = new_gc(query)
    if result:
        phenny.say(query + ": " + result)
    else: phenny.reply("Sorry, couldn't get a result.")
ngc.commands = ['ngc']
ngc.priority = 'high'
ngc.example = '.ngc extrapolate'
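
# .gc reports several estimates side by side: the AJAX API count ("api"), the
# end-of-results count ("end"), the results-page count ("site") and, for
# quoted queries, the verbatim count ("verbatim").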
def gc(phenny, input):
    if not input.group(2):
        return phenny.reply("No query term.")
    query = input.group(2).encode('utf-8')
    result = query + ": "
    result += (old_gc(query) or "?") + " (api)"
    result += ", " + (newerest_gc(query) or "?") + " (end)"
    result += ", " + (new_gc(query) or "?") + " (site)"
    if '"' in query:
        result += ", " + (newest_gc(query) or "?") + " (verbatim)"
    phenny.say(result)
gc.commands = ['gc']
gc.priority = 'high'
gc.example = '.gc extrapolate'

if __name__ == '__main__':
    print __doc__.strip()