PageRenderTime 38ms CodeModel.GetById 2ms app.highlight 31ms RepoModel.GetById 1ms app.codeStats 0ms

/modules/search.py

http://github.com/sbp/phenny
Python | 265 lines | 249 code | 6 blank | 10 comment | 11 complexity | 624ff22a0acc6f9e866bf30461e4b443 MD5 | raw file
  1#!/usr/bin/env python
  2"""
  3search.py - Phenny Web Search Module
  4Copyright 2008-9, Sean B. Palmer, inamidst.com
  5Licensed under the Eiffel Forum License 2.
  6
  7http://inamidst.com/phenny/
  8"""
  9
 10import re
 11import web
 12
 13class Grab(web.urllib.URLopener):
 14   def __init__(self, *args):
 15      self.version = 'Mozilla/5.0 (Phenny)'
 16      web.urllib.URLopener.__init__(self, *args)
 17      self.addheader('Referer', 'https://github.com/sbp/phenny')
 18   def http_error_default(self, url, fp, errcode, errmsg, headers):
 19      return web.urllib.addinfourl(fp, [headers, errcode], "http:" + url)
 20
 21def google_ajax(query): 
 22   """Search using AjaxSearch, and return its JSON."""
 23   if isinstance(query, unicode): 
 24      query = query.encode('utf-8')
 25   uri = 'http://ajax.googleapis.com/ajax/services/search/web'
 26   args = '?v=1.0&safe=off&q=' + web.urllib.quote(query)
 27   handler = web.urllib._urlopener
 28   web.urllib._urlopener = Grab()
 29   bytes = web.get(uri + args)
 30   web.urllib._urlopener = handler
 31   return web.json(bytes)
 32
 33def google_search(query): 
 34   results = google_ajax(query)
 35   try: return results['responseData']['results'][0]['unescapedUrl']
 36   except IndexError: return None
 37   except TypeError: 
 38      print results
 39      return False
 40
 41def google_count(query): 
 42   results = google_ajax(query)
 43   if not results.has_key('responseData'): return '0'
 44   if not results['responseData'].has_key('cursor'): return '0'
 45   if not results['responseData']['cursor'].has_key('estimatedResultCount'): 
 46      return '0'
 47   return results['responseData']['cursor']['estimatedResultCount']
 48
 49def formatnumber(n): 
 50   """Format a number with beautiful commas."""
 51   parts = list(str(n))
 52   for i in range((len(parts) - 3), 0, -3):
 53      parts.insert(i, ',')
 54   return ''.join(parts)
 55
 56def old_gc(query):
 57   return formatnumber(google_count(query))
 58
 59def g(phenny, input): 
 60   """Queries Google for the specified input."""
 61   query = input.group(2)
 62   if not query: 
 63      return phenny.reply('.g what?')
 64   query = query.encode('utf-8')
 65   uri = google_search(query)
 66   if uri: 
 67      phenny.reply(uri)
 68      if not hasattr(phenny.bot, 'last_seen_uri'):
 69         phenny.bot.last_seen_uri = {}
 70      phenny.bot.last_seen_uri[input.sender] = uri
 71   elif uri is False: phenny.reply("Problem getting data from Google.")
 72   else: phenny.reply("No results found for '%s'." % query)
 73g.commands = ['g']
 74g.priority = 'high'
 75g.example = '.g swhack'
 76
 77def oldgc(phenny, input): 
 78   """Returns the number of Google results for the specified input."""
 79   query = input.group(2)
 80   if not query: 
 81      return phenny.reply('.gc what?')
 82   query = query.encode('utf-8')
 83   num = formatnumber(google_count(query))
 84   phenny.say(query + ': ' + num)
 85oldgc.commands = ['ogc', 'oldgc']
 86oldgc.example = '.oldgc extrapolate'
 87
# Splits a comparison request into terms. Each match is one of:
#   - a double-quoted string, optionally prefixed with '+', in which
#     backslash escapes are allowed;
#   - a [bracketed] group, in which backslash escapes are allowed;
#   - otherwise a run of non-whitespace characters.
r_query = re.compile(
   r'\+?"[^"\\]*(?:\\.[^"\\]*)*"|\[[^]\\]*(?:\\.[^]\\]*)*\]|\S+'
)
 91
 92def gcs(phenny, input): 
 93   if not input.group(2):
 94      return phenny.reply("Nothing to compare.")
 95   queries = r_query.findall(input.group(2))
 96   if len(queries) > 6: 
 97      return phenny.reply('Sorry, can only compare up to six things.')
 98
 99   results = []
100   for i, query in enumerate(queries): 
101      query = query.strip('[]')
102      query = query.encode('utf-8')
103      n = int((formatnumber(google_count(query)) or '0').replace(',', ''))
104      results.append((n, query))
105      if i >= 2: __import__('time').sleep(0.25)
106      if i >= 4: __import__('time').sleep(0.25)
107
108   results = [(term, n) for (n, term) in reversed(sorted(results))]
109   reply = ', '.join('%s (%s)' % (t, formatnumber(n)) for (t, n) in results)
110   phenny.say(reply)
111gcs.commands = ['gcs', 'comp']
112
# Captures the href value of every anchor that immediately follows an <h3> tag.
r_bing = re.compile(r'<h3><a href="([^"]+)"')
114
def bing_search(query, lang='en-GB'):
   """Return the first Bing result link for query, or None if there is none.

   lang is placed in the 'mkt' parameter of the request. Links containing
   "r.msn.com/" are skipped.
   """
   base = 'http://www.bing.com/search?mkt=%s&q=' % lang
   page = web.get(base + web.urllib.quote(query))
   links = [link for link in r_bing.findall(page) if "r.msn.com/" not in link]
   if links:
      return links[0]
   return None
122
def bing(phenny, input):
   """Queries Bing for the specified input.

   The input may begin with ':<market>' followed by a space and the search
   terms (for example ':fr-FR chat'); the market is then passed to
   bing_search as lang. Without that prefix, or when the prefix names no
   market, 'en-GB' is used.

   Replies '.bing what?' when there are no search terms, including when the
   command is given no argument at all or only a market prefix. On a hit
   the link is sent as a reply and stored in phenny.bot.last_seen_uri under
   input.sender.
   """
   query = input.group(2)
   lang = 'en-GB'
   if query and query.startswith(':'):
      # partition never fails to unpack, unlike split(' ', 1), when the
      # prefix is not followed by any search terms.
      prefix, _, query = query.partition(' ')
      lang = prefix[1:] or lang
   if not query:
      return phenny.reply('.bing what?')

   query = query.encode('utf-8')
   uri = bing_search(query, lang)
   if uri:
      phenny.reply(uri)
      if not hasattr(phenny.bot, 'last_seen_uri'):
         phenny.bot.last_seen_uri = {}
      phenny.bot.last_seen_uri[input.sender] = uri
   else: phenny.reply("No results found for '%s'." % query)
bing.commands = ['bing']
bing.example = '.bing swhack'
143
# Captures the href value that follows the text: nofollow" class="..." href="
# (non-greedy, up to the next '">').
r_duck = re.compile(r'nofollow" class="[^"]+" href="(.*?)">')
145
def duck_search(query):
   """Return the first DuckDuckGo HTML result link for query, or None.

   Every '!' is removed from the query before it is quoted into the URI.
   The matched link is passed through web.decode before being returned.
   """
   # NOTE(review): presumably '!' is stripped to avoid DuckDuckGo bang
   # redirects; confirm.
   cleaned = web.urllib.quote(query.replace('!', ''))
   page = web.get('http://duckduckgo.com/html/?q=%s&kl=uk-en' % cleaned)
   match = r_duck.search(page)
   if match is None:
      return None
   return web.decode(match.group(1))
153
def duck(phenny, input):
   """Queries DuckDuckGo for the specified input."""
   text = input.group(2)
   if not text:
      return phenny.reply('.ddg what?')

   query = text.encode('utf-8')
   uri = duck_search(query)
   if not uri:
      phenny.reply("No results found for '%s'." % query)
   else:
      phenny.reply(uri)
      # Remember the link per sender, creating the table on first use.
      bot = phenny.bot
      if not hasattr(bot, 'last_seen_uri'):
         bot.last_seen_uri = {}
      bot.last_seen_uri[input.sender] = uri
duck.commands = ['duck', 'ddg']
167
def search(phenny, input):
   """Run the query on Google, Bing and DuckDuckGo and reply with the links.

   Engines that returned the same link are grouped together in the reply;
   an engine with no result is shown as '-'.
   """
   text = input.group(2)
   if not text:
      return phenny.reply('.search for what?')
   query = text.encode('utf-8')

   gu = google_search(query) or '-'
   bu = bing_search(query) or '-'
   du = duck_search(query) or '-'

   google_bing = (gu == bu)
   bing_duck = (bu == du)

   if google_bing and bing_duck:
      result = '%s (g, b, d)' % gu
   elif google_bing:
      result = '%s (g, b), %s (d)' % (gu, du)
   elif bing_duck:
      result = '%s (b, d), %s (g)' % (bu, gu)
   elif gu == du:
      result = '%s (g, d), %s (b)' % (gu, bu)
   else:
      # Three different links: swap over-long ones for a placeholder.
      placeholder = '(extremely long link)'
      if len(gu) > 250:
         gu = placeholder
      if len(bu) > 150:
         bu = placeholder
      if len(du) > 150:
         du = placeholder
      result = '%s (g), %s (b), %s (d)' % (gu, bu, du)

   phenny.reply(result)
search.commands = ['search']
192
def suggest(phenny, input):
   """Fetch a suggestion for the input from websitedev.de and say it."""
   text = input.group(2)
   if not text:
      return phenny.reply("No query term.")

   query = text.encode('utf-8')
   escaped = web.urllib.quote(query).replace('+', '%2B')
   answer = web.get('http://websitedev.de/temp-bin/suggest.pl?q=' + escaped)
   if not answer:
      phenny.reply('Sorry, no result.')
   else:
      phenny.say(answer)
suggest.commands = ['suggest']
203
def new_gc(query):
   """Scrape the result count for query from a Google results page.

   Returns "0" when the page says nothing matched, the first
   '<digits and commas> result(s)' figure found on the page otherwise, and
   None when no such figure is present.
   """
   escaped = web.urllib.quote(query).replace('+', '%2B')
   page = web.get('https://www.google.com/search?hl=en&q=' + escaped)
   if "did not match any documents" in page:
      return "0"
   found = re.search(r'(?ims)([0-9,]+) results?', page)
   if found:
      return found.group(1)
   return None
214
def newest_gc(query):
   """Scrape the result count for query with '&tbs=li:1' added to the URI.

   Returns "0" when the page says nothing matched, the first
   '<digits and commas> result(s)' figure found on the page otherwise, and
   None when no such figure is present.
   """
   escaped = web.urllib.quote(query).replace('+', '%2B')
   base = 'https://www.google.com/search?hl=en&q=' + escaped
   page = web.get(base + '&tbs=li:1')
   if "did not match any documents" in page:
      return "0"
   found = re.search(r'(?ims)([0-9,]+) results?', page)
   if found:
      return found.group(1)
   return None
224
def newerest_gc(query):
   """Scrape the result count for query with '&prmd=imvns&start=950' added.

   Returns "0" when the page says nothing matched, the first
   '<digits and commas> result(s)' figure found on the page otherwise, and
   None when no such figure is present.
   """
   escaped = web.urllib.quote(query).replace('+', '%2B')
   base = 'https://www.google.com/search?hl=en&q=' + escaped
   page = web.get(base + '&prmd=imvns&start=950')
   if "did not match any documents" in page:
      return "0"
   found = re.search(r'(?ims)([0-9,]+) results?', page)
   if found:
      return found.group(1)
   return None
234
def ngc(phenny, input):
   """Say the result count that new_gc scrapes for the specified input."""
   text = input.group(2)
   if not text:
      return phenny.reply("No query term.")

   query = text.encode('utf-8')
   result = new_gc(query)
   if not result:
      phenny.reply("Sorry, couldn't get a result.")
   else:
      phenny.say(query + ": " + result)

ngc.commands = ['ngc']
ngc.priority = 'high'
ngc.example = '.ngc extrapolate'
247
def gc(phenny, input):
   """Say several Google result counts for the specified input.

   The counts come from old_gc (api), newerest_gc (end) and new_gc (site),
   in that order; newest_gc (verbatim) is added when the query contains a
   double quote. A count that could not be obtained is shown as '?'.
   """
   text = input.group(2)
   if not text:
      return phenny.reply("No query term.")
   query = text.encode('utf-8')

   # The list is evaluated left to right, so the lookups run in this order.
   pieces = [
      (old_gc(query) or "?") + " (api)",
      (newerest_gc(query) or "?") + " (end)",
      (new_gc(query) or "?") + " (site)",
   ]
   if '"' in query:
      pieces.append((newest_gc(query) or "?") + " (verbatim)")
   phenny.say(query + ": " + ", ".join(pieces))

gc.commands = ['gc']
gc.priority = 'high'
gc.example = '.gc extrapolate'
263
if __name__ == '__main__':
   # Run directly, the module only prints its own docstring.
   print(__doc__.strip())