PageRenderTime 69ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/modules/search.py

https://github.com/schallis/phenny
Python | 152 lines | 136 code | 6 blank | 10 comment | 9 complexity | bb4e757600a8888acc7cf92b6e6181e1 MD5 | raw file
  1. #!/usr/bin/env python
  2. """
  3. search.py - Phenny Web Search Module
  4. Copyright 2008-9, Sean B. Palmer, inamidst.com
  5. Licensed under the Eiffel Forum License 2.
  6. http://inamidst.com/phenny/
  7. """
  8. import re
  9. import web
  class Grab(web.urllib.URLopener):
      """URL opener installed while the AjaxSearch request is made.

      It sets a Phenny-specific ``version`` string, adds a Referer header,
      and hands HTTP error responses back to the caller as response objects.
      """

      def __init__(self, *args):
          # Assigned before the base initialiser runs; presumably the base
          # class reads ``version`` when building its default headers
          # (TODO confirm against the urllib in use).
          self.version = 'Mozilla/5.0 (Phenny)'
          base = web.urllib.URLopener
          base.__init__(self, *args)
          self.addheader('Referer', 'https://github.com/sbp/phenny')

      def http_error_default(self, url, fp, errcode, errmsg, headers):
          """Wrap the error response body in an ``addinfourl`` and return it.

          The headers slot receives the list ``[headers, errcode]`` and the
          reported URL is ``url`` prefixed with ``"http:"``.
          """
          info = [headers, errcode]
          location = "http:" + url
          return web.urllib.addinfourl(fp, info, location)
  def search(query):
      """Search using AjaxSearch, and return its JSON.

      ``query`` is the search text; it is UTF-8 encoded and URL-quoted
      before being appended to the request URI. The return value is
      whatever ``web.json`` produces from the response body.

      The module-wide ``web.urllib._urlopener`` is swapped for a ``Grab``
      instance for the duration of the request and is always put back,
      even when the request raises.
      """
      uri = 'http://ajax.googleapis.com/ajax/services/search/web'
      args = '?v=1.0&safe=off&q=' + web.urllib.quote(query.encode('utf-8'))

      previous = web.urllib._urlopener
      web.urllib._urlopener = Grab()
      try:
          payload = web.get(uri + args)
      finally:
          # Restore the shared opener no matter what, so one failed request
          # does not leave the Grab opener installed for unrelated callers.
          web.urllib._urlopener = previous
      return web.json(payload)
  def result(query):
      """Return the URL of the first search hit for ``query``.

      Returns:
          The ``unescapedUrl`` of the first result, ``None`` when the result
          list is empty, or ``False`` when the response does not have the
          expected structure (the raw response is printed in that case).
      """
      results = search(query)
      try:
          return results['responseData']['results'][0]['unescapedUrl']
      except IndexError:
          # Well-formed response with an empty result list.
          return None
      except (TypeError, KeyError):
          # TypeError: some level was not subscriptable (e.g. None).
          # KeyError: an expected key was absent. Both mean the response is
          # unusable, so report it the same way instead of crashing.
          print(results)
          return False
  def count(query):
      """Return the estimated number of search results for ``query``.

      Returns:
          The ``estimatedResultCount`` value from the response cursor, or
          the string ``'0'`` when any level of the expected structure is
          missing or is not a mapping.
      """
      results = search(query)
      try:
          return results['responseData']['cursor']['estimatedResultCount']
      except (KeyError, TypeError):
          # KeyError: a level is absent. TypeError: a level is present but
          # not subscriptable (e.g. None), which previously crashed on
          # the attribute lookup of has_key.
          return '0'
  def formatnumber(n):
      """Format a number with beautiful commas.

      ``n`` may be an integer or a string of digits; it is converted with
      ``str`` and a comma is placed between each group of three characters,
      counted from the right. A leading ``-`` or ``+`` is kept in front and
      is not counted as part of a group, so ``-123`` stays ``-123`` rather
      than becoming ``-,123``. An empty string is returned unchanged.
      """
      digits = str(n)
      sign = ''
      if digits[:1] in ('-', '+'):
          sign, digits = digits[0], digits[1:]

      # Length of the left-most group: 1-3 characters, so that every
      # following group is exactly three characters long.
      head = len(digits) % 3 or 3
      groups = [digits[:head]]
      groups.extend(digits[i:i + 3] for i in range(head, len(digits), 3))
      return sign + ','.join(groups)
  def g(phenny, input):
      """Queries Google for the specified input."""
      terms = input.group(2)
      if not terms:
          return phenny.reply('.g what?')

      found = result(terms)
      if not found:
          # False signals a lookup problem; any other falsy value means the
          # search simply came back empty.
          if found is False:
              message = "Problem getting data from Google."
          else:
              message = "No results found for '%s'." % terms
          phenny.reply(message)
          return None

      phenny.reply(found)
      # Remember the last URI announced per sender, creating the mapping on
      # first use.
      if not hasattr(phenny.bot, 'last_seen_uri'):
          phenny.bot.last_seen_uri = {}
      phenny.bot.last_seen_uri[input.sender] = found
      return None
  g.commands = ['g']
  g.priority = 'high'
  g.example = '.g swhack'
  def gc(phenny, input):
      """Returns the number of Google results for the specified input."""
      terms = input.group(2)
      if terms:
          # Look up the estimate, add thousands separators, and announce it.
          pretty = formatnumber(count(terms))
          phenny.say(terms + ': ' + pretty)
          return None
      return phenny.reply('.gc what?')
  gc.commands = ['gc']
  gc.priority = 'high'
  gc.example = '.gc extrapolate'
  # Splits a comparison request into terms: an optionally "+"-prefixed
  # double-quoted phrase, a [bracketed] phrase, or a run of non-whitespace.
  r_query = re.compile(
      r'\+?"[^"\\]*(?:\\.[^"\\]*)*"|\[[^]\\]*(?:\\.[^]\\]*)*\]|\S+'
  )

  def gcs(phenny, input):
      """Compare the result counts of up to six search terms.

      Each term is looked up with ``count``; the terms are then announced
      in descending order of count, each followed by its formatted count.
      """
      import time

      text = input.group(2)
      if not text:
          return phenny.reply("Nothing to compare.")

      terms = r_query.findall(text)
      if len(terms) > 6:
          return phenny.reply('Sorry, can only compare up to six things.')

      tallies = []
      for position, raw in enumerate(terms):
          term = raw.strip('[]')
          shown = formatnumber(count(term)) or '0'
          tallies.append((int(shown.replace(',', '')), term))
          # Pause after the third lookup onwards, and pause twice from the
          # fifth onwards.
          if position >= 2:
              time.sleep(0.25)
          if position >= 4:
              time.sleep(0.25)

      # Largest count first.
      tallies.sort()
      tallies.reverse()
      pieces = ['%s (%s)' % (term, formatnumber(n)) for (n, term) in tallies]
      phenny.say(', '.join(pieces))
  gcs.commands = ['gcs', 'comp']
  # Captures the href of the first link that sits directly inside an <h3>.
  r_bing = re.compile(r'<h3><a href="([^"]+)"')

  def bing(phenny, input):
      """Queries Bing for the specified input.

      The argument may start with ``:<market>`` (for example ``:fr-FR``)
      followed by a space and the search terms; without it the market is
      ``en-GB``. On a hit the first result URI is sent as a reply and
      remembered per sender in ``phenny.bot.last_seen_uri``.
      """
      query = input.group(2)
      # Check for a missing argument first: group(2) may be None, and
      # calling startswith on it would raise.
      if not query:
          return phenny.reply('.bing what?')

      lang = 'en-GB'
      if query.startswith(':'):
          # partition never fails to unpack, unlike split(' ', 1), when the
          # market prefix is given without any search terms after it.
          lang, _, query = query.partition(' ')
          lang = lang[1:]
          if not query:
              return phenny.reply('.bing what?')

      # Keep the user's text in ``query`` so the "no results" message shows
      # what was typed rather than its percent-encoded form.
      quoted = web.urllib.quote(query.encode('utf-8'))
      base = 'http://www.bing.com/search?mkt=%s&q=' % lang
      page = web.get(base + quoted)
      m = r_bing.search(page)
      if m:
          uri = m.group(1)
          phenny.reply(uri)
          if not hasattr(phenny.bot, 'last_seen_uri'):
              phenny.bot.last_seen_uri = {}
          phenny.bot.last_seen_uri[input.sender] = uri
      else:
          phenny.reply("No results found for '%s'." % query)
  bing.commands = ['bing']
  bing.example = '.bing swhack'
  # Captures the href of the first anchor whose attributes end with
  # nofollow" class="..." href="...">.
  r_ddg = re.compile(r'nofollow" class="[^"]+" href="(.*?)">')

  def ddg(phenny, input):
      """Queries DuckDuckGo for the specified input.

      On a hit the first result URI is sent as a reply and remembered per
      sender in ``phenny.bot.last_seen_uri``; otherwise a "no results"
      message naming the original query is sent.
      """
      query = input.group(2)
      if not query:
          return phenny.reply('.ddg what?')

      # Keep the user's text in ``query`` so the "no results" message shows
      # what was typed rather than its percent-encoded form.
      quoted = web.urllib.quote(query.encode('utf-8'))
      uri = 'http://duckduckgo.com/html/?q=%s&kl=uk-en' % quoted
      page = web.get(uri)
      m = r_ddg.search(page)
      if m:
          uri = m.group(1)
          phenny.reply(uri)
          if not hasattr(phenny.bot, 'last_seen_uri'):
              phenny.bot.last_seen_uri = {}
          phenny.bot.last_seen_uri[input.sender] = uri
      else:
          phenny.reply("No results found for '%s'." % query)
  ddg.commands = ['ddg']
  # When run directly as a script, just show the module description.
  if __name__ == '__main__':
      print(__doc__.strip())