PageRenderTime 57ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/search.py

https://gitlab.com/hekel/sopel-extras-pandorah
Python | 158 lines | 149 code | 4 blank | 5 comment | 5 complexity | 1fa827e0483c534c19323f1e612f7331 MD5 | raw file
  1. # coding=utf-8
  2. # Copyright 2008-9, Sean B. Palmer, inamidst.com
  3. # Copyright 2012, Elsie Powell, embolalia.com
  4. # Licensed under the Eiffel Forum License 2.
  5. from __future__ import unicode_literals, absolute_import, print_function, division
  6. import re
  7. from sopel import web
  8. from sopel.module import commands, example
  9. import json
  10. import xmltodict
  11. import sys
  12. if sys.version_info.major < 3:
  13. from urllib import quote_plus, unquote
  14. else:
  15. from urllib.parse import quote_plus, unquote
  def formatnumber(n):
      """Format a number with thousands separators.

      ``n`` is converted with ``str()``; a leading sign and anything from the
      first decimal point onwards are left untouched, and only the integer
      digits are grouped in threes.

      :param n: the number (or numeric string) to format
      :return: the textual form of ``n`` with commas, e.g. ``'-1,234.5'``
      """
      text = str(n)

      # Peel off the sign so it is never counted as a digit; otherwise
      # -123 would come out as '-,123'.
      sign = ''
      if text[:1] in ('-', '+'):
          sign, text = text[0], text[1:]

      # Only the integer part takes separators; the fraction is re-attached
      # unchanged.
      whole, dot, fraction = text.partition('.')

      groups = []
      while len(whole) > 3:
          groups.insert(0, whole[-3:])
          whole = whole[:-3]
      groups.insert(0, whole)

      return sign + ','.join(groups) + dot + fraction
  # Captures the href of the first result heading link in Bing's result page.
  r_bing = re.compile(r'<h2(?: class=" b_topTitle")?><a href="([^"]+)"')


  def bing_search(query, lang='en-US'):
      """Return the URL of the first Bing result for ``query``.

      The result page is fetched with ``web.get`` and scanned with ``r_bing``.

      :param query: the raw (not URL-encoded) search terms
      :param lang: value passed to Bing as the ``mkt`` parameter
      :return: the first matched result URL, or ``None`` if nothing matched
      """
      base = 'https://www.bing.com/search?mkt=%s&q=' % lang
      # URL-encode the terms so characters such as '&', '#', '+' or '%' stay
      # part of the query instead of altering the request. Encoding to UTF-8
      # first keeps quote_plus working for non-ASCII text on Python 2 as well.
      page = web.get(base + quote_plus(query.encode('utf-8')))
      match = r_bing.search(page)
      if match:
          return match.group(1)
      return None
  # Captures the href of a result link in DuckDuckGo's HTML page; the negative
  # lookahead is there to reject Yahoo and DuckDuckGo redirect links.
  r_duck = re.compile(r'nofollow" class="[^"]+" href="(?!(?:https?:\/\/r\.search\.yahoo)|(?:https?:\/\/duckduckgo\.com\/y\.js)(?:\/l\/\?kh=-1&amp;uddg=))(.*?)">')


  def duck_search(query):
      """Return the first result URL from DuckDuckGo's HTML interface.

      :param query: the raw (not URL-encoded) search terms; every ``!`` is
                    removed before searching (presumably so the terms are not
                    treated as a bang shortcut)
      :return: the unquoted, ``web.decode``-d URL of the first match, or
               ``None`` if the page contains no match
      """
      query = query.replace('!', '')
      # URL-encode the terms so characters such as '&', '#', '+' or '%' cannot
      # break out of the ``q`` parameter. Encoding to UTF-8 first keeps
      # quote_plus working for non-ASCII text on Python 2 as well.
      uri = 'https://duckduckgo.com/html/?q=%s&kl=us-en' % quote_plus(query.encode('utf-8'))
      page = web.get(uri, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'})
      if 'web-result' in page:  # filter out the ads on top of the page
          # Only the text between the first and second 'web-result' markers
          # is searched.
          page = page.split('web-result')[1]
      match = r_duck.search(page)
      if match:
          return web.decode(unquote(match.group(1)))
      return None


  # Alias google_search to duck_search
  google_search = duck_search
  def duck_api(query):
      """Ask the DuckDuckGo API for a redirect target for ``query``.

      A query containing ``!bang`` (case-insensitive) is answered directly
      with the address of the bang listing page, without contacting the API.

      :param query: the raw search terms
      :return: the ``Redirect`` value of the JSON response when it is
               non-empty, otherwise ``None``
      """
      wants_bang_list = '!bang' in query.lower()
      if wants_bang_list:
          return 'https://duckduckgo.com/bang.html'

      # This fixes issue #885 (https://github.com/sopel-irc/sopel/issues/885).
      # The DuckDuckGo API seems to redirect to its Instant Answer API HTML
      # page if the query contains special characters that aren't URL-encoded,
      # so the query is URL-encoded in order to always get JSON back.
      encoded = quote_plus(query)
      uri = 'https://api.duckduckgo.com/?q=%s&format=json&no_html=1&no_redirect=1' % encoded
      payload = json.loads(web.get(uri))
      redirect = payload['Redirect']
      return redirect if redirect else None
  @commands('duck', 'ddg')
  @example('.duck sopel bot', r'https?:\/\/sopel\.chat\/?', re=True)
  def duck(bot, trigger):
      """Queries Duck Duck Go for the specified input."""
      query = trigger.group(2)
      if not query:
          return bot.reply('.ddg what?')

      # First choice: whatever the API hands back.
      api_hit = duck_api(query)
      if api_hit:
          bot.reply(api_hit)
          return

      # Second choice: the first link found on the HTML version.
      page_hit = duck_search(query)
      if not page_hit:
          bot.reply("No results found for '%s'." % query)
          return

      bot.reply(page_hit)
      # Record the link per sender, but only when the 'last_seen_url' store
      # already exists in the bot's memory.
      if 'last_seen_url' in bot.memory:
          bot.memory['last_seen_url'][trigger.sender] = page_hit
  @commands('bing')
  @example('.bing sopel bot', r'https?:\/\/sopel\.chat\/?', re=True)
  def bing(bot, trigger):
      """Queries Bing for the specified input."""
      query = trigger.group(2)
      if not query:
          return bot.reply('.bing what?')

      first_hit = bing_search(query)
      if not first_hit:
          bot.reply("No results found for '%s'." % query)
          return
      # A found link is sent with say(), the other messages with reply().
      bot.say(first_hit)
  @commands('search')
  @example('.search sopel bot', r'(https?:\/\/sopel\.chat\/? \(b, d\)|https?:\/\/sopel\.chat\/? \(b\), https?:\/\/sopel\.chat\/? \(d\))', re=True)
  def search(bot, trigger):
      """Searches Bing and Duck Duck Go."""
      query = trigger.group(2)
      if not query:
          return bot.reply('.search for what?')

      # '-' stands in for an engine that produced no result.
      bing_url = bing_search(query) or '-'
      duck_url = duck_search(query) or '-'

      # Both engines agree: report the shared answer once.
      if bing_url == duck_url:
          bot.reply('%s (b, d)' % bing_url)
          return

      # Otherwise report both, replacing links longer than 150 characters.
      placeholder = '(extremely long link)'
      shown_bing = placeholder if len(bing_url) > 150 else bing_url
      shown_duck = placeholder if len(duck_url) > 150 else duck_url
      bot.reply('%s (b), %s (d)' % (shown_bing, shown_duck))
  @commands('suggest')
  @example('.suggest wikip', 'wikipedia')
  @example('.suggest ', 'No query term.')
  @example('.suggest lkashdfiauwgeaef', 'Sorry, no result.')
  def suggest(bot, trigger):
      """Suggest terms starting with given input"""
      query = trigger.group(2)
      if not query:
          return bot.reply("No query term.")

      # Using Google isn't necessarily ideal, but at most they'll be able to build
      # a composite profile of all users on a given instance, not a profile of any
      # single user. This can be switched out as soon as someone finds (or builds)
      # an alternative suggestion API.
      uri = 'https://suggestqueries.google.com/complete/search?output=toolbar&hl=en&q='
      # URL-encode the whole query (not just '+') so characters such as '&' or
      # '#' stay part of the search term. Encoding to UTF-8 first keeps
      # quote_plus working for non-ASCII text on Python 2 as well.
      encoded = quote_plus(query.encode('utf-8'))
      toplevel = xmltodict.parse(web.get(uri + encoded))['toplevel']

      # An empty <toplevel/> parses to None, and a single <CompleteSuggestion>
      # parses to a mapping instead of a list; normalise both cases so
      # indexing with [0] is always valid.
      suggestions = (toplevel or {}).get('CompleteSuggestion') or []
      if not isinstance(suggestions, list):
          suggestions = [suggestions]

      answer = None
      if suggestions:
          try:
              answer = suggestions[0]['suggestion']['@data']
          except (KeyError, TypeError):
              # Unexpected response shape: treat it as "no result".
              answer = None

      if answer:
          bot.say(answer)
      else:
          bot.reply('Sorry, no result.')