PageRenderTime 56ms CodeModel.GetById 22ms app.highlight 27ms RepoModel.GetById 2ms app.codeStats 0ms

/sopel/modules/search.py

https://bitbucket.org/ebarrier/botnet-sopel
Python | 158 lines | 149 code | 4 blank | 5 comment | 5 complexity | 4b60123c73db318041a873bb7a0fc392 MD5 | raw file
  1# coding=utf-8
  2# Copyright 2008-9, Sean B. Palmer, inamidst.com
  3# Copyright 2012, Elsie Powell, embolalia.com
  4# Licensed under the Eiffel Forum License 2.
  5from __future__ import unicode_literals, absolute_import, print_function, division
  6
  7import re
  8from sopel import web
  9from sopel.module import commands, example
 10import json
 11import xmltodict
 12import sys
 13
 14if sys.version_info.major < 3:
 15    from urllib import quote_plus, unquote
 16else:
 17    from urllib.parse import quote_plus, unquote
 18
 19
 20def formatnumber(n):
 21    """Format a number with beautiful commas."""
 22    parts = list(str(n))
 23    for i in range((len(parts) - 3), 0, -3):
 24        parts.insert(i, ',')
 25    return ''.join(parts)
 26
 27
 28r_bing = re.compile(r'<h2(?: class=" b_topTitle")?><a href="([^"]+)"')
 29
 30
 31def bing_search(query, lang='en-US'):
 32    base = 'https://www.bing.com/search?mkt=%s&q=' % lang
 33    bytes = web.get(base + query)
 34    m = r_bing.search(bytes)
 35    if m:
 36        return m.group(1)
 37
 38
 39r_duck = re.compile(r'nofollow" class="[^"]+" href="(?!(?:https?:\/\/r\.search\.yahoo)|(?:https?:\/\/duckduckgo\.com\/y\.js)(?:\/l\/\?kh=-1&amp;uddg=))(.*?)">')
 40
 41
 42def duck_search(query):
 43    query = query.replace('!', '')
 44    uri = 'https://duckduckgo.com/html/?q=%s&kl=us-en' % query
 45    bytes = web.get(uri, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'})
 46    if 'web-result' in bytes:  # filter out the adds on top of the page
 47        bytes = bytes.split('web-result')[1]
 48    m = r_duck.search(bytes)
 49    if m:
 50        unquoted_m = unquote(m.group(1))
 51        return web.decode(unquoted_m)
 52
 53
# Alias google_search to duck_search: both names refer to the same function,
# so calling google_search() performs a DuckDuckGo search, not a Google one.
google_search = duck_search
 56
 57
 58def duck_api(query):
 59    if '!bang' in query.lower():
 60        return 'https://duckduckgo.com/bang.html'
 61
 62    # This fixes issue #885 (https://github.com/sopel-irc/sopel/issues/885)
 63    # It seems that duckduckgo api redirects to its Instant answer API html page
 64    # if the query constains special charactares that aren't urlencoded.
 65    # So in order to always get a JSON response back the query is urlencoded
 66    query = quote_plus(query)
 67    uri = 'https://api.duckduckgo.com/?q=%s&format=json&no_html=1&no_redirect=1' % query
 68    results = json.loads(web.get(uri))
 69    if results['Redirect']:
 70        return results['Redirect']
 71    else:
 72        return None
 73
 74
 75@commands('duck', 'ddg', 'g')
 76@example('.duck sopel bot', r'https?:\/\/sopel\.chat\/?', re=True)
 77def duck(bot, trigger):
 78    """Queries Duck Duck Go for the specified input."""
 79    query = trigger.group(2)
 80    if not query:
 81        return bot.reply('.ddg what?')
 82
 83    # If the API gives us something, say it and stop
 84    result = duck_api(query)
 85    if result:
 86        bot.reply(result)
 87        return
 88
 89    # Otherwise, look it up on the HTMl version
 90    uri = duck_search(query)
 91
 92    if uri:
 93        bot.reply(uri)
 94        if 'last_seen_url' in bot.memory:
 95            bot.memory['last_seen_url'][trigger.sender] = uri
 96    else:
 97        bot.reply("No results found for '%s'." % query)
 98
 99
@commands('bing')
@example('.bing sopel bot', r'https?:\/\/sopel\.chat\/?', re=True)
def bing(bot, trigger):
    """Queries Bing for the specified input."""
    query = trigger.group(2)
    if not query:
        return bot.reply('.bing what?')

    first_link = bing_search(query)
    if not first_link:
        bot.reply("No results found for '%s'." % query)
        return
    # A hit is sent with say(); the no-result message above uses reply().
    bot.say(first_link)
112
113
@commands('search')
@example('.search sopel bot', r'(https?:\/\/sopel\.chat\/? \(b, d\)|https?:\/\/sopel\.chat\/? \(b\), https?:\/\/sopel\.chat\/? \(d\))', re=True)
def search(bot, trigger):
    """Searches Bing and Duck Duck Go."""
    query = trigger.group(2)
    if not query:
        return bot.reply('.search for what?')

    # A missing result from either engine is shown as '-'.
    bing_link = bing_search(query) or '-'
    duck_link = duck_search(query) or '-'

    if bing_link != duck_link:
        # Different answers: show both, replacing any link longer than
        # 150 characters with a placeholder.
        placeholder = '(extremely long link)'
        shown_bing = placeholder if len(bing_link) > 150 else bing_link
        shown_duck = placeholder if len(duck_link) > 150 else duck_link
        message = '%s (b), %s (d)' % (shown_bing, shown_duck)
    else:
        # Same answer from both engines: show it once, tagged with both.
        message = '%s (b, d)' % bing_link

    bot.reply(message)
134
135
@commands('suggest')
@example('.suggest wikip', 'wikipedia')
@example('.suggest ', 'No query term.')
@example('.suggest lkashdfiauwgeaef', 'Sorry, no result.')
def suggest(bot, trigger):
    """Suggest terms starting with given input"""
    if not trigger.group(2):
        return bot.reply("No query term.")
    query = trigger.group(2)
    # Using Google isn't necessarily ideal, but at most they'll be able to build
    # a composite profile of all users on a given instance, not a profile of any
    # single user. This can be switched out as soon as someone finds (or builds)
    # an alternative suggestion API.
    uri = 'https://suggestqueries.google.com/complete/search?output=toolbar&hl=en&q='
    # URL-encode the whole term (not just '+') so characters such as '&' or
    # '#' cannot alter the request. Encoding to UTF-8 first keeps quote_plus
    # working for non-ASCII text on Python 2 as well.
    raw = query if isinstance(query, bytes) else query.encode('utf-8')
    toplevel = xmltodict.parse(web.get(uri + quote_plus(raw)))['toplevel']
    try:
        suggestions = toplevel['CompleteSuggestion']
        # xmltodict yields a list for repeated elements but a single mapping
        # when there is exactly one, so only index when it really is a list.
        if isinstance(suggestions, list):
            suggestions = suggestions[0]
        answer = suggestions['suggestion']['@data']
    except (TypeError, KeyError, IndexError):
        # An empty <toplevel/> parses to None (TypeError); missing elements
        # or an empty list mean there is nothing to suggest.
        answer = None
    if answer:
        bot.say(answer)
    else:
        bot.reply('Sorry, no result.')