/sopel/modules/search.py
Python | 158 lines | 149 code | 4 blank | 5 comment | 5 complexity | 4b60123c73db318041a873bb7a0fc392 MD5 | raw file
1# coding=utf-8 2# Copyright 2008-9, Sean B. Palmer, inamidst.com 3# Copyright 2012, Elsie Powell, embolalia.com 4# Licensed under the Eiffel Forum License 2. 5from __future__ import unicode_literals, absolute_import, print_function, division 6 7import re 8from sopel import web 9from sopel.module import commands, example 10import json 11import xmltodict 12import sys 13 14if sys.version_info.major < 3: 15 from urllib import quote_plus, unquote 16else: 17 from urllib.parse import quote_plus, unquote 18 19 20def formatnumber(n): 21 """Format a number with beautiful commas.""" 22 parts = list(str(n)) 23 for i in range((len(parts) - 3), 0, -3): 24 parts.insert(i, ',') 25 return ''.join(parts) 26 27 28r_bing = re.compile(r'<h2(?: class=" b_topTitle")?><a href="([^"]+)"') 29 30 31def bing_search(query, lang='en-US'): 32 base = 'https://www.bing.com/search?mkt=%s&q=' % lang 33 bytes = web.get(base + query) 34 m = r_bing.search(bytes) 35 if m: 36 return m.group(1) 37 38 39r_duck = re.compile(r'nofollow" class="[^"]+" href="(?!(?:https?:\/\/r\.search\.yahoo)|(?:https?:\/\/duckduckgo\.com\/y\.js)(?:\/l\/\?kh=-1&uddg=))(.*?)">') 40 41 42def duck_search(query): 43 query = query.replace('!', '') 44 uri = 'https://duckduckgo.com/html/?q=%s&kl=us-en' % query 45 bytes = web.get(uri, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}) 46 if 'web-result' in bytes: # filter out the adds on top of the page 47 bytes = bytes.split('web-result')[1] 48 m = r_duck.search(bytes) 49 if m: 50 unquoted_m = unquote(m.group(1)) 51 return web.decode(unquoted_m) 52 53 54# Alias google_search to duck_search 55google_search = duck_search 56 57 58def duck_api(query): 59 if '!bang' in query.lower(): 60 return 'https://duckduckgo.com/bang.html' 61 62 # This fixes issue #885 (https://github.com/sopel-irc/sopel/issues/885) 63 # It seems that duckduckgo api redirects to its Instant answer API html page 64 # if the query constains special charactares that aren't urlencoded. 65 # So in order to always get a JSON response back the query is urlencoded 66 query = quote_plus(query) 67 uri = 'https://api.duckduckgo.com/?q=%s&format=json&no_html=1&no_redirect=1' % query 68 results = json.loads(web.get(uri)) 69 if results['Redirect']: 70 return results['Redirect'] 71 else: 72 return None 73 74 75@commands('duck', 'ddg', 'g') 76@example('.duck sopel bot', r'https?:\/\/sopel\.chat\/?', re=True) 77def duck(bot, trigger): 78 """Queries Duck Duck Go for the specified input.""" 79 query = trigger.group(2) 80 if not query: 81 return bot.reply('.ddg what?') 82 83 # If the API gives us something, say it and stop 84 result = duck_api(query) 85 if result: 86 bot.reply(result) 87 return 88 89 # Otherwise, look it up on the HTMl version 90 uri = duck_search(query) 91 92 if uri: 93 bot.reply(uri) 94 if 'last_seen_url' in bot.memory: 95 bot.memory['last_seen_url'][trigger.sender] = uri 96 else: 97 bot.reply("No results found for '%s'." % query) 98 99 100@commands('bing') 101@example('.bing sopel bot', r'https?:\/\/sopel\.chat\/?', re=True) 102def bing(bot, trigger): 103 """Queries Bing for the specified input.""" 104 if not trigger.group(2): 105 return bot.reply('.bing what?') 106 query = trigger.group(2) 107 result = bing_search(query) 108 if result: 109 bot.say(result) 110 else: 111 bot.reply("No results found for '%s'." % query) 112 113 114@commands('search') 115@example('.search sopel bot', r'(https?:\/\/sopel\.chat\/? \(b, d\)|https?:\/\/sopel\.chat\/? \(b\), https?:\/\/sopel\.chat\/? \(d\))', re=True) 116def search(bot, trigger): 117 """Searches Bing and Duck Duck Go.""" 118 if not trigger.group(2): 119 return bot.reply('.search for what?') 120 query = trigger.group(2) 121 bu = bing_search(query) or '-' 122 du = duck_search(query) or '-' 123 124 if bu == du: 125 result = '%s (b, d)' % bu 126 else: 127 if len(bu) > 150: 128 bu = '(extremely long link)' 129 if len(du) > 150: 130 du = '(extremely long link)' 131 result = '%s (b), %s (d)' % (bu, du) 132 133 bot.reply(result) 134 135 136@commands('suggest') 137@example('.suggest wikip', 'wikipedia') 138@example('.suggest ', 'No query term.') 139@example('.suggest lkashdfiauwgeaef', 'Sorry, no result.') 140def suggest(bot, trigger): 141 """Suggest terms starting with given input""" 142 if not trigger.group(2): 143 return bot.reply("No query term.") 144 query = trigger.group(2) 145 # Using Google isn't necessarily ideal, but at most they'll be able to build 146 # a composite profile of all users on a given instance, not a profile of any 147 # single user. This can be switched out as soon as someone finds (or builds) 148 # an alternative suggestion API. 149 uri = 'https://suggestqueries.google.com/complete/search?output=toolbar&hl=en&q=' 150 answer = xmltodict.parse(web.get(uri + query.replace('+', '%2B')))['toplevel'] 151 try: 152 answer = answer['CompleteSuggestion'][0]['suggestion']['@data'] 153 except TypeError: 154 answer = None 155 if answer: 156 bot.say(answer) 157 else: 158 bot.reply('Sorry, no result.')