/sopel/modules/search.py
Python | 158 lines | 149 code | 4 blank | 5 comment | 5 complexity | 4b60123c73db318041a873bb7a0fc392 MD5 | raw file
1# coding=utf-8
2# Copyright 2008-9, Sean B. Palmer, inamidst.com
3# Copyright 2012, Elsie Powell, embolalia.com
4# Licensed under the Eiffel Forum License 2.
5from __future__ import unicode_literals, absolute_import, print_function, division
6
7import re
8from sopel import web
9from sopel.module import commands, example
10import json
11import xmltodict
12import sys
13
14if sys.version_info.major < 3:
15 from urllib import quote_plus, unquote
16else:
17 from urllib.parse import quote_plus, unquote
18
19
def formatnumber(n):
    """Format a number with beautiful commas.

    ``n`` is converted with ``str()``. A leading sign and everything from
    the first ``.`` onwards are kept as they are; a comma is inserted
    between every group of three characters of the remaining integer part.

    :param n: the value to format (an ``int``, a ``float`` or a numeric
              string)
    :return: the formatted text, e.g. ``'-1,234,567.5'``
    """
    text = str(n)

    # Peel the sign off first so it is never counted as a digit; otherwise
    # ``-123`` would be rendered as ``-,123``.
    sign = ''
    if text[:1] in ('-', '+'):
        sign, text = text[0], text[1:]

    # Only the integer part is grouped; the decimal point and the fraction
    # (if any) are appended back unchanged.
    integer, point, fraction = text.partition('.')

    parts = list(integer)
    # Walk from the right in steps of three, stopping before index 0 so no
    # comma is ever placed in front of the first digit.
    for i in range(len(parts) - 3, 0, -3):
        parts.insert(i, ',')
    return sign + ''.join(parts) + point + fraction
26
27
# Captures the href of an ``<h2><a href="...">`` link (with or without the
# ``b_topTitle`` class on the heading) in a Bing result page.
r_bing = re.compile(r'<h2(?: class=" b_topTitle")?><a href="([^"]+)"')


def bing_search(query, lang='en-US'):
    """Return the first link ``r_bing`` finds on Bing's result page.

    :param query: the search terms, as plain (not URL-encoded) text
    :param lang: value sent as Bing's ``mkt`` parameter
    :return: the captured ``href`` of the first match, or ``None`` when the
             page contains no match
    """
    base = 'https://www.bing.com/search?mkt=%s&q=' % lang
    # URL-encode the terms so characters such as '&', '#' or '+' stay part
    # of the query instead of altering the request. Encoding to UTF-8 first
    # keeps non-ASCII text working on Python 2 as well as Python 3.
    page = web.get(base + quote_plus(query.encode('utf-8')))
    match = r_bing.search(page)
    if match:
        return match.group(1)
    return None
37
38
# Captures the href of a result link on DuckDuckGo's HTML page; the negative
# lookahead rejects links that start with a Yahoo redirect or DuckDuckGo's
# ``y.js`` URL.
r_duck = re.compile(r'nofollow" class="[^"]+" href="(?!(?:https?:\/\/r\.search\.yahoo)|(?:https?:\/\/duckduckgo\.com\/y\.js)(?:\/l\/\?kh=-1&uddg=))(.*?)">')


def duck_search(query):
    """Return the first link ``r_duck`` finds on DuckDuckGo's HTML page.

    Every ``!`` is removed from ``query`` before the request is made.

    :param query: the search terms, as plain (not URL-encoded) text
    :return: the captured link, unquoted and passed through ``web.decode``,
             or ``None`` when nothing matches
    """
    query = query.replace('!', '')
    # URL-encode the terms so characters such as '&', '#' or '+' stay part
    # of the query instead of altering the request. Encoding to UTF-8 first
    # keeps non-ASCII text working on Python 2 as well as Python 3.
    uri = 'https://duckduckgo.com/html/?q=%s&kl=us-en' % quote_plus(query.encode('utf-8'))
    page = web.get(uri, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'})
    if 'web-result' in page:  # filter out the ads on top of the page
        # Keep only the text between the first 'web-result' marker and the
        # next one (or the end of the page when there is just one marker).
        page = page.split('web-result')[1]
    match = r_duck.search(page)
    if match:
        return web.decode(unquote(match.group(1)))
    return None


# Alias google_search to duck_search
google_search = duck_search
56
57
def duck_api(query):
    """Look up ``query`` through the DuckDuckGo JSON API.

    :param query: the search terms, as plain (not URL-encoded) text
    :return: the bang listing URL when the query mentions ``!bang``
             (case-insensitive); otherwise the ``Redirect`` value of the
             API response, or ``None`` when that value is empty
    """
    if '!bang' in query.lower():
        return 'https://duckduckgo.com/bang.html'

    # This fixes issue #885 (https://github.com/sopel-irc/sopel/issues/885)
    # The DuckDuckGo API seems to redirect to its Instant Answer API HTML
    # page if the query contains special characters that aren't URL-encoded,
    # so the query is always URL-encoded to get a JSON response back.
    encoded = quote_plus(query)
    uri = 'https://api.duckduckgo.com/?q=%s&format=json&no_html=1&no_redirect=1' % encoded
    payload = json.loads(web.get(uri))
    # An empty redirect is reported to the caller as None.
    return payload['Redirect'] or None
73
74
@commands('duck', 'ddg', 'g')
@example('.duck sopel bot', r'https?:\/\/sopel\.chat\/?', re=True)
def duck(bot, trigger):
    """Queries Duck Duck Go for the specified input."""
    terms = trigger.group(2)
    if not terms:
        return bot.reply('.ddg what?')

    # The API is tried first; whatever it returns is replied as-is.
    api_answer = duck_api(terms)
    if api_answer:
        bot.reply(api_answer)
        return

    # No answer from the API: fall back to the HTML version.
    link = duck_search(terms)
    if not link:
        bot.reply("No results found for '%s'." % terms)
        return

    bot.reply(link)
    # Record the link per sender, but only when the 'last_seen_url' store
    # already exists in the bot's memory.
    if 'last_seen_url' in bot.memory:
        bot.memory['last_seen_url'][trigger.sender] = link
98
99
@commands('bing')
@example('.bing sopel bot', r'https?:\/\/sopel\.chat\/?', re=True)
def bing(bot, trigger):
    """Queries Bing for the specified input."""
    terms = trigger.group(2)
    if not terms:
        return bot.reply('.bing what?')

    link = bing_search(terms)
    if not link:
        bot.reply("No results found for '%s'." % terms)
        return

    # A hit is sent with ``say`` rather than ``reply``.
    bot.say(link)
112
113
@commands('search')
@example('.search sopel bot', r'(https?:\/\/sopel\.chat\/? \(b, d\)|https?:\/\/sopel\.chat\/? \(b\), https?:\/\/sopel\.chat\/? \(d\))', re=True)
def search(bot, trigger):
    """Searches Bing and Duck Duck Go."""
    terms = trigger.group(2)
    if not terms:
        return bot.reply('.search for what?')

    # A missing result from either engine is shown as '-'.
    bing_hit = bing_search(terms) or '-'
    duck_hit = duck_search(terms) or '-'

    # Both engines agree: report the shared value once, tagged with both.
    if bing_hit == duck_hit:
        bot.reply('%s (b, d)' % bing_hit)
        return

    # Otherwise show both, replacing any link longer than 150 characters
    # with a placeholder.
    shown = tuple(
        hit if len(hit) <= 150 else '(extremely long link)'
        for hit in (bing_hit, duck_hit)
    )
    bot.reply('%s (b), %s (d)' % shown)
134
135
def _first_suggestion(toplevel):
    """Return the first suggestion text under ``toplevel``, or ``None``."""
    try:
        suggestions = toplevel['CompleteSuggestion']
        # xmltodict yields a list for a repeated element but a single
        # mapping when the element occurs only once; normalise to a list so
        # a lone suggestion is not lost to a failed ``[0]`` lookup.
        if not isinstance(suggestions, list):
            suggestions = [suggestions]
        return suggestions[0]['suggestion']['@data']
    except (TypeError, KeyError, IndexError):
        # Empty ``toplevel`` (None), no suggestions, or an unexpected shape.
        return None


@commands('suggest')
@example('.suggest wikip', 'wikipedia')
@example('.suggest ', 'No query term.')
@example('.suggest lkashdfiauwgeaef', 'Sorry, no result.')
def suggest(bot, trigger):
    """Suggest terms starting with given input"""
    if not trigger.group(2):
        return bot.reply("No query term.")
    query = trigger.group(2)
    # Using Google isn't necessarily ideal, but at most they'll be able to build
    # a composite profile of all users on a given instance, not a profile of any
    # single user. This can be switched out as soon as someone finds (or builds)
    # an alternative suggestion API.
    uri = 'https://suggestqueries.google.com/complete/search?output=toolbar&hl=en&q='
    # URL-encode the whole query (not just '+') so characters such as '&'
    # or '#' stay part of the search term. Encoding to UTF-8 first keeps
    # non-ASCII text working on Python 2 as well as Python 3.
    parsed = xmltodict.parse(web.get(uri + quote_plus(query.encode('utf-8'))))
    answer = _first_suggestion(parsed.get('toplevel'))
    if answer:
        bot.say(answer)
    else:
        bot.reply('Sorry, no result.')