PageRenderTime 2ms CodeModel.GetById 1ms app.highlight 9ms RepoModel.GetById 1ms app.codeStats 0ms

/searx/autocomplete.py

https://gitlab.com/lanodan/searx
Python | 162 lines | 89 code | 37 blank | 36 comment | 28 complexity | dd437211b47e9f5490828758c82b4af9 MD5 | raw file
  1'''
  2searx is free software: you can redistribute it and/or modify
  3it under the terms of the GNU Affero General Public License as published by
  4the Free Software Foundation, either version 3 of the License, or
  5(at your option) any later version.
  6
  7searx is distributed in the hope that it will be useful,
  8but WITHOUT ANY WARRANTY; without even the implied warranty of
  9MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10GNU Affero General Public License for more details.
 11
 12You should have received a copy of the GNU Affero General Public License
 13along with searx. If not, see < http://www.gnu.org/licenses/ >.
 14
 15(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
 16'''
 17
 18
 19from lxml import etree
 20from json import loads
 21from urllib import urlencode
 22from searx.languages import language_codes
 23from searx.engines import (
 24    categories, engines, engine_shortcuts
 25)
 26from searx.poolrequests import get
 27
 28
 29def searx_bang(full_query):
 30    '''check if the searchQuery contain a bang, and create fitting autocompleter results'''
 31    # check if there is a query which can be parsed
 32    if len(full_query.getSearchQuery()) == 0:
 33        return []
 34
 35    results = []
 36
 37    # check if current query stats with !bang
 38    first_char = full_query.getSearchQuery()[0]
 39    if first_char == '!' or first_char == '?':
 40        if len(full_query.getSearchQuery()) == 1:
 41            # show some example queries
 42            # TODO, check if engine is not avaliable
 43            results.append(first_char + "images")
 44            results.append(first_char + "wikipedia")
 45            results.append(first_char + "osm")
 46        else:
 47            engine_query = full_query.getSearchQuery()[1:]
 48
 49            # check if query starts with categorie name
 50            for categorie in categories:
 51                if categorie.startswith(engine_query):
 52                    results.append(first_char+'{categorie}'.format(categorie=categorie))
 53
 54            # check if query starts with engine name
 55            for engine in engines:
 56                if engine.startswith(engine_query.replace('_', ' ')):
 57                    results.append(first_char+'{engine}'.format(engine=engine.replace(' ', '_')))
 58
 59            # check if query starts with engine shortcut
 60            for engine_shortcut in engine_shortcuts:
 61                if engine_shortcut.startswith(engine_query):
 62                    results.append(first_char+'{engine_shortcut}'.format(engine_shortcut=engine_shortcut))
 63
 64    # check if current query stats with :bang
 65    elif first_char == ':':
 66        if len(full_query.getSearchQuery()) == 1:
 67            # show some example queries
 68            results.append(":en")
 69            results.append(":en_us")
 70            results.append(":english")
 71            results.append(":united_kingdom")
 72        else:
 73            engine_query = full_query.getSearchQuery()[1:]
 74
 75            for lc in language_codes:
 76                lang_id, lang_name, country = map(str.lower, lc)
 77
 78                # check if query starts with language-id
 79                if lang_id.startswith(engine_query):
 80                    if len(engine_query) <= 2:
 81                        results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0]))
 82                    else:
 83                        results.append(':{lang_id}'.format(lang_id=lang_id))
 84
 85                # check if query starts with language name
 86                if lang_name.startswith(engine_query):
 87                    results.append(':{lang_name}'.format(lang_name=lang_name))
 88
 89                # check if query starts with country
 90                if country.startswith(engine_query.replace('_', ' ')):
 91                    results.append(':{country}'.format(country=country.replace(' ', '_')))
 92
 93    # remove duplicates
 94    result_set = set(results)
 95
 96    # remove results which are already contained in the query
 97    for query_part in full_query.query_parts:
 98        if query_part in result_set:
 99            result_set.remove(query_part)
100
101    # convert result_set back to list
102    return list(result_set)
103
104
def dbpedia(query):
    '''dbpedia autocompleter

    Sends ``query`` to the DBpedia keyword-search endpoint and returns the
    text found below every ``Result/Label`` node of the XML reply. An empty
    list is returned when the HTTP response is not OK.
    '''
    endpoint = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'  # noqa
    reply = get(endpoint + urlencode({'QueryString': query}))

    if not reply.ok:
        return []

    # the Result/Label nodes are looked up in the DBpedia lookup namespace
    xml_root = etree.fromstring(reply.content)
    return xml_root.xpath('//a:Result/a:Label//text()',
                          namespaces={'a': 'http://lookup.dbpedia.org/'})
120
121
def duckduckgo(query):
    '''duckduckgo autocompleter

    Requests completions for ``query`` and decodes the JSON reply. The second
    element of the decoded value is returned when it has more than one
    element (presumably the list of suggestions); otherwise an empty list.
    '''
    request_url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'.format(
        urlencode({'q': query}))

    payload = loads(get(request_url).text)
    return payload[1] if len(payload) > 1 else []
130
131
def google(query):
    '''google autocompleter

    Sends ``query`` to the Google suggest endpoint and returns the ``data``
    attribute of every ``suggestion`` element of the XML reply. An empty list
    is returned when the HTTP response is not OK.
    '''
    endpoint = 'http://suggestqueries.google.com/complete/search?client=toolbar&'  # noqa
    reply = get(endpoint + urlencode({'q': query}))

    if not reply.ok:
        return []

    # parse the decoded text of the reply (not the raw bytes)
    xml_root = etree.fromstring(reply.text)
    return xml_root.xpath('//suggestion/@data')
146
147
def wikipedia(query):
    '''wikipedia autocompleter

    Calls the English Wikipedia ``opensearch`` API for ``query`` and decodes
    the JSON reply. The second element of the decoded value is returned when
    it has more than one element (presumably the list of matching titles);
    otherwise an empty list.
    '''
    url_template = 'https://en.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json'  # noqa
    request_url = url_template.format(urlencode({'search': query}))

    payload = loads(get(request_url).text)
    return payload[1] if len(payload) > 1 else []
156
157
# Maps an autocompleter name to the function implementing it; each function
# takes the query text and returns a list of suggestions.
backends = {
    'dbpedia': dbpedia,
    'duckduckgo': duckduckgo,
    'google': google,
    'wikipedia': wikipedia,
}