PageRenderTime 11ms CodeModel.GetById 7ms app.highlight 2ms RepoModel.GetById 1ms app.codeStats 0ms

/searx/engines/duckduckgo.py

https://gitlab.com/lanodan/searx
Python | 76 lines | 37 code | 17 blank | 22 comment | 5 complexity | fa292e0b2956c1c6be362f4847c02885 MD5 | raw file
## DuckDuckGo (Web)
#
# @website     https://duckduckgo.com/
# @provide-api yes (https://duckduckgo.com/api),
#              but not all results from search-site
#
# @using-api   no
# @results     HTML (using search portal)
# @stable      no (HTML can change)
# @parse       url, title, content
#
# @todo        rewrite to api
# @todo        language support
#              (the currently used site does not support language-change)
15
try:
    from urllib import urlencode  # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

from lxml.html import fromstring

from searx.engines.xpath import extract_text
19
# engine dependent config
categories = ['general']
# request() reads params['pageno'] to compute the result offset
paging = True
# request() reads params['language'] to build the ``kl`` query parameter
language_support = True

# search-url
# {query} receives the url-encoded query string, {offset} the index of the
# first result of the requested page; both are filled in by request()
url = 'https://duckduckgo.com/html?{query}&s={offset}'

# specific xpath variables
# result_xpath selects one node per search hit; the three others are
# evaluated relative to each of those nodes by response()
result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
url_xpath = './/a[@class="large"]/@href'
title_xpath = './/a[@class="large"]'
content_xpath = './/div[@class="snippet"]'
33
34
# do search-request
def request(query, params):
    """Fill ``params['url']`` with the DuckDuckGo HTML search URL.

    Reads ``params['pageno']`` (page 1 maps to offset 0) and
    ``params['language']`` ('all', or a code such as 'de_DE'), stores the
    resulting URL under ``params['url']`` and returns the same ``params``
    dict.
    """
    # each page advances the result offset by 30
    offset = (params['pageno'] - 1) * 30

    language = params['language']
    if language == 'all':
        # default region: United States / English
        locale = 'us-en'
    else:
        # DuckDuckGo's ``kl`` parameter is region-first ("us-en", "de-de")
        # while the incoming code is language-first ("en_US"), so the two
        # parts are swapped.  Codes that are not exactly <lang>_<COUNTRY>
        # (e.g. a bare "fr") carry no usable region and are passed through
        # lower-cased, as before.
        parts = language.replace('_', '-').lower().split('-')
        if len(parts) == 2:
            parts.reverse()
        locale = '-'.join(parts)

    params['url'] = url.format(
        query=urlencode({'q': query, 'kl': locale}),
        offset=offset)

    return params
49
50
# get response from search-request
def response(resp):
    """Extract search results from a DuckDuckGo HTML result page.

    ``resp.text`` is parsed with lxml and every node matching
    ``result_xpath`` is turned into a dict with the keys 'title',
    'content' and 'url'.  Nodes without a non-empty link are skipped.
    Returns the list of result dicts (possibly empty).
    """
    results = []

    doc = fromstring(resp.text)

    # parse results
    for r in doc.xpath(result_xpath):
        links = r.xpath(url_xpath)

        # no matching anchor, or an empty href: nothing to link to
        if not links or not links[-1]:
            continue

        # when several anchors match, the last one is used
        res_url = links[-1]

        title = extract_text(r.xpath(title_xpath))
        content = extract_text(r.xpath(content_xpath))

        # append result
        results.append({'title': title,
                        'content': content,
                        'url': res_url})

    # return results
    return results