PageRenderTime 2ms CodeModel.GetById 1ms app.highlight 26ms RepoModel.GetById 1ms app.codeStats 0ms

/searx/engines/duckduckgo_definitions.py

https://gitlab.com/lanodan/searx
Python | 149 lines | 138 code | 9 blank | 2 comment | 1 complexity | 09cccc9d0b07ac38a3fb0951864321c2 MD5 | raw file
  1import json
  2from urllib import urlencode
  3from lxml import html
  4from searx.utils import html_to_text
  5from searx.engines.xpath import extract_text
  6
  7url = 'https://api.duckduckgo.com/'\
  8    + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
  9
 10
 11def result_to_text(url, text, htmlResult):
 12    # TODO : remove result ending with "Meaning" or "Category"
 13    dom = html.fromstring(htmlResult)
 14    a = dom.xpath('//a')
 15    if len(a) >= 1:
 16        return extract_text(a[0])
 17    else:
 18        return text
 19
 20
 21def request(query, params):
 22    # TODO add kl={locale}
 23    params['url'] = url.format(query=urlencode({'q': query}))
 24    return params
 25
 26
 27def response(resp):
 28    results = []
 29
 30    search_res = json.loads(resp.text)
 31
 32    content = ''
 33    heading = search_res.get('Heading', '')
 34    attributes = []
 35    urls = []
 36    infobox_id = None
 37    relatedTopics = []
 38
 39    # add answer if there is one
 40    answer = search_res.get('Answer', '')
 41    if answer != '':
 42        results.append({'answer': html_to_text(answer)})
 43
 44    # add infobox
 45    if 'Definition' in search_res:
 46        content = content + search_res.get('Definition', '')
 47
 48    if 'Abstract' in search_res:
 49        content = content + search_res.get('Abstract', '')
 50
 51    # image
 52    image = search_res.get('Image', '')
 53    image = None if image == '' else image
 54
 55    # attributes
 56    if 'Infobox' in search_res:
 57        infobox = search_res.get('Infobox', None)
 58        if 'content' in infobox:
 59            for info in infobox.get('content'):
 60                attributes.append({'label': info.get('label'),
 61                                  'value': info.get('value')})
 62
 63    # urls
 64    for ddg_result in search_res.get('Results', []):
 65        if 'FirstURL' in ddg_result:
 66            firstURL = ddg_result.get('FirstURL', '')
 67            text = ddg_result.get('Text', '')
 68            urls.append({'title': text, 'url': firstURL})
 69            results.append({'title': heading, 'url': firstURL})
 70
 71    # related topics
 72    for ddg_result in search_res.get('RelatedTopics', []):
 73        if 'FirstURL' in ddg_result:
 74            suggestion = result_to_text(ddg_result.get('FirstURL', None),
 75                                        ddg_result.get('Text', None),
 76                                        ddg_result.get('Result', None))
 77            if suggestion != heading:
 78                results.append({'suggestion': suggestion})
 79        elif 'Topics' in ddg_result:
 80            suggestions = []
 81            relatedTopics.append({'name': ddg_result.get('Name', ''),
 82                                 'suggestions': suggestions})
 83            for topic_result in ddg_result.get('Topics', []):
 84                suggestion = result_to_text(topic_result.get('FirstURL', None),
 85                                            topic_result.get('Text', None),
 86                                            topic_result.get('Result', None))
 87                if suggestion != heading:
 88                    suggestions.append(suggestion)
 89
 90    # abstract
 91    abstractURL = search_res.get('AbstractURL', '')
 92    if abstractURL != '':
 93        # add as result ? problem always in english
 94        infobox_id = abstractURL
 95        urls.append({'title': search_res.get('AbstractSource'),
 96                    'url': abstractURL})
 97
 98    # definition
 99    definitionURL = search_res.get('DefinitionURL', '')
100    if definitionURL != '':
101        # add as result ? as answer ? problem always in english
102        infobox_id = definitionURL
103        urls.append({'title': search_res.get('DefinitionSource'),
104                    'url': definitionURL})
105
106    # entity
107    entity = search_res.get('Entity', None)
108    # TODO continent / country / department / location / waterfall /
109    #      mountain range :
110    #      link to map search, get weather, near by locations
111    # TODO musician : link to music search
112    # TODO concert tour : ??
113    # TODO film / actor / television  / media franchise :
114    #      links to IMDB / rottentomatoes (or scrap result)
115    # TODO music : link tu musicbrainz / last.fm
116    # TODO book : ??
117    # TODO artist / playwright : ??
118    # TODO compagny : ??
119    # TODO software / os : ??
120    # TODO software engineer : ??
121    # TODO prepared food : ??
122    # TODO website : ??
123    # TODO performing art : ??
124    # TODO prepared food : ??
125    # TODO programming language : ??
126    # TODO file format : ??
127
128    if len(heading) > 0:
129        # TODO get infobox.meta.value where .label='article_title'
130        if image is None and len(attributes) == 0 and len(urls) == 1 and\
131           len(relatedTopics) == 0 and len(content) == 0:
132            results.append({
133                           'url': urls[0]['url'],
134                           'title': heading,
135                           'content': content
136                           })
137        else:
138            results.append({
139                           'infobox': heading,
140                           'id': infobox_id,
141                           'entity': entity,
142                           'content': content,
143                           'img_src': image,
144                           'attributes': attributes,
145                           'urls': urls,
146                           'relatedTopics': relatedTopics
147                           })
148
149    return results