/searx/engines/duckduckgo_definitions.py
Python | 149 lines | 138 code | 9 blank | 2 comment | 1 complexity | 09cccc9d0b07ac38a3fb0951864321c2 MD5 | raw file
"""DuckDuckGo Instant Answer ("definitions") engine.

Queries the DuckDuckGo JSON API and converts the reply into a list of result
dicts: answers, plain URL results, suggestions and an infobox.
"""
import json
try:
    from urllib import urlencode  # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3
from lxml import html
from searx.utils import html_to_text
from searx.engines.xpath import extract_text

url = 'https://api.duckduckgo.com/'\
    + '?{query}&format=json&pretty=0&no_redirect=1&d=1'


def result_to_text(url, text, htmlResult):
    """Return the text of the first link in ``htmlResult``, else ``text``.

    :param url: unused; kept so existing callers keep working.
    :param text: fallback returned when no ``<a>`` element can be extracted.
    :param htmlResult: HTML fragment to search for an anchor; may be ``None``
        or empty, in which case ``text`` is returned unchanged.
    """
    # TODO : remove result ending with "Meaning" or "Category"
    # html.fromstring() raises on None / empty input, so bail out early.
    if not htmlResult:
        return text

    anchors = html.fromstring(htmlResult).xpath('//a')
    if anchors:
        return extract_text(anchors[0])
    return text


def request(query, params):
    """Store the API URL for ``query`` in ``params['url']`` and return ``params``."""
    # TODO add kl={locale}
    params['url'] = url.format(query=urlencode({'q': query}))
    return params


def response(resp):
    """Convert the JSON body in ``resp.text`` into a list of result dicts.

    The returned list may contain, in this order:

    * ``{'answer': ...}`` when the reply carries an ``Answer``;
    * ``{'title': heading, 'url': ...}`` for every entry of ``Results`` that
      has a ``FirstURL``;
    * ``{'suggestion': ...}`` for every top-level related topic whose text
      differs from the heading;
    * when the heading is non-empty, either one plain result (only a single
      URL and nothing else was collected) or one infobox dict.

    Fields that are missing or ``null`` in the reply are treated as empty.
    """
    results = []

    search_res = json.loads(resp.text)

    # `or ''` also covers keys that are present but null in the JSON reply.
    heading = search_res.get('Heading') or ''
    content = ''
    attributes = []
    urls = []
    infobox_id = None
    relatedTopics = []

    # add answer if there is one
    answer = search_res.get('Answer')
    if answer:
        results.append({'answer': html_to_text(answer)})

    # add infobox
    content += search_res.get('Definition') or ''
    content += search_res.get('Abstract') or ''

    # image
    image = search_res.get('Image') or None

    # attributes
    # 'Infobox' is only usable when it is a dict; the original crashed on
    # null and on strings containing "content".
    infobox = search_res.get('Infobox')
    if isinstance(infobox, dict):
        for info in infobox.get('content') or []:
            attributes.append({'label': info.get('label'),
                               'value': info.get('value')})

    # urls
    for ddg_result in search_res.get('Results') or []:
        if 'FirstURL' in ddg_result:
            firstURL = ddg_result.get('FirstURL', '')
            text = ddg_result.get('Text', '')
            urls.append({'title': text, 'url': firstURL})
            results.append({'title': heading, 'url': firstURL})

    # related topics
    for ddg_result in search_res.get('RelatedTopics') or []:
        if 'FirstURL' in ddg_result:
            suggestion = result_to_text(ddg_result.get('FirstURL'),
                                        ddg_result.get('Text'),
                                        ddg_result.get('Result'))
            # skip empty suggestions and the ones repeating the heading
            if suggestion and suggestion != heading:
                results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            # the list is registered first and filled in place below
            suggestions = []
            relatedTopics.append({'name': ddg_result.get('Name', ''),
                                  'suggestions': suggestions})
            for topic_result in ddg_result.get('Topics') or []:
                suggestion = result_to_text(topic_result.get('FirstURL'),
                                            topic_result.get('Text'),
                                            topic_result.get('Result'))
                if suggestion and suggestion != heading:
                    suggestions.append(suggestion)

    # abstract
    abstractURL = search_res.get('AbstractURL') or ''
    if abstractURL != '':
        # add as result ? problem always in english
        infobox_id = abstractURL
        urls.append({'title': search_res.get('AbstractSource'),
                     'url': abstractURL})

    # definition
    definitionURL = search_res.get('DefinitionURL') or ''
    if definitionURL != '':
        # add as result ? as answer ? problem always in english
        # when both exist, the definition URL overrides the abstract URL as id
        infobox_id = definitionURL
        urls.append({'title': search_res.get('DefinitionSource'),
                     'url': definitionURL})

    # entity
    entity = search_res.get('Entity', None)
    # TODO continent / country / department / location / waterfall /
    #      mountain range :
    #      link to map search, get weather, nearby locations
    # TODO musician : link to music search
    # TODO concert tour : ??
    # TODO film / actor / television / media franchise :
    #      links to IMDB / rottentomatoes (or scrape result)
    # TODO music : link to musicbrainz / last.fm
    # TODO book : ??
    # TODO artist / playwright : ??
    # TODO company : ??
    # TODO software / os : ??
    # TODO software engineer : ??
    # TODO prepared food : ??
    # TODO website : ??
    # TODO performing art : ??
    # TODO programming language : ??
    # TODO file format : ??

    if heading:
        # TODO get infobox.meta.value where .label='article_title'
        only_one_url = (image is None and not attributes
                        and len(urls) == 1 and not relatedTopics
                        and not content)
        if only_one_url:
            # nothing to show in an infobox: emit a plain result instead
            results.append({
                'url': urls[0]['url'],
                'title': heading,
                'content': content
            })
        else:
            results.append({
                'infobox': heading,
                'id': infobox_id,
                'entity': entity,
                'content': content,
                'img_src': image,
                'attributes': attributes,
                'urls': urls,
                'relatedTopics': relatedTopics
            })

    return results