PageRenderTime 45ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/searx/autocomplete.py

https://gitlab.com/lanodan/searx
Python | 162 lines | 89 code | 37 blank | 36 comment | 26 complexity | dd437211b47e9f5490828758c82b4af9 MD5 | raw file
  1. '''
  2. searx is free software: you can redistribute it and/or modify
  3. it under the terms of the GNU Affero General Public License as published by
  4. the Free Software Foundation, either version 3 of the License, or
  5. (at your option) any later version.
  6. searx is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. GNU Affero General Public License for more details.
  10. You should have received a copy of the GNU Affero General Public License
  11. along with searx. If not, see < http://www.gnu.org/licenses/ >.
  12. (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
  13. '''
  14. from lxml import etree
  15. from json import loads
  16. from urllib import urlencode
  17. from searx.languages import language_codes
  18. from searx.engines import (
  19. categories, engines, engine_shortcuts
  20. )
  21. from searx.poolrequests import get
  22. def searx_bang(full_query):
  23. '''check if the searchQuery contain a bang, and create fitting autocompleter results'''
  24. # check if there is a query which can be parsed
  25. if len(full_query.getSearchQuery()) == 0:
  26. return []
  27. results = []
  28. # check if current query stats with !bang
  29. first_char = full_query.getSearchQuery()[0]
  30. if first_char == '!' or first_char == '?':
  31. if len(full_query.getSearchQuery()) == 1:
  32. # show some example queries
  33. # TODO, check if engine is not avaliable
  34. results.append(first_char + "images")
  35. results.append(first_char + "wikipedia")
  36. results.append(first_char + "osm")
  37. else:
  38. engine_query = full_query.getSearchQuery()[1:]
  39. # check if query starts with categorie name
  40. for categorie in categories:
  41. if categorie.startswith(engine_query):
  42. results.append(first_char+'{categorie}'.format(categorie=categorie))
  43. # check if query starts with engine name
  44. for engine in engines:
  45. if engine.startswith(engine_query.replace('_', ' ')):
  46. results.append(first_char+'{engine}'.format(engine=engine.replace(' ', '_')))
  47. # check if query starts with engine shortcut
  48. for engine_shortcut in engine_shortcuts:
  49. if engine_shortcut.startswith(engine_query):
  50. results.append(first_char+'{engine_shortcut}'.format(engine_shortcut=engine_shortcut))
  51. # check if current query stats with :bang
  52. elif first_char == ':':
  53. if len(full_query.getSearchQuery()) == 1:
  54. # show some example queries
  55. results.append(":en")
  56. results.append(":en_us")
  57. results.append(":english")
  58. results.append(":united_kingdom")
  59. else:
  60. engine_query = full_query.getSearchQuery()[1:]
  61. for lc in language_codes:
  62. lang_id, lang_name, country = map(str.lower, lc)
  63. # check if query starts with language-id
  64. if lang_id.startswith(engine_query):
  65. if len(engine_query) <= 2:
  66. results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0]))
  67. else:
  68. results.append(':{lang_id}'.format(lang_id=lang_id))
  69. # check if query starts with language name
  70. if lang_name.startswith(engine_query):
  71. results.append(':{lang_name}'.format(lang_name=lang_name))
  72. # check if query starts with country
  73. if country.startswith(engine_query.replace('_', ' ')):
  74. results.append(':{country}'.format(country=country.replace(' ', '_')))
  75. # remove duplicates
  76. result_set = set(results)
  77. # remove results which are already contained in the query
  78. for query_part in full_query.query_parts:
  79. if query_part in result_set:
  80. result_set.remove(query_part)
  81. # convert result_set back to list
  82. return list(result_set)
  83. def dbpedia(query):
  84. # dbpedia autocompleter
  85. autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' # noqa
  86. response = get(autocomplete_url
  87. + urlencode(dict(QueryString=query)))
  88. results = []
  89. if response.ok:
  90. dom = etree.fromstring(response.content)
  91. results = dom.xpath('//a:Result/a:Label//text()',
  92. namespaces={'a': 'http://lookup.dbpedia.org/'})
  93. return results
  94. def duckduckgo(query):
  95. # duckduckgo autocompleter
  96. url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'
  97. resp = loads(get(url.format(urlencode(dict(q=query)))).text)
  98. if len(resp) > 1:
  99. return resp[1]
  100. return []
  101. def google(query):
  102. # google autocompleter
  103. autocomplete_url = 'http://suggestqueries.google.com/complete/search?client=toolbar&' # noqa
  104. response = get(autocomplete_url
  105. + urlencode(dict(q=query)))
  106. results = []
  107. if response.ok:
  108. dom = etree.fromstring(response.text)
  109. results = dom.xpath('//suggestion/@data')
  110. return results
  111. def wikipedia(query):
  112. # wikipedia autocompleter
  113. url = 'https://en.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json' # noqa
  114. resp = loads(get(url.format(urlencode(dict(search=query)))).text)
  115. if len(resp) > 1:
  116. return resp[1]
  117. return []
  118. backends = {'dbpedia': dbpedia,
  119. 'duckduckgo': duckduckgo,
  120. 'google': google,
  121. 'wikipedia': wikipedia
  122. }