PageRenderTime 62ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/askbot/search/state_manager.py

https://github.com/raonyguimaraes/askbot-devel
Python | 262 lines | 224 code | 22 blank | 16 comment | 13 complexity | a64c7788bcef22545ad39db6ea31cca7 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.1
  1. """Search state manager object"""
  2. import re
  3. import urllib
  4. import copy
  5. from django.core import urlresolvers
  6. from django.utils.http import urlencode
  7. from django.utils.encoding import smart_str
  8. import askbot
  9. import askbot.conf
  10. from askbot import const
  11. from askbot.utils.functions import strip_plus
  12. def extract_matching_token(text, regexes):
  13. """if text matches any of the regexes,
  14. * the entire match is removed from text
  15. * repeating spaces in the remaining string are replaced with one
  16. * returned is a tuple of: first group from the regex, remaining text
  17. """
  18. for regex in regexes:
  19. m = regex.search(text)
  20. if m:
  21. text = regex.sub('', text)
  22. extracted_match = m.group(1)
  23. return (strip_plus(extracted_match), strip_plus(text))
  24. return ('', text.strip())
  25. def extract_all_matching_tokens(text, regexes):
  26. """the same as the ``extract_matching_token``
  27. but returns a tuple of: list of first group matches from the regexes
  28. and the remains of the input text
  29. """
  30. matching_tokens = set()
  31. for regex in regexes:
  32. matches = regex.findall(text)
  33. if len(matches) > 0:
  34. text = regex.sub('', text)
  35. matching_tokens.update([match.strip() for match in matches])
  36. return ([strip_plus(token) for token in matching_tokens], strip_plus(text))
  37. def parse_query(query):
  38. """takes hand-typed search query string as an argument
  39. returns a dictionary with keys (and values in parens):
  40. * stripped_query (query with the items below stripped)
  41. * query_tags (list of tag names)
  42. * query_users (list of user names, not validated)
  43. * query_title (question title)
  44. Note: the stripped_query is the actual string
  45. against which global search will be performed
  46. the original query will still all be shown in the search
  47. query input box
  48. """
  49. title_re1 = re.compile(r'\[title:(.+?)\]')
  50. title_re2 = re.compile(r'title:"([^"]+?)"')
  51. title_re3 = re.compile(r"title:'([^']+?)'")
  52. title_regexes = (title_re1, title_re2, title_re3)
  53. (query_title, query) = extract_matching_token(query, title_regexes)
  54. tag_re1 = re.compile(r'\[([^:]+?)\]')
  55. tag_re2 = re.compile(r'\[tag:\s*([\S]+)\s*]')
  56. tag_re3 = re.compile(r'#(\S+)')
  57. tag_regexes = (tag_re1, tag_re2, tag_re3)
  58. (query_tags, query) = extract_all_matching_tokens(query, tag_regexes)
  59. user_re1 = re.compile(r'\[user:([^\]]+?)\]')
  60. user_re2 = re.compile(r'user:"([^"]+?)"')
  61. user_re3 = re.compile(r"user:'([^']+?)'")
  62. user_re4 = re.compile(r"""@([^'"\s]+)""")
  63. user_re5 = re.compile(r'@"([^"]+)"')
  64. user_re6 = re.compile(r"@'([^']+)'")
  65. user_regexes = (user_re1, user_re2, user_re3, user_re4, user_re5, user_re6)
  66. (query_users, stripped_query) = extract_all_matching_tokens(query, user_regexes)
  67. return {
  68. 'stripped_query': stripped_query,
  69. 'query_title': query_title,
  70. 'query_tags': query_tags,
  71. 'query_users': query_users
  72. }
class SearchState(object):
    """Represents the full state of the question-list search UI
    (scope, sort order, free-text query, tag filter, author filter and
    page number) and knows how to serialize itself back into the
    ``scope:.../sort:.../...`` style URL query string.

    Instances are treated as near-immutable: the ``add_tag`` /
    ``remove_*`` / ``change_*`` methods return modified copies made via
    ``deepcopy`` rather than mutating in place.
    """

    @classmethod
    def get_empty(cls):
        # All-None arguments fall through __init__'s validation to the
        # defaults (default scope/sort, no query, no tags, page 1).
        return cls(scope=None, sort=None, query=None, tags=None, author=None, page=None, user_logged_in=None)

    def __init__(self, scope, sort, query, tags, author, page, user_logged_in):
        """Validate and normalize every raw input; invalid values are
        silently replaced with defaults rather than raising.

        ``scope``/``sort`` are checked against the allowed constants;
        ``query`` is parsed for embedded title/tag/user tokens;
        ``tags`` is a const.TAG_SEP-separated string; ``author`` and
        ``page`` are coerced to int; ``user_logged_in`` gates the
        'favorite' scope, which only makes sense for logged-in users.
        """
        # INFO: zip(*[('a', 1), ('b', 2)])[0] == ('a', 'b')
        # i.e. the first tuple elements are the valid scope keys
        if (scope not in zip(*const.POST_SCOPE_LIST)[0]) or (scope == 'favorite' and not user_logged_in):
            self.scope = const.DEFAULT_POST_SCOPE
        else:
            self.scope = scope

        self.query = query.strip() if query else None

        if self.query:
            #pull out values of [title:xxx], [user:some one]
            #[tag: sometag], title:'xxx', title:"xxx", @user, @'some user',
            #and #tag - (hash symbol to delineate the tag
            query_bits = parse_query(self.query)
            self.stripped_query = query_bits['stripped_query']
            self.query_tags = query_bits['query_tags']
            self.query_users = query_bits['query_users']
            self.query_title = query_bits['query_title']
        else:
            self.stripped_query = None
            self.query_tags = None
            self.query_users = None
            self.query_title = None

        # relevance sorting is only valid when there is a query and the
        # backend supports it (askbot.conf.should_show_sort_by_relevance)
        if (sort not in zip(*const.POST_SORT_METHODS)[0]) or (sort == 'relevance-desc' and (not self.query or not askbot.conf.should_show_sort_by_relevance())):
            self.sort = const.DEFAULT_POST_SORT_METHOD
        else:
            self.sort = sort
        #patch for empty stripped query, relevance sorting is useless then
        if self.stripped_query in (None, '') and sort == 'relevance-desc':
            self.sort = const.DEFAULT_POST_SORT_METHOD

        # de-duplicate tags while preserving their original order
        self.tags = []
        if tags:
            for t in tags.split(const.TAG_SEP):
                tag = t.strip()
                if tag not in self.tags:
                    self.tags.append(tag)

        # NOTE(review): int() here will raise ValueError on non-numeric
        # input — presumably callers pass validated values; verify.
        self.author = int(author) if author else None
        self.page = int(page) if page else 1
        if self.page == 0: # in case someone likes jokes :)
            self.page = 1

        # cached base URL of the question list view
        self._questions_url = urlresolvers.reverse('questions')

    def __str__(self):
        return self.query_string()

    def full_url(self):
        """Absolute path of the question list with this state encoded."""
        return self._questions_url + self.query_string()

    def ask_query_string(self): # TODO: test me
        """returns string to prepopulate title field on the "Ask your question" page"""
        ask_title = self.stripped_query or self.query or ''
        if not ask_title:
            return ''
        return '?' + urlencode({'title': ask_title})

    def full_ask_url(self):
        """URL of the "Ask" page, pre-filled with the current query as title."""
        return urlresolvers.reverse('ask') + self.ask_query_string()

    def unified_tags(self):
        "Returns tags both from tag selector and extracted from query"
        return (self.query_tags or []) + (self.tags or [])

    #
    # Safe characters in urlquote() according to http://www.ietf.org/rfc/rfc1738.txt:
    #
    # Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
    # reserved characters used for their reserved purposes may be used
    # unencoded within a URL.
    #
    # Tag separator (const.TAG_SEP) remains unencoded to clearly mark tag boundaries
    # _+.- stay unencoded to keep tags in URL as verbose as possible
    # (note that urllib.quote() in Python 2.7 treats _.- as safe chars, but let's be explicit)
    # Hash (#) is not safe and has to be encodeded, as it's used as URL has delimiter
    #
    SAFE_CHARS = const.TAG_SEP + '_+.-'

    def query_string(self):
        """Serialize this state into the 'scope:x/sort:y/.../' URL tail.

        scope and sort are always emitted; query, tags, author and page
        only when set. query and tags are percent-encoded except for
        SAFE_CHARS.
        """
        lst = [
            'scope:' + self.scope,
            'sort:' + self.sort
        ]
        if self.query:
            lst.append('query:' + urllib.quote(smart_str(self.query), safe=self.SAFE_CHARS))
        if self.tags:
            lst.append('tags:' + urllib.quote(smart_str(const.TAG_SEP.join(self.tags)), safe=self.SAFE_CHARS))
        if self.author:
            lst.append('author:' + str(self.author))
        if self.page:
            lst.append('page:' + str(self.page))
        return '/'.join(lst) + '/'

    def deepcopy(self): # TODO: test me
        "Used to contruct a new SearchState for manipulation, e.g. for adding/removing tags"
        # shallow copy plus explicit cloning of the mutable list attributes
        ss = copy.copy(self) #SearchState.get_empty()

        #ss.scope = self.scope
        #ss.sort = self.sort
        #ss.query = self.query

        if ss.tags is not None: # it's important to test against None, because empty lists should also be cloned!
            ss.tags = ss.tags[:] # create a copy

        #ss.author = self.author
        #ss.page = self.page
        #ss.stripped_query = self.stripped_query

        if ss.query_tags: # Here we don't have empty lists, only None
            ss.query_tags = ss.query_tags[:]
        if ss.query_users:
            ss.query_users = ss.query_users[:]

        #ss.query_title = self.query_title
        #ss._questions_url = self._questions_url

        return ss

    def add_tag(self, tag):
        """Return a copy of this state with ``tag`` appended (if new)."""
        ss = self.deepcopy()
        if tag not in ss.tags:
            ss.tags.append(tag)
            ss.page = 1 # state change causes page reset
        return ss

    def remove_author(self):
        """Return a copy of this state with the author filter cleared."""
        ss = self.deepcopy()
        ss.author = None
        ss.page = 1
        return ss

    def remove_tags(self, tags = None):
        """Return a copy with the given ``tags`` removed, or with all
        tags cleared when ``tags`` is None."""
        ss = self.deepcopy()
        if tags:
            ss.tags = list(
                set(ss.tags) - set(tags)
            )
        else:
            ss.tags = []
        ss.page = 1
        return ss

    def change_scope(self, new_scope):
        """Return a copy of this state with a different scope."""
        ss = self.deepcopy()
        ss.scope = new_scope
        ss.page = 1
        return ss

    def change_sort(self, new_sort):
        """Return a copy of this state with a different sort method."""
        ss = self.deepcopy()
        ss.sort = new_sort
        ss.page = 1
        return ss

    def change_page(self, new_page):
        """Return a copy of this state on a different page (no page reset)."""
        ss = self.deepcopy()
        ss.page = new_page
        return ss
  211. class DummySearchState(object): # Used for caching question/thread summaries
  212. def add_tag(self, tag):
  213. self.tag = tag
  214. return self
  215. def change_scope(self, new_scope):
  216. return self
  217. def full_url(self):
  218. return '<<<%s>>>' % self.tag