state_manager.py | searchcode

/askbot/search/state_manager.py

https://github.com/raonyguimaraes/askbot-devel
Python | 262 lines | 224 code | 22 blank | 16 comment | 13 complexity | a64c7788bcef22545ad39db6ea31cca7 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.1

"""Search state manager object"""
import re
import urllib
import copy

from django.core import urlresolvers
from django.utils.http import urlencode
from django.utils.encoding import smart_str

import askbot
import askbot.conf
from askbot import const
from askbot.utils.functions import strip_plus


def extract_matching_token(text, regexes):
    """tries the regexes in order and stops at the first one
    that is found in the text; for that regex:
    * every occurrence of the pattern is cut out of the text
    * the first group of the first occurrence is the extracted token
    * both the token and the remaining text are passed through
      ``strip_plus`` before being returned
    returns a tuple of: extracted token, remaining text
    if nothing matches - returns an empty string and the stripped text
    """
    for pattern in regexes:
        found = pattern.search(text)
        if found is None:
            continue
        token = found.group(1)
        remainder = pattern.sub('', text)
        return (strip_plus(token), strip_plus(remainder))
    return ('', text.strip())

def extract_all_matching_tokens(text, regexes):
    """similar to ``extract_matching_token``, but every regex is applied
    (each one to the text left over by the previous ones) and all
    of the matches are collected, without duplicates
    returns a tuple of: list of the matched tokens (in no particular order)
    and the remains of the input text
    """
    unique_tokens = set()
    remainder = text
    for pattern in regexes:
        found = pattern.findall(remainder)
        if not found:
            continue
        remainder = pattern.sub('', remainder)
        for raw_token in found:
            unique_tokens.add(raw_token.strip())
    cleaned_tokens = [strip_plus(token) for token in unique_tokens]
    return (cleaned_tokens, strip_plus(remainder))


def parse_query(query):
    """takes hand-typed search query string as an argument
    returns a dictionary with keys (and values in parens):
    * stripped_query (query with the items below stripped)
    * query_tags (list of tag names)
    * query_users (list of user names, not validated)
    * query_title (question title, empty string if not given)
    Note: the stripped_query is the actual string
    against which global search will be performed
    the original query will still all be shown in the search
    query input box

    The extraction order matters: title first, then tags, then users,
    each step working on the text left over by the previous one.
    """
    # [title:xxx], title:"xxx", title:'xxx'
    title_regexes = (
        re.compile(r'\[title:(.+?)\]'),
        re.compile(r'title:"([^"]+?)"'),
        re.compile(r"title:'([^']+?)'"),
    )
    (query_title, query) = extract_matching_token(query, title_regexes)

    # [sometag], [tag: sometag], #sometag
    tag_regexes = (
        re.compile(r'\[([^:]+?)\]'),
        # the tag name must not contain "]", otherwise the greedy match
        # swallows the closing bracket and adjacent items like
        # "[tag:a][tag:b]" are read as one tag "a][tag:b"
        re.compile(r'\[tag:\s*([^\s\]]+)\s*]'),
        re.compile(r'#(\S+)'),
    )
    (query_tags, query) = extract_all_matching_tokens(query, tag_regexes)

    # [user:some one], user:"some one", user:'some one',
    # @someone, @"some one", @'some one'
    user_regexes = (
        re.compile(r'\[user:([^\]]+?)\]'),
        re.compile(r'user:"([^"]+?)"'),
        re.compile(r"user:'([^']+?)'"),
        re.compile(r"""@([^'"\s]+)"""),
        re.compile(r'@"([^"]+)"'),
        re.compile(r"@'([^']+)'"),
    )
    (query_users, stripped_query) = extract_all_matching_tokens(query, user_regexes)

    return {
        'stripped_query': stripped_query,
        'query_title': query_title,
        'query_tags': query_tags,
        'query_users': query_users
    }

class SearchState(object):
    """Parameters of a questions listing/search and the urls built from them

    The ``add_*``, ``remove_*`` and ``change_*`` methods never modify
    the object they are called on - they return a modified copy.
    """

    @classmethod
    def get_empty(cls):
        """returns a search state created with all parameters set to ``None``"""
        return cls(scope=None, sort=None, query=None, tags=None, author=None, page=None, user_logged_in=None)

    def __init__(self, scope, sort, query, tags, author, page, user_logged_in):
        """
        * scope - one of the values in ``const.POST_SCOPE_LIST``, anything else
          (or 'favorite' for a user who is not logged in) falls back
          to ``const.DEFAULT_POST_SCOPE``
        * sort - one of the values in ``const.POST_SORT_METHODS``, anything else
          falls back to ``const.DEFAULT_POST_SORT_METHOD``
        * query - hand-typed search query or ``None``
        * tags - string of tag names joined with ``const.TAG_SEP`` or ``None``
        * author - value convertible to ``int`` or ``None``
        * page - value convertible to ``int`` or ``None``
        * user_logged_in - truthy if the visitor is logged in
        """
        # first item of each entry is the allowed value;
        # the comprehension also works when the list is empty
        allowed_scopes = [item[0] for item in const.POST_SCOPE_LIST]
        if (scope not in allowed_scopes) or (scope == 'favorite' and not user_logged_in):
            self.scope = const.DEFAULT_POST_SCOPE
        else:
            self.scope = scope

        self.query = query.strip() if query else None

        if self.query:
            #pull out values of [title:xxx], [user:some one]
            #[tag: sometag], title:'xxx', title:"xxx", @user, @'some user',
            #and  #tag - (hash symbol to delineate the tag
            query_bits = parse_query(self.query)
            self.stripped_query = query_bits['stripped_query']
            self.query_tags = query_bits['query_tags']
            self.query_users = query_bits['query_users']
            self.query_title = query_bits['query_title']
        else:
            self.stripped_query = None
            self.query_tags = None
            self.query_users = None
            self.query_title = None

        allowed_sorts = [item[0] for item in const.POST_SORT_METHODS]
        if (sort not in allowed_sorts) or (
            sort == 'relevance-desc'
            and (not self.query or not askbot.conf.should_show_sort_by_relevance())
        ):
            self.sort = const.DEFAULT_POST_SORT_METHOD
        else:
            self.sort = sort

        #patch for empty stripped query, relevance sorting is useless then
        if self.stripped_query in (None, '') and sort == 'relevance-desc':
            self.sort = const.DEFAULT_POST_SORT_METHOD

        # tags from the tag selector: unique, in the order given
        self.tags = []
        if tags:
            for raw_tag in tags.split(const.TAG_SEP):
                tag = raw_tag.strip()
                # blank names come from doubled or trailing separators
                if tag and tag not in self.tags:
                    self.tags.append(tag)

        self.author = int(author) if author else None
        self.page = int(page) if page else 1
        if self.page < 1:  # in case someone likes jokes :)
            self.page = 1

        self._questions_url = urlresolvers.reverse('questions')

    def __str__(self):
        return self.query_string()

    def full_url(self):
        """returns url of the questions page followed by the query string"""
        return self._questions_url + self.query_string()

    def ask_query_string(self): # TODO: test me
        """returns string to prepopulate title field on the "Ask your question" page"""
        ask_title = self.stripped_query or self.query or ''
        if not ask_title:
            return ''
        return '?' + urlencode({'title': ask_title})

    def full_ask_url(self):
        """returns url of the 'ask' page followed by the ask query string"""
        return urlresolvers.reverse('ask') + self.ask_query_string()

    def unified_tags(self):
        "Returns tags both from tag selector and extracted from query"
        return (self.query_tags or []) + (self.tags or [])

    #
    # Safe characters in urlquote() according to http://www.ietf.org/rfc/rfc1738.txt:
    #
    #    Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
    #    reserved characters used for their reserved purposes may be used
    #    unencoded within a URL.
    #
    # Tag separator (const.TAG_SEP) remains unencoded to clearly mark tag boundaries
    # _+.- stay unencoded to keep tags in URL as verbose as possible
    #      (note that urllib.quote() in Python 2.7 treats _.- as safe chars, but let's be explicit)
    # Hash (#) is not safe and has to be encoded, as it's used as the URL hash delimiter
    #
    SAFE_CHARS = const.TAG_SEP + '_+.-'

    def query_string(self):
        """returns the state as slash-separated 'key:value' items
        with a trailing slash; scope and sort are always present,
        query, tags, author and page only when they are set
        """
        lst = [
            'scope:' + self.scope,
            'sort:' + self.sort
        ]
        if self.query:
            lst.append('query:' + urllib.quote(smart_str(self.query), safe=self.SAFE_CHARS))
        if self.tags:
            lst.append('tags:' + urllib.quote(smart_str(const.TAG_SEP.join(self.tags)), safe=self.SAFE_CHARS))
        if self.author:
            lst.append('author:' + str(self.author))
        if self.page:
            lst.append('page:' + str(self.page))
        return '/'.join(lst) + '/'

    def deepcopy(self): # TODO: test me
        "Used to construct a new SearchState for manipulation, e.g. for adding/removing tags"
        ss = copy.copy(self)

        # the shallow copy shares the list attributes with the original,
        # so they are cloned here; it's important to test against None,
        # because empty lists should also be cloned!
        if ss.tags is not None:
            ss.tags = ss.tags[:]
        if ss.query_tags is not None:
            ss.query_tags = ss.query_tags[:]
        if ss.query_users is not None:
            ss.query_users = ss.query_users[:]

        return ss

    def add_tag(self, tag):
        """returns a copy with the tag appended, unless it is already there"""
        ss = self.deepcopy()
        if tag not in ss.tags:
            ss.tags.append(tag)
            ss.page = 1 # state change causes page reset
        return ss

    def remove_author(self):
        """returns a copy without the author, reset to the first page"""
        ss = self.deepcopy()
        ss.author = None
        ss.page = 1
        return ss

    def remove_tags(self, tags=None):
        """returns a copy without the given tags, reset to the first page;
        when ``tags`` is empty or ``None`` all the tags are removed
        """
        ss = self.deepcopy()
        if tags:
            # filter instead of set arithmetic, so that the remaining
            # tags keep their order and the resulting url is stable
            unwanted = set(tags)
            ss.tags = [tag for tag in ss.tags if tag not in unwanted]
        else:
            ss.tags = []
        ss.page = 1
        return ss

    def change_scope(self, new_scope):
        """returns a copy with the new scope, reset to the first page"""
        ss = self.deepcopy()
        ss.scope = new_scope
        ss.page = 1
        return ss

    def change_sort(self, new_sort):
        """returns a copy with the new sort method, reset to the first page"""
        ss = self.deepcopy()
        ss.sort = new_sort
        ss.page = 1
        return ss

    def change_page(self, new_page):
        """returns a copy pointing to the new page"""
        ss = self.deepcopy()
        ss.page = new_page
        return ss


class DummySearchState(object): # Used for caching question/thread summaries
    """Minimal stand-in offering the ``add_tag``, ``change_scope``
    and ``full_url`` methods; instead of a real url it produces
    a marker built from the most recently added tag
    """

    def add_tag(self, tag):
        """remembers the tag on the object and returns the same object"""
        self.tag = tag
        return self

    def change_scope(self, new_scope):
        """ignores the scope and returns the same object"""
        return self

    def full_url(self):
        """returns the remembered tag wrapped into <<< and >>>;
        ``add_tag`` must have been called before
        """
        marker = '<<<%s>>>' % self.tag
        return marker