/solango/solr/query.py
Python | 656 lines | 647 code | 0 blank | 9 comment | 25 complexity | d9240ca07308f8ddeb06b670baf8db34 MD5 | raw file
"""
Query
=====

This is a wrapper around a Solr query.

We need to handle three use cases:

    # User generates their own q string
    q = "django OR solango"

    # User passes a tuple
    q = (("model", "entry"), ("q", "django"),)

    # User passes us a dictionary
    q = {"model" : "entry", "q" : "django"}
"""
- import urllib
- from copy import deepcopy
- from solango.solr.fq import FqBase
- from solango.solr.function import FunctionBase
class Value(object):
    """Hold one Solr query parameter and render it as ``name=value``.

    ``name`` starts out as ``None``; in this module it is assigned by
    ``get_query_values`` when the owning query class is created.  ``default``
    and ``help_text`` are stored for reference only and are not used when the
    parameter is rendered.
    """

    def __init__(self, data=None, prefix=None, default=None, help_text=None):
        self.data = data
        self.prefix = prefix
        self.default = default
        self.help_text = help_text
        self.name = None

    def add(self, value):
        """Store ``value``; a single-valued parameter simply overwrites."""
        self.data = value

    def set(self, value):
        """Replace the stored value with ``value``."""
        self.data = value

    def __unicode__(self):
        return unicode(self.data)

    def __repr__(self):
        # Python 2 requires __repr__ to return a byte string; returning the
        # unicode object directly raises UnicodeEncodeError on non-ASCII data.
        return self.__unicode__().encode("utf-8")

    def __nonzero__(self):
        return bool(self.data)

    def url(self):
        """Return the URL-encoded ``name=value`` pair, or ``""`` when unset.

        Underscores in the attribute name become dots and ``prefix`` (if any)
        is prepended, e.g. ``simple_pre`` with prefix ``hl.`` is rendered as
        ``hl.simple.pre``.
        """
        if self.data is None:
            return ""

        name = self.name.replace("_", ".")
        if self.prefix:
            name = self.prefix + name

        value = self.data
        # urlencode() calls str() on the value, which fails for non-ASCII
        # unicode, so encode explicitly first.
        if isinstance(value, unicode):
            value = value.encode("utf-8")

        # Encode the pair so characters such as "<", "&" or spaces cannot
        # corrupt the surrounding query string.
        return urllib.urlencode([(name, value)])
class IntegerValue(Value):
    """Marker subclass of ``Value``; adds no behaviour of its own."""


class BooleanValue(Value):
    """Marker subclass of ``Value``; adds no behaviour of its own."""


class FloatValue(Value):
    """Marker subclass of ``Value``; adds no behaviour of its own."""
class MultiValue(Value):
    """Solr parameter that may be repeated; values are kept in a list.

    ``per_field`` is stored on the instance but is not consulted by
    ``url()`` in this class.
    """

    def __init__(self, data=None, prefix=None, default=None, help_text=None,
                 per_field=False):
        super(MultiValue, self).__init__(prefix=prefix, default=default,
                                         help_text=help_text)
        self.data = []
        if data is not None:
            # Route through set() so a scalar is wrapped in a list and a
            # later add() can always append.
            self.set(data)
        self.per_field = per_field

    def add(self, value):
        """Append ``value`` to the stored list."""
        self.data.append(value)

    def set(self, value):
        """Replace the stored list; a non-list ``value`` is wrapped in one."""
        if isinstance(value, list):
            self.data = value
        else:
            self.data = [value]

    def url(self):
        """Return one URL-encoded ``name=value`` pair per stored value.

        Returns ``""`` when nothing is stored.
        """
        if not self.data:
            return ""

        name = self.name.replace("_", ".")
        if self.prefix:
            name = self.prefix + name

        # urlencode() needs (key, value) tuples; handing it pre-formatted
        # "key=value" strings raises TypeError.
        pairs = []
        for value in self.data:
            if isinstance(value, unicode):
                # urlencode() str()-converts values, which fails on
                # non-ASCII unicode.
                value = value.encode("utf-8")
            pairs.append((name, value))

        return urllib.urlencode(pairs)
-
class UniqueMultiValue(Value):
    """Repeatable Solr parameter whose values are de-duplicated in a set.

    When ``per_field`` is true, a stored ``(field, value)`` tuple is rendered
    as the per-field override ``f.<field>.<name>=<value>``.
    """

    def __init__(self, data=None, prefix=None, default=None, help_text=None,
                 per_field=False):
        super(UniqueMultiValue, self).__init__(prefix=prefix, default=default,
                                               help_text=help_text)
        self.data = set()
        if data is not None:
            # Coerce through set() so self.data is always a real set and
            # add() can rely on set.update().
            self.set(data)
        self.per_field = per_field

    @staticmethod
    def _utf8(value):
        """Return ``value`` UTF-8 encoded if it is unicode, else unchanged."""
        if isinstance(value, unicode):
            return value.encode("utf-8")
        return value

    def add(self, value):
        """Add one value, or every member of a list/set, to the stored set.

        ``FqBase`` / ``FunctionBase`` objects are stored as their unicode
        rendering.  Tuples are stored as a single element.
        """
        if isinstance(value, (FqBase, FunctionBase)):
            self.data.add(unicode(value))
        elif isinstance(value, (list, set)):
            self.data.update(value)
        else:
            self.data.add(value)

    def set(self, value):
        """Replace the stored set, accepting the same inputs as ``add``."""
        if isinstance(value, (FqBase, FunctionBase)):
            self.data = set([unicode(value)])
        elif isinstance(value, (list, set)):
            # A set argument must be copied member-wise; wrapping it in a
            # list would fail because sets are unhashable.
            self.data = set(value)
        else:
            self.data = set([value])

    def url(self):
        """Return the URL-encoded pairs for every stored value.

        Returns ``""`` when nothing is stored.
        """
        if not self.data:
            return ""

        name = self.name.replace("_", ".")
        if self.prefix:
            name = self.prefix + name

        pairs = []
        for value in self.data:
            if self.per_field and isinstance(value, (tuple, list)):
                field, field_value = value[0], value[1]
                key = "f.%s.%s" % (field, name)
                pairs.append((self._utf8(key), self._utf8(field_value)))
            else:
                pairs.append((name, self._utf8(value)))

        return urllib.urlencode(pairs)

    def __unicode__(self):
        return unicode(list(self.data))
class DelimitedMultiValue(UniqueMultiValue):
    """Unique values rendered as one comma-separated parameter."""

    def url(self):
        """Return ``name=a,b,c`` URL-encoded, or ``""`` when nothing is stored."""
        if not self.data:
            return ""

        name = self.name.replace("_", ".")
        if self.prefix:
            name = self.prefix + name

        # str.join() only accepts strings, so convert every member first;
        # unicode is encoded explicitly because str() fails on non-ASCII.
        terms = []
        for value in self.data:
            if isinstance(value, unicode):
                value = value.encode("utf-8")
            terms.append(str(value))

        return urllib.urlencode([(name, ",".join(terms))])
class UniqueSingleValue(UniqueMultiValue):
    """Subclass of ``UniqueMultiValue`` that adds no behaviour of its own."""
class QValue(MultiValue):
    """Query-string parameter whose terms are joined with ``operator``."""

    # Boolean operator placed between the stored terms when rendering.
    operator = "AND"

    def add(self, key, value=None):
        """Append a term to the query.

        With ``value`` given, the term is ``"key:value"``.  Without it,
        ``key`` is appended as-is, or extends the terms if it is a list.
        """
        if value is not None:
            key = "%s:%s" % (key, value)

        if isinstance(key, list):
            self.data.extend(key)
        else:
            self.data.append(key)

    def url(self):
        """Return ``name=<terms joined by operator>`` URL-encoded.

        Returns ``""`` when no terms are stored.
        """
        if not self.data:
            return ""

        name = self.name.replace("_", ".")
        if self.prefix:
            name = self.prefix + name

        separator = " %s " % self.operator

        # Every term must be a byte string before joining: unicode is
        # UTF-8 encoded, anything else (e.g. an int) goes through str().
        terms = []
        for value in self.data:
            if isinstance(value, unicode):
                value = value.encode("utf-8")
            terms.append(str(value))

        return urllib.urlencode([(name, separator.join(terms))])
def get_query_values(attrs):
    """Pull the query parameters out of a class-body ``attrs`` dict.

    Every attribute that is a ``Value`` instance, or that is named ``facet``
    or ``hl``, is removed from ``attrs``, has its ``name`` set to the
    attribute name, and is returned in a new ``{name: value}`` dict.
    """
    collected = {}

    # Walk a snapshot of the keys because entries are deleted along the way.
    for attr_name in list(attrs.keys()):
        candidate = attrs[attr_name]
        if not (isinstance(candidate, Value) or attr_name in ("facet", "hl")):
            continue
        del attrs[attr_name]
        candidate.name = attr_name
        collected[attr_name] = candidate

    return collected
-
class QueryMetaClass(type):
    """Metaclass that gathers declared parameters into ``base_data``.

    The parameters found by ``get_query_values`` are removed from the class
    namespace and stored in the ``base_data`` class attribute.  Parameters
    already collected on base classes are carried over, so subclassing a
    query class keeps the inherited parameters.
    """

    def __new__(cls, name, bases, attrs):
        declared = get_query_values(attrs)

        # Start from the parents' parameters.  Bases are walked in reverse so
        # that, as with normal attribute lookup, earlier bases take
        # precedence over later ones.
        base_data = {}
        for base in reversed(bases):
            base_data.update(getattr(base, "base_data", {}))

        # Parameters declared on this class override inherited ones.
        base_data.update(declared)

        attrs['base_data'] = base_data
        return super(QueryMetaClass, cls).__new__(cls, name, bases, attrs)
class QueryBase(object):
    """Base class for objects that hold a set of named query parameters.

    Each instance gets its own deep copy of the class-level ``base_data``
    in ``self.data``.  Attribute reads and writes for names found in
    ``self.data`` are redirected to the stored parameter objects.
    """

    __metaclass__ = QueryMetaClass

    def __init__(self, initial=None, **kwargs):
        """Populate the parameters.

        ``initial`` may be a tuple/list of ``(key, value)`` pairs or a dict;
        any other value (including ``None``) contributes nothing.  Keyword
        arguments are applied after ``initial``.
        """
        self.data = deepcopy(self.base_data)

        params = []

        if isinstance(initial, (tuple, list)):
            params.extend(initial)
        elif isinstance(initial, dict):
            params.extend(initial.items())

        params.extend(kwargs.items())

        for key, value in params:
            self.add(key, value)

    def add(self, key, value):
        """Add ``value`` to the parameter named ``key``.

        Default routing for classes that do not override it.  Raises
        ``KeyError`` for an unknown ``key``.
        """
        self.data[key].add(value)

    def url(self):
        """Return the non-empty parameter fragments joined with ``&``."""
        # Filter on the rendered fragment rather than on truthiness so an
        # explicit 0 or False is still sent.
        fragments = [value.url() for value in self.data.values()]
        return "&".join([fragment for fragment in fragments if fragment])

    def __nonzero__(self):
        return bool(self.url())

    def __setattr__(self, name, value):
        if not name.startswith("_") and name != "data" \
            and name in self.data:
            self.data[name].set(value)
        else:
            super(QueryBase, self).__setattr__(name, value)

    def __getattr__(self, name):
        # Only called when normal lookup fails.  The "data" guard prevents
        # infinite recursion before self.data has been assigned.
        if name != "data" and name in self.data:
            return self.data[name]
        # object defines no __getattr__, so delegating to super() would
        # raise a misleading error; raise the conventional one instead.
        raise AttributeError("%r object has no attribute %r"
                             % (type(self).__name__, name))

    def __repr__(self):
        return repr(self.data)
class Facet(QueryBase):
    """
    Facet
    -----
    Python object that represents the ``facet.*`` part of a Solr query.

    Every parameter is rendered with the ``facet.`` prefix.  Names may be
    given with or without a leading ``facet_`` / ``facet.``.  The bare
    ``facet`` key only toggles the ``_facet`` flag.
    """
    query = UniqueMultiValue(prefix="facet.",
                default="*:*",
                help_text="This param allows you to specify an arbitrary "
                    "query in the Lucene default syntax to generate a facet "
                    "count.")

    field = UniqueMultiValue(prefix="facet.",
                help_text="This param allows you to specify a field "
                    "which should be treated as a facet.")

    prefix = UniqueMultiValue(prefix="facet.",
                help_text="Limits the terms on which to facet "
                    "to those starting with the given string prefix.",
                per_field=True)

    sort = UniqueMultiValue(prefix="facet.",
                help_text="This param determines the ordering of "
                    "the facet field constraints. true - sort the "
                    "constraints by count (highest count first). false"
                    " - to return the constraints sorted in their index order",
                per_field=True)

    limit = UniqueMultiValue(prefix="facet.",
                default=100,
                help_text="This param indicates the maximum number of "
                    "constraint counts that should be returned for the "
                    "facet fields.",
                per_field=True)

    offset = UniqueMultiValue(prefix="facet.",
                default=0,
                help_text="This param indicates an offset into"
                    " the list of constraints to allow paging.",
                per_field=True)

    mincount = UniqueMultiValue(prefix="facet.",
                default=0,
                help_text="Indicates the minimum counts for "
                    "facet fields should be included in the "
                    "response. ",
                per_field=True)

    missing = UniqueMultiValue(prefix="facet.",
                default=False,
                help_text="Set to `True` this param indicates "
                    "that in addition to the Term based constraints"
                    " of a facet field, a count of all matching "
                    " results which have no value for the field "
                    "should be computed ",
                per_field=True)

    method = UniqueMultiValue(prefix="facet.",
                default="fc",
                help_text="This parameter indicates what type "
                    "of algorithm/method to use when faceting a "
                    "field. `enum` Enumerates all terms in a field,"
                    " `fc` The facet counts are calculated by "
                    " iterating over documents that match the query",
                per_field=True)

    enum_cache_minDf = UniqueMultiValue(prefix="facet.",
                default=0,
                help_text="This param indicates the minimum "
                    "document frequency (number of documents "
                    "matching a term) for which the filterCache "
                    "should be used when determining the constraint"
                    " count for that term. This is only used when "
                    "`facet.method=enum` method of faceting ",
                per_field=True)

    date = UniqueMultiValue(prefix="facet.",
                help_text="This param allows you to specify names "
                    "of fields (of type DateField) which should be "
                    "treated as date facets. ")

    date_start = UniqueMultiValue(prefix="facet.",
                help_text="The lower bound for the first "
                    "date range for all Date Faceting on this "
                    "field.",
                per_field=True)

    date_end = UniqueMultiValue(prefix="facet.",
                help_text="The minimum upper bound for the "
                    "last date range for all Date Faceting on "
                    "this field",
                per_field=True)

    date_gap = UniqueMultiValue(prefix="facet.",
                help_text="The size of each date range "
                    "expressed as an interval to be added "
                    "to the lower bound",
                per_field=True)

    date_hardened = UniqueMultiValue(prefix="facet.",
                default=False,
                help_text="A Boolean parameter instructing "
                    "Solr what to do in the event that "
                    "`facet.date.gap` does not divide evenly "
                    "between `facet.date.start` and "
                    "`facet.date.end`.",
                per_field=True)

    date_other = UniqueMultiValue(prefix="facet.", default=False,
        help_text="This param indicates that in addition to the "
            " counts for each date range constraint between "
            "`facet.date.start` and `facet.date.end`, counts should "
            " also be computed for. `before`: all records with field "
            " values lower then lower bound of the first range "
            " `after`: all records with field values greater then the"
            " upper bound of the last range. `between`: all records "
            " with field values between the start and end bounds of "
            "all ranges. `none`: compute none of this information "
            "`all`: shortcut for before, between, and after",
        per_field=True)

    # Set through add("facet", value); forces "facet=true" to be emitted
    # even when no other facet parameter is set.
    _facet = False

    def __setattr__(self, name, value):
        # Accept both "facet_field" and "field" as attribute names.
        if name.startswith("facet_"):
            name = name[6:]
        if not name.startswith("_") and name != "data" \
            and name in self.data:
            self.data[name].set(value)
        else:
            super(Facet, self).__setattr__(name, value)

    def __getattr__(self, name):
        # The "data" guard prevents infinite recursion before self.data has
        # been assigned.
        if name != "data" and name in self.data:
            return self.data[name]
        raise AttributeError("%r object has no attribute %r"
                             % (type(self).__name__, name))

    def add(self, name, value):
        """Add ``value`` to the facet parameter ``name``.

        Dots in ``name`` are treated as underscores and a leading ``facet_``
        is stripped.  The bare name ``facet`` stores ``value`` in the
        ``_facet`` flag.  Raises ``KeyError`` for an unknown parameter.
        """
        name = name.replace(".", "_")
        if name.startswith("facet_"):
            name = name[6:]

        if name == "facet":
            self._facet = value
        else:
            self.data[name].add(value)

    def url(self):
        """Return the facet query-string fragment.

        ``facet=true`` is prepended whenever any facet parameter is set.
        When only the ``_facet`` flag is ``True`` the result is exactly
        ``facet=true``, with no trailing ``&``.  Otherwise returns ``""``.
        """
        part = "&".join([value.url() for value in self.data.values() if value])
        if part:
            return "facet=true&" + part
        if self._facet is True:
            return "facet=true"
        return part
class Highlight(QueryBase):
    """Python object that represents the ``hl.*`` part of a Solr query.

    Every parameter is rendered with the ``hl.`` prefix.  Names may be given
    with or without a leading ``hl_`` / ``hl.``.  The bare ``hl`` key only
    toggles the ``_hl`` flag.
    """

    # Set through add("hl", value); forces "hl=true" to be emitted even when
    # no other highlight parameter is set.
    _hl = False

    fl = DelimitedMultiValue(prefix="hl.",
        help_text="A comma delimited list of fields to generate "
            "highlighted snippets for")

    snippets = UniqueMultiValue(prefix="hl.", default=1,
        help_text="The maximum number of highlighted snippets to "
            "generate per field.",
        per_field=True)

    fragsize = UniqueMultiValue(prefix="hl.", default=100,
        help_text="The size, in characters, of fragments to consider "
            "for highlighting. ",
        per_field=True)

    mergeContiguous = UniqueMultiValue(prefix="hl.", default=False,
        help_text="Collapse contiguous fragments into a single "
            "fragment.",
        per_field=True)

    requireFieldMatch = BooleanValue(prefix="hl.", default=False,
        help_text="If true, then a field will only be highlighted if "
            "the query matched in this particular field")

    maxAnalyzedChars = IntegerValue(prefix="hl.", default=51200,
        help_text="How many characters into a document to look for "
            "suitable snippets")

    alternateField = UniqueMultiValue(prefix="hl.", default=None,
        help_text="If a snippet cannot be generated (due to no terms "
            "matching), you can specify a field to use as the "
            "backup/default summary.",
        per_field=True)

    formatter = Value(prefix="hl.", default="simple",
        help_text="Specify a formatter for the highlight output.")

    simple_pre = Value(prefix="hl.", default="<em>",
        help_text="The text which appears before a highlighted term")

    simple_post = Value(prefix="hl.", default="</em>",
        help_text="The text which appears after a highlighted term")

    fragmenter = UniqueMultiValue(prefix="hl.", default="gap",
        help_text="Specify a text snippet generator for highlighted"
            " text. The standard fragmenter is gap, Another option is "
            "regex, which tries to create fragments that `look like` a "
            "certain regular expression. ",
        per_field=True)

    usePhraseHighlighter = BooleanValue(prefix="hl.", default=False,
        help_text="Use SpanScorer to highlight phrase terms only when "
            "they appear within the query phrase in the document.")

    highlightMultiTerm = BooleanValue(prefix="hl.", default=False,
        help_text="If the SpanScorer is also being used, enables "
            "highlighting for range/wildcard/fuzzy/prefix queries.")

    regex_slop = FloatValue(prefix="hl.", default=0.6,
        help_text="Factor by which the regex fragmenter can stray from"
            " the ideal fragment size (given by hl.fragsize) to "
            "accommodate the regular expression.")

    regex_pattern = Value(prefix="hl.", default=None,
        help_text="The regular expression for fragmenting. This could "
            "be used to extract sentences (see example solrconfig.xml)")

    regex_maxAnalyzedChars = IntegerValue(prefix="hl.", default=10000,
        help_text="Only analyze this many characters from a field when"
            " using the regex fragmenter")

    def __setattr__(self, name, value):
        # Accept both "hl_fl" and "fl" as attribute names.
        if name.startswith("hl_"):
            name = name[3:]
        if not name.startswith("_") and name != "data" \
            and name in self.data:
            self.data[name].set(value)
        else:
            super(Highlight, self).__setattr__(name, value)

    def add(self, name, value):
        """Add ``value`` to the highlight parameter ``name``.

        Dots in ``name`` are treated as underscores and a leading ``hl_`` is
        stripped.  The bare name ``hl`` stores ``value`` in the ``_hl`` flag.
        Raises ``KeyError`` for an unknown parameter.
        """
        name = name.replace(".", "_")
        if name.startswith("hl_"):
            name = name[3:]

        if name == "hl":
            self._hl = value
        else:
            self.data[name].add(value)

    def url(self):
        """Return the highlight query-string fragment.

        ``hl=true`` is prepended whenever any highlight parameter is set.
        When only the ``_hl`` flag is ``True`` the result is exactly
        ``hl=true``.  Otherwise returns ``""``.
        """
        # Filter on the rendered fragment rather than on truthiness, so an
        # explicit False or 0 (e.g. requireFieldMatch=False) is still sent.
        fragments = [value.url() for value in self.data.values()]
        part = "&".join([fragment for fragment in fragments if fragment])
        if part:
            part = "hl=true&" + part
        elif self._hl is True:
            part = "hl=true"
        return part
class Query(QueryBase):
    """
    Query
    -----
    Object for building Solr queries.

    Keys starting with ``facet`` or ``hl`` are routed to the nested
    ``facet`` / ``hl`` objects.  Keys of the form ``f.<field>.<param>`` are
    treated as per-field overrides.  Any key that is not a known parameter
    is appended to ``q`` as a ``key:value`` term.

    ..attribute qt:

        If a request uses the /select URL, and no SolrRequestHandler has been
        configured with /select as its name, then Solr uses the qt (query
        type) parameter to determine which Query Handler should be used to
        process the request. Valid values are any of the names specified by
        <requestHandler ... /> declarations in solrconfig.xml

        The default value is "standard".

    ..attribute wt:

        The wt (writer type) parameter is used by Solr to determine which
        QueryResponseWriter should be used to process the request. Valid
        values are any of the names specified by <queryResponseWriter... />
        declarations in solrconfig.xml

        The default value is "json".

    ..attribute echoHandler:

        If the echoHandler parameter is true, Solr places the name of the
        handle used in the response to the client for debugging purposes.

    ..attribute echoParams:

        The echoParams parameter tells Solr what kinds of Request parameters
        should be included in the response for debugging purposes, legal
        values include:

        * none - don't include any request parameters for debugging
        * explicit - include the parameters explicitly specified by the
          client in the request
        * all - include all parameters involved in this request, either
          specified explicitly by the client, or implicit because of the
          request handler configuration.
    """

    # Common Query Params: http://wiki.apache.org/solr/CommonQueryParameters
    q = QValue(default="*:*",
        help_text="This is the only mandatory query parameter. Search"
            " string used by solr")
    sort = UniqueMultiValue()
    start = Value()
    rows = Value()
    fq = UniqueMultiValue()
    fl = DelimitedMultiValue()
    debugQuery = Value()
    explainOther = Value()
    defType = Value()
    timeAllowed = Value()
    omitHeader = Value()
    wt = Value(data="json")

    # http://wiki.apache.org/solr/DisMaxRequestHandler
    q_alt = QValue()
    qf = UniqueMultiValue()
    mm = Value()
    pf = UniqueMultiValue()
    ps = Value()
    tie = Value()
    bq = QValue()
    bf = UniqueMultiValue()
    qt = Value()
    df = Value()

    facet = Facet()
    hl = Highlight()

    def __init__(self, initial=None, **kwargs):
        """Build a query.

        ``initial`` may be a string (used as a ``q`` term), a tuple/list of
        ``(key, value)`` pairs, or a dict.  Keyword arguments are applied
        after ``initial``.
        """
        self.data = deepcopy(self.base_data)

        params = []

        if isinstance(initial, basestring):
            params.append(("q", initial))
        elif isinstance(initial, (tuple, list)):
            params.extend(initial)
        elif isinstance(initial, dict):
            params.extend(initial.items())

        params.extend(kwargs.items())

        for key, value in params:
            self.add(key, value)

    def __setattr__(self, name, value):
        if not name.startswith("_") and name != "data" \
            and name in self.data:
            self.data[name].set(value)
        else:
            super(Query, self).__setattr__(name, value)

    def __getattr__(self, name):
        # The "data" guard prevents infinite recursion before self.data has
        # been assigned.
        if not name.startswith("_") and name != "data" \
            and name in self.data:
            return self.data[name]
        raise AttributeError("%r object has no attribute %r"
                             % (type(self).__name__, name))

    @staticmethod
    def _is_empty(data):
        """Return True for ``None`` and for zero-length containers/strings."""
        if data is None:
            return True
        try:
            return len(data) == 0
        except TypeError:
            # Scalars such as 0 or False have no length and count as data.
            return False

    def add(self, key, value):
        """Route ``value`` to the parameter identified by ``key``."""
        # Per-field attrs: `f.cat.facet.missing=true` becomes the parameter
        # `facet.missing` with the value ("cat", "true").
        if key.startswith("f."):
            parts = key.split(".")
            if len(parts) >= 3:
                value = (parts[1], value)
                key = ".".join(parts[2:])

        if key.startswith("facet"):
            self.facet.add(key, value)
        elif key.startswith("hl"):
            self.hl.add(key, value)
        elif key in self.data:
            self.data[key].add(value)
        else:
            # Unknown keys become "key:value" terms of the main query.
            self.data["q"].add(key, value)

    def url(self):
        """Return the full query string, starting with ``?``."""
        fragments = [value.url() for value in self.data.values()
                     if value is not None]
        # Unset parameters render as "", and joining those would leave runs
        # of "&&" in the URL.
        return "?%s" % "&".join([fragment for fragment in fragments
                                 if fragment])

    def merge(self, query):
        """Merge the parameters of another ``Query`` into this one.

        Parameters that are unset or empty in ``query`` are skipped, so they
        do not overwrite values already present here.
        """
        assert isinstance(query, Query), "Merge only accepts Query element"

        for name, theirs in query.data.items():
            if isinstance(theirs, QueryBase):
                # Nested facet / hl object: merge parameter by parameter.
                mine = self.data[name]
                for sub_name, sub_value in theirs.data.items():
                    if not self._is_empty(sub_value.data):
                        mine.data[sub_name].add(sub_value.data)
                # Carry over the "_facet" / "_hl" enable flag.
                flag = "_" + name
                if getattr(theirs, flag, False) is True:
                    setattr(mine, flag, True)
            elif not self._is_empty(theirs.data):
                self.add(theirs.name, theirs.data)
-