PageRenderTime 56ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 1ms

/solango/solr/query.py

https://bitbucket.org/onjin/solango2
Python | 656 lines | 647 code | 0 blank | 9 comment | 25 complexity | d9240ca07308f8ddeb06b670baf8db34 MD5 | raw file
  1. """
Query
=====

This is a wrapper around a Solr query.

We need to handle three use cases:

    # The user generates their own q string
    q = "django OR solango"

    # The user passes a tuple of (key, value) pairs
    q = (("model", "entry"), ("q", "django"),)

    # The user passes a dictionary
    q = {"model" : "entry", "q" : "django"}
  12. """
  13. import urllib
  14. from copy import deepcopy
  15. from solango.solr.fq import FqBase
  16. from solango.solr.function import FunctionBase
  17. class Value(object):
  18. def __init__(self, data=None, prefix=None, default=None, help_text=None):
  19. self.data = data
  20. self.prefix=prefix
  21. self.default=default
  22. self.help_text=help_text
  23. self.name = None
  24. def add(self, value):
  25. self.data = value
  26. def set(self, value):
  27. self.data = value
  28. def __unicode__(self):
  29. return unicode(self.data)
  30. def __repr__(self):
  31. return self.__unicode__()
  32. def __nonzero__(self):
  33. if self.data:
  34. return True
  35. return False
  36. def url(self):
  37. if self.data is None:
  38. return ""
  39. name = self.name.replace("_", ".")
  40. if self.prefix:
  41. name = self.prefix + name
  42. return "%s=%s" % (name, self.data)
  43. class IntegerValue(Value):pass
  44. class BooleanValue(Value):pass
  45. class FloatValue(Value):pass
  46. class MultiValue(Value):
  47. def __init__(self, data=None, prefix=None, default=None, help_text=None, per_field=False):
  48. self.data = []
  49. if data is not None:
  50. self.data = data
  51. self.prefix=prefix
  52. self.default=default
  53. self.help_text=help_text
  54. self.name = None
  55. self.per_field = per_field
  56. def add(self, value):
  57. self.data.append(value)
  58. def set(self, value):
  59. if not isinstance(value, list):
  60. self.data = [value]
  61. else:
  62. self.data = value
  63. def url(self):
  64. if not self.data:
  65. return ""
  66. name = self.name.replace("_", ".")
  67. if self.prefix:
  68. name = self.prefix + name
  69. return urllib.urlencode(["%s=%s" % (name, value) for value in self.data])
  70. class UniqueMultiValue(Value):
  71. def __init__(self, data=None, prefix=None, default=None, help_text=None, per_field=False):
  72. self.prefix=prefix
  73. self.default=default
  74. self.help_text=help_text
  75. self.data = set()
  76. if data is not None:
  77. self.data = data
  78. self.name = None
  79. self.per_field = per_field
  80. def add(self, value):
  81. if isinstance(value, (FqBase, FunctionBase)):
  82. self.data.update([unicode(value)])
  83. elif not isinstance(value, list) and not isinstance(value, set):
  84. self.data.update([value])
  85. else:
  86. self.data.update(value)
  87. def set(self, value):
  88. if isinstance(value, (FqBase, FunctionBase)):
  89. self.data = set([unicode(value)])
  90. elif not isinstance(value, list):
  91. self.data = set([value])
  92. else:
  93. self.data = set(value)
  94. def url(self):
  95. if not self.data:
  96. return ""
  97. name = self.name.replace("_", ".")
  98. if self.prefix:
  99. name = self.prefix + name
  100. values = []
  101. for value in self.data:
  102. if self.per_field and isinstance(value, (tuple,list)):
  103. field = value[0]
  104. field_value = value[1]
  105. values.append(("f.%s.%s" % (field, name), field_value))
  106. else:
  107. values.append((name, value))
  108. return urllib.urlencode(values)
  109. def __unicode__(self):
  110. return unicode(list(self.data))
  111. class DelimitedMultiValue(UniqueMultiValue):
  112. def url(self):
  113. if not self.data:
  114. return ""
  115. name = self.name.replace("_", ".")
  116. if self.prefix:
  117. name = self.prefix + name
  118. return urllib.urlencode([(name, ",".join(self.data))])
  119. class UniqueSingleValue(UniqueMultiValue):pass
  120. class QValue(MultiValue):
  121. operator = "AND"
  122. def add(self, key, value=None):
  123. if value is not None:
  124. key = "%s:%s" % (key, value)
  125. if isinstance(key, list):
  126. self.data.extend(key)
  127. else:
  128. self.data.append(key)
  129. def url(self):
  130. if not self.data:
  131. return ""
  132. name = self.name.replace("_", ".")
  133. if self.prefix:
  134. name = self.prefix + name
  135. operator = " %s " % self.operator
  136. return urllib.urlencode([(name,
  137. operator.join(["%s" %
  138. value.encode('utf-8') if isinstance(value, unicode) else value
  139. for value in self.data
  140. ]))])
  141. def get_query_values(attrs):
  142. data = {}
  143. for name, value in attrs.items():
  144. if isinstance(value, Value) or name == "facet" or name=="hl":
  145. value = attrs.pop(name)
  146. value.name = name
  147. data[name] = value
  148. return data
  149. class QueryMetaClass(type):
  150. """
  151. Meta Class
  152. """
  153. def __new__(cls, name, bases, attrs):
  154. attrs['base_data'] = get_query_values(attrs)
  155. return super(QueryMetaClass,
  156. cls).__new__(cls, name, bases, attrs)
  157. class QueryBase(object):
  158. __metaclass__ = QueryMetaClass
  159. def __init__(self, initial=[], **kwargs):
  160. self.data = deepcopy(self.base_data)
  161. params = []
  162. if isinstance(initial, (tuple, list)):
  163. params.extend(list(initial))
  164. elif isinstance(initial, dict):
  165. params.extend(initial.items())
  166. params.extend(kwargs.items())
  167. for key, value in params:
  168. self.add(key, value)
  169. def url(self):
  170. return "&".join([value.url() for value in self.data.values() if value])
  171. def __nonzero__(self):
  172. if self.url():
  173. return True
  174. return False
  175. def __setattr__(self, name, value):
  176. if not name.startswith("_") and name != "data" \
  177. and self.data.has_key(name):
  178. self.data[name].set(value)
  179. else:
  180. super(QueryBase, self).__setattr__(name, value)
  181. def __getattr__(self, name):
  182. if name != "data" and self.data.has_key(name):
  183. return self.data[name]
  184. return super(QueryBase, self).__getattr__(name)
  185. def __repr__(self):
  186. return unicode(self.data)
  187. class Facet(QueryBase):
  188. """
  189. Facet
  190. -----
  191. Python Object that represents a Solr Facet query.
  192. """
  193. query = UniqueMultiValue(prefix="facet.",
  194. default="*:*",
  195. help_text="This param allows you to specify an arbitrary"
  196. +"query in the Lucene default syntax to generate a facet"
  197. + "count.")
  198. field = UniqueMultiValue(prefix="facet.",
  199. help_text="This param allows you to specify a field"
  200. + "which should be treated as a facet.")
  201. prefix = UniqueMultiValue(prefix="facet.",
  202. help_text="Limits the terms on which to facet"
  203. + "to those starting with the given string prefix.",
  204. per_field=True)
  205. sort = UniqueMultiValue(prefix="facet.",
  206. help_text ="This param determines the ordering of"
  207. + "the facet field constraints. true - sort the "
  208. + "constraints by count (highest count first. false"
  209. + " - to return the constraints sorted in their index order",
  210. per_field=True)
  211. limit = UniqueMultiValue(prefix="facet.",
  212. default=100,
  213. help_text="This param indicates an offset into the"
  214. + " list of constraints to allow paging.",
  215. per_field=True)
  216. offset = UniqueMultiValue(prefix="facet.",
  217. default=0,
  218. help_text = "This param indicates an offset into"
  219. + " the list of constraints to allow paging.",
  220. per_field=True)
  221. mincount = UniqueMultiValue(prefix="facet.",
  222. default=0,
  223. help_text = "Indicates the minimum counts for "
  224. + "facet fields should be included in the "
  225. + "response. ",
  226. per_field=True)
  227. missing = UniqueMultiValue(prefix="facet.",
  228. default=False,
  229. help_text = "Set to `True` this param indicates "
  230. + "that in addition to the Term based constraints"
  231. + " of a facet field, a count of all matching "
  232. + " results which have no value for the field "
  233. + "should be computed ",
  234. per_field=True)
  235. method = UniqueMultiValue(prefix="facet.",
  236. default="fc",
  237. help_text = "This parameter indicates what type "
  238. + "of algorithm/method to use when faceting a "
  239. + "field. `enum` Enumerates all terms in a field,"
  240. + " `fc` The facet counts are calculated by "
  241. + " iterating over documents that match the query",
  242. per_field=True)
  243. enum_cache_minDf = UniqueMultiValue(prefix="facet.",
  244. default=0,
  245. help_text = "This param indicates the minimum "
  246. + "document frequency (number of documents "
  247. + "matching a term) for which the filterCache "
  248. + "should be used when determining the constraint"
  249. + " count for that term. This is only used when "
  250. + "`facet.method=enum` method of faceting ",
  251. per_field=True)
  252. date = UniqueMultiValue(prefix="facet.",
  253. help_text="This param allows you to specify names "
  254. + "of fields (of type DateField) which should be "
  255. + "treated as date facets. ")
  256. date_start = UniqueMultiValue(prefix="facet.",
  257. help_text= "The lower bound for the first "
  258. + "date range for all Date Faceting on this "
  259. + "field.",
  260. per_field=True)
  261. date_end = UniqueMultiValue(prefix="facet.",
  262. help_text= "The minimum upper bound for the "
  263. + "last date range for all Date Faceting on "
  264. + "this field",
  265. per_field=True)
  266. date_gap = UniqueMultiValue(prefix="facet.",
  267. help_text= "The size of each date range "
  268. + "expressed as an interval to be added "
  269. + "to the lower bound",
  270. per_field=True)
  271. date_hardened = UniqueMultiValue(prefix="facet.",
  272. default=False,
  273. help_text="A Boolean parameter instructing "
  274. + "Solr what to do in the event that "
  275. + "`facet.date.gap` does not divide evenly "
  276. + "between `facet.date.start` and "
  277. + "`facet.date.end`.",
  278. per_field=True)
  279. date_other = UniqueMultiValue(prefix="facet.", default=False,
  280. help_text="This param indicates that in addition to the "
  281. +" counts for each date range constraint between "
  282. +"`facet.date.start` and `facet.date.end`, counts should "
  283. +" also be computed for. `before`: all records with field "
  284. +" values lower then lower bound of the first range "
  285. +" `after`: all records with field values greater then the"
  286. +" upper bound of the last range. `between`: all records "
  287. +" with field values between the start and end bounds of "
  288. +"all ranges. `none`: compute none of this information "
  289. +"`all`: shortcut for before, between, and after",
  290. per_field=True)
  291. _facet = False
  292. def __setattr__(self, name, value):
  293. if name.startswith("facet_"):
  294. name = name[6:]
  295. if not name.startswith("_") and name != "data" \
  296. and self.data.has_key(name):
  297. self.data[name].set(value)
  298. else:
  299. super(Facet, self).__setattr__(name, value)
  300. def __getattr__(self, name):
  301. if name != "data" and self.data.has_key(name):
  302. return self.data[name]
  303. return super(Facet, self).__getattr__(name)
  304. def add(self, name, value):
  305. name = name.replace(".", "_")
  306. if name.startswith("facet_"):
  307. name = name[6:]
  308. if name == "facet":
  309. self._facet = value
  310. else:
  311. self.data[name].add(value)
  312. def url(self):
  313. part = "&".join([value.url() for value in self.data.values() if value])
  314. if part or self._facet is True:
  315. part = "facet=true&" + part
  316. return part
  317. class Highlight(QueryBase):
  318. _hl = False
  319. fl = DelimitedMultiValue(prefix="hl.",
  320. help_text="A comma delimited list of fields to generate "
  321. + "highlighted snippets for")
  322. snippets = UniqueMultiValue(prefix="hl.", default=1,
  323. help_text="The maximum number of highlighted snippets to "
  324. + "generate per field.",
  325. per_field=True)
  326. fragsize = UniqueMultiValue(prefix="hl.", default=100,
  327. help_text="The size, in characters, of fragments to consider "
  328. +"for highlighting. ",
  329. per_field=True)
  330. mergeContiguous = UniqueMultiValue(prefix="hl.", default=False,
  331. help_text="Collapse contiguous fragments into a single "
  332. +"fragment.",
  333. per_field=True)
  334. requireFieldMatch = BooleanValue(prefix="hl.", default=False,
  335. help_text="If true, then a field will only be highlighted if "
  336. + "the query matched in this particular field")
  337. maxAnalyzedChars = IntegerValue(prefix="hl.", default=51200,
  338. help_text = "How many characters into a document to look for "
  339. +"suitable snippets")
  340. alternateField = UniqueMultiValue(prefix="hl.", default=None,
  341. help_text="If a snippet cannot be generated (due to no terms "
  342. + "matching), you can specify a field to use as the "
  343. + "backup/default summary.",
  344. per_field=True)
  345. formatter = Value(prefix="hl.", default="simple",
  346. help_text="Specify a formatter for the highlight output.")
  347. simple_pre = Value(prefix="hl.", default="<em>",
  348. help_text="The text which appears before a highlighted term")
  349. simple_post = Value(prefix="hl.", default="</em>",
  350. help_text="The text which appears after a highlighted term")
  351. fragmenter = UniqueMultiValue(prefix="hl.", default="gap",
  352. help_text="Specify a text snippet generator for highlighted"
  353. + " text. The standard fragmenter is gap, Another option is "
  354. + "regex, which tries to create fragments that `look like` a "
  355. + "certain regular expression. ",
  356. per_field=True)
  357. usePhraseHighlighter = BooleanValue(prefix="hl.", default=False,
  358. help_text="Use SpanScorer to highlight phrase terms only when "
  359. + "they appear within the query phrase in the document.")
  360. highlightMultiTerm = BooleanValue(prefix="hl.", default=False,
  361. help_text="If the SpanScorer is also being used, enables "
  362. + "highlighting for range/wildcard/fuzzy/prefix queries.")
  363. regex_slop = FloatValue(prefix="hl.", default=0.6,
  364. help_text="Factor by which the regex fragmenter can stray from"
  365. + " the ideal fragment size (given by hl.fragsize) to "
  366. + "accommodate the regular expression.")
  367. regex_pattern = Value(prefix="hl.", default=None,
  368. help_text="The regular expression for fragmenting. This could "
  369. + "be used to extract sentences (see example solrconfig.xml)")
  370. regex_maxAnalyzedChars = IntegerValue(prefix="hl.", default=10000,
  371. help_text="Only analyze this many characters from a field when"
  372. + " using the regex fragmenter")
  373. def __setattr__(self, name, value):
  374. if name.startswith("hl_"):
  375. name = name[3:]
  376. if not name.startswith("_") and name != "data" \
  377. and self.data.has_key(name):
  378. self.data[name].set(value)
  379. else:
  380. super(Highlight, self).__setattr__(name, value)
  381. def add(self, name, value):
  382. name = name.replace(".", "_")
  383. if name.startswith("hl_"):
  384. name = name[3:]
  385. if name == "hl":
  386. self._hl = value
  387. else:
  388. self.data[name].add(value)
  389. def url(self):
  390. part = "&".join([value.url() for value in self.data.values() if value])
  391. if part:
  392. part = "hl=true&" + part
  393. elif self._hl is True:
  394. part = "hl=true"
  395. return part
  396. class Query(QueryBase):
  397. """
  398. Query
  399. -----
  400. Object for building solr queries
  401. ..attribute qt:
  402. If a request uses the /select URL, and no SolrRequestHandler has been
  403. configured with /select as its name, then Solr uses the qt (query type)
  404. parameter to determine which Query Handler should be used to process
  405. the request. Valid values are any of the names specified by
  406. <requestHandler ... /> declarations in solrconfig.xml
  407. The default value is "standard".
  408. ..attribute wt:
  409. The wt (writer type) parameter is used by Solr to determine which
  410. QueryResponseWriter should be used to process the request. Valid values
  411. are any of the names specified by <queryResponseWriter... />
  412. declarations in solrconfig.xml
  413. The default value is "json".
  414. ..attribute echoHandler:
  415. If the echoHandler parameter is true, Solr places the name of the
  416. handle used in the response to the client for debugging purposes.
  417. ..attribute echoParams:
  418. The echoParams parameter tells Solr what kinds of Request parameters should be included in the response for debugging purposes, legal values include:
  419. * none - don't include any request parameters for debugging
  420. * explicit - include the parameters explicitly specified by the client in the request
  421. * all - include all parameters involved in this request, either specified explicitly by the client, or implicit because of the request handler configuration.
  422. """
  423. #Common Query Params: http://wiki.apache.org/solr/CommonQueryParameters
  424. q = QValue(default="*:*",
  425. help_text="This is the only mandatory query parameter. Search"
  426. + " string used by solr")
  427. sort = UniqueMultiValue()
  428. start = Value()
  429. rows = Value()
  430. fq = UniqueMultiValue()
  431. fl = DelimitedMultiValue()
  432. debugQuery = Value()
  433. explainOther = Value()
  434. defType = Value()
  435. timeAllowed = Value()
  436. omitHeader = Value()
  437. wt = Value(data="json")
  438. #http://wiki.apache.org/solr/DisMaxRequestHandler
  439. q_alt = QValue()
  440. qf = UniqueMultiValue()
  441. mm = Value()
  442. pf = UniqueMultiValue()
  443. ps = Value()
  444. tie = Value()
  445. bq = QValue()
  446. bf = UniqueMultiValue()
  447. qt = Value()
  448. df = Value()
  449. facet = Facet()
  450. hl = Highlight()
  451. def __init__(self, initial=[], **kwargs):
  452. self.data = deepcopy(self.base_data)
  453. params = []
  454. if isinstance(initial, basestring):
  455. params.append(("q", initial))
  456. elif isinstance(initial, (tuple, list)):
  457. params.extend(list(initial))
  458. elif isinstance(initial, dict):
  459. params.extend(initial.items())
  460. params.extend(kwargs.items())
  461. for key, value in params:
  462. #per field attrs: `f.cat.facet.missing=true`
  463. if key.startswith("f."):
  464. parts = key.split(".")
  465. name = parts[1]
  466. key = ".".join(parts[2:])
  467. value = (name, value)
  468. if key.startswith("facet"):
  469. self.facet.add(key, value)
  470. elif key.startswith("hl"):
  471. self.hl.add(key, value)
  472. else:
  473. self.add(key, value)
  474. def __setattr__(self, name, value):
  475. if not name.startswith("_") and name != "data" \
  476. and self.data.has_key(name):
  477. self.data[name].set(value)
  478. else:
  479. super(Query, self).__setattr__(name, value)
  480. def __getattr__(self, name):
  481. if not name.startswith("_") and name != "data" \
  482. and self.data.has_key(name):
  483. return self.data[name]
  484. return super(Query, self).__getattr__(name)
  485. def add(self, key, value):
  486. if key.startswith("facet"):
  487. self.facet.add(key, value)
  488. elif key.startswith("hl"):
  489. self.hl.add(key, value)
  490. else:
  491. if self.data.has_key(key):
  492. self.data[key].add(value)
  493. else:
  494. self.data["q"].add(key, value)
  495. def url(self):
  496. return "?%s" % "&".join([value.url() for value in self.data.values() if value is not None])
  497. def merge(self, query):
  498. #will merge a another query in with this one.
  499. assert isinstance(query, Query), "Merge only accepts Query element"
  500. for value in query.data.values():
  501. self.add(value.name, value.data)