PageRenderTime 54ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/solango/solr/query.py

https://bitbucket.org/mstaniszczak/solango2
Python | 647 lines | 638 code | 0 blank | 9 comment | 25 complexity | b4be6a6dfae3a31e699626b35b067f6a MD5 | raw file
Possible License(s): BSD-3-Clause
  1. """
  2. Query
  3. =====
  4. This is a wrapper around a Solr query.
  5. We need to handle 3 use cases:
  6. # User generates own q string
  7. q = "django OR solango"
  8. # User passes a tuple
  9. q = (("model", "entry"), ("q", "django"),)
  10. # User passes us a dictionary
  11. q = {"model" : "entry", "q" : "django"}
  12. """
import urllib
from copy import deepcopy

try:  # Python 2
    from urllib import urlencode
except ImportError:  # Python 3 moved it to urllib.parse
    from urllib.parse import urlencode
  15. class Value(object):
  16. def __init__(self, data=None, prefix=None, default=None, help_text=None):
  17. self.data = data
  18. self.prefix=prefix
  19. self.default=default
  20. self.help_text=help_text
  21. self.name = None
  22. def add(self, value):
  23. self.data = value
  24. def set(self, value):
  25. self.data = value
  26. def __unicode__(self):
  27. return unicode(self.data)
  28. def __repr__(self):
  29. return self.__unicode__()
  30. def __nonzero__(self):
  31. if self.data:
  32. return True
  33. return False
  34. def url(self):
  35. if not self.data:
  36. return ""
  37. name = self.name.replace("_", ".")
  38. if self.prefix:
  39. name = self.prefix + name
  40. return "%s=%s" % (name, self.data)
  41. class IntegerValue(Value):pass
  42. class BooleanValue(Value):pass
  43. class FloatValue(Value):pass
  44. class MultiValue(Value):
  45. def __init__(self, data=None, prefix=None, default=None, help_text=None, per_field=False):
  46. self.data = []
  47. if data is not None:
  48. self.data = data
  49. self.prefix=prefix
  50. self.default=default
  51. self.help_text=help_text
  52. self.name = None
  53. self.per_field = per_field
  54. def add(self, value):
  55. self.data.append(value)
  56. def set(self, value):
  57. if not isinstance(value, list):
  58. self.data = [value]
  59. else:
  60. self.data = value
  61. def url(self):
  62. if not self.data:
  63. return ""
  64. name = self.name.replace("_", ".")
  65. if self.prefix:
  66. name = self.prefix + name
  67. return urllib.urlencode(["%s=%s" % (name, value) for value in self.data])
  68. class UniqueMultiValue(Value):
  69. def __init__(self, data=None, prefix=None, default=None, help_text=None, per_field=False):
  70. self.prefix=prefix
  71. self.default=default
  72. self.help_text=help_text
  73. self.data = set()
  74. if data is not None:
  75. self.data = data
  76. self.name = None
  77. self.per_field = per_field
  78. def add(self, value):
  79. if not isinstance(value, list) and not isinstance(value, set):
  80. self.data.update([value])
  81. else:
  82. self.data.update(value)
  83. def set(self, value):
  84. if not isinstance(value, list):
  85. self.data = set([value])
  86. else:
  87. self.data = set(value)
  88. def url(self):
  89. if not self.data:
  90. return ""
  91. name = self.name.replace("_", ".")
  92. if self.prefix:
  93. name = self.prefix + name
  94. values = []
  95. for value in self.data:
  96. if self.per_field and isinstance(value, (tuple,list)):
  97. field = value[0]
  98. field_value = value[1]
  99. values.append(("f.%s.%s" % (field, name), field_value))
  100. else:
  101. values.append((name, value))
  102. return urllib.urlencode(values)
  103. def __unicode__(self):
  104. return unicode(list(self.data))
  105. class DelimitedMultiValue(UniqueMultiValue):
  106. def url(self):
  107. if not self.data:
  108. return ""
  109. name = self.name.replace("_", ".")
  110. if self.prefix:
  111. name = self.prefix + name
  112. return urllib.urlencode([(name, ",".join(self.data))])
  113. class UniqueSingleValue(UniqueMultiValue):pass
  114. class QValue(MultiValue):
  115. operator = "AND"
  116. def add(self, key, value=None):
  117. if value is not None:
  118. key = "%s:%s" % (key, value)
  119. if isinstance(key, list):
  120. self.data.extend(key)
  121. else:
  122. self.data.append(key)
  123. def url(self):
  124. if not self.data:
  125. return ""
  126. name = self.name.replace("_", ".")
  127. if self.prefix:
  128. name = self.prefix + name
  129. operator = " %s " % self.operator
  130. return urllib.urlencode([(name, operator.join(["%s" % value
  131. for value in self.data]))])
  132. def get_query_values(attrs):
  133. data = {}
  134. for name, value in attrs.items():
  135. if isinstance(value, Value) or name == "facet" or name=="hl":
  136. value = attrs.pop(name)
  137. value.name = name
  138. data[name] = value
  139. return data
  140. class QueryMetaClass(type):
  141. """
  142. Meta Class
  143. """
  144. def __new__(cls, name, bases, attrs):
  145. attrs['base_data'] = get_query_values(attrs)
  146. return super(QueryMetaClass,
  147. cls).__new__(cls, name, bases, attrs)
  148. class QueryBase(object):
  149. __metaclass__ = QueryMetaClass
  150. def __init__(self, initial=[], **kwargs):
  151. self.data = deepcopy(self.base_data)
  152. params = []
  153. if isinstance(initial, (tuple, list)):
  154. params.extend(list(initial))
  155. elif isinstance(initial, dict):
  156. params.extend(initial.items())
  157. params.extend(kwargs.items())
  158. for key, value in params:
  159. self.add(key, value)
  160. def url(self):
  161. return "&".join([value.url() for value in self.data.values() if value])
  162. def __nonzero__(self):
  163. if self.url():
  164. return True
  165. return False
  166. def __setattr__(self, name, value):
  167. if not name.startswith("_") and name != "data" \
  168. and self.data.has_key(name):
  169. self.data[name].set(value)
  170. else:
  171. super(QueryBase, self).__setattr__(name, value)
  172. def __getattr__(self, name):
  173. if name != "data" and self.data.has_key(name):
  174. return self.data[name]
  175. return super(QueryBase, self).__getattr__(name)
  176. def __repr__(self):
  177. return unicode(self.data)
  178. class Facet(QueryBase):
  179. """
  180. Facet
  181. -----
  182. Python Object that represents a Solr Facet query.
  183. """
  184. query = UniqueMultiValue(prefix="facet.",
  185. default="*:*",
  186. help_text="This param allows you to specify an arbitrary"
  187. +"query in the Lucene default syntax to generate a facet"
  188. + "count.")
  189. field = UniqueMultiValue(prefix="facet.",
  190. help_text="This param allows you to specify a field"
  191. + "which should be treated as a facet.")
  192. prefix = UniqueMultiValue(prefix="facet.",
  193. help_text="Limits the terms on which to facet"
  194. + "to those starting with the given string prefix.",
  195. per_field=True)
  196. sort = UniqueMultiValue(prefix="facet.",
  197. help_text ="This param determines the ordering of"
  198. + "the facet field constraints. true - sort the "
  199. + "constraints by count (highest count first. false"
  200. + " - to return the constraints sorted in their index order",
  201. per_field=True)
  202. limit = UniqueMultiValue(prefix="facet.",
  203. default=100,
  204. help_text="This param indicates an offset into the"
  205. + " list of constraints to allow paging.",
  206. per_field=True)
  207. offset = UniqueMultiValue(prefix="facet.",
  208. default=0,
  209. help_text = "This param indicates an offset into"
  210. + " the list of constraints to allow paging.",
  211. per_field=True)
  212. mincount = UniqueMultiValue(prefix="facet.",
  213. default=0,
  214. help_text = "Indicates the minimum counts for "
  215. + "facet fields should be included in the "
  216. + "response. ",
  217. per_field=True)
  218. missing = UniqueMultiValue(prefix="facet.",
  219. default=False,
  220. help_text = "Set to `True` this param indicates "
  221. + "that in addition to the Term based constraints"
  222. + " of a facet field, a count of all matching "
  223. + " results which have no value for the field "
  224. + "should be computed ",
  225. per_field=True)
  226. method = UniqueMultiValue(prefix="facet.",
  227. default="fc",
  228. help_text = "This parameter indicates what type "
  229. + "of algorithm/method to use when faceting a "
  230. + "field. `enum` Enumerates all terms in a field,"
  231. + " `fc` The facet counts are calculated by "
  232. + " iterating over documents that match the query",
  233. per_field=True)
  234. enum_cache_minDf = UniqueMultiValue(prefix="facet.",
  235. default=0,
  236. help_text = "This param indicates the minimum "
  237. + "document frequency (number of documents "
  238. + "matching a term) for which the filterCache "
  239. + "should be used when determining the constraint"
  240. + " count for that term. This is only used when "
  241. + "`facet.method=enum` method of faceting ",
  242. per_field=True)
  243. date = UniqueMultiValue(prefix="facet.",
  244. help_text="This param allows you to specify names "
  245. + "of fields (of type DateField) which should be "
  246. + "treated as date facets. ")
  247. date_start = UniqueMultiValue(prefix="facet.",
  248. help_text= "The lower bound for the first "
  249. + "date range for all Date Faceting on this "
  250. + "field.",
  251. per_field=True)
  252. date_end = UniqueMultiValue(prefix="facet.",
  253. help_text= "The minimum upper bound for the "
  254. + "last date range for all Date Faceting on "
  255. + "this field",
  256. per_field=True)
  257. date_gap = UniqueMultiValue(prefix="facet.",
  258. help_text= "The size of each date range "
  259. + "expressed as an interval to be added "
  260. + "to the lower bound",
  261. per_field=True)
  262. date_hardened = UniqueMultiValue(prefix="facet.",
  263. default=False,
  264. help_text="A Boolean parameter instructing "
  265. + "Solr what to do in the event that "
  266. + "`facet.date.gap` does not divide evenly "
  267. + "between `facet.date.start` and "
  268. + "`facet.date.end`.",
  269. per_field=True)
  270. date_other = UniqueMultiValue(prefix="facet.", default=False,
  271. help_text="This param indicates that in addition to the "
  272. +" counts for each date range constraint between "
  273. +"`facet.date.start` and `facet.date.end`, counts should "
  274. +" also be computed for. `before`: all records with field "
  275. +" values lower then lower bound of the first range "
  276. +" `after`: all records with field values greater then the"
  277. +" upper bound of the last range. `between`: all records "
  278. +" with field values between the start and end bounds of "
  279. +"all ranges. `none`: compute none of this information "
  280. +"`all`: shortcut for before, between, and after",
  281. per_field=True)
  282. _facet = False
  283. def __setattr__(self, name, value):
  284. if name.startswith("facet_"):
  285. name = name[6:]
  286. if not name.startswith("_") and name != "data" \
  287. and self.data.has_key(name):
  288. self.data[name].set(value)
  289. else:
  290. super(Facet, self).__setattr__(name, value)
  291. def __getattr__(self, name):
  292. if name != "data" and self.data.has_key(name):
  293. return self.data[name]
  294. return super(Facet, self).__getattr__(name)
  295. def add(self, name, value):
  296. name = name.replace(".", "_")
  297. if name.startswith("facet_"):
  298. name = name[6:]
  299. if name == "facet":
  300. self._facet = value
  301. else:
  302. self.data[name].add(value)
  303. def url(self):
  304. part = "&".join([value.url() for value in self.data.values() if value])
  305. if part or self._facet is True:
  306. part = "facet=true&" + part
  307. return part
  308. class Highlight(QueryBase):
  309. _hl = False
  310. fl = DelimitedMultiValue(prefix="hl.",
  311. help_text="A comma delimited list of fields to generate "
  312. + "highlighted snippets for")
  313. snippets = UniqueMultiValue(prefix="hl.", default=1,
  314. help_text="The maximum number of highlighted snippets to "
  315. + "generate per field.",
  316. per_field=True)
  317. fragsize = UniqueMultiValue(prefix="hl.", default=100,
  318. help_text="The size, in characters, of fragments to consider "
  319. +"for highlighting. ",
  320. per_field=True)
  321. mergeContiguous = UniqueMultiValue(prefix="hl.", default=False,
  322. help_text="Collapse contiguous fragments into a single "
  323. +"fragment.",
  324. per_field=True)
  325. requireFieldMatch = BooleanValue(prefix="hl.", default=False,
  326. help_text="If true, then a field will only be highlighted if "
  327. + "the query matched in this particular field")
  328. maxAnalyzedChars = IntegerValue(prefix="hl.", default=51200,
  329. help_text = "How many characters into a document to look for "
  330. +"suitable snippets")
  331. alternateField = UniqueMultiValue(prefix="hl.", default=None,
  332. help_text="If a snippet cannot be generated (due to no terms "
  333. + "matching), you can specify a field to use as the "
  334. + "backup/default summary.",
  335. per_field=True)
  336. formatter = Value(prefix="hl.", default="simple",
  337. help_text="Specify a formatter for the highlight output.")
  338. simple_pre = Value(prefix="hl.", default="<em>",
  339. help_text="The text which appears before a highlighted term")
  340. simple_post = Value(prefix="hl.", default="</em>",
  341. help_text="The text which appears after a highlighted term")
  342. fragmenter = UniqueMultiValue(prefix="hl.", default="gap",
  343. help_text="Specify a text snippet generator for highlighted"
  344. + " text. The standard fragmenter is gap, Another option is "
  345. + "regex, which tries to create fragments that `look like` a "
  346. + "certain regular expression. ",
  347. per_field=True)
  348. usePhraseHighlighter = BooleanValue(prefix="hl.", default=False,
  349. help_text="Use SpanScorer to highlight phrase terms only when "
  350. + "they appear within the query phrase in the document.")
  351. highlightMultiTerm = BooleanValue(prefix="hl.", default=False,
  352. help_text="If the SpanScorer is also being used, enables "
  353. + "highlighting for range/wildcard/fuzzy/prefix queries.")
  354. regex_slop = FloatValue(prefix="hl.", default=0.6,
  355. help_text="Factor by which the regex fragmenter can stray from"
  356. + " the ideal fragment size (given by hl.fragsize) to "
  357. + "accommodate the regular expression.")
  358. regex_pattern = Value(prefix="hl.", default=None,
  359. help_text="The regular expression for fragmenting. This could "
  360. + "be used to extract sentences (see example solrconfig.xml)")
  361. regex_maxAnalyzedChars = IntegerValue(prefix="hl.", default=10000,
  362. help_text="Only analyze this many characters from a field when"
  363. + " using the regex fragmenter")
  364. def __setattr__(self, name, value):
  365. if name.startswith("hl_"):
  366. name = name[3:]
  367. if not name.startswith("_") and name != "data" \
  368. and self.data.has_key(name):
  369. self.data[name].set(value)
  370. else:
  371. super(Highlight, self).__setattr__(name, value)
  372. def add(self, name, value):
  373. name = name.replace(".", "_")
  374. if name.startswith("hl_"):
  375. name = name[3:]
  376. if name == "hl":
  377. self._hl = value
  378. else:
  379. self.data[name].add(value)
  380. def url(self):
  381. part = "&".join([value.url() for value in self.data.values() if value])
  382. if part:
  383. part = "hl=true&" + part
  384. elif self._hl is True:
  385. part = "hl=true"
  386. return part
  387. class Query(QueryBase):
  388. """
  389. Query
  390. -----
  391. Object for building solr queries
  392. ..attribute qt:
  393. If a request uses the /select URL, and no SolrRequestHandler has been
  394. configured with /select as its name, then Solr uses the qt (query type)
  395. parameter to determine which Query Handler should be used to process
  396. the request. Valid values are any of the names specified by
  397. <requestHandler ... /> declarations in solrconfig.xml
  398. The default value is "standard".
  399. ..attribute wt:
  400. The wt (writer type) parameter is used by Solr to determine which
  401. QueryResponseWriter should be used to process the request. Valid values
  402. are any of the names specified by <queryResponseWriter... />
  403. declarations in solrconfig.xml
  404. The default value is "json".
  405. ..attribute echoHandler:
  406. If the echoHandler parameter is true, Solr places the name of the
  407. handle used in the response to the client for debugging purposes.
  408. ..attribute echoParams:
  409. The echoParams parameter tells Solr what kinds of Request parameters should be included in the response for debugging purposes, legal values include:
  410. * none - don't include any request parameters for debugging
  411. * explicit - include the parameters explicitly specified by the client in the request
  412. * all - include all parameters involved in this request, either specified explicitly by the client, or implicit because of the request handler configuration.
  413. """
  414. #Common Query Params: http://wiki.apache.org/solr/CommonQueryParameters
  415. q = QValue(default="*:*",
  416. help_text="This is the only mandatory query parameter. Search"
  417. + " string used by solr")
  418. sort = UniqueMultiValue()
  419. start = Value()
  420. rows = Value()
  421. fq = UniqueMultiValue()
  422. fl = DelimitedMultiValue()
  423. debugQuery = Value()
  424. explainOther = Value()
  425. defType = Value()
  426. timeAllowed = Value()
  427. omitHeader = Value()
  428. wt = Value(data="json")
  429. #http://wiki.apache.org/solr/DisMaxRequestHandler
  430. q_alt = QValue()
  431. qf = UniqueMultiValue()
  432. mm = Value()
  433. pf = UniqueMultiValue()
  434. ps = Value()
  435. tie = Value()
  436. bq = QValue()
  437. bf = UniqueMultiValue()
  438. qt = Value()
  439. df = Value()
  440. facet = Facet()
  441. hl = Highlight()
  442. def __init__(self, initial=[], **kwargs):
  443. self.data = deepcopy(self.base_data)
  444. params = []
  445. if isinstance(initial, basestring):
  446. params.append(("q", initial))
  447. elif isinstance(initial, (tuple, list)):
  448. params.extend(list(initial))
  449. elif isinstance(initial, dict):
  450. params.extend(initial.items())
  451. params.extend(kwargs.items())
  452. for key, value in params:
  453. #per field attrs: `f.cat.facet.missing=true`
  454. if key.startswith("f."):
  455. parts = key.split(".")
  456. name = parts[1]
  457. key = ".".join(parts[2:])
  458. value = (name, value)
  459. if key.startswith("facet"):
  460. self.facet.add(key, value)
  461. elif key.startswith("hl"):
  462. self.hl.add(key, value)
  463. else:
  464. self.add(key, value)
  465. def __setattr__(self, name, value):
  466. if not name.startswith("_") and name != "data" \
  467. and self.data.has_key(name):
  468. self.data[name].set(value)
  469. else:
  470. super(Query, self).__setattr__(name, value)
  471. def __getattr__(self, name):
  472. if not name.startswith("_") and name != "data" \
  473. and self.data.has_key(name):
  474. return self.data[name]
  475. return super(Query, self).__getattr__(name)
  476. def add(self, key, value):
  477. if key.startswith("facet"):
  478. self.facet.add(key, value)
  479. elif key.startswith("hl"):
  480. self.hl.add(key, value)
  481. else:
  482. if self.data.has_key(key):
  483. self.data[key].add(value)
  484. else:
  485. self.data["q"].add(key, value)
  486. def url(self):
  487. return "?%s" % "&".join([value.url() for value in self.data.values() if value])
  488. def merge(self, query):
  489. #will merge a another query in with this one.
  490. assert isinstance(query, Query), "Merge only accepts Query element"
  491. for value in query.data.values():
  492. self.add(value.name, value.data)