PageRenderTime 56ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/MoinMoin/search/term.py

https://bitbucket.org/thomaswaldmann/moin-2.0-dev/
Python | 458 lines | 421 code | 0 blank | 37 comment | 2 complexity | 3beec70c7a2dce8049ac40c5343e2779 MD5 | raw file
Possible License(s): GPL-2.0, MPL-2.0-no-copyleft-exception
  1. """
  2. MoinMoin - search expression object representation
  3. This module defines the possible search terms for a query to the
  4. storage backend. This is used, for example, to implement searching,
  5. page lists etc.
  6. Note that some backends can optimise some of the search terms, for
  7. example a backend that has indexed various metadata keys can optimise
  8. easy expressions containing ItemMetaDataMatch terms. This is only allowed
  9. for classes documented as being 'final' which hence also means that
  10. their _evaluate function may not be overridden by descendent classes.
  11. For example, that metadata backend could test if the expression is an
  12. ItemMetaDataMatch expression, and if so, simply return the appropriate
  13. index; or if it is an AND() expression build the page list from the
  14. index, remove the ItemMetaDataMatch instance from the AND list and match
  15. the resulting expression only for pages in that list. Etc.
  16. TODO: Should we write some generic code for picking apart expressions
  17. like that?
  18. @copyright: 2008 MoinMoin:JohannesBerg
  19. @license: GNU GPL, see COPYING for details.
  20. """
  21. import re
  22. from MoinMoin.storage.error import NoSuchRevisionError
  23. # Base classes
  24. class Term(object):
  25. """
  26. Base class for search terms.
  27. """
  28. # relative cost of this search term
  29. _cost = 0
  30. def __init__(self):
  31. pass
  32. def evaluate(self, item):
  33. """
  34. Evaluate this term and return True or False if the
  35. item identified by the parameters matches.
  36. @param item: the item
  37. """
  38. assert hasattr(self, '_result')
  39. if self._result is None:
  40. self._result = self._evaluate(item)
  41. return self._result
  42. def _evaluate(self, item):
  43. """
  44. Implements the actual evaluation
  45. """
  46. raise NotImplementedError()
  47. def prepare(self):
  48. """
  49. Prepare this search term to make it ready for testing.
  50. Must be called before each outermost-level evaluate.
  51. """
  52. self._result = None
  53. def copy(self):
  54. """
  55. Make a copy of this search term.
  56. """
  57. return self.__class__()
  58. class UnaryTerm(Term):
  59. """
  60. Base class for search terms that has a single contained
  61. search term, e.g. NOT.
  62. """
  63. def __init__(self, term):
  64. Term.__init__(self)
  65. assert isinstance(term, Term)
  66. self.term = term
  67. def prepare(self):
  68. Term.prepare(self)
  69. self.term.prepare()
  70. self._cost = self.term._cost
  71. def __repr__(self):
  72. return u'<%s(%r)>' % (self.__class__.__name__, self.term)
  73. def copy(self):
  74. return self.__class__(self.term.copy())
  75. class ListTerm(Term):
  76. """
  77. Base class for search terms that contain multiple other
  78. search terms, e.g. AND.
  79. """
  80. def __init__(self, *terms):
  81. Term.__init__(self)
  82. for e in terms:
  83. assert isinstance(e, Term)
  84. self.terms = list(terms)
  85. def prepare(self):
  86. Term.prepare(self)
  87. # the sum of all costs is a bit of a worst-case cost...
  88. self._cost = 0
  89. for e in self.terms:
  90. e.prepare()
  91. self._cost += e._cost
  92. self.terms.sort(cmp=lambda x, y: cmp(x._cost, y._cost))
  93. def remove(self, subterm):
  94. self.terms.remove(subterm)
  95. def add(self, subterm):
  96. self.terms.append(subterm)
  97. def __repr__(self):
  98. return u'<%s(%s)>' % (self.__class__.__name__,
  99. ', '.join([repr(t) for t in self.terms]))
  100. def copy(self):
  101. terms = [t.copy() for t in self.terms]
  102. return self.__class__(*terms)
  103. # Logical expression classes
  104. class AND(ListTerm):
  105. """
  106. AND connection between multiple terms. Final.
  107. """
  108. def _evaluate(self, item):
  109. for e in self.terms:
  110. if not e.evaluate(item):
  111. return False
  112. return True
  113. class OR(ListTerm):
  114. """
  115. OR connection between multiple terms. Final.
  116. """
  117. def _evaluate(self, item):
  118. for e in self.terms:
  119. if e.evaluate(item):
  120. return True
  121. return False
  122. class NOT(UnaryTerm):
  123. """
  124. Inversion of a single term. Final.
  125. """
  126. def _evaluate(self, item):
  127. return not self.term.evaluate(item)
  128. class XOR(ListTerm):
  129. """
  130. XOR connection between multiple terms, i.e. exactly
  131. one must be True. Final.
  132. """
  133. def _evaluate(self, item):
  134. count = 0
  135. for e in self.terms:
  136. if e.evaluate(item):
  137. count += 1
  138. return count == 1
  139. class _BOOL(Term):
  140. _cost = 0
  141. def __init__(self, val):
  142. self._val = val
  143. def prepare(self):
  144. self._result = self._val
  145. def __repr__(self):
  146. return '<%s>' % str(self._val).upper()
  147. def copy(self):
  148. return self
  149. TRUE = _BOOL(True)
  150. FALSE = _BOOL(False)
  151. def BOOL(b):
  152. if b:
  153. return TRUE
  154. return FALSE
  155. # Actual Moin search terms
  156. class TextRE(Term):
  157. """
  158. Regular expression full text match, use as last resort.
  159. """
  160. _cost = 1000 # almost prohibitive
  161. def __init__(self, needle_re):
  162. Term.__init__(self)
  163. assert hasattr(needle_re, 'search')
  164. self._needle_re = needle_re
  165. def _evaluate(self, item):
  166. try:
  167. rev = item.get_revision(-1)
  168. except NoSuchRevisionError:
  169. return False
  170. data = rev.read()
  171. return not (not self._needle_re.search(data))
  172. def __repr__(self):
  173. return u'<term.TextRE(...)>'
  174. def copy(self):
  175. return TextRE(self._needle_re)
  176. class Text(TextRE):
  177. """
  178. Full text match including middle of words and over word
  179. boundaries. Final.
  180. """
  181. def __init__(self, needle, case_sensitive):
  182. flags = re.UNICODE
  183. if not case_sensitive:
  184. flags = flags | re.IGNORECASE
  185. _needle_re = re.compile(re.escape(needle), flags)
  186. TextRE.__init__(self, _needle_re)
  187. self.needle = needle
  188. self.case_sensitive = case_sensitive
  189. def __repr__(self):
  190. return u'<term.Text(%s, %s)>' % (self.needle, self.case_sensitive)
  191. def copy(self):
  192. return Text(self.needle, self.case_sensitive)
  193. class Word(TextRE):
  194. """
  195. Full text match finding exact words. Final.
  196. """
  197. def __init__(self, needle, case_sensitive):
  198. flags = re.UNICODE
  199. if not case_sensitive:
  200. flags = flags | re.IGNORECASE
  201. _needle_re = re.compile('\\b' + re.escape(needle) + '\\b', flags)
  202. TextRE.__init__(self, _needle_re)
  203. self.needle = needle
  204. self.case_sensitive = case_sensitive
  205. def __repr__(self):
  206. return u'<term.Word(%s, %s)>' % (self.needle, self.case_sensitive)
  207. def copy(self):
  208. return Word(self.needle, self.case_sensitive)
  209. class WordStart(TextRE):
  210. """
  211. Full text match finding the start of a word. Final.
  212. """
  213. def __init__(self, needle, case_sensitive):
  214. flags = re.UNICODE
  215. if not case_sensitive:
  216. flags = flags | re.IGNORECASE
  217. _needle_re = re.compile('\\b' + re.escape(needle), flags)
  218. TextRE.__init__(self, _needle_re)
  219. self.needle = needle
  220. self.case_sensitive = case_sensitive
  221. def __repr__(self):
  222. return u'<term.WordStart(%s, %s)>' % (self.needle, self.case_sensitive)
  223. def copy(self):
  224. return WordStart(self.needle, self.case_sensitive)
  225. class WordEnd(TextRE):
  226. """
  227. Full text match finding the end of a word. Final.
  228. """
  229. def __init__(self, needle, case_sensitive):
  230. flags = re.UNICODE
  231. if not case_sensitive:
  232. flags = flags | re.IGNORECASE
  233. _needle_re = re.compile(re.escape(needle) + '\\b', flags)
  234. TextRE.__init__(self, _needle_re)
  235. self.needle = needle
  236. self.case_sensitive = case_sensitive
  237. def __repr__(self):
  238. return u'<term.WordEnd(%s, %s)>' % (self.needle, self.case_sensitive)
  239. def copy(self):
  240. return WordEnd(self.needle, self.case_sensitive)
  241. class NameRE(Term):
  242. """
  243. Matches the item's name with a given regular expression.
  244. """
  245. _cost = 10 # one of the cheapest
  246. def __init__(self, needle_re):
  247. Term.__init__(self)
  248. assert hasattr(needle_re, 'search')
  249. self._needle_re = needle_re
  250. def _evaluate(self, item):
  251. return not (not self._needle_re.search(item.name))
  252. def __repr__(self):
  253. return u'<term.NameRE(...)>'
  254. def copy(self):
  255. return NameRE(self._needle_re)
  256. class Name(NameRE):
  257. """
  258. Item name match, given needle must occur in item's name. Final.
  259. """
  260. def __init__(self, needle, case_sensitive):
  261. assert isinstance(needle, unicode)
  262. flags = re.UNICODE
  263. if not case_sensitive:
  264. flags = flags | re.IGNORECASE
  265. _needle_re = re.compile(re.escape(needle), flags)
  266. NameRE.__init__(self, _needle_re)
  267. self.needle = needle
  268. self.case_sensitive = case_sensitive
  269. def __repr__(self):
  270. return u'<term.Name(%s, %s)>' % (self.needle, self.case_sensitive)
  271. def copy(self):
  272. return Name(self.needle, self.case_sensitive)
  273. class NameFn(Term):
  274. """
  275. Arbitrary item name matching function.
  276. """
  277. def __init__(self, fn):
  278. Term.__init__(self)
  279. assert callable(fn)
  280. self._fn = fn
  281. def _evaluate(self, item):
  282. return not (not self._fn(item.name))
  283. def __repr__(self):
  284. return u'<term.NameFn(%r)>' % (self._fn, )
  285. def copy(self):
  286. return NameFn(self._fn)
  287. class ItemMetaDataMatch(Term):
  288. """
  289. Matches a metadata key/value pair of an item, requires
  290. existence of the metadata key. Final.
  291. """
  292. _cost = 100 # fairly expensive but way cheaper than text
  293. def __init__(self, key, val):
  294. Term.__init__(self)
  295. self.key = key
  296. self.val = val
  297. def _evaluate(self, item):
  298. return self.key in item and item[self.key] == self.val
  299. def __repr__(self):
  300. return u'<%s(%s: %s)>' % (self.__class__.__name__, self.key, self.val)
  301. def copy(self):
  302. return ItemMetaDataMatch(self.key, self.val)
  303. class ItemHasMetaDataValue(Term):
  304. """
  305. Match when the metadata value for a given key contains the given
  306. value (when the item's metadata value is a dict or list), requires
  307. existence of the metadata key. Final.
  308. """
  309. _cost = 100 # fairly expensive but way cheaper than text
  310. def __init__(self, key, val):
  311. Term.__init__(self)
  312. self.key = key
  313. self.val = val
  314. def _evaluate(self, item):
  315. return self.key in item and self.val in item[self.key]
  316. def __repr__(self):
  317. return u'<%s(%s: %s)>' % (self.__class__.__name__, self.key, self.val)
  318. def copy(self):
  319. return ItemHasMetaDataValue(self.key, self.val)
  320. class ItemHasMetaDataKey(Term):
  321. """
  322. Requires existence of the metadata key. Final.
  323. """
  324. _cost = 90 # possibly cheaper than ItemMetaDataMatch
  325. def __init__(self, key):
  326. Term.__init__(self)
  327. self.key = key
  328. def _evaluate(self, item):
  329. return self.key in item
  330. def __repr__(self):
  331. return u'<%s(%s)>' % (self.__class__.__name__, self.key)
  332. def copy(self):
  333. return ItemHasMetaDataKey(self.key)
  334. class LastRevisionMetaDataMatch(Term):
  335. """
  336. Matches a metadata key/value pair of an item, requires
  337. existence of the metadata key. Final.
  338. """
  339. _cost = 100 # fairly expensive but way cheaper than text
  340. def __init__(self, key, val):
  341. Term.__init__(self)
  342. self.key = key
  343. self.val = val
  344. def _evaluate(self, item):
  345. try:
  346. rev = item.get_revision(-1)
  347. except NoSuchRevisionError:
  348. return False
  349. return self.key in rev and rev[self.key] == self.val
  350. def __repr__(self):
  351. return u'<%s(%s: %s)>' % (self.__class__.__name__, self.key, self.val)
  352. def copy(self):
  353. return LastRevisionMetaDataMatch(self.key, self.val)
  354. class LastRevisionHasMetaDataKey(Term):
  355. """
  356. Requires existence of the metadata key. Final.
  357. """
  358. _cost = 90 # possibly cheaper than LastRevisionMetaDataMatch
  359. def __init__(self, key):
  360. Term.__init__(self)
  361. self.key = key
  362. def _evaluate(self, item):
  363. try:
  364. rev = item.get_revision(-1)
  365. except NoSuchRevisionError:
  366. return False
  367. return self.key in rev
  368. def __repr__(self):
  369. return u'<%s(%s)>' % (self.__class__.__name__, self.key)
  370. def copy(self):
  371. return LastRevisionHasMetaDataKey(self.key)