PageRenderTime 48ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/feedzilla/processors/content_filter.py

https://bitbucket.org/lorien/feedzilla/
Python | 70 lines | 69 code | 0 blank | 1 comment | 1 complexity | 8f370c2113f451249cc420b5fea51853 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. """
  2. This module provides functions to filter posts by keywords found in the post's content and tags.
  3. Matching does not rely on the re.I flag because the current locale does not affect it.
  4. Instead, both the searched text and the regexp are converted to lower case.
  5. """
  6. # Copyright: 2011, Grigoriy Petukhov
  7. # Author: Grigoriy Petukhov (http://lorien.name)
  8. # License: BSD
  9. import re
  10. import locale
  11. from django.utils.html import strip_tags
  12. from feedzilla.models import FilterTag, FilterWord
  13. TAGS = []
  14. WORDS = []
  15. def build_regexp(value, exact):
  16. """
  17. Build regexp for the tag/word filter.
  18. If filter `exact` attribute is one then make regexp to
  19. match the word, i.e., matched fragment shuld be surrounded with
  20. spaces or text start or text end.
  21. """
  22. value = value.lower()
  23. if exact:
  24. value = u'\b%s\b' % value
  25. return re.compile(ur'%s' % value, re.U | re.I)
  26. def load_filters():
  27. """
  28. Calculate regexp objects for all filters.
  29. """
  30. for obj in FilterTag.objects.all():
  31. TAGS.append(build_regexp(obj.value, obj.exact))
  32. for obj in FilterWord.objects.all():
  33. WORDS.append(build_regexp(obj.value, obj.exact))
  34. class ContentFilterProcessor(object):
  35. """
  36. This processor search for certain fragment in content and tags
  37. of the post and mark post as active/inactive.
  38. """
  39. def process(self, post):
  40. post.active = self.match_filters(post)
  41. def match_filters(self, post):
  42. for tag in post.tags.all():
  43. for filter_tag in TAGS:
  44. if filter_tag.search(tag.name.lower()):
  45. return True
  46. title = strip_tags(post.title).lower()
  47. text = strip_tags(post.content).lower()
  48. for filter_word in WORDS:
  49. if filter_word.search(text) or filter_word.search(title):
  50. return True
  51. return False
# Fill TAGS and WORDS as a side effect of importing this module.
load_filters()