PageRenderTime 60ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/lodgeit/lib/antispam.py

https://bitbucket.org/EnTeQuAk/lodgeit-main/
Python | 48 lines | 35 code | 3 blank | 10 comment | 1 complexity | 33fd0c4f74b7ad5869b1db98bcafea4f MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. """
  3. lodgeit.lib.antispam
  4. ~~~~~~~~~~~~~~~~~~~~
  5. Fight stupid spammers.
  6. :copyright: 2007-2008 by Armin Ronacher, Christopher Grebs.
  7. :license: BSD
  8. """
  9. from __future__ import division
  10. import re
  11. from operator import sub
  12. from itertools import starmap
# Regex fragment matching the start of a link: either a scheme followed by
# "://" (http, https, ftp, ftps, file, ssh, mms, irc, rsync, smb) or a scheme
# followed by a bare ":" (mailto, telnet, news, snews, sip, sips, skype).
_url_pattern = (
    r'(?:(?:https?|ftps?|file|ssh|mms|irc|rsync|smb)://|'
    r'(?:mailto|telnet|s?news|sips?|skype):)'
)
# Verbose, case-insensitive pattern (inline ``(?xi)`` flags) with two
# alternatives, each captured in its own group:
#   1. a bare link: a URL prefix followed by one or more characters that are
#      neither whitespace nor a quote character;
#   2. an ``<a ...>...</a>`` element with one or more attributes whose value
#      (double-quoted, single-quoted or unquoted) starts with a URL prefix.
# ``_url_pattern`` is interpolated into the pattern via ``%(url)s``.
_link_re = re.compile(r'''(?xi)
(%(url)s[^\s\'"]+)|
(<a(?:\s+\w+\s*=\s*
(?:"\s*%(url)s.*?"|'\s*%(url)s.*?'|%(url)s.*?)
)+\s*>.*?</a>)
''' % dict(url=_url_pattern))
# Maximum share of the text, in percent of its characters, that may be
# covered by link matches before the text is treated as spam.
MAX_LINK_PERCENTAGE = 30
# Intended hard limit on the number of links in the text, independent of
# how long the text is.
MAX_LINK_NUMBER = 15
  27. def check_for_link_spam(code):
  28. """It's spam if more than 30% of the text are links."""
  29. spans = [x.span() for x in _link_re.finditer(code)]
  30. if len(spans) > MAX_LINK_PERCENTAGE:
  31. return True
  32. return (sum(starmap(sub, spans)) * -100) / (len(code) or 1) \
  33. > MAX_LINK_PERCENTAGE
  34. def is_spam(code):
  35. """Check if the code provided contains spam."""
  36. return check_for_link_spam(code)