/syntaxhighlight/pygments/filters/__init__.py

http://github.com/plushcms/PlushCMS · Python · 357 lines · 295 code · 19 blank · 43 comment · 19 complexity · 2374befec3015e408f1e5d81960c3969 MD5 · raw file

  1. # -*- coding: utf-8 -*-
  2. """
  3. plushcms.syntaxhighlight.pygments.filters
  4. ~~~~~~~~~~~~~~~~
  5. Module containing filter lookup functions and default
  6. filters.
  7. :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
  8. :license: BSD, see LICENSE for details.
  9. """
  10. import re
  11. from plushcms.syntaxhighlight.pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
  12. string_to_tokentype
  13. from plushcms.syntaxhighlight.pygments.filter import Filter
  14. from plushcms.syntaxhighlight.pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
  15. get_choice_opt, ClassNotFound, OptionError
  16. from plushcms.syntaxhighlight.pygments.plugin import find_plugin_filters
  def find_filter_class(filtername):
      """
      Look up the filter class registered under `filtername`.

      The builtin ``FILTERS`` mapping is consulted first; after that the
      ``(name, class)`` pairs produced by ``find_plugin_filters()`` are
      scanned.  Returns None if no filter of that name is found.
      """
      try:
          return FILTERS[filtername]
      except KeyError:
          # Not a builtin filter -- fall through to the plugin filters.
          pass
      for plugin_name, plugin_cls in find_plugin_filters():
          if plugin_name == filtername:
              return plugin_cls
      return None
  def get_filter_by_name(filtername, **options):
      """
      Instantiate the filter registered under `filtername`.

      All keyword `options` are forwarded to the filter's initializer.
      Raises ClassNotFound if ``find_filter_class()`` does not return a
      filter class for that name.
      """
      filter_cls = find_filter_class(filtername)
      if not filter_cls:
          raise ClassNotFound('filter %r not found' % filtername)
      return filter_cls(**options)
  def get_all_filters():
      """
      Generate the names of all known filters: first the keys of the
      builtin ``FILTERS`` mapping, then the names reported by
      ``find_plugin_filters()``.
      """
      for builtin_name in FILTERS:
          yield builtin_name
      for plugin_name, _plugin_cls in find_plugin_filters():
          yield plugin_name
  45. def _replace_special(ttype, value, regex, specialttype,
  46. replacefunc=lambda x: x):
  47. last = 0
  48. for match in regex.finditer(value):
  49. start, end = match.start(), match.end()
  50. if start != last:
  51. yield ttype, value[last:start]
  52. yield specialttype, replacefunc(value[start:end])
  53. last = end
  54. if last != len(value):
  55. yield ttype, value[last:]
  class CodeTagFilter(Filter):
      """
      Highlight special code tags in comments and docstrings.

      Options accepted:

      `codetags` : list of strings
        A list of strings that are flagged as code tags.  The default is to
        highlight ``XXX``, ``TODO``, ``BUG`` and ``NOTE``.  Empty strings in
        the list are ignored; if no tag remains, nothing is highlighted.
      """

      def __init__(self, **options):
          Filter.__init__(self, **options)
          tags = get_list_opt(options, 'codetags',
                              ['XXX', 'TODO', 'BUG', 'NOTE'])
          alternatives = [re.escape(tag) for tag in tags if tag]
          if alternatives:
              self.tag_re = re.compile(r'\b(%s)\b' % '|'.join(alternatives))
          else:
              # An empty alternation would compile to ``\b()\b``, which
              # matches the empty string at every word boundary and makes
              # filter() emit empty Comment.Special tokens.  ``(?!)`` is a
              # pattern that never matches, so tokens pass through unchanged.
              self.tag_re = re.compile(r'(?!)')

      def filter(self, lexer, stream):
          """
          Re-emit `stream`, turning code tags found inside docstring tokens
          and comment tokens (except preprocessor comments) into
          ``Comment.Special`` tokens.  All other tokens pass through as is.
          """
          regex = self.tag_re
          for ttype, value in stream:
              if ttype in String.Doc or \
                 ttype in Comment and \
                 ttype not in Comment.Preproc:
                  for sttype, svalue in _replace_special(ttype, value, regex,
                                                         Comment.Special):
                      yield sttype, svalue
              else:
                  yield ttype, value
  class KeywordCaseFilter(Filter):
      """
      Convert keywords to lowercase or uppercase or capitalize them, which
      means first letter uppercase, rest lowercase.

      This can be useful e.g. if you highlight Pascal code and want to adapt
      the code to your styleguide.

      Options accepted:

      `case` : string
        The casing to convert keywords to. Must be one of ``'lower'``,
        ``'upper'`` or ``'capitalize'``.  The default is ``'lower'``.
      """

      def __init__(self, **options):
          Filter.__init__(self, **options)
          chosen_case = get_choice_opt(options, 'case',
                                       ['lower', 'upper', 'capitalize'],
                                       'lower')
          # The option value doubles as the name of the unicode method that
          # performs the conversion.
          self.convert = getattr(unicode, chosen_case)

      def filter(self, lexer, stream):
          """Re-emit `stream` with the text of every Keyword token converted."""
          for ttype, value in stream:
              yield ttype, (self.convert(value) if ttype in Keyword else value)
  class NameHighlightFilter(Filter):
      """
      Highlight a normal Name token with a different token type.

      Example::

          filter = NameHighlightFilter(
              names=['foo', 'bar', 'baz'],
              tokentype=Name.Function,
          )

      This would highlight the names "foo", "bar" and "baz"
      as functions. `Name.Function` is the default token type.

      Options accepted:

      `names` : list of strings
        A list of names that should be given the different token type.
        There is no default.
      `tokentype` : TokenType or string
        A token type or a string containing a token type name that is
        used for highlighting the strings in `names`.  The default is
        `Name.Function`.
      """

      def __init__(self, **options):
          Filter.__init__(self, **options)
          self.names = set(get_list_opt(options, 'names', []))
          requested = options.get('tokentype')
          # A missing (or otherwise false) option selects the default type.
          self.tokentype = (string_to_tokentype(requested) if requested
                            else Name.Function)

      def filter(self, lexer, stream):
          """
          Re-emit `stream`, giving tokens whose type is exactly ``Name`` and
          whose text is one of the configured names the configured type.
          """
          for ttype, value in stream:
              if ttype is Name and value in self.names:
                  ttype = self.tokentype
              yield ttype, value
  class ErrorToken(Exception):
      """Default exception class raised by `RaiseOnErrorTokenFilter`."""
  class RaiseOnErrorTokenFilter(Filter):
      """
      Raise an exception when the lexer generates an error token.

      Options accepted:

      `excclass` : Exception class
        The exception class to raise.
        The default is `plushcms.syntaxhighlight.pygments.filters.ErrorToken`.

      *New in Pygments 0.8.*
      """

      def __init__(self, **options):
          Filter.__init__(self, **options)
          exc_class = options.get('excclass', ErrorToken)
          self.exception = exc_class
          try:
              # issubclass() itself raises TypeError when handed a non-class,
              # so "not a class" and "not an Exception subclass" both end up
              # in the handler below.
              is_exception_class = issubclass(exc_class, Exception)
              if not is_exception_class:
                  raise TypeError
          except TypeError:
              raise OptionError('excclass option is not an exception class')

      def filter(self, lexer, stream):
          """
          Re-emit `stream` unchanged; raise the configured exception, built
          from the token text, on the first token whose type is ``Error``.
          """
          for token_type, text in stream:
              if token_type is Error:
                  raise self.exception(text)
              yield token_type, text
  class VisibleWhitespaceFilter(Filter):
      """
      Convert tabs, newlines and/or spaces to visible characters.

      Options accepted:

      `spaces` : string or bool
        If this is a one-character string, spaces will be replaced by this
        string.  If it is another true value, spaces will be replaced by
        ``·`` (unicode MIDDLE DOT).  If it is a false value, spaces will not
        be replaced.  The default is ``False``.
      `tabs` : string or bool
        The same as for `spaces`, but the default replacement character is
        ``»`` (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK).  The
        default value is ``False``.  Note: this will not work if the
        `tabsize` option for the lexer is nonzero, as tabs will already have
        been expanded then.
      `tabsize` : int
        If tabs are to be replaced by this filter (see the `tabs` option),
        this is the total number of characters that a tab should be expanded
        to.  The default is ``8``.
      `newlines` : string or bool
        The same as for `spaces`, but the default replacement character is
        ``¶`` (unicode PILCROW SIGN).  The default value is ``False``.
      `wstokentype` : bool
        If true, give whitespace the special `Whitespace` token type.  This
        allows styling the visible whitespace differently (e.g. greyed out),
        but it can disrupt background colors.  The default is ``True``.

      *New in Pygments 0.8.*
      """

      def __init__(self, **options):
          Filter.__init__(self, **options)
          default_marks = {'spaces': u'·', 'tabs': u'»', 'newlines': u'¶'}
          for name, default in default_marks.items():
              opt = options.get(name, False)
              if isinstance(opt, basestring) and len(opt) == 1:
                  # An explicit one-character replacement was given.
                  replacement = opt
              elif opt:
                  # Any other true value selects the default mark.
                  replacement = default
              else:
                  # False value: this kind of whitespace is left alone.
                  replacement = ''
              setattr(self, name, replacement)
          tabsize = get_int_opt(options, 'tabsize', 8)
          if self.tabs:
              # Pad the mark with spaces so a tab occupies `tabsize` columns.
              self.tabs += ' '*(tabsize-1)
          if self.newlines:
              # Keep the real line break after the visible newline mark.
              self.newlines += '\n'
          self.wstt = get_bool_opt(options, 'wstokentype', True)

      def filter(self, lexer, stream):
          """
          Re-emit `stream` with the configured whitespace replacements.

          With `wstokentype` enabled every whitespace character becomes its
          own ``Whitespace`` token; otherwise the replacement happens inside
          the token text and token types are left untouched.
          """
          if self.wstt:
              # Characters without a configured replacement map to
              # themselves, but are still split off as Whitespace tokens.
              replacements = {
                  ' ': self.spaces or ' ',
                  '\t': self.tabs or '\t',
                  '\n': self.newlines or '\n',
              }
              regex = re.compile(r'\s')

              def replacefunc(wschar):
                  # Other whitespace matched by ``\s`` is passed through.
                  return replacements.get(wschar, wschar)

              for ttype, value in stream:
                  for sttype, svalue in _replace_special(ttype, value, regex,
                                                         Whitespace,
                                                         replacefunc):
                      yield sttype, svalue
          else:
              spaces, tabs, newlines = self.spaces, self.tabs, self.newlines
              # simpler processing
              for ttype, value in stream:
                  if spaces:
                      value = value.replace(' ', spaces)
                  if tabs:
                      value = value.replace('\t', tabs)
                  if newlines:
                      value = value.replace('\n', newlines)
                  yield ttype, value
  class GobbleFilter(Filter):
      """
      Gobbles source code lines (eats initial characters).

      This filter drops the first ``n`` characters off every line of code.
      This may be useful when the source code fed to the lexer is indented
      by a fixed amount of space that isn't desired in the output.

      Options accepted:

      `n` : int
        The number of characters to gobble.

      *New in Pygments 1.2.*
      """

      def __init__(self, **options):
          Filter.__init__(self, **options)
          self.n = get_int_opt(options, 'n', 0)

      def gobble(self, value, left):
          """
          Drop up to `left` leading characters from `value`.

          Returns ``(remaining_text, characters_still_to_drop)``.
          """
          if left >= len(value):
              # The whole piece is consumed; carry the rest of the budget on.
              return '', left - len(value)
          return value[left:], 0

      def filter(self, lexer, stream):
          """
          Re-emit `stream` with the first ``n`` characters of every line
          removed.  Tokens whose text becomes empty are dropped.
          """
          n = self.n
          left = n  # characters still to drop from the current line
          for ttype, value in stream:
              lines = value.split('\n')
              # The first piece continues a line begun by earlier tokens, so
              # it only loses what is still outstanding; every later piece
              # starts a fresh line and gets the full budget of ``n``.
              first, left = self.gobble(lines[0], left)
              trimmed = [first]
              for line in lines[1:]:
                  line, left = self.gobble(line, n)
                  trimmed.append(line)
              value = '\n'.join(trimmed)
              if value != '':
                  yield ttype, value
  class TokenMergeFilter(Filter):
      """
      Merges consecutive tokens with the same token type in the output
      stream of a lexer.

      *New in Pygments 1.2.*
      """

      def __init__(self, **options):
          Filter.__init__(self, **options)

      def filter(self, lexer, stream):
          """
          Re-emit `stream`, concatenating the text of each run of adjacent
          tokens that share the identical token type into a single token.
          """
          current_type = None
          current_value = None
          for ttype, value in stream:
              if ttype is current_type:
                  # Same type as the pending token: extend it.
                  current_value += value
              else:
                  # Type changed: flush the pending token (if any) and start
                  # collecting a new one.
                  if current_type is not None:
                      yield current_type, current_value
                  current_type = ttype
                  current_value = value
          # Flush whatever is still pending once the stream is exhausted.
          if current_type is not None:
              yield current_type, current_value
  # Builtin filters, keyed by the name under which find_filter_class()
  # looks them up.
  FILTERS = dict(
      codetagify=CodeTagFilter,
      keywordcase=KeywordCaseFilter,
      highlight=NameHighlightFilter,
      raiseonerror=RaiseOnErrorTokenFilter,
      whitespace=VisibleWhitespaceFilter,
      gobble=GobbleFilter,
      tokenmerge=TokenMergeFilter,
  )