/syntaxhighlight/pygments/filters/__init__.py
http://github.com/plushcms/PlushCMS · Python · 357 lines · 295 code · 19 blank · 43 comment · 19 complexity · 2374befec3015e408f1e5d81960c3969 MD5 · raw file
- # -*- coding: utf-8 -*-
- """
- plushcms.syntaxhighlight.pygments.filters
- ~~~~~~~~~~~~~~~~
- Module containing filter lookup functions and default
- filters.
- :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- from plushcms.syntaxhighlight.pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
- string_to_tokentype
- from plushcms.syntaxhighlight.pygments.filter import Filter
- from plushcms.syntaxhighlight.pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
- get_choice_opt, ClassNotFound, OptionError
- from plushcms.syntaxhighlight.pygments.plugin import find_plugin_filters
- def find_filter_class(filtername):
- """
- Lookup a filter by name. Return None if not found.
- """
- if filtername in FILTERS:
- return FILTERS[filtername]
- for name, cls in find_plugin_filters():
- if name == filtername:
- return cls
- return None
- def get_filter_by_name(filtername, **options):
- """
- Return an instantiated filter. Options are passed to the filter
- initializer if wanted. Raise a ClassNotFound if not found.
- """
- cls = find_filter_class(filtername)
- if cls:
- return cls(**options)
- else:
- raise ClassNotFound('filter %r not found' % filtername)
- def get_all_filters():
- """
- Return a generator of all filter names.
- """
- for name in FILTERS:
- yield name
- for name, _ in find_plugin_filters():
- yield name
- def _replace_special(ttype, value, regex, specialttype,
- replacefunc=lambda x: x):
- last = 0
- for match in regex.finditer(value):
- start, end = match.start(), match.end()
- if start != last:
- yield ttype, value[last:start]
- yield specialttype, replacefunc(value[start:end])
- last = end
- if last != len(value):
- yield ttype, value[last:]
- class CodeTagFilter(Filter):
- """
- Highlight special code tags in comments and docstrings.
- Options accepted:
- `codetags` : list of strings
- A list of strings that are flagged as code tags. The default is to
- highlight ``XXX``, ``TODO``, ``BUG`` and ``NOTE``.
- """
- def __init__(self, **options):
- Filter.__init__(self, **options)
- tags = get_list_opt(options, 'codetags',
- ['XXX', 'TODO', 'BUG', 'NOTE'])
- self.tag_re = re.compile(r'\b(%s)\b' % '|'.join([
- re.escape(tag) for tag in tags if tag
- ]))
- def filter(self, lexer, stream):
- regex = self.tag_re
- for ttype, value in stream:
- if ttype in String.Doc or \
- ttype in Comment and \
- ttype not in Comment.Preproc:
- for sttype, svalue in _replace_special(ttype, value, regex,
- Comment.Special):
- yield sttype, svalue
- else:
- yield ttype, value
- class KeywordCaseFilter(Filter):
- """
- Convert keywords to lowercase or uppercase or capitalize them, which
- means first letter uppercase, rest lowercase.
- This can be useful e.g. if you highlight Pascal code and want to adapt the
- code to your styleguide.
- Options accepted:
- `case` : string
- The casing to convert keywords to. Must be one of ``'lower'``,
- ``'upper'`` or ``'capitalize'``. The default is ``'lower'``.
- """
- def __init__(self, **options):
- Filter.__init__(self, **options)
- case = get_choice_opt(options, 'case', ['lower', 'upper', 'capitalize'], 'lower')
- self.convert = getattr(unicode, case)
- def filter(self, lexer, stream):
- for ttype, value in stream:
- if ttype in Keyword:
- yield ttype, self.convert(value)
- else:
- yield ttype, value
- class NameHighlightFilter(Filter):
- """
- Highlight a normal Name token with a different token type.
- Example::
- filter = NameHighlightFilter(
- names=['foo', 'bar', 'baz'],
- tokentype=Name.Function,
- )
- This would highlight the names "foo", "bar" and "baz"
- as functions. `Name.Function` is the default token type.
- Options accepted:
- `names` : list of strings
- A list of names that should be given the different token type.
- There is no default.
- `tokentype` : TokenType or string
- A token type or a string containing a token type name that is
- used for highlighting the strings in `names`. The default is
- `Name.Function`.
- """
- def __init__(self, **options):
- Filter.__init__(self, **options)
- self.names = set(get_list_opt(options, 'names', []))
- tokentype = options.get('tokentype')
- if tokentype:
- self.tokentype = string_to_tokentype(tokentype)
- else:
- self.tokentype = Name.Function
- def filter(self, lexer, stream):
- for ttype, value in stream:
- if ttype is Name and value in self.names:
- yield self.tokentype, value
- else:
- yield ttype, value
- class ErrorToken(Exception):
- pass
- class RaiseOnErrorTokenFilter(Filter):
- """
- Raise an exception when the lexer generates an error token.
- Options accepted:
- `excclass` : Exception class
- The exception class to raise.
- The default is `plushcms.syntaxhighlight.pygments.filters.ErrorToken`.
- *New in Pygments 0.8.*
- """
- def __init__(self, **options):
- Filter.__init__(self, **options)
- self.exception = options.get('excclass', ErrorToken)
- try:
- # issubclass() will raise TypeError if first argument is not a class
- if not issubclass(self.exception, Exception):
- raise TypeError
- except TypeError:
- raise OptionError('excclass option is not an exception class')
- def filter(self, lexer, stream):
- for ttype, value in stream:
- if ttype is Error:
- raise self.exception(value)
- yield ttype, value
- class VisibleWhitespaceFilter(Filter):
- """
- Convert tabs, newlines and/or spaces to visible characters.
- Options accepted:
- `spaces` : string or bool
- If this is a one-character string, spaces will be replaces by this string.
- If it is another true value, spaces will be replaced by ``·`` (unicode
- MIDDLE DOT). If it is a false value, spaces will not be replaced. The
- default is ``False``.
- `tabs` : string or bool
- The same as for `spaces`, but the default replacement character is ``»``
- (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK). The default value
- is ``False``. Note: this will not work if the `tabsize` option for the
- lexer is nonzero, as tabs will already have been expanded then.
- `tabsize` : int
- If tabs are to be replaced by this filter (see the `tabs` option), this
- is the total number of characters that a tab should be expanded to.
- The default is ``8``.
- `newlines` : string or bool
- The same as for `spaces`, but the default replacement character is ``¶``
- (unicode PILCROW SIGN). The default value is ``False``.
- `wstokentype` : bool
- If true, give whitespace the special `Whitespace` token type. This allows
- styling the visible whitespace differently (e.g. greyed out), but it can
- disrupt background colors. The default is ``True``.
- *New in Pygments 0.8.*
- """
- def __init__(self, **options):
- Filter.__init__(self, **options)
- for name, default in {'spaces': u'·', 'tabs': u'»', 'newlines': u'¶'}.items():
- opt = options.get(name, False)
- if isinstance(opt, basestring) and len(opt) == 1:
- setattr(self, name, opt)
- else:
- setattr(self, name, (opt and default or ''))
- tabsize = get_int_opt(options, 'tabsize', 8)
- if self.tabs:
- self.tabs += ' '*(tabsize-1)
- if self.newlines:
- self.newlines += '\n'
- self.wstt = get_bool_opt(options, 'wstokentype', True)
- def filter(self, lexer, stream):
- if self.wstt:
- spaces = self.spaces or ' '
- tabs = self.tabs or '\t'
- newlines = self.newlines or '\n'
- regex = re.compile(r'\s')
- def replacefunc(wschar):
- if wschar == ' ':
- return spaces
- elif wschar == '\t':
- return tabs
- elif wschar == '\n':
- return newlines
- return wschar
- for ttype, value in stream:
- for sttype, svalue in _replace_special(ttype, value, regex,
- Whitespace, replacefunc):
- yield sttype, svalue
- else:
- spaces, tabs, newlines = self.spaces, self.tabs, self.newlines
- # simpler processing
- for ttype, value in stream:
- if spaces:
- value = value.replace(' ', spaces)
- if tabs:
- value = value.replace('\t', tabs)
- if newlines:
- value = value.replace('\n', newlines)
- yield ttype, value
- class GobbleFilter(Filter):
- """
- Gobbles source code lines (eats initial characters).
- This filter drops the first ``n`` characters off every line of code. This
- may be useful when the source code fed to the lexer is indented by a fixed
- amount of space that isn't desired in the output.
- Options accepted:
- `n` : int
- The number of characters to gobble.
- *New in Pygments 1.2.*
- """
- def __init__(self, **options):
- Filter.__init__(self, **options)
- self.n = get_int_opt(options, 'n', 0)
- def gobble(self, value, left):
- if left < len(value):
- return value[left:], 0
- else:
- return '', left - len(value)
- def filter(self, lexer, stream):
- n = self.n
- left = n # How many characters left to gobble.
- for ttype, value in stream:
- # Remove ``left`` tokens from first line, ``n`` from all others.
- parts = value.split('\n')
- (parts[0], left) = self.gobble(parts[0], left)
- for i in range(1, len(parts)):
- (parts[i], left) = self.gobble(parts[i], n)
- value = '\n'.join(parts)
- if value != '':
- yield ttype, value
- class TokenMergeFilter(Filter):
- """
- Merges consecutive tokens with the same token type in the output stream of a
- lexer.
- *New in Pygments 1.2.*
- """
- def __init__(self, **options):
- Filter.__init__(self, **options)
- def filter(self, lexer, stream):
- output = []
- current_type = None
- current_value = None
- for ttype, value in stream:
- if ttype is current_type:
- current_value += value
- else:
- if current_type is not None:
- yield current_type, current_value
- current_type = ttype
- current_value = value
- if current_type is not None:
- yield current_type, current_value
- FILTERS = {
- 'codetagify': CodeTagFilter,
- 'keywordcase': KeywordCaseFilter,
- 'highlight': NameHighlightFilter,
- 'raiseonerror': RaiseOnErrorTokenFilter,
- 'whitespace': VisibleWhitespaceFilter,
- 'gobble': GobbleFilter,
- 'tokenmerge': TokenMergeFilter,
- }