PageRenderTime 48ms CodeModel.GetById 18ms app.highlight 23ms RepoModel.GetById 0ms app.codeStats 1ms

/syntaxhighlight/pygments/filters/__init__.py

http://github.com/plushcms/PlushCMS
Python | 357 lines | 295 code | 19 blank | 43 comment | 31 complexity | 2374befec3015e408f1e5d81960c3969 MD5 | raw file
  1# -*- coding: utf-8 -*-
  2"""
  3    plushcms.syntaxhighlight.pygments.filters
  4    ~~~~~~~~~~~~~~~~
  5
  6    Module containing filter lookup functions and default
  7    filters.
  8
  9    :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
 10    :license: BSD, see LICENSE for details.
 11"""
 12
 13import re
 14
 15from plushcms.syntaxhighlight.pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
 16    string_to_tokentype
 17from plushcms.syntaxhighlight.pygments.filter import Filter
 18from plushcms.syntaxhighlight.pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
 19     get_choice_opt, ClassNotFound, OptionError
 20from plushcms.syntaxhighlight.pygments.plugin import find_plugin_filters
 21
 22
 23def find_filter_class(filtername):
 24    """
 25    Lookup a filter by name. Return None if not found.
 26    """
 27    if filtername in FILTERS:
 28        return FILTERS[filtername]
 29    for name, cls in find_plugin_filters():
 30        if name == filtername:
 31            return cls
 32    return None
 33
 34
 35def get_filter_by_name(filtername, **options):
 36    """
 37    Return an instantiated filter. Options are passed to the filter
 38    initializer if wanted. Raise a ClassNotFound if not found.
 39    """
 40    cls = find_filter_class(filtername)
 41    if cls:
 42        return cls(**options)
 43    else:
 44        raise ClassNotFound('filter %r not found' % filtername)
 45
 46
 47def get_all_filters():
 48    """
 49    Return a generator of all filter names.
 50    """
 51    for name in FILTERS:
 52        yield name
 53    for name, _ in find_plugin_filters():
 54        yield name
 55
 56
 57def _replace_special(ttype, value, regex, specialttype,
 58                     replacefunc=lambda x: x):
 59    last = 0
 60    for match in regex.finditer(value):
 61        start, end = match.start(), match.end()
 62        if start != last:
 63            yield ttype, value[last:start]
 64        yield specialttype, replacefunc(value[start:end])
 65        last = end
 66    if last != len(value):
 67        yield ttype, value[last:]
 68
 69
 70class CodeTagFilter(Filter):
 71    """
 72    Highlight special code tags in comments and docstrings.
 73
 74    Options accepted:
 75
 76    `codetags` : list of strings
 77       A list of strings that are flagged as code tags.  The default is to
 78       highlight ``XXX``, ``TODO``, ``BUG`` and ``NOTE``.
 79    """
 80
 81    def __init__(self, **options):
 82        Filter.__init__(self, **options)
 83        tags = get_list_opt(options, 'codetags',
 84                            ['XXX', 'TODO', 'BUG', 'NOTE'])
 85        self.tag_re = re.compile(r'\b(%s)\b' % '|'.join([
 86            re.escape(tag) for tag in tags if tag
 87        ]))
 88
 89    def filter(self, lexer, stream):
 90        regex = self.tag_re
 91        for ttype, value in stream:
 92            if ttype in String.Doc or \
 93               ttype in Comment and \
 94               ttype not in Comment.Preproc:
 95                for sttype, svalue in _replace_special(ttype, value, regex,
 96                                                       Comment.Special):
 97                    yield sttype, svalue
 98            else:
 99                yield ttype, value
100
101
class KeywordCaseFilter(Filter):
    """
    Convert keywords to lowercase or uppercase or capitalize them, which
    means first letter uppercase, rest lowercase.

    This can be useful e.g. if you highlight Pascal code and want to adapt the
    code to your styleguide.

    Options accepted:

    `case` : string
       The casing to convert keywords to. Must be one of ``'lower'``,
       ``'upper'`` or ``'capitalize'``.  The default is ``'lower'``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        case = get_choice_opt(options, 'case',
                              ['lower', 'upper', 'capitalize'], 'lower')

        def convert(value):
            # Call the method on the value itself instead of going through
            # an unbound method of one fixed string type: that form raises
            # TypeError for any other string type and needs a builtin
            # name that does not exist on every Python version.
            return getattr(value, case)()

        # ``self.convert`` stays a one-argument callable, as before.
        self.convert = convert

    def filter(self, lexer, stream):
        """
        Yield the tokens of `stream`, re-casing the value of every token
        whose type is in `Keyword`; all other tokens pass through as-is.
        """
        for ttype, value in stream:
            if ttype in Keyword:
                yield ttype, self.convert(value)
            else:
                yield ttype, value
128
129
class NameHighlightFilter(Filter):
    """
    Give selected plain `Name` tokens a different token type.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    With these options the names "foo", "bar" and "baz" are emitted
    as functions. `Name.Function` is the default token type.

    Options accepted:

    `names` : list of strings
      A list of names that should be given the different token type.
      There is no default.
    `tokentype` : TokenType or string
      A token type or a string containing a token type name that is
      used for highlighting the strings in `names`.  The default is
      `Name.Function`.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.names = set(get_list_opt(options, 'names', []))
        requested = options.get('tokentype')
        if not requested:
            # Option missing (or falsy): use the documented default.
            self.tokentype = Name.Function
        else:
            self.tokentype = string_to_tokentype(requested)

    def filter(self, lexer, stream):
        for ttype, value in stream:
            # Identity test: only tokens typed exactly `Name` are retagged.
            if ttype is Name and value in self.names:
                ttype = self.tokentype
            yield ttype, value
170
171
class ErrorToken(Exception):
    """
    Default exception class used by `RaiseOnErrorTokenFilter` when the
    token stream contains an error token.
    """
174
class RaiseOnErrorTokenFilter(Filter):
    """
    Abort filtering with an exception as soon as the lexer produces an
    error token.

    Options accepted:

    `excclass` : Exception class
      The exception class to raise.
      The default is `plushcms.syntaxhighlight.pygments.filters.ErrorToken`.

    *New in Pygments 0.8.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.exception = options.get('excclass', ErrorToken)
        try:
            is_exception_class = issubclass(self.exception, Exception)
        except TypeError:
            # issubclass() raises TypeError when its first argument is
            # not a class at all.
            is_exception_class = False
        if not is_exception_class:
            raise OptionError('excclass option is not an exception class')

    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype is Error:
                # The offending text becomes the exception argument.
                raise self.exception(value)
            yield ttype, value
203
204
class VisibleWhitespaceFilter(Filter):
    """
    Convert tabs, newlines and/or spaces to visible characters.

    Options accepted:

    `spaces` : string or bool
      If this is a one-character string, spaces will be replaced by this string.
      If it is another true value, spaces will be replaced by ``·`` (unicode
      MIDDLE DOT).  If it is a false value, spaces will not be replaced.  The
      default is ``False``.
    `tabs` : string or bool
      The same as for `spaces`, but the default replacement character is ``»``
      (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK).  The default value
      is ``False``.  Note: this will not work if the `tabsize` option for the
      lexer is nonzero, as tabs will already have been expanded then.
    `tabsize` : int
      If tabs are to be replaced by this filter (see the `tabs` option), this
      is the total number of characters that a tab should be expanded to.
      The default is ``8``.
    `newlines` : string or bool
      The same as for `spaces`, but the default replacement character is ``¶``
      (unicode PILCROW SIGN).  The default value is ``False``.
    `wstokentype` : bool
      If true, give whitespace the special `Whitespace` token type.  This allows
      styling the visible whitespace differently (e.g. greyed out), but it can
      disrupt background colors.  The default is ``True``.

    *New in Pygments 0.8.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        # Default replacement characters, written as escapes so that they
        # cannot be lost or mangled by an encoding mishap:
        #   U+00B7 MIDDLE DOT, U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION
        #   MARK, U+00B6 PILCROW SIGN.
        defaults = (
            ('spaces', u'\xb7'),
            ('tabs', u'\xbb'),
            ('newlines', u'\xb6'),
        )
        for name, default in defaults:
            opt = options.get(name, False)
            if isinstance(opt, basestring) and len(opt) == 1:
                # Caller supplied an explicit one-character replacement.
                replacement = opt
            elif opt:
                # Any other true value selects the default character.
                replacement = default
            else:
                # False value: an empty string disables the replacement.
                replacement = ''
            setattr(self, name, replacement)
        tabsize = get_int_opt(options, 'tabsize', 8)
        if self.tabs:
            # Pad the marker with spaces to a total width of `tabsize`.
            self.tabs += ' '*(tabsize-1)
        if self.newlines:
            # Keep the real line break after the visible marker.
            self.newlines += '\n'
        self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
        """
        Yield the tokens of `stream` with the configured whitespace
        characters replaced by their visible counterparts.
        """
        if self.wstt:
            # Map each replaceable character to its substitute; a disabled
            # replacement maps the character to itself.
            replacements = {
                ' ': self.spaces or ' ',
                '\t': self.tabs or '\t',
                '\n': self.newlines or '\n',
            }
            regex = re.compile(r'\s')

            def replacefunc(wschar):
                # Other whitespace matched by ``\s`` is kept unchanged (it
                # still gets the `Whitespace` token type).
                return replacements.get(wschar, wschar)

            for ttype, value in stream:
                for sttype, svalue in _replace_special(ttype, value, regex,
                                                       Whitespace, replacefunc):
                    yield sttype, svalue
        else:
            spaces, tabs, newlines = self.spaces, self.tabs, self.newlines
            # simpler processing: token types are left alone, only the
            # text is rewritten
            for ttype, value in stream:
                if spaces:
                    value = value.replace(' ', spaces)
                if tabs:
                    value = value.replace('\t', tabs)
                if newlines:
                    value = value.replace('\n', newlines)
                yield ttype, value
281
282
class GobbleFilter(Filter):
    """
    Gobbles source code lines (eats initial characters).

    The first ``n`` characters of every line of code are dropped.  This
    may be useful when the source code fed to the lexer is indented by a
    fixed amount of space that isn't desired in the output.

    Options accepted:

    `n` : int
       The number of characters to gobble.

    *New in Pygments 1.2.*
    """
    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.n = get_int_opt(options, 'n', 0)

    def gobble(self, value, left):
        """
        Drop up to `left` leading characters from `value`.

        Returns ``(remaining_text, characters_still_to_drop)``.
        """
        if left >= len(value):
            # The whole piece is eaten; carry the remainder forward.
            return '', left - len(value)
        return value[left:], 0

    def filter(self, lexer, stream):
        width = self.n
        pending = width  # characters still to drop on the current line
        for ttype, value in stream:
            segments = value.split('\n')
            # The first segment continues whatever line the previous token
            # was on, so it only loses what is still pending there.
            head, pending = self.gobble(segments[0], pending)
            kept = [head]
            # Every later segment starts a new line and loses `width`.
            for segment in segments[1:]:
                piece, pending = self.gobble(segment, width)
                kept.append(piece)
            value = '\n'.join(kept)

            # Tokens that were eaten completely are dropped.
            if value != '':
                yield ttype, value
321
322
class TokenMergeFilter(Filter):
    """
    Merges consecutive tokens with the same token type in the output stream of a
    lexer.

    *New in Pygments 1.2.*
    """
    def __init__(self, **options):
        Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        """
        Yield one ``(type, value)`` pair per run of adjacent tokens that
        share the same token type, with the values of the run concatenated.
        """
        # Type and accumulated text of the run currently being collected;
        # ``None`` means no token has been seen yet.
        current_type = None
        current_value = None
        for ttype, value in stream:
            if ttype is current_type:
                # Same type as the running token: extend it.
                current_value += value
            else:
                # Type changed: flush the finished run, then start a new one.
                if current_type is not None:
                    yield current_type, current_value
                current_type = ttype
                current_value = value
        # Flush the last run (skipped when the stream was empty).
        if current_type is not None:
            yield current_type, current_value
347
348
# Builtin filters, keyed by the name under which they can be requested.
# `find_filter_class()` looks names up here before it consults the plugin
# filters, and `get_all_filters()` yields these keys first.
FILTERS = {
    'codetagify':     CodeTagFilter,
    'keywordcase':    KeywordCaseFilter,
    'highlight':      NameHighlightFilter,
    'raiseonerror':   RaiseOnErrorTokenFilter,
    'whitespace':     VisibleWhitespaceFilter,
    'gobble':         GobbleFilter,
    'tokenmerge':     TokenMergeFilter,
}