/syntaxhighlight/pygments/filters/__init__.py
Python | 357 lines | 295 code | 19 blank | 43 comment | 31 complexity | 2374befec3015e408f1e5d81960c3969 MD5 | raw file
1# -*- coding: utf-8 -*- 2""" 3 plushcms.syntaxhighlight.pygments.filters 4 ~~~~~~~~~~~~~~~~ 5 6 Module containing filter lookup functions and default 7 filters. 8 9 :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS. 10 :license: BSD, see LICENSE for details. 11""" 12 13import re 14 15from plushcms.syntaxhighlight.pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \ 16 string_to_tokentype 17from plushcms.syntaxhighlight.pygments.filter import Filter 18from plushcms.syntaxhighlight.pygments.util import get_list_opt, get_int_opt, get_bool_opt, \ 19 get_choice_opt, ClassNotFound, OptionError 20from plushcms.syntaxhighlight.pygments.plugin import find_plugin_filters 21 22 23def find_filter_class(filtername): 24 """ 25 Lookup a filter by name. Return None if not found. 26 """ 27 if filtername in FILTERS: 28 return FILTERS[filtername] 29 for name, cls in find_plugin_filters(): 30 if name == filtername: 31 return cls 32 return None 33 34 35def get_filter_by_name(filtername, **options): 36 """ 37 Return an instantiated filter. Options are passed to the filter 38 initializer if wanted. Raise a ClassNotFound if not found. 39 """ 40 cls = find_filter_class(filtername) 41 if cls: 42 return cls(**options) 43 else: 44 raise ClassNotFound('filter %r not found' % filtername) 45 46 47def get_all_filters(): 48 """ 49 Return a generator of all filter names. 50 """ 51 for name in FILTERS: 52 yield name 53 for name, _ in find_plugin_filters(): 54 yield name 55 56 57def _replace_special(ttype, value, regex, specialttype, 58 replacefunc=lambda x: x): 59 last = 0 60 for match in regex.finditer(value): 61 start, end = match.start(), match.end() 62 if start != last: 63 yield ttype, value[last:start] 64 yield specialttype, replacefunc(value[start:end]) 65 last = end 66 if last != len(value): 67 yield ttype, value[last:] 68 69 70class CodeTagFilter(Filter): 71 """ 72 Highlight special code tags in comments and docstrings. 73 74 Options accepted: 75 76 `codetags` : list of strings 77 A list of strings that are flagged as code tags. The default is to 78 highlight ``XXX``, ``TODO``, ``BUG`` and ``NOTE``. 79 """ 80 81 def __init__(self, **options): 82 Filter.__init__(self, **options) 83 tags = get_list_opt(options, 'codetags', 84 ['XXX', 'TODO', 'BUG', 'NOTE']) 85 self.tag_re = re.compile(r'\b(%s)\b' % '|'.join([ 86 re.escape(tag) for tag in tags if tag 87 ])) 88 89 def filter(self, lexer, stream): 90 regex = self.tag_re 91 for ttype, value in stream: 92 if ttype in String.Doc or \ 93 ttype in Comment and \ 94 ttype not in Comment.Preproc: 95 for sttype, svalue in _replace_special(ttype, value, regex, 96 Comment.Special): 97 yield sttype, svalue 98 else: 99 yield ttype, value 100 101 102class KeywordCaseFilter(Filter): 103 """ 104 Convert keywords to lowercase or uppercase or capitalize them, which 105 means first letter uppercase, rest lowercase. 106 107 This can be useful e.g. if you highlight Pascal code and want to adapt the 108 code to your styleguide. 109 110 Options accepted: 111 112 `case` : string 113 The casing to convert keywords to. Must be one of ``'lower'``, 114 ``'upper'`` or ``'capitalize'``. The default is ``'lower'``. 115 """ 116 117 def __init__(self, **options): 118 Filter.__init__(self, **options) 119 case = get_choice_opt(options, 'case', ['lower', 'upper', 'capitalize'], 'lower') 120 self.convert = getattr(unicode, case) 121 122 def filter(self, lexer, stream): 123 for ttype, value in stream: 124 if ttype in Keyword: 125 yield ttype, self.convert(value) 126 else: 127 yield ttype, value 128 129 130class NameHighlightFilter(Filter): 131 """ 132 Highlight a normal Name token with a different token type. 133 134 Example:: 135 136 filter = NameHighlightFilter( 137 names=['foo', 'bar', 'baz'], 138 tokentype=Name.Function, 139 ) 140 141 This would highlight the names "foo", "bar" and "baz" 142 as functions. `Name.Function` is the default token type. 143 144 Options accepted: 145 146 `names` : list of strings 147 A list of names that should be given the different token type. 148 There is no default. 149 `tokentype` : TokenType or string 150 A token type or a string containing a token type name that is 151 used for highlighting the strings in `names`. The default is 152 `Name.Function`. 153 """ 154 155 def __init__(self, **options): 156 Filter.__init__(self, **options) 157 self.names = set(get_list_opt(options, 'names', [])) 158 tokentype = options.get('tokentype') 159 if tokentype: 160 self.tokentype = string_to_tokentype(tokentype) 161 else: 162 self.tokentype = Name.Function 163 164 def filter(self, lexer, stream): 165 for ttype, value in stream: 166 if ttype is Name and value in self.names: 167 yield self.tokentype, value 168 else: 169 yield ttype, value 170 171 172class ErrorToken(Exception): 173 pass 174 175class RaiseOnErrorTokenFilter(Filter): 176 """ 177 Raise an exception when the lexer generates an error token. 178 179 Options accepted: 180 181 `excclass` : Exception class 182 The exception class to raise. 183 The default is `plushcms.syntaxhighlight.pygments.filters.ErrorToken`. 184 185 *New in Pygments 0.8.* 186 """ 187 188 def __init__(self, **options): 189 Filter.__init__(self, **options) 190 self.exception = options.get('excclass', ErrorToken) 191 try: 192 # issubclass() will raise TypeError if first argument is not a class 193 if not issubclass(self.exception, Exception): 194 raise TypeError 195 except TypeError: 196 raise OptionError('excclass option is not an exception class') 197 198 def filter(self, lexer, stream): 199 for ttype, value in stream: 200 if ttype is Error: 201 raise self.exception(value) 202 yield ttype, value 203 204 205class VisibleWhitespaceFilter(Filter): 206 """ 207 Convert tabs, newlines and/or spaces to visible characters. 208 209 Options accepted: 210 211 `spaces` : string or bool 212 If this is a one-character string, spaces will be replaces by this string. 213 If it is another true value, spaces will be replaced by ``·`` (unicode 214 MIDDLE DOT). If it is a false value, spaces will not be replaced. The 215 default is ``False``. 216 `tabs` : string or bool 217 The same as for `spaces`, but the default replacement character is ``»`` 218 (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK). The default value 219 is ``False``. Note: this will not work if the `tabsize` option for the 220 lexer is nonzero, as tabs will already have been expanded then. 221 `tabsize` : int 222 If tabs are to be replaced by this filter (see the `tabs` option), this 223 is the total number of characters that a tab should be expanded to. 224 The default is ``8``. 225 `newlines` : string or bool 226 The same as for `spaces`, but the default replacement character is ``¶`` 227 (unicode PILCROW SIGN). The default value is ``False``. 228 `wstokentype` : bool 229 If true, give whitespace the special `Whitespace` token type. This allows 230 styling the visible whitespace differently (e.g. greyed out), but it can 231 disrupt background colors. The default is ``True``. 232 233 *New in Pygments 0.8.* 234 """ 235 236 def __init__(self, **options): 237 Filter.__init__(self, **options) 238 for name, default in {'spaces': u'·', 'tabs': u'»', 'newlines': u'¶'}.items(): 239 opt = options.get(name, False) 240 if isinstance(opt, basestring) and len(opt) == 1: 241 setattr(self, name, opt) 242 else: 243 setattr(self, name, (opt and default or '')) 244 tabsize = get_int_opt(options, 'tabsize', 8) 245 if self.tabs: 246 self.tabs += ' '*(tabsize-1) 247 if self.newlines: 248 self.newlines += '\n' 249 self.wstt = get_bool_opt(options, 'wstokentype', True) 250 251 def filter(self, lexer, stream): 252 if self.wstt: 253 spaces = self.spaces or ' ' 254 tabs = self.tabs or '\t' 255 newlines = self.newlines or '\n' 256 regex = re.compile(r'\s') 257 def replacefunc(wschar): 258 if wschar == ' ': 259 return spaces 260 elif wschar == '\t': 261 return tabs 262 elif wschar == '\n': 263 return newlines 264 return wschar 265 266 for ttype, value in stream: 267 for sttype, svalue in _replace_special(ttype, value, regex, 268 Whitespace, replacefunc): 269 yield sttype, svalue 270 else: 271 spaces, tabs, newlines = self.spaces, self.tabs, self.newlines 272 # simpler processing 273 for ttype, value in stream: 274 if spaces: 275 value = value.replace(' ', spaces) 276 if tabs: 277 value = value.replace('\t', tabs) 278 if newlines: 279 value = value.replace('\n', newlines) 280 yield ttype, value 281 282 283class GobbleFilter(Filter): 284 """ 285 Gobbles source code lines (eats initial characters). 286 287 This filter drops the first ``n`` characters off every line of code. This 288 may be useful when the source code fed to the lexer is indented by a fixed 289 amount of space that isn't desired in the output. 290 291 Options accepted: 292 293 `n` : int 294 The number of characters to gobble. 295 296 *New in Pygments 1.2.* 297 """ 298 def __init__(self, **options): 299 Filter.__init__(self, **options) 300 self.n = get_int_opt(options, 'n', 0) 301 302 def gobble(self, value, left): 303 if left < len(value): 304 return value[left:], 0 305 else: 306 return '', left - len(value) 307 308 def filter(self, lexer, stream): 309 n = self.n 310 left = n # How many characters left to gobble. 311 for ttype, value in stream: 312 # Remove ``left`` tokens from first line, ``n`` from all others. 313 parts = value.split('\n') 314 (parts[0], left) = self.gobble(parts[0], left) 315 for i in range(1, len(parts)): 316 (parts[i], left) = self.gobble(parts[i], n) 317 value = '\n'.join(parts) 318 319 if value != '': 320 yield ttype, value 321 322 323class TokenMergeFilter(Filter): 324 """ 325 Merges consecutive tokens with the same token type in the output stream of a 326 lexer. 327 328 *New in Pygments 1.2.* 329 """ 330 def __init__(self, **options): 331 Filter.__init__(self, **options) 332 333 def filter(self, lexer, stream): 334 output = [] 335 current_type = None 336 current_value = None 337 for ttype, value in stream: 338 if ttype is current_type: 339 current_value += value 340 else: 341 if current_type is not None: 342 yield current_type, current_value 343 current_type = ttype 344 current_value = value 345 if current_type is not None: 346 yield current_type, current_value 347 348 349FILTERS = { 350 'codetagify': CodeTagFilter, 351 'keywordcase': KeywordCaseFilter, 352 'highlight': NameHighlightFilter, 353 'raiseonerror': RaiseOnErrorTokenFilter, 354 'whitespace': VisibleWhitespaceFilter, 355 'gobble': GobbleFilter, 356 'tokenmerge': TokenMergeFilter, 357}