
/console/app/pygments/lexers/text.py

https://bitbucket.org/alex_muscar/myspace-competition-radar
Python | 1482 lines | 1137 code | 104 blank | 241 comment | 31 complexity | cda584df89622b4651aa4d746e4a6823 MD5 | raw file
Possible License(s): GPL-3.0

Large files are truncated, but you can click here to view the full file

   1# -*- coding: utf-8 -*-
   2"""
   3    pygments.lexers.text
   4    ~~~~~~~~~~~~~~~~~~~~
   5
   6    Lexers for non-source code file types.
   7
   8    :copyright: 2006-2008 by Armin Ronacher, Georg Brandl,
   9                Tim Hatch <tim@timhatch.com>,
  10                Ronny Pfannschmidt,
  11                Dennis Kaarsemaker,
  12                Kumar Appaiah <akumar@ee.iitm.ac.in>,
  13                Varun Hiremath <varunhiremath@gmail.com>,
  14                Jeremy Thurgood,
  15                Max Battcher,
  16                Kirill Simonov <xi@resolvent.net>.
  17    :license: BSD, see LICENSE for more details.
  18"""
  19
  20import re
  21try:
  22    set
  23except NameError:
  24    from sets import Set as set
  25from bisect import bisect
  26
  27from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, \
  28     bygroups, include, using, this, do_insertions
  29from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \
  30     Generic, Operator, Number, Whitespace, Literal
  31from pygments.util import get_bool_opt
  32from pygments.lexers.other import BashLexer
  33
  34__all__ = ['IniLexer', 'SourcesListLexer', 'BaseMakefileLexer',
  35           'MakefileLexer', 'DiffLexer', 'IrcLogsLexer', 'TexLexer',
  36           'GroffLexer', 'ApacheConfLexer', 'BBCodeLexer', 'MoinWikiLexer',
  37           'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer',
  38           'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer',
  39           'LighttpdConfLexer', 'NginxConfLexer']
  40
  41
  42class IniLexer(RegexLexer):
  43    """
  44    Lexer for configuration files in INI style.
  45    """
  46
  47    name = 'INI'
  48    aliases = ['ini', 'cfg']
  49    filenames = ['*.ini', '*.cfg', '*.properties']
  50    mimetypes = ['text/x-ini']
  51
  52    tokens = {
  53        'root': [
  54            (r'\s+', Text),
  55            (r'[;#].*?$', Comment),
  56            (r'\[.*?\]$', Keyword),
  57            (r'(.*?)(\s*)(=)(\s*)(.*?)$',
  58             bygroups(Name.Attribute, Text, Operator, Text, String))
  59        ]
  60    }
  61
  62    def analyse_text(text):
  63        npos = text.find('\n')
  64        if npos < 3:
  65            return False
  66        return text[0] == '[' and text[npos-1] == ']'
  67
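# An editor's sketch (not from the original module): tokenizing a small INI
# snippet with the lexer above; assumes only a stock Pygments installation.
def _demo_ini_lexer():
    ini = "[section]\n; a comment\nkey = value\n"
    # yields (Keyword, u'[section]'), a Comment line, then the
    # Name.Attribute / Operator / String groups of the assignment
    return list(IniLexer().get_tokens(ini))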
  68
  69class SourcesListLexer(RegexLexer):
  70    """
  71    Lexer that highlights debian sources.list files.
  72
  73    *New in Pygments 0.7.*
  74    """
  75
  76    name = 'Debian Sourcelist'
  77    aliases = ['sourceslist', 'sources.list']
  78    filenames = ['sources.list']
   79    mimetypes = ['application/x-debian-sourceslist']
  80
  81    tokens = {
  82        'root': [
  83            (r'\s+', Text),
  84            (r'#.*?$', Comment),
  85            (r'^(deb(?:-src)?)(\s+)',
  86             bygroups(Keyword, Text), 'distribution')
  87        ],
  88        'distribution': [
  89            (r'#.*?$', Comment, '#pop'),
  90            (r'\$\(ARCH\)', Name.Variable),
  91            (r'[^\s$[]+', String),
  92            (r'\[', String.Other, 'escaped-distribution'),
  93            (r'\$', String),
  94            (r'\s+', Text, 'components')
  95        ],
  96        'escaped-distribution': [
  97            (r'\]', String.Other, '#pop'),
  98            (r'\$\(ARCH\)', Name.Variable),
  99            (r'[^\]$]+', String.Other),
 100            (r'\$', String.Other)
 101        ],
 102        'components': [
 103            (r'#.*?$', Comment, '#pop:2'),
 104            (r'$', Text, '#pop:2'),
 105            (r'\s+', Text),
 106            (r'\S+', Keyword.Pseudo),
 107        ]
 108    }
 109
 110    def analyse_text(text):
 111        for line in text.split('\n'):
 112            line = line.strip()
 113            if not (line.startswith('#') or line.startswith('deb ') or
 114                    line.startswith('deb-src ') or not line):
 115                return False
 116        return True
 117
 118
 119class MakefileLexer(Lexer):
 120    """
  121    Lexer for BSD and GNU make extensions (lenient enough to handle both,
  122    even in the same file).
 123
 124    *Rewritten in Pygments 0.10.*
 125    """
 126
 127    name = 'Makefile'
 128    aliases = ['make', 'makefile', 'mf', 'bsdmake']
 129    filenames = ['*.mak', 'Makefile', 'makefile', 'Makefile.*']
 130    mimetypes = ['text/x-makefile']
 131
 132    r_special = re.compile(r'^(?:'
 133        # BSD Make
 134        r'\.\s*(include|undef|error|warning|if|else|elif|endif|for|endfor)|'
 135        # GNU Make
 136        r'\s*(ifeq|ifneq|ifdef|ifndef|else|endif|-?include|define|endef|:))(?=\s)')
 137    r_comment = re.compile(r'^\s*@?#')
 138
 139    def get_tokens_unprocessed(self, text):
 140        ins = []
 141        lines = text.splitlines(True)
 142        done = ''
 143        lex = BaseMakefileLexer(**self.options)
 144        backslashflag = False
 145        for line in lines:
 146            if self.r_special.match(line) or backslashflag:
 147                ins.append((len(done), [(0, Comment.Preproc, line)]))
 148                backslashflag = line.strip().endswith('\\')
 149            elif self.r_comment.match(line):
 150                ins.append((len(done), [(0, Comment, line)]))
 151            else:
 152                done += line
 153        for item in do_insertions(ins, lex.get_tokens_unprocessed(done)):
 154            yield item
 155
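# An editor's sketch (not from the original module): the class above routes
# special/continuation lines to Comment.Preproc and comment lines to Comment,
# then re-interleaves the rest, lexed by BaseMakefileLexer, via do_insertions.
def _demo_makefile_lexer():
    mf = "ifdef DEBUG\nCFLAGS = -g\nendif\nall: foo\n\tgcc -o foo foo.c\n"
    return list(MakefileLexer().get_tokens(mf))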
 156
 157class BaseMakefileLexer(RegexLexer):
 158    """
 159    Lexer for simple Makefiles (no preprocessing).
 160
 161    *New in Pygments 0.10.*
 162    """
 163
 164    name = 'Makefile'
 165    aliases = ['basemake']
 166    filenames = []
 167    mimetypes = []
 168
 169    tokens = {
 170        'root': [
 171            (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)),
 172            (r'\$\((?:.*\\\n|.*\n)+', using(BashLexer)),
 173            (r'\s+', Text),
 174            (r'#.*?\n', Comment),
 175            (r'(export)(\s+)(?=[a-zA-Z0-9_${}\t -]+\n)',
 176             bygroups(Keyword, Text), 'export'),
 177            (r'export\s+', Keyword),
 178            # assignment
 179            (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n|.*\n)+)',
 180             bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))),
 181            # strings
 182            (r'"(\\\\|\\"|[^"])*"', String.Double),
 183            (r"'(\\\\|\\'|[^'])*'", String.Single),
 184            # targets
 185            (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text),
 186             'block-header'),
 187            #TODO: add paren handling (grr)
 188        ],
 189        'export': [
 190            (r'[a-zA-Z0-9_${}-]+', Name.Variable),
 191            (r'\n', Text, '#pop'),
 192            (r'\s+', Text),
 193        ],
 194        'block-header': [
 195            (r'[^,\\\n#]+', Number),
 196            (r',', Punctuation),
 197            (r'#.*?\n', Comment),
 198            (r'\\\n', Text), # line continuation
 199            (r'\\.', Text),
 200            (r'(?:[\t ]+.*\n|\n)+', using(BashLexer), '#pop'),
 201        ],
 202    }
 203
 204
 205class DiffLexer(RegexLexer):
 206    """
 207    Lexer for unified or context-style diffs or patches.
 208    """
 209
 210    name = 'Diff'
 211    aliases = ['diff']
 212    filenames = ['*.diff', '*.patch']
 213    mimetypes = ['text/x-diff', 'text/x-patch']
 214
 215    tokens = {
 216        'root': [
 217            (r' .*\n', Text),
 218            (r'\+.*\n', Generic.Inserted),
 219            (r'-.*\n', Generic.Deleted),
 220            (r'!.*\n', Generic.Strong),
 221            (r'@.*\n', Generic.Subheading),
 222            (r'(Index|diff).*\n', Generic.Heading),
 223            (r'=.*\n', Generic.Heading),
 224            (r'.*\n', Text),
 225        ]
 226    }
 227
 228    def analyse_text(text):
 229        if text[:7] == 'Index: ':
 230            return True
 231        if text[:5] == 'diff ':
 232            return True
 233        if text[:4] == '--- ':
 234            return 0.9
 235
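# An editor's sketch (not from the original module): '+'/'-' lines map to
# Generic.Inserted/Deleted and hunk headers to Generic.Subheading, while
# analyse_text above reports certainty for "Index: "/"diff " and 0.9 for "--- ".
def _demo_diff_lexer():
    patch = "--- a/x\n+++ b/x\n@@ -1 +1 @@\n-old\n+new\n"
    return list(DiffLexer().get_tokens(patch))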
 236
 237class DarcsPatchLexer(RegexLexer):
 238    """
 239    DarcsPatchLexer is a lexer for the various versions of the darcs patch
  240    format.  Examples of this format are produced by commands such as
 241    ``darcs annotate --patch`` and ``darcs send``.
 242
 243    *New in Pygments 0.10.*
 244    """
 245    name = 'Darcs Patch'
 246    aliases = ['dpatch']
 247    filenames = ['*.dpatch', '*.darcspatch']
 248
 249    tokens = {
 250        'root': [
 251            (r'<', Operator),
 252            (r'>', Operator),
 253            (r'{', Operator, 'patch'),
 254            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
 255             bygroups(Operator, Keyword, Name, Text, Name, Operator,
 256                      Literal.Date, Text), 'comment'),
 257            (r'New patches:', Generic.Heading),
 258            (r'Context:', Generic.Heading),
 259            (r'Patch bundle hash:', Generic.Heading),
 260            (r'\s+|\w+', Text),
 261        ],
 262        'comment': [
 263            (r' .*\n', Comment),
 264            (r'\]', Operator, "#pop"),
 265        ],
 266        'patch': [
 267            (r'}', Operator, "#pop"),
 268            (r'(\w+)(.*\n)', bygroups(Keyword, Text)),
 269            (r'\+.*\n', Generic.Inserted),
 270            (r'-.*\n', Generic.Deleted),
 271            (r'.*\n', Text),
 272        ],
 273    }
 274
 275
 276class IrcLogsLexer(RegexLexer):
 277    """
 278    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
 279    """
 280
 281    name = 'IRC logs'
 282    aliases = ['irc']
 283    filenames = ['*.weechatlog']
 284    mimetypes = ['text/x-irclog']
 285
 286    flags = re.VERBOSE | re.MULTILINE
 287    timestamp = r"""
 288        (
 289          # irssi / xchat and others
 290          (?: \[|\()?                  # Opening bracket or paren for the timestamp
 291            (?:                        # Timestamp
 292                (?: (?:\d{1,4} [-/]?)+ # Date as - or /-separated groups of digits
 293                 [T ])?                # Date/time separator: T or space
 294                (?: \d?\d [:.]?)+      # Time as :/.-separated groups of 1 or 2 digits
 295            )
 296          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
 297        |
 298          # weechat
 299          \d{4}\s\w{3}\s\d{2}\s        # Date
 300          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
 301        |
 302          # xchat
 303          \w{3}\s\d{2}\s               # Date
 304          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
 305        )?
 306    """
 307    tokens = {
 308        'root': [
 309                # log start/end
 310            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
 311            # hack
 312            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
 313            # normal msgs
 314            ("^" + timestamp + r"""
 315                (\s*<.*?>\s*)          # Nick """,
 316             bygroups(Comment.Preproc, Name.Tag), 'msg'),
 317            # /me msgs
 318            ("^" + timestamp + r"""
 319                (\s*[*]\s+)            # Star
 320                ([^\s]+\s+.*?\n)       # Nick + rest of message """,
 321             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
 322            # join/part msgs
 323            ("^" + timestamp + r"""
 324                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
 325                ([^\s]+\s+)                     # Nick + Space
 326                (.*?\n)                         # Rest of message """,
 327             bygroups(Comment.Preproc, Keyword, String, Comment)),
 328            (r"^.*?\n", Text),
 329        ],
 330        'msg': [
 331            (r"[^\s]+:(?!//)", Name.Attribute),  # Prefix
 332            (r".*\n", Text, '#pop'),
 333        ],
 334    }
 335
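# An editor's sketch (not from the original module): thanks to the verbose
# timestamp pattern above, one rule set covers irssi, xchat and weechat logs.
def _demo_irc_lexer():
    line = "12:34 <nick> hello world\n"
    # the timestamp becomes Comment.Preproc, '<nick>' Name.Tag, and the
    # message body is consumed by the 'msg' state
    return list(IrcLogsLexer().get_tokens(line))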
 336
 337class BBCodeLexer(RegexLexer):
 338    """
 339    A lexer that highlights BBCode(-like) syntax.
 340
 341    *New in Pygments 0.6.*
 342    """
 343
 344    name = 'BBCode'
 345    aliases = ['bbcode']
 346    mimetypes = ['text/x-bbcode']
 347
 348    tokens = {
 349        'root' : [
 350            (r'[\s\w]+', Text),
 351            (r'(\[)(/?[^\]\n\r=]+)(\])',
 352             bygroups(Keyword, Keyword.Pseudo, Keyword)),
 353            (r'(\[)([^\]\n\r=]+)(=)([^\]\n\r]+)(\])',
 354             bygroups(Keyword, Keyword.Pseudo, Operator, String, Keyword)),
 355        ],
 356    }
 357
 358
 359class TexLexer(RegexLexer):
 360    """
 361    Lexer for the TeX and LaTeX typesetting languages.
 362    """
 363
 364    name = 'TeX'
 365    aliases = ['tex', 'latex']
 366    filenames = ['*.tex', '*.aux', '*.toc']
 367    mimetypes = ['text/x-tex', 'text/x-latex']
 368
 369    tokens = {
 370        'general': [
 371            (r'%.*?\n', Comment),
 372            (r'[{}]', Name.Builtin),
 373            (r'[&_^]', Name.Builtin),
 374        ],
 375        'root': [
 376            (r'\\\[', String.Backtick, 'displaymath'),
 377            (r'\\\(', String, 'inlinemath'),
 378            (r'\$\$', String.Backtick, 'displaymath'),
 379            (r'\$', String, 'inlinemath'),
 380            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
 381            include('general'),
 382            (r'[^\\$%&_^{}]+', Text),
 383        ],
 384        'math': [
 385            (r'\\([a-zA-Z]+|.)', Name.Variable),
 386            include('general'),
 387            (r'[0-9]+', Number),
 388            (r'[-=!+*/()\[\]]', Operator),
 389            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
 390        ],
 391        'inlinemath': [
 392            (r'\\\)', String, '#pop'),
 393            (r'\$', String, '#pop'),
 394            include('math'),
 395        ],
 396        'displaymath': [
 397            (r'\\\]', String, '#pop'),
 398            (r'\$\$', String, '#pop'),
 399            (r'\$', Name.Builtin),
 400            include('math'),
 401        ],
 402        'command': [
 403            (r'\[.*?\]', Name.Attribute),
 404            (r'\*', Keyword),
 405            (r'', Text, '#pop'),
 406        ],
 407    }
 408
 409    def analyse_text(text):
 410        for start in ("\\documentclass", "\\input", "\\documentstyle",
 411                      "\\relax"):
 412            if text[:len(start)] == start:
 413                return True
 414
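# An editor's sketch (not from the original module): "$x$" and "\(x\)" are
# lexed through 'inlinemath', "$$x$$" and "\[x\]" through 'displaymath';
# inside either math state, digits become Number and '+-*/' become Operator.
def _demo_tex_math():
    return list(TexLexer().get_tokens(r'a $x+1$ b'))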
 415
 416class GroffLexer(RegexLexer):
 417    """
 418    Lexer for the (g)roff typesetting language, supporting groff
 419    extensions. Mainly useful for highlighting manpage sources.
 420
 421    *New in Pygments 0.6.*
 422    """
 423
 424    name = 'Groff'
 425    aliases = ['groff', 'nroff', 'man']
 426    filenames = ['*.[1234567]', '*.man']
 427    mimetypes = ['application/x-troff', 'text/troff']
 428
 429    tokens = {
 430        'root': [
 431            (r'(?i)(\.)(\w+)', bygroups(Text, Keyword), 'request'),
 432            (r'\.', Punctuation, 'request'),
 433            # Regular characters, slurp till we find a backslash or newline
 434            (r'[^\\\n]*', Text, 'textline'),
 435        ],
 436        'textline': [
 437            include('escapes'),
 438            (r'[^\\\n]+', Text),
 439            (r'\n', Text, '#pop'),
 440        ],
 441        'escapes': [
 442            # groff has many ways to write escapes.
 443            (r'\\"[^\n]*', Comment),
 444            (r'\\[fn]\w', String.Escape),
 445            (r'\\\(..', String.Escape),
 446            (r'\\.\[.*\]', String.Escape),
 447            (r'\\.', String.Escape),
 448            (r'\\\n', Text, 'request'),
 449        ],
 450        'request': [
 451            (r'\n', Text, '#pop'),
 452            include('escapes'),
 453            (r'"[^\n"]+"', String.Double),
 454            (r'\d+', Number),
 455            (r'\S+', String),
 456            (r'\s+', Text),
 457        ],
 458    }
 459
 460    def analyse_text(text):
  461        if text[:1] != '.':
 462            return False
 463        if text[:3] == '.\\"':
 464            return True
 465        if text[:4] == '.TH ':
 466            return True
  467        if text[1:3].isalnum() and text[3:4].isspace():
 468            return 0.9
 469
 470
 471class ApacheConfLexer(RegexLexer):
 472    """
 473    Lexer for configuration files following the Apache config file
 474    format.
 475
 476    *New in Pygments 0.6.*
 477    """
 478
 479    name = 'ApacheConf'
 480    aliases = ['apacheconf', 'aconf', 'apache']
 481    filenames = ['.htaccess', 'apache.conf', 'apache2.conf']
 482    mimetypes = ['text/x-apacheconf']
 483    flags = re.MULTILINE | re.IGNORECASE
 484
 485    tokens = {
 486        'root': [
 487            (r'\s+', Text),
 488            (r'(#.*?)$', Comment),
 489            (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)',
 490             bygroups(Name.Tag, Text, String, Name.Tag)),
 491            (r'([a-zA-Z][a-zA-Z0-9]*)(\s+)',
 492             bygroups(Name.Builtin, Text), 'value'),
 493            (r'\.+', Text),
 494        ],
 495        'value': [
 496            (r'$', Text, '#pop'),
 497            (r'[^\S\n]+', Text),
 498            (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number),
 499            (r'\d+', Number),
 500            (r'/([a-zA-Z0-9][a-zA-Z0-9_./-]+)', String.Other),
 501            (r'(on|off|none|any|all|double|email|dns|min|minimal|'
 502             r'os|productonly|full|emerg|alert|crit|error|warn|'
 503             r'notice|info|debug|registry|script|inetd|standalone|'
 504             r'user|group)\b', Keyword),
 505            (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double),
 506            (r'[^\s"]+', Text)
 507        ]
 508    }
 509
 510
 511class MoinWikiLexer(RegexLexer):
 512    """
 513    For MoinMoin (and Trac) Wiki markup.
 514
 515    *New in Pygments 0.7.*
 516    """
 517
 518    name = 'MoinMoin/Trac Wiki markup'
 519    aliases = ['trac-wiki', 'moin']
 520    filenames = []
 521    mimetypes = ['text/x-trac-wiki']
 522    flags = re.MULTILINE | re.IGNORECASE
 523
 524    tokens = {
 525        'root': [
 526            (r'^#.*$', Comment),
 527            (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next
 528            # Titles
 529            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
 530             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
 531            # Literal code blocks, with optional shebang
 532            (r'({{{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
 533            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting
 534            # Lists
 535            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
 536            (r'^( +)([a-zivx]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
 537            # Other Formatting
 538            (r'\[\[\w+.*?\]\]', Keyword), # Macro
 539            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
 540             bygroups(Keyword, String, Keyword)), # Link
 541            (r'^----+$', Keyword), # Horizontal rules
 542            (r'[^\n\'\[{!_~^,|]+', Text),
 543            (r'\n', Text),
 544            (r'.', Text),
 545        ],
 546        'codeblock': [
 547            (r'}}}', Name.Builtin, '#pop'),
 548            # these blocks are allowed to be nested in Trac, but not MoinMoin
 549            (r'{{{', Text, '#push'),
 550            (r'[^{}]+', Comment.Preproc), # slurp boring text
 551            (r'.', Comment.Preproc), # allow loose { or }
 552        ],
 553    }
 554
 555
 556class RstLexer(RegexLexer):
 557    """
 558    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.
 559
 560    *New in Pygments 0.7.*
 561
 562    Additional options accepted:
 563
 564    `handlecodeblocks`
  565        Highlight the contents of ``.. sourcecode:: language`` and
 566        ``.. code:: language`` directives with a lexer for the given
 567        language (default: ``True``). *New in Pygments 0.8.*
 568    """
 569    name = 'reStructuredText'
 570    aliases = ['rst', 'rest', 'restructuredtext']
 571    filenames = ['*.rst', '*.rest']
 572    mimetypes = ["text/x-rst"]
 573    flags = re.MULTILINE
 574
 575    def _handle_sourcecode(self, match):
 576        from pygments.lexers import get_lexer_by_name
 577        from pygments.util import ClassNotFound
 578
 579        # section header
 580        yield match.start(1), Punctuation, match.group(1)
 581        yield match.start(2), Text, match.group(2)
 582        yield match.start(3), Operator.Word, match.group(3)
 583        yield match.start(4), Punctuation, match.group(4)
 584        yield match.start(5), Text, match.group(5)
 585        yield match.start(6), Keyword, match.group(6)
 586        yield match.start(7), Text, match.group(7)
 587
 588        # lookup lexer if wanted and existing
 589        lexer = None
 590        if self.handlecodeblocks:
 591            try:
 592                lexer = get_lexer_by_name(match.group(6).strip())
 593            except ClassNotFound:
 594                pass
 595        indention = match.group(8)
 596        indention_size = len(indention)
 597        code = (indention + match.group(9) + match.group(10) + match.group(11))
 598
  599        # no lexer for this language. handle it as if it were a code block
 600        if lexer is None:
 601            yield match.start(8), String, code
 602            return
 603
 604        # highlight the lines with the lexer.
 605        ins = []
 606        codelines = code.splitlines(True)
 607        code = ''
 608        for line in codelines:
 609            if len(line) > indention_size:
 610                ins.append((len(code), [(0, Text, line[:indention_size])]))
 611                code += line[indention_size:]
 612            else:
 613                code += line
 614        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
 615            yield item
 616
 617    tokens = {
 618        'root': [
 619            # Heading with overline
 620            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)',
 621             bygroups(Generic.Heading, Text, Generic.Heading,
 622                      Text, Generic.Heading, Text)),
 623            # Plain heading
 624            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
 625             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
 626             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
 627            # Bulleted lists
 628            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
 629             bygroups(Text, Number, using(this, state='inline'))),
 630            # Numbered lists
 631            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
 632             bygroups(Text, Number, using(this, state='inline'))),
 633            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
 634             bygroups(Text, Number, using(this, state='inline'))),
 635            # Numbered, but keep words at BOL from becoming lists
 636            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
 637             bygroups(Text, Number, using(this, state='inline'))),
 638            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
 639             bygroups(Text, Number, using(this, state='inline'))),
 640            # Sourcecode directives
 641            (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)'
 642             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
 643             _handle_sourcecode),
 644            # A directive
 645            (r'^( *\.\.)(\s*)([\w-]+)(::)(?:([ \t]*)(.+))?',
 646             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, Keyword)),
 647            # A reference target
 648            (r'^( *\.\.)(\s*)([\w\t ]+:)(.*?)$',
 649             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
 650            # A footnote target
 651            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
 652             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
 653            # Comments
 654            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
 655            # Field list
 656            (r'^( *)(:.*?:)([ \t]+)(.*?)$', bygroups(Text, Name.Class, Text,
 657                                                     Name.Function)),
 658            # Definition list
 659            (r'^([^ ].*(?<!::)\n)((?:(?: +.*)\n)+)',
 660             bygroups(using(this, state='inline'), using(this, state='inline'))),
 661            # Code blocks
 662            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
 663             bygroups(String.Escape, Text, String, String, Text, String)),
 664            include('inline'),
 665        ],
 666        'inline': [
 667            (r'\\.', Text), # escape
 668            (r'``', String, 'literal'), # code
 669            (r'(`)(.+?)(`__?)',
 670             bygroups(Punctuation, using(this), Punctuation)), # reference
 671            (r'(`.+?`)(:[a-zA-Z0-9-]+?:)?',
 672             bygroups(Name.Variable, Name.Attribute)), # role
 673            (r'(:[a-zA-Z0-9-]+?:)(`.+?`)',
 674             bygroups(Name.Attribute, Name.Variable)), # user-defined role
 675            (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis
 676            (r'\*.+?\*', Generic.Emph), # Emphasis
 677            (r'\[.*?\]_', String), # Footnote or citation
 678            (r'<.+?>', Name.Tag), # Hyperlink
 679            (r'[^\\\n\[*`:]+', Text),
 680            (r'.', Text),
 681        ],
 682        'literal': [
 683            (r'[^`\\]+', String),
 684            (r'\\.', String),
 685            (r'``', String, '#pop'),
 686            (r'[`\\]', String),
 687        ]
 688    }
 689
 690    def __init__(self, **options):
 691        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
 692        RegexLexer.__init__(self, **options)
 693
 694    def analyse_text(text):
 695        if text[:2] == '..' and text[2:3] != '.':
 696            return 0.3
 697        p1 = text.find("\n")
 698        p2 = text.find("\n", p1 + 1)
 699        if (p2 > -1 and              # has two lines
 700            p1 * 2 + 1 == p2 and     # they are the same length
 701            text[p1+1] in '-=' and   # the next line both starts and ends with
 702            text[p1+1] == text[p2-1]): # ...a sufficiently high header
 703            return 0.5
 704
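# An editor's sketch (not from the original module): with handlecodeblocks
# left at its default of True, _handle_sourcecode() re-lexes the directive
# body, so the snippet below comes back as Python tokens, not a flat String.
def _demo_rst_sourcecode():
    rst = ".. sourcecode:: python\n\n   print 'hi'\n\n"
    return list(RstLexer().get_tokens(rst))
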
 705class VimLexer(RegexLexer):
 706    """
 707    Lexer for VimL script files.
 708
 709    *New in Pygments 0.8.*
 710    """
 711    name = 'VimL'
 712    aliases = ['vim']
 713    filenames = ['*.vim', '.vimrc']
 714    mimetypes = ['text/x-vim']
 715    flags = re.MULTILINE
 716
 717    tokens = {
 718        'root': [
 719            # Who decided that doublequote was a good comment character??
 720            (r'^\s*".*', Comment),
 721            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
 722
 723            (r'[ \t]+', Text),
 724            # TODO: regexes can have other delims
 725            (r'/(\\\\|\\/|[^\n/])*/', String.Regex),
 726            (r'"(\\\\|\\"|[^\n"])*"', String.Double),
 727            (r"'(\\\\|\\'|[^\n'])*'", String.Single),
 728            (r'-?\d+', Number),
 729            (r'#[0-9a-f]{6}', Number.Hex),
 730            (r'^:', Punctuation),
 731            (r'[()<>+=!|,~-]', Punctuation), # Inexact list.  Looks decent.
 732            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
 733             Keyword),
 734            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
 735            (r'\b\w+\b', Name.Other), # These are postprocessed below
 736            (r'.', Text),
 737        ],
 738    }
 739    def __init__(self, **options):
 740        from pygments.lexers._vimbuiltins import command, option, auto
 741        self._cmd = command
 742        self._opt = option
 743        self._aut = auto
 744
 745        RegexLexer.__init__(self, **options)
 746
 747    def is_in(self, w, mapping):
 748        r"""
 749        It's kind of difficult to decide if something might be a keyword
 750        in VimL because it allows you to abbreviate them.  In fact,
 751        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
 752        valid ways to call it so rather than making really awful regexps
 753        like::
 754
 755            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b
 756
 757        we match `\b\w+\b` and then call is_in() on those tokens.  See
 758        `scripts/get_vimkw.py` for how the lists are extracted.
 759        """
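        # Worked example (editor's note): for the entry ('ab', 'abbreviate'),
        # w = 'abbre' passes both prefix checks -- it starts with the minimal
        # form 'ab' and is itself a prefix of 'abbreviate' -- so it counts as
        # a keyword, while w = 'abbrevx' fails the second check.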
 760        p = bisect(mapping, (w,))
 761        if p > 0:
 762            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
 763               mapping[p-1][1][:len(w)] == w: return True
 764        if p < len(mapping):
 765            return mapping[p][0] == w[:len(mapping[p][0])] and \
 766                   mapping[p][1][:len(w)] == w
 767        return False
 768
 769    def get_tokens_unprocessed(self, text):
 770        # TODO: builtins are only subsequent tokens on lines
 771        #       and 'keywords' only happen at the beginning except
 772        #       for :au ones
 773        for index, token, value in \
 774            RegexLexer.get_tokens_unprocessed(self, text):
 775            if token is Name.Other:
 776                if self.is_in(value, self._cmd):
 777                    yield index, Keyword, value
 778                elif self.is_in(value, self._opt) or \
 779                     self.is_in(value, self._aut):
 780                    yield index, Name.Builtin, value
 781                else:
 782                    yield index, Text, value
 783            else:
 784                yield index, token, value
 785
 786
 787class GettextLexer(RegexLexer):
 788    """
 789    Lexer for Gettext catalog files.
 790
 791    *New in Pygments 0.9.*
 792    """
 793    name = 'Gettext Catalog'
 794    aliases = ['pot', 'po']
 795    filenames = ['*.pot', '*.po']
 796    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']
 797
 798    tokens = {
 799        'root': [
 800            (r'^#,\s.*?$', Keyword.Type),
 801            (r'^#:\s.*?$', Keyword.Declaration),
 802            #(r'^#$', Comment),
 803            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
 804            (r'^(")([\w-]*:)(.*")$',
 805             bygroups(String, Name.Property, String)),
 806            (r'^".*"$', String),
 807            (r'^(msgid|msgid_plural|msgstr)(\s+)(".*")$',
 808             bygroups(Name.Variable, Text, String)),
 809            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
 810             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
 811        ]
 812    }
 813
 814class SquidConfLexer(RegexLexer):
 815    """
 816    Lexer for `squid <http://www.squid-cache.org/>`_ configuration files.
 817
 818    *New in Pygments 0.9.*
 819    """
 820
 821    name = 'SquidConf'
 822    aliases = ['squidconf', 'squid.conf', 'squid']
 823    filenames = ['squid.conf']
 824    mimetypes = ['text/x-squidconf']
 825    flags = re.IGNORECASE
 826
 827    keywords = [ "acl", "always_direct", "announce_host",
 828                 "announce_period", "announce_port", "announce_to",
 829                 "anonymize_headers", "append_domain", "as_whois_server",
 830                 "auth_param_basic", "authenticate_children",
 831                 "authenticate_program", "authenticate_ttl", "broken_posts",
 832                 "buffered_logs", "cache_access_log", "cache_announce",
 833                 "cache_dir", "cache_dns_program", "cache_effective_group",
 834                 "cache_effective_user", "cache_host", "cache_host_acl",
 835                 "cache_host_domain", "cache_log", "cache_mem",
 836                 "cache_mem_high", "cache_mem_low", "cache_mgr",
 837                 "cachemgr_passwd", "cache_peer", "cache_peer_access",
  838                 "cache_replacement_policy", "cache_stoplist",
 839                 "cache_stoplist_pattern", "cache_store_log", "cache_swap",
 840                 "cache_swap_high", "cache_swap_log", "cache_swap_low",
 841                 "client_db", "client_lifetime", "client_netmask",
 842                 "connect_timeout", "coredump_dir", "dead_peer_timeout",
 843                 "debug_options", "delay_access", "delay_class",
 844                 "delay_initial_bucket_level", "delay_parameters",
 845                 "delay_pools", "deny_info", "dns_children", "dns_defnames",
 846                 "dns_nameservers", "dns_testnames", "emulate_httpd_log",
 847                 "err_html_text", "fake_user_agent", "firewall_ip",
 848                 "forwarded_for", "forward_snmpd_port", "fqdncache_size",
 849                 "ftpget_options", "ftpget_program", "ftp_list_width",
 850                 "ftp_passive", "ftp_user", "half_closed_clients",
 851                 "header_access", "header_replace", "hierarchy_stoplist",
 852                 "high_response_time_warning", "high_page_fault_warning",
 853                 "htcp_port", "http_access", "http_anonymizer", "httpd_accel",
 854                 "httpd_accel_host", "httpd_accel_port",
 855                 "httpd_accel_uses_host_header", "httpd_accel_with_proxy",
 856                 "http_port", "http_reply_access", "icp_access",
 857                 "icp_hit_stale", "icp_port", "icp_query_timeout",
 858                 "ident_lookup", "ident_lookup_access", "ident_timeout",
 859                 "incoming_http_average", "incoming_icp_average",
 860                 "inside_firewall", "ipcache_high", "ipcache_low",
 861                 "ipcache_size", "local_domain", "local_ip", "logfile_rotate",
 862                 "log_fqdn", "log_icp_queries", "log_mime_hdrs",
 863                 "maximum_object_size", "maximum_single_addr_tries",
 864                 "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr",
 865                 "mcast_miss_encode_key", "mcast_miss_port", "memory_pools",
 866                 "memory_pools_limit", "memory_replacement_policy",
 867                 "mime_table", "min_http_poll_cnt", "min_icp_poll_cnt",
 868                 "minimum_direct_hops", "minimum_object_size",
 869                 "minimum_retry_timeout", "miss_access", "negative_dns_ttl",
 870                 "negative_ttl", "neighbor_timeout", "neighbor_type_domain",
 871                 "netdb_high", "netdb_low", "netdb_ping_period",
 872                 "netdb_ping_rate", "never_direct", "no_cache",
 873                 "passthrough_proxy", "pconn_timeout", "pid_filename",
 874                 "pinger_program", "positive_dns_ttl", "prefer_direct",
  875                 "proxy_auth", "proxy_auth_realm", "query_icmp",
  876                 "quick_abort", "quick_abort_max", "quick_abort_min",
 877                 "quick_abort_pct", "range_offset_limit", "read_timeout",
 878                 "redirect_children", "redirect_program",
  879                 "redirect_rewrites_host_header", "reference_age",
  880                 "refresh_pattern", "reload_into_ims",
 881                 "request_body_max_size", "request_size", "request_timeout",
 882                 "shutdown_lifetime", "single_parent_bypass",
 883                 "siteselect_timeout", "snmp_access", "snmp_incoming_address",
 884                 "snmp_port", "source_ping", "ssl_proxy",
 885                 "store_avg_object_size", "store_objects_per_bucket",
 886                 "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs",
 887                 "tcp_incoming_address", "tcp_outgoing_address",
 888                 "tcp_recv_bufsize", "test_reachability", "udp_hit_obj",
 889                 "udp_hit_obj_size", "udp_incoming_address",
 890                 "udp_outgoing_address", "unique_hostname", "unlinkd_program",
 891                 "uri_whitespace", "useragent_log", "visible_hostname",
 892                 "wais_relay", "wais_relay_host", "wais_relay_port",
 893                 ]
 894
 895    opts = [ "proxy-only", "weight", "ttl", "no-query", "default",
 896             "round-robin", "multicast-responder", "on", "off", "all",
 897             "deny", "allow", "via", "parent", "no-digest", "heap", "lru",
 898             "realm", "children", "credentialsttl", "none", "disable",
 899             "offline_toggle", "diskd", "q1", "q2",
 900             ]
 901
 902    actions = [ "shutdown", "info", "parameter", "server_list",
 903                "client_list", r'squid\.conf',
 904                ]
 905
 906    actions_stats = [ "objects", "vm_objects", "utilization",
 907                      "ipcache", "fqdncache", "dns", "redirector", "io",
 908                      "reply_headers", "filedescriptors", "netdb",
 909                      ]
 910
 911    actions_log = [ "status", "enable", "disable", "clear"]
 912
 913    acls = [ "url_regex", "urlpath_regex", "referer_regex", "port",
 914             "proto", "req_mime_type", "rep_mime_type", "method",
 915             "browser", "user", "src", "dst", "time", "dstdomain", "ident",
 916             "snmp_community",
 917             ]
 918
 919    ip_re = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
 920
 921    def makelistre(list):
 922        return r'\b(?:'+'|'.join(list)+r')\b'
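    # For example (editor's note), makelistre(["on", "off"]) yields
    # r'\b(?:on|off)\b', so each word list above becomes a single
    # word-boundary-anchored alternation.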
 923
 924    tokens = {
 925        'root': [
 926            (r'\s+', Text),
 927            (r'#', Comment, 'comment'),
 928            (makelistre(keywords), Keyword),
 929            (makelistre(opts), Name.Constant),
 930            # Actions
 931            (makelistre(actions), String),
 932            (r'stats/'+makelistre(actions), String),
 933            (r'log/'+makelistre(actions)+r'=', String),
 934            (makelistre(acls), Keyword),
 935            (ip_re+r'(?:/(?:'+ip_re+r')|\d+)?', Number),
 936            (r'\b\d+\b', Number),
 937            (r'\S+', Text),
 938        ],
 939        'comment': [
 940            (r'\s*TAG:.*', String.Escape, '#pop'),
 941            (r'.*', Comment, '#pop'),
 942        ],
 943    }
 944
 945
 946class DebianControlLexer(RegexLexer):
 947    """
 948    Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.
 949
 950    *New in Pygments 0.9.*
 951    """
 952    name = 'Debian Control file'
 953    aliases = ['control']
 954    filenames = ['control']
 955
 956    tokens = {
 957        'root': [
 958            (r'^(Description)', Keyword, 'description'),
 959            (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'),
 960            (r'^((Build-)?Depends)', Keyword, 'depends'),
 961            (r'^((?:Python-)?Version)(:\s*)([^\s]+)$',
 962             bygroups(Keyword, Text, Number)),
 963            (r'^((?:Installed-)?Size)(:\s*)([^\s]+)$',
 964             bygroups(Keyword, Text, Number)),
 965            (r'^(MD5Sum|SHA1|SHA256)(:\s*)([^\s]+)$',
 966             bygroups(Keyword, Text, Number)),
 967            (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
 968             bygroups(Keyword, Whitespace, String)),
 969        ],
 970        'maintainer': [
 971            (r'<[^>]+>', Generic.Strong),
 972            (r'<[^>]+>$', Generic.Strong, '#pop'),
 973            (r',\n?', Text),
 974            (r'.', Text),
 975        ],
 976        'description': [
 977            (r'(.*)(Homepage)(: )([^\s]+)', bygroups(Text, String, Name, Name.Class)),
 978            (r':.*\n', Generic.Strong),
 979            (r' .*\n', Text),
 980            ('', Text, '#pop'),
 981        ],
 982        'depends': [
 983            (r':\s*', Text),
 984            (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)),
 985            (r'\(', Text, 'depend_vers'),
 986            (r',', Text),
 987            (r'\|', Operator),
 988            (r'[\s]+', Text),
 989            (r'[}\)]\s*$', Text, '#pop'),
 990            (r'[}]', Text),
 991            (r'[^,]$', Name.Function, '#pop'),
 992            (r'([\+\.a-zA-Z0-9-][\s\n]*)', Name.Function),
 993        ],
 994        'depend_vers': [
 995            (r'\),', Text, '#pop'),
 996            (r'\)[^,]', Text, '#pop:2'),
 997            (r'([><=]+)(\s*)([^\)]+)', bygroups(Operator, Text, Number))
 998        ]
 999    }
1000
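# An editor's sketch (not from the original module): a two-field stanza shows
# the state changes -- 'Maintainer' enters a state that renders the address
# as Generic.Strong, 'Depends' one that marks package names and versions.
def _demo_control_lexer():
    ctrl = "Maintainer: Jane <jane@example.org>\nDepends: libc6 (>= 2.7)\n"
    return list(DebianControlLexer().get_tokens(ctrl))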
1001
1002class YamlLexerContext(LexerContext):
1003    """Indentation context for the YAML lexer."""
1004
1005    def __init__(self, *args, **kwds):
1006        super(YamlLexerContext, self).__init__(*args, **kwds)
1007        self.indent_stack = []
1008        self.indent = -1
1009        self.next_indent = 0
1010        self.block_scalar_indent = None
1011
1012
1013class YamlLexer(ExtendedRegexLexer):
1014    """
1015    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
1016    language.
1017
1018    *New in Pygments 0.11.*
1019    """
1020
1021    name = 'YAML'
1022    aliases = ['yaml']
1023    filenames = ['*.yaml', '*.yml']
1024    mimetypes = ['text/x-yaml']
1025
1026
1027    def something(token_class):
1028        """Do not produce empty tokens."""
1029        def callback(lexer, match, context):
1030            text = match.group()
1031            if not text:
1032                return
1033            yield match.start(), token_class, text
1034            context.pos = match.end()
1035        return callback
1036
1037    def reset_indent(token_class):
1038        """Reset the indentation levels."""
1039        def callback(lexer, match, context):
1040            text = match.group()
1041            context.indent_stack = []
1042            context.indent = -1
1043            context.next_indent = 0
1044            context.block_scalar_indent = None
1045            yield match.start(), token_class, text
1046            context.pos = match.end()
1047        return callback
1048
1049    def save_indent(token_class, start=False):
1050        """Save a possible indentation level."""
1051        def callback(lexer, match, context):
1052            text = match.group()
1053            extra = ''
1054            if start:
1055                context.next_indent = len(text)
1056                if context.next_indent < context.indent:
1057                    while context.next_indent < context.indent:
1058                        context.indent = context.indent_stack.pop()
1059                    if context.next_indent > context.indent:
1060                        extra = text[context.indent:]
1061                        text = text[:context.indent]
1062            else:
1063                context.next_indent += len(text)
1064            if text:
1065                yield match.start(), token_class, text
1066            if extra:
1067                yield match.start()+len(text), token_class.Error, extra
1068            context.pos = match.end()
1069        return callback
1070
1071    def set_indent(token_class, implicit=False):
1072        """Set the previously saved indentation level."""
1073        def callback(lexer, match, context):
1074            text = match.group()
1075            if context.indent < context.next_indent:
1076                context.indent_stack.append(context.indent)
1077                context.indent = context.next_indent
1078            if not implicit:
1079                context.next_indent += len(text)
1080            yield match.start(), token_class, text
1081            context.pos = match.end()
1082        return callback
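    # Worked example (editor's note): lexing "a:\n  - b\n", save_indent()
    # records the two leading spaces as next_indent, and the '-' indicator
    # makes set_indent() commit it, pushing the old level onto indent_stack;
    # a later dedent pops levels back off in save_indent(start=True).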
1083
1084    def set_block_scalar_indent(token_class):
1085        """Set an explicit indentation level for a block scalar."""
1086        def callback(lexer, match, context):
1087            text = match.group()
1088            context.block_scalar_indent = None
1089            if not text:
1090                return
1091            increment = match.group(1)
1092            if increment:
1093                current_indent = max(context.indent, 0)
1094                increment = int(increment)
1095                context.block_scalar_indent = current_indent + increment
1096            if text:
1097                yield match.start(), token_class, text
1098                context.pos = match.end()
1099        return callback
1100
1101    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
1102        """Process an empty line in a block scalar."""
1103        def callback(lexer, match, context):
1104            text = match.group()
1105            if (context.block_scalar_indent is None or
1106                    len(text) <= context.block_scalar_indent):
1107                if text:
1108                    yield match.start(), indent_token_class, text
1109            else:
1110                indentation = text[:context.block_scalar_indent]
1111                content = text[context.block_scalar_indent:]
1112                yield match.start(), indent_token_class, indentation
1113                yield (match.start()+context.block_scalar_indent,
1114                        content_token_class, content)
1115            context.pos = match.end()
1116        return callback
1117
1118    def parse_block_scalar_indent(token_class):
1119        """Process indentation spaces in a block scalar."""
1120        def callback(lexer, match, context):
1121            text = match.group()
1122            if context.block_scalar_indent is None:
1123                if len(text) <= max(context.indent, 0):
1124                    context.stack.pop()
1125                    context.stack.pop()
1126                    return
1127                context.block_scalar_indent = len(text)
1128            else:
1129                if len(text) < context.block_scalar_indent:
1130                    context.stack.pop()
1131                    context.stack.pop()
1132                    return
1133            if text:
1134                yield match.start(), token_class, text
1135                context.pos = match.end()
1136        return callback
1137
1138    def parse_plain_scalar_indent(token_class):
1139        """Process indentation spaces in a plain scalar."""
1140        def callback(lexer, match, context):
1141            text = match.group()
1142            if len(text) <= context.indent:
1143                context.stack.pop()
1144                context.stack.pop()
1145                return
1146            if text:
1147                yield match.start(), token_class, text
1148                context.pos = match.end()
1149        return callback
1150
1151
1152
1153    tokens = {
1154        # the root rules
1155        'root': [
1156            # ignored whitespaces
1157            (r'[ ]+(?=#|$)', Text),
1158            # line breaks
1159            (r'\n+', Text),
1160            # a comment
1161            (r'#[^\n]*', Comment.Single),
1162            # the '%YAML' directive
1163            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
1164            # the %TAG directive
1165            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
1166            # document start and document end indicators
1167            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
1168             'block-line'),
1169            # indentation spaces
1170            (r'[ ]*(?![ \t\n\r\f\v]|$)', save_indent(Text, start=True),
1171             ('block-line', 'indentation')),
1172        ],
1173
1174        # trailing whitespaces after directives or a block scalar indicator
1175        'ignored-line': [
1176            # ignored whitespaces
1177            (r'[ ]+(?=#|$)', Text),
1178            # a comment
1179            (r'#[^\n]*', Comment.Single),
1180            # line break
1181            (r'\n', Text, '#pop:2'),
1182        ],
1183
1184        # the %YAML directive
1185        'yaml-directive': [
1186            # the version number
1187            (r'([ ]+)([0-9]+\.[0-9]+)',
1188             bygroups(Text, Number), 'ignored-line'),
1189        ],
1190
1191        # the %TAG directive
1192        'tag-directive': [
1193            # a tag handle and the corresponding prefix
1194            (r'([ ]+)(!|![0-9A-Za-z_-]*!)'
1195             r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)',
1196             bygroups(Text, Keyword.Type, Text, Keyword.Type),
1197             'ignored-line'),
1198        ],
1199
1200        # block scalar indicators and indentation spaces
1201        'indentation': [
1202            # trailing whitespaces are ignored
1203            (r'[ ]*$', something(Text), '#pop:2'),
1204            # whitespaces preceding block collection indicators
1205            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
1206            # block collection indicators
1207            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
1208            # the beginning of a block line
1209            (r'[ ]*', save_indent(Text), '#pop'),
1210        ],
1211
1212        # an indented line in the block context
1213        'block-line': [
1214            # the line end
1215            (r'[ ]*(?=#|$)', something(Text), '#pop'),
1216            # whitespaces separating tokens
1217            (r'[ ]+', Text),
1218            # tags, anchors and aliases,
1219            include('descriptors'),
1220            # block collections and scalars
1221            include('block-nodes'),
1222            # flow collections and quoted scalars
1223            include('flow-nodes'),
1224            # a plain scalar
1225            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])',
1226             something(Name.Variable),
1227             'plain-scalar-in-block-context'),
1228        ],
1229
1230        # tags, anchors, aliases
1231        'descriptors' : [
1232            # a full-form tag
1233            (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Keyword.Type),
1234            # a tag in the form '!', '!suffix' or '!handle!suffix'
1235            (r'!(?:[0-9A-Za-z_-]+)?'
1236             r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Keyword.Type),
1237            # an anchor
1238            (r'&[0-9A-Za-z_-]+', Name.Label),
1239            # an alias
1240            (r'\*[0-9A-Za-z_-]+', Name.Variable),
1241        ],
1242
1243        # block collections and scalars
1244        'block-nodes': [
1245            # implicit key
1246            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
1247            # literal and folded scalars
1248            (r'[|>]', Punctuation.Indicator,
1249             ('block-scalar-content', 'block-scalar-header')),
1250        ],
1251
1252        # flow collections and quoted scalars
1253        'flow-nodes': [
1254            # a flow sequence
1255            (r'\[', Punctuation.Indicator, 'flow-sequence'),
1256            # a flow mapping
1257            (r'\{', Punctuation.Indicator, 'flow-mapping'),
1258            # a single-quoted scalar
1259            (r'\'', String, 'single-quoted-scalar'),
1260            # a double-quoted scalar
1261            (r'\"', String, 'double-quoted-scalar'),
1262        ],
1263
1264        # the content of a flow collection
1265        'flow-collection': [
1266            # whitespaces
1267            (r'[ ]+', Text),
1268            # line breaks
1269            (r'\n+', Text),
1270            # a comment
1271            (r'#[^\n]*', Comment.Single),
1272            # simple indicators
1273            (r'[?:,]', Punctuation.Indicator),
1274            # tags, anchors and aliases
1275            include('descriptors'),
1276            # nested collections and quoted scalars
1277            include('flow-nodes'),
1278            # a plain scalar
1279            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])',
1280             something(Name.Variable),
1281             'plain-scalar-in-flow-context'),
1282        ],
1283
1284        # a flow sequence indicated by '[' and ']'
1285        'flow-sequence': [
1286            # include flow collection rules
1287            include('flow-collection'),
1288            # the closing indicator
1289            (r'\]', Punctuation.Indicator, '#pop'),
1290        ],
1291
1292        # a flow mapping indicated by '{' and '}'
1293        'flow-mapping': [
1294            # include flow collection rules
1295            include('flow-collection'),
1296            # the closing indicator
1297            (r'\}', Punctuation.Indicator, '#pop'),
1298        ],
1299
1300        # block scalar lines
1301        'block-scalar-content': [
1302            # line break
1303            (r'\n', Text),
1304            # empty line
1305            (r'^[ ]+$',
1306             parse_block_scalar_empty_line(Text, Name.Constant)),
1307            # indentation spaces (we may leave the state here)
1308            (r'^[ ]*', parse_block_scalar_indent(Text)),
1309            # line content
1310            (r'[^\n\r\f\v]+', Name.Constant),
1311        ],
1312
1313        # the header of a literal or folded scalar
1314        'block-scalar-header': [
1315            # indentation indicator followed by chomping flag
1316            (r'([1-9])?[+-]?(?=[ ]|$)',
1317             set_block_scalar_indent(Punctuation.Indicator),
1318             'ignored-line'),
1319            # chomping flag followed by indentation indicator
1320            (r'[+-]?([1-9])?(?=[ ]|$)',
1321             set_block_scalar_indent(Punctuation.Indicator),
1322             'ignored-line'),
1323        ],
1324
1325        # ignored and regular whitespaces in quoted scalars
1326        'quoted-scalar-whitespaces': [
1327            # leading and trailing whitespaces are ignored
1328            (r'^[ ]+|[ ]+$', Text),
1329            # line breaks are ignored
1330            (r'\n+', Text),
1331            # other whitespaces are a part of the value
1332            (r'[ ]+', Name.Variable),
1333        ],
1334
1335        # single-quoted scalars
1336        'single-quoted-scalar': [
1337            # include whitespace and line break rules
1338            include('quoted-scalar-whitespaces'),
1339            # escaping of the quote character
1340            (r'\'\'', String.Escape),
1341            # regular non-whitespace characters
1342            (r'[^ \t\n\r\f\v\']+', String),
1343            # the closing quote
1344            (r'\'', String, '#pop'),
1345        ],
1346
1347        # double-quoted scalars
1348        'double-quoted-scalar': [
1349            # include whitespace and line break rules
1350            include('quoted-scalar-whitespaces'),
1351            # escaping of special characters
1352            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
1353            # escape codes
1354            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
1355             String.Escape),
1356            # regular non-whitespace characters
1357            (r'[^ \t\n\r\f\v\"\\]+', String),
1358            # the closing quote
1359            (r'"', String, '#pop'),
1360        ],
1361
1362        # the beginning of a new line while scanning a plain scalar
1363        'plain-scalar-in-block-context-new-line': [
1364            # empty lines
1365            (r'^[ ]+$', Text),
1366            # line breaks
1367            (r'\n+', Text),
1368            # document start and document end indicators
1369            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
1370            # indentation spaces (we may leave the block line state here)
1371            (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
1372        ],
1373
1374        # a plain scalar in the block context
1375        'plain-scalar-in-block-context': [
1376            # the scalar ends with the ':' indicator
1377            (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
1378            # the scalar ends w…
