/console/app/pygments/lexers/text.py
1# -*- coding: utf-8 -*-
2"""
3 pygments.lexers.text
4 ~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for non-source code file types.
7
8 :copyright: 2006-2008 by Armin Ronacher, Georg Brandl,
9 Tim Hatch <tim@timhatch.com>,
10 Ronny Pfannschmidt,
11 Dennis Kaarsemaker,
12 Kumar Appaiah <akumar@ee.iitm.ac.in>,
13 Varun Hiremath <varunhiremath@gmail.com>,
14 Jeremy Thurgood,
15 Max Battcher,
16 Kirill Simonov <xi@resolvent.net>.
17 :license: BSD, see LICENSE for more details.
18"""
19
20import re
21try:
22 set
23except NameError:
24 from sets import Set as set
25from bisect import bisect
26
27from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, \
28 bygroups, include, using, this, do_insertions
29from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \
30 Generic, Operator, Number, Whitespace, Literal
31from pygments.util import get_bool_opt
32from pygments.lexers.other import BashLexer
33
34__all__ = ['IniLexer', 'SourcesListLexer', 'BaseMakefileLexer',
35 'MakefileLexer', 'DiffLexer', 'IrcLogsLexer', 'TexLexer',
36 'GroffLexer', 'ApacheConfLexer', 'BBCodeLexer', 'MoinWikiLexer',
37 'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer',
38 'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer',
39 'LighttpdConfLexer', 'NginxConfLexer']
40
41
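# Note (illustrative, not part of the original module): every lexer below is
# driven by a `tokens` dict of named states.  Each state is a list of
# (regex, token-or-callback, optional-new-state) rules; `bygroups()` assigns
# one token type per regex group, `include()` splices in another state, and
# '#pop'/'#push' manipulate the state stack.  A minimal sketch of the same
# pattern (the class and its rules are made up for illustration):
#
#     class KeyValueLexer(RegexLexer):
#         tokens = {
#             'root': [
#                 (r'#.*?$', Comment),
#                 (r'(\w+)(=)(.*?)$', bygroups(Name.Attribute, Operator, String)),
#                 (r'\s+', Text),
#             ]
#         }
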
42class IniLexer(RegexLexer):
43 """
44 Lexer for configuration files in INI style.
45 """
46
47 name = 'INI'
48 aliases = ['ini', 'cfg']
49 filenames = ['*.ini', '*.cfg', '*.properties']
50 mimetypes = ['text/x-ini']
51
52 tokens = {
53 'root': [
54 (r'\s+', Text),
55 (r'[;#].*?$', Comment),
56 (r'\[.*?\]$', Keyword),
57 (r'(.*?)(\s*)(=)(\s*)(.*?)$',
58 bygroups(Name.Attribute, Text, Operator, Text, String))
59 ]
60 }
61
62 def analyse_text(text):
63 npos = text.find('\n')
64 if npos < 3:
65 return False
66 return text[0] == '[' and text[npos-1] == ']'
67
68
69class SourcesListLexer(RegexLexer):
70 """
71 Lexer that highlights debian sources.list files.
72
73 *New in Pygments 0.7.*
74 """
75
76 name = 'Debian Sourcelist'
77 aliases = ['sourceslist', 'sources.list']
78 filenames = ['sources.list']
79 mimetypes = ['application/x-debian-sourceslist']
80
81 tokens = {
82 'root': [
83 (r'\s+', Text),
84 (r'#.*?$', Comment),
85 (r'^(deb(?:-src)?)(\s+)',
86 bygroups(Keyword, Text), 'distribution')
87 ],
88 'distribution': [
89 (r'#.*?$', Comment, '#pop'),
90 (r'\$\(ARCH\)', Name.Variable),
91 (r'[^\s$[]+', String),
92 (r'\[', String.Other, 'escaped-distribution'),
93 (r'\$', String),
94 (r'\s+', Text, 'components')
95 ],
96 'escaped-distribution': [
97 (r'\]', String.Other, '#pop'),
98 (r'\$\(ARCH\)', Name.Variable),
99 (r'[^\]$]+', String.Other),
100 (r'\$', String.Other)
101 ],
102 'components': [
103 (r'#.*?$', Comment, '#pop:2'),
104 (r'$', Text, '#pop:2'),
105 (r'\s+', Text),
106 (r'\S+', Keyword.Pseudo),
107 ]
108 }
109
110 def analyse_text(text):
111 for line in text.split('\n'):
112 line = line.strip()
113 if not (line.startswith('#') or line.startswith('deb ') or
114 line.startswith('deb-src ') or not line):
115 return False
116 return True
117
118
119class MakefileLexer(Lexer):
120 """
121 Lexer for BSD and GNU make extensions (lenient enough to handle both in
122 the same file even).
123
124 *Rewritten in Pygments 0.10.*
125 """
126
127 name = 'Makefile'
128 aliases = ['make', 'makefile', 'mf', 'bsdmake']
129 filenames = ['*.mak', 'Makefile', 'makefile', 'Makefile.*']
130 mimetypes = ['text/x-makefile']
131
132 r_special = re.compile(r'^(?:'
133 # BSD Make
134 r'\.\s*(include|undef|error|warning|if|else|elif|endif|for|endfor)|'
135 # GNU Make
136 r'\s*(ifeq|ifneq|ifdef|ifndef|else|endif|-?include|define|endef|:))(?=\s)')
137 r_comment = re.compile(r'^\s*@?#')
138
139 def get_tokens_unprocessed(self, text):
140 ins = []
141 lines = text.splitlines(True)
142 done = ''
143 lex = BaseMakefileLexer(**self.options)
144 backslashflag = False
145 for line in lines:
146 if self.r_special.match(line) or backslashflag:
147 ins.append((len(done), [(0, Comment.Preproc, line)]))
148 backslashflag = line.strip().endswith('\\')
149 elif self.r_comment.match(line):
150 ins.append((len(done), [(0, Comment, line)]))
151 else:
152 done += line
153 for item in do_insertions(ins, lex.get_tokens_unprocessed(done)):
154 yield item
155
156
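# How the delegation above works (illustrative sketch): each entry appended to
# `ins` has the shape (offset_into_done, [(0, token, line)]), and
# do_insertions() interleaves those pre-tokenized lines back into the token
# stream that BaseMakefileLexer produces for the filtered text.  Roughly:
#
#     done = 'all:\n\techo hi\n'      # lines kept for BaseMakefileLexer
#     ins  = [(5, [(0, Comment.Preproc, 'ifdef DEBUG\n')])]
#     # do_insertions(ins, lex.get_tokens_unprocessed(done)) re-inserts the
#     # preprocessor line at offset 5, i.e. between the rule line and the
#     # command line of the filtered text.
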
157class BaseMakefileLexer(RegexLexer):
158 """
159 Lexer for simple Makefiles (no preprocessing).
160
161 *New in Pygments 0.10.*
162 """
163
164 name = 'Makefile'
165 aliases = ['basemake']
166 filenames = []
167 mimetypes = []
168
169 tokens = {
170 'root': [
171 (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)),
172 (r'\$\((?:.*\\\n|.*\n)+', using(BashLexer)),
173 (r'\s+', Text),
174 (r'#.*?\n', Comment),
175 (r'(export)(\s+)(?=[a-zA-Z0-9_${}\t -]+\n)',
176 bygroups(Keyword, Text), 'export'),
177 (r'export\s+', Keyword),
178 # assignment
179 (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n|.*\n)+)',
180 bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))),
181 # strings
182 (r'"(\\\\|\\"|[^"])*"', String.Double),
183 (r"'(\\\\|\\'|[^'])*'", String.Single),
184 # targets
185 (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text),
186 'block-header'),
187 #TODO: add paren handling (grr)
188 ],
189 'export': [
190 (r'[a-zA-Z0-9_${}-]+', Name.Variable),
191 (r'\n', Text, '#pop'),
192 (r'\s+', Text),
193 ],
194 'block-header': [
195 (r'[^,\\\n#]+', Number),
196 (r',', Punctuation),
197 (r'#.*?\n', Comment),
198 (r'\\\n', Text), # line continuation
199 (r'\\.', Text),
200 (r'(?:[\t ]+.*\n|\n)+', using(BashLexer), '#pop'),
201 ],
202 }
203
204
205class DiffLexer(RegexLexer):
206 """
207 Lexer for unified or context-style diffs or patches.
208 """
209
210 name = 'Diff'
211 aliases = ['diff']
212 filenames = ['*.diff', '*.patch']
213 mimetypes = ['text/x-diff', 'text/x-patch']
214
215 tokens = {
216 'root': [
217 (r' .*\n', Text),
218 (r'\+.*\n', Generic.Inserted),
219 (r'-.*\n', Generic.Deleted),
220 (r'!.*\n', Generic.Strong),
221 (r'@.*\n', Generic.Subheading),
222 (r'(Index|diff).*\n', Generic.Heading),
223 (r'=.*\n', Generic.Heading),
224 (r'.*\n', Text),
225 ]
226 }
227
228 def analyse_text(text):
229 if text[:7] == 'Index: ':
230 return True
231 if text[:5] == 'diff ':
232 return True
233 if text[:4] == '--- ':
234 return 0.9
235
236
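# analyse_text() above feeds Pygments' lexer guessing (illustrative sketch;
# guess_lexer() is a real Pygments API, the diff text is made up):
#
#     from pygments.lexers import guess_lexer
#     lexer = guess_lexer('--- a/foo\n+++ b/foo\n@@ -1 +1 @@\n-x\n+y\n')
#     # DiffLexer.analyse_text scores 0.9 for text starting with '--- ',
#     # so DiffLexer is the likely winner here.
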
237class DarcsPatchLexer(RegexLexer):
238 """
239 DarcsPatchLexer is a lexer for the various versions of the darcs patch
240 format. Examples of this format are derived by commands such as
241 ``darcs annotate --patch`` and ``darcs send``.
242
243 *New in Pygments 0.10.*
244 """
245 name = 'Darcs Patch'
246 aliases = ['dpatch']
247 filenames = ['*.dpatch', '*.darcspatch']
248
249 tokens = {
250 'root': [
251 (r'<', Operator),
252 (r'>', Operator),
253 (r'{', Operator, 'patch'),
254 (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
255 bygroups(Operator, Keyword, Name, Text, Name, Operator,
256 Literal.Date, Text), 'comment'),
257 (r'New patches:', Generic.Heading),
258 (r'Context:', Generic.Heading),
259 (r'Patch bundle hash:', Generic.Heading),
260 (r'\s+|\w+', Text),
261 ],
262 'comment': [
263 (r' .*\n', Comment),
264 (r'\]', Operator, "#pop"),
265 ],
266 'patch': [
267 (r'}', Operator, "#pop"),
268 (r'(\w+)(.*\n)', bygroups(Keyword, Text)),
269 (r'\+.*\n', Generic.Inserted),
270 (r'-.*\n', Generic.Deleted),
271 (r'.*\n', Text),
272 ],
273 }
274
275
276class IrcLogsLexer(RegexLexer):
277 """
278 Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
279 """
280
281 name = 'IRC logs'
282 aliases = ['irc']
283 filenames = ['*.weechatlog']
284 mimetypes = ['text/x-irclog']
285
286 flags = re.VERBOSE | re.MULTILINE
287 timestamp = r"""
288 (
289 # irssi / xchat and others
290 (?: \[|\()? # Opening bracket or paren for the timestamp
291 (?: # Timestamp
292 (?: (?:\d{1,4} [-/]?)+ # Date as - or /-separated groups of digits
293 [T ])? # Date/time separator: T or space
294 (?: \d?\d [:.]?)+ # Time as :/.-separated groups of 1 or 2 digits
295 )
296 (?: \]|\))?\s+ # Closing bracket or paren for the timestamp
297 |
298 # weechat
299 \d{4}\s\w{3}\s\d{2}\s # Date
300 \d{2}:\d{2}:\d{2}\s+ # Time + Whitespace
301 |
302 # xchat
303 \w{3}\s\d{2}\s # Date
304 \d{2}:\d{2}:\d{2}\s+ # Time + Whitespace
305 )?
306 """
307 tokens = {
308 'root': [
309 # log start/end
310 (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
311 # hack
312 ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
313 # normal msgs
314 ("^" + timestamp + r"""
315 (\s*<.*?>\s*) # Nick """,
316 bygroups(Comment.Preproc, Name.Tag), 'msg'),
317 # /me msgs
318 ("^" + timestamp + r"""
319 (\s*[*]\s+) # Star
320 ([^\s]+\s+.*?\n) # Nick + rest of message """,
321 bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
322 # join/part msgs
323 ("^" + timestamp + r"""
324 (\s*(?:\*{3}|<?-[!@=P]?->?)\s*) # Star(s) or symbols
325 ([^\s]+\s+) # Nick + Space
326 (.*?\n) # Rest of message """,
327 bygroups(Comment.Preproc, Keyword, String, Comment)),
328 (r"^.*?\n", Text),
329 ],
330 'msg': [
331 (r"[^\s]+:(?!//)", Name.Attribute), # Prefix
332 (r".*\n", Text, '#pop'),
333 ],
334 }
335
336
337class BBCodeLexer(RegexLexer):
338 """
339 A lexer that highlights BBCode(-like) syntax.
340
341 *New in Pygments 0.6.*
342 """
343
344 name = 'BBCode'
345 aliases = ['bbcode']
346 mimetypes = ['text/x-bbcode']
347
348 tokens = {
349 'root' : [
350 (r'[\s\w]+', Text),
351 (r'(\[)(/?[^\]\n\r=]+)(\])',
352 bygroups(Keyword, Keyword.Pseudo, Keyword)),
353 (r'(\[)([^\]\n\r=]+)(=)([^\]\n\r]+)(\])',
354 bygroups(Keyword, Keyword.Pseudo, Operator, String, Keyword)),
355 ],
356 }
357
358
359class TexLexer(RegexLexer):
360 """
361 Lexer for the TeX and LaTeX typesetting languages.
362 """
363
364 name = 'TeX'
365 aliases = ['tex', 'latex']
366 filenames = ['*.tex', '*.aux', '*.toc']
367 mimetypes = ['text/x-tex', 'text/x-latex']
368
369 tokens = {
370 'general': [
371 (r'%.*?\n', Comment),
372 (r'[{}]', Name.Builtin),
373 (r'[&_^]', Name.Builtin),
374 ],
375 'root': [
376 (r'\\\[', String.Backtick, 'displaymath'),
377 (r'\\\(', String, 'inlinemath'),
378 (r'\$\$', String.Backtick, 'displaymath'),
379 (r'\$', String, 'inlinemath'),
380 (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
381 include('general'),
382 (r'[^\\$%&_^{}]+', Text),
383 ],
384 'math': [
385 (r'\\([a-zA-Z]+|.)', Name.Variable),
386 include('general'),
387 (r'[0-9]+', Number),
388 (r'[-=!+*/()\[\]]', Operator),
389 (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
390 ],
391 'inlinemath': [
392 (r'\\\)', String, '#pop'),
393 (r'\$', String, '#pop'),
394 include('math'),
395 ],
396 'displaymath': [
397 (r'\\\]', String, '#pop'),
398 (r'\$\$', String, '#pop'),
399 (r'\$', Name.Builtin),
400 include('math'),
401 ],
402 'command': [
403 (r'\[.*?\]', Name.Attribute),
404 (r'\*', Keyword),
405 (r'', Text, '#pop'),
406 ],
407 }
408
409 def analyse_text(text):
410 for start in ("\\documentclass", "\\input", "\\documentstyle",
411 "\\relax"):
412 if text[:len(start)] == start:
413 return True
414
415
416class GroffLexer(RegexLexer):
417 """
418 Lexer for the (g)roff typesetting language, supporting groff
419 extensions. Mainly useful for highlighting manpage sources.
420
421 *New in Pygments 0.6.*
422 """
423
424 name = 'Groff'
425 aliases = ['groff', 'nroff', 'man']
426 filenames = ['*.[1234567]', '*.man']
427 mimetypes = ['application/x-troff', 'text/troff']
428
429 tokens = {
430 'root': [
431 (r'(?i)(\.)(\w+)', bygroups(Text, Keyword), 'request'),
432 (r'\.', Punctuation, 'request'),
433 # Regular characters, slurp till we find a backslash or newline
434 (r'[^\\\n]*', Text, 'textline'),
435 ],
436 'textline': [
437 include('escapes'),
438 (r'[^\\\n]+', Text),
439 (r'\n', Text, '#pop'),
440 ],
441 'escapes': [
442 # groff has many ways to write escapes.
443 (r'\\"[^\n]*', Comment),
444 (r'\\[fn]\w', String.Escape),
445 (r'\\\(..', String.Escape),
446 (r'\\.\[.*\]', String.Escape),
447 (r'\\.', String.Escape),
448 (r'\\\n', Text, 'request'),
449 ],
450 'request': [
451 (r'\n', Text, '#pop'),
452 include('escapes'),
453 (r'"[^\n"]+"', String.Double),
454 (r'\d+', Number),
455 (r'\S+', String),
456 (r'\s+', Text),
457 ],
458 }
459
460 def analyse_text(text):
461 if text[0] != '.':
462 return False
463 if text[:3] == '.\\"':
464 return True
465 if text[:4] == '.TH ':
466 return True
467 if text[1:3].isalnum() and text[3].isspace():
468 return 0.9
469
470
471class ApacheConfLexer(RegexLexer):
472 """
473 Lexer for configuration files following the Apache config file
474 format.
475
476 *New in Pygments 0.6.*
477 """
478
479 name = 'ApacheConf'
480 aliases = ['apacheconf', 'aconf', 'apache']
481 filenames = ['.htaccess', 'apache.conf', 'apache2.conf']
482 mimetypes = ['text/x-apacheconf']
483 flags = re.MULTILINE | re.IGNORECASE
484
485 tokens = {
486 'root': [
487 (r'\s+', Text),
488 (r'(#.*?)$', Comment),
489 (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)',
490 bygroups(Name.Tag, Text, String, Name.Tag)),
491 (r'([a-zA-Z][a-zA-Z0-9]*)(\s+)',
492 bygroups(Name.Builtin, Text), 'value'),
493 (r'\.+', Text),
494 ],
495 'value': [
496 (r'$', Text, '#pop'),
497 (r'[^\S\n]+', Text),
498 (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number),
499 (r'\d+', Number),
500 (r'/([a-zA-Z0-9][a-zA-Z0-9_./-]+)', String.Other),
501 (r'(on|off|none|any|all|double|email|dns|min|minimal|'
502 r'os|productonly|full|emerg|alert|crit|error|warn|'
503 r'notice|info|debug|registry|script|inetd|standalone|'
504 r'user|group)\b', Keyword),
505 (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double),
506 (r'[^\s"]+', Text)
507 ]
508 }
509
510
511class MoinWikiLexer(RegexLexer):
512 """
513 For MoinMoin (and Trac) Wiki markup.
514
515 *New in Pygments 0.7.*
516 """
517
518 name = 'MoinMoin/Trac Wiki markup'
519 aliases = ['trac-wiki', 'moin']
520 filenames = []
521 mimetypes = ['text/x-trac-wiki']
522 flags = re.MULTILINE | re.IGNORECASE
523
524 tokens = {
525 'root': [
526 (r'^#.*$', Comment),
527 (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next
528 # Titles
529 (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
530 bygroups(Generic.Heading, using(this), Generic.Heading, String)),
531 # Literal code blocks, with optional shebang
532 (r'({{{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
533 (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting
534 # Lists
535 (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
536 (r'^( +)([a-zivx]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
537 # Other Formatting
538 (r'\[\[\w+.*?\]\]', Keyword), # Macro
539 (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
540 bygroups(Keyword, String, Keyword)), # Link
541 (r'^----+$', Keyword), # Horizontal rules
542 (r'[^\n\'\[{!_~^,|]+', Text),
543 (r'\n', Text),
544 (r'.', Text),
545 ],
546 'codeblock': [
547 (r'}}}', Name.Builtin, '#pop'),
548 # these blocks are allowed to be nested in Trac, but not MoinMoin
549 (r'{{{', Text, '#push'),
550 (r'[^{}]+', Comment.Preproc), # slurp boring text
551 (r'.', Comment.Preproc), # allow loose { or }
552 ],
553 }
554
555
556class RstLexer(RegexLexer):
557 """
558 For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.
559
560 *New in Pygments 0.7.*
561
562 Additional options accepted:
563
564 `handlecodeblocks`
565 Highlight the contents of ``.. sourcecode:: language`` and
566 ``.. code:: language`` directives with a lexer for the given
567 language (default: ``True``). *New in Pygments 0.8.*
568 """
569 name = 'reStructuredText'
570 aliases = ['rst', 'rest', 'restructuredtext']
571 filenames = ['*.rst', '*.rest']
572 mimetypes = ["text/x-rst"]
573 flags = re.MULTILINE
574
575 def _handle_sourcecode(self, match):
576 from pygments.lexers import get_lexer_by_name
577 from pygments.util import ClassNotFound
578
579 # section header
580 yield match.start(1), Punctuation, match.group(1)
581 yield match.start(2), Text, match.group(2)
582 yield match.start(3), Operator.Word, match.group(3)
583 yield match.start(4), Punctuation, match.group(4)
584 yield match.start(5), Text, match.group(5)
585 yield match.start(6), Keyword, match.group(6)
586 yield match.start(7), Text, match.group(7)
587
588 # lookup lexer if wanted and existing
589 lexer = None
590 if self.handlecodeblocks:
591 try:
592 lexer = get_lexer_by_name(match.group(6).strip())
593 except ClassNotFound:
594 pass
595 indention = match.group(8)
596 indention_size = len(indention)
597 code = (indention + match.group(9) + match.group(10) + match.group(11))
598
599 # no lexer for this language. handle it like it was a code block
600 if lexer is None:
601 yield match.start(8), String, code
602 return
603
604 # highlight the lines with the lexer.
605 ins = []
606 codelines = code.splitlines(True)
607 code = ''
608 for line in codelines:
609 if len(line) > indention_size:
610 ins.append((len(code), [(0, Text, line[:indention_size])]))
611 code += line[indention_size:]
612 else:
613 code += line
614 for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
615 yield item
616
617 tokens = {
618 'root': [
619 # Heading with overline
620 (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)',
621 bygroups(Generic.Heading, Text, Generic.Heading,
622 Text, Generic.Heading, Text)),
623 # Plain heading
624 (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
625 r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
626 bygroups(Generic.Heading, Text, Generic.Heading, Text)),
627 # Bulleted lists
628 (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
629 bygroups(Text, Number, using(this, state='inline'))),
630 # Numbered lists
631 (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
632 bygroups(Text, Number, using(this, state='inline'))),
633 (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
634 bygroups(Text, Number, using(this, state='inline'))),
635 # Numbered, but keep words at BOL from becoming lists
636 (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
637 bygroups(Text, Number, using(this, state='inline'))),
638 (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
639 bygroups(Text, Number, using(this, state='inline'))),
640 # Sourcecode directives
641 (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)'
642 r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
643 _handle_sourcecode),
644 # A directive
645 (r'^( *\.\.)(\s*)([\w-]+)(::)(?:([ \t]*)(.+))?',
646 bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, Keyword)),
647 # A reference target
648 (r'^( *\.\.)(\s*)([\w\t ]+:)(.*?)$',
649 bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
650 # A footnote target
651 (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
652 bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
653 # Comments
654 (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
655 # Field list
656 (r'^( *)(:.*?:)([ \t]+)(.*?)$', bygroups(Text, Name.Class, Text,
657 Name.Function)),
658 # Definition list
659 (r'^([^ ].*(?<!::)\n)((?:(?: +.*)\n)+)',
660 bygroups(using(this, state='inline'), using(this, state='inline'))),
661 # Code blocks
662 (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
663 bygroups(String.Escape, Text, String, String, Text, String)),
664 include('inline'),
665 ],
666 'inline': [
667 (r'\\.', Text), # escape
668 (r'``', String, 'literal'), # code
669 (r'(`)(.+?)(`__?)',
670 bygroups(Punctuation, using(this), Punctuation)), # reference
671 (r'(`.+?`)(:[a-zA-Z0-9-]+?:)?',
672 bygroups(Name.Variable, Name.Attribute)), # role
673 (r'(:[a-zA-Z0-9-]+?:)(`.+?`)',
674 bygroups(Name.Attribute, Name.Variable)), # user-defined role
675 (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis
676 (r'\*.+?\*', Generic.Emph), # Emphasis
677 (r'\[.*?\]_', String), # Footnote or citation
678 (r'<.+?>', Name.Tag), # Hyperlink
679 (r'[^\\\n\[*`:]+', Text),
680 (r'.', Text),
681 ],
682 'literal': [
683 (r'[^`\\]+', String),
684 (r'\\.', String),
685 (r'``', String, '#pop'),
686 (r'[`\\]', String),
687 ]
688 }
689
690 def __init__(self, **options):
691 self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
692 RegexLexer.__init__(self, **options)
693
694 def analyse_text(text):
695 if text[:2] == '..' and text[2:3] != '.':
696 return 0.3
697 p1 = text.find("\n")
698 p2 = text.find("\n", p1 + 1)
699 if (p2 > -1 and # has two lines
700 p1 * 2 + 1 == p2 and # they are the same length
701 text[p1+1] in '-=' and # the next line both starts and ends with
702 text[p1+1] == text[p2-1]): # ...a sufficiently high header
703 return 0.5
704
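
# Option handling sketch (illustrative): the `handlecodeblocks` option read via
# get_bool_opt() in __init__ above controls whether _handle_sourcecode()
# delegates directive bodies to a nested lexer.  Passing it through the
# constructor is plain Pygments usage:
#
#     RstLexer(handlecodeblocks=False)   # keep directive bodies as plain strings
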
705class VimLexer(RegexLexer):
706 """
707 Lexer for VimL script files.
708
709 *New in Pygments 0.8.*
710 """
711 name = 'VimL'
712 aliases = ['vim']
713 filenames = ['*.vim', '.vimrc']
714 mimetypes = ['text/x-vim']
715 flags = re.MULTILINE
716
717 tokens = {
718 'root': [
719 # Who decided that doublequote was a good comment character??
720 (r'^\s*".*', Comment),
721 (r'(?<=\s)"[^\-:.%#=*].*', Comment),
722
723 (r'[ \t]+', Text),
724 # TODO: regexes can have other delims
725 (r'/(\\\\|\\/|[^\n/])*/', String.Regex),
726 (r'"(\\\\|\\"|[^\n"])*"', String.Double),
727 (r"'(\\\\|\\'|[^\n'])*'", String.Single),
728 (r'-?\d+', Number),
729 (r'#[0-9a-f]{6}', Number.Hex),
730 (r'^:', Punctuation),
731 (r'[()<>+=!|,~-]', Punctuation), # Inexact list. Looks decent.
732 (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
733 Keyword),
734 (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
735 (r'\b\w+\b', Name.Other), # These are postprocessed below
736 (r'.', Text),
737 ],
738 }
739 def __init__(self, **options):
740 from pygments.lexers._vimbuiltins import command, option, auto
741 self._cmd = command
742 self._opt = option
743 self._aut = auto
744
745 RegexLexer.__init__(self, **options)
746
747 def is_in(self, w, mapping):
748 r"""
749 It's kind of difficult to decide if something might be a keyword
750 in VimL because it allows you to abbreviate them. In fact,
751 'ab[breviate]' is a good example. :ab, :abbre, or :abbreviate are
752 valid ways to call it so rather than making really awful regexps
753 like::
754
755 \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b
756
757 we match `\b\w+\b` and then call is_in() on those tokens. See
758 `scripts/get_vimkw.py` for how the lists are extracted.
759 """
760 p = bisect(mapping, (w,))
761 if p > 0:
762 if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
763 mapping[p-1][1][:len(w)] == w: return True
764 if p < len(mapping):
765 return mapping[p][0] == w[:len(mapping[p][0])] and \
766 mapping[p][1][:len(w)] == w
767 return False
768
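# Matching sketch for is_in() above (illustrative; the entry shown is made up
# but follows the (abbreviation, full-name) shape the code expects):
#
#     mapping = [('ab', 'abbreviate')]
#     is_in('abbre', mapping)    # True: 'ab' prefixes 'abbre', which in turn
#                                # prefixes 'abbreviate'
#     is_in('abbrev8', mapping)  # False: not a prefix of the full name
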
769 def get_tokens_unprocessed(self, text):
770 # TODO: builtins are only subsequent tokens on lines
771 # and 'keywords' only happen at the beginning except
772 # for :au ones
773 for index, token, value in \
774 RegexLexer.get_tokens_unprocessed(self, text):
775 if token is Name.Other:
776 if self.is_in(value, self._cmd):
777 yield index, Keyword, value
778 elif self.is_in(value, self._opt) or \
779 self.is_in(value, self._aut):
780 yield index, Name.Builtin, value
781 else:
782 yield index, Text, value
783 else:
784 yield index, token, value
785
786
787class GettextLexer(RegexLexer):
788 """
789 Lexer for Gettext catalog files.
790
791 *New in Pygments 0.9.*
792 """
793 name = 'Gettext Catalog'
794 aliases = ['pot', 'po']
795 filenames = ['*.pot', '*.po']
796 mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']
797
798 tokens = {
799 'root': [
800 (r'^#,\s.*?$', Keyword.Type),
801 (r'^#:\s.*?$', Keyword.Declaration),
802 #(r'^#$', Comment),
803 (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
804 (r'^(")([\w-]*:)(.*")$',
805 bygroups(String, Name.Property, String)),
806 (r'^".*"$', String),
807 (r'^(msgid|msgid_plural|msgstr)(\s+)(".*")$',
808 bygroups(Name.Variable, Text, String)),
809 (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
810 bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
811 ]
812 }
813
814class SquidConfLexer(RegexLexer):
815 """
816 Lexer for `squid <http://www.squid-cache.org/>`_ configuration files.
817
818 *New in Pygments 0.9.*
819 """
820
821 name = 'SquidConf'
822 aliases = ['squidconf', 'squid.conf', 'squid']
823 filenames = ['squid.conf']
824 mimetypes = ['text/x-squidconf']
825 flags = re.IGNORECASE
826
827 keywords = [ "acl", "always_direct", "announce_host",
828 "announce_period", "announce_port", "announce_to",
829 "anonymize_headers", "append_domain", "as_whois_server",
830 "auth_param_basic", "authenticate_children",
831 "authenticate_program", "authenticate_ttl", "broken_posts",
832 "buffered_logs", "cache_access_log", "cache_announce",
833 "cache_dir", "cache_dns_program", "cache_effective_group",
834 "cache_effective_user", "cache_host", "cache_host_acl",
835 "cache_host_domain", "cache_log", "cache_mem",
836 "cache_mem_high", "cache_mem_low", "cache_mgr",
837 "cachemgr_passwd", "cache_peer", "cache_peer_access",
838 "cahce_replacement_policy", "cache_stoplist",
839 "cache_stoplist_pattern", "cache_store_log", "cache_swap",
840 "cache_swap_high", "cache_swap_log", "cache_swap_low",
841 "client_db", "client_lifetime", "client_netmask",
842 "connect_timeout", "coredump_dir", "dead_peer_timeout",
843 "debug_options", "delay_access", "delay_class",
844 "delay_initial_bucket_level", "delay_parameters",
845 "delay_pools", "deny_info", "dns_children", "dns_defnames",
846 "dns_nameservers", "dns_testnames", "emulate_httpd_log",
847 "err_html_text", "fake_user_agent", "firewall_ip",
848 "forwarded_for", "forward_snmpd_port", "fqdncache_size",
849 "ftpget_options", "ftpget_program", "ftp_list_width",
850 "ftp_passive", "ftp_user", "half_closed_clients",
851 "header_access", "header_replace", "hierarchy_stoplist",
852 "high_response_time_warning", "high_page_fault_warning",
853 "htcp_port", "http_access", "http_anonymizer", "httpd_accel",
854 "httpd_accel_host", "httpd_accel_port",
855 "httpd_accel_uses_host_header", "httpd_accel_with_proxy",
856 "http_port", "http_reply_access", "icp_access",
857 "icp_hit_stale", "icp_port", "icp_query_timeout",
858 "ident_lookup", "ident_lookup_access", "ident_timeout",
859 "incoming_http_average", "incoming_icp_average",
860 "inside_firewall", "ipcache_high", "ipcache_low",
861 "ipcache_size", "local_domain", "local_ip", "logfile_rotate",
862 "log_fqdn", "log_icp_queries", "log_mime_hdrs",
863 "maximum_object_size", "maximum_single_addr_tries",
864 "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr",
865 "mcast_miss_encode_key", "mcast_miss_port", "memory_pools",
866 "memory_pools_limit", "memory_replacement_policy",
867 "mime_table", "min_http_poll_cnt", "min_icp_poll_cnt",
868 "minimum_direct_hops", "minimum_object_size",
869 "minimum_retry_timeout", "miss_access", "negative_dns_ttl",
870 "negative_ttl", "neighbor_timeout", "neighbor_type_domain",
871 "netdb_high", "netdb_low", "netdb_ping_period",
872 "netdb_ping_rate", "never_direct", "no_cache",
873 "passthrough_proxy", "pconn_timeout", "pid_filename",
874 "pinger_program", "positive_dns_ttl", "prefer_direct",
875 "proxy_auth", "proxy_auth_realm", "query_icmp", "quick_abort",
876 "quick_abort", "quick_abort_max", "quick_abort_min",
877 "quick_abort_pct", "range_offset_limit", "read_timeout",
878 "redirect_children", "redirect_program",
879 "redirect_rewrites_host_header", "reference_age",
880 "reference_age", "refresh_pattern", "reload_into_ims",
881 "request_body_max_size", "request_size", "request_timeout",
882 "shutdown_lifetime", "single_parent_bypass",
883 "siteselect_timeout", "snmp_access", "snmp_incoming_address",
884 "snmp_port", "source_ping", "ssl_proxy",
885 "store_avg_object_size", "store_objects_per_bucket",
886 "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs",
887 "tcp_incoming_address", "tcp_outgoing_address",
888 "tcp_recv_bufsize", "test_reachability", "udp_hit_obj",
889 "udp_hit_obj_size", "udp_incoming_address",
890 "udp_outgoing_address", "unique_hostname", "unlinkd_program",
891 "uri_whitespace", "useragent_log", "visible_hostname",
892 "wais_relay", "wais_relay_host", "wais_relay_port",
893 ]
894
895 opts = [ "proxy-only", "weight", "ttl", "no-query", "default",
896 "round-robin", "multicast-responder", "on", "off", "all",
897 "deny", "allow", "via", "parent", "no-digest", "heap", "lru",
898 "realm", "children", "credentialsttl", "none", "disable",
899 "offline_toggle", "diskd", "q1", "q2",
900 ]
901
902 actions = [ "shutdown", "info", "parameter", "server_list",
903 "client_list", r'squid\.conf',
904 ]
905
906 actions_stats = [ "objects", "vm_objects", "utilization",
907 "ipcache", "fqdncache", "dns", "redirector", "io",
908 "reply_headers", "filedescriptors", "netdb",
909 ]
910
911 actions_log = [ "status", "enable", "disable", "clear"]
912
913 acls = [ "url_regex", "urlpath_regex", "referer_regex", "port",
914 "proto", "req_mime_type", "rep_mime_type", "method",
915 "browser", "user", "src", "dst", "time", "dstdomain", "ident",
916 "snmp_community",
917 ]
918
919 ip_re = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
920
921 def makelistre(list):
922 return r'\b(?:'+'|'.join(list)+r')\b'
923
924 tokens = {
925 'root': [
926 (r'\s+', Text),
927 (r'#', Comment, 'comment'),
928 (makelistre(keywords), Keyword),
929 (makelistre(opts), Name.Constant),
930 # Actions
931 (makelistre(actions), String),
932 (r'stats/'+makelistre(actions), String),
933 (r'log/'+makelistre(actions)+r'=', String),
934 (makelistre(acls), Keyword),
935 (ip_re+r'(?:/(?:'+ip_re+r')|\d+)?', Number),
936 (r'\b\d+\b', Number),
937 (r'\S+', Text),
938 ],
939 'comment': [
940 (r'\s*TAG:.*', String.Escape, '#pop'),
941 (r'.*', Comment, '#pop'),
942 ],
943 }
944
945
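# What makelistre() above produces (illustrative): a single word-boundary
# alternation, so one rule covers a whole keyword list.  For example:
#
#     makelistre(["acl", "http_port"])  ==  r'\b(?:acl|http_port)\b'
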
946class DebianControlLexer(RegexLexer):
947 """
948 Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.
949
950 *New in Pygments 0.9.*
951 """
952 name = 'Debian Control file'
953 aliases = ['control']
954 filenames = ['control']
955
956 tokens = {
957 'root': [
958 (r'^(Description)', Keyword, 'description'),
959 (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'),
960 (r'^((Build-)?Depends)', Keyword, 'depends'),
961 (r'^((?:Python-)?Version)(:\s*)([^\s]+)$',
962 bygroups(Keyword, Text, Number)),
963 (r'^((?:Installed-)?Size)(:\s*)([^\s]+)$',
964 bygroups(Keyword, Text, Number)),
965 (r'^(MD5Sum|SHA1|SHA256)(:\s*)([^\s]+)$',
966 bygroups(Keyword, Text, Number)),
967 (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
968 bygroups(Keyword, Whitespace, String)),
969 ],
970 'maintainer': [
971 (r'<[^>]+>', Generic.Strong),
972 (r'<[^>]+>$', Generic.Strong, '#pop'),
973 (r',\n?', Text),
974 (r'.', Text),
975 ],
976 'description': [
977 (r'(.*)(Homepage)(: )([^\s]+)', bygroups(Text, String, Name, Name.Class)),
978 (r':.*\n', Generic.Strong),
979 (r' .*\n', Text),
980 ('', Text, '#pop'),
981 ],
982 'depends': [
983 (r':\s*', Text),
984 (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)),
985 (r'\(', Text, 'depend_vers'),
986 (r',', Text),
987 (r'\|', Operator),
988 (r'[\s]+', Text),
989 (r'[}\)]\s*$', Text, '#pop'),
990 (r'[}]', Text),
991 (r'[^,]$', Name.Function, '#pop'),
992 (r'([\+\.a-zA-Z0-9-][\s\n]*)', Name.Function),
993 ],
994 'depend_vers': [
995 (r'\),', Text, '#pop'),
996 (r'\)[^,]', Text, '#pop:2'),
997 (r'([><=]+)(\s*)([^\)]+)', bygroups(Operator, Text, Number))
998 ]
999 }
1000
1001
1002class YamlLexerContext(LexerContext):
1003 """Indentation context for the YAML lexer."""
1004
1005 def __init__(self, *args, **kwds):
1006 super(YamlLexerContext, self).__init__(*args, **kwds)
1007 self.indent_stack = []
1008 self.indent = -1
1009 self.next_indent = 0
1010 self.block_scalar_indent = None
1011
1012
1013class YamlLexer(ExtendedRegexLexer):
1014 """
1015 Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
1016 language.
1017
1018 *New in Pygments 0.11.*
1019 """
1020
1021 name = 'YAML'
1022 aliases = ['yaml']
1023 filenames = ['*.yaml', '*.yml']
1024 mimetypes = ['text/x-yaml']
1025
1026
1027 def something(token_class):
1028 """Do not produce empty tokens."""
1029 def callback(lexer, match, context):
1030 text = match.group()
1031 if not text:
1032 return
1033 yield match.start(), token_class, text
1034 context.pos = match.end()
1035 return callback
1036
1037 def reset_indent(token_class):
1038 """Reset the indentation levels."""
1039 def callback(lexer, match, context):
1040 text = match.group()
1041 context.indent_stack = []
1042 context.indent = -1
1043 context.next_indent = 0
1044 context.block_scalar_indent = None
1045 yield match.start(), token_class, text
1046 context.pos = match.end()
1047 return callback
1048
1049 def save_indent(token_class, start=False):
1050 """Save a possible indentation level."""
1051 def callback(lexer, match, context):
1052 text = match.group()
1053 extra = ''
1054 if start:
1055 context.next_indent = len(text)
1056 if context.next_indent < context.indent:
1057 while context.next_indent < context.indent:
1058 context.indent = context.indent_stack.pop()
1059 if context.next_indent > context.indent:
1060 extra = text[context.indent:]
1061 text = text[:context.indent]
1062 else:
1063 context.next_indent += len(text)
1064 if text:
1065 yield match.start(), token_class, text
1066 if extra:
1067 yield match.start()+len(text), token_class.Error, extra
1068 context.pos = match.end()
1069 return callback
1070
1071 def set_indent(token_class, implicit=False):
1072 """Set the previously saved indentation level."""
1073 def callback(lexer, match, context):
1074 text = match.group()
1075 if context.indent < context.next_indent:
1076 context.indent_stack.append(context.indent)
1077 context.indent = context.next_indent
1078 if not implicit:
1079 context.next_indent += len(text)
1080 yield match.start(), token_class, text
1081 context.pos = match.end()
1082 return callback
1083
1084 def set_block_scalar_indent(token_class):
1085 """Set an explicit indentation level for a block scalar."""
1086 def callback(lexer, match, context):
1087 text = match.group()
1088 context.block_scalar_indent = None
1089 if not text:
1090 return
1091 increment = match.group(1)
1092 if increment:
1093 current_indent = max(context.indent, 0)
1094 increment = int(increment)
1095 context.block_scalar_indent = current_indent + increment
1096 if text:
1097 yield match.start(), token_class, text
1098 context.pos = match.end()
1099 return callback
1100
1101 def parse_block_scalar_empty_line(indent_token_class, content_token_class):
1102 """Process an empty line in a block scalar."""
1103 def callback(lexer, match, context):
1104 text = match.group()
1105 if (context.block_scalar_indent is None or
1106 len(text) <= context.block_scalar_indent):
1107 if text:
1108 yield match.start(), indent_token_class, text
1109 else:
1110 indentation = text[:context.block_scalar_indent]
1111 content = text[context.block_scalar_indent:]
1112 yield match.start(), indent_token_class, indentation
1113 yield (match.start()+context.block_scalar_indent,
1114 content_token_class, content)
1115 context.pos = match.end()
1116 return callback
1117
1118 def parse_block_scalar_indent(token_class):
1119 """Process indentation spaces in a block scalar."""
1120 def callback(lexer, match, context):
1121 text = match.group()
1122 if context.block_scalar_indent is None:
1123 if len(text) <= max(context.indent, 0):
1124 context.stack.pop()
1125 context.stack.pop()
1126 return
1127 context.block_scalar_indent = len(text)
1128 else:
1129 if len(text) < context.block_scalar_indent:
1130 context.stack.pop()
1131 context.stack.pop()
1132 return
1133 if text:
1134 yield match.start(), token_class, text
1135 context.pos = match.end()
1136 return callback
1137
1138 def parse_plain_scalar_indent(token_class):
1139 """Process indentation spaces in a plain scalar."""
1140 def callback(lexer, match, context):
1141 text = match.group()
1142 if len(text) <= context.indent:
1143 context.stack.pop()
1144 context.stack.pop()
1145 return
1146 if text:
1147 yield match.start(), token_class, text
1148 context.pos = match.end()
1149 return callback
1150
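    # Indentation tracking sketch (illustrative): the callbacks above keep the
    # current block indentation in ctx.indent and the enclosing levels in
    # ctx.indent_stack.  For input like
    #
    #     foo:
    #       - bar
    #
    # save_indent() records the two leading spaces before '-', set_indent()
    # commits that level when the '-' indicator is matched, and the stack is
    # popped again once a later line is indented less deeply.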
1151
1152
1153 tokens = {
1154 # the root rules
1155 'root': [
1156 # ignored whitespaces
1157 (r'[ ]+(?=#|$)', Text),
1158 # line breaks
1159 (r'\n+', Text),
1160 # a comment
1161 (r'#[^\n]*', Comment.Single),
1162 # the '%YAML' directive
1163 (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
1164 # the %TAG directive
1165 (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
1166 # document start and document end indicators
1167 (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
1168 'block-line'),
1169 # indentation spaces
1170 (r'[ ]*(?![ \t\n\r\f\v]|$)', save_indent(Text, start=True),
1171 ('block-line', 'indentation')),
1172 ],
1173
1174 # trailing whitespaces after directives or a block scalar indicator
1175 'ignored-line': [
1176 # ignored whitespaces
1177 (r'[ ]+(?=#|$)', Text),
1178 # a comment
1179 (r'#[^\n]*', Comment.Single),
1180 # line break
1181 (r'\n', Text, '#pop:2'),
1182 ],
1183
1184 # the %YAML directive
1185 'yaml-directive': [
1186 # the version number
1187 (r'([ ]+)([0-9]+\.[0-9]+)',
1188 bygroups(Text, Number), 'ignored-line'),
1189 ],
1190
1191 # the %TAG directive
1192 'tag-directive': [
1193 # a tag handle and the corresponding prefix
1194 (r'([ ]+)(!|![0-9A-Za-z_-]*!)'
1195 r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)',
1196 bygroups(Text, Keyword.Type, Text, Keyword.Type),
1197 'ignored-line'),
1198 ],
1199
1200 # block scalar indicators and indentation spaces
1201 'indentation': [
1202 # trailing whitespaces are ignored
1203 (r'[ ]*$', something(Text), '#pop:2'),
1204 # whitespaces preceding block collection indicators
1205 (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
1206 # block collection indicators
1207 (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
1208 # the beginning of a block line
1209 (r'[ ]*', save_indent(Text), '#pop'),
1210 ],
1211
1212 # an indented line in the block context
1213 'block-line': [
1214 # the line end
1215 (r'[ ]*(?=#|$)', something(Text), '#pop'),
1216 # whitespaces separating tokens
1217 (r'[ ]+', Text),
1218 # tags, anchors and aliases,
1219 include('descriptors'),
1220 # block collections and scalars
1221 include('block-nodes'),
1222 # flow collections and quoted scalars
1223 include('flow-nodes'),
1224 # a plain scalar
1225 (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])',
1226 something(Name.Variable),
1227 'plain-scalar-in-block-context'),
1228 ],
1229
1230 # tags, anchors, aliases
1231 'descriptors' : [
1232 # a full-form tag
1233 (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Keyword.Type),
1234 # a tag in the form '!', '!suffix' or '!handle!suffix'
1235 (r'!(?:[0-9A-Za-z_-]+)?'
1236 r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Keyword.Type),
1237 # an anchor
1238 (r'&[0-9A-Za-z_-]+', Name.Label),
1239 # an alias
1240 (r'\*[0-9A-Za-z_-]+', Name.Variable),
1241 ],
1242
1243 # block collections and scalars
1244 'block-nodes': [
1245 # implicit key
1246 (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
1247 # literal and folded scalars
1248 (r'[|>]', Punctuation.Indicator,
1249 ('block-scalar-content', 'block-scalar-header')),
1250 ],
1251
1252 # flow collections and quoted scalars
1253 'flow-nodes': [
1254 # a flow sequence
1255 (r'\[', Punctuation.Indicator, 'flow-sequence'),
1256 # a flow mapping
1257 (r'\{', Punctuation.Indicator, 'flow-mapping'),
1258 # a single-quoted scalar
1259 (r'\'', String, 'single-quoted-scalar'),
1260 # a double-quoted scalar
1261 (r'\"', String, 'double-quoted-scalar'),
1262 ],
1263
1264 # the content of a flow collection
1265 'flow-collection': [
1266 # whitespaces
1267 (r'[ ]+', Text),
1268 # line breaks
1269 (r'\n+', Text),
1270 # a comment
1271 (r'#[^\n]*', Comment.Single),
1272 # simple indicators
1273 (r'[?:,]', Punctuation.Indicator),
1274 # tags, anchors and aliases
1275 include('descriptors'),
1276 # nested collections and quoted scalars
1277 include('flow-nodes'),
1278 # a plain scalar
1279 (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])',
1280 something(Name.Variable),
1281 'plain-scalar-in-flow-context'),
1282 ],
1283
1284 # a flow sequence indicated by '[' and ']'
1285 'flow-sequence': [
1286 # include flow collection rules
1287 include('flow-collection'),
1288 # the closing indicator
1289 (r'\]', Punctuation.Indicator, '#pop'),
1290 ],
1291
1292 # a flow mapping indicated by '{' and '}'
1293 'flow-mapping': [
1294 # include flow collection rules
1295 include('flow-collection'),
1296 # the closing indicator
1297 (r'\}', Punctuation.Indicator, '#pop'),
1298 ],
1299
1300 # block scalar lines
1301 'block-scalar-content': [
1302 # line break
1303 (r'\n', Text),
1304 # empty line
1305 (r'^[ ]+$',
1306 parse_block_scalar_empty_line(Text, Name.Constant)),
1307 # indentation spaces (we may leave the state here)
1308 (r'^[ ]*', parse_block_scalar_indent(Text)),
1309 # line content
1310 (r'[^\n\r\f\v]+', Name.Constant),
1311 ],
1312
1313 # the content of a literal or folded scalar
1314 'block-scalar-header': [
1315 # indentation indicator followed by chomping flag
1316 (r'([1-9])?[+-]?(?=[ ]|$)',
1317 set_block_scalar_indent(Punctuation.Indicator),
1318 'ignored-line'),
1319 # chomping flag followed by indentation indicator
1320 (r'[+-]?([1-9])?(?=[ ]|$)',
1321 set_block_scalar_indent(Punctuation.Indicator),
1322 'ignored-line'),
1323 ],
1324
1325 # ignored and regular whitespaces in quoted scalars
1326 'quoted-scalar-whitespaces': [
1327 # leading and trailing whitespaces are ignored
1328 (r'^[ ]+|[ ]+$', Text),
1329 # line breaks are ignored
1330 (r'\n+', Text),
1331 # other whitespaces are a part of the value
1332 (r'[ ]+', Name.Variable),
1333 ],
1334
1335 # single-quoted scalars
1336 'single-quoted-scalar': [
1337 # include whitespace and line break rules
1338 include('quoted-scalar-whitespaces'),
1339 # escaping of the quote character
1340 (r'\'\'', String.Escape),
1341 # regular non-whitespace characters
1342 (r'[^ \t\n\r\f\v\']+', String),
1343 # the closing quote
1344 (r'\'', String, '#pop'),
1345 ],
1346
1347 # double-quoted scalars
1348 'double-quoted-scalar': [
1349 # include whitespace and line break rules
1350 include('quoted-scalar-whitespaces'),
1351 # escaping of special characters
1352 (r'\\[0abt\tn\nvfre "\\N_LP]', String),
1353 # escape codes
1354 (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
1355 String.Escape),
1356 # regular non-whitespace characters
1357 (r'[^ \t\n\r\f\v\"\\]+', String),
1358 # the closing quote
1359 (r'"', String, '#pop'),
1360 ],
1361
1362 # the beginning of a new line while scanning a plain scalar
1363 'plain-scalar-in-block-context-new-line': [
1364 # empty lines
1365 (r'^[ ]+$', Text),
1366 # line breaks
1367 (r'\n+', Text),
1368 # document start and document end indicators
1369 (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
1370 # indentation spaces (we may leave the block line state here)
1371 (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
1372 ],
1373
1374 # a plain scalar in the block context
1375 'plain-scalar-in-block-context': [
1376 # the scalar ends with the ':' indicator
1377 (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
1378 # the scalar ends w…