
/pygments/lexers/text.py

https://bitbucket.org/birkenfeld/pygments-main

# -*- coding: utf-8 -*-
"""
    pygments.lexers.text
    ~~~~~~~~~~~~~~~~~~~~

    Lexers for non-source code file types.

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
from bisect import bisect

from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, \
     bygroups, include, using, this, do_insertions
from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \
     Generic, Operator, Number, Whitespace, Literal
from pygments.util import get_bool_opt, ClassNotFound
from pygments.lexers.other import BashLexer

__all__ = ['IniLexer', 'PropertiesLexer', 'SourcesListLexer', 'BaseMakefileLexer',
           'MakefileLexer', 'DiffLexer', 'IrcLogsLexer', 'TexLexer',
           'GroffLexer', 'ApacheConfLexer', 'BBCodeLexer', 'MoinWikiLexer',
           'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer',
           'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer',
           'LighttpdConfLexer', 'NginxConfLexer', 'CMakeLexer', 'HttpLexer',
           'PyPyLogLexer', 'RegeditLexer', 'HxmlLexer', 'EbnfLexer']

class IniLexer(RegexLexer):
    """
    Lexer for configuration files in INI style.
    """

    name = 'INI'
    aliases = ['ini', 'cfg', 'dosini']
    filenames = ['*.ini', '*.cfg']
    mimetypes = ['text/x-ini']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'[;#].*', Comment.Single),
            (r'\[.*?\]$', Keyword),
            (r'(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)',
             bygroups(Name.Attribute, Text, Operator, Text, String))
        ]
    }

    def analyse_text(text):
        npos = text.find('\n')
        if npos < 3:
            return False
        return text[0] == '[' and text[npos-1] == ']'
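

# Illustrative sketch (not part of the upstream module): driving IniLexer
# through the public pygments API. The sample configuration text is invented
# for the example; note that analyse_text() above only checks that the first
# line looks like "[section]", which is what guess_lexer() relies on.
def _example_ini_usage():
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    sample = "[server]\nhost = 127.0.0.1\nport = 8080\n"
    assert IniLexer.analyse_text(sample)  # heuristic returns a truthy score here
    return highlight(sample, IniLexer(), HtmlFormatter())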

class RegeditLexer(RegexLexer):
    """
    Lexer for `Windows Registry
    <http://en.wikipedia.org/wiki/Windows_Registry#.REG_files>`_ files produced
    by regedit.

    *New in Pygments 1.6.*
    """

    name = 'reg'
    aliases = ['registry']
    filenames = ['*.reg']
    mimetypes = ['text/x-windows-registry']

    tokens = {
        'root': [
            (r'Windows Registry Editor.*', Text),
            (r'\s+', Text),
            (r'[;#].*', Comment.Single),
            (r'(\[)(-?)(HKEY_[A-Z_]+)(.*?\])$',
             bygroups(Keyword, Operator, Name.Builtin, Keyword)),
            # String keys, which obey somewhat normal escaping
            (r'("(?:\\"|\\\\|[^"])+")([ \t]*)(=)([ \t]*)',
             bygroups(Name.Attribute, Text, Operator, Text),
             'value'),
            # Bare keys (includes @)
            (r'(.*?)([ \t]*)(=)([ \t]*)',
             bygroups(Name.Attribute, Text, Operator, Text),
             'value'),
        ],
        'value': [
            (r'-', Operator, '#pop'),  # delete value
            (r'(dword|hex(?:\([0-9a-fA-F]\))?)(:)([0-9a-fA-F,]+)',
             bygroups(Name.Variable, Punctuation, Number), '#pop'),
            # As far as I know, .reg files do not support line continuation.
            (r'.*', String, '#pop'),
        ]
    }

    def analyse_text(text):
        return text.startswith('Windows Registry Editor')
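

# Illustrative sketch (not part of the upstream module): RegeditLexer is
# selected by filename (*.reg) or by analyse_text(), which simply looks for
# the header line regedit writes at the top of exported files. The registry
# snippet below is invented.
def _example_reg_usage():
    sample = ('Windows Registry Editor Version 5.00\n'
              '[HKEY_CURRENT_USER\\Software\\Example]\n'
              '"Value"=dword:0000002a\n')
    assert RegeditLexer.analyse_text(sample)
    return list(RegeditLexer().get_tokens(sample))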

class PropertiesLexer(RegexLexer):
    """
    Lexer for configuration files in Java's properties format.

    *New in Pygments 1.4.*
    """

    name = 'Properties'
    aliases = ['properties', 'jproperties']
    filenames = ['*.properties']
    mimetypes = ['text/x-java-properties']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(?:[;#]|//).*$', Comment),
            (r'(.*?)([ \t]*)([=:])([ \t]*)(.*(?:(?<=\\)\n.*)*)',
             bygroups(Name.Attribute, Text, Operator, Text, String)),
        ],
    }

class SourcesListLexer(RegexLexer):
    """
    Lexer that highlights debian sources.list files.

    *New in Pygments 0.7.*
    """

    name = 'Debian Sourcelist'
    aliases = ['sourceslist', 'sources.list', 'debsources']
    filenames = ['sources.list']
    mimetype = ['application/x-debian-sourceslist']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*?$', Comment),
            (r'^(deb(?:-src)?)(\s+)',
             bygroups(Keyword, Text), 'distribution')
        ],
        'distribution': [
            (r'#.*?$', Comment, '#pop'),
            (r'\$\(ARCH\)', Name.Variable),
            (r'[^\s$[]+', String),
            (r'\[', String.Other, 'escaped-distribution'),
            (r'\$', String),
            (r'\s+', Text, 'components')
        ],
        'escaped-distribution': [
            (r'\]', String.Other, '#pop'),
            (r'\$\(ARCH\)', Name.Variable),
            (r'[^\]$]+', String.Other),
            (r'\$', String.Other)
        ],
        'components': [
            (r'#.*?$', Comment, '#pop:2'),
            (r'$', Text, '#pop:2'),
            (r'\s+', Text),
            (r'\S+', Keyword.Pseudo),
        ]
    }

    def analyse_text(text):
        for line in text.split('\n'):
            line = line.strip()
            if not (line.startswith('#') or line.startswith('deb ') or
                    line.startswith('deb-src ') or not line):
                return False
        return True
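

# Illustrative sketch (not part of the upstream module): analyse_text() above
# accepts a document only if every non-empty line is a comment or a deb/deb-src
# entry. The sources.list line below is invented.
def _example_sources_list():
    sample = "# main repo\ndeb http://deb.debian.org/debian stable main contrib\n"
    assert SourcesListLexer.analyse_text(sample)
    return list(SourcesListLexer().get_tokens(sample))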

class MakefileLexer(Lexer):
    """
    Lexer for BSD and GNU make extensions (lenient enough to handle both in
    the same file even).

    *Rewritten in Pygments 0.10.*
    """

    name = 'Makefile'
    aliases = ['make', 'makefile', 'mf', 'bsdmake']
    filenames = ['*.mak', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile']
    mimetypes = ['text/x-makefile']

    r_special = re.compile(r'^(?:'
                           # BSD Make
                           r'\.\s*(include|undef|error|warning|if|else|elif|endif|for|endfor)|'
                           # GNU Make
                           r'\s*(ifeq|ifneq|ifdef|ifndef|else|endif|-?include|define|endef|:))(?=\s)')
    r_comment = re.compile(r'^\s*@?#')

    def get_tokens_unprocessed(self, text):
        ins = []
        lines = text.splitlines(True)
        done = ''
        lex = BaseMakefileLexer(**self.options)
        backslashflag = False
        for line in lines:
            if self.r_special.match(line) or backslashflag:
                ins.append((len(done), [(0, Comment.Preproc, line)]))
                backslashflag = line.strip().endswith('\\')
            elif self.r_comment.match(line):
                ins.append((len(done), [(0, Comment, line)]))
            else:
                done += line
        for item in do_insertions(ins, lex.get_tokens_unprocessed(done)):
            yield item
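

# Illustrative sketch (not part of the upstream module): MakefileLexer routes
# GNU/BSD preprocessor-style lines (matched by r_special) and comment lines to
# fixed token types and lexes everything else with BaseMakefileLexer, merging
# the two streams back together via do_insertions(). The makefile text below
# is invented.
def _example_makefile_tokens():
    sample = "ifdef DEBUG\nCFLAGS += -g\nendif\nall: main.o\n\tcc -o all main.o\n"
    return list(MakefileLexer().get_tokens(sample))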

class BaseMakefileLexer(RegexLexer):
    """
    Lexer for simple Makefiles (no preprocessing).

    *New in Pygments 0.10.*
    """

    name = 'Base Makefile'
    aliases = ['basemake']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)),
            (r'\$\((?:.*\\\n|.*\n)+', using(BashLexer)),
            (r'\s+', Text),
            (r'#.*?\n', Comment),
            (r'(export)(\s+)(?=[a-zA-Z0-9_${}\t -]+\n)',
             bygroups(Keyword, Text), 'export'),
            (r'export\s+', Keyword),
            # assignment
            (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)',
             bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))),
            # strings
            (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double),
            (r"(?s)'(\\\\|\\.|[^'\\])*'", String.Single),
            # targets
            (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text),
             'block-header'),
            # TODO: add paren handling (grr)
        ],
        'export': [
            (r'[a-zA-Z0-9_${}-]+', Name.Variable),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
        ],
        'block-header': [
            (r'[^,\\\n#]+', Number),
            (r',', Punctuation),
            (r'#.*?\n', Comment),
            (r'\\\n', Text),  # line continuation
            (r'\\.', Text),
            (r'(?:[\t ]+.*\n|\n)+', using(BashLexer), '#pop'),
        ],
    }

class DiffLexer(RegexLexer):
    """
    Lexer for unified or context-style diffs or patches.
    """

    name = 'Diff'
    aliases = ['diff', 'udiff']
    filenames = ['*.diff', '*.patch']
    mimetypes = ['text/x-diff', 'text/x-patch']

    tokens = {
        'root': [
            (r' .*\n', Text),
            (r'\+.*\n', Generic.Inserted),
            (r'-.*\n', Generic.Deleted),
            (r'!.*\n', Generic.Strong),
            (r'@.*\n', Generic.Subheading),
            (r'([Ii]ndex|diff).*\n', Generic.Heading),
            (r'=.*\n', Generic.Heading),
            (r'.*\n', Text),
        ]
    }

    def analyse_text(text):
        if text[:7] == 'Index: ':
            return True
        if text[:5] == 'diff ':
            return True
        if text[:4] == '--- ':
            return 0.9
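

# Illustrative sketch (not part of the upstream module): the analyse_text()
# heuristic above lets pygments.lexers.guess_lexer() pick DiffLexer for text
# that starts like a unified diff. The snippet is invented.
def _example_guess_diff():
    from pygments.lexers import guess_lexer
    sample = "--- a/old.txt\n+++ b/new.txt\n@@ -1 +1 @@\n-old line\n+new line\n"
    return guess_lexer(sample)  # expected to be a DiffLexer instance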

DPATCH_KEYWORDS = ['hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move',
                   'replace']


class DarcsPatchLexer(RegexLexer):
    """
    DarcsPatchLexer is a lexer for the various versions of the darcs patch
    format.  Examples of this format are derived by commands such as
    ``darcs annotate --patch`` and ``darcs send``.

    *New in Pygments 0.10.*
    """

    name = 'Darcs Patch'
    aliases = ['dpatch']
    filenames = ['*.dpatch', '*.darcspatch']

    tokens = {
        'root': [
            (r'<', Operator),
            (r'>', Operator),
            (r'{', Operator),
            (r'}', Operator),
            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])',
             bygroups(Operator, Keyword, Name, Text, Name, Operator,
                      Literal.Date, Text, Operator)),
            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
             bygroups(Operator, Keyword, Name, Text, Name, Operator,
                      Literal.Date, Text), 'comment'),
            (r'New patches:', Generic.Heading),
            (r'Context:', Generic.Heading),
            (r'Patch bundle hash:', Generic.Heading),
            (r'(\s*)(%s)(.*\n)' % '|'.join(DPATCH_KEYWORDS),
             bygroups(Text, Keyword, Text)),
            (r'\+', Generic.Inserted, "insert"),
            (r'-', Generic.Deleted, "delete"),
            (r'.*\n', Text),
        ],
        'comment': [
            (r'[^\]].*\n', Comment),
            (r'\]', Operator, "#pop"),
        ],
        'specialText': [  # darcs add [_CODE_] special operators for clarity
            (r'\n', Text, "#pop"),  # line-based
            (r'\[_[^_]*_]', Operator),
        ],
        'insert': [
            include('specialText'),
            (r'\[', Generic.Inserted),
            (r'[^\n\[]+', Generic.Inserted),
        ],
        'delete': [
            include('specialText'),
            (r'\[', Generic.Deleted),
            (r'[^\n\[]+', Generic.Deleted),
        ],
    }

class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/]?)+ # Date as - or /-separated groups of digits
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.]?)+      # Time as :/.-separated groups of 1 or 2 digits
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """

    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$',
             bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                         # Nick + Space
                (.*?\n)                          # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }

class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    *New in Pygments 0.6.*
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }

class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            (r'', Text, '#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True

class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    *New in Pygments 0.6.*
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]*', Text, 'textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        if text[1:3].isalnum() and text[3].isspace():
            return 0.9

class ApacheConfLexer(RegexLexer):
    """
    Lexer for configuration files following the Apache config file
    format.

    *New in Pygments 0.6.*
    """

    name = 'ApacheConf'
    aliases = ['apacheconf', 'aconf', 'apache']
    filenames = ['.htaccess', 'apache.conf', 'apache2.conf']
    mimetypes = ['text/x-apacheconf']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(#.*?)$', Comment),
            (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)',
             bygroups(Name.Tag, Text, String, Name.Tag)),
            (r'([a-zA-Z][a-zA-Z0-9_]*)(\s+)',
             bygroups(Name.Builtin, Text), 'value'),
            (r'\.+', Text),
        ],
        'value': [
            (r'$', Text, '#pop'),
            (r'[^\S\n]+', Text),
            (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number),
            (r'\d+', Number),
            (r'/([a-zA-Z0-9][a-zA-Z0-9_./-]+)', String.Other),
            (r'(on|off|none|any|all|double|email|dns|min|minimal|'
             r'os|productonly|full|emerg|alert|crit|error|warn|'
             r'notice|info|debug|registry|script|inetd|standalone|'
             r'user|group)\b', Keyword),
            (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double),
            (r'[^\s"]+', Text)
        ]
    }

class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    *New in Pygments 0.7.*
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'({{{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'}}}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'{{{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }

class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    *New in Pygments 0.7.*

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language`` and
        ``.. code:: language`` directives with a lexer for the given
        language (default: ``True``). *New in Pygments 0.8.*
    """

    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

    # from docutils.parsers.rst.states
    closers = u'\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list
            (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)),
            (r'^( *)(:.*?:)([ \t]+)(.*?)$',
             bygroups(Text, Name.Class, Text, Name.Function)),
            # Definition list
            (r'^([^ ].*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),                         # escape
            (r'``', String, 'literal'),             # code
            (r'(`.+?)(<.+?>)(`__?)',                # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),                  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),   # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),   # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),       # Strong emphasis
            (r'\*.+?\*', Generic.Emph),             # Emphasis
            (r'\[.*?\]_', String),                  # Footnote or citation
            (r'<.+?>', Name.Tag),                   # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                # has two lines
            p1 * 2 + 1 == p2 and       # they are the same length
            text[p1+1] in '-=' and     # the next line both starts and ends with
            text[p1+1] == text[p2-1]): # ...a sufficiently high header
            return 0.5
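

# Illustrative sketch (not part of the upstream module): the `handlecodeblocks`
# option controls whether the bodies of `.. sourcecode::` / `.. code::`
# directives are re-lexed with the named language's lexer (see
# _handle_sourcecode above). The reST snippet is invented.
def _example_rst_codeblocks():
    sample = ".. sourcecode:: python\n\n    print('hi')\n\n"
    with_nested = list(RstLexer().get_tokens(sample))
    plain = list(RstLexer(handlecodeblocks=False).get_tokens(sample))
    return with_nested, plain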

class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    *New in Pygments 0.8.*
    """

    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    tokens = {
        'root': [
            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/(\\\\|\\/|[^\n/])*/', String.Regex),
            (r'"(\\\\|\\"|[^\n"])*"', String.Double),
            (r"'(\\\\|\\'|[^\n'])*'", String.Single),

            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list.  Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        from pygments.lexers._vimbuiltins import command, option, auto
        self._cmd = command
        self._opt = option
        self._aut = auto

        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them.  In fact,
        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        p = bisect(mapping, (w,))
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w:
                return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                   mapping[p][1][:len(w)] == w
        return False

    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                     self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value
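

# Illustrative sketch (not part of the upstream module): how is_in() resolves
# VimL abbreviations against the sorted (abbreviation, full-name) pairs from
# pygments.lexers._vimbuiltins, so that e.g. both ':ab' and ':abbreviate' are
# recognised as the same command. Both calls below are expected to be truthy.
def _example_vim_is_in():
    lx = VimLexer()
    return lx.is_in('ab', lx._cmd), lx.is_in('abbreviate', lx._cmd)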

class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    *New in Pygments 0.9.*
    """

    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            #(r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }

class SquidConfLexer(RegexLexer):
    """
    Lexer for `squid <http://www.squid-cache.org/>`_ configuration files.

    *New in Pygments 0.9.*
    """

    name = 'SquidConf'
    aliases = ['squidconf', 'squid.conf', 'squid']
    filenames = ['squid.conf']
    mimetypes = ['text/x-squidconf']
    flags = re.IGNORECASE

    keywords = [
        "access_log", "acl", "always_direct", "announce_host",
        "announce_period", "announce_port", "announce_to", "anonymize_headers",
        "append_domain", "as_whois_server", "auth_param_basic",
        "authenticate_children", "authenticate_program", "authenticate_ttl",
        "broken_posts", "buffered_logs", "cache_access_log", "cache_announce",
        "cache_dir", "cache_dns_program", "cache_effective_group",
        "cache_effective_user", "cache_host", "cache_host_acl",
        "cache_host_domain", "cache_log", "cache_mem", "cache_mem_high",
        "cache_mem_low", "cache_mgr", "cachemgr_passwd", "cache_peer",
        "cache_peer_access", "cahce_replacement_policy", "cache_stoplist",
        "cache_stoplist_pattern", "cache_store_log", "cache_swap",
        "cache_swap_high", "cache_swap_log", "cache_swap_low", "client_db",
        "client_lifetime", "client_netmask", "connect_timeout", "coredump_dir",
        "dead_peer_timeout", "debug_options", "delay_access", "delay_class",
        "delay_initial_bucket_level", "delay_parameters", "delay_pools",
        "deny_info", "dns_children", "dns_defnames", "dns_nameservers",
        "dns_testnames", "emulate_httpd_log", "err_html_text",
        "fake_user_agent", "firewall_ip", "forwarded_for", "forward_snmpd_port",
        "fqdncache_size", "ftpget_options", "ftpget_program", "ftp_list_width",
        "ftp_passive", "ftp_user", "half_closed_clients", "header_access",
        "header_replace", "hierarchy_stoplist", "high_response_time_warning",
        "high_page_fault_warning", "hosts_file", "htcp_port", "http_access",
        "http_anonymizer", "httpd_accel", "httpd_accel_host",
        "httpd_accel_port", "httpd_accel_uses_host_header",
        "httpd_accel_with_proxy", "http_port", "http_reply_access",
        "icp_access", "icp_hit_stale", "icp_port", "icp_query_timeout",
        "ident_lookup", "ident_lookup_access", "ident_timeout",
        "incoming_http_average", "incoming_icp_average", "inside_firewall",
        "ipcache_high", "ipcache_low", "ipcache_size", "local_domain",
        "local_ip", "logfile_rotate", "log_fqdn", "log_icp_queries",
        "log_mime_hdrs", "maximum_object_size", "maximum_single_addr_tries",
        "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr",
        "mcast_miss_encode_key", "mcast_miss_port", "memory_pools",
        "memory_pools_limit", "memory_replacement_policy", "mime_table",
        "min_http_poll_cnt", "min_icp_poll_cnt", "minimum_direct_hops",
        "minimum_object_size", "minimum_retry_timeout", "miss_access",
        "negative_dns_ttl", "negative_ttl", "neighbor_timeout",
        "neighbor_type_domain", "netdb_high", "netdb_low", "netdb_ping_period",
        "netdb_ping_rate", "never_direct", "no_cache", "passthrough_proxy",
        "pconn_timeout", "pid_filename", "pinger_program", "positive_dns_ttl",
        "prefer_direct", "proxy_auth", "proxy_auth_realm", "query_icmp",
        "quick_abort", "quick_abort", "quick_abort_max", "quick_abort_min",
        "quick_abort_pct", "range_offset_limit", "read_timeout",
        "redirect_children", "redirect_program",
        "redirect_rewrites_host_header", "reference_age", "reference_age",
        "refresh_pattern", "reload_into_ims", "request_body_max_size",
        "request_size", "request_timeout", "shutdown_lifetime",
        "single_parent_bypass", "siteselect_timeout", "snmp_access",
        "snmp_incoming_address", "snmp_port", "source_ping", "ssl_proxy",
        "store_avg_object_size", "store_objects_per_bucket",
        "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs",
        "tcp_incoming_address", "tcp_outgoing_address", "tcp_recv_bufsize",
        "test_reachability", "udp_hit_obj", "udp_hit_obj_size",
        "udp_incoming_address", "udp_outgoing_address", "unique_hostname",
        "unlinkd_program", "uri_whitespace", "useragent_log",
        "visible_hostname", "wais_relay", "wais_relay_host", "wais_relay_port",
    ]

    opts = [
        "proxy-only", "weight", "ttl", "no-query", "default", "round-robin",
        "multicast-responder", "on", "off", "all", "deny", "allow", "via",
        "parent", "no-digest", "heap", "lru", "realm", "children", "q1", "q2",
        "credentialsttl", "none", "disable", "offline_toggle", "diskd",
    ]

    actions = [
        "shutdown", "info", "parameter", "server_list", "client_list",
        r'squid\.conf',
    ]

    actions_stats = [
        "objects", "vm_objects", "utilization", "ipcache", "fqdncache", "dns",
        "redirector", "io", "reply_headers", "filedescriptors", "netdb",
    ]

    actions_log = ["status", "enable", "disable", "clear"]

    acls = [
        "url_regex", "urlpath_regex", "referer_regex", "port", "proto",
        "req_mime_type", "rep_mime_type", "method", "browser", "user", "src",
        "dst", "time", "dstdomain", "ident", "snmp_community",
    ]

    ip_re = (
        r'(?:(?:(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|0x0*[0-9a-f]{1,2}|'
        r'0+[1-3]?[0-7]{0,2})(?:\.(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|'
        r'0x0*[0-9a-f]{1,2}|0+[1-3]?[0-7]{0,2})){3})|(?!.*::.*::)(?:(?!:)|'
        r':(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)){6}(?:[0-9a-f]{0,4}'
        r'(?:(?<=::)|(?<!::):)[0-9a-f]{0,4}(?:(?<=::)|(?<!:)|(?<=:)(?<!::):)|'
        r'(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-4]|2[0-4]\d|1\d\d|'
        r'[1-9]?\d)){3}))'
    )

    def makelistre(list):
        return r'\b(?:' + '|'.join(list) + r')\b'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#', Comment, 'comment'),
            (makelistre(keywords), Keyword),
            (makelistre(opts), Name.Constant),
            # Actions
            (makelistre(actions), String),
            (r'stats/'+makelistre(actions), String),
            (r'log/'+makelistre(actions)+r'=', String),
            (makelistre(acls), Keyword),
            (ip_re + r'(?:/(?:' + ip_re + r'|\b\d+\b))?', Number.Float),
            (r'(?:\b\d+\b(?:-\b\d+|%)?)', Number),
            (r'\S+', Text),
        ],
        'comment': [
            (r'\s*TAG:.*', String.Escape, '#pop'),
            (r'.*', Comment, '#pop'),
        ],
    }

class DebianControlLexer(RegexLexer):
    """
    Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.

    *New in Pygments 0.9.*
    """

    name = 'Debian Control file'
    aliases = ['control', 'debcontrol']
    filenames = ['control']

    tokens = {
        'root': [
            (r'^(Description)', Keyword, 'description'),
            (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'),
            (r'^((Build-)?Depends)', Keyword, 'depends'),
            (r'^((?:Python-)?Version)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            (r'^((?:Installed-)?Size)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
             bygroups(Keyword, Whitespace, String)),
        ],
        'maintainer': [
            (r'<[^>]+>', Generic.Strong),
            (r'<[^>]+>$', Generic.Strong, '#pop'),
            (r',\n?', Text),
            (r'.', Text),
        ],
        'description': [
            (r'(.*)(Homepage)(: )(\S+)',
             bygroups(Text, String, Name, Name.Class)),
            (r':.*\n', Generic.Strong),
            (r' .*\n', Text),
            ('', Text, '#pop'),
        ],
        'depends': [
            (r':\s*', Text),
            (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)),
            (r'\(', Text, 'depend_vers'),
            (r',', Text),
            (r'\|', Operator),
            (r'[\s]+', Text),
            (r'[}\)]\s*$', Text, '#pop'),
            (r'}', Text),
            (r'[^,]$', Name.Function, '#pop'),
            (r'([\+\.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)),
            (r'\[.*?\]', Name.Entity),
        ],
        'depend_vers': [
            (r'\),', Text, '#pop'),
            (r'\)[^,]', Text, '#pop:2'),
            (r'([><=]+)(\s*)([^\)]+)', bygroups(Operator, Text, Number))
        ]
    }

class YamlLexerContext(LexerContext):
    """Indentation context for the YAML lexer."""

    def __init__(self, *args, **kwds):
        super(YamlLexerContext, self).__init__(*args, **kwds)
        self.indent_stack = []
        self.indent = -1
        self.next_indent = 0
        self.block_scalar_indent = None
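

# Illustrative sketch (not part of the upstream module): YamlLexerContext just
# adds indentation-tracking state on top of LexerContext; the callback
# factories defined in YamlLexer below read and mutate these fields while the
# ExtendedRegexLexer walks the text. The input string is invented.
def _example_yaml_context():
    ctx = YamlLexerContext("key: value\n", 0)
    return ctx.indent, ctx.next_indent, ctx.indent_stack  # (-1, 0, [])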

class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    *New in Pygments 0.11.*
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    def something(token_class):
        """Do not produce empty tokens."""
        def callback(lexer, match, context):
            text = match.group()
            if not text:
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Reset the indentation levels."""
        def callback(lexer, match, context):
            text = match.group()
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Save a possible indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                if context.next_indent > context.indent:
                    extra = text[context.indent:]
                    text = text[:context.indent]
            else:
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Set the previously saved indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                return
            increment = match.group(1)
            if increment:
                current_indent = max(context.indent, 0)
                increment = int(increment)
                context.block_scalar_indent = current_indent + increment
            if text:
                yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process an empty line in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                    len(text) <= context.block_scalar_indent):
                if text:
                    yield match.start(), indent_token_class, text
            else:
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                if len(text) <= max(context.indent, 0):
                    context.stack.pop()
                    context.stack.pop()
                    return
                context.block_scalar_indent = len(text)
            else:
                if len(text) < context.block_scalar_indent:
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if len(text) <= context.indent:
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?![ \t\n\r\f\v]|$)', save_indent(Text, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Text, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![0-9A-Za-z_-]*!)'
             r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)',
             bygroups(Text, Keyword.Type, Text, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Text), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Text), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Text), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Text),
            # tags, anchors and aliases
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[0-9A-Za-z_-]+)?'
             r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Keyword.Type),
            # an anchor
            (r'&[0-9A-Za-z_-]+', Name.Label),
            # an alias
            (r'\*[0-9A-Za-z_-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a f

# [listing truncated here; the full file (1893 lines) is available at the repository URL above]