
/console/app/pygments/lexers/text.py

https://bitbucket.org/alex_muscar/myspace-competition-radar
# -*- coding: utf-8 -*-
"""
    pygments.lexers.text
    ~~~~~~~~~~~~~~~~~~~~

    Lexers for non-source code file types.

    :copyright: 2006-2008 by Armin Ronacher, Georg Brandl,
                Tim Hatch <tim@timhatch.com>,
                Ronny Pfannschmidt,
                Dennis Kaarsemaker,
                Kumar Appaiah <akumar@ee.iitm.ac.in>,
                Varun Hiremath <varunhiremath@gmail.com>,
                Jeremy Thurgood,
                Max Battcher,
                Kirill Simonov <xi@resolvent.net>.
    :license: BSD, see LICENSE for more details.
"""

import re
try:
    set
except NameError:
    from sets import Set as set
from bisect import bisect

from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, \
     bygroups, include, using, this, do_insertions
from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \
     Generic, Operator, Number, Whitespace, Literal
from pygments.util import get_bool_opt
from pygments.lexers.other import BashLexer

__all__ = ['IniLexer', 'SourcesListLexer', 'BaseMakefileLexer',
           'MakefileLexer', 'DiffLexer', 'IrcLogsLexer', 'TexLexer',
           'GroffLexer', 'ApacheConfLexer', 'BBCodeLexer', 'MoinWikiLexer',
           'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer',
           'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer',
           'LighttpdConfLexer', 'NginxConfLexer']


class IniLexer(RegexLexer):
    """
    Lexer for configuration files in INI style.
    """

    name = 'INI'
    aliases = ['ini', 'cfg']
    filenames = ['*.ini', '*.cfg', '*.properties']
    mimetypes = ['text/x-ini']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'[;#].*?$', Comment),
            (r'\[.*?\]$', Keyword),
            (r'(.*?)(\s*)(=)(\s*)(.*?)$',
             bygroups(Name.Attribute, Text, Operator, Text, String))
        ]
    }

    def analyse_text(text):
        npos = text.find('\n')
        if npos < 3:
            return False
        return text[0] == '[' and text[npos-1] == ']'
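
# Example usage (a minimal sketch, not part of the original module): feeding a
# small INI snippet through the lexer and printing the token stream.  Assumes
# a standard pygments installation.
#
#     from pygments.lexers.text import IniLexer
#
#     sample = "[server]\n; comment\nhost = localhost\n"
#     for token, value in IniLexer().get_tokens(sample):
#         print(token, repr(value))

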
class SourcesListLexer(RegexLexer):
    """
    Lexer that highlights debian sources.list files.

    *New in Pygments 0.7.*
    """

    name = 'Debian Sourcelist'
    aliases = ['sourceslist', 'sources.list']
    filenames = ['sources.list']
    mimetypes = ['application/x-debian-sourceslist']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*?$', Comment),
            (r'^(deb(?:-src)?)(\s+)',
             bygroups(Keyword, Text), 'distribution')
        ],
        'distribution': [
            (r'#.*?$', Comment, '#pop'),
            (r'\$\(ARCH\)', Name.Variable),
            (r'[^\s$[]+', String),
            (r'\[', String.Other, 'escaped-distribution'),
            (r'\$', String),
            (r'\s+', Text, 'components')
        ],
        'escaped-distribution': [
            (r'\]', String.Other, '#pop'),
            (r'\$\(ARCH\)', Name.Variable),
            (r'[^\]$]+', String.Other),
            (r'\$', String.Other)
        ],
        'components': [
            (r'#.*?$', Comment, '#pop:2'),
            (r'$', Text, '#pop:2'),
            (r'\s+', Text),
            (r'\S+', Keyword.Pseudo),
        ]
    }

    def analyse_text(text):
        for line in text.split('\n'):
            line = line.strip()
            if not (line.startswith('#') or line.startswith('deb ') or
                    line.startswith('deb-src ') or not line):
                return False
        return True


class MakefileLexer(Lexer):
    """
    Lexer for BSD and GNU make extensions (lenient enough to handle both in
    the same file even).

    *Rewritten in Pygments 0.10.*
    """

    name = 'Makefile'
    aliases = ['make', 'makefile', 'mf', 'bsdmake']
    filenames = ['*.mak', 'Makefile', 'makefile', 'Makefile.*']
    mimetypes = ['text/x-makefile']

    r_special = re.compile(r'^(?:'
        # BSD Make
        r'\.\s*(include|undef|error|warning|if|else|elif|endif|for|endfor)|'
        # GNU Make
        r'\s*(ifeq|ifneq|ifdef|ifndef|else|endif|-?include|define|endef|:))(?=\s)')
    r_comment = re.compile(r'^\s*@?#')

    def get_tokens_unprocessed(self, text):
        ins = []
        lines = text.splitlines(True)
        done = ''
        lex = BaseMakefileLexer(**self.options)
        backslashflag = False
        for line in lines:
            if self.r_special.match(line) or backslashflag:
                ins.append((len(done), [(0, Comment.Preproc, line)]))
                backslashflag = line.strip().endswith('\\')
            elif self.r_comment.match(line):
                ins.append((len(done), [(0, Comment, line)]))
            else:
                done += line
        for item in do_insertions(ins, lex.get_tokens_unprocessed(done)):
            yield item
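
# How the split above plays out (a sketch, not part of the original module):
# directive and comment lines are withheld from `done` and later re-inserted
# into the BaseMakefileLexer token stream at the recorded offsets by
# do_insertions().  For input such as
#
#     ifeq ($(CC),gcc)
#     all: ; @echo hi
#     endif
#
# the ifeq/endif lines are yielded as Comment.Preproc while the middle line is
# tokenized by BaseMakefileLexer.

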
class BaseMakefileLexer(RegexLexer):
    """
    Lexer for simple Makefiles (no preprocessing).

    *New in Pygments 0.10.*
    """

    name = 'Makefile'
    aliases = ['basemake']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)),
            (r'\$\((?:.*\\\n|.*\n)+', using(BashLexer)),
            (r'\s+', Text),
            (r'#.*?\n', Comment),
            (r'(export)(\s+)(?=[a-zA-Z0-9_${}\t -]+\n)',
             bygroups(Keyword, Text), 'export'),
            (r'export\s+', Keyword),
            # assignment
            (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n|.*\n)+)',
             bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))),
            # strings
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            # targets
            (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text),
             'block-header'),
            #TODO: add paren handling (grr)
        ],
        'export': [
            (r'[a-zA-Z0-9_${}-]+', Name.Variable),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
        ],
        'block-header': [
            (r'[^,\\\n#]+', Number),
            (r',', Punctuation),
            (r'#.*?\n', Comment),
            (r'\\\n', Text),  # line continuation
            (r'\\.', Text),
            (r'(?:[\t ]+.*\n|\n)+', using(BashLexer), '#pop'),
        ],
    }


class DiffLexer(RegexLexer):
    """
    Lexer for unified or context-style diffs or patches.
    """

    name = 'Diff'
    aliases = ['diff']
    filenames = ['*.diff', '*.patch']
    mimetypes = ['text/x-diff', 'text/x-patch']

    tokens = {
        'root': [
            (r' .*\n', Text),
            (r'\+.*\n', Generic.Inserted),
            (r'-.*\n', Generic.Deleted),
            (r'!.*\n', Generic.Strong),
            (r'@.*\n', Generic.Subheading),
            (r'(Index|diff).*\n', Generic.Heading),
            (r'=.*\n', Generic.Heading),
            (r'.*\n', Text),
        ]
    }

    def analyse_text(text):
        if text[:7] == 'Index: ':
            return True
        if text[:5] == 'diff ':
            return True
        if text[:4] == '--- ':
            return 0.9


class DarcsPatchLexer(RegexLexer):
    """
    DarcsPatchLexer is a lexer for the various versions of the darcs patch
    format.  Examples of this format are derived by commands such as
    ``darcs annotate --patch`` and ``darcs send``.

    *New in Pygments 0.10.*
    """
    name = 'Darcs Patch'
    aliases = ['dpatch']
    filenames = ['*.dpatch', '*.darcspatch']

    tokens = {
        'root': [
            (r'<', Operator),
            (r'>', Operator),
            (r'{', Operator, 'patch'),
            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
             bygroups(Operator, Keyword, Name, Text, Name, Operator,
                      Literal.Date, Text), 'comment'),
            (r'New patches:', Generic.Heading),
            (r'Context:', Generic.Heading),
            (r'Patch bundle hash:', Generic.Heading),
            (r'\s+|\w+', Text),
        ],
        'comment': [
            (r' .*\n', Comment),
            (r'\]', Operator, "#pop"),
        ],
        'patch': [
            (r'}', Operator, "#pop"),
            (r'(\w+)(.*\n)', bygroups(Keyword, Text)),
            (r'\+.*\n', Generic.Inserted),
            (r'-.*\n', Generic.Deleted),
            (r'.*\n', Text),
        ],
    }


class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/]?)+ # Date as - or /-separated groups of digits
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.]?)+      # Time as :/.-separated groups of 1 or 2 digits
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """

    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)       # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)         # Star
                ([^\s]+\s+.*?\n)    # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                ([^\s]+\s+)                      # Nick + Space
                (.*?\n)                          # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"[^\s]+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }


class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    *New in Pygments 0.6.*
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            (r'[\s\w]+', Text),
            (r'(\[)(/?[^\]\n\r=]+)(\])',
             bygroups(Keyword, Keyword.Pseudo, Keyword)),
            (r'(\[)([^\]\n\r=]+)(=)([^\]\n\r]+)(\])',
             bygroups(Keyword, Keyword.Pseudo, Operator, String, Keyword)),
        ],
    }


class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            (r'', Text, '#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True


class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    *New in Pygments 0.6.*
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(?i)(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]*', Text, 'textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(..', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[0] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        if text[1:3].isalnum() and text[3].isspace():
            return 0.9


class ApacheConfLexer(RegexLexer):
    """
    Lexer for configuration files following the Apache config file
    format.

    *New in Pygments 0.6.*
    """

    name = 'ApacheConf'
    aliases = ['apacheconf', 'aconf', 'apache']
    filenames = ['.htaccess', 'apache.conf', 'apache2.conf']
    mimetypes = ['text/x-apacheconf']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(#.*?)$', Comment),
            (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)',
             bygroups(Name.Tag, Text, String, Name.Tag)),
            (r'([a-zA-Z][a-zA-Z0-9]*)(\s+)',
             bygroups(Name.Builtin, Text), 'value'),
            (r'\.+', Text),
        ],
        'value': [
            (r'$', Text, '#pop'),
            (r'[^\S\n]+', Text),
            (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number),
            (r'\d+', Number),
            (r'/([a-zA-Z0-9][a-zA-Z0-9_./-]+)', String.Other),
            (r'(on|off|none|any|all|double|email|dns|min|minimal|'
             r'os|productonly|full|emerg|alert|crit|error|warn|'
             r'notice|info|debug|registry|script|inetd|standalone|'
             r'user|group)\b', Keyword),
            (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double),
            (r'[^\s"]+', Text)
        ]
    }


class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    *New in Pygments 0.7.*
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'({{{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-zivx]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'}}}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'{{{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }


class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    *New in Pygments 0.7.*

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language`` and
        ``.. code:: language`` directives with a lexer for the given
        language (default: ``True``). *New in Pygments 0.8.*
    """
    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name
        from pygments.util import ClassNotFound

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line

        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w-]+)(::)(?:([ \t]*)(.+))?',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, Keyword)),
            # A reference target
            (r'^( *\.\.)(\s*)([\w\t ]+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list
            (r'^( *)(:.*?:)([ \t]+)(.*?)$', bygroups(Text, Name.Class, Text,
                                                     Name.Function)),
            # Definition list
            (r'^([^ ].*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`)(.+?)(`__?)',
             bygroups(Punctuation, using(this), Punctuation)),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # user-defined role
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`\\]+', String),
            (r'\\.', String),
            (r'``', String, '#pop'),
            (r'[`\\]', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                 # has two lines
            p1 * 2 + 1 == p2 and        # they are the same length
            text[p1+1] in '-=' and      # the next line both starts and ends with
            text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5


class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    *New in Pygments 0.8.*
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    tokens = {
        'root': [
            # Who decided that doublequote was a good comment character??
            (r'^\s*".*', Comment),
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/(\\\\|\\/|[^\n/])*/', String.Regex),
            (r'"(\\\\|\\"|[^\n"])*"', String.Double),
            (r"'(\\\\|\\'|[^\n'])*'", String.Single),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list. Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        from pygments.lexers._vimbuiltins import command, option, auto
        self._cmd = command
        self._opt = option
        self._aut = auto
        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them.  In fact,
        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        p = bisect(mapping, (w,))
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w: return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                   mapping[p][1][:len(w)] == w
        return False

    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
            RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                     self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value
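
# A worked example of the abbreviation matching in is_in() above (a sketch,
# not part of the original module).  The _vimbuiltins tables are sorted lists
# of (shortest-form, full-form) pairs, e.g.:
#
#     >>> from bisect import bisect
#     >>> mapping = [('ab', 'abbreviate'), ('wh', 'while')]
#     >>> bisect(mapping, ('abbre',))
#     1
#
# The entry just before the insertion point is ('ab', 'abbreviate'); 'ab' is a
# prefix of 'abbre' and 'abbre' is a prefix of 'abbreviate', so 'abbre' is
# accepted as a command, while a word such as 'abbrz' fails the second prefix
# test and falls through to Text.

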
class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    *New in Pygments 0.9.*
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            #(r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([\w-]*:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }


class SquidConfLexer(RegexLexer):
    """
    Lexer for `squid <http://www.squid-cache.org/>`_ configuration files.

    *New in Pygments 0.9.*
    """

    name = 'SquidConf'
    aliases = ['squidconf', 'squid.conf', 'squid']
    filenames = ['squid.conf']
    mimetypes = ['text/x-squidconf']
    flags = re.IGNORECASE

    keywords = [ "acl", "always_direct", "announce_host",
                 "announce_period", "announce_port", "announce_to",
                 "anonymize_headers", "append_domain", "as_whois_server",
                 "auth_param_basic", "authenticate_children",
                 "authenticate_program", "authenticate_ttl", "broken_posts",
                 "buffered_logs", "cache_access_log", "cache_announce",
                 "cache_dir", "cache_dns_program", "cache_effective_group",
                 "cache_effective_user", "cache_host", "cache_host_acl",
                 "cache_host_domain", "cache_log", "cache_mem",
                 "cache_mem_high", "cache_mem_low", "cache_mgr",
                 "cachemgr_passwd", "cache_peer", "cache_peer_access",
                 "cahce_replacement_policy", "cache_stoplist",
                 "cache_stoplist_pattern", "cache_store_log", "cache_swap",
                 "cache_swap_high", "cache_swap_log", "cache_swap_low",
                 "client_db", "client_lifetime", "client_netmask",
                 "connect_timeout", "coredump_dir", "dead_peer_timeout",
                 "debug_options", "delay_access", "delay_class",
                 "delay_initial_bucket_level", "delay_parameters",
                 "delay_pools", "deny_info", "dns_children", "dns_defnames",
                 "dns_nameservers", "dns_testnames", "emulate_httpd_log",
                 "err_html_text", "fake_user_agent", "firewall_ip",
                 "forwarded_for", "forward_snmpd_port", "fqdncache_size",
                 "ftpget_options", "ftpget_program", "ftp_list_width",
                 "ftp_passive", "ftp_user", "half_closed_clients",
                 "header_access", "header_replace", "hierarchy_stoplist",
                 "high_response_time_warning", "high_page_fault_warning",
                 "htcp_port", "http_access", "http_anonymizer", "httpd_accel",
                 "httpd_accel_host", "httpd_accel_port",
                 "httpd_accel_uses_host_header", "httpd_accel_with_proxy",
                 "http_port", "http_reply_access", "icp_access",
                 "icp_hit_stale", "icp_port", "icp_query_timeout",
                 "ident_lookup", "ident_lookup_access", "ident_timeout",
                 "incoming_http_average", "incoming_icp_average",
                 "inside_firewall", "ipcache_high", "ipcache_low",
                 "ipcache_size", "local_domain", "local_ip", "logfile_rotate",
                 "log_fqdn", "log_icp_queries", "log_mime_hdrs",
                 "maximum_object_size", "maximum_single_addr_tries",
                 "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr",
                 "mcast_miss_encode_key", "mcast_miss_port", "memory_pools",
                 "memory_pools_limit", "memory_replacement_policy",
                 "mime_table", "min_http_poll_cnt", "min_icp_poll_cnt",
                 "minimum_direct_hops", "minimum_object_size",
                 "minimum_retry_timeout", "miss_access", "negative_dns_ttl",
                 "negative_ttl", "neighbor_timeout", "neighbor_type_domain",
                 "netdb_high", "netdb_low", "netdb_ping_period",
                 "netdb_ping_rate", "never_direct", "no_cache",
                 "passthrough_proxy", "pconn_timeout", "pid_filename",
                 "pinger_program", "positive_dns_ttl", "prefer_direct",
                 "proxy_auth", "proxy_auth_realm", "query_icmp", "quick_abort",
                 "quick_abort", "quick_abort_max", "quick_abort_min",
                 "quick_abort_pct", "range_offset_limit", "read_timeout",
                 "redirect_children", "redirect_program",
                 "redirect_rewrites_host_header", "reference_age",
                 "reference_age", "refresh_pattern", "reload_into_ims",
                 "request_body_max_size", "request_size", "request_timeout",
                 "shutdown_lifetime", "single_parent_bypass",
                 "siteselect_timeout", "snmp_access", "snmp_incoming_address",
                 "snmp_port", "source_ping", "ssl_proxy",
                 "store_avg_object_size", "store_objects_per_bucket",
                 "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs",
                 "tcp_incoming_address", "tcp_outgoing_address",
                 "tcp_recv_bufsize", "test_reachability", "udp_hit_obj",
                 "udp_hit_obj_size", "udp_incoming_address",
                 "udp_outgoing_address", "unique_hostname", "unlinkd_program",
                 "uri_whitespace", "useragent_log", "visible_hostname",
                 "wais_relay", "wais_relay_host", "wais_relay_port",
                 ]

    opts = [ "proxy-only", "weight", "ttl", "no-query", "default",
             "round-robin", "multicast-responder", "on", "off", "all",
             "deny", "allow", "via", "parent", "no-digest", "heap", "lru",
             "realm", "children", "credentialsttl", "none", "disable",
             "offline_toggle", "diskd", "q1", "q2",
             ]

    actions = [ "shutdown", "info", "parameter", "server_list",
                "client_list", r'squid\.conf',
                ]

    actions_stats = [ "objects", "vm_objects", "utilization",
                      "ipcache", "fqdncache", "dns", "redirector", "io",
                      "reply_headers", "filedescriptors", "netdb",
                      ]

    actions_log = [ "status", "enable", "disable", "clear"]

    acls = [ "url_regex", "urlpath_regex", "referer_regex", "port",
             "proto", "req_mime_type", "rep_mime_type", "method",
             "browser", "user", "src", "dst", "time", "dstdomain", "ident",
             "snmp_community",
             ]

    ip_re = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'

    def makelistre(list):
        return r'\b(?:'+'|'.join(list)+r')\b'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#', Comment, 'comment'),
            (makelistre(keywords), Keyword),
            (makelistre(opts), Name.Constant),
            # Actions
            (makelistre(actions), String),
            (r'stats/'+makelistre(actions), String),
            (r'log/'+makelistre(actions)+r'=', String),
            (makelistre(acls), Keyword),
            (ip_re+r'(?:/(?:'+ip_re+r')|\d+)?', Number),
            (r'\b\d+\b', Number),
            (r'\S+', Text),
        ],
        'comment': [
            (r'\s*TAG:.*', String.Escape, '#pop'),
            (r'.*', Comment, '#pop'),
        ],
    }
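
# makelistre() above is only used while the class body executes: it joins a
# word list into a single alternation wrapped in word boundaries, so (as a
# sketch, not part of the original module) a list like ["acl", "http_port"]
# becomes the pattern r'\b(?:acl|http_port)\b'.

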
class DebianControlLexer(RegexLexer):
    """
    Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.

    *New in Pygments 0.9.*
    """
    name = 'Debian Control file'
    aliases = ['control']
    filenames = ['control']

    tokens = {
        'root': [
            (r'^(Description)', Keyword, 'description'),
            (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'),
            (r'^((Build-)?Depends)', Keyword, 'depends'),
            (r'^((?:Python-)?Version)(:\s*)([^\s]+)$',
             bygroups(Keyword, Text, Number)),
            (r'^((?:Installed-)?Size)(:\s*)([^\s]+)$',
             bygroups(Keyword, Text, Number)),
            (r'^(MD5Sum|SHA1|SHA256)(:\s*)([^\s]+)$',
             bygroups(Keyword, Text, Number)),
            (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
             bygroups(Keyword, Whitespace, String)),
        ],
        'maintainer': [
            (r'<[^>]+>', Generic.Strong),
            (r'<[^>]+>$', Generic.Strong, '#pop'),
            (r',\n?', Text),
            (r'.', Text),
        ],
        'description': [
            (r'(.*)(Homepage)(: )([^\s]+)', bygroups(Text, String, Name, Name.Class)),
            (r':.*\n', Generic.Strong),
            (r' .*\n', Text),
            ('', Text, '#pop'),
        ],
        'depends': [
            (r':\s*', Text),
            (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)),
            (r'\(', Text, 'depend_vers'),
            (r',', Text),
            (r'\|', Operator),
            (r'[\s]+', Text),
            (r'[}\)]\s*$', Text, '#pop'),
            (r'[}]', Text),
            (r'[^,]$', Name.Function, '#pop'),
            (r'([\+\.a-zA-Z0-9-][\s\n]*)', Name.Function),
        ],
        'depend_vers': [
            (r'\),', Text, '#pop'),
            (r'\)[^,]', Text, '#pop:2'),
            (r'([><=]+)(\s*)([^\)]+)', bygroups(Operator, Text, Number))
        ]
    }


class YamlLexerContext(LexerContext):
    """Indentation context for the YAML lexer."""

    def __init__(self, *args, **kwds):
        super(YamlLexerContext, self).__init__(*args, **kwds)
        self.indent_stack = []
        self.indent = -1
        self.next_indent = 0
        self.block_scalar_indent = None


class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    *New in Pygments 0.11.*
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    def something(token_class):
        """Do not produce empty tokens."""
        def callback(lexer, match, context):
            text = match.group()
            if not text:
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Reset the indentation levels."""
        def callback(lexer, match, context):
            text = match.group()
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Save a possible indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                    if context.next_indent > context.indent:
                        extra = text[context.indent:]
                        text = text[:context.indent]
            else:
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Set the previously saved indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                return
            increment = match.group(1)
            if increment:
                current_indent = max(context.indent, 0)
                increment = int(increment)
                context.block_scalar_indent = current_indent + increment
            if text:
                yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process an empty line in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                    len(text) <= context.block_scalar_indent):
                if text:
                    yield match.start(), indent_token_class, text
            else:
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                if len(text) <= max(context.indent, 0):
                    context.stack.pop()
                    context.stack.pop()
                    return
                context.block_scalar_indent = len(text)
            else:
                if len(text) < context.block_scalar_indent:
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if len(text) <= context.indent:
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
            context.pos = match.end()
        return callback
    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?![ \t\n\r\f\v]|$)', save_indent(Text, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Text, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![0-9A-Za-z_-]*!)'
             r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)',
             bygroups(Text, Keyword.Type, Text, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Text), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Text), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Text), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Text),
            # tags, anchors and aliases,
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[0-9A-Za-z_-]+)?'
             r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Keyword.Type),
            # an anchor
            (r'&[0-9A-Za-z_-]+', Name.Label),
            # an alias
            (r'\*[0-9A-Za-z_-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Text),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text, Name.Constant)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Text)),
            # line content
            (r'[^\n\r\f\v]+', Name.Constant),
        ],

        # the content of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Text),
            # line breaks
            (r'\n+', Text),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?![ \t\n\r\f\v])|[^ \t\n\r\f\v:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v,:?\[\]{}]+', Name.Variable),
        ],
    }
    def get_tokens_unprocessed(self, text=None, context=None):
        if context is None:
            context = YamlLexerContext(text, 0)
        return super(YamlLexer, self).get_tokens_unprocessed(text, context)
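
# Example usage (a minimal sketch, not part of the original module): every call
# creates a fresh YamlLexerContext, so the indentation bookkeeping in the
# callbacks above starts from a clean slate for each document.
#
#     from pygments.lexers.text import YamlLexer
#
#     doc = "key:\n  - one\n  - two\n"
#     for token, value in YamlLexer().get_tokens(doc):
#         print(token, repr(value))

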
class LighttpdConfLexer(RegexLexer):
    """
    Lexer for `Lighttpd <http://lighttpd.net/>`_ configuration files.

    *New in Pygments 0.11.*
    """
    name = 'Lighttpd configuration file'
    aliases = ['lighty', 'lighttpd']
    filenames = []
    mimetypes = ['text/x-lighttpd-conf']

    tokens = {
        'root': [
            (r'#.*\n', Comment.Single),
            (r'/\S*', Name),  # pathname
            (r'[a-zA-Z._-]+', Keyword),
            (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number),
            (r'[0-9]+', Number),
            (r'=>|=~|\+=|==|=|\+', Operator),
            (r'\$[A-Z]+', Name.Builtin),
            (r'[(){}\[\],]', Punctuation),
            (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double),
            (r'\s+', Text),
        ],
    }


class NginxConfLexer(RegexLexer):
    """
    Lexer for `Nginx <http://nginx.net/>`_ configuration files.

    *New in Pygments 0.11.*
    """
    name = 'Nginx configuration file'
    aliases = ['nginx']
    filenames = []
    mimetypes = ['text/x-nginx-conf']

    tokens = {
        'root': [
            (r'(include)(\s+)([^\s;]+)', bygroups(Keyword, Text, Name)),
            (r'[^\s;#]+', Keyword, 'stmt'),
            include('base'),
        ],
        'block': [
            (r'}', Punctuation, '#pop:2'),
            (r'[^\s;#]+', Keyword.Namespace, 'stmt'),
            include('base'),
        ],
        'stmt': [
            (r'{', Punctuation, 'block'),
            (r';', Punctuation, '#pop'),
            include('base'),
        ],
        'base': [
            (r'#.*\n', Comment.Single),
            (r'on|off', Name.Constant),
            (r'\$[^\s;#]+', Name.Variable),
            (r'([a-z0-9.-]+)(:)([0-9]+)',
             bygroups(Name, Punctuation, Number.Integer)),
            (r'[a-z-]+/[a-z-]+', Name),  # mimetype
            #(r'[a-zA-Z._-]+', Keyword),
            (r'[0-9]+[km]?\b', Number.Integer),
            (r'(~)(\s*)([^\s{]+)', bygroups(Punctuation, Text, String.Regex)),
            (r'[:=~]', Punctuation),
            (r'[^\s;#{}$]+', String),  # catch all
            (r'/[^\s;#]*', Name),  # pathname
            (r'\s+', Text),
        ],
    }
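
# Example lookup (a sketch, not part of the original module): the aliases,
# filenames and mimetypes declared on the classes above are what pygments'
# registry uses to find a lexer at runtime.  Assumes a standard pygments
# installation.
#
#     from pygments.lexers import get_lexer_by_name, get_lexer_for_filename
#
#     nginx = get_lexer_by_name('nginx')
#     squid = get_lexer_for_filename('squid.conf')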