PageRenderTime 53ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/pygments/lexers/agile.py

https://bitbucket.org/pnathan/pygments-main
Python | 1815 lines | 1526 code | 114 blank | 175 comment | 28 complexity | b65aa70f94be6e388cb58ea9b6e333a2 MD5 | raw file
Possible License(s): BSD-2-Clause

Large files are truncated, but you can click here to view the full file

  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.agile
  4. ~~~~~~~~~~~~~~~~~~~~~
  5. Lexers for agile languages.
  6. :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \
  11. LexerContext, include, combined, do_insertions, bygroups, using
  12. from pygments.token import Error, Text, Other, \
  13. Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation
  14. from pygments.util import get_bool_opt, get_list_opt, shebang_matches
  15. from pygments import unistring as uni
  16. __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
  17. 'Python3Lexer', 'Python3TracebackLexer', 'RubyLexer',
  18. 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer', 'MoonScriptLexer',
  19. 'CrocLexer', 'MiniDLexer', 'IoLexer', 'TclLexer', 'FactorLexer', 'FancyLexer']
  20. # b/w compatibility
  21. from pygments.lexers.functional import SchemeLexer
  22. from pygments.lexers.jvm import IokeLexer, ClojureLexer
  23. line_re = re.compile('.*?\n')
  24. class PythonLexer(RegexLexer):
  25. """
  26. For `Python <http://www.python.org>`_ source code.
  27. """
  28. name = 'Python'
  29. aliases = ['python', 'py', 'sage']
  30. filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac', '*.sage']
  31. mimetypes = ['text/x-python', 'application/x-python']
  32. tokens = {
  33. 'root': [
  34. (r'\n', Text),
  35. (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
  36. (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
  37. (r'[^\S\n]+', Text),
  38. (r'#.*$', Comment),
  39. (r'[]{}:(),;[]', Punctuation),
  40. (r'\\\n', Text),
  41. (r'\\', Text),
  42. (r'(in|is|and|or|not)\b', Operator.Word),
  43. (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
  44. include('keywords'),
  45. (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
  46. (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
  47. (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
  48. 'fromimport'),
  49. (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
  50. 'import'),
  51. include('builtins'),
  52. include('backtick'),
  53. ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
  54. ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
  55. ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
  56. ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
  57. ('[uU]?"""', String, combined('stringescape', 'tdqs')),
  58. ("[uU]?'''", String, combined('stringescape', 'tsqs')),
  59. ('[uU]?"', String, combined('stringescape', 'dqs')),
  60. ("[uU]?'", String, combined('stringescape', 'sqs')),
  61. include('name'),
  62. include('numbers'),
  63. ],
  64. 'keywords': [
  65. (r'(assert|break|continue|del|elif|else|except|exec|'
  66. r'finally|for|global|if|lambda|pass|print|raise|'
  67. r'return|try|while|yield|as|with)\b', Keyword),
  68. ],
  69. 'builtins': [
  70. (r'(?<!\.)(__import__|abs|all|any|apply|basestring|bin|bool|buffer|'
  71. r'bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|'
  72. r'complex|delattr|dict|dir|divmod|enumerate|eval|execfile|exit|'
  73. r'file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|'
  74. r'input|int|intern|isinstance|issubclass|iter|len|list|locals|'
  75. r'long|map|max|min|next|object|oct|open|ord|pow|property|range|'
  76. r'raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|'
  77. r'sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|'
  78. r'vars|xrange|zip)\b', Name.Builtin),
  79. (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True'
  80. r')\b', Name.Builtin.Pseudo),
  81. (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
  82. r'BaseException|DeprecationWarning|EOFError|EnvironmentError|'
  83. r'Exception|FloatingPointError|FutureWarning|GeneratorExit|IOError|'
  84. r'ImportError|ImportWarning|IndentationError|IndexError|KeyError|'
  85. r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
  86. r'NotImplemented|NotImplementedError|OSError|OverflowError|'
  87. r'OverflowWarning|PendingDeprecationWarning|ReferenceError|'
  88. r'RuntimeError|RuntimeWarning|StandardError|StopIteration|'
  89. r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
  90. r'TypeError|UnboundLocalError|UnicodeDecodeError|'
  91. r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
  92. r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
  93. r'WindowsError|ZeroDivisionError)\b', Name.Exception),
  94. ],
  95. 'numbers': [
  96. (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
  97. (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
  98. (r'0[0-7]+j?', Number.Oct),
  99. (r'0[xX][a-fA-F0-9]+', Number.Hex),
  100. (r'\d+L', Number.Integer.Long),
  101. (r'\d+j?', Number.Integer)
  102. ],
  103. 'backtick': [
  104. ('`.*?`', String.Backtick),
  105. ],
  106. 'name': [
  107. (r'@[a-zA-Z0-9_.]+', Name.Decorator),
  108. ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
  109. ],
  110. 'funcname': [
  111. ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')
  112. ],
  113. 'classname': [
  114. ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
  115. ],
  116. 'import': [
  117. (r'(?:[ \t]|\\\n)+', Text),
  118. (r'as\b', Keyword.Namespace),
  119. (r',', Operator),
  120. (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace),
  121. (r'', Text, '#pop') # all else: go back
  122. ],
  123. 'fromimport': [
  124. (r'(?:[ \t]|\\\n)+', Text),
  125. (r'import\b', Keyword.Namespace, '#pop'),
  126. # if None occurs here, it's "raise x from None", since None can
  127. # never be a module name
  128. (r'None\b', Name.Builtin.Pseudo, '#pop'),
  129. # sadly, in "raise x from y" y will be highlighted as namespace too
  130. (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace),
  131. # anything else here also means "raise x from y" and is therefore
  132. # not an error
  133. (r'', Text, '#pop'),
  134. ],
  135. 'stringescape': [
  136. (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'
  137. r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
  138. ],
  139. 'strings': [
  140. (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
  141. '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
  142. (r'[^\\\'"%\n]+', String),
  143. # quotes, percents and backslashes must be parsed one at a time
  144. (r'[\'"\\]', String),
  145. # unhandled string formatting sign
  146. (r'%', String)
  147. # newlines are an error (use "nl" state)
  148. ],
  149. 'nl': [
  150. (r'\n', String)
  151. ],
  152. 'dqs': [
  153. (r'"', String, '#pop'),
  154. (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings
  155. include('strings')
  156. ],
  157. 'sqs': [
  158. (r"'", String, '#pop'),
  159. (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings
  160. include('strings')
  161. ],
  162. 'tdqs': [
  163. (r'"""', String, '#pop'),
  164. include('strings'),
  165. include('nl')
  166. ],
  167. 'tsqs': [
  168. (r"'''", String, '#pop'),
  169. include('strings'),
  170. include('nl')
  171. ],
  172. }
  173. def analyse_text(text):
  174. return shebang_matches(text, r'pythonw?(2(\.\d)?)?')
  175. class Python3Lexer(RegexLexer):
  176. """
  177. For `Python <http://www.python.org>`_ source code (version 3.0).
  178. *New in Pygments 0.10.*
  179. """
  180. name = 'Python 3'
  181. aliases = ['python3', 'py3']
  182. filenames = [] # Nothing until Python 3 gets widespread
  183. mimetypes = ['text/x-python3', 'application/x-python3']
  184. flags = re.MULTILINE | re.UNICODE
  185. uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)
  186. tokens = PythonLexer.tokens.copy()
  187. tokens['keywords'] = [
  188. (r'(assert|break|continue|del|elif|else|except|'
  189. r'finally|for|global|if|lambda|pass|raise|nonlocal|'
  190. r'return|try|while|yield|as|with|True|False|None)\b', Keyword),
  191. ]
  192. tokens['builtins'] = [
  193. (r'(?<!\.)(__import__|abs|all|any|bin|bool|bytearray|bytes|'
  194. r'chr|classmethod|cmp|compile|complex|delattr|dict|dir|'
  195. r'divmod|enumerate|eval|filter|float|format|frozenset|getattr|'
  196. r'globals|hasattr|hash|hex|id|input|int|isinstance|issubclass|'
  197. r'iter|len|list|locals|map|max|memoryview|min|next|object|oct|'
  198. r'open|ord|pow|print|property|range|repr|reversed|round|'
  199. r'set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|'
  200. r'vars|zip)\b', Name.Builtin),
  201. (r'(?<!\.)(self|Ellipsis|NotImplemented)\b', Name.Builtin.Pseudo),
  202. (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
  203. r'BaseException|BufferError|BytesWarning|DeprecationWarning|'
  204. r'EOFError|EnvironmentError|Exception|FloatingPointError|'
  205. r'FutureWarning|GeneratorExit|IOError|ImportError|'
  206. r'ImportWarning|IndentationError|IndexError|KeyError|'
  207. r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
  208. r'NotImplementedError|OSError|OverflowError|'
  209. r'PendingDeprecationWarning|ReferenceError|'
  210. r'RuntimeError|RuntimeWarning|StopIteration|'
  211. r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
  212. r'TypeError|UnboundLocalError|UnicodeDecodeError|'
  213. r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
  214. r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
  215. r'WindowsError|ZeroDivisionError)\b', Name.Exception),
  216. ]
  217. tokens['numbers'] = [
  218. (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
  219. (r'0[oO][0-7]+', Number.Oct),
  220. (r'0[bB][01]+', Number.Bin),
  221. (r'0[xX][a-fA-F0-9]+', Number.Hex),
  222. (r'\d+', Number.Integer)
  223. ]
  224. tokens['backtick'] = []
  225. tokens['name'] = [
  226. (r'@[a-zA-Z0-9_]+', Name.Decorator),
  227. (uni_name, Name),
  228. ]
  229. tokens['funcname'] = [
  230. (uni_name, Name.Function, '#pop')
  231. ]
  232. tokens['classname'] = [
  233. (uni_name, Name.Class, '#pop')
  234. ]
  235. tokens['import'] = [
  236. (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
  237. (r'\.', Name.Namespace),
  238. (uni_name, Name.Namespace),
  239. (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
  240. (r'', Text, '#pop') # all else: go back
  241. ]
  242. tokens['fromimport'] = [
  243. (r'(\s+)(import)\b', bygroups(Text, Keyword), '#pop'),
  244. (r'\.', Name.Namespace),
  245. (uni_name, Name.Namespace),
  246. ]
  247. # don't highlight "%s" substitutions
  248. tokens['strings'] = [
  249. (r'[^\\\'"%\n]+', String),
  250. # quotes, percents and backslashes must be parsed one at a time
  251. (r'[\'"\\]', String),
  252. # unhandled string formatting sign
  253. (r'%', String)
  254. # newlines are an error (use "nl" state)
  255. ]
  256. def analyse_text(text):
  257. return shebang_matches(text, r'pythonw?3(\.\d)?')
  258. class PythonConsoleLexer(Lexer):
  259. """
  260. For Python console output or doctests, such as:
  261. .. sourcecode:: pycon
  262. >>> a = 'foo'
  263. >>> print a
  264. foo
  265. >>> 1 / 0
  266. Traceback (most recent call last):
  267. File "<stdin>", line 1, in <module>
  268. ZeroDivisionError: integer division or modulo by zero
  269. Additional options:
  270. `python3`
  271. Use Python 3 lexer for code. Default is ``False``.
  272. *New in Pygments 1.0.*
  273. """
  274. name = 'Python console session'
  275. aliases = ['pycon']
  276. mimetypes = ['text/x-python-doctest']
  277. def __init__(self, **options):
  278. self.python3 = get_bool_opt(options, 'python3', False)
  279. Lexer.__init__(self, **options)
  280. def get_tokens_unprocessed(self, text):
  281. if self.python3:
  282. pylexer = Python3Lexer(**self.options)
  283. tblexer = Python3TracebackLexer(**self.options)
  284. else:
  285. pylexer = PythonLexer(**self.options)
  286. tblexer = PythonTracebackLexer(**self.options)
  287. curcode = ''
  288. insertions = []
  289. curtb = ''
  290. tbindex = 0
  291. tb = 0
  292. for match in line_re.finditer(text):
  293. line = match.group()
  294. if line.startswith(u'>>> ') or line.startswith(u'... '):
  295. tb = 0
  296. insertions.append((len(curcode),
  297. [(0, Generic.Prompt, line[:4])]))
  298. curcode += line[4:]
  299. elif line.rstrip() == u'...' and not tb:
  300. # only a new >>> prompt can end an exception block
  301. # otherwise an ellipsis in place of the traceback frames
  302. # will be mishandled
  303. insertions.append((len(curcode),
  304. [(0, Generic.Prompt, u'...')]))
  305. curcode += line[3:]
  306. else:
  307. if curcode:
  308. for item in do_insertions(insertions,
  309. pylexer.get_tokens_unprocessed(curcode)):
  310. yield item
  311. curcode = ''
  312. insertions = []
  313. if (line.startswith(u'Traceback (most recent call last):') or
  314. re.match(ur' File "[^"]+", line \d+\n$', line)):
  315. tb = 1
  316. curtb = line
  317. tbindex = match.start()
  318. elif line == 'KeyboardInterrupt\n':
  319. yield match.start(), Name.Class, line
  320. elif tb:
  321. curtb += line
  322. if not (line.startswith(' ') or line.strip() == u'...'):
  323. tb = 0
  324. for i, t, v in tblexer.get_tokens_unprocessed(curtb):
  325. yield tbindex+i, t, v
  326. else:
  327. yield match.start(), Generic.Output, line
  328. if curcode:
  329. for item in do_insertions(insertions,
  330. pylexer.get_tokens_unprocessed(curcode)):
  331. yield item
  332. class PythonTracebackLexer(RegexLexer):
  333. """
  334. For Python tracebacks.
  335. *New in Pygments 0.7.*
  336. """
  337. name = 'Python Traceback'
  338. aliases = ['pytb']
  339. filenames = ['*.pytb']
  340. mimetypes = ['text/x-python-traceback']
  341. tokens = {
  342. 'root': [
  343. (r'^Traceback \(most recent call last\):\n',
  344. Generic.Traceback, 'intb'),
  345. # SyntaxError starts with this.
  346. (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
  347. (r'^.*\n', Other),
  348. ],
  349. 'intb': [
  350. (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
  351. bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
  352. (r'^( File )("[^"]+")(, line )(\d+)(\n)',
  353. bygroups(Text, Name.Builtin, Text, Number, Text)),
  354. (r'^( )(.+)(\n)',
  355. bygroups(Text, using(PythonLexer), Text)),
  356. (r'^([ \t]*)(\.\.\.)(\n)',
  357. bygroups(Text, Comment, Text)), # for doctests...
  358. (r'^(.+)(: )(.+)(\n)',
  359. bygroups(Generic.Error, Text, Name, Text), '#pop'),
  360. (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
  361. bygroups(Generic.Error, Text), '#pop')
  362. ],
  363. }
  364. class Python3TracebackLexer(RegexLexer):
  365. """
  366. For Python 3.0 tracebacks, with support for chained exceptions.
  367. *New in Pygments 1.0.*
  368. """
  369. name = 'Python 3.0 Traceback'
  370. aliases = ['py3tb']
  371. filenames = ['*.py3tb']
  372. mimetypes = ['text/x-python3-traceback']
  373. tokens = {
  374. 'root': [
  375. (r'\n', Text),
  376. (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
  377. (r'^During handling of the above exception, another '
  378. r'exception occurred:\n\n', Generic.Traceback),
  379. (r'^The above exception was the direct cause of the '
  380. r'following exception:\n\n', Generic.Traceback),
  381. ],
  382. 'intb': [
  383. (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
  384. bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
  385. (r'^( )(.+)(\n)',
  386. bygroups(Text, using(Python3Lexer), Text)),
  387. (r'^([ \t]*)(\.\.\.)(\n)',
  388. bygroups(Text, Comment, Text)), # for doctests...
  389. (r'^(.+)(: )(.+)(\n)',
  390. bygroups(Generic.Error, Text, Name, Text), '#pop'),
  391. (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
  392. bygroups(Generic.Error, Text), '#pop')
  393. ],
  394. }
  395. class RubyLexer(ExtendedRegexLexer):
  396. """
  397. For `Ruby <http://www.ruby-lang.org>`_ source code.
  398. """
  399. name = 'Ruby'
  400. aliases = ['rb', 'ruby', 'duby']
  401. filenames = ['*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec',
  402. '*.rbx', '*.duby']
  403. mimetypes = ['text/x-ruby', 'application/x-ruby']
  404. flags = re.DOTALL | re.MULTILINE
  405. def heredoc_callback(self, match, ctx):
  406. # okay, this is the hardest part of parsing Ruby...
  407. # match: 1 = <<-?, 2 = quote? 3 = name 4 = quote? 5 = rest of line
  408. start = match.start(1)
  409. yield start, Operator, match.group(1) # <<-?
  410. yield match.start(2), String.Heredoc, match.group(2) # quote ", ', `
  411. yield match.start(3), Name.Constant, match.group(3) # heredoc name
  412. yield match.start(4), String.Heredoc, match.group(4) # quote again
  413. heredocstack = ctx.__dict__.setdefault('heredocstack', [])
  414. outermost = not bool(heredocstack)
  415. heredocstack.append((match.group(1) == '<<-', match.group(3)))
  416. ctx.pos = match.start(5)
  417. ctx.end = match.end(5)
  418. # this may find other heredocs
  419. for i, t, v in self.get_tokens_unprocessed(context=ctx):
  420. yield i, t, v
  421. ctx.pos = match.end()
  422. if outermost:
  423. # this is the outer heredoc again, now we can process them all
  424. for tolerant, hdname in heredocstack:
  425. lines = []
  426. for match in line_re.finditer(ctx.text, ctx.pos):
  427. if tolerant:
  428. check = match.group().strip()
  429. else:
  430. check = match.group().rstrip()
  431. if check == hdname:
  432. for amatch in lines:
  433. yield amatch.start(), String.Heredoc, amatch.group()
  434. yield match.start(), Name.Constant, match.group()
  435. ctx.pos = match.end()
  436. break
  437. else:
  438. lines.append(match)
  439. else:
  440. # end of heredoc not found -- error!
  441. for amatch in lines:
  442. yield amatch.start(), Error, amatch.group()
  443. ctx.end = len(ctx.text)
  444. del heredocstack[:]
  445. def gen_rubystrings_rules():
  446. def intp_regex_callback(self, match, ctx):
  447. yield match.start(1), String.Regex, match.group(1) # begin
  448. nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
  449. for i, t, v in self.get_tokens_unprocessed(context=nctx):
  450. yield match.start(3)+i, t, v
  451. yield match.start(4), String.Regex, match.group(4) # end[mixounse]*
  452. ctx.pos = match.end()
  453. def intp_string_callback(self, match, ctx):
  454. yield match.start(1), String.Other, match.group(1)
  455. nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
  456. for i, t, v in self.get_tokens_unprocessed(context=nctx):
  457. yield match.start(3)+i, t, v
  458. yield match.start(4), String.Other, match.group(4) # end
  459. ctx.pos = match.end()
  460. states = {}
  461. states['strings'] = [
  462. # easy ones
  463. (r'\:@{0,2}([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|'
  464. r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol),
  465. (r":'(\\\\|\\'|[^'])*'", String.Symbol),
  466. (r"'(\\\\|\\'|[^'])*'", String.Single),
  467. (r':"', String.Symbol, 'simple-sym'),
  468. (r'"', String.Double, 'simple-string'),
  469. (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
  470. ]
  471. # double-quoted string and symbol
  472. for name, ttype, end in ('string', String.Double, '"'), \
  473. ('sym', String.Symbol, '"'), \
  474. ('backtick', String.Backtick, '`'):
  475. states['simple-'+name] = [
  476. include('string-intp-escaped'),
  477. (r'[^\\%s#]+' % end, ttype),
  478. (r'[\\#]', ttype),
  479. (end, ttype, '#pop'),
  480. ]
  481. # braced quoted strings
  482. for lbrace, rbrace, name in ('\\{', '\\}', 'cb'), \
  483. ('\\[', '\\]', 'sb'), \
  484. ('\\(', '\\)', 'pa'), \
  485. ('<', '>', 'ab'):
  486. states[name+'-intp-string'] = [
  487. (r'\\[\\' + lbrace + rbrace + ']', String.Other),
  488. (r'(?<!\\)' + lbrace, String.Other, '#push'),
  489. (r'(?<!\\)' + rbrace, String.Other, '#pop'),
  490. include('string-intp-escaped'),
  491. (r'[\\#' + lbrace + rbrace + ']', String.Other),
  492. (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
  493. ]
  494. states['strings'].append((r'%[QWx]?' + lbrace, String.Other,
  495. name+'-intp-string'))
  496. states[name+'-string'] = [
  497. (r'\\[\\' + lbrace + rbrace + ']', String.Other),
  498. (r'(?<!\\)' + lbrace, String.Other, '#push'),
  499. (r'(?<!\\)' + rbrace, String.Other, '#pop'),
  500. (r'[\\#' + lbrace + rbrace + ']', String.Other),
  501. (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
  502. ]
  503. states['strings'].append((r'%[qsw]' + lbrace, String.Other,
  504. name+'-string'))
  505. states[name+'-regex'] = [
  506. (r'\\[\\' + lbrace + rbrace + ']', String.Regex),
  507. (r'(?<!\\)' + lbrace, String.Regex, '#push'),
  508. (r'(?<!\\)' + rbrace + '[mixounse]*', String.Regex, '#pop'),
  509. include('string-intp'),
  510. (r'[\\#' + lbrace + rbrace + ']', String.Regex),
  511. (r'[^\\#' + lbrace + rbrace + ']+', String.Regex),
  512. ]
  513. states['strings'].append((r'%r' + lbrace, String.Regex,
  514. name+'-regex'))
  515. # these must come after %<brace>!
  516. states['strings'] += [
  517. # %r regex
  518. (r'(%r([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)',
  519. intp_regex_callback),
  520. # regular fancy strings with qsw
  521. (r'%[qsw]([^a-zA-Z0-9])((?:\\\1|(?!\1).)*)\1', String.Other),
  522. (r'(%[QWx]([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2)',
  523. intp_string_callback),
  524. # special forms of fancy strings after operators or
  525. # in method calls with braces
  526. (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
  527. bygroups(Text, String.Other, None)),
  528. # and because of fixed width lookbehinds the whole thing a
  529. # second time for line startings...
  530. (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
  531. bygroups(Text, String.Other, None)),
  532. # all regular fancy strings without qsw
  533. (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)',
  534. intp_string_callback),
  535. ]
  536. return states
  537. tokens = {
  538. 'root': [
  539. (r'#.*?$', Comment.Single),
  540. (r'=begin\s.*?\n=end.*?$', Comment.Multiline),
  541. # keywords
  542. (r'(BEGIN|END|alias|begin|break|case|defined\?|'
  543. r'do|else|elsif|end|ensure|for|if|in|next|redo|'
  544. r'rescue|raise|retry|return|super|then|undef|unless|until|when|'
  545. r'while|yield)\b', Keyword),
  546. # start of function, class and module names
  547. (r'(module)(\s+)([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)',
  548. bygroups(Keyword, Text, Name.Namespace)),
  549. (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
  550. (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
  551. (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
  552. # special methods
  553. (r'(initialize|new|loop|include|extend|raise|attr_reader|'
  554. r'attr_writer|attr_accessor|attr|catch|throw|private|'
  555. r'module_function|public|protected|true|false|nil)\b',
  556. Keyword.Pseudo),
  557. (r'(not|and|or)\b', Operator.Word),
  558. (r'(autoload|block_given|const_defined|eql|equal|frozen|include|'
  559. r'instance_of|is_a|iterator|kind_of|method_defined|nil|'
  560. r'private_method_defined|protected_method_defined|'
  561. r'public_method_defined|respond_to|tainted)\?', Name.Builtin),
  562. (r'(chomp|chop|exit|gsub|sub)!', Name.Builtin),
  563. (r'(?<!\.)(Array|Float|Integer|String|__id__|__send__|abort|'
  564. r'ancestors|at_exit|autoload|binding|callcc|caller|'
  565. r'catch|chomp|chop|class_eval|class_variables|'
  566. r'clone|const_defined\?|const_get|const_missing|const_set|'
  567. r'constants|display|dup|eval|exec|exit|extend|fail|fork|'
  568. r'format|freeze|getc|gets|global_variables|gsub|'
  569. r'hash|id|included_modules|inspect|instance_eval|'
  570. r'instance_method|instance_methods|'
  571. r'instance_variable_get|instance_variable_set|instance_variables|'
  572. r'lambda|load|local_variables|loop|'
  573. r'method|method_missing|methods|module_eval|name|'
  574. r'object_id|open|p|print|printf|private_class_method|'
  575. r'private_instance_methods|'
  576. r'private_methods|proc|protected_instance_methods|'
  577. r'protected_methods|public_class_method|'
  578. r'public_instance_methods|public_methods|'
  579. r'putc|puts|raise|rand|readline|readlines|require|'
  580. r'scan|select|self|send|set_trace_func|singleton_methods|sleep|'
  581. r'split|sprintf|srand|sub|syscall|system|taint|'
  582. r'test|throw|to_a|to_s|trace_var|trap|untaint|untrace_var|'
  583. r'warn)\b', Name.Builtin),
  584. (r'__(FILE|LINE)__\b', Name.Builtin.Pseudo),
  585. # normal heredocs
  586. (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
  587. heredoc_callback),
  588. # empty string heredocs
  589. (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
  590. (r'__END__', Comment.Preproc, 'end-part'),
  591. # multiline regex (after keywords or assignments)
  592. (r'(?:^|(?<=[=<>~!])|'
  593. r'(?<=(?:\s|;)when\s)|'
  594. r'(?<=(?:\s|;)or\s)|'
  595. r'(?<=(?:\s|;)and\s)|'
  596. r'(?<=(?:\s|;|\.)index\s)|'
  597. r'(?<=(?:\s|;|\.)scan\s)|'
  598. r'(?<=(?:\s|;|\.)sub\s)|'
  599. r'(?<=(?:\s|;|\.)sub!\s)|'
  600. r'(?<=(?:\s|;|\.)gsub\s)|'
  601. r'(?<=(?:\s|;|\.)gsub!\s)|'
  602. r'(?<=(?:\s|;|\.)match\s)|'
  603. r'(?<=(?:\s|;)if\s)|'
  604. r'(?<=(?:\s|;)elsif\s)|'
  605. r'(?<=^when\s)|'
  606. r'(?<=^index\s)|'
  607. r'(?<=^scan\s)|'
  608. r'(?<=^sub\s)|'
  609. r'(?<=^gsub\s)|'
  610. r'(?<=^sub!\s)|'
  611. r'(?<=^gsub!\s)|'
  612. r'(?<=^match\s)|'
  613. r'(?<=^if\s)|'
  614. r'(?<=^elsif\s)'
  615. r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'),
  616. # multiline regex (in method calls or subscripts)
  617. (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
  618. # multiline regex (this time the funny no whitespace rule)
  619. (r'(\s+)(/)(?![\s=])', bygroups(Text, String.Regex),
  620. 'multiline-regex'),
  621. # lex numbers and ignore following regular expressions which
  622. # are division operators in fact (grrrr. i hate that. any
  623. # better ideas?)
  624. # since pygments 0.7 we also eat a "?" operator after numbers
  625. # so that the char operator does not work. Chars are not allowed
  626. # there so that you can use the ternary operator.
  627. # stupid example:
  628. # x>=0?n[x]:""
  629. (r'(0_?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
  630. bygroups(Number.Oct, Text, Operator)),
  631. (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
  632. bygroups(Number.Hex, Text, Operator)),
  633. (r'(0b[01]+(?:_[01]+)*)(\s*)([/?])?',
  634. bygroups(Number.Bin, Text, Operator)),
  635. (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
  636. bygroups(Number.Integer, Text, Operator)),
  637. # Names
  638. (r'@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class),
  639. (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance),
  640. (r'\$[a-zA-Z0-9_]+', Name.Variable.Global),
  641. (r'\$[!@&`\'+~=/\\,;.<>_*$?:"]', Name.Variable.Global),
  642. (r'\$-[0adFiIlpvw]', Name.Variable.Global),
  643. (r'::', Operator),
  644. include('strings'),
  645. # chars
  646. (r'\?(\\[MC]-)*' # modifiers
  647. r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
  648. r'(?!\w)',
  649. String.Char),
  650. (r'[A-Z][a-zA-Z0-9_]+', Name.Constant),
  651. # this is needed because ruby attributes can look
  652. # like keywords (class) or like this: ` ?!?
  653. (r'(\.|::)([a-zA-Z_]\w*[\!\?]?|[*%&^`~+-/\[<>=])',
  654. bygroups(Operator, Name)),
  655. (r'[a-zA-Z_]\w*[\!\?]?', Name),
  656. (r'(\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|'
  657. r'!~|&&?|\|\||\.{1,3})', Operator),
  658. (r'[-+/*%=<>&!^|~]=?', Operator),
  659. (r'[(){};,/?:\\]', Punctuation),
  660. (r'\s+', Text)
  661. ],
  662. 'funcname': [
  663. (r'\(', Punctuation, 'defexpr'),
  664. (r'(?:([a-zA-Z_][a-zA-Z0-9_]*)(\.))?'
  665. r'([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|'
  666. r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
  667. bygroups(Name.Class, Operator, Name.Function), '#pop'),
  668. (r'', Text, '#pop')
  669. ],
  670. 'classname': [
  671. (r'\(', Punctuation, 'defexpr'),
  672. (r'<<', Operator, '#pop'),
  673. (r'[A-Z_]\w*', Name.Class, '#pop'),
  674. (r'', Text, '#pop')
  675. ],
  676. 'defexpr': [
  677. (r'(\))(\.|::)?', bygroups(Punctuation, Operator), '#pop'),
  678. (r'\(', Operator, '#push'),
  679. include('root')
  680. ],
  681. 'in-intp': [
  682. ('}', String.Interpol, '#pop'),
  683. include('root'),
  684. ],
  685. 'string-intp': [
  686. (r'#{', String.Interpol, 'in-intp'),
  687. (r'#@@?[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol),
  688. (r'#\$[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol)
  689. ],
  690. 'string-intp-escaped': [
  691. include('string-intp'),
  692. (r'\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})',
  693. String.Escape)
  694. ],
  695. 'interpolated-regex': [
  696. include('string-intp'),
  697. (r'[\\#]', String.Regex),
  698. (r'[^\\#]+', String.Regex),
  699. ],
  700. 'interpolated-string': [
  701. include('string-intp'),
  702. (r'[\\#]', String.Other),
  703. (r'[^\\#]+', String.Other),
  704. ],
  705. 'multiline-regex': [
  706. include('string-intp'),
  707. (r'\\\\', String.Regex),
  708. (r'\\/', String.Regex),
  709. (r'[\\#]', String.Regex),
  710. (r'[^\\/#]+', String.Regex),
  711. (r'/[mixounse]*', String.Regex, '#pop'),
  712. ],
  713. 'end-part': [
  714. (r'.+', Comment.Preproc, '#pop')
  715. ]
  716. }
  717. tokens.update(gen_rubystrings_rules())
  718. def analyse_text(text):
  719. return shebang_matches(text, r'ruby(1\.\d)?')
  720. class RubyConsoleLexer(Lexer):
  721. """
  722. For Ruby interactive console (**irb**) output like:
  723. .. sourcecode:: rbcon
  724. irb(main):001:0> a = 1
  725. => 1
  726. irb(main):002:0> puts a
  727. 1
  728. => nil
  729. """
  730. name = 'Ruby irb session'
  731. aliases = ['rbcon', 'irb']
  732. mimetypes = ['text/x-ruby-shellsession']
  733. _prompt_re = re.compile('irb\([a-zA-Z_][a-zA-Z0-9_]*\):\d{3}:\d+[>*"\'] '
  734. '|>> |\?> ')
  735. def get_tokens_unprocessed(self, text):
  736. rblexer = RubyLexer(**self.options)
  737. curcode = ''
  738. insertions = []
  739. for match in line_re.finditer(text):
  740. line = match.group()
  741. m = self._prompt_re.match(line)
  742. if m is not None:
  743. end = m.end()
  744. insertions.append((len(curcode),
  745. [(0, Generic.Prompt, line[:end])]))
  746. curcode += line[end:]
  747. else:
  748. if curcode:
  749. for item in do_insertions(insertions,
  750. rblexer.get_tokens_unprocessed(curcode)):
  751. yield item
  752. curcode = ''
  753. insertions = []
  754. yield match.start(), Generic.Output, line
  755. if curcode:
  756. for item in do_insertions(insertions,
  757. rblexer.get_tokens_unprocessed(curcode)):
  758. yield item
class PerlLexer(RegexLexer):
    """
    For `Perl <http://www.perl.org>`_ source code.
    """

    name = 'Perl'
    aliases = ['perl', 'pl']
    filenames = ['*.pl', '*.pm']
    mimetypes = ['text/x-perl', 'application/x-perl']

    # DOTALL so multi-line constructs (heredocs, POD, q() strings) can be
    # matched with a single pattern; MULTILINE so ^/$ anchor per line.
    flags = re.DOTALL | re.MULTILINE
    # TODO: give this to a perl guy who knows how to parse perl...
    tokens = {
        # Second half of an s{...}{...}-style substitution: one alternative
        # per possible delimiter, each popping back to 'root' when done.
        'balanced-regex': [
            (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'{(\\\\|\\[^\\]|[^\\}])*}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\[^\\]|[^\\\)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\[^\\]|[^\\\@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\[^\\]|[^\\\%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\[^\\]|[^\\\$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        'root': [
            (r'\#.*?$', Comment.Single),
            # POD documentation: =pod/=head1/... up to =cut.
            (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
            (r'(case|continue|do|else|elsif|for|foreach|if|last|my|'
             r'next|our|redo|reset|then|unless|until|while|use|'
             r'print|new|BEGIN|CHECK|INIT|END|return)\b', Keyword),

            # format NAME = ... ends with a lone '.' (handled in 'format').
            (r'(format)(\s+)([a-zA-Z0-9_]+)(\s*)(=)(\s*\n)',
             bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
            (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),

            # common delimiters
            (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
                String.Regex),
            (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
            (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
            (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
                String.Regex),
            (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
                String.Regex),

            # balanced delimiters: match the first bracketed half here, let
            # the 'balanced-regex' state consume the replacement part.
            (r's{(\\\\|\\[^\\]|[^\\}])*}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
                'balanced-regex'),
            (r's\((\\\\|\\[^\\]|[^\\\)])*\)\s*', String.Regex,
                'balanced-regex'),

            (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
            # m followed by any supported delimiter: defer to balanced-regex.
            (r'm(?=[/!\\{<\[\(@%\$])', String.Regex, 'balanced-regex'),
            # a bare /regex/ is only recognized after =~ or an opening paren,
            # to avoid mistaking division for a regex.
            (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
                String.Regex),
            (r'\s+', Text),
            (r'(abs|accept|alarm|atan2|bind|binmode|bless|caller|chdir|'
             r'chmod|chomp|chop|chown|chr|chroot|close|closedir|connect|'
             r'continue|cos|crypt|dbmclose|dbmopen|defined|delete|die|'
             r'dump|each|endgrent|endhostent|endnetent|endprotoent|'
             r'endpwent|endservent|eof|eval|exec|exists|exit|exp|fcntl|'
             r'fileno|flock|fork|format|formline|getc|getgrent|getgrgid|'
             r'getgrnam|gethostbyaddr|gethostbyname|gethostent|getlogin|'
             r'getnetbyaddr|getnetbyname|getnetent|getpeername|getpgrp|'
             r'getppid|getpriority|getprotobyname|getprotobynumber|'
             r'getprotoent|getpwent|getpwnam|getpwuid|getservbyname|'
             r'getservbyport|getservent|getsockname|getsockopt|glob|gmtime|'
             r'goto|grep|hex|import|index|int|ioctl|join|keys|kill|last|'
             r'lc|lcfirst|length|link|listen|local|localtime|log|lstat|'
             r'map|mkdir|msgctl|msgget|msgrcv|msgsnd|my|next|no|oct|open|'
             r'opendir|ord|our|pack|package|pipe|pop|pos|printf|'
             r'prototype|push|quotemeta|rand|read|readdir|'
             r'readline|readlink|readpipe|recv|redo|ref|rename|require|'
             r'reverse|rewinddir|rindex|rmdir|scalar|seek|seekdir|'
             r'select|semctl|semget|semop|send|setgrent|sethostent|setnetent|'
             r'setpgrp|setpriority|setprotoent|setpwent|setservent|'
             r'setsockopt|shift|shmctl|shmget|shmread|shmwrite|shutdown|'
             r'sin|sleep|socket|socketpair|sort|splice|split|sprintf|sqrt|'
             r'srand|stat|study|substr|symlink|syscall|sysopen|sysread|'
             r'sysseek|system|syswrite|tell|telldir|tie|tied|time|times|tr|'
             r'truncate|uc|ucfirst|umask|undef|unlink|unpack|unshift|untie|'
             r'utime|values|vec|wait|waitpid|wantarray|warn|write'
             r')\b', Name.Builtin),
            (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
            # heredocs: <<EOF / <<'EOF' / <<"EOF" through the matching
            # terminator line (backreference \2).
            (r'<<([\'"]?)([a-zA-Z_][a-zA-Z0-9_]*)\1;?\n.*?\n\2\n', String),
            (r'__END__', Comment.Preproc, 'end-part'),
            # special variables: $^W, $^O, ... then the punctuation variables
            # like $_, $&, $@, etc.
            (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
            (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
            (r'[$@%#]+', Name.Variable, 'varname'),

            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),

            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
            (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
            (r'<([^\s>]+)>', String.Regex),
            # q/qq/qw/qr/qx with bracketing delimiters go to nesting-aware
            # states; any other single-character delimiter is matched inline.
            (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
            (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
            (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
            (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
            (r'(q|qq|qw|qr|qx)([^a-zA-Z0-9])(.|\n)*?\2', String.Other),
            (r'package\s+', Keyword, 'modulename'),
            (r'sub\s+', Keyword, 'funcname'),
            (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&^|!\\~]=?', Operator),
            (r'[\(\)\[\]:;,<>/\?\{\}]', Punctuation), # yes, there's no shortage
                                                      # of punctuation in Perl!
            (r'(?=\w)', Name, 'name'),
        ],
        # body of a 'format' declaration; a line containing just '.' ends it
        'format': [
            (r'\.\n', String.Interpol, '#pop'),
            (r'[^\n]*\n', String.Interpol),
        ],
        # after a sigil ($ @ % #): the variable name, possibly qualified
        'varname': [
            (r'\s+', Text),
            (r'\{', Punctuation, '#pop'), # hash syntax?
            (r'\)|,', Punctuation, '#pop'), # argument specifier
            (r'[a-zA-Z0-9_]+::', Name.Namespace),
            (r'[a-zA-Z0-9_:]+', Name.Variable, '#pop'),
        ],
        # bareword/identifier lookup
        # NOTE(review): the Name.Constant rule below can never fire, because
        # the [a-zA-Z0-9_:]+ rule above it already matches any [A-Z_]+ run.
        'name': [
            (r'[a-zA-Z0-9_]+::', Name.Namespace),
            (r'[a-zA-Z0-9_:]+', Name, '#pop'),
            (r'[A-Z_]+(?=[^a-zA-Z0-9_])', Name.Constant, '#pop'),
            (r'(?=[^a-zA-Z0-9_])', Text, '#pop'),
        ],
        'modulename': [
            (r'[a-zA-Z_]\w*', Name.Namespace, '#pop')
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*[\!\?]?', Name.Function),
            (r'\s+', Text),
            # argument declaration
            (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
            (r'.*?{', Punctuation, '#pop'),
            (r';', Punctuation, '#pop'),
        ],
        # q{...} string with nested curly braces
        'cb-string': [
            (r'\\[\{\}\\]', String.Other),
            (r'\\', String.Other),
            (r'\{', String.Other, 'cb-string'),
            (r'\}', String.Other, '#pop'),
            (r'[^\{\}\\]+', String.Other)
        ],
        # q(...) string with nested parentheses
        'rb-string': [
            (r'\\[\(\)\\]', String.Other),
            (r'\\', String.Other),
            (r'\(', String.Other, 'rb-string'),
            (r'\)', String.Other, '#pop'),
            (r'[^\(\)]+', String.Other)
        ],
        # q[...] string with nested square brackets
        'sb-string': [
            (r'\\[\[\]\\]', String.Other),
            (r'\\', String.Other),
            (r'\[', String.Other, 'sb-string'),
            (r'\]', String.Other, '#pop'),
            (r'[^\[\]]+', String.Other)
        ],
        # q<...> string with nested angle brackets
        'lt-string': [
            (r'\\[\<\>\\]', String.Other),
            (r'\\', String.Other),
            (r'\<', String.Other, 'lt-string'),
            (r'\>', String.Other, '#pop'),
            (r'[^\<\>]+', String.Other)
        ],
        # everything after __END__ is data, lexed as one preproc comment
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }

    def analyse_text(text):
        # Called unbound by Pygments' guess_lexer machinery (no self).
        if shebang_matches(text, r'perl'):
            return True
        if 'my $' in text:
            return 0.9
        return 0.1 # who knows, might still be perl!
  936. class LuaLexer(RegexLexer):
  937. """
  938. For `Lua <http://www.lua.org>`_ source code.
  939. Additional options accepted:
  940. `func_name_highlighting`
  941. If given and ``True``, highlight builtin function names
  942. (default: ``True``).
  943. `disabled_modules`
  944. If given, must be a list of module names whose function names
  945. should not be highlighted. By default all modules are highlighted.
  946. To get a list of allowed modules have a look into the
  947. `_luabuiltins` module:
  948. .. sourcecode:: pycon
  949. >>> from pygments.lexers._luabuiltins import MODULES
  950. >>> MODULES.keys()
  951. ['string', 'coroutine', 'modules', 'io', 'basic', ...]
  952. """
  953. name = 'Lua'
  954. aliases = ['lua']
  955. filenames = ['*.lua', '*.wlua']
  956. mimetypes = ['text/x-lua', 'application/x-lua']
  957. tokens = {
  958. 'root': [
  959. # lua allows a file to start with a shebang
  960. (r'#!(.*?)$', Comment.Preproc),
  961. (r'', Text, 'base'),
  962. ],
  963. 'base': [
  964. (r'(?s)--\[(=*)\[.*?\]\1\]', Comment.Multiline),
  965. ('--.*$', Comment.Single),
  966. (r'(?i)(\d*\.\d+|\d+\.\d*)(e[+-]?\d+)?', Number.Float),
  967. (r'(?i)\d+e[+-]?\d+', Number.Float),
  968. ('(?i)0x[0-9a-f]*', Number.Hex),
  969. (r'\d+', Number.Integer),
  970. (r'\n', Text),
  971. (r'[^\S\n]', Text),
  972. # multiline strings
  973. (r'(?s)\[(=*)\[.*?\]\1\]', String),
  974. (r'(==|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#])', Operator),
  975. (r'[\[\]\{\}\(\)\.,:;]', Punctuation),
  976. (r'(and|or|not)\b', Operator.Word),
  977. ('(break|do|else|elseif|end|for|if|in|repeat|return|then|until|'
  978. r'while)\b', Keyword),
  979. (r'(local)\b', Keyword.Declaration),
  980. (r'(true|false|nil)\b', Keyword.Constant),
  981. (r'(function)\b', Keyword, 'funcname'),
  982. (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),
  983. ("'", String.Single, combined('stringescape', 'sqs')),
  984. ('"', String.Double, combined('stringescape', 'dqs'))
  985. ],
  986. 'funcname': [
  987. (r'\s+', Text),
  988. ('(?:([A-Za-z_][A-Za-z0-9_]*)(\.))?([A-Za-z_][A-Za-z0-9_]*)',
  989. bygroups(Name.Class, Punctuation, Name.Function), '#pop'),
  990. # inline function
  991. ('\(', Punctuation, '#pop'),
  992. ],
  993. # if I understand correctly, every character is valid in a lua string,
  994. # so this state is only for later corrections
  995. 'string': [
  996. ('.', String)
  997. ],
  998. 'stringescape': [
  999. (r'''\\([abfnrtv\\"']|\d{1,3})''', String.Escape)
  1000. ],
  1001. 'sqs': [
  1002. ("'", String, '#pop'),
  1003. include('string')
  1004. ],
  1005. 'dqs': [
  1006. ('"', String, '#pop'),
  1007. include('string')
  1008. ]
  1009. }
  1010. def __init__(self, **options):
  1011. self.func_name_highlighting = get_bool_opt(
  1012. options, 'func_name_highlighting', True)
  1013. self.disabled_modules = get_list_opt(options, 'disabled_modules', [])
  1014. self._functions = set()
  1015. if self.func_name_highlighting:
  1016. from pygments.lexers._luabuiltins import MODULES
  1017. for mod, func in MODULES.iteritems():
  1018. if mod not in self.disabled_modules:
  1019. self._functions.update(func)
  1020. RegexLexer.__init__(self, **options)
  1021. def get_tokens_unprocessed(self, text):
  1022. for index, token, value in \
  1023. RegexLexer.get_tokens_unprocessed(self, text):
  1024. if token is Name:
  1025. if value in self._functions:
  1026. yield index, Name.Builtin, value
  1027. continue
  1028. elif '.' in value:
  1029. a, b = value.split('.')
  1030. yield index, Name, a
  1031. yield index + len(a), Punctuation, u'.'
  1032. yield index + len(a) + 1, Name, b
  1033. continue
  1034. yield index, token, value
class MoonScriptLexer(LuaLexer):
    """
    For `MoonScript <http://moonscript.org>`_ source code.

    *New in Pygments 1.5.*
    """

    name = "MoonScript"
    aliases = ["moon", "moonscript"]
    filenames = ["*.moon"]
    mimetypes = ['text/x-moonscript', 'application/x-moonscript']

    tokens = {
        'root': [
            # MoonScript files may start with a shebang, like Lua
            (r'#!(.*?)$', Comment.Preproc),
            (r'', Text, 'base'),
        ],
        'base': [
            ('--.*$', Comment.Single),
            (r'(?i)(\d*\.\d+|\d+\.\d*)(e[+-]?\d+)?', Number.Float),
            (r'(?i)\d+e[+-]?\d+', Number.Float),
            (r'(?i)0x[0-9a-f]*', Number.Hex),
            (r'\d+', Number.Integer),
            (r'\n', Text),
            (r'[^\S\n]+', Text),
            # Lua-style long strings [[ ... ]] with optional = padding
            (r'(?s)\[(=*)\[.*?\]\1\]', String),
            # arrow function literals
            (r'(->|=>)', Name.Function),
            (r':[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable),
            (r'(==|!=|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#!.\\:])', Operator),
            (r'[;,]', Punctuation),
            (r'[\[\]\{\}\(\)]', Keyword.Type),
            # table key in `key: value` position
            (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Variable),
            (r"(class|extends|if|then|super|do|with|import|export|"
             r"while|elseif|return|for|in|from|when|using|else|"
             r"and|or|not|switch|break)\b", Keyword),
            (r'(true|false|nil)\b', Keyword.Constant),
            # NOTE(review): this rule is unreachable — and/or/not are
            # already consumed by the Keyword rule above.
            (r'(and|or|not)\b', Operator.Word),
            (r'(self)\b', Name.Builtin.Pseudo),
            # @field / @@classfield references
            (r'@@?([a-zA-Z_][a-zA-Z0-9_]*)?', Name.Variable.Class),
            (r'[A-Z]\w*', Name.Class), # proper name
            (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),
            ("'", String.Single, combined('stringescape', 'sqs')),
            ('"', String.Double, combined('stringescape', 'dqs'))
        ],
        'stringescape': [
            (r'''\\([abfnrtv\\"']|\d{1,3})''', String.Escape)
        ],
        'sqs': [
            ("'", String.Single, '#pop'),
            (".", String)
        ],
        'dqs': [
            ('"', String.Double, '#pop'),
            (".", String)
        ]
    }

    def get_tokens_unprocessed(self, text):
        # set . as Operator instead of Punctuation
        for index, token, value in \
            LuaLexer.get_tokens_unprocessed(self, text):
            if token == Punctuation and value == ".":
                token = Operator
            yield index, token, value
  1095. class CrocLexer(RegexLexer):
  1096. """
  1097. For `Croc <http://jfbillingsley.com/croc>`_ source.
  1098. """
  1099. name = 'Croc'
  1100. filenames = ['*.croc']
  1101. aliases = ['croc']
  1102. mimetypes = ['text/x-crocsrc']
  1103. tokens = {
  1104. 'root': [
  1105. (r'\n', Text),
  1106. (r'\s+', Text),
  1107. # Comments
  1108. (r'//(.*?)\n', Comment.Single),
  1109. (r'/\*', Comment.Multiline, 'nestedcomment'),
  1110. # Keywords
  1111. (r'(as|assert|break|case|catch|class|continue|default'
  1112. r'|do|else|finally|for|foreach|function|global|namespace'
  1113. r'|if|import|in|is|local|module|return|scope|super|switch'
  1114. r'|this|throw|try|vararg|while|with|yield)\b', Keyword),
  1115. (r'(false|true|null)\b', Keyword.Constant),
  1116. # FloatLiteral

Large files are truncated, but you can click here to view the full file