PageRenderTime 69ms CodeModel.GetById 36ms RepoModel.GetById 0ms app.codeStats 0ms

/console/app/pygments/lexers/compiled.py

https://bitbucket.org/alex_muscar/myspace-competition-radar
Python | 1251 lines | 1089 code | 53 blank | 109 comment | 89 complexity | c35a71fe239c7fc3bcf58b1e9b5976ea MD5 | raw file
Possible License(s): GPL-3.0
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.compiled
  4. ~~~~~~~~~~~~~~~~~~~~~~~~
  5. Lexers for compiled languages.
  6. :copyright: 2006-2008 by Georg Brandl, Armin Ronacher, Christoph Hack,
  7. Whitney Young, Kirk McDonald, Stou Sandalski, Krzysiek Goj.
  8. :license: BSD, see LICENSE for more details.
  9. """
  10. import re
  11. try:
  12. set
  13. except NameError:
  14. from sets import Set as set
  15. from pygments.scanner import Scanner
  16. from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
  17. this
  18. from pygments.util import get_bool_opt, get_list_opt
  19. from pygments.token import \
  20. Text, Comment, Operator, Keyword, Name, String, Number, Punctuation, \
  21. Error
  22. # backwards compatibility
  23. from pygments.lexers.functional import OcamlLexer
  24. __all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'JavaLexer', 'ScalaLexer',
  25. 'DylanLexer', 'OcamlLexer', 'ObjectiveCLexer', 'FortranLexer']
  26. class CLexer(RegexLexer):
  27. """
  28. For C source code with preprocessor directives.
  29. """
  30. name = 'C'
  31. aliases = ['c']
  32. filenames = ['*.c', '*.h']
  33. mimetypes = ['text/x-chdr', 'text/x-csrc']
  34. #: optional Comment or Whitespace
  35. _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'
  36. tokens = {
  37. 'whitespace': [
  38. (r'^\s*#if\s+0', Comment.Preproc, 'if0'),
  39. (r'^\s*#', Comment.Preproc, 'macro'),
  40. (r'\n', Text),
  41. (r'\s+', Text),
  42. (r'\\\n', Text), # line continuation
  43. (r'//(\n|(.|\n)*?[^\\]\n)', Comment),
  44. (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment),
  45. ],
  46. 'statements': [
  47. (r'L?"', String, 'string'),
  48. (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
  49. (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
  50. (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
  51. (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
  52. (r'0[0-7]+[Ll]?', Number.Oct),
  53. (r'\d+[Ll]?', Number.Integer),
  54. (r'[~!%^&*+=|?:<>/-]', Operator),
  55. (r'[()\[\],.]', Punctuation),
  56. (r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)),
  57. (r'(auto|break|case|const|continue|default|do|else|enum|extern|'
  58. r'for|goto|if|register|restricted|return|sizeof|static|struct|'
  59. r'switch|typedef|union|volatile|virtual|while)\b', Keyword),
  60. (r'(int|long|float|short|double|char|unsigned|signed|void)\b',
  61. Keyword.Type),
  62. (r'(_{0,2}inline|naked|restrict|thread|typename)\b', Keyword.Reserved),
  63. (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|'
  64. r'declspec|finally|int64|try|leave)\b', Keyword.Reserved),
  65. (r'(true|false|NULL)\b', Name.Builtin),
  66. ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label),
  67. ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
  68. ],
  69. 'root': [
  70. include('whitespace'),
  71. # functions
  72. (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments
  73. r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name
  74. r'(\s*\([^;]*?\))' # signature
  75. r'(' + _ws + r')({)',
  76. bygroups(using(this), Name.Function, using(this), using(this),
  77. Punctuation),
  78. 'function'),
  79. # function declarations
  80. (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))' # return arguments
  81. r'([a-zA-Z_][a-zA-Z0-9_]*)' # method name
  82. r'(\s*\([^;]*?\))' # signature
  83. r'(' + _ws + r')(;)',
  84. bygroups(using(this), Name.Function, using(this), using(this),
  85. Punctuation)),
  86. ('', Text, 'statement'),
  87. ],
  88. 'statement' : [
  89. include('whitespace'),
  90. include('statements'),
  91. ('[{}]', Punctuation),
  92. (';', Punctuation, '#pop'),
  93. ],
  94. 'function': [
  95. include('whitespace'),
  96. include('statements'),
  97. (';', Punctuation),
  98. ('{', Punctuation, '#push'),
  99. ('}', Punctuation, '#pop'),
  100. ],
  101. 'string': [
  102. (r'"', String, '#pop'),
  103. (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
  104. (r'[^\\"\n]+', String), # all other characters
  105. (r'\\\n', String), # line continuation
  106. (r'\\', String), # stray backslash
  107. ],
  108. 'macro': [
  109. (r'[^/\n]+', Comment.Preproc),
  110. (r'/[*](.|\n)*?[*]/', Comment),
  111. (r'//.*?\n', Comment, '#pop'),
  112. (r'/', Comment.Preproc),
  113. (r'(?<=\\)\n', Comment.Preproc),
  114. (r'\n', Comment.Preproc, '#pop'),
  115. ],
  116. 'if0': [
  117. (r'^\s*#if.*?(?<!\\)\n', Comment, '#push'),
  118. (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
  119. (r'^\s*#endif.*?(?<!\\)\n', Comment, '#pop'),
  120. (r'.*?\n', Comment),
  121. ]
  122. }
  123. stdlib_types = ['size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t',
  124. 'sig_atomic_t', 'fpos_t', 'clock_t', 'time_t', 'va_list',
  125. 'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t', 'mbstate_t',
  126. 'wctrans_t', 'wint_t', 'wctype_t']
  127. c99_types = ['_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t',
  128. 'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
  129. 'int_least16_t', 'int_least32_t', 'int_least64_t',
  130. 'uint_least8_t', 'uint_least16_t', 'uint_least32_t',
  131. 'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t',
  132. 'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t',
  133. 'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t']
  134. def __init__(self, **options):
  135. self.stdlibhighlighting = get_bool_opt(options,
  136. 'stdlibhighlighting', True)
  137. self.c99highlighting = get_bool_opt(options,
  138. 'c99highlighting', True)
  139. RegexLexer.__init__(self, **options)
  140. def get_tokens_unprocessed(self, text):
  141. for index, token, value in \
  142. RegexLexer.get_tokens_unprocessed(self, text):
  143. if token is Name:
  144. if self.stdlibhighlighting and value in self.stdlib_types:
  145. token = Keyword.Type
  146. elif self.c99highlighting and value in self.c99_types:
  147. token = Keyword.Type
  148. yield index, token, value
  149. class CppLexer(RegexLexer):
  150. """
  151. For C++ source code with preprocessor directives.
  152. """
  153. name = 'C++'
  154. aliases = ['cpp', 'c++']
  155. filenames = ['*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx']
  156. mimetypes = ['text/x-c++hdr', 'text/x-c++src']
  157. tokens = {
  158. 'root': [
  159. (r'^\s*#if\s+0', Comment.Preproc, 'if0'),
  160. (r'^\s*#', Comment.Preproc, 'macro'),
  161. (r'\n', Text),
  162. (r'\s+', Text),
  163. (r'\\\n', Text), # line continuation
  164. (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment),
  165. (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment),
  166. (r'[{}]', Punctuation),
  167. (r'L?"', String, 'string'),
  168. (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
  169. (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
  170. (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
  171. (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
  172. (r'0[0-7]+[Ll]?', Number.Oct),
  173. (r'\d+[Ll]?', Number.Integer),
  174. (r'[~!%^&*+=|?:<>/-]', Operator),
  175. (r'[()\[\],.;]', Punctuation),
  176. (r'(asm|auto|break|case|catch|const|const_cast|continue|'
  177. r'default|delete|do|dynamic_cast|else|enum|explicit|export|'
  178. r'extern|for|friend|goto|if|mutable|namespace|new|operator|'
  179. r'private|protected|public|register|reinterpret_cast|return|'
  180. r'restrict|sizeof|static|static_cast|struct|switch|template|'
  181. r'this|throw|throws|try|typedef|typeid|typename|union|using|'
  182. r'volatile|virtual|while)\b', Keyword),
  183. (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
  184. (r'(bool|int|long|float|short|double|char|unsigned|signed|'
  185. r'void|wchar_t)\b', Keyword.Type),
  186. (r'(_{0,2}inline|naked|thread)\b', Keyword.Reserved),
  187. (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|'
  188. r'declspec|finally|int64|try|leave|wchar_t|w64|virtual_inheritance|'
  189. r'uuidof|unaligned|super|single_inheritance|raise|noop|'
  190. r'multiple_inheritance|m128i|m128d|m128|m64|interface|'
  191. r'identifier|forceinline|event|assume)\b', Keyword.Reserved),
  192. (r'(true|false)\b', Keyword.Constant),
  193. (r'NULL\b', Name.Builtin),
  194. ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label),
  195. ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
  196. ],
  197. 'classname': [
  198. (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'),
  199. # template specification
  200. (r'\s*(?=>)', Text, '#pop'),
  201. ],
  202. 'string': [
  203. (r'"', String, '#pop'),
  204. (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
  205. (r'[^\\"\n]+', String), # all other characters
  206. (r'\\\n', String), # line continuation
  207. (r'\\', String), # stray backslash
  208. ],
  209. 'macro': [
  210. (r'[^/\n]+', Comment.Preproc),
  211. (r'/[*](.|\n)*?[*]/', Comment),
  212. (r'//.*?\n', Comment, '#pop'),
  213. (r'/', Comment.Preproc),
  214. (r'(?<=\\)\n', Comment.Preproc),
  215. (r'\n', Comment.Preproc, '#pop'),
  216. ],
  217. 'if0': [
  218. (r'^\s*#if.*?(?<!\\)\n', Comment, '#push'),
  219. (r'^\s*#endif.*?(?<!\\)\n', Comment, '#pop'),
  220. (r'.*?\n', Comment),
  221. ]
  222. }
  223. class DLexer(RegexLexer):
  224. """
  225. For D source.
  226. """
  227. name = 'D'
  228. filenames = ['*.d', '*.di']
  229. aliases = ['d']
  230. mimetypes = ['text/x-dsrc']
  231. tokens = {
  232. 'root': [
  233. (r'\n', Text),
  234. (r'\s+', Text),
  235. #(r'\\\n', Text), # line continuations
  236. # Comments
  237. (r'//(.*?)\n', Comment),
  238. (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment),
  239. (r'/\+', Comment, 'nested_comment'),
  240. # Keywords
  241. (r'(abstract|alias|align|asm|assert|auto|body|break|case|cast'
  242. r'|catch|class|const|continue|debug|default|delegate|delete'
  243. r'|deprecated|do|else|enum|export|extern|finally|final'
  244. r'|foreach_reverse|foreach|for|function|goto|if|import|inout'
  245. r'|interface|invariant|in|is|lazy|mixin|module|new|nothrow|out'
  246. r'|override|package|pragma|private|protected|public|pure|ref|return'
  247. r'|scope|static|struct|super|switch|synchronized|template|this'
  248. r'|throw|try|typedef|typeid|typeof|union|unittest|version|volatile'
  249. r'|while|with|__traits)\b', Keyword
  250. ),
  251. (r'(bool|byte|cdouble|cent|cfloat|char|creal|dchar|double|float'
  252. r'|idouble|ifloat|int|ireal|long|real|short|ubyte|ucent|uint|ulong'
  253. r'|ushort|void|wchar)\b', Keyword.Type
  254. ),
  255. (r'(false|true|null)\b', Keyword.Constant),
  256. (r'macro\b', Keyword.Reserved),
  257. (r'(string|wstring|dstring)\b', Name.Builtin),
  258. # FloatLiteral
  259. # -- HexFloat
  260. (r'0[xX]([0-9a-fA-F_]*\.[0-9a-fA-F_]+|[0-9a-fA-F_]+)'
  261. r'[pP][+\-]?[0-9_]+[fFL]?[i]?', Number.Float),
  262. # -- DecimalFloat
  263. (r'[0-9_]+(\.[0-9_]+[eE][+\-]?[0-9_]+|'
  264. r'\.[0-9_]*|[eE][+\-]?[0-9_]+)[fFL]?[i]?', Number.Float),
  265. (r'\.(0|[1-9][0-9_]*)([eE][+\-]?[0-9_]+)?[fFL]?[i]?', Number.Float),
  266. # IntegerLiteral
  267. # -- Binary
  268. (r'0[Bb][01_]+', Number),
  269. # -- Octal
  270. (r'0[0-7_]+', Number.Oct),
  271. # -- Hexadecimal
  272. (r'0[xX][0-9a-fA-F_]+', Number.Hex),
  273. # -- Decimal
  274. (r'(0|[1-9][0-9_]*)([LUu]|Lu|LU|uL|UL)?', Number.Integer),
  275. # CharacterLiteral
  276. (r"""'(\\['"?\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}"""
  277. r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\&\w+;|.)'""",
  278. String.Char
  279. ),
  280. # StringLiteral
  281. # -- WysiwygString
  282. (r'r"[^"]*"[cwd]?', String),
  283. # -- AlternateWysiwygString
  284. (r'`[^`]*`[cwd]?', String),
  285. # -- DoubleQuotedString
  286. (r'"(\\\\|\\"|[^"])*"[cwd]?', String),
  287. # -- EscapeSequence
  288. (r"""\\(['"?\\abfnrtv]|x[0-9a-fA-F]{2}|[0-7]{1,3}"""
  289. r"""|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|&\w+;)""",
  290. String
  291. ),
  292. # -- HexString
  293. (r'x"[0-9a-fA-F_\s]*"[cwd]?', String),
  294. # -- DelimitedString
  295. (r'q"\[', String, 'delimited_bracket'),
  296. (r'q"\(', String, 'delimited_parenthesis'),
  297. (r'q"<', String, 'delimited_angle'),
  298. (r'q"{', String, 'delimited_curly'),
  299. (r'q"([a-zA-Z_]\w*)\n.*?\n\1"', String),
  300. (r'q"(.).*?\1"', String),
  301. # -- TokenString
  302. (r'q{', String, 'token_string'),
  303. # Tokens
  304. (r'(~=|\^=|%=|\*=|==|!>=|!<=|!<>=|!<>|!<|!>|!=|>>>=|>>>|>>=|>>|>='
  305. r'|<>=|<>|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.\.|\.\.|/=)'
  306. r'|[/.&|\-+<>!()\[\]{}?,;:$=*%^~]', Punctuation
  307. ),
  308. # Identifier
  309. (r'[a-zA-Z_]\w*', Name),
  310. ],
  311. 'nested_comment': [
  312. (r'[^+/]+', Comment),
  313. (r'/\+', Comment, '#push'),
  314. (r'\+/', Comment, '#pop'),
  315. (r'[+/]', Comment),
  316. ],
  317. 'token_string': [
  318. (r'{', Punctuation, 'token_string_nest'),
  319. (r'}', String, '#pop'),
  320. include('root'),
  321. ],
  322. 'token_string_nest': [
  323. (r'{', Punctuation, '#push'),
  324. (r'}', Punctuation, '#pop'),
  325. include('root'),
  326. ],
  327. 'delimited_bracket': [
  328. (r'[^\[\]]+', String),
  329. (r'\[', String, 'delimited_inside_bracket'),
  330. (r'\]"', String, '#pop'),
  331. ],
  332. 'delimited_inside_bracket': [
  333. (r'[^\[\]]+', String),
  334. (r'\[', String, '#push'),
  335. (r'\]', String, '#pop'),
  336. ],
  337. 'delimited_parenthesis': [
  338. (r'[^\(\)]+', String),
  339. (r'\(', String, 'delimited_inside_parenthesis'),
  340. (r'\)"', String, '#pop'),
  341. ],
  342. 'delimited_inside_parenthesis': [
  343. (r'[^\(\)]+', String),
  344. (r'\(', String, '#push'),
  345. (r'\)', String, '#pop'),
  346. ],
  347. 'delimited_angle': [
  348. (r'[^<>]+', String),
  349. (r'<', String, 'delimited_inside_angle'),
  350. (r'>"', String, '#pop'),
  351. ],
  352. 'delimited_inside_angle': [
  353. (r'[^<>]+', String),
  354. (r'<', String, '#push'),
  355. (r'>', String, '#pop'),
  356. ],
  357. 'delimited_curly': [
  358. (r'[^{}]+', String),
  359. (r'{', String, 'delimited_inside_curly'),
  360. (r'}"', String, '#pop'),
  361. ],
  362. 'delimited_inside_curly': [
  363. (r'[^{}]+', String),
  364. (r'{', String, '#push'),
  365. (r'}', String, '#pop'),
  366. ],
  367. }
  368. class DelphiLexer(Lexer):
  369. """
  370. For `Delphi <http://www.borland.com/delphi/>`_ (Borland Object Pascal),
  371. Turbo Pascal and Free Pascal source code.
  372. Additional options accepted:
  373. `turbopascal`
  374. Highlight Turbo Pascal specific keywords (default: ``True``).
  375. `delphi`
  376. Highlight Borland Delphi specific keywords (default: ``True``).
  377. `freepascal`
  378. Highlight Free Pascal specific keywords (default: ``True``).
  379. `units`
  380. A list of units that should be considered builtin, supported are
  381. ``System``, ``SysUtils``, ``Classes`` and ``Math``.
  382. Default is to consider all of them builtin.
  383. """
  384. name = 'Delphi'
  385. aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
  386. filenames = ['*.pas']
  387. mimetypes = ['text/x-pascal']
  388. TURBO_PASCAL_KEYWORDS = [
  389. 'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case',
  390. 'const', 'constructor', 'continue', 'destructor', 'div', 'do',
  391. 'downto', 'else', 'end', 'file', 'for', 'function', 'goto',
  392. 'if', 'implementation', 'in', 'inherited', 'inline', 'interface',
  393. 'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator',
  394. 'or', 'packed', 'procedure', 'program', 'record', 'reintroduce',
  395. 'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to',
  396. 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor'
  397. ]
  398. DELPHI_KEYWORDS = [
  399. 'as', 'class', 'except', 'exports', 'finalization', 'finally',
  400. 'initialization', 'is', 'library', 'on', 'property', 'raise',
  401. 'threadvar', 'try'
  402. ]
  403. FREE_PASCAL_KEYWORDS = [
  404. 'dispose', 'exit', 'false', 'new', 'true'
  405. ]
  406. BLOCK_KEYWORDS = set([
  407. 'begin', 'class', 'const', 'constructor', 'destructor', 'end',
  408. 'finalization', 'function', 'implementation', 'initialization',
  409. 'label', 'library', 'operator', 'procedure', 'program', 'property',
  410. 'record', 'threadvar', 'type', 'unit', 'uses', 'var'
  411. ])
  412. FUNCTION_MODIFIERS = set([
  413. 'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe',
  414. 'pascal', 'register', 'safecall', 'softfloat', 'stdcall',
  415. 'varargs', 'name', 'dynamic', 'near', 'virtual', 'external',
  416. 'override', 'assembler'
  417. ])
  418. # XXX: those aren't global. but currently we know no way for defining
  419. # them just for the type context.
  420. DIRECTIVES = set([
  421. 'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far',
  422. 'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected',
  423. 'published', 'public'
  424. ])
  425. BUILTIN_TYPES = set([
  426. 'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool',
  427. 'cardinal', 'char', 'comp', 'currency', 'double', 'dword',
  428. 'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint',
  429. 'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean',
  430. 'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency',
  431. 'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle',
  432. 'pint64', 'pinteger', 'plongint', 'plongword', 'pointer',
  433. 'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint',
  434. 'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword',
  435. 'pwordarray', 'pwordbool', 'real', 'real48', 'shortint',
  436. 'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate',
  437. 'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant',
  438. 'widechar', 'widestring', 'word', 'wordbool'
  439. ])
  440. BUILTIN_UNITS = {
  441. 'System': [
  442. 'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8',
  443. 'append', 'arctan', 'assert', 'assigned', 'assignfile',
  444. 'beginthread', 'blockread', 'blockwrite', 'break', 'chdir',
  445. 'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble',
  446. 'concat', 'continue', 'copy', 'cos', 'dec', 'delete',
  447. 'dispose', 'doubletocomp', 'endthread', 'enummodules',
  448. 'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr',
  449. 'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize',
  450. 'fillchar', 'finalize', 'findclasshinstance', 'findhinstance',
  451. 'findresourcehinstance', 'flush', 'frac', 'freemem',
  452. 'get8087cw', 'getdir', 'getlasterror', 'getmem',
  453. 'getmemorymanager', 'getmodulefilename', 'getvariantmanager',
  454. 'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert',
  455. 'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset',
  456. 'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd',
  457. 'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount',
  458. 'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random',
  459. 'randomize', 'read', 'readln', 'reallocmem',
  460. 'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir',
  461. 'round', 'runerror', 'seek', 'seekeof', 'seekeoln',
  462. 'set8087cw', 'setlength', 'setlinebreakstyle',
  463. 'setmemorymanager', 'setstring', 'settextbuf',
  464. 'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt',
  465. 'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar',
  466. 'succ', 'swap', 'trunc', 'truncate', 'typeinfo',
  467. 'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring',
  468. 'upcase', 'utf8decode', 'utf8encode', 'utf8toansi',
  469. 'utf8tounicode', 'val', 'vararrayredim', 'varclear',
  470. 'widecharlentostring', 'widecharlentostrvar',
  471. 'widechartostring', 'widechartostrvar',
  472. 'widestringtoucs4string', 'write', 'writeln'
  473. ],
  474. 'SysUtils': [
  475. 'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks',
  476. 'allocmem', 'ansicomparefilename', 'ansicomparestr',
  477. 'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr',
  478. 'ansilastchar', 'ansilowercase', 'ansilowercasefilename',
  479. 'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext',
  480. 'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp',
  481. 'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan',
  482. 'ansistrscan', 'ansistrupper', 'ansiuppercase',
  483. 'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep',
  484. 'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype',
  485. 'callterminateprocs', 'changefileext', 'charlength',
  486. 'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr',
  487. 'comparetext', 'createdir', 'createguid', 'currentyear',
  488. 'currtostr', 'currtostrf', 'date', 'datetimetofiledate',
  489. 'datetimetostr', 'datetimetostring', 'datetimetosystemtime',
  490. 'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate',
  491. 'decodedatefully', 'decodetime', 'deletefile', 'directoryexists',
  492. 'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime',
  493. 'exceptionerrormessage', 'excludetrailingbackslash',
  494. 'excludetrailingpathdelimiter', 'expandfilename',
  495. 'expandfilenamecase', 'expanduncfilename', 'extractfiledir',
  496. 'extractfiledrive', 'extractfileext', 'extractfilename',
  497. 'extractfilepath', 'extractrelativepath', 'extractshortpathname',
  498. 'fileage', 'fileclose', 'filecreate', 'filedatetodatetime',
  499. 'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly',
  500. 'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr',
  501. 'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage',
  502. 'findclose', 'findcmdlineswitch', 'findfirst', 'findnext',
  503. 'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr',
  504. 'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr',
  505. 'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr',
  506. 'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir',
  507. 'getenvironmentvariable', 'getfileversion', 'getformatsettings',
  508. 'getlocaleformatsettings', 'getmodulename', 'getpackagedescription',
  509. 'getpackageinfo', 'gettime', 'guidtostring', 'incamonth',
  510. 'includetrailingbackslash', 'includetrailingpathdelimiter',
  511. 'incmonth', 'initializepackage', 'interlockeddecrement',
  512. 'interlockedexchange', 'interlockedexchangeadd',
  513. 'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter',
  514. 'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident',
  515. 'languages', 'lastdelimiter', 'loadpackage', 'loadstr',
  516. 'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now',
  517. 'outofmemoryerror', 'quotedstr', 'raiselastoserror',
  518. 'raiselastwin32error', 'removedir', 'renamefile', 'replacedate',
  519. 'replacetime', 'safeloadlibrary', 'samefilename', 'sametext',
  520. 'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize',
  521. 'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy',
  522. 'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp',
  523. 'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy',
  524. 'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew',
  525. 'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos',
  526. 'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr',
  527. 'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime',
  528. 'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint',
  529. 'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime',
  530. 'strtotimedef', 'strupper', 'supports', 'syserrormessage',
  531. 'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime',
  532. 'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright',
  533. 'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime',
  534. 'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime',
  535. 'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime',
  536. 'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext',
  537. 'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase',
  538. 'widesamestr', 'widesametext', 'wideuppercase', 'win32check',
  539. 'wraptext'
  540. ],
  541. 'Classes': [
  542. 'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize',
  543. 'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect',
  544. 'extractstrings', 'findclass', 'findglobalcomponent', 'getclass',
  545. 'groupdescendantswith', 'hextobin', 'identtoint',
  546. 'initinheritedcomponent', 'inttoident', 'invalidpoint',
  547. 'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext',
  548. 'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource',
  549. 'pointsequal', 'readcomponentres', 'readcomponentresex',
  550. 'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias',
  551. 'registerclasses', 'registercomponents', 'registerintegerconsts',
  552. 'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup',
  553. 'teststreamformat', 'unregisterclass', 'unregisterclasses',
  554. 'unregisterintegerconsts', 'unregistermoduleclasses',
  555. 'writecomponentresfile'
  556. ],
  557. 'Math': [
  558. 'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
  559. 'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil',
  560. 'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc',
  561. 'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle',
  562. 'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance',
  563. 'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask',
  564. 'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg',
  565. 'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate',
  566. 'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero',
  567. 'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue',
  568. 'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue',
  569. 'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods',
  570. 'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance',
  571. 'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd',
  572. 'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant',
  573. 'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode',
  574. 'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev',
  575. 'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation',
  576. 'tan', 'tanh', 'totalvariance', 'variance'
  577. ]
  578. }
  579. ASM_REGISTERS = set([
  580. 'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0',
  581. 'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0',
  582. 'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx',
  583. 'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp',
  584. 'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
  585. 'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5',
  586. 'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5',
  587. 'xmm6', 'xmm7'
  588. ])
  589. ASM_INSTRUCTIONS = set([
  590. 'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound',
  591. 'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw',
  592. 'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae',
  593. 'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg',
  594. 'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb',
  595. 'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
  596. 'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo',
  597. 'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb',
  598. 'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid',
  599. 'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt',
  600. 'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd',
  601. 'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd',
  602. 'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe',
  603. 'jc', 'jcxz', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle',
  604. 'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
  605. 'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
  606. 'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave',
  607. 'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw',
  608. 'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw',
  609. 'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr',
  610. 'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx',
  611. 'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
  612. 'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw',
  613. 'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw',
  614. 'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe',
  615. 'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror',
  616. 'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb',
  617. 'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe',
  618. 'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle',
  619. 'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng',
  620. 'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz',
  621. 'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl',
  622. 'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold',
  623. 'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str',
  624. 'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit',
  625. 'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait',
  626. 'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat',
  627. 'xlatb', 'xor'
  628. ])
  629. def __init__(self, **options):
  630. Lexer.__init__(self, **options)
  631. self.keywords = set()
  632. if get_bool_opt(options, 'turbopascal', True):
  633. self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
  634. if get_bool_opt(options, 'delphi', True):
  635. self.keywords.update(self.DELPHI_KEYWORDS)
  636. if get_bool_opt(options, 'freepascal', True):
  637. self.keywords.update(self.FREE_PASCAL_KEYWORDS)
  638. self.builtins = set()
  639. for unit in get_list_opt(options, 'units', self.BUILTIN_UNITS.keys()):
  640. self.builtins.update(self.BUILTIN_UNITS[unit])
  641. def get_tokens_unprocessed(self, text):
  642. scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
  643. stack = ['initial']
  644. in_function_block = False
  645. in_property_block = False
  646. was_dot = False
  647. next_token_is_function = False
  648. next_token_is_property = False
  649. collect_labels = False
  650. block_labels = set()
  651. brace_balance = [0, 0]
  652. while not scanner.eos:
  653. token = Error
  654. if stack[-1] == 'initial':
  655. if scanner.scan(r'\s+'):
  656. token = Text
  657. elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
  658. if scanner.match.startswith('$'):
  659. token = Comment.Preproc
  660. else:
  661. token = Comment.Multiline
  662. elif scanner.scan(r'//.*?$'):
  663. token = Comment.Single
  664. elif scanner.scan(r'[-+*\/=<>:;,.@\^]'):
  665. token = Operator
  666. # stop label highlighting on next ";"
  667. if collect_labels and scanner.match == ';':
  668. collect_labels = False
  669. elif scanner.scan(r'[\(\)\[\]]+'):
  670. token = Punctuation
  671. # abort function naming ``foo = Function(...)``
  672. next_token_is_function = False
  673. # if we are in a function block we count the open
  674. # braces because otherwise it's impossible to
  675. # determine the end of the modifier context
  676. if in_function_block or in_property_block:
  677. if scanner.match == '(':
  678. brace_balance[0] += 1
  679. elif scanner.match == ')':
  680. brace_balance[0] -= 1
  681. elif scanner.match == '[':
  682. brace_balance[1] += 1
  683. elif scanner.match == ']':
  684. brace_balance[1] -= 1
  685. elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
  686. lowercase_name = scanner.match.lower()
  687. if lowercase_name == 'result':
  688. token = Name.Builtin.Pseudo
  689. elif lowercase_name in self.keywords:
  690. token = Keyword
  691. # if we are in a special block and a
  692. # block ending keyword occours (and the parenthesis
  693. # is balanced) we end the current block context
  694. if (in_function_block or in_property_block) and \
  695. lowercase_name in self.BLOCK_KEYWORDS and \
  696. brace_balance[0] <= 0 and \
  697. brace_balance[1] <= 0:
  698. in_function_block = False
  699. in_property_block = False
  700. brace_balance = [0, 0]
  701. block_labels = set()
  702. if lowercase_name in ('label', 'goto'):
  703. collect_labels = True
  704. elif lowercase_name == 'asm':
  705. stack.append('asm')
  706. elif lowercase_name == 'property':
  707. in_property_block = True
  708. next_token_is_property = True
  709. elif lowercase_name in ('procedure', 'operator',
  710. 'function', 'constructor',
  711. 'destructor'):
  712. in_function_block = True
  713. next_token_is_function = True
  714. # we are in a function block and the current name
  715. # is in the set of registered modifiers. highlight
  716. # it as pseudo keyword
  717. elif in_function_block and \
  718. lowercase_name in self.FUNCTION_MODIFIERS:
  719. token = Keyword.Pseudo
  720. # if we are in a property highlight some more
  721. # modifiers
  722. elif in_property_block and \
  723. lowercase_name in ('read', 'write'):
  724. token = Keyword.Pseudo
  725. next_token_is_function = True
  726. # if the last iteration set next_token_is_function
  727. # to true we now want this name highlighted as
  728. # function. so do that and reset the state
  729. elif next_token_is_function:
  730. # Look if the next token is a dot. If yes it's
  731. # not a function, but a class name and the
  732. # part after the dot a function name
  733. if scanner.test(r'\s*\.\s*'):
  734. token = Name.Class
  735. # it's not a dot, our job is done
  736. else:
  737. token = Name.Function
  738. next_token_is_function = False
  739. # same for properties
  740. elif next_token_is_property:
  741. token = Name.Property
  742. next_token_is_property = False
  743. # Highlight this token as label and add it
  744. # to the list of known labels
  745. elif collect_labels:
  746. token = Name.Label
  747. block_labels.add(scanner.match.lower())
  748. # name is in list of known labels
  749. elif lowercase_name in block_labels:
  750. token = Name.Label
  751. elif lowercase_name in self.BUILTIN_TYPES:
  752. token = Keyword.Type
  753. elif lowercase_name in self.DIRECTIVES:
  754. token = Keyword.Pseudo
  755. # builtins are just builtins if the token
  756. # before isn't a dot
  757. elif not was_dot and lowercase_name in self.builtins:
  758. token = Name.Builtin
  759. else:
  760. token = Name
  761. elif scanner.scan(r"'"):
  762. token = String
  763. stack.append('string')
  764. elif scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
  765. token = String.Char
  766. elif scanner.scan(r'\$[0-9A-Fa-f]+'):
  767. token = Number.Hex
  768. elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
  769. token = Number.Integer
  770. elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
  771. token = Number.Float
  772. else:
  773. # if the stack depth is deeper than once, pop
  774. if len(stack) > 1:
  775. stack.pop()
  776. scanner.get_char()
  777. elif stack[-1] == 'string':
  778. if scanner.scan(r"''"):
  779. token = String.Escape
  780. elif scanner.scan(r"'"):
  781. token = String
  782. stack.pop()
  783. elif scanner.scan(r"[^']*"):
  784. token = String
  785. else:
  786. scanner.get_char()
  787. stack.pop()
  788. elif stack[-1] == 'asm':
  789. if scanner.scan(r'\s+'):
  790. token = Text
  791. elif scanner.scan(r'end'):
  792. token = Keyword
  793. stack.pop()
  794. elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
  795. if scanner.match.startswith('$'):
  796. token = Comment.Preproc
  797. else:
  798. token = Comment.Multiline
  799. elif scanner.scan(r'//.*?$'):
  800. token = Comment.Single
  801. elif scanner.scan(r"'"):
  802. token = String
  803. stack.append('string')
  804. elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'):
  805. token = Name.Label
  806. elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
  807. lowercase_name = scanner.match.lower()
  808. if lowercase_name in self.ASM_INSTRUCTIONS:
  809. token = Keyword
  810. elif lowercase_name in self.ASM_REGISTERS:
  811. token = Name.Builtin
  812. else:
  813. token = Name
  814. elif scanner.scan(r'[-+*\/=<>:;,.@\^]+'):
  815. token = Operator
  816. elif scanner.scan(r'[\(\)\[\]]+'):
  817. token = Punctuation
  818. elif scanner.scan(r'\$[0-9A-Fa-f]+'):
  819. token = Number.Hex
  820. elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
  821. token = Number.Integer
  822. elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
  823. token = Number.Float
  824. else:
  825. scanner.get_char()
  826. stack.pop()
  827. # save the dot!!!11
  828. if scanner.match.strip():
  829. was_dot = scanner.match == '.'
  830. yield scanner.start_pos, token, scanner.match or ''
  class JavaLexer(RegexLexer):
      """
      For `Java <http://www.sun.com/java/>`_ source code.
      """

      name = 'Java'
      aliases = ['java']
      filenames = ['*.java']
      mimetypes = ['text/x-java']

      # DOTALL lets ``/* ... */`` comments span several lines
      flags = re.MULTILINE | re.DOTALL

      #: optional Comment or Whitespace
      _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

      tokens = {
          'root': [
              # method names
              (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # return arguments
               r'([a-zA-Z_][a-zA-Z0-9_]*)'                    # method name
               r'(\s*)(\()',                                  # signature start
               bygroups(using(this), Name.Function, Text, Operator)),
              (r'[^\S\n]+', Text),
              (r'//.*?\n', Comment),
              (r'/\*.*?\*/', Comment),
              (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator),
              (r'(assert|break|case|catch|continue|default|do|else|finally|for|'
               r'if|goto|instanceof|new|return|switch|this|throw|try|while)\b',
               Keyword),
              (r'(abstract|const|enum|extends|final|implements|native|private|'
               r'protected|public|static|strictfp|super|synchronized|throws|'
               r'transient|volatile)\b', Keyword.Declaration),
              (r'(boolean|byte|char|double|float|int|long|short|void)\b',
               Keyword.Type),
              (r'(package)(\s+)', bygroups(Keyword.Namespace, Text)),
              (r'(true|false|null)\b', Keyword.Constant),
              (r'(class|interface)(\s+)', bygroups(Keyword.Declaration, Text), 'class'),
              (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
              (r'"(\\\\|\\"|[^"])*"', String),
              # unicode escapes may use upper-case hex digits as well
              (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
              (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)),
              (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label),
              (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name),
              (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator),
              # Numeric literals.  The lexer is case-sensitive, so both
              # letter cases are spelled out: exponent sign, f/F/d/D and
              # l/L suffixes, 0x/0X prefix and upper-case hex digits.
              (r'[0-9][0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fFdD]?', Number.Float),
              (r'0[xX][0-9a-fA-F]+[lL]?', Number.Hex),
              (r'[0-9]+[lL]?', Number.Integer),
              (r'\n', Text)
          ],
          # entered after ``class`` / ``interface``: one identifier, then back
          'class': [
              (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
          ],
          # entered after ``import``: one dotted name (optional ``*``), then back
          'import': [
              (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop')
          ],
      }
  class ScalaLexer(RegexLexer):
      """
      For `Scala <http://www.scala-lang.org>`_ source code.
      """

      name = 'Scala'
      aliases = ['scala']
      filenames = ['*.scala']
      mimetypes = ['text/x-scala']

      # DOTALL lets ``/* ... */`` comments span several lines
      flags = re.MULTILINE | re.DOTALL

      #: optional Comment or Whitespace
      _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

      tokens = {
          'root': [
              (r'(class|interface|trait|object)(\s+)', bygroups(Keyword, Text), 'class'),
              # method names.  The whitespace after ``def`` is part of the
              # first group: without it the rule cannot match ``def foo(``
              # and would instead split an identifier such as ``define(``.
              (r'^(\s*def\s+)'
               r'([a-zA-Z_][a-zA-Z0-9_]*)'                    # method name
               r'(\s*)(\()',                                  # signature start
               bygroups(using(this), Name.Function, Text, Operator)),
              (r"'([a-zA-Z_][a-zA-Z0-9_]*)", Text.Symbol),
              (r'[^\S\n]+', Text),
              (r'//.*?\n', Comment),
              (r'/\*.*?\*/', Comment),
              (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator),
              (r'(abstract|case|catch|do|else|extends|final|finally|for|forSome'
               r'|if|implicit|lazy|match|new|null|override|private|protected'
               r'|requires|return|sealed|super|this|throw|try|type|while|with'
               r'|yield|let|def|var|println|=>|<-|_)\b', Keyword),
              (r'(boolean|byte|char|double|float|int|long|short|void)\b',
               Keyword.Type),
              (r'(String|Int|Array|HashMap)\b', Keyword.Type),
              (r'(true|false|null)\b', Keyword.Constant),
              (r'(import)(\s+)', bygroups(Keyword, Text), 'import'),
              (r'"(\\\\|\\"|[^"])*"', String),
              # unicode escapes may use upper-case hex digits as well
              (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
              (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)),
              (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name),
              (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator),
              # Numeric literals.  The lexer is case-sensitive, so both
              # letter cases are spelled out: exponent sign, f/F/d/D and
              # l/L suffixes, 0x/0X prefix and upper-case hex digits.
              (r'[0-9][0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fFdD]?', Number.Float),
              (r'0[xX][0-9a-fA-F]+[lL]?', Number.Hex),
              (r'[0-9]+[lL]?', Number.Integer),
              (r'\n', Text)
          ],
          # entered after ``class`` / ``interface`` / ``trait`` / ``object``
          'class': [
              (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'),
              # NOTE: the rule above already matches every identifier, so
              # this one is never reached; it is kept in its original place.
              (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(\()',
               bygroups(Name.Class, Text, Operator), '#pop'),
          ],
          # entered after ``import``: one dotted name (optional ``*``), then back
          'import': [
              (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop')
          ],
      }
  class DylanLexer(RegexLexer):
      """
      For the `Dylan <http://www.opendylan.org/>`_ language.

      *New in Pygments 0.7.*
      """

      name = 'Dylan'
      aliases = ['dylan']
      filenames = ['*.dylan']
      mimetypes = ['text/x-dylan']

      flags = re.DOTALL

      tokens = {
          'root': [
              (r'\b(subclass|abstract|block|c(on(crete|stant)|lass)|domain'
               r'|ex(c(eption|lude)|port)|f(unction(|al))|generic|handler'
               r'|i(n(herited|line|stance|terface)|mport)|library|m(acro|ethod)'
               r'|open|primary|sealed|si(deways|ngleton)|slot'
               r'|v(ariable|irtual))\b', Name.Builtin),
              (r'<\w+>', Keyword.Type),
              # ``*?`` rather than ``+?`` so that the empty string ``""``
              # is matched as a string too
              (r'#?"(?:\\.|[^"])*?"', String.Double),
              (r'//.*?\n', Comment),
              (r'/\*[\w\W]*?\*/', Comment.Multiline),
              (r'\'.*?\'', String.Single),
              (r'=>|\b(a(bove|fterwards)|b(e(gin|low)|y)|c(ase|leanup|reate)'
               r'|define|else(|if)|end|f(inally|or|rom)|i[fn]|l(et|ocal)|otherwise'
               r'|rename|s(elect|ignal)|t(hen|o)|u(n(less|til)|se)|wh(en|ile))\b',
               Keyword),
              (r'([ \t])([!\$%&\*\/:<=>\?~_^a-zA-Z0-9.+\-]*:)',
               bygroups(Text, Name.Variable)),
              (r'([ \t]*)(\S+[^:])([ \t]*)(\()([ \t]*)',
               bygroups(Text, Name.Function, Text, Punctuation, Text)),
              # A number must contain at least one digit.  A bare "."
              # (as in ``foo.bar``) is left to the operator rule below
              # instead of being tagged as a number.
              (r'-?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)', Number),
              (r'[(),;]', Punctuation),
              (r'\$[a-zA-Z0-9-]+', Name.Constant),
              (r'[!$%&*/:<>=?~^.+\[\]{}-]+', Operator),
              (r'\s+', Text),
              (r'#[a-zA-Z0-9-]+', Keyword),
              (r'[a-zA-Z0-9-]+', Name.Variable),
          ],
      }
  class ObjectiveCLexer(RegexLexer):
      """
      For Objective-C source code with preprocessor directives.
      """

      name = 'Objective-C'
      aliases = ['objective-c', 'objectivec', 'obj-c', 'objc']
      #XXX: objc has .h files too :-/
      filenames = ['*.m']
      mimetypes = ['text/x-objective-c']

      #: optional Comment or Whitespace
      _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

      tokens = {
          'whitespace': [
              (r'^(\s*)(#if\s+0)', bygroups(Text, Comment.Preproc), 'if0'),
              (r'^(\s*)(#)', bygroups(Text, Comment.Preproc), 'macro'),
              (r'\n', Text),
              (r'\s+', Text),
              (r'\\\n', Text), # line continuation
              (r'//(\n|(.|\n)*?[^\\]\n)', Comment),
              (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment),
          ],
          'statements': [
              (r'(L|@)?"', String, 'string'),
              (r"(L|@)?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
              (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
              (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
              (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
              (r'0[0-7]+[Ll]?', Number.Oct),
              (r'\d+[Ll]?', Number.Integer),
              (r'[~!%^&*+=|?:<>/-]', Operator),
              (r'[()\[\],.]', Punctuation),
              (r'(auto|break|case|const|continue|default|do|else|enum|extern|'
               r'for|goto|if|register|restricted|return|sizeof|static|struct|'
               r'switch|typedef|union|volatile|virtual|while|in|@selector|'
               r'@private|@protected|@public|@encode|'
               r'@synchronized|@try|@throw|@catch|@finally|@end|@property|'
               r'@synthesize|@dynamic)\b', Keyword),
              (r'(int|long|float|short|double|char|unsigned|signed|void|'
               r'id|BOOL|IBOutlet|IBAction|SEL)\b', Keyword.Type),
              (r'(_{0,2}inline|naked|restrict|thread|typename)\b', Keyword.Reserved),
              (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|'
               r'declspec|finally|int64|try|leave)\b', Keyword.Reserved),
              (r'(TRUE|FALSE|nil|NULL)\b', Name.Builtin),
              ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label),
              ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
          ],
          'root': [
              include('whitespace'),
              # functions
              (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))'    # return arguments
               r'([a-zA-Z_][a-zA-Z0-9_]*)'             # method name
               r'(\s*\([^;]*?\))'                      # signature
               r'(' + _ws + r')({)',
               bygroups(using(this), Name.Function, using(this), Text, Punctuation),
               'function'),
              # function declarations
              (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))'    # return arguments
               r'([a-zA-Z_][a-zA-Z0-9_]*)'             # method name
               r'(\s*\([^;]*?\))'                      # signature
               r'(' + _ws + r')(;)',
               bygroups(using(this), Name.Function, using(this), Text, Punctuation)),
              (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text), 'classname'),
              (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text), 'forward_classname'),
              (r'(\s*)(@end)(\s*)', bygroups(Text, Keyword, Text)),
              # nothing else matched: consume no input and lex a statement
              ('', Text, 'statement'),
          ],
          # Raw string literals are used below so that ``\s`` and ``\(``
          # reach the regex engine unchanged without relying on Python
          # preserving unknown string escapes.
          'classname' : [
              # interface definition that inherits
              (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*:\s*)([a-zA-Z_][a-zA-Z0-9_]*)?',
               bygroups(Name.Class, Text, Name.Class), '#pop'),
              # interface definition for a category
              (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(\([a-zA-Z_][a-zA-Z0-9_]*\))',
               bygroups(Name.Class, Text, Name.Label), '#pop'),
              # simple interface / implementation
              (r'([a-zA-Z_][a-zA-Z0-9_]*)', Name.Class, '#pop')
          ],
          'forward_classname' : [
              # A name followed by a comma: more names follow, so stay in
              # this state.  Pushing the state again for every comma would
              # leave one stale copy on the stack per name, because the
              # final rule pops only once.
              (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*,\s*)',
               bygroups(Name.Class, Text)),
              # last name of the list, optionally followed by ";"
              (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*;?)',
               bygroups(Name.Class, Text), '#pop')
          ],
          'statement' : [
              include('whitespace'),
              include('statements'),
              ('[{}]', Punctuation),
              (';', Punctuation, '#pop'),
          ],
          'function': [
              include('whitespace'),
              include('statements'),
              (';', Punctuation),
              # nested braces push/pop this same state to track depth
              ('{', Punctuation, '#push'),
              ('}', Punctuation, '#pop'),
          ],
          'string': [
              (r'"', String, '#pop'),
              (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
              (r'[^\\"\n]+', String), # all other characters
              (r'\\\n', String), # line continuation
              (r'\\', String), # stray backslash
          ],
          'macro': [
              (r'[^/\n]+', Comment.Preproc),
              (r'/[*](.|\n)*?[*]/', Comment),
              (r'//.*?\n', Comment, '#pop'),
              (r'/', Comment.Preproc),
              # newline preceded by a backslash: the directive continues
              (r'(?<=\\)\n', Comment.Preproc),
              (r'\n', Comment.Preproc, '#pop'),
          ],
          'if0': [
              # nested #if ... #endif pairs push/pop this same state
              (r'^\s*#if.*?(?<!\\)\n', Comment, '#push'),
              (r'^\s*#endif.*?(?<!\\)\n', Comment, '#pop'),
              (r'.*?\n', Comment),
          ]
      }
  class FortranLexer(RegexLexer):
      '''
      Lexer for FORTRAN 90 code.

      *New in Pygments 0.10.*
      '''
      name = 'Fortran'
      aliases = ['fortran']
      filenames = ['*.f', '*.f90']
      mimetypes = ['text/x-fortran']
      # keywords, intrinsics and identifiers are matched case-insensitively
      flags = re.IGNORECASE

      # Data Types: INTEGER, REAL, COMPLEX, LOGICAL, CHARACTER and DOUBLE PRECISION
      # Operators: **, *, +, -, /, <, >, <=, >=, ==, /=
      # Logical (?): NOT, AND, OR, EQV, NEQV

      # Builtins:
      # http://gcc.gnu.org/onlinedocs/gcc-3.4.6/g77/Table-of-Intrinsic-Functions.html

      tokens = {
          'root': [
              (r'!.*\n', Comment),
              include('strings'),
              include('core'),
              (r'[a-z][a-z0-9_]*', Name.Variable),
              include('nums'),
              (r'[\s]+', Text),
          ],
          'core': [
              # Statements
              (r'\b(ACCEPT|ALLOCATABLE|ALLOCATE|ARRAY|ASSIGN|BACKSPACE|BLOCK DATA|'
               r'BYTE|CALL|CASE|CLOSE|COMMON|CONTAINS|CONTINUE|CYCLE|DATA|'
               r'DEALLOCATE|DECODE|DIMENSION|DO|ENCODE|END FILE|ENDIF|END|ENTRY|'
               r'EQUIVALENCE|EXIT|EXTERNAL|EXTRINSIC|FORALL|FORMAT|FUNCTION|GOTO|'
               r'IF|IMPLICIT|INCLUDE|INQUIRE|INTENT|INTERFACE|INTRINSIC|MODULE|'
               r'NAMELIST|NULLIFY|NONE|OPEN|OPTIONAL|OPTIONS|PARAMETER|PAUSE|'
               r'POINTER|PRINT|PRIVATE|PROGRAM|PUBLIC|PURE|READ|RECURSIVE|RETURN|'
               r'REWIND|SAVE|SELECT|SEQUENCE|STOP|SUBROUTINE|TARGET|TYPE|USE|'
               r'VOLATILE|WHERE|WRITE|WHILE|THEN|ELSE|ENDIF)\s*\b',
               Keyword),

              # Data Types
              (r'\b(CHARACTER|COMPLEX|DOUBLE PRECISION|DOUBLE COMPLEX|INTEGER|'
               r'LOGICAL|REAL)\s*\b',
               Keyword.Type),

              # Operators.  Alternatives are tried left to right, so the
              # two-character operators must come before their one-character
              # prefixes or "<=", ">=" and "/=" are split into two tokens.
              (r'(\*\*|\*|\+|-|\/=|\/|<=|>=|==|<|>|=)', Operator),

              (r'(::)', Keyword.Declaration),

              (r'[(),:&%]', Punctuation),

              # Intrinsics
              (r'\b(Abort|Abs|Access|AChar|ACos|AdjustL|AdjustR|AImag|AInt|Alarm|'
               r'All|Allocated|ALog|AMax|AMin|AMod|And|ANInt|Any|'
               r'ASin|Associated|ATan|BesJ|BesJN|BesY|BesYN|'
               r'Bit_Size|BTest|CAbs|CCos|Ceiling|CExp|Char|ChDir|ChMod|CLog|'
               r'Cmplx|Complex|Conjg|Cos|CosH|Count|CPU_Time|CShift|CSin|CSqRt|'
               r'CTime|DAbs|DACos|DASin|DATan|Date_and_Time|DbesJ|'
               r'DbesJ|DbesJN|DbesY|DbesY|DbesYN|Dble|DCos|DCosH|DDiM|DErF|DErFC|'
               r'DExp|Digits|DiM|DInt|DLog|DLog|DMax|DMin|DMod|DNInt|Dot_Product|'
               r'DProd|DSign|DSinH|DSin|DSqRt|DTanH|DTan|DTime|EOShift|Epsilon|'
               r'ErF|ErFC|ETime|Exit|Exp|Exponent|FDate|FGet|FGetC|Float|'
               r'Floor|Flush|FNum|FPutC|FPut|Fraction|FSeek|FStat|FTell|'
               r'GError|GetArg|GetCWD|GetEnv|GetGId|GetLog|GetPId|GetUId|'
               r'GMTime|HostNm|Huge|IAbs|IAChar|IAnd|IArgC|IBClr|IBits|'
               r'IBSet|IChar|IDate|IDiM|IDInt|IDNInt|IEOr|IErrNo|IFix|Imag|'
               r'ImagPart|Index|Int|IOr|IRand|IsaTty|IShft|IShftC|ISign|'
               r'ITime|Kill|Kind|LBound|Len|Len_Trim|LGe|LGt|Link|LLe|LLt|LnBlnk|'
               r'Loc|Log|Log|Logical|Long|LShift|LStat|LTime|MatMul|Max|'
               r'MaxExponent|MaxLoc|MaxVal|MClock|Merge|Min|MinExponent|MinLoc|'
               r'MinVal|Mod|Modulo|MvBits|Nearest|NInt|Not|Or|Pack|PError|'
               r'Precision|Present|Product|Radix|Rand|Random_Number|Random_Seed|'
               r'Range|Real|RealPart|Rename|Repeat|Reshape|RRSpacing|RShift|Scale|'
               r'Scan|Second|Selected_Int_Kind|Selected_Real_Kind|Set_Exponent|'
               r'Shape|Short|Sign|Signal|SinH|Sin|Sleep|Sngl|Spacing|Spread|SqRt|'
               r'SRand|Stat|Sum|SymLnk|System|System_Clock|Tan|TanH|Time|'
               r'Tiny|Transfer|Transpose|Trim|TtyNam|UBound|UMask|Unlink|Unpack|'
               r'Verify|XOr|ZAbs|ZCos|ZExp|ZLog|ZSin|ZSqRt)\s*\b',
               Name.Builtin),

              # Booleans
              (r'\.(true|false)\.', Name.Builtin),
              # Comparing Operators
              (r'\.(eq|ne|lt|le|gt|ge|not|and|or|eqv|neqv)\.', Operator.Word),
          ],

          'strings': [
              (r'"(\\\\|\\[0-7]+|\\.|[^"])*"', String.Double),
              (r"'(\\\\|\\[0-7]+|\\.|[^'])*'", String.Single),
          ],

          'nums': [
              # Reals are tried before integers.  An integer pattern guarded
              # by a negative lookahead can backtrack to a shorter digit run
              # ("12.5" -> "1"), splitting the literal in two.
              (r'[+-]?\d*\.\d+([eE][-+]?\d+)?', Number.Float),
              (r'[+-]?\d+\.\d*([eE][-+]?\d+)?', Number.Float),
              # real written with an exponent but no decimal point: 1e5
              (r'\d+[eE][-+]?\d+', Number.Float),
              (r'\d+', Number.Integer),
          ],
      }