
/pygments/pygments/lexers/functional.py

https://bitbucket.org/tcorll/butlertest
# -*- coding: utf-8 -*-
"""
    pygments.lexers.functional
    ~~~~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for functional languages.

    :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
from pygments.token import Text, Comment, Operator, Keyword, Name, \
     String, Number, Punctuation, Literal, Generic, Error

__all__ = ['RacketLexer', 'SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
           'LiterateHaskellLexer', 'SMLLexer', 'OcamlLexer', 'ErlangLexer',
           'ErlangShellLexer', 'OpaLexer', 'CoqLexer', 'NewLispLexer',
           'ElixirLexer', 'ElixirConsoleLexer', 'KokaLexer']


class RacketLexer(RegexLexer):
    """
    Lexer for `Racket <http://racket-lang.org/>`_ source code (formerly
    known as PLT Scheme).

    *New in Pygments 1.6.*
    """

    name = 'Racket'
    aliases = ['racket', 'rkt']
    filenames = ['*.rkt', '*.rktl']
    mimetypes = ['text/x-racket', 'application/x-racket']
    # From namespace-mapped-symbols
    keywords = [
        '#%app', '#%datum', '#%expression', '#%module-begin',
        '#%plain-app', '#%plain-lambda', '#%plain-module-begin',
        '#%provide', '#%require', '#%stratified-body', '#%top',
        '#%top-interaction', '#%variable-reference', '...', 'and', 'begin',
        'begin-for-syntax', 'begin0', 'case', 'case-lambda', 'cond',
        'datum->syntax-object', 'define', 'define-for-syntax',
        'define-struct', 'define-syntax', 'define-syntax-rule',
        'define-syntaxes', 'define-values', 'define-values-for-syntax',
        'delay', 'do', 'expand-path', 'fluid-let', 'hash-table-copy',
        'hash-table-count', 'hash-table-for-each', 'hash-table-get',
        'hash-table-iterate-first', 'hash-table-iterate-key',
        'hash-table-iterate-next', 'hash-table-iterate-value',
        'hash-table-map', 'hash-table-put!', 'hash-table-remove!',
        'hash-table?', 'if', 'lambda', 'let', 'let*', 'let*-values',
        'let-struct', 'let-syntax', 'let-syntaxes', 'let-values', 'let/cc',
        'let/ec', 'letrec', 'letrec-syntax', 'letrec-syntaxes',
        'letrec-syntaxes+values', 'letrec-values', 'list-immutable',
        'make-hash-table', 'make-immutable-hash-table', 'make-namespace',
        'module', 'module-identifier=?', 'module-label-identifier=?',
        'module-template-identifier=?', 'module-transformer-identifier=?',
        'namespace-transformer-require', 'or', 'parameterize',
        'parameterize*', 'parameterize-break', 'provide',
        'provide-for-label', 'provide-for-syntax', 'quasiquote',
        'quasisyntax', 'quasisyntax/loc', 'quote', 'quote-syntax',
        'quote-syntax/prune', 'require', 'require-for-label',
        'require-for-syntax', 'require-for-template', 'set!',
        'set!-values', 'syntax', 'syntax-case', 'syntax-case*',
        'syntax-id-rules', 'syntax-object->datum', 'syntax-rules',
        'syntax/loc', 'time', 'transcript-off', 'transcript-on', 'unless',
        'unquote', 'unquote-splicing', 'unsyntax', 'unsyntax-splicing',
        'when', 'with-continuation-mark', 'with-handlers',
        'with-handlers*', 'with-syntax', 'λ'
    ]
    # From namespace-mapped-symbols
    builtins = [
        '*', '+', '-', '/', '<', '<=', '=', '>', '>=',
        'abort-current-continuation', 'abs', 'absolute-path?', 'acos',
        'add1', 'alarm-evt', 'always-evt', 'andmap', 'angle', 'append',
        'apply', 'arithmetic-shift', 'arity-at-least',
        'arity-at-least-value', 'arity-at-least?', 'asin', 'assoc', 'assq',
        'assv', 'atan', 'banner', 'bitwise-and', 'bitwise-bit-field',
        'bitwise-bit-set?', 'bitwise-ior', 'bitwise-not', 'bitwise-xor',
        'boolean?', 'bound-identifier=?', 'box', 'box-immutable', 'box?',
        'break-enabled', 'break-thread', 'build-path',
        'build-path/convention-type', 'byte-pregexp', 'byte-pregexp?',
        'byte-ready?', 'byte-regexp', 'byte-regexp?', 'byte?', 'bytes',
        'bytes->immutable-bytes', 'bytes->list', 'bytes->path',
        'bytes->path-element', 'bytes->string/latin-1',
        'bytes->string/locale', 'bytes->string/utf-8', 'bytes-append',
        'bytes-close-converter', 'bytes-convert', 'bytes-convert-end',
        'bytes-converter?', 'bytes-copy', 'bytes-copy!', 'bytes-fill!',
        'bytes-length', 'bytes-open-converter', 'bytes-ref', 'bytes-set!',
        'bytes-utf-8-index', 'bytes-utf-8-length', 'bytes-utf-8-ref',
        'bytes<?', 'bytes=?', 'bytes>?', 'bytes?', 'caaaar', 'caaadr',
        'caaar', 'caadar', 'caaddr', 'caadr', 'caar', 'cadaar', 'cadadr',
        'cadar', 'caddar', 'cadddr', 'caddr', 'cadr',
        'call-in-nested-thread', 'call-with-break-parameterization',
        'call-with-composable-continuation',
        'call-with-continuation-barrier', 'call-with-continuation-prompt',
        'call-with-current-continuation', 'call-with-escape-continuation',
        'call-with-exception-handler',
        'call-with-immediate-continuation-mark', 'call-with-input-file',
        'call-with-output-file', 'call-with-parameterization',
        'call-with-semaphore', 'call-with-semaphore/enable-break',
        'call-with-values', 'call/cc', 'call/ec', 'car', 'cdaaar',
        'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cdar', 'cddaar',
        'cddadr', 'cddar', 'cdddar', 'cddddr', 'cdddr', 'cddr', 'cdr',
        'ceiling', 'channel-get', 'channel-put', 'channel-put-evt',
        'channel-try-get', 'channel?', 'chaperone-box', 'chaperone-evt',
        'chaperone-hash', 'chaperone-of?', 'chaperone-procedure',
        'chaperone-struct', 'chaperone-struct-type', 'chaperone-vector',
        'chaperone?', 'char->integer', 'char-alphabetic?', 'char-blank?',
        'char-ci<=?', 'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?',
        'char-downcase', 'char-foldcase', 'char-general-category',
        'char-graphic?', 'char-iso-control?', 'char-lower-case?',
        'char-numeric?', 'char-punctuation?', 'char-ready?',
        'char-symbolic?', 'char-title-case?', 'char-titlecase',
        'char-upcase', 'char-upper-case?', 'char-utf-8-length',
        'char-whitespace?', 'char<=?', 'char<?', 'char=?', 'char>=?',
        'char>?', 'char?', 'check-duplicate-identifier',
        'checked-procedure-check-and-extract', 'choice-evt',
        'cleanse-path', 'close-input-port', 'close-output-port',
        'collect-garbage', 'collection-file-path', 'collection-path',
        'compile', 'compile-allow-set!-undefined',
        'compile-context-preservation-enabled',
        'compile-enforce-module-constants', 'compile-syntax',
        'compiled-expression?', 'compiled-module-expression?',
        'complete-path?', 'complex?', 'cons',
        'continuation-mark-set->context', 'continuation-mark-set->list',
        'continuation-mark-set->list*', 'continuation-mark-set-first',
        'continuation-mark-set?', 'continuation-marks',
        'continuation-prompt-available?', 'continuation-prompt-tag?',
        'continuation?', 'copy-file', 'cos',
        'current-break-parameterization', 'current-code-inspector',
        'current-command-line-arguments', 'current-compile',
        'current-continuation-marks', 'current-custodian',
        'current-directory', 'current-drive', 'current-error-port',
        'current-eval', 'current-evt-pseudo-random-generator',
        'current-gc-milliseconds', 'current-get-interaction-input-port',
        'current-inexact-milliseconds', 'current-input-port',
        'current-inspector', 'current-library-collection-paths',
        'current-load', 'current-load-extension',
        'current-load-relative-directory', 'current-load/use-compiled',
        'current-locale', 'current-memory-use', 'current-milliseconds',
        'current-module-declare-name', 'current-module-declare-source',
        'current-module-name-resolver', 'current-namespace',
        'current-output-port', 'current-parameterization',
        'current-preserved-thread-cell-values', 'current-print',
        'current-process-milliseconds', 'current-prompt-read',
        'current-pseudo-random-generator', 'current-read-interaction',
        'current-reader-guard', 'current-readtable', 'current-seconds',
        'current-security-guard', 'current-subprocess-custodian-mode',
        'current-thread', 'current-thread-group',
        'current-thread-initial-stack-size',
        'current-write-relative-directory', 'custodian-box-value',
        'custodian-box?', 'custodian-limit-memory',
        'custodian-managed-list', 'custodian-memory-accounting-available?',
        'custodian-require-memory', 'custodian-shutdown-all', 'custodian?',
        'custom-print-quotable-accessor', 'custom-print-quotable?',
        'custom-write-accessor', 'custom-write?', 'date', 'date*',
        'date*-nanosecond', 'date*-time-zone-name', 'date*?', 'date-day',
        'date-dst?', 'date-hour', 'date-minute', 'date-month',
        'date-second', 'date-time-zone-offset', 'date-week-day',
        'date-year', 'date-year-day', 'date?', 'datum-intern-literal',
        'default-continuation-prompt-tag', 'delete-directory',
        'delete-file', 'denominator', 'directory-exists?',
        'directory-list', 'display', 'displayln', 'dump-memory-stats',
        'dynamic-require', 'dynamic-require-for-syntax', 'dynamic-wind',
        'eof', 'eof-object?', 'ephemeron-value', 'ephemeron?', 'eprintf',
        'eq-hash-code', 'eq?', 'equal-hash-code',
        'equal-secondary-hash-code', 'equal?', 'equal?/recur',
        'eqv-hash-code', 'eqv?', 'error', 'error-display-handler',
        'error-escape-handler', 'error-print-context-length',
        'error-print-source-location', 'error-print-width',
        'error-value->string-handler', 'eval', 'eval-jit-enabled',
        'eval-syntax', 'even?', 'evt?', 'exact->inexact', 'exact-integer?',
        'exact-nonnegative-integer?', 'exact-positive-integer?', 'exact?',
        'executable-yield-handler', 'exit', 'exit-handler', 'exn',
        'exn-continuation-marks', 'exn-message', 'exn:break',
        'exn:break-continuation', 'exn:break?', 'exn:fail',
        'exn:fail:contract', 'exn:fail:contract:arity',
        'exn:fail:contract:arity?', 'exn:fail:contract:continuation',
        'exn:fail:contract:continuation?',
        'exn:fail:contract:divide-by-zero',
        'exn:fail:contract:divide-by-zero?',
        'exn:fail:contract:non-fixnum-result',
        'exn:fail:contract:non-fixnum-result?',
        'exn:fail:contract:variable', 'exn:fail:contract:variable-id',
        'exn:fail:contract:variable?', 'exn:fail:contract?',
        'exn:fail:filesystem', 'exn:fail:filesystem:exists',
        'exn:fail:filesystem:exists?', 'exn:fail:filesystem:version',
        'exn:fail:filesystem:version?', 'exn:fail:filesystem?',
        'exn:fail:network', 'exn:fail:network?', 'exn:fail:out-of-memory',
        'exn:fail:out-of-memory?', 'exn:fail:read',
        'exn:fail:read-srclocs', 'exn:fail:read:eof', 'exn:fail:read:eof?',
        'exn:fail:read:non-char', 'exn:fail:read:non-char?',
        'exn:fail:read?', 'exn:fail:syntax', 'exn:fail:syntax-exprs',
        'exn:fail:syntax:unbound', 'exn:fail:syntax:unbound?',
        'exn:fail:syntax?', 'exn:fail:unsupported',
        'exn:fail:unsupported?', 'exn:fail:user', 'exn:fail:user?',
        'exn:fail?', 'exn:srclocs-accessor', 'exn:srclocs?', 'exn?', 'exp',
        'expand', 'expand-once', 'expand-syntax', 'expand-syntax-once',
        'expand-syntax-to-top-form', 'expand-to-top-form',
        'expand-user-path', 'expt', 'file-exists?',
        'file-or-directory-identity', 'file-or-directory-modify-seconds',
        'file-or-directory-permissions', 'file-position', 'file-size',
        'file-stream-buffer-mode', 'file-stream-port?',
        'filesystem-root-list', 'find-executable-path',
        'find-library-collection-paths', 'find-system-path', 'fixnum?',
        'floating-point-bytes->real', 'flonum?', 'floor', 'flush-output',
        'for-each', 'force', 'format', 'fprintf', 'free-identifier=?',
        'gcd', 'generate-temporaries', 'gensym', 'get-output-bytes',
        'get-output-string', 'getenv', 'global-port-print-handler',
        'guard-evt', 'handle-evt', 'handle-evt?', 'hash', 'hash-equal?',
        'hash-eqv?', 'hash-has-key?', 'hash-placeholder?', 'hash-ref!',
        'hasheq', 'hasheqv', 'identifier-binding',
        'identifier-label-binding', 'identifier-prune-lexical-context',
        'identifier-prune-to-source-module',
        'identifier-remove-from-definition-context',
        'identifier-template-binding', 'identifier-transformer-binding',
        'identifier?', 'imag-part', 'immutable?', 'impersonate-box',
        'impersonate-hash', 'impersonate-procedure', 'impersonate-struct',
        'impersonate-vector', 'impersonator-of?',
        'impersonator-prop:application-mark',
        'impersonator-property-accessor-procedure?',
        'impersonator-property?', 'impersonator?', 'inexact->exact',
        'inexact-real?', 'inexact?', 'input-port?', 'inspector?',
        'integer->char', 'integer->integer-bytes',
        'integer-bytes->integer', 'integer-length', 'integer-sqrt',
        'integer-sqrt/remainder', 'integer?',
        'internal-definition-context-seal', 'internal-definition-context?',
        'keyword->string', 'keyword<?', 'keyword?', 'kill-thread', 'lcm',
        'length', 'liberal-define-context?', 'link-exists?', 'list',
        'list*', 'list->bytes', 'list->string', 'list->vector', 'list-ref',
        'list-tail', 'list?', 'load', 'load-extension',
        'load-on-demand-enabled', 'load-relative',
        'load-relative-extension', 'load/cd', 'load/use-compiled',
        'local-expand', 'local-expand/capture-lifts',
        'local-transformer-expand',
        'local-transformer-expand/capture-lifts', 'locale-string-encoding',
        'log', 'magnitude', 'make-arity-at-least', 'make-bytes',
        'make-channel', 'make-continuation-prompt-tag', 'make-custodian',
        'make-custodian-box', 'make-date', 'make-date*',
        'make-derived-parameter', 'make-directory', 'make-ephemeron',
        'make-exn', 'make-exn:break', 'make-exn:fail',
        'make-exn:fail:contract', 'make-exn:fail:contract:arity',
        'make-exn:fail:contract:continuation',
        'make-exn:fail:contract:divide-by-zero',
        'make-exn:fail:contract:non-fixnum-result',
        'make-exn:fail:contract:variable', 'make-exn:fail:filesystem',
        'make-exn:fail:filesystem:exists',
        'make-exn:fail:filesystem:version', 'make-exn:fail:network',
        'make-exn:fail:out-of-memory', 'make-exn:fail:read',
        'make-exn:fail:read:eof', 'make-exn:fail:read:non-char',
        'make-exn:fail:syntax', 'make-exn:fail:syntax:unbound',
        'make-exn:fail:unsupported', 'make-exn:fail:user',
        'make-file-or-directory-link', 'make-hash-placeholder',
        'make-hasheq-placeholder', 'make-hasheqv',
        'make-hasheqv-placeholder', 'make-immutable-hasheqv',
        'make-impersonator-property', 'make-input-port', 'make-inspector',
        'make-known-char-range-list', 'make-output-port', 'make-parameter',
        'make-pipe', 'make-placeholder', 'make-polar',
        'make-prefab-struct', 'make-pseudo-random-generator',
        'make-reader-graph', 'make-readtable', 'make-rectangular',
        'make-rename-transformer', 'make-resolved-module-path',
        'make-security-guard', 'make-semaphore', 'make-set!-transformer',
        'make-shared-bytes', 'make-sibling-inspector',
        'make-special-comment', 'make-srcloc', 'make-string',
        'make-struct-field-accessor', 'make-struct-field-mutator',
        'make-struct-type', 'make-struct-type-property',
        'make-syntax-delta-introducer', 'make-syntax-introducer',
        'make-thread-cell', 'make-thread-group', 'make-vector',
        'make-weak-box', 'make-weak-hasheqv', 'make-will-executor', 'map',
        'max', 'mcar', 'mcdr', 'mcons', 'member', 'memq', 'memv', 'min',
        'module->exports', 'module->imports', 'module->language-info',
        'module->namespace', 'module-compiled-exports',
        'module-compiled-imports', 'module-compiled-language-info',
        'module-compiled-name', 'module-path-index-join',
        'module-path-index-resolve', 'module-path-index-split',
        'module-path-index?', 'module-path?', 'module-predefined?',
        'module-provide-protected?', 'modulo', 'mpair?', 'nack-guard-evt',
        'namespace-attach-module', 'namespace-attach-module-declaration',
        'namespace-base-phase', 'namespace-mapped-symbols',
        'namespace-module-identifier', 'namespace-module-registry',
        'namespace-require', 'namespace-require/constant',
        'namespace-require/copy', 'namespace-require/expansion-time',
        'namespace-set-variable-value!', 'namespace-symbol->identifier',
        'namespace-syntax-introduce', 'namespace-undefine-variable!',
        'namespace-unprotect-module', 'namespace-variable-value',
        'namespace?', 'negative?', 'never-evt', 'newline',
        'normal-case-path', 'not', 'null', 'null?', 'number->string',
        'number?', 'numerator', 'object-name', 'odd?', 'open-input-bytes',
        'open-input-file', 'open-input-output-file', 'open-input-string',
        'open-output-bytes', 'open-output-file', 'open-output-string',
        'ormap', 'output-port?', 'pair?', 'parameter-procedure=?',
        'parameter?', 'parameterization?', 'path->bytes',
        'path->complete-path', 'path->directory-path', 'path->string',
        'path-add-suffix', 'path-convention-type', 'path-element->bytes',
        'path-element->string', 'path-for-some-system?',
        'path-list-string->path-list', 'path-replace-suffix',
        'path-string?', 'path?', 'peek-byte', 'peek-byte-or-special',
        'peek-bytes', 'peek-bytes!', 'peek-bytes-avail!',
        'peek-bytes-avail!*', 'peek-bytes-avail!/enable-break',
        'peek-char', 'peek-char-or-special', 'peek-string', 'peek-string!',
        'pipe-content-length', 'placeholder-get', 'placeholder-set!',
        'placeholder?', 'poll-guard-evt', 'port-closed-evt',
        'port-closed?', 'port-commit-peeked', 'port-count-lines!',
        'port-count-lines-enabled', 'port-display-handler',
        'port-file-identity', 'port-file-unlock', 'port-next-location',
        'port-print-handler', 'port-progress-evt',
        'port-provides-progress-evts?', 'port-read-handler',
        'port-try-file-lock?', 'port-write-handler', 'port-writes-atomic?',
        'port-writes-special?', 'port?', 'positive?',
        'prefab-key->struct-type', 'prefab-struct-key', 'pregexp',
        'pregexp?', 'primitive-closure?', 'primitive-result-arity',
        'primitive?', 'print', 'print-as-expression',
        'print-boolean-long-form', 'print-box', 'print-graph',
        'print-hash-table', 'print-mpair-curly-braces',
        'print-pair-curly-braces', 'print-reader-abbreviations',
        'print-struct', 'print-syntax-width', 'print-unreadable',
        'print-vector-length', 'printf', 'procedure->method',
        'procedure-arity', 'procedure-arity-includes?', 'procedure-arity?',
        'procedure-closure-contents-eq?', 'procedure-extract-target',
        'procedure-reduce-arity', 'procedure-rename',
        'procedure-struct-type?', 'procedure?', 'promise?',
        'prop:arity-string', 'prop:checked-procedure',
        'prop:custom-print-quotable', 'prop:custom-write',
        'prop:equal+hash', 'prop:evt', 'prop:exn:srclocs',
        'prop:impersonator-of', 'prop:input-port',
        'prop:liberal-define-context', 'prop:output-port',
        'prop:procedure', 'prop:rename-transformer',
        'prop:set!-transformer', 'pseudo-random-generator->vector',
        'pseudo-random-generator-vector?', 'pseudo-random-generator?',
        'putenv', 'quotient', 'quotient/remainder', 'raise',
        'raise-arity-error', 'raise-mismatch-error', 'raise-syntax-error',
        'raise-type-error', 'raise-user-error', 'random', 'random-seed',
        'rational?', 'rationalize', 'read', 'read-accept-bar-quote',
        'read-accept-box', 'read-accept-compiled', 'read-accept-dot',
        'read-accept-graph', 'read-accept-infix-dot', 'read-accept-lang',
        'read-accept-quasiquote', 'read-accept-reader', 'read-byte',
        'read-byte-or-special', 'read-bytes', 'read-bytes!',
        'read-bytes-avail!', 'read-bytes-avail!*',
        'read-bytes-avail!/enable-break', 'read-bytes-line',
        'read-case-sensitive', 'read-char', 'read-char-or-special',
        'read-curly-brace-as-paren', 'read-decimal-as-inexact',
        'read-eval-print-loop', 'read-language', 'read-line',
        'read-on-demand-source', 'read-square-bracket-as-paren',
        'read-string', 'read-string!', 'read-syntax',
        'read-syntax/recursive', 'read/recursive', 'readtable-mapping',
        'readtable?', 'real->double-flonum', 'real->floating-point-bytes',
        'real->single-flonum', 'real-part', 'real?', 'regexp',
        'regexp-match', 'regexp-match-peek', 'regexp-match-peek-immediate',
        'regexp-match-peek-positions',
        'regexp-match-peek-positions-immediate',
        'regexp-match-peek-positions-immediate/end',
        'regexp-match-peek-positions/end', 'regexp-match-positions',
        'regexp-match-positions/end', 'regexp-match/end', 'regexp-match?',
        'regexp-max-lookbehind', 'regexp-replace', 'regexp-replace*',
        'regexp?', 'relative-path?', 'remainder',
        'rename-file-or-directory', 'rename-transformer-target',
        'rename-transformer?', 'resolve-path', 'resolved-module-path-name',
        'resolved-module-path?', 'reverse', 'round', 'seconds->date',
        'security-guard?', 'semaphore-peek-evt', 'semaphore-post',
        'semaphore-try-wait?', 'semaphore-wait',
        'semaphore-wait/enable-break', 'semaphore?',
        'set!-transformer-procedure', 'set!-transformer?', 'set-box!',
        'set-mcar!', 'set-mcdr!', 'set-port-next-location!',
        'shared-bytes', 'shell-execute', 'simplify-path', 'sin',
        'single-flonum?', 'sleep', 'special-comment-value',
        'special-comment?', 'split-path', 'sqrt', 'srcloc',
        'srcloc-column', 'srcloc-line', 'srcloc-position', 'srcloc-source',
        'srcloc-span', 'srcloc?', 'string', 'string->bytes/latin-1',
        'string->bytes/locale', 'string->bytes/utf-8',
        'string->immutable-string', 'string->keyword', 'string->list',
        'string->number', 'string->path', 'string->path-element',
        'string->symbol', 'string->uninterned-symbol',
        'string->unreadable-symbol', 'string-append', 'string-ci<=?',
        'string-ci<?', 'string-ci=?', 'string-ci>=?', 'string-ci>?',
        'string-copy', 'string-copy!', 'string-downcase', 'string-fill!',
        'string-foldcase', 'string-length', 'string-locale-ci<?',
        'string-locale-ci=?', 'string-locale-ci>?',
        'string-locale-downcase', 'string-locale-upcase',
        'string-locale<?', 'string-locale=?', 'string-locale>?',
        'string-normalize-nfc', 'string-normalize-nfd',
        'string-normalize-nfkc', 'string-normalize-nfkd', 'string-ref',
        'string-set!', 'string-titlecase', 'string-upcase',
        'string-utf-8-length', 'string<=?', 'string<?', 'string=?',
        'string>=?', 'string>?', 'string?', 'struct->vector',
        'struct-accessor-procedure?', 'struct-constructor-procedure?',
        'struct-info', 'struct-mutator-procedure?',
        'struct-predicate-procedure?', 'struct-type-info',
        'struct-type-make-constructor', 'struct-type-make-predicate',
        'struct-type-property-accessor-procedure?',
        'struct-type-property?', 'struct-type?', 'struct:arity-at-least',
        'struct:date', 'struct:date*', 'struct:exn', 'struct:exn:break',
        'struct:exn:fail', 'struct:exn:fail:contract',
        'struct:exn:fail:contract:arity',
        'struct:exn:fail:contract:continuation',
        'struct:exn:fail:contract:divide-by-zero',
        'struct:exn:fail:contract:non-fixnum-result',
        'struct:exn:fail:contract:variable', 'struct:exn:fail:filesystem',
        'struct:exn:fail:filesystem:exists',
        'struct:exn:fail:filesystem:version', 'struct:exn:fail:network',
        'struct:exn:fail:out-of-memory', 'struct:exn:fail:read',
        'struct:exn:fail:read:eof', 'struct:exn:fail:read:non-char',
        'struct:exn:fail:syntax', 'struct:exn:fail:syntax:unbound',
        'struct:exn:fail:unsupported', 'struct:exn:fail:user',
        'struct:srcloc', 'struct?', 'sub1', 'subbytes', 'subprocess',
        'subprocess-group-enabled', 'subprocess-kill', 'subprocess-pid',
        'subprocess-status', 'subprocess-wait', 'subprocess?', 'substring',
        'symbol->string', 'symbol-interned?', 'symbol-unreadable?',
        'symbol?', 'sync', 'sync/enable-break', 'sync/timeout',
        'sync/timeout/enable-break', 'syntax->list', 'syntax-arm',
        'syntax-column', 'syntax-disarm', 'syntax-e', 'syntax-line',
        'syntax-local-bind-syntaxes', 'syntax-local-certifier',
        'syntax-local-context', 'syntax-local-expand-expression',
        'syntax-local-get-shadower', 'syntax-local-introduce',
        'syntax-local-lift-context', 'syntax-local-lift-expression',
        'syntax-local-lift-module-end-declaration',
        'syntax-local-lift-provide', 'syntax-local-lift-require',
        'syntax-local-lift-values-expression',
        'syntax-local-make-definition-context',
        'syntax-local-make-delta-introducer',
        'syntax-local-module-defined-identifiers',
        'syntax-local-module-exports',
        'syntax-local-module-required-identifiers', 'syntax-local-name',
        'syntax-local-phase-level',
        'syntax-local-transforming-module-provides?', 'syntax-local-value',
        'syntax-local-value/immediate', 'syntax-original?',
        'syntax-position', 'syntax-property',
        'syntax-property-symbol-keys', 'syntax-protect', 'syntax-rearm',
        'syntax-recertify', 'syntax-shift-phase-level', 'syntax-source',
        'syntax-source-module', 'syntax-span', 'syntax-taint',
        'syntax-tainted?', 'syntax-track-origin',
        'syntax-transforming-module-expression?', 'syntax-transforming?',
        'syntax?', 'system-big-endian?', 'system-idle-evt',
        'system-language+country', 'system-library-subpath',
        'system-path-convention-type', 'system-type', 'tan',
        'tcp-abandon-port', 'tcp-accept', 'tcp-accept-evt',
        'tcp-accept-ready?', 'tcp-accept/enable-break', 'tcp-addresses',
        'tcp-close', 'tcp-connect', 'tcp-connect/enable-break',
        'tcp-listen', 'tcp-listener?', 'tcp-port?', 'terminal-port?',
        'thread', 'thread-cell-ref', 'thread-cell-set!', 'thread-cell?',
        'thread-dead-evt', 'thread-dead?', 'thread-group?',
        'thread-resume', 'thread-resume-evt', 'thread-rewind-receive',
        'thread-running?', 'thread-suspend', 'thread-suspend-evt',
        'thread-wait', 'thread/suspend-to-kill', 'thread?', 'time-apply',
        'truncate', 'udp-addresses', 'udp-bind!', 'udp-bound?',
        'udp-close', 'udp-connect!', 'udp-connected?', 'udp-open-socket',
        'udp-receive!', 'udp-receive!*', 'udp-receive!-evt',
        'udp-receive!/enable-break', 'udp-receive-ready-evt', 'udp-send',
        'udp-send*', 'udp-send-evt', 'udp-send-ready-evt', 'udp-send-to',
        'udp-send-to*', 'udp-send-to-evt', 'udp-send-to/enable-break',
        'udp-send/enable-break', 'udp?', 'unbox',
        'uncaught-exception-handler', 'use-collection-link-paths',
        'use-compiled-file-paths', 'use-user-specific-search-paths',
        'values', 'variable-reference->empty-namespace',
        'variable-reference->module-base-phase',
        'variable-reference->module-declaration-inspector',
        'variable-reference->module-source',
        'variable-reference->namespace', 'variable-reference->phase',
        'variable-reference->resolved-module-path',
        'variable-reference-constant?', 'variable-reference?', 'vector',
        'vector->immutable-vector', 'vector->list',
        'vector->pseudo-random-generator',
        'vector->pseudo-random-generator!', 'vector->values',
        'vector-fill!', 'vector-immutable', 'vector-length', 'vector-ref',
        'vector-set!', 'vector-set-performance-stats!', 'vector?',
        'version', 'void', 'void?', 'weak-box-value', 'weak-box?',
        'will-execute', 'will-executor?', 'will-register',
        'will-try-execute', 'with-input-from-file', 'with-output-to-file',
        'wrap-evt', 'write', 'write-byte', 'write-bytes',
        'write-bytes-avail', 'write-bytes-avail*', 'write-bytes-avail-evt',
        'write-bytes-avail/enable-break', 'write-char', 'write-special',
        'write-special-avail*', 'write-special-evt', 'write-string', 'zero?'
    ]
    # From SchemeLexer
    valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~|-]+'

    tokens = {
        'root' : [
            (r';.*$', Comment.Single),
            (r'#\|[^|]+\|#', Comment.Multiline),
            # whitespaces - usually not relevant
            (r'\s+', Text),

            ## numbers: Keep in mind Racket reader hash prefixes,
            ## which can denote the base or the type. These don't map
            ## neatly onto pygments token types; some judgment calls
            ## here. Note that none of these regexps attempt to
            ## exclude identifiers that start with a number, such as a
            ## variable named "100-Continue".

            # #b
            (r'#b[-+]?[01]+\.[01]+', Number.Float),
            (r'#b[01]+e[-+]?[01]+', Number.Float),
            (r'#b[-+]?[01]+/[01]+', Number),
            (r'#b[-+]?[01]+', Number.Integer),
            (r'#b\S*', Error),
            # #d OR no hash prefix
            (r'(#d)?[-+]?\d+\.\d+', Number.Float),
            (r'(#d)?\d+e[-+]?\d+', Number.Float),
            (r'(#d)?[-+]?\d+/\d+', Number),
            (r'(#d)?[-+]?\d+', Number.Integer),
            (r'#d\S*', Error),
            # #e
            (r'#e[-+]?\d+\.\d+', Number.Float),
            (r'#e\d+e[-+]?\d+', Number.Float),
            (r'#e[-+]?\d+/\d+', Number),
            (r'#e[-+]?\d+', Number),
            (r'#e\S*', Error),
            # #i is always inexact-real, i.e. float
            (r'#i[-+]?\d+\.\d+', Number.Float),
            (r'#i\d+e[-+]?\d+', Number.Float),
            (r'#i[-+]?\d+/\d+', Number.Float),
            (r'#i[-+]?\d+', Number.Float),
            (r'#i\S*', Error),
            # #o
            (r'#o[-+]?[0-7]+\.[0-7]+', Number.Oct),
            (r'#o[0-7]+e[-+]?[0-7]+', Number.Oct),
            (r'#o[-+]?[0-7]+/[0-7]+', Number.Oct),
            (r'#o[-+]?[0-7]+', Number.Oct),
            (r'#o\S*', Error),
            # #x
            (r'#x[-+]?[0-9a-fA-F]+\.[0-9a-fA-F]+', Number.Hex),
            # the exponent variation (e.g. #x1e1) is N/A
            (r'#x[-+]?[0-9a-fA-F]+/[0-9a-fA-F]+', Number.Hex),
            (r'#x[-+]?[0-9a-fA-F]+', Number.Hex),
            (r'#x\S*', Error),

            # strings, symbols and characters
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"'" + valid_name, String.Symbol),
            (r"#\\([()/'\"._!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char),
            (r'#rx".+"', String.Regex),
            (r'#px".+"', String.Regex),
            # constants
            (r'(#t|#f)', Name.Constant),
            # keyword argument names (e.g. #:keyword)
            (r'#:\S+', Keyword.Declaration),
            # #lang
            (r'#lang \S+', Keyword.Namespace),
            # special operators
            (r"('|#|`|,@|,|\.)", Operator),
            # highlight the keywords
            ('(%s)' % '|'.join([
                re.escape(entry) + ' ' for entry in keywords]),
             Keyword
            ),
            # first variable in a quoted string like
            # '(this is syntactic sugar)
            (r"(?<='\()" + valid_name, Name.Variable),
            (r"(?<=#\()" + valid_name, Name.Variable),
            # highlight the builtins
            ("(?<=\()(%s)" % '|'.join([
                re.escape(entry) + ' ' for entry in builtins]),
             Name.Builtin
            ),
            # the remaining functions; handle both ( and [
            (r'(?<=(\(|\[|\{))' + valid_name, Name.Function),
            # find the remaining variables
            (valid_name, Name.Variable),
            # the famous parentheses!
            (r'(\(|\)|\[|\]|\{|\})', Punctuation),
        ],
    }
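

# Illustrative usage sketch (added for illustration, not part of the upstream
# file): the RacketLexer above plugs into the generic Pygments pipeline via
# highlight(). `highlight` and `HtmlFormatter` are standard Pygments entry
# points; the helper name `_demo_racket_highlight` is hypothetical.
def _demo_racket_highlight():
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    code = "#lang racket\n(define (add1* x) (+ x 1))"
    # Returns HTML in which `define` is tagged via the `keywords` list and
    # `+` via the `builtins` list declared above.
    return highlight(code, RacketLexer(), HtmlFormatter())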


class SchemeLexer(RegexLexer):
    """
    A Scheme lexer, parsing a stream and outputting the tokens
    needed to highlight Scheme code.

    This lexer could most probably be subclassed easily to parse
    other Lisp dialects such as Common Lisp, Emacs Lisp or AutoLisp.

    This parser is checked against pastes from the Lisp pastebin
    at http://paste.lisp.org/ to cover as much syntax as possible.

    It supports the full Scheme syntax as defined in R5RS.

    *New in Pygments 0.6.*
    """
    name = 'Scheme'
    aliases = ['scheme', 'scm']
    filenames = ['*.scm', '*.ss']
    mimetypes = ['text/x-scheme', 'application/x-scheme']
    # list of known keywords and builtins taken from the vim 6.4 scheme.vim
    # syntax file.
    keywords = [
        'lambda', 'define', 'if', 'else', 'cond', 'and', 'or', 'case', 'let',
        'let*', 'letrec', 'begin', 'do', 'delay', 'set!', '=>', 'quote',
        'quasiquote', 'unquote', 'unquote-splicing', 'define-syntax',
        'let-syntax', 'letrec-syntax', 'syntax-rules'
    ]
    builtins = [
        '*', '+', '-', '/', '<', '<=', '=', '>', '>=', 'abs', 'acos', 'angle',
        'append', 'apply', 'asin', 'assoc', 'assq', 'assv', 'atan',
        'boolean?', 'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr',
        'caar', 'cadaar', 'cadadr', 'cadar', 'caddar', 'cadddr', 'caddr',
        'cadr', 'call-with-current-continuation', 'call-with-input-file',
        'call-with-output-file', 'call-with-values', 'call/cc', 'car',
        'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cdar',
        'cddaar', 'cddadr', 'cddar', 'cdddar', 'cddddr', 'cdddr', 'cddr',
        'cdr', 'ceiling', 'char->integer', 'char-alphabetic?', 'char-ci<=?',
        'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?', 'char-downcase',
        'char-lower-case?', 'char-numeric?', 'char-ready?', 'char-upcase',
        'char-upper-case?', 'char-whitespace?', 'char<=?', 'char<?', 'char=?',
        'char>=?', 'char>?', 'char?', 'close-input-port', 'close-output-port',
        'complex?', 'cons', 'cos', 'current-input-port', 'current-output-port',
        'denominator', 'display', 'dynamic-wind', 'eof-object?', 'eq?',
        'equal?', 'eqv?', 'eval', 'even?', 'exact->inexact', 'exact?', 'exp',
        'expt', 'floor', 'for-each', 'force', 'gcd', 'imag-part',
        'inexact->exact', 'inexact?', 'input-port?', 'integer->char',
        'integer?', 'interaction-environment', 'lcm', 'length', 'list',
        'list->string', 'list->vector', 'list-ref', 'list-tail', 'list?',
        'load', 'log', 'magnitude', 'make-polar', 'make-rectangular',
        'make-string', 'make-vector', 'map', 'max', 'member', 'memq', 'memv',
        'min', 'modulo', 'negative?', 'newline', 'not', 'null-environment',
        'null?', 'number->string', 'number?', 'numerator', 'odd?',
        'open-input-file', 'open-output-file', 'output-port?', 'pair?',
        'peek-char', 'port?', 'positive?', 'procedure?', 'quotient',
        'rational?', 'rationalize', 'read', 'read-char', 'real-part', 'real?',
        'remainder', 'reverse', 'round', 'scheme-report-environment',
        'set-car!', 'set-cdr!', 'sin', 'sqrt', 'string', 'string->list',
        'string->number', 'string->symbol', 'string-append', 'string-ci<=?',
        'string-ci<?', 'string-ci=?', 'string-ci>=?', 'string-ci>?',
        'string-copy', 'string-fill!', 'string-length', 'string-ref',
        'string-set!', 'string<=?', 'string<?', 'string=?', 'string>=?',
        'string>?', 'string?', 'substring', 'symbol->string', 'symbol?',
        'tan', 'transcript-off', 'transcript-on', 'truncate', 'values',
        'vector', 'vector->list', 'vector-fill!', 'vector-length',
        'vector-ref', 'vector-set!', 'vector?', 'with-input-from-file',
        'with-output-to-file', 'write', 'write-char', 'zero?'
    ]
    # valid names for identifiers
    # names just can't consist entirely of numbers,
    # but this should be good enough for now
    valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~|-]+'
    tokens = {
        'root' : [
            # the comments - always starting with semicolon
            # and going to the end of the line
            (r';.*$', Comment.Single),
            # whitespaces - usually not relevant
            (r'\s+', Text),
            # numbers
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+', Number.Integer),
            # support for uncommon kinds of numbers -
            # have to figure out what the characters mean
            #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),
            # strings, symbols and characters
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"'" + valid_name, String.Symbol),
            (r"#\\([()/'\"._!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char),
            # constants
            (r'(#t|#f)', Name.Constant),
            # special operators
            (r"('|#|`|,@|,|\.)", Operator),
            # highlight the keywords
            ('(%s)' % '|'.join([
                re.escape(entry) + ' ' for entry in keywords]),
             Keyword
            ),
            # first variable in a quoted string like
            # '(this is syntactic sugar)
            (r"(?<='\()" + valid_name, Name.Variable),
            (r"(?<=#\()" + valid_name, Name.Variable),
            # highlight the builtins
            ("(?<=\()(%s)" % '|'.join([
                re.escape(entry) + ' ' for entry in builtins]),
             Name.Builtin
            ),
            # the remaining functions
            (r'(?<=\()' + valid_name, Name.Function),
            # find the remaining variables
            (valid_name, Name.Variable),
            # the famous parentheses!
            (r'(\(|\))', Punctuation),
            (r'(\[|\])', Punctuation),
        ],
    }
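

# Illustrative sketch (added for illustration, not part of the upstream file):
# inspecting the raw (tokentype, value) stream that SchemeLexer emits. The
# Lexer.get_tokens() method is the standard Pygments API; the helper name
# `_demo_scheme_tokens` is hypothetical.
def _demo_scheme_tokens():
    lexer = SchemeLexer()
    # `define` should come out as Keyword, `*` as Name.Builtin, `x` as
    # Name.Variable, according to the rules in the tokens table above.
    return list(lexer.get_tokens("(define (square x) (* x x))"))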


class CommonLispLexer(RegexLexer):
    """
    A Common Lisp lexer.

    *New in Pygments 0.9.*
    """
    name = 'Common Lisp'
    aliases = ['common-lisp', 'cl']
    filenames = ['*.cl', '*.lisp', '*.el']  # use for Elisp too
    mimetypes = ['text/x-common-lisp']

    flags = re.IGNORECASE | re.MULTILINE

    ### couple of useful regexes

    # characters that are not macro-characters and can be used to begin a symbol
    nonmacro = r'\\.|[a-zA-Z0-9!$%&*+-/<=>?@\[\]^_{}~]'
    constituent = nonmacro + '|[#.:]'
    terminated = r'(?=[ "()\'\n,;`])'  # whitespace or terminating macro characters

    ### symbol token, reverse-engineered from hyperspec
    # Take a deep breath...
    symbol = r'(\|[^|]+\||(?:%s)(?:%s)*)' % (nonmacro, constituent)
    def __init__(self, **options):
        from pygments.lexers._clbuiltins import BUILTIN_FUNCTIONS, \
            SPECIAL_FORMS, MACROS, LAMBDA_LIST_KEYWORDS, DECLARATIONS, \
            BUILTIN_TYPES, BUILTIN_CLASSES
        self.builtin_function = BUILTIN_FUNCTIONS
        self.special_forms = SPECIAL_FORMS
        self.macros = MACROS
        self.lambda_list_keywords = LAMBDA_LIST_KEYWORDS
        self.declarations = DECLARATIONS
        self.builtin_types = BUILTIN_TYPES
        self.builtin_classes = BUILTIN_CLASSES
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        stack = ['root']
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name.Variable:
                if value in self.builtin_function:
                    yield index, Name.Builtin, value
                    continue
                if value in self.special_forms:
                    yield index, Keyword, value
                    continue
                if value in self.macros:
                    yield index, Name.Builtin, value
                    continue
                if value in self.lambda_list_keywords:
                    yield index, Keyword, value
                    continue
                if value in self.declarations:
                    yield index, Keyword, value
                    continue
                if value in self.builtin_types:
                    yield index, Keyword.Type, value
                    continue
                if value in self.builtin_classes:
                    yield index, Name.Class, value
                    continue
            yield index, token, value
    tokens = {
        'root' : [
            ('', Text, 'body'),
        ],
        'multiline-comment' : [
            (r'#\|', Comment.Multiline, '#push'),  # (cf. Hyperspec 2.4.8.19)
            (r'\|#', Comment.Multiline, '#pop'),
            (r'[^|#]+', Comment.Multiline),
            (r'[|#]', Comment.Multiline),
        ],
        'commented-form' : [
            (r'\(', Comment.Preproc, '#push'),
            (r'\)', Comment.Preproc, '#pop'),
            (r'[^()]+', Comment.Preproc),
        ],
        'body' : [
            # whitespace
            (r'\s+', Text),
            # single-line comment
            (r';.*$', Comment.Single),
            # multi-line comment
            (r'#\|', Comment.Multiline, 'multiline-comment'),
            # encoding comment (?)
            (r'#\d*Y.*$', Comment.Special),
            # strings and characters
            (r'"(\\.|\\\n|[^"\\])*"', String),
            # quoting
            (r":" + symbol, String.Symbol),
            (r"'" + symbol, String.Symbol),
            (r"'", Operator),
            (r"`", Operator),
            # decimal numbers
            (r'[-+]?\d+\.?' + terminated, Number.Integer),
            (r'[-+]?\d+/\d+' + terminated, Number),
            (r'[-+]?(\d*\.\d+([defls][-+]?\d+)?|\d+(\.\d*)?[defls][-+]?\d+)' \
                + terminated, Number.Float),
            # sharpsign strings and characters
            (r"#\\." + terminated, String.Char),
            (r"#\\" + symbol, String.Char),
            # vector
            (r'#\(', Operator, 'body'),
            # bitstring
            (r'#\d*\*[01]*', Literal.Other),
            # uninterned symbol
            (r'#:' + symbol, String.Symbol),
            # read-time and load-time evaluation
            (r'#[.,]', Operator),
            # function shorthand
            (r'#\'', Name.Function),
            # binary rational
            (r'#[bB][+-]?[01]+(/[01]+)?', Number),
            # octal rational
            (r'#[oO][+-]?[0-7]+(/[0-7]+)?', Number.Oct),
            # hex rational
            (r'#[xX][+-]?[0-9a-fA-F]+(/[0-9a-fA-F]+)?', Number.Hex),
            # radix rational
            (r'#\d+[rR][+-]?[0-9a-zA-Z]+(/[0-9a-zA-Z]+)?', Number),
            # complex
            (r'(#[cC])(\()', bygroups(Number, Punctuation), 'body'),
            # array
            (r'(#\d+[aA])(\()', bygroups(Literal.Other, Punctuation), 'body'),
            # structure
            (r'(#[sS])(\()', bygroups(Literal.Other, Punctuation), 'body'),
            # path
            (r'#[pP]?"(\\.|[^"])*"', Literal.Other),
            # reference
            (r'#\d+=', Operator),
            (r'#\d+#', Operator),
            # read-time comment
            (r'#+nil' + terminated + '\s*\(', Comment.Preproc, 'commented-form'),
            # read-time conditional
            (r'#[+-]', Operator),
            # special operators that should have been parsed already
            (r'(,@|,|\.)', Operator),
            # special constants
            (r'(t|nil)' + terminated, Name.Constant),
            # functions and variables
            (r'\*' + symbol + '\*', Name.Variable.Global),
            (symbol, Name.Variable),
            # parentheses
            (r'\(', Punctuation, 'body'),
            (r'\)', Punctuation, '#pop'),
        ],
    }
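

# Illustrative sketch (added for illustration, not part of the upstream file):
# CommonLispLexer.get_tokens_unprocessed() above post-processes the regex
# output and re-classifies plain Name.Variable tokens against the name sets
# imported from pygments.lexers._clbuiltins, so a symbol like `mapcar` should
# come back as Name.Builtin rather than Name.Variable. The helper name
# `_demo_lisp_reclassification` is hypothetical.
def _demo_lisp_reclassification():
    lexer = CommonLispLexer()
    return [(tok, val) for _, tok, val in
            lexer.get_tokens_unprocessed("(mapcar #'evenp '(1 2 3))")]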


class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    *New in Pygments 0.8.*
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    reserved = ['case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_']
    ascii = ['NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL']
    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            #(r'--\s*|.*$', Comment.Doc),
            (r'--(?![!#$%&*+./<=>?@\^|_~:\\]).*?$', Comment.Single),
            (r'{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(as)(\s+)([A-Z][a-zA-Z0-9_.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[a-zA-Z0-9_.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'{-', Comment.Multiline, '#push'),
            (r'-}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@\^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }
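

# Illustrative sketch (added for illustration, not part of the upstream file):
# lexers listed in __all__ are reachable by alias through the standard
# registry, so looking up 'hs' should return the HaskellLexer defined above.
# `get_lexer_by_name` is the standard Pygments helper; the function name
# `_demo_haskell_lookup` is hypothetical.
def _demo_haskell_lookup():
    from pygments.lexers import get_lexer_by_name
    lexer = get_lexer_by_name('hs')
    return list(lexer.get_tokens('main = putStrLn "hello"'))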


line_re = re.compile('.*?\n')
bird_re = re.compile(r'(>[ \t]*)(.*\n)')


class LiterateHaskellLexer(Lexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    *New in Pygments 0.9.*
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def get_tokens_unprocessed(self, text):
        hslexer = HaskellLexer(**self.options)

        style = self.options.get('litstyle')
        if style is None:
            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style
            from pygments.lexers.text import TexLexer
            lxlexer = TexLexer(**self.options)

            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        for item in do_insertions(insertions, hslexer.get_tokens_unprocessed(code)):
            yield item
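

# Illustrative sketch (added for illustration, not part of the upstream file):
# forcing Bird-style processing through the `litstyle` option documented in
# the docstring above, instead of relying on autodetection. The helper name
# `_demo_literate_haskell` and the sample text are hypothetical.
def _demo_literate_haskell():
    lexer = LiterateHaskellLexer(litstyle='bird')
    text = 'Some prose about the program.\n> main = putStrLn "hi"\n'
    # Prose lines come back as Text; the "> " markers as Comment.Special;
    # the code after the markers is delegated to HaskellLexer.
    return list(lexer.get_tokens(text))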


class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    *New in Pygments 1.5.*
    """
    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun',]
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    alphanumid_reserved = [
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    ]

    symbolicid_reserved = [
        # Core
        ':', '\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    ]

    nonid_reserved = [ '(', ')', '[', ']', '{', '}', ',', ';', '...', '_' ]

    alphanumid_re = r"[a-zA-Z][a-zA-Z0-9_']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"
    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy (whatkind):
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]
    # Callbacks for distinguishing tokens and reserved words
    def long_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved: token = Error
        else: token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved: token = Error
        elif match.group(1) in self.symbolicid_reserved: token = Error
        else: token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        str = match.group(1)
        if str in self.alphanumid_reserved: token = Keyword.Reserved
        elif str in self.symbolicid_reserved: token = Punctuation
        else: token = Name
        yield match.start(1), token, str
  1041. tokens = {
  1042. # Whitespace and comments are (almost) everywhere
  1043. 'whitespace': [
  1044. (r'\s+', Text),
  1045. (r'\(\*', Comment.Multiline, 'comment'),
  1046. ],
  1047. 'delimiters': [
  1048. # This lexer treats these delimiters specially:
  1049. # Delimiters define scopes, and the scope is how the meaning of
  1050. # the `|' is resolved - is it a case/handle expression, or function
  1051. # definition by cases? (This is not how the Definition works, but
  1052. # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
  1053. (r'\(|\[|{', Punctuation, 'main'),
  1054. (r'\)|\]|}', Punctuation, '#pop'),
  1055. (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
  1056. (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
  1057. (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
  1058. ],
  1059. 'core': [
  1060. # Punctuation that doesn't overlap symbolic identifiers
  1061. (r'(%s)' % '|'.join([re.escape(z) for z in nonid_reserved]),
  1062. Punctuation),
  1063. # Special constants: strings, floats, numbers in decimal and hex
  1064. (r'#"', String.Char, 'char'),
  1065. (r'"', String.Double, 'string'),
  1066. (r'~?0x[0-9a-fA-F]+', Number.Hex),
  1067. (r'0wx[0-9a-fA-F]+', Number.Hex),
  1068. (r'0w\d+', Number.Integer),
  1069. (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
  1070. (r'~?\d+\.\d+', Number.Float),
  1071. (r'~?\d+[eE]~?\d+', Number.Float),
  1072. (r'~?\d+', Number.Integer),
  1073. # Labels
  1074. (r'#\s*[1-9][0-9]*', Name.Label),
  1075. (r'#\s*(%s)' % alphanumid_re, Name.Label),
  1076. (r'#\s+(%s)' % symbolicid_re, Name.Label),
  1077. # Some reserved words trigger a special, local lexer state change
  1078. (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
  1079. (r'(?=\b(exception)\b(?!\'))', Text, ('ename')),
  1080. (r'\b(functor|include|open|signature|structure)\b(?!\')',
  1081. Keyword.Reserved, 'sname'),
  1082. (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),
  1083. # Regular identifiers, long and otherwise
  1084. (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
  1085. (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
  1086. (r'(%s)' % alphanumid_re, id_callback),
  1087. (r'(%s)' % symbolicid_re, id_callback),
  1088. ],
  1089. 'dotted': [
  1090. (r'(%s)(\.)' % alphanumid_re, long_id_callback),
  1091. (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
  1092. (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
  1093. (r'\s+', Error),
  1094. (r'\S+', Error),
  1095. ],
  1096. # Main parser (prevents errors in files that have scoping errors)
  1097. 'root': [ (r'', Text, 'main') ],
  1098. # In this scope, I expect '|' to not be followed by a function name,
  1099. # and I expect 'and' to be followed by a binding site
  1100. 'main': [
  1101. include('whitespace'),
  1102. # Special behavior of val/and/fun
  1103. (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
  1104. (r'\b(fun)\b(?!\')', Keyword.Reserved,
  1105. ('#pop', 'main-fun', 'fname')),
  1106. include('delimiters'),
  1107. include('core'),
  1108. (r'\S+', Error),
  1109. ],
  1110. # In this scope, I expect '|' and 'and' to be followed by a function
  1111. 'main-fun': [
  1112. include('whitespace'),
  1113. (r'\s', Text),
  1114. (r'\(\*', Comment.Multiline, 'comment'),
  1115. # Special behavior of val/and/fun
  1116. (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
  1117. (r'\b(val)\b(?!\')', Keyword.Reserved,
  1118. ('#pop', 'main', 'vname')),
  1119. # Special behavior of '|' and '|'-manipulating keywords
  1120. (r'\|', Punctuation, 'fname'),
  1121. (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
  1122. ('#pop', 'main')),
  1123. include('delimiters'),
  1124. include('core'),
  1125. (r'\S+', Error),
  1126. ],
  1127. # Character and string parsers
  1128. 'char': stringy(String.Char),
  1129. 'string': stringy(String.Double),
  1130. 'breakout': [
  1131. (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
  1132. ],
  1133. # Dealing with what comes after module system keywords
  1134. 'sname': [
  1135. include('whitespace'),
  1136. include('breakout'),
  1137. (r'(%s)' % alphanumid_re, Name.Namespace),
  1138. (r'', Text, '#pop'),
  1139. ],
  1140. # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
  1141. 'fname': [
  1142. include('whitespace'),
  1143. (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
  1144. (r'\(', Punctuation, 'tyvarseq'),
  1145. (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
  1146. (r'(%s)' % symbolicid_re, Name.Function, '#pop'),
  1147. # Ignore interesting function declarations like "fun (x + y) = ..."
  1148. (r'', Text, '#pop'),
  1149. ],
  1150. # Dealing with what comes after the 'val' (or 'and') keyword
  1151. 'vname': [
  1152. include('whitespace'),
  1153. (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
  1154. (r'\(', Punctuation, 'tyvarseq'),
  1155. (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
  1156. bygroups(Name.Variable, Text, Punctuation), '#pop'),
  1157. (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
  1158. bygroups(Name.Variable, Text, Punctuation), '#pop'),
  1159. (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
  1160. (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),
  1161. # Ignore interesting patterns like 'val (x, y)'
  1162. (r'', Text, '#pop'),
  1163. ],
  1164. # Dealing with what comes after the 'type' (or 'and') keyword
  1165. 'tname': [
  1166. include('whitespace'),
  1167. include('breakout'),
  1168. (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
  1169. (r'\(', Punctuation, 'tyvarseq'),
  1170. (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),
  1171. (r'(%s)' % alphanumid_re, Keyword.Type),
  1172. (r'(%s)' % symbolicid_re, Keyword.Type),
  1173. (r'\S+', Error, '#pop'),
  1174. ],
  1175. # A type binding includes most identifiers
  1176. 'typbind': [
  1177. include('whitespace'),
  1178. (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
  1179. include('breakout'),
  1180. include('core'),
  1181. (r'\S+', Error, '#pop'),
  1182. ],
  1183. # Dealing with what comes after the 'datatype' (or 'and') keyword
  1184. 'dname': [
  1185. include('whitespace'),
  1186. include('breakout'),
  1187. (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
  1188. (r'\(', Punctuation, 'tyvarseq'),
  1189. (r'(=)(\s*)(datatype)',
  1190. bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
  1191. (r'=(?!%s)' % symbolicid_re, Punctuation,
  1192. ('#pop', 'datbind', 'datcon')),
  1193. (r'(%s)' % alphanumid_re, Keyword.Type),
  1194. (r'(%s)' % symbolicid_re, Keyword.Type),
  1195. (r'\S+', Error, '#pop'),
  1196. ],
  1197. # common case - A | B | C of int
  1198. 'datbind': [
  1199. include('whitespace'),
  1200. (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
  1201. (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
  1202. (r'\b(of)\b(?!\')', Keyword.Reserved),
  1203. (r'(\|)(\s*)(%s)' % alphanumid_re,
  1204. bygroups(Punctuation, Text, Name.Class)),
  1205. (r'(\|)(\s+)(%s)' % symbolicid_re,
  1206. bygroups(Punctuation, Text, Name.Class)),
  1207. include('breakout'),
  1208. include('core'),
  1209. (r'\S+', Error),
  1210. ],
  1211. # Dealing with what comes after an exception
  1212. 'ename': [
  1213. include('whitespace'),
  1214. (r'(exception|and)\b(\s+)(%s)' % alphanumid_re,
  1215. bygroups(Keyword.Reserved, Text, Name.Class)),
  1216. (r'(exception|and)\b(\s*)(%s)' % symbolicid_re,
  1217. bygroups(Keyword.Reserved, Text, Name.Class)),
  1218. (r'\b(of)\b(?!\')', Keyword.Reserved),
  1219. include('breakout'),
  1220. include('core'),
  1221. (r'\S+', Error),
  1222. ],
  1223. 'datcon': [
  1224. include('whitespace'),
  1225. (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
  1226. (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
  1227. (r'\S+', Error, '#pop'),
  1228. ],
  1229. # Series of type variables
  1230. 'tyvarseq': [
  1231. (r'\s', Text),
  1232. (r'\(\*', Comment.Multiline, 'comment'),
  1233. (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
  1234. (alphanumid_re, Name),
  1235. (r',', Punctuation),
  1236. (r'\)', Punctuation, '#pop'),
  1237. (symbolicid_re, Name),
  1238. ],
  1239. 'comment': [
  1240. (r'[^(*)]', Comment.Multiline),
  1241. (r'\(\*', Comment.Multiline, '#push'),
  1242. (r'\*\)', Comment.Multiline, '#pop'),
  1243. (r'[(*)]', Comment.Multiline),
  1244. ],
  1245. }
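# For illustration, given
#   datatype 'a tree = Leaf | Node of 'a tree * 'a * 'a tree
# 'dname' consumes the type variable and the name 'tree', '=' switches to
# ('datbind', 'datcon'), 'datcon' marks 'Leaf' as Name.Class, and 'datbind'
# then handles "| Node of ..." through its '|' and 'of' rules.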
  1246. class OcamlLexer(RegexLexer):
  1247. """
  1248. For the OCaml language.
  1249. *New in Pygments 0.7.*
  1250. """
  1251. name = 'OCaml'
  1252. aliases = ['ocaml']
  1253. filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
  1254. mimetypes = ['text/x-ocaml']
  1255. keywords = [
  1256. 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
  1257. 'downto', 'else', 'end', 'exception', 'external', 'false',
  1258. 'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
  1259. 'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
  1260. 'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
  1261. 'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
  1262. 'type', 'value', 'val', 'virtual', 'when', 'while', 'with',
  1263. ]
  1264. keyopts = [
1265. '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
1266. r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
1267. '<-', '=', '>', '>]', '>}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>', r'\[\|',
1268. ']', '_', '`', '{', '{<', r'\|', r'\|]', '}', '~'
  1269. ]
  1270. operators = r'[!$%&*+\./:<=>?@^|~-]'
  1271. word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or']
  1272. prefix_syms = r'[!?~]'
  1273. infix_syms = r'[=<>@^|&+\*/$%-]'
  1274. primitives = ['unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array']
  1275. tokens = {
  1276. 'escape-sequence': [
  1277. (r'\\[\\\"\'ntbr]', String.Escape),
  1278. (r'\\[0-9]{3}', String.Escape),
  1279. (r'\\x[0-9a-fA-F]{2}', String.Escape),
  1280. ],
  1281. 'root': [
  1282. (r'\s+', Text),
  1283. (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
  1284. (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)',
  1285. Name.Namespace, 'dotted'),
  1286. (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class),
  1287. (r'\(\*(?![)])', Comment, 'comment'),
  1288. (r'\b(%s)\b' % '|'.join(keywords), Keyword),
  1289. (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
  1290. (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
  1291. (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
  1292. (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
  1293. (r"[^\W\d][\w']*", Name),
1294. (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)', Number.Float),
  1295. (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
  1296. (r'0[oO][0-7][0-7_]*', Number.Oct),
  1297. (r'0[bB][01][01_]*', Number.Binary),
  1298. (r'\d[\d_]*', Number.Integer),
  1299. (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
  1300. String.Char),
  1301. (r"'.'", String.Char),
  1302. (r"'", Keyword), # a stray quote is another syntax element
  1303. (r'"', String.Double, 'string'),
  1304. (r'[~?][a-z][\w\']*:', Name.Variable),
  1305. ],
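# Note: joining keyopts in reverse puts longer operators such as '::', '->'
# and '..' before their single-character prefixes in the alternation, so
# they are tried first instead of stopping at ':' or '-'.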
  1306. 'comment': [
  1307. (r'[^(*)]+', Comment),
  1308. (r'\(\*', Comment, '#push'),
  1309. (r'\*\)', Comment, '#pop'),
  1310. (r'[(*)]', Comment),
  1311. ],
  1312. 'string': [
  1313. (r'[^\\"]+', String.Double),
  1314. include('escape-sequence'),
  1315. (r'\\\n', String.Double),
  1316. (r'"', String.Double, '#pop'),
  1317. ],
  1318. 'dotted': [
  1319. (r'\s+', Text),
  1320. (r'\.', Punctuation),
  1321. (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace),
  1322. (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'),
  1323. (r'[a-z_][A-Za-z0-9_\']*', Name, '#pop'),
  1324. ],
  1325. }
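# Rough usage sketch (any Pygments formatter works here):
#   from pygments import highlight
#   from pygments.formatters import TerminalFormatter
#   print(highlight("let rec fact n = if n = 0 then 1 else n * fact (n - 1)",
#                   OcamlLexer(), TerminalFormatter()))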
  1326. class ErlangLexer(RegexLexer):
  1327. """
  1328. For the Erlang functional programming language.
  1329. Blame Jeremy Thurgood (http://jerith.za.net/).
  1330. *New in Pygments 0.9.*
  1331. """
  1332. name = 'Erlang'
  1333. aliases = ['erlang']
  1334. filenames = ['*.erl', '*.hrl', '*.es', '*.escript']
  1335. mimetypes = ['text/x-erlang']
  1336. keywords = [
  1337. 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
  1338. 'let', 'of', 'query', 'receive', 'try', 'when',
  1339. ]
  1340. builtins = [ # See erlang(3) man page
  1341. 'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
  1342. 'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
  1343. 'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
  1344. 'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
  1345. 'float', 'float_to_list', 'fun_info', 'fun_to_list',
  1346. 'function_exported', 'garbage_collect', 'get', 'get_keys',
  1347. 'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
  1348. 'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
  1349. 'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
  1350. 'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
  1351. 'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
  1352. 'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
  1353. 'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
  1354. 'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
  1355. 'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
  1356. 'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
  1357. 'pid_to_list', 'port_close', 'port_command', 'port_connect',
  1358. 'port_control', 'port_call', 'port_info', 'port_to_list',
  1359. 'process_display', 'process_flag', 'process_info', 'purge_module',
  1360. 'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
  1361. 'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
  1362. 'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
  1363. 'spawn_opt', 'split_binary', 'start_timer', 'statistics',
  1364. 'suspend_process', 'system_flag', 'system_info', 'system_monitor',
  1365. 'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
  1366. 'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
  1367. 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
  1368. ]
  1369. operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)'
  1370. word_operators = [
  1371. 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
  1372. 'div', 'not', 'or', 'orelse', 'rem', 'xor'
  1373. ]
  1374. atom_re = r"(?:[a-z][a-zA-Z0-9_]*|'[^\n']*[^\\]')"
  1375. variable_re = r'(?:[A-Z_][a-zA-Z0-9_]*)'
  1376. escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))'
  1377. macro_re = r'(?:'+variable_re+r'|'+atom_re+r')'
  1378. base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'
  1379. tokens = {
  1380. 'root': [
  1381. (r'\s+', Text),
  1382. (r'%.*\n', Comment),
  1383. ('(' + '|'.join(keywords) + r')\b', Keyword),
  1384. ('(' + '|'.join(builtins) + r')\b', Name.Builtin),
  1385. ('(' + '|'.join(word_operators) + r')\b', Operator.Word),
  1386. (r'^-', Punctuation, 'directive'),
  1387. (operators, Operator),
  1388. (r'"', String, 'string'),
  1389. (r'<<', Name.Label),
  1390. (r'>>', Name.Label),
  1391. ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)),
  1392. ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()',
  1393. bygroups(Name.Function, Text, Punctuation)),
  1394. (r'[+-]?'+base_re+r'#[0-9a-zA-Z]+', Number.Integer),
1395. (r'[+-]?\d+\.\d+([eE][+\-]?\d+)?', Number.Float),
1396. (r'[+-]?\d+', Number.Integer),
  1397. (r'[]\[:_@\".{}()|;,]', Punctuation),
  1398. (variable_re, Name.Variable),
  1399. (atom_re, Name),
  1400. (r'\?'+macro_re, Name.Constant),
  1401. (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char),
  1402. (r'#'+atom_re+r'(:?\.'+atom_re+r')?', Name.Label),
  1403. ],
  1404. 'string': [
  1405. (escape_re, String.Escape),
  1406. (r'"', String, '#pop'),
  1407. (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol),
  1408. (r'[^"\\~]+', String),
  1409. (r'~', String),
  1410. ],
  1411. 'directive': [
  1412. (r'(define)(\s*)(\()('+macro_re+r')',
  1413. bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'),
  1414. (r'(record)(\s*)(\()('+macro_re+r')',
  1415. bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'),
  1416. (atom_re, Name.Entity, '#pop'),
  1417. ],
  1418. }
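# For illustration, in
#   -define(TIMEOUT, 5000).
# the leading '-' pushes 'directive', where 'define' is tagged Name.Entity
# and 'TIMEOUT' Name.Constant before the state pops back to 'root'.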
  1419. class ErlangShellLexer(Lexer):
  1420. """
  1421. Shell sessions in erl (for Erlang code).
  1422. *New in Pygments 1.1.*
  1423. """
  1424. name = 'Erlang erl session'
  1425. aliases = ['erl']
  1426. filenames = ['*.erl-sh']
  1427. mimetypes = ['text/x-erl-shellsession']
  1428. _prompt_re = re.compile(r'\d+>(?=\s|\Z)')
  1429. def get_tokens_unprocessed(self, text):
  1430. erlexer = ErlangLexer(**self.options)
  1431. curcode = ''
  1432. insertions = []
  1433. for match in line_re.finditer(text):
  1434. line = match.group()
  1435. m = self._prompt_re.match(line)
  1436. if m is not None:
  1437. end = m.end()
  1438. insertions.append((len(curcode),
  1439. [(0, Generic.Prompt, line[:end])]))
  1440. curcode += line[end:]
  1441. else:
  1442. if curcode:
  1443. for item in do_insertions(insertions,
  1444. erlexer.get_tokens_unprocessed(curcode)):
  1445. yield item
  1446. curcode = ''
  1447. insertions = []
  1448. if line.startswith('*'):
  1449. yield match.start(), Generic.Traceback, line
  1450. else:
  1451. yield match.start(), Generic.Output, line
  1452. if curcode:
  1453. for item in do_insertions(insertions,
  1454. erlexer.get_tokens_unprocessed(curcode)):
  1455. yield item
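# For illustration, a transcript such as
#   1> lists:seq(1, 3).
#   [1,2,3]
# yields "1>" as Generic.Prompt, the rest of that line is delegated to
# ErlangLexer, and the result line is emitted as Generic.Output.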
  1456. class OpaLexer(RegexLexer):
  1457. """
  1458. Lexer for the Opa language (http://opalang.org).
  1459. *New in Pygments 1.5.*
  1460. """
  1461. name = 'Opa'
  1462. aliases = ['opa']
  1463. filenames = ['*.opa']
  1464. mimetypes = ['text/x-opa']
  1465. # most of these aren't strictly keywords
  1466. # but if you color only real keywords, you might just
  1467. # as well not color anything
  1468. keywords = [
  1469. 'and', 'as', 'begin', 'css', 'database', 'db', 'do', 'else', 'end',
  1470. 'external', 'forall', 'if', 'import', 'match', 'package', 'parser',
  1471. 'rec', 'server', 'then', 'type', 'val', 'with', 'xml_parser'
  1472. ]
  1473. # matches both stuff and `stuff`
  1474. ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'
  1475. op_re = r'[.=\-<>,@~%/+?*&^!]'
  1476. punc_re = r'[()\[\],;|]' # '{' and '}' are treated elsewhere
  1477. # because they are also used for inserts
  1478. tokens = {
  1479. # copied from the caml lexer, should be adapted
  1480. 'escape-sequence': [
  1481. (r'\\[\\\"\'ntr}]', String.Escape),
  1482. (r'\\[0-9]{3}', String.Escape),
  1483. (r'\\x[0-9a-fA-F]{2}', String.Escape),
  1484. ],
  1485. # factorizing these rules, because they are inserted many times
  1486. 'comments': [
  1487. (r'/\*', Comment, 'nested-comment'),
  1488. (r'//.*?$', Comment),
  1489. ],
  1490. 'comments-and-spaces': [
  1491. include('comments'),
  1492. (r'\s+', Text),
  1493. ],
  1494. 'root': [
  1495. include('comments-and-spaces'),
  1496. # keywords
  1497. (r'\b(%s)\b' % '|'.join(keywords), Keyword),
  1498. # directives
  1499. # we could parse the actual set of directives instead of anything
  1500. # starting with @, but this is troublesome
  1501. # because it needs to be adjusted all the time
  1502. # and assuming we parse only sources that compile, it is useless
  1503. (r'@'+ident_re+r'\b', Name.Builtin.Pseudo),
  1504. # number literals
1505. (r'-?\.[\d]+([eE][+\-]?\d+)?', Number.Float),
1506. (r'-?\d+\.\d*([eE][+\-]?\d+)?', Number.Float),
  1507. (r'-?\d+[eE][+\-]?\d+', Number.Float),
  1508. (r'0[xX][\da-fA-F]+', Number.Hex),
  1509. (r'0[oO][0-7]+', Number.Oct),
  1510. (r'0[bB][01]+', Number.Binary),
  1511. (r'\d+', Number.Integer),
  1512. # color literals
  1513. (r'#[\da-fA-F]{3,6}', Number.Integer),
  1514. # string literals
  1515. (r'"', String.Double, 'string'),
  1516. # char literal, should be checked because this is the regexp from
  1517. # the caml lexer
  1518. (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
  1519. String.Char),
  1520. # this is meant to deal with embedded exprs in strings
  1521. # every time we find a '}' we pop a state so that if we were
  1522. # inside a string, we are back in the string state
  1523. # as a consequence, we must also push a state every time we find a
  1524. # '{' or else we will have errors when parsing {} for instance
  1525. (r'{', Operator, '#push'),
  1526. (r'}', Operator, '#pop'),
  1527. # html literals
1528. # this is much stricter than the actual parser,
1529. # since a<b would not be parsed as html
1530. # but then again, the real parser is far too lax, and we can't hope
1531. # to be that tolerant here
  1532. (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),
  1533. # db path
  1534. # matching the '[_]' in '/a[_]' because it is a part
  1535. # of the syntax of the db path definition
1536. # unfortunately, I don't know how to match the ']' in
  1537. # /a[1], so this is somewhat inconsistent
  1538. (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
  1539. # putting the same color on <- as on db path, since
  1540. # it can be used only to mean Db.write
  1541. (r'<-(?!'+op_re+r')', Name.Variable),
  1542. # 'modules'
1543. # although modules are not distinguished by their names as in caml,
1544. # the standard library seems to follow the convention that only
1545. # module names are capitalized
  1546. (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),
  1547. # operators
1548. # '=' has a special role because it is the only way to
1549. # syntactically distinguish binding constructions
1550. # unfortunately, this also colors the equal sign in {x=2}
  1551. (r'=(?!'+op_re+r')', Keyword),
  1552. (r'(%s)+' % op_re, Operator),
  1553. (r'(%s)+' % punc_re, Operator),
  1554. # coercions
  1555. (r':', Operator, 'type'),
  1556. # type variables
1557. # we need this rule because we don't parse type definitions
1558. # specially, so in "type t('a) = ...", "'a" is parsed by 'root'
  1559. ("'"+ident_re, Keyword.Type),
  1560. # id literal, #something, or #{expr}
  1561. (r'#'+ident_re, String.Single),
  1562. (r'#(?={)', String.Single),
  1563. # identifiers
1564. # this avoids coloring the '2' in 'a2' as an integer
  1565. (ident_re, Text),
  1566. # default, not sure if that is needed or not
  1567. # (r'.', Text),
  1568. ],
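# For illustration, in
#   s = "hello {name}!"
# the opening '"' enters the 'string' state defined below, '{' pushes 'root'
# so that 'name' is lexed as an ordinary identifier, and '}' pops back into
# the string.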
  1569. # it is quite painful to have to parse types to know where they end
  1570. # this is the general rule for a type
  1571. # a type is either:
  1572. # * -> ty
  1573. # * type-with-slash
  1574. # * type-with-slash -> ty
  1575. # * type-with-slash (, type-with-slash)+ -> ty
  1576. #
1577. # the code is pretty funky in here, but it would roughly
1578. # translate into caml as:
  1579. # let rec type stream =
  1580. # match stream with
  1581. # | [< "->"; stream >] -> type stream
  1582. # | [< ""; stream >] ->
  1583. # type_with_slash stream
  1584. # type_lhs_1 stream;
  1585. # and type_1 stream = ...
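# Concrete example: for the coercion ": list(int) -> string", 'type' defers
# to 'type-with-slash', 'type-1' sees "list(" and scans the parenthesised
# arguments in 'type-tuple', then 'type-lhs-1' finds "->" and recurses into
# 'type' for the result.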
  1586. 'type': [
  1587. include('comments-and-spaces'),
  1588. (r'->', Keyword.Type),
  1589. (r'', Keyword.Type, ('#pop', 'type-lhs-1', 'type-with-slash')),
  1590. ],
  1591. # parses all the atomic or closed constructions in the syntax of type
  1592. # expressions: record types, tuple types, type constructors, basic type
  1593. # and type variables
  1594. 'type-1': [
  1595. include('comments-and-spaces'),
  1596. (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
  1597. (r'~?{', Keyword.Type, ('#pop', 'type-record')),
  1598. (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
  1599. (ident_re, Keyword.Type, '#pop'),
  1600. ("'"+ident_re, Keyword.Type),
  1601. # this case is not in the syntax but sometimes
  1602. # we think we are parsing types when in fact we are parsing
  1603. # some css, so we just pop the states until we get back into
  1604. # the root state
  1605. (r'', Keyword.Type, '#pop'),
  1606. ],
  1607. # type-with-slash is either:
  1608. # * type-1
  1609. # * type-1 (/ type-1)+
  1610. 'type-with-slash': [
  1611. include('comments-and-spaces'),
  1612. (r'', Keyword.Type, ('#pop', 'slash-type-1', 'type-1')),
  1613. ],
  1614. 'slash-type-1': [
  1615. include('comments-and-spaces'),
  1616. ('/', Keyword.Type, ('#pop', 'type-1')),
  1617. # same remark as above
  1618. (r'', Keyword.Type, '#pop'),
  1619. ],
  1620. # we go in this state after having parsed a type-with-slash
  1621. # while trying to parse a type
  1622. # and at this point we must determine if we are parsing an arrow
  1623. # type (in which case we must continue parsing) or not (in which
  1624. # case we stop)
  1625. 'type-lhs-1': [
  1626. include('comments-and-spaces'),
  1627. (r'->', Keyword.Type, ('#pop', 'type')),
  1628. (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
  1629. (r'', Keyword.Type, '#pop'),
  1630. ],
  1631. 'type-arrow': [
  1632. include('comments-and-spaces'),
1633. # the lookahead here allows f(x : int, y : float -> truc)
1634. # to be parsed correctly
  1635. (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
  1636. (r'->', Keyword.Type, ('#pop', 'type')),
  1637. # same remark as above
  1638. (r'', Keyword.Type, '#pop'),
  1639. ],
  1640. # no need to do precise parsing for tuples and records
  1641. # because they are closed constructions, so we can simply
  1642. # find the closing delimiter
1643. # note that this approach would not work if the source
1644. # contained identifiers like `{)` (although it could be patched
1645. # to support them)
  1646. 'type-tuple': [
  1647. include('comments-and-spaces'),
  1648. (r'[^\(\)/*]+', Keyword.Type),
  1649. (r'[/*]', Keyword.Type),
  1650. (r'\(', Keyword.Type, '#push'),
  1651. (r'\)', Keyword.Type, '#pop'),
  1652. ],
  1653. 'type-record': [
  1654. include('comments-and-spaces'),
  1655. (r'[^{}/*]+', Keyword.Type),
  1656. (r'[/*]', Keyword.Type),
  1657. (r'{', Keyword.Type, '#push'),
  1658. (r'}', Keyword.Type, '#pop'),
  1659. ],
  1660. # 'type-tuple': [
  1661. # include('comments-and-spaces'),
  1662. # (r'\)', Keyword.Type, '#pop'),
  1663. # (r'', Keyword.Type, ('#pop', 'type-tuple-1', 'type-1')),
  1664. # ],
  1665. # 'type-tuple-1': [
  1666. # include('comments-and-spaces'),
  1667. # (r',?\s*\)', Keyword.Type, '#pop'), # ,) is a valid end of tuple, in (1,)
  1668. # (r',', Keyword.Type, 'type-1'),
  1669. # ],
  1670. # 'type-record':[
  1671. # include('comments-and-spaces'),
  1672. # (r'}', Keyword.Type, '#pop'),
  1673. # (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
  1674. # ],
  1675. # 'type-record-field-expr': [
  1676. #
  1677. # ],
  1678. 'nested-comment': [
  1679. (r'[^/*]+', Comment),
  1680. (r'/\*', Comment, '#push'),
  1681. (r'\*/', Comment, '#pop'),
  1682. (r'[/*]', Comment),
  1683. ],
1684. # the copy-pasting between 'string' and 'single-string'
1685. # is kinda sad; is there a way to avoid it?
  1686. 'string': [
  1687. (r'[^\\"{]+', String.Double),
  1688. (r'"', String.Double, '#pop'),
  1689. (r'{', Operator, 'root'),
  1690. include('escape-sequence'),
  1691. ],
  1692. 'single-string': [
  1693. (r'[^\\\'{]+', String.Double),
  1694. (r'\'', String.Double, '#pop'),
  1695. (r'{', Operator, 'root'),
  1696. include('escape-sequence'),
  1697. ],
  1698. # all the html stuff
  1699. # can't really reuse some existing html parser
  1700. # because we must be able to parse embedded expressions
  1701. # we are in this state after someone parsed the '<' that
  1702. # started the html literal
  1703. 'html-open-tag': [
  1704. (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
  1705. (r'>', String.Single, ('#pop', 'html-content')),
  1706. ],
  1707. # we are in this state after someone parsed the '</' that
  1708. # started the end of the closing tag
  1709. 'html-end-tag': [
  1710. # this is a star, because </> is allowed
  1711. (r'[\w\-:]*>', String.Single, '#pop'),
  1712. ],
  1713. # we are in this state after having parsed '<ident(:ident)?'
  1714. # we thus parse a possibly empty list of attributes
  1715. 'html-attr': [
  1716. (r'\s+', Text),
  1717. (r'[\w\-:]+=', String.Single, 'html-attr-value'),
  1718. (r'/>', String.Single, '#pop'),
  1719. (r'>', String.Single, ('#pop', 'html-content')),
  1720. ],
  1721. 'html-attr-value': [
  1722. (r"'", String.Single, ('#pop', 'single-string')),
  1723. (r'"', String.Single, ('#pop', 'string')),
  1724. (r'#'+ident_re, String.Single, '#pop'),
  1725. (r'#(?={)', String.Single, ('#pop', 'root')),
  1726. (r'{', Operator, ('#pop', 'root')), # this is a tail call!
  1727. ],
  1728. # we should probably deal with '\' escapes here
  1729. 'html-content': [
  1730. (r'<!--', Comment, 'html-comment'),
  1731. (r'</', String.Single, ('#pop', 'html-end-tag')),
  1732. (r'<', String.Single, 'html-open-tag'),
  1733. (r'{', Operator, 'root'),
  1734. (r'.|\s+', String.Single),
  1735. ],
  1736. 'html-comment': [
  1737. (r'-->', Comment, '#pop'),
  1738. (r'[^\-]+|-', Comment),
  1739. ],
  1740. }
  1741. class CoqLexer(RegexLexer):
  1742. """
  1743. For the `Coq <http://coq.inria.fr/>`_ theorem prover.
  1744. *New in Pygments 1.5.*
  1745. """
  1746. name = 'Coq'
  1747. aliases = ['coq']
  1748. filenames = ['*.v']
  1749. mimetypes = ['text/x-coq']
  1750. keywords1 = [
  1751. # Vernacular commands
  1752. 'Section', 'Module', 'End', 'Require', 'Import', 'Export', 'Variable',
  1753. 'Variables', 'Parameter', 'Parameters', 'Axiom', 'Hypothesis',
  1754. 'Hypotheses', 'Notation', 'Local', 'Tactic', 'Reserved', 'Scope',
  1755. 'Open', 'Close', 'Bind', 'Delimit', 'Definition', 'Let', 'Ltac',
  1756. 'Fixpoint', 'CoFixpoint', 'Morphism', 'Relation', 'Implicit',
  1757. 'Arguments', 'Set', 'Unset', 'Contextual', 'Strict', 'Prenex',
  1758. 'Implicits', 'Inductive', 'CoInductive', 'Record', 'Structure',
  1759. 'Canonical', 'Coercion', 'Theorem', 'Lemma', 'Corollary',
  1760. 'Proposition', 'Fact', 'Remark', 'Example', 'Proof', 'Goal', 'Save',
  1761. 'Qed', 'Defined', 'Hint', 'Resolve', 'Rewrite', 'View', 'Search',
  1762. 'Show', 'Print', 'Printing', 'All', 'Graph', 'Projections', 'inside',
  1763. 'outside',
  1764. ]
  1765. keywords2 = [
  1766. # Gallina
  1767. 'forall', 'exists', 'exists2', 'fun', 'fix', 'cofix', 'struct',
  1768. 'match', 'end', 'in', 'return', 'let', 'if', 'is', 'then', 'else',
  1769. 'for', 'of', 'nosimpl', 'with', 'as',
  1770. ]
  1771. keywords3 = [
  1772. # Sorts
  1773. 'Type', 'Prop',
  1774. ]
  1775. keywords4 = [
  1776. # Tactics
  1777. 'pose', 'set', 'move', 'case', 'elim', 'apply', 'clear', 'hnf', 'intro',
  1778. 'intros', 'generalize', 'rename', 'pattern', 'after', 'destruct',
  1779. 'induction', 'using', 'refine', 'inversion', 'injection', 'rewrite',
  1780. 'congr', 'unlock', 'compute', 'ring', 'field', 'replace', 'fold',
  1781. 'unfold', 'change', 'cutrewrite', 'simpl', 'have', 'suff', 'wlog',
  1782. 'suffices', 'without', 'loss', 'nat_norm', 'assert', 'cut', 'trivial',
  1783. 'revert', 'bool_congr', 'nat_congr', 'symmetry', 'transitivity', 'auto',
  1784. 'split', 'left', 'right', 'autorewrite',
  1785. ]
  1786. keywords5 = [
  1787. # Terminators
  1788. 'by', 'done', 'exact', 'reflexivity', 'tauto', 'romega', 'omega',
  1789. 'assumption', 'solve', 'contradiction', 'discriminate',
  1790. ]
  1791. keywords6 = [
  1792. # Control
  1793. 'do', 'last', 'first', 'try', 'idtac', 'repeat',
  1794. ]
  1795. # 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
  1796. # 'downto', 'else', 'end', 'exception', 'external', 'false',
  1797. # 'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
  1798. # 'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
  1799. # 'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
  1800. # 'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
  1801. # 'type', 'val', 'virtual', 'when', 'while', 'with'
  1802. keyopts = [
  1803. '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
  1804. r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
  1805. '<-', '=', '>', '>]', '>}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
  1806. r'\[\|', ']', '_', '`', '{', '{<', r'\|', r'\|]', '}', '~', '=>',
  1807. r'/\\', r'\\/',
  1808. u'Π', u'λ',
  1809. ]
  1810. operators = r'[!$%&*+\./:<=>?@^|~-]'
  1811. word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or']
  1812. prefix_syms = r'[!?~]'
  1813. infix_syms = r'[=<>@^|&+\*/$%-]'
  1814. primitives = ['unit', 'int', 'float', 'bool', 'string', 'char', 'list',
  1815. 'array']
  1816. tokens = {
  1817. 'root': [
  1818. (r'\s+', Text),
  1819. (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
  1820. (r'\(\*', Comment, 'comment'),
  1821. (r'\b(%s)\b' % '|'.join(keywords1), Keyword.Namespace),
  1822. (r'\b(%s)\b' % '|'.join(keywords2), Keyword),
  1823. (r'\b(%s)\b' % '|'.join(keywords3), Keyword.Type),
  1824. (r'\b(%s)\b' % '|'.join(keywords4), Keyword),
  1825. (r'\b(%s)\b' % '|'.join(keywords5), Keyword.Pseudo),
  1826. (r'\b(%s)\b' % '|'.join(keywords6), Keyword.Reserved),
  1827. (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)',
  1828. Name.Namespace, 'dotted'),
  1829. (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class),
  1830. (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
  1831. (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
  1832. (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
  1833. (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
  1834. (r"[^\W\d][\w']*", Name),
1835. (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
1836. (r'0[oO][0-7][0-7_]*', Number.Oct),
1837. (r'0[bB][01][01_]*', Number.Binary),
1838. (r'-?\d[\d_]*(\.[\d_]+([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)', Number.Float),
1839. (r'\d[\d_]*', Number.Integer),
  1840. (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
  1841. String.Char),
  1842. (r"'.'", String.Char),
  1843. (r"'", Keyword), # a stray quote is another syntax element
  1844. (r'"', String.Double, 'string'),
  1845. (r'[~?][a-z][\w\']*:', Name.Variable),
  1846. ],
  1847. 'comment': [
  1848. (r'[^(*)]+', Comment),
  1849. (r'\(\*', Comment, '#push'),
  1850. (r'\*\)', Comment, '#pop'),
  1851. (r'[(*)]', Comment),
  1852. ],
  1853. 'string': [
  1854. (r'[^"]+', String.Double),
  1855. (r'""', String.Double),
  1856. (r'"', String.Double, '#pop'),
  1857. ],
  1858. 'dotted': [
  1859. (r'\s+', Text),
  1860. (r'\.', Punctuation),
  1861. (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace),
  1862. (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'),
  1863. (r'[a-z][a-z0-9_\']*', Name, '#pop'),
  1864. (r'', Text, '#pop')
  1865. ],
  1866. }
  1867. def analyse_text(text):
  1868. if text.startswith('(*'):
  1869. return True
  1870. class NewLispLexer(RegexLexer):
  1871. """
1872. For `newLISP <www.newlisp.org>`_ source code (version 10.3.0).
  1873. *New in Pygments 1.5.*
  1874. """
  1875. name = 'NewLisp'
  1876. aliases = ['newlisp']
  1877. filenames = ['*.lsp', '*.nl']
  1878. mimetypes = ['text/x-newlisp', 'application/x-newlisp']
  1879. flags = re.IGNORECASE | re.MULTILINE | re.UNICODE
  1880. # list of built-in functions for newLISP version 10.3
  1881. builtins = [
  1882. '^', '--', '-', ':', '!', '!=', '?', '@', '*', '/', '&', '%', '+', '++',
  1883. '<', '<<', '<=', '=', '>', '>=', '>>', '|', '~', '$', '$0', '$1', '$10',
  1884. '$11', '$12', '$13', '$14', '$15', '$2', '$3', '$4', '$5', '$6', '$7',
  1885. '$8', '$9', '$args', '$idx', '$it', '$main-args', 'abort', 'abs',
  1886. 'acos', 'acosh', 'add', 'address', 'amb', 'and', 'and', 'append-file',
  1887. 'append', 'apply', 'args', 'array-list', 'array?', 'array', 'asin',
  1888. 'asinh', 'assoc', 'atan', 'atan2', 'atanh', 'atom?', 'base64-dec',
  1889. 'base64-enc', 'bayes-query', 'bayes-train', 'begin', 'begin', 'begin',
  1890. 'beta', 'betai', 'bind', 'binomial', 'bits', 'callback', 'case', 'case',
  1891. 'case', 'catch', 'ceil', 'change-dir', 'char', 'chop', 'Class', 'clean',
  1892. 'close', 'command-event', 'cond', 'cond', 'cond', 'cons', 'constant',
  1893. 'context?', 'context', 'copy-file', 'copy', 'cos', 'cosh', 'count',
  1894. 'cpymem', 'crc32', 'crit-chi2', 'crit-z', 'current-line', 'curry',
  1895. 'date-list', 'date-parse', 'date-value', 'date', 'debug', 'dec',
  1896. 'def-new', 'default', 'define-macro', 'define-macro', 'define',
  1897. 'delete-file', 'delete-url', 'delete', 'destroy', 'det', 'device',
  1898. 'difference', 'directory?', 'directory', 'div', 'do-until', 'do-while',
  1899. 'doargs', 'dolist', 'dostring', 'dotimes', 'dotree', 'dump', 'dup',
  1900. 'empty?', 'encrypt', 'ends-with', 'env', 'erf', 'error-event',
  1901. 'eval-string', 'eval', 'exec', 'exists', 'exit', 'exp', 'expand',
  1902. 'explode', 'extend', 'factor', 'fft', 'file-info', 'file?', 'filter',
  1903. 'find-all', 'find', 'first', 'flat', 'float?', 'float', 'floor', 'flt',
  1904. 'fn', 'for-all', 'for', 'fork', 'format', 'fv', 'gammai', 'gammaln',
  1905. 'gcd', 'get-char', 'get-float', 'get-int', 'get-long', 'get-string',
  1906. 'get-url', 'global?', 'global', 'if-not', 'if', 'ifft', 'import', 'inc',
  1907. 'index', 'inf?', 'int', 'integer?', 'integer', 'intersect', 'invert',
  1908. 'irr', 'join', 'lambda-macro', 'lambda?', 'lambda', 'last-error',
  1909. 'last', 'legal?', 'length', 'let', 'let', 'let', 'letex', 'letn',
  1910. 'letn', 'letn', 'list?', 'list', 'load', 'local', 'log', 'lookup',
  1911. 'lower-case', 'macro?', 'main-args', 'MAIN', 'make-dir', 'map', 'mat',
  1912. 'match', 'max', 'member', 'min', 'mod', 'module', 'mul', 'multiply',
  1913. 'NaN?', 'net-accept', 'net-close', 'net-connect', 'net-error',
  1914. 'net-eval', 'net-interface', 'net-ipv', 'net-listen', 'net-local',
  1915. 'net-lookup', 'net-packet', 'net-peek', 'net-peer', 'net-ping',
  1916. 'net-receive-from', 'net-receive-udp', 'net-receive', 'net-select',
  1917. 'net-send-to', 'net-send-udp', 'net-send', 'net-service',
  1918. 'net-sessions', 'new', 'nil?', 'nil', 'normal', 'not', 'now', 'nper',
  1919. 'npv', 'nth', 'null?', 'number?', 'open', 'or', 'ostype', 'pack',
  1920. 'parse-date', 'parse', 'peek', 'pipe', 'pmt', 'pop-assoc', 'pop',
  1921. 'post-url', 'pow', 'prefix', 'pretty-print', 'primitive?', 'print',
  1922. 'println', 'prob-chi2', 'prob-z', 'process', 'prompt-event',
  1923. 'protected?', 'push', 'put-url', 'pv', 'quote?', 'quote', 'rand',
  1924. 'random', 'randomize', 'read', 'read-char', 'read-expr', 'read-file',
  1925. 'read-key', 'read-line', 'read-utf8', 'read', 'reader-event',
  1926. 'real-path', 'receive', 'ref-all', 'ref', 'regex-comp', 'regex',
  1927. 'remove-dir', 'rename-file', 'replace', 'reset', 'rest', 'reverse',
  1928. 'rotate', 'round', 'save', 'search', 'seed', 'seek', 'select', 'self',
  1929. 'semaphore', 'send', 'sequence', 'series', 'set-locale', 'set-ref-all',
  1930. 'set-ref', 'set', 'setf', 'setq', 'sgn', 'share', 'signal', 'silent',
  1931. 'sin', 'sinh', 'sleep', 'slice', 'sort', 'source', 'spawn', 'sqrt',
  1932. 'starts-with', 'string?', 'string', 'sub', 'swap', 'sym', 'symbol?',
  1933. 'symbols', 'sync', 'sys-error', 'sys-info', 'tan', 'tanh', 'term',
  1934. 'throw-error', 'throw', 'time-of-day', 'time', 'timer', 'title-case',
  1935. 'trace-highlight', 'trace', 'transpose', 'Tree', 'trim', 'true?',
  1936. 'true', 'unicode', 'unify', 'unique', 'unless', 'unpack', 'until',
  1937. 'upper-case', 'utf8', 'utf8len', 'uuid', 'wait-pid', 'when', 'while',
  1938. 'write', 'write-char', 'write-file', 'write-line', 'write',
  1939. 'xfer-event', 'xml-error', 'xml-parse', 'xml-type-tags', 'zero?',
  1940. ]
  1941. # valid names
  1942. valid_name = r'([a-zA-Z0-9!$%&*+.,/<=>?@^_~|-])+|(\[.*?\])+'
  1943. tokens = {
  1944. 'root': [
  1945. # shebang
  1946. (r'#!(.*?)$', Comment.Preproc),
  1947. # comments starting with semicolon
  1948. (r';.*$', Comment.Single),
  1949. # comments starting with #
  1950. (r'#.*$', Comment.Single),
  1951. # whitespace
  1952. (r'\s+', Text),
  1953. # strings, symbols and characters
  1954. (r'"(\\\\|\\"|[^"])*"', String),
  1955. # braces
  1956. (r"{", String, "bracestring"),
  1957. # [text] ... [/text] delimited strings
1958. (r'\[text\]', String, "tagstring"),
  1959. # 'special' operators...
  1960. (r"('|:)", Operator),
  1961. # highlight the builtins
  1962. ('(%s)' % '|'.join(re.escape(entry) + '\\b' for entry in builtins),
  1963. Keyword),
  1964. # the remaining functions
  1965. (r'(?<=\()' + valid_name, Name.Variable),
  1966. # the remaining variables
  1967. (valid_name, String.Symbol),
  1968. # parentheses
  1969. (r'(\(|\))', Punctuation),
  1970. ],
  1971. # braced strings...
  1972. 'bracestring': [
  1973. ("{", String, "#push"),
  1974. ("}", String, "#pop"),
  1975. ("[^{}]+", String),
  1976. ],
  1977. # tagged [text]...[/text] delimited strings...
  1978. 'tagstring': [
  1979. (r'(?s)(.*?)(\[/text\])', String, '#pop'),
  1980. ],
  1981. }
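# For illustration, a block such as
#   [text]raw "text", braces {} and all[/text]
# is swallowed by the 'tagstring' rule in a single (?s) match up to the
# closing [/text], so nothing inside it is tokenized further.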
  1982. class ElixirLexer(RegexLexer):
  1983. """
  1984. For the `Elixir language <http://elixir-lang.org>`_.
  1985. *New in Pygments 1.5.*
  1986. """
  1987. name = 'Elixir'
  1988. aliases = ['elixir', 'ex', 'exs']
  1989. filenames = ['*.ex', '*.exs']
  1990. mimetypes = ['text/x-elixir']
  1991. def gen_elixir_sigil_rules():
  1992. states = {}
  1993. states['strings'] = [
  1994. (r'(%[A-Ba-z])?"""(?:.|\n)*?"""', String.Doc),
  1995. (r"'''(?:.|\n)*?'''", String.Doc),
  1996. (r'"', String.Double, 'dqs'),
  1997. (r"'.*'", String.Single),
  1998. (r'(?<!\w)\?(\\(x\d{1,2}|\h{1,2}(?!\h)\b|0[0-7]{0,2}(?![0-7])\b|'
  1999. r'[^x0MC])|(\\[MC]-)+\w|[^\s\\])', String.Other)
  2000. ]
  2001. for lbrace, rbrace, name, in ('\\{', '\\}', 'cb'), \
  2002. ('\\[', '\\]', 'sb'), \
  2003. ('\\(', '\\)', 'pa'), \
  2004. ('\\<', '\\>', 'lt'):
  2005. states['strings'] += [
  2006. (r'%[a-z]' + lbrace, String.Double, name + 'intp'),
  2007. (r'%[A-Z]' + lbrace, String.Double, name + 'no-intp')
  2008. ]
  2009. states[name +'intp'] = [
  2010. (r'' + rbrace + '[a-z]*', String.Double, "#pop"),
  2011. include('enddoublestr')
  2012. ]
  2013. states[name +'no-intp'] = [
  2014. (r'.*' + rbrace + '[a-z]*', String.Double , "#pop")
  2015. ]
  2016. return states
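# For illustration (assuming sigil-style literals of this Elixir era), a
# lower-case form like %b{hello #{name}} enters the generated 'cbintp'
# state, where #{...} is lexed as an interpolated expression, while the
# upper-case form %B{...} goes to 'cbno-intp' and is kept verbatim.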
  2017. tokens = {
  2018. 'root': [
  2019. (r'\s+', Text),
  2020. (r'#.*$', Comment.Single),
  2021. (r'\b(case|cond|end|bc|lc|if|unless|try|loop|receive|fn|defmodule|'
  2022. r'defp?|defprotocol|defimpl|defrecord|defmacrop?|defdelegate|'
2023. r'defexception|exit|raise|throw|after|rescue|catch|else)\b(?![?!])|'
  2024. r'(?<!\.)\b(do|\-\>)\b\s*', Keyword),
  2025. (r'\b(import|require|use|recur|quote|unquote|super|refer)\b(?![?!])',
  2026. Keyword.Namespace),
  2027. (r'(?<!\.)\b(and|not|or|when|xor|in)\b', Operator.Word),
  2028. (r'%=|\*=|\*\*=|\+=|\-=|\^=|\|\|=|'
  2029. r'<=>|<(?!<|=)|>(?!<|=|>)|<=|>=|===|==|=~|!=|!~|(?=[ \t])\?|'
  2030. r'(?<=[ \t])!+|&&|\|\||\^|\*|\+|\-|/|'
  2031. r'\||\+\+|\-\-|\*\*|\/\/|\<\-|\<\>|<<|>>|=|\.', Operator),
  2032. (r'(?<!:)(:)([a-zA-Z_]\w*([?!]|=(?![>=]))?|\<\>|===?|>=?|<=?|'
  2033. r'<=>|&&?|%\(\)|%\[\]|%\{\}|\+\+?|\-\-?|\|\|?|\!|//|[%&`/\|]|'
  2034. r'\*\*?|=?~|<\-)|([a-zA-Z_]\w*([?!])?)(:)(?!:)', String.Symbol),
  2035. (r':"', String.Symbol, 'interpoling_symbol'),
  2036. (r'\b(nil|true|false)\b(?![?!])|\b[A-Z]\w*\b', Name.Constant),
  2037. (r'\b(__(FILE|LINE|MODULE|MAIN|FUNCTION)__)\b(?![?!])', Name.Builtin.Pseudo),
  2038. (r'[a-zA-Z_!][\w_]*[!\?]?', Name),
  2039. (r'[(){};,/\|:\\\[\]]', Punctuation),
  2040. (r'@[a-zA-Z_]\w*|&\d', Name.Variable),
  2041. (r'\b(0[xX][0-9A-Fa-f]+|\d(_?\d)*(\.(?![^\d\s])'
  2042. r'(_?\d)*)?([eE][-+]?\d(_?\d)*)?|0[bB][01]+)\b', Number),
  2043. (r'%r\/.*\/', String.Regex),
  2044. include('strings'),
  2045. ],
  2046. 'dqs': [
  2047. (r'"', String.Double, "#pop"),
  2048. include('enddoublestr')
  2049. ],
  2050. 'interpoling': [
  2051. (r'#{', String.Interpol, 'interpoling_string'),
  2052. ],
  2053. 'interpoling_string' : [
  2054. (r'}', String.Interpol, "#pop"),
  2055. include('root')
  2056. ],
  2057. 'interpoling_symbol': [
  2058. (r'"', String.Symbol, "#pop"),
  2059. include('interpoling'),
  2060. (r'[^#"]+', String.Symbol),
  2061. ],
  2062. 'enddoublestr' : [
  2063. include('interpoling'),
  2064. (r'[^#"]+', String.Double),
  2065. ]
  2066. }
  2067. tokens.update(gen_elixir_sigil_rules())
  2068. class ElixirConsoleLexer(Lexer):
  2069. """
  2070. For Elixir interactive console (iex) output like:
  2071. .. sourcecode:: iex
  2072. iex> [head | tail] = [1,2,3]
  2073. [1,2,3]
  2074. iex> head
  2075. 1
  2076. iex> tail
  2077. [2,3]
  2078. iex> [head | tail]
  2079. [1,2,3]
  2080. iex> length [head | tail]
  2081. 3
  2082. *New in Pygments 1.5.*
  2083. """
  2084. name = 'Elixir iex session'
  2085. aliases = ['iex']
  2086. mimetypes = ['text/x-elixir-shellsession']
2087. _prompt_re = re.compile(r'(iex|\.{3})> ')
  2088. def get_tokens_unprocessed(self, text):
  2089. exlexer = ElixirLexer(**self.options)
  2090. curcode = ''
  2091. insertions = []
  2092. for match in line_re.finditer(text):
  2093. line = match.group()
  2094. if line.startswith(u'** '):
  2095. insertions.append((len(curcode),
  2096. [(0, Generic.Error, line[:-1])]))
  2097. curcode += line[-1:]
  2098. else:
  2099. m = self._prompt_re.match(line)
  2100. if m is not None:
  2101. end = m.end()
  2102. insertions.append((len(curcode),
  2103. [(0, Generic.Prompt, line[:end])]))
  2104. curcode += line[end:]
  2105. else:
  2106. if curcode:
  2107. for item in do_insertions(insertions,
  2108. exlexer.get_tokens_unprocessed(curcode)):
  2109. yield item
  2110. curcode = ''
  2111. insertions = []
  2112. yield match.start(), Generic.Output, line
  2113. if curcode:
  2114. for item in do_insertions(insertions,
  2115. exlexer.get_tokens_unprocessed(curcode)):
  2116. yield item
  2117. class KokaLexer(RegexLexer):
  2118. """
  2119. Lexer for the `Koka <http://research.microsoft.com/en-us/projects/koka/>`_
  2120. language.
  2121. *New in Pygments 1.6.*
  2122. """
  2123. name = 'Koka'
  2124. aliases = ['koka']
  2125. filenames = ['*.kk', '*.kki']
  2126. mimetypes = ['text/x-koka']
  2127. keywords = [
  2128. 'infix', 'infixr', 'infixl', 'prefix', 'postfix',
  2129. 'type', 'cotype', 'rectype', 'alias',
  2130. 'struct', 'con',
  2131. 'fun', 'function', 'val', 'var',
  2132. 'external',
  2133. 'if', 'then', 'else', 'elif', 'return', 'match',
2134. 'private', 'public',
  2135. 'module', 'import', 'as',
  2136. 'include', 'inline',
  2137. 'rec',
  2138. 'try', 'yield', 'enum',
  2139. 'interface', 'instance',
  2140. ]
  2141. # keywords that are followed by a type
  2142. typeStartKeywords = [
  2143. 'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
  2144. ]
  2145. # keywords valid in a type
  2146. typekeywords = [
  2147. 'forall', 'exists', 'some', 'with',
  2148. ]
  2149. # builtin names and special names
  2150. builtin = [
  2151. 'for', 'while', 'repeat',
  2152. 'foreach', 'foreach-indexed',
  2153. 'error', 'catch', 'finally',
  2154. 'cs', 'js', 'file', 'ref', 'assigned',
  2155. ]
  2156. # symbols that can be in an operator
  2157. symbols = '[\$%&\*\+@!/\\\^~=\.:\-\?\|<>]+'
  2158. # symbol boundary: an operator keyword should not be followed by any of these
  2159. sboundary = '(?!'+symbols+')'
  2160. # name boundary: a keyword should not be followed by any of these
  2161. boundary = '(?![a-zA-Z0-9_\\-])'
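# For instance, the 'boundary' lookahead keeps an identifier like
# 'externally' from being split into the keyword 'external' plus a trailing
# name, and 'sboundary' stops the type-introducing ':' from firing on ':='.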
  2162. # main lexer
  2163. tokens = {
  2164. 'root': [
  2165. include('whitespace'),
  2166. # go into type mode
  2167. (r'::?' + sboundary, Keyword.Type, 'type'),
  2168. (r'alias' + boundary, Keyword, 'alias-type'),
  2169. (r'struct' + boundary, Keyword, 'struct-type'),
  2170. (r'(%s)' % '|'.join(typeStartKeywords) + boundary, Keyword, 'type'),
  2171. # special sequences of tokens (we use ?: for non-capturing group as
  2172. # required by 'bygroups')
  2173. (r'(module)(\s*)((?:interface)?)(\s*)'
  2174. r'((?:[a-z](?:[a-zA-Z0-9_]|\-[a-zA-Z])*\.)*'
  2175. r'[a-z](?:[a-zA-Z0-9_]|\-[a-zA-Z])*)',
  2176. bygroups(Keyword, Text, Keyword, Text, Name.Namespace)),
  2177. (r'(import)(\s+)((?:[a-z](?:[a-zA-Z0-9_]|\-[a-zA-Z])*\.)*[a-z]'
  2178. r'(?:[a-zA-Z0-9_]|\-[a-zA-Z])*)(\s*)((?:as)?)'
  2179. r'((?:[A-Z](?:[a-zA-Z0-9_]|\-[a-zA-Z])*)?)',
  2180. bygroups(Keyword, Text, Name.Namespace, Text, Keyword,
  2181. Name.Namespace)),
  2182. # keywords
  2183. (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
  2184. (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
  2185. (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
  2186. (r'::|:=|\->|[=\.:]' + sboundary, Keyword),
  2187. (r'\-' + sboundary, Generic.Strong),
  2188. # names
  2189. (r'[A-Z]([a-zA-Z0-9_]|\-[a-zA-Z])*(?=\.)', Name.Namespace),
  2190. (r'[A-Z]([a-zA-Z0-9_]|\-[a-zA-Z])*(?!\.)', Name.Class),
  2191. (r'[a-z]([a-zA-Z0-9_]|\-[a-zA-Z])*', Name),
  2192. (r'_([a-zA-Z0-9_]|\-[a-zA-Z])*', Name.Variable),
  2193. # literal string
  2194. (r'@"', String.Double, 'litstring'),
  2195. # operators
  2196. (symbols, Operator),
  2197. (r'`', Operator),
  2198. (r'[\{\}\(\)\[\];,]', Punctuation),
  2199. # literals. No check for literal characters with len > 1
  2200. (r'[0-9]+\.[0-9]+([eE][\-\+]?[0-9]+)?', Number.Float),
  2201. (r'0[xX][0-9a-fA-F]+', Number.Hex),
  2202. (r'[0-9]+', Number.Integer),
  2203. (r"'", String.Char, 'char'),
  2204. (r'"', String.Double, 'string'),
  2205. ],
  2206. # type started by alias
  2207. 'alias-type': [
  2208. (r'=',Keyword),
  2209. include('type')
  2210. ],
  2211. # type started by struct
  2212. 'struct-type': [
  2213. (r'(?=\((?!,*\)))',Punctuation, '#pop'),
  2214. include('type')
  2215. ],
  2216. # type started by colon
  2217. 'type': [
  2218. (r'[\(\[<]', Keyword.Type, 'type-nested'),
  2219. include('type-content')
  2220. ],
  2221. # type nested in brackets: can contain parameters, comma etc.
  2222. 'type-nested': [
  2223. (r'[\)\]>]', Keyword.Type, '#pop'),
  2224. (r'[\(\[<]', Keyword.Type, 'type-nested'),
  2225. (r',', Keyword.Type),
  2226. (r'([a-z](?:[a-zA-Z0-9_]|\-[a-zA-Z])*)(\s*)(:)(?!:)',
  2227. bygroups(Name.Variable,Text,Keyword.Type)), # parameter name
  2228. include('type-content')
  2229. ],
  2230. # shared contents of a type
  2231. 'type-content': [
  2232. include('whitespace'),
  2233. # keywords
  2234. (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
  2235. (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
  2236. Keyword, '#pop'), # need to match because names overlap...
  2237. # kinds
  2238. (r'[EPH]' + boundary, Keyword.Type),
  2239. (r'[*!]', Keyword.Type),
  2240. # type names
  2241. (r'[A-Z]([a-zA-Z0-9_]|\-[a-zA-Z])*(?=\.)', Name.Namespace),
  2242. (r'[A-Z]([a-zA-Z0-9_]|\-[a-zA-Z])*(?!\.)', Name.Class),
  2243. (r'[a-z][0-9]*(?![a-zA-Z_\-])', Keyword.Type), # Generic.Emph
  2244. (r'_([a-zA-Z0-9_]|\-[a-zA-Z])*', Keyword.Type), # Generic.Emph
  2245. (r'[a-z]([a-zA-Z0-9_]|\-[a-zA-Z])*', Keyword.Type),
  2246. # type keyword operators
  2247. (r'::|\->|[\.:|]', Keyword.Type),
  2248. #catchall
  2249. (r'', Text, '#pop')
  2250. ],
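# For illustration, in "val f : (int, string) -> string" the ':' pushes
# 'type', '(' opens 'type-nested' where 'int' and 'string' become
# Keyword.Type (and "x : int" style parameter names would hit the rule
# above), and the trailing '->' is handled by the keyword-operator rule.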
  2251. # comments and literals
  2252. 'whitespace': [
  2253. (r'\s+', Text),
  2254. (r'/\*', Comment.Multiline, 'comment'),
  2255. (r'//.*$', Comment.Single)
  2256. ],
  2257. 'comment': [
  2258. (r'[^/\*]+', Comment.Multiline),
  2259. (r'/\*', Comment.Multiline, '#push'),
  2260. (r'\*/', Comment.Multiline, '#pop'),
  2261. (r'[\*/]', Comment.Multiline),
  2262. ],
  2263. 'litstring': [
  2264. (r'[^"]+', String.Double),
  2265. (r'""', String.Escape),
  2266. (r'"', String.Double, '#pop'),
  2267. ],
  2268. 'string': [
  2269. (r'[^\\"\n]+', String.Double),
  2270. include('escape-sequence'),
  2271. (r'["\n]', String.Double, '#pop'),
  2272. ],
  2273. 'char': [
  2274. (r'[^\\\'\n]+', String.Char),
  2275. include('escape-sequence'),
  2276. (r'[\'\n]', String.Char, '#pop'),
  2277. ],
  2278. 'escape-sequence': [
  2279. (r'\\[abfnrtv0\\\"\'\?]', String.Escape),
  2280. (r'\\x[0-9a-fA-F]{2}', String.Escape),
  2281. (r'\\u[0-9a-fA-F]{4}', String.Escape),
  2282. # Yes, \U literals are 6 hex digits.
  2283. (r'\\U[0-9a-fA-F]{6}', String.Escape)
  2284. ]
  2285. }