PageRenderTime 34ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/src/lexer.coffee

http://github.com/jashkenas/coffee-script
CoffeeScript | 1470 lines | 1139 code | 125 blank | 206 comment | 197 complexity | 8d9b20ecaa480c33dd8e517941c3567f MD5 | raw file
  1. # The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
  2. # matches against the beginning of the source code. When a match is found,
  3. # a token is produced, we consume the match, and start again. Tokens are in the
  4. # form:
  5. #
  6. # [tag, value, locationData]
  7. #
  8. # where locationData is {first_line, first_column, last_line, last_column, last_line_exclusive, last_column_exclusive}, which is a
  9. # format that can be fed directly into [Jison](https://github.com/zaach/jison). These
  10. # are read by jison in the `parser.lexer` function defined in coffeescript.coffee.
  11. {Rewriter, INVERSES, UNFINISHED} = require './rewriter'
  12. # Import the helpers we need.
  13. {count, starts, compact, repeat, invertLiterate, merge,
  14. attachCommentsToNode, locationDataToString, throwSyntaxError
  15. replaceUnicodeCodePointEscapes, flatten, parseNumber} = require './helpers'
  16. # The Lexer Class
  17. # ---------------
  18. # The Lexer class reads a stream of CoffeeScript and divvies it up into tagged
  19. # tokens. Some potential ambiguity in the grammar has been avoided by
  20. # pushing some extra smarts into the Lexer.
  21. exports.Lexer = class Lexer
  # **tokenize** is the Lexer's main method. Scan by attempting to match tokens
  # one at a time, using a regular expression anchored at the start of the
  # remaining code, or a custom recursive token-matching method
  # (for interpolations). When the next token has been recorded, we move forward
  # within the code past the token, and begin again.
  #
  # Each tokenizing method is responsible for returning the number of characters
  # it has consumed.
  #
  # Before returning the token stream, run it through the [Rewriter](rewriter.html).
  tokenize: (code, opts = {}) ->
    @literate   = opts.literate       # Are we lexing literate CoffeeScript?
    @indent     = 0                   # The current indentation level.
    @baseIndent = 0                   # The overall minimum indentation level.
    @continuationLineAdditionalIndent = 0 # The over-indentation at the current level.
    @outdebt    = 0                   # The under-outdentation at the current level.
    @indents    = []                  # The stack of all current indentation levels.
    @indentLiteral = ''               # The indentation.
    @ends       = []                  # The stack for pairing up tokens.
    @tokens     = []                  # Stream of parsed tokens in the form `['TYPE', value, location data]`.
    @seenFor    = no                  # Used to recognize `FORIN`, `FOROF` and `FORFROM` tokens.
    @seenImport = no                  # Used to recognize `IMPORT FROM? AS?` tokens.
    @seenExport = no                  # Used to recognize `EXPORT FROM? AS?` tokens.
    @importSpecifierList = no         # Used to identify when in an `IMPORT {...} FROM? ...`.
    @exportSpecifierList = no         # Used to identify when in an `EXPORT {...} FROM? ...`.
    @jsxDepth = 0                     # Used to optimize JSX checks, how deep in JSX we are.
    @jsxObjAttribute = {}             # Used to detect if JSX attributes is wrapped in {} (<div {props...} />).

    @chunkLine =
      opts.line or 0                  # The start line for the current @chunk.
    @chunkColumn =
      opts.column or 0                # The start column of the current @chunk.
    @chunkOffset =
      opts.offset or 0                # The start offset for the current @chunk.
    @locationDataCompensations =
      opts.locationDataCompensations or {} # The location data compensations for the current @chunk.
    code = @clean code                # The stripped, cleaned original source code.

    # At every position, run through this list of attempted matches,
    # short-circuiting if any of them succeed. Their order determines precedence:
    # `@literalToken` is the fallback catch-all.
    i = 0
    while @chunk = code[i..]
      consumed = \
           @identifierToken() or
           @commentToken()    or
           @whitespaceToken() or
           @lineToken()       or
           @stringToken()     or
           @numberToken()     or
           @jsxToken()        or
           @regexToken()      or
           @jsToken()         or
           @literalToken()

      # Update position.
      [@chunkLine, @chunkColumn, @chunkOffset] = @getLineAndColumnFromChunk consumed
      i += consumed

      # `untilBalanced` callers (e.g. interpolation lexing) stop as soon as the
      # token-pairing stack empties, returning how far we consumed.
      return {@tokens, index: i} if opts.untilBalanced and @ends.length is 0

    @closeIndentation()
    @error "missing #{end.tag}", (end.origin ? end)[2] if end = @ends.pop()
    return @tokens if opts.rewrite is off
    (new Rewriter).rewrite @tokens
  # Preprocess the code to remove leading and trailing whitespace, carriage
  # returns, etc. If we're lexing literate CoffeeScript, strip external Markdown
  # by removing all lines that aren't indented by at least four spaces or a tab.
  # Records every removed character in `@locationDataCompensations` so that
  # error locations can be mapped back onto the user's original source.
  clean: (code) ->
    thusFar = 0
    if code.charCodeAt(0) is BOM
      # Drop a leading byte-order mark and compensate offsets by one.
      code = code.slice 1
      @locationDataCompensations[0] = 1
      thusFar += 1
    if WHITESPACE.test code
      # Code starting with whitespace gets a synthetic leading newline so that
      # the first `lineToken` sees an indent; the line/offset bookkeeping is
      # adjusted to cancel it out.
      code = "\n#{code}"
      @chunkLine--
      @locationDataCompensations[0] ?= 0
      @locationDataCompensations[0] -= 1
    code = code
    .replace /\r/g, (match, offset) =>
      # Each removed carriage return is one character of compensation.
      @locationDataCompensations[thusFar + offset] = 1
      ''
    .replace TRAILING_SPACES, ''
    code = invertLiterate code if @literate
    code
  103. # Tokenizers
  104. # ----------
  # Matches identifying literals: variables, keywords, method names, etc.
  # Check to ensure that JavaScript reserved words aren't being used as
  # identifiers. Because CoffeeScript reserves a handful of keywords that are
  # allowed in JavaScript, we're careful not to tag them as keywords when
  # referenced as property names here, so you can still do `jQuery.is()` even
  # though `is` means `===` otherwise.
  #
  # Returns the number of characters consumed (0 if no identifier matched).
  identifierToken: ->
    inJSXTag = @atJSXTag()
    regex = if inJSXTag then JSX_ATTRIBUTE else IDENTIFIER
    return 0 unless match = regex.exec @chunk
    [input, id, colon] = match

    # Preserve length of id for location data
    idLength = id.length
    poppedToken = undefined

    # Context-sensitive keywords that only act as keywords after particular
    # preceding tokens: `own` (after FOR), `from` (after YIELD), `as`/`default`
    # (inside import/export clauses).
    if id is 'own' and @tag() is 'FOR'
      @token 'OWN', id
      return id.length
    if id is 'from' and @tag() is 'YIELD'
      @token 'FROM', id
      return id.length
    if id is 'as' and @seenImport
      if @value() is '*'
        @tokens[@tokens.length - 1][0] = 'IMPORT_ALL'
      else if @value(yes) in COFFEE_KEYWORDS
        # A CoffeeScript keyword used as an import name: retag it as a plain
        # identifier so `import {and as andFn}` works.
        prev = @prev()
        [prev[0], prev[1]] = ['IDENTIFIER', @value(yes)]
      if @tag() in ['DEFAULT', 'IMPORT_ALL', 'IDENTIFIER']
        @token 'AS', id
        return id.length
    if id is 'as' and @seenExport
      if @tag() in ['IDENTIFIER', 'DEFAULT']
        @token 'AS', id
        return id.length
      if @value(yes) in COFFEE_KEYWORDS
        prev = @prev()
        [prev[0], prev[1]] = ['IDENTIFIER', @value(yes)]
        @token 'AS', id
        return id.length
    if id is 'default' and @seenExport and @tag() in ['EXPORT', 'AS']
      @token 'DEFAULT', id
      return id.length
    if id is 'do' and regExSuper = /^(\s*super)(?!\(\))/.exec @chunk[3...]
      # `do super` becomes an argument-forwarding `super()` call.
      @token 'SUPER', 'super'
      @token 'CALL_START', '('
      @token 'CALL_END', ')'
      [input, sup] = regExSuper
      return sup.length + 3

    prev = @prev()

    # After an accessor (`.`, `?.`, `::`, `?::`) or an unspaced `@`, or when
    # followed by a colon, the identifier is a property name, never a keyword.
    tag =
      if colon or prev? and
         (prev[0] in ['.', '?.', '::', '?::'] or
         not prev.spaced and prev[0] is '@')
        'PROPERTY'
      else
        'IDENTIFIER'

    tokenData = {}
    if tag is 'IDENTIFIER' and (id in JS_KEYWORDS or id in COFFEE_KEYWORDS) and
       not (@exportSpecifierList and id in COFFEE_KEYWORDS)
      tag = id.toUpperCase()
      if tag is 'WHEN' and @tag() in LINE_BREAK
        tag = 'LEADING_WHEN'
      else if tag is 'FOR'
        @seenFor = {endsLength: @ends.length}
      else if tag is 'UNLESS'
        tag = 'IF'
      else if tag is 'IMPORT'
        @seenImport = yes
      else if tag is 'EXPORT'
        @seenExport = yes
      else if tag in UNARY
        tag = 'UNARY'
      else if tag in RELATION
        if tag isnt 'INSTANCEOF' and @seenFor
          tag = 'FOR' + tag
          @seenFor = no
        else
          tag = 'RELATION'
          if @value() is '!'
            # `not in` / `not of`: fold the preceding `!` into the relation.
            poppedToken = @tokens.pop()
            tokenData.invert = poppedToken.data?.original ? poppedToken[1]
    else if tag is 'IDENTIFIER' and @seenFor and id is 'from' and
       isForFrom(prev)
      tag = 'FORFROM'
      @seenFor = no
    # Throw an error on attempts to use `get` or `set` as keywords, or
    # what CoffeeScript would normally interpret as calls to functions named
    # `get` or `set`, i.e. `get({foo: function () {}})`.
    else if tag is 'PROPERTY' and prev
      if prev.spaced and prev[0] in CALLABLE and /^[gs]et$/.test(prev[1]) and
         @tokens.length > 1 and @tokens[@tokens.length - 2][0] not in ['.', '?.', '@']
        @error "'#{prev[1]}' cannot be used as a keyword, or as a function call
        without parentheses", prev[2]
      else if prev[0] is '.' and @tokens.length > 1 and (prevprev = @tokens[@tokens.length - 2])[0] is 'UNARY' and prevprev[1] is 'new'
        prevprev[0] = 'NEW_TARGET'
      else if @tokens.length > 2
        prevprev = @tokens[@tokens.length - 2]
        if prev[0] in ['@', 'THIS'] and prevprev and prevprev.spaced and
           /^[gs]et$/.test(prevprev[1]) and
           @tokens[@tokens.length - 3][0] not in ['.', '?.', '@']
          @error "'#{prevprev[1]}' cannot be used as a keyword, or as a
          function call without parentheses", prevprev[2]

    if tag is 'IDENTIFIER' and id in RESERVED and not inJSXTag
      @error "reserved word '#{id}'", length: id.length

    unless tag is 'PROPERTY' or @exportSpecifierList or @importSpecifierList
      # Replace CoffeeScript aliases (`and`, `or`, `isnt`, `yes`, ...) with
      # their canonical operator/value, remembering the original spelling.
      if id in COFFEE_ALIASES
        alias = id
        id = COFFEE_ALIAS_MAP[id]
        tokenData.original = alias
      tag = switch id
        when '!'                 then 'UNARY'
        when '==', '!='          then 'COMPARE'
        when 'true', 'false'     then 'BOOL'
        when 'break', 'continue', \
             'debugger'          then 'STATEMENT'
        when '&&', '||'          then id
        else  tag

    tagToken = @token tag, id, length: idLength, data: tokenData
    tagToken.origin = [tag, alias, tagToken[2]] if alias
    if poppedToken
      # Extend location data backwards over the swallowed `!` token.
      [tagToken[2].first_line, tagToken[2].first_column, tagToken[2].range[0]] =
        [poppedToken[2].first_line, poppedToken[2].first_column, poppedToken[2].range[0]]
    if colon
      colonOffset = input.lastIndexOf if inJSXTag then '=' else ':'
      colonToken = @token ':', ':', offset: colonOffset
      colonToken.jsxColon = yes if inJSXTag # used by rewriter
    if inJSXTag and tag is 'IDENTIFIER' and prev[0] isnt ':'
      # Separate consecutive JSX attributes with generated commas.
      @token ',', ',', length: 0, origin: tagToken, generated: yes

    input.length
  # Matches numbers, including decimals, hex, and exponential notation.
  # Be careful not to interfere with ranges in progress.
  # Returns the number of characters consumed (0 if no number matched).
  numberToken: ->
    return 0 unless match = NUMBER.exec @chunk
    [number] = match
    lexedLength = number.length

    # Reject the lexable-but-invalid spellings with a targeted error each.
    if /^0[BOX]/.test number
      @error "radix prefix in '#{number}' must be lowercase", offset: 1
    else if /^(?!0x).*E/.test number
      @error "exponential notation in '#{number}' must be indicated with a lowercase 'e'",
        offset: number.indexOf('E')
    else if /^0\d*[89]/.test number
      @error "decimal literal '#{number}' must not be prefixed with '0'", length: lexedLength
    else if /^0\d+/.test number
      @error "octal literal '#{number}' must be prefixed with '0o'", length: lexedLength

    parsedValue = parseNumber number
    tokenData = {parsedValue}
    # Overflowing literals parse to Infinity and get their own token tag,
    # keeping the original spelling around for output.
    tag = if parsedValue is Infinity then 'INFINITY' else 'NUMBER'
    tokenData.original = number if tag is 'INFINITY'
    @token tag, number, length: lexedLength, data: tokenData
    lexedLength
  # Matches strings, including multiline strings, as well as heredocs, with or without
  # interpolation. Returns the number of characters consumed.
  stringToken: ->
    [quote] = STRING_START.exec(@chunk) || []
    return 0 unless quote

    # If the preceding token is `from` and this is an import or export statement,
    # properly tag the `from`.
    prev = @prev()
    if prev and @value() is 'from' and (@seenImport or @seenExport)
      prev[0] = 'FROM'

    regex = switch quote
      when "'"   then STRING_SINGLE
      when '"'   then STRING_DOUBLE
      when "'''" then HEREDOC_SINGLE
      when '"""' then HEREDOC_DOUBLE

    {tokens, index: end} = @matchWithInterpolations regex, quote

    heredoc = quote.length is 3
    if heredoc
      # Find the smallest indentation. It will be removed from all lines later.
      # Interpolations are replaced by `#{}` placeholders so indentation inside
      # them is ignored.
      indent = null
      doc = (token[1] for token, i in tokens when token[0] is 'NEOSTRING').join '#{}'
      while match = HEREDOC_INDENT.exec doc
        attempt = match[1]
        indent = attempt if indent is null or 0 < attempt.length < indent.length

    delimiter = quote.charAt(0)
    @mergeInterpolationTokens tokens, {quote, indent, endOffset: end}, (value) =>
      @validateUnicodeCodePointEscapes value, delimiter: quote
    if @atJSXTag()
      # A string attribute value in JSX gets a trailing generated comma.
      @token ',', ',', length: 0, origin: @prev, generated: yes
    end
  # Matches and consumes comments. The comments are taken out of the token
  # stream and saved for later, to be reinserted into the output after
  # everything has been parsed and the JavaScript code generated.
  # Returns the number of characters consumed, or (when `returnCommentTokens`
  # is set) the array of comment attachment objects.
  commentToken: (chunk = @chunk, {heregex, returnCommentTokens = no, offsetInChunk = 0} = {}) ->
    return 0 unless match = chunk.match COMMENT
    [commentWithSurroundingWhitespace, hereLeadingWhitespace, hereComment, hereTrailingWhitespace, lineComment] = match
    contents = null
    # Does this comment follow code on the same line?
    leadingNewline = /^\s*\n+\s*#/.test commentWithSurroundingWhitespace
    if hereComment
      matchIllegal = HERECOMMENT_ILLEGAL.exec hereComment
      if matchIllegal
        @error "block comments cannot contain #{matchIllegal[0]}",
          offset: '###'.length + matchIllegal.index, length: matchIllegal[0].length

      # Parse indentation or outdentation as if this block comment didn't exist.
      chunk = chunk.replace "####{hereComment}###", ''
      # Remove leading newlines, like `Rewriter::removeLeadingNewlines`, to
      # avoid the creation of unwanted `TERMINATOR` tokens.
      chunk = chunk.replace /^\n+/, ''
      @lineToken {chunk}

      # Pull out the ###-style comment's content, and format it.
      content = hereComment
      contents = [{
        content
        length: commentWithSurroundingWhitespace.length - hereLeadingWhitespace.length - hereTrailingWhitespace.length
        leadingWhitespace: hereLeadingWhitespace
      }]
    else
      # The `COMMENT` regex captures successive line comments as one token.
      # Remove any leading newlines before the first comment, but preserve
      # blank lines between line comments.
      leadingNewlines = ''
      content = lineComment.replace /^(\n*)/, (leading) ->
        leadingNewlines = leading
        ''
      precedingNonCommentLines = ''
      hasSeenFirstCommentLine = no
      contents =
        content.split '\n'
        .map (line, index) ->
          unless line.indexOf('#') > -1
            precedingNonCommentLines += "\n#{line}"
            return
          leadingWhitespace = ''
          content = line.replace /^([ |\t]*)#/, (_, whitespace) ->
            leadingWhitespace = whitespace
            ''
          comment = {
            content
            length: '#'.length + content.length
            leadingWhitespace: "#{unless hasSeenFirstCommentLine then leadingNewlines else ''}#{precedingNonCommentLines}#{leadingWhitespace}"
            precededByBlankLine: !!precedingNonCommentLines
          }
          hasSeenFirstCommentLine = yes
          precedingNonCommentLines = ''
          comment
        .filter (comment) -> comment

    # How far this comment is indented, judged by the whitespace after the
    # last newline preceding it; `null`/-1 mean "no usable indent information".
    getIndentSize = ({leadingWhitespace, nonInitial}) ->
      lastNewlineIndex = leadingWhitespace.lastIndexOf '\n'
      if hereComment? or not nonInitial
        return null unless lastNewlineIndex > -1
      else
        lastNewlineIndex ?= -1
      leadingWhitespace.length - 1 - lastNewlineIndex

    # Build one attachment object per comment, tracking location data and
    # whether the comment is indented/outdented relative to the current level.
    commentAttachments = for {content, length, leadingWhitespace, precededByBlankLine}, i in contents
      nonInitial = i isnt 0
      leadingNewlineOffset = if nonInitial then 1 else 0
      offsetInChunk += leadingNewlineOffset + leadingWhitespace.length
      indentSize = getIndentSize {leadingWhitespace, nonInitial}
      noIndent = not indentSize? or indentSize is -1
      commentAttachment = {
        content
        here: hereComment?
        newLine: leadingNewline or nonInitial # Line comments after the first one start new lines, by definition.
        locationData: @makeLocationData {offsetInChunk, length}
        precededByBlankLine
        indentSize
        indented:  not noIndent and indentSize > @indent
        outdented: not noIndent and indentSize < @indent
      }
      commentAttachment.heregex = yes if heregex
      offsetInChunk += length
      commentAttachment

    prev = @prev()
    unless prev
      # If there's no previous token, create a placeholder token to attach
      # this comment to; and follow with a newline.
      commentAttachments[0].newLine = yes
      @lineToken chunk: @chunk[commentWithSurroundingWhitespace.length..], offset: commentWithSurroundingWhitespace.length # Set the indent.
      placeholderToken = @makeToken 'JS', '', offset: commentWithSurroundingWhitespace.length, generated: yes
      placeholderToken.comments = commentAttachments
      @tokens.push placeholderToken
      @newlineToken commentWithSurroundingWhitespace.length
    else
      attachCommentsToNode commentAttachments, prev

    return commentAttachments if returnCommentTokens
    commentWithSurroundingWhitespace.length
  # Matches JavaScript interpolated directly into the source via backticks.
  # NOTE(review): the original comment here claimed escaped backticks are
  # converted, but no such conversion is visible in this method — presumably
  # the regexes/token pipeline handle it; confirm before relying on it.
  jsToken: ->
    return 0 unless @chunk.charAt(0) is '`'
    matchedHere = HERE_JSTOKEN.exec @chunk
    match = matchedHere or JSTOKEN.exec @chunk
    return 0 unless match
    script = match[1]
    consumed = match[0].length
    @token 'JS', script, length: consumed, data: {here: !!matchedHere}
    consumed
  # Matches regular expression literals, as well as multiline extended ones.
  # Lexing regular expressions is difficult to distinguish from division, so we
  # borrow some basic heuristics from JavaScript and Ruby.
  # Returns the number of characters consumed (0 when the chunk is division
  # or otherwise not a regex).
  regexToken: ->
    switch
      when match = REGEX_ILLEGAL.exec @chunk
        @error "regular expressions cannot begin with #{match[2]}",
          offset: match.index + match[1].length
      when match = @matchWithInterpolations HEREGEX, '///'
        {tokens, index} = match
        # Pull comments out of the heregex body; they are re-attached to the
        # resulting token at the end of this method.
        comments = []
        while matchedComment = HEREGEX_COMMENT.exec @chunk[0...index]
          {index: commentIndex} = matchedComment
          [fullMatch, leadingWhitespace, comment] = matchedComment
          comments.push {comment, offsetInChunk: commentIndex + leadingWhitespace.length}
        commentTokens = flatten(
          for commentOpts in comments
            @commentToken commentOpts.comment, Object.assign commentOpts, heregex: yes, returnCommentTokens: yes
        )
      when match = REGEX.exec @chunk
        [regex, body, closed] = match
        @validateEscapes body, isRegex: yes, offsetInChunk: 1
        index = regex.length
        prev = @prev()
        if prev
          # Division heuristics: after a spaced callable, an unclosed or
          # division-looking match is not a regex; after NOT_REGEX tokens,
          # `/` is always division.
          if prev.spaced and prev[0] in CALLABLE
            return 0 if not closed or POSSIBLY_DIVISION.test regex
          else if prev[0] in NOT_REGEX
            return 0
        @error 'missing / (unclosed regex)' unless closed
      else
        return 0

    [flags] = REGEX_FLAGS.exec @chunk[index..]
    end = index + flags.length
    origin = @makeToken 'REGEX', null, length: end
    switch
      when not VALID_FLAGS.test flags
        @error "invalid regular expression flags #{flags}", offset: index, length: flags.length
      when regex or tokens.length is 1
        # Simple case: no interpolations — emit a single REGEX token.
        delimiter = if body then '/' else '///'
        body ?= tokens[0][1]
        @validateUnicodeCodePointEscapes body, {delimiter}
        @token 'REGEX', "/#{body}/#{flags}", {length: end, origin, data: {delimiter}}
      else
        # Interpolated heregex: compile to a `RegExp(...)` constructor call.
        @token 'REGEX_START', '(', {length: 0, origin, generated: yes}
        @token 'IDENTIFIER', 'RegExp', length: 0, generated: yes
        @token 'CALL_START', '(', length: 0, generated: yes
        @mergeInterpolationTokens tokens, {double: yes, heregex: {flags}, endOffset: end - flags.length, quote: '///'}, (str) =>
          @validateUnicodeCodePointEscapes str, {delimiter}
        if flags
          @token ',', ',', offset: index - 1, length: 0, generated: yes
          @token 'STRING', '"' + flags + '"', offset: index, length: flags.length
        @token ')', ')', offset: end, length: 0, generated: yes
        @token 'REGEX_END', ')', offset: end, length: 0, generated: yes

    # Explicitly attach any heregex comments to the REGEX/REGEX_END token.
    if commentTokens?.length
      addTokenData @tokens[@tokens.length - 1],
        heregexCommentTokens: commentTokens
    end
  # Matches newlines, indents, and outdents, and determines which is which.
  # If we can detect that the current line is continued onto the next line,
  # then the newline is suppressed:
  #
  #     elements
  #       .each( ... )
  #       .map( ... )
  #
  # Keeps track of the level of indentation, because a single outdent token
  # can close multiple indents, so we need to know how far in we happen to be.
  # Returns the number of characters consumed.
  lineToken: ({chunk = @chunk, offset = 0} = {}) ->
    return 0 unless match = MULTI_DENT.exec chunk
    indent = match[0]

    prev = @prev()
    backslash = prev?[0] is '\\'
    # A newline ends pending for/import/export lookahead state unless the line
    # is continued with a backslash or we're still inside a specifier list.
    @seenFor = no unless (backslash or @seenFor?.endsLength < @ends.length) and @seenFor
    @seenImport = no unless (backslash and @seenImport) or @importSpecifierList
    @seenExport = no unless (backslash and @seenExport) or @exportSpecifierList

    # Indentation size is the whitespace after the last matched newline.
    size = indent.length - 1 - indent.lastIndexOf '\n'
    noNewlines = @unfinished()

    newIndentLiteral = if size > 0 then indent[-size..] else ''
    unless /^(.?)\1*$/.exec newIndentLiteral
      @error 'mixed indentation', offset: indent.length
      return indent.length

    minLiteralLength = Math.min newIndentLiteral.length, @indentLiteral.length
    if newIndentLiteral[...minLiteralLength] isnt @indentLiteral[...minLiteralLength]
      @error 'indentation mismatch', offset: indent.length
      return indent.length

    # Same effective level: just a newline (or suppressed, if mid-expression).
    if size - @continuationLineAdditionalIndent is @indent
      if noNewlines then @suppressNewlines() else @newlineToken offset
      return indent.length

    if size > @indent
      if noNewlines
        # Over-indentation on a continued line is remembered, not tokenized.
        @continuationLineAdditionalIndent = size - @indent unless backslash
        if @continuationLineAdditionalIndent
          prev.continuationLineIndent = @indent + @continuationLineAdditionalIndent
        @suppressNewlines()
        return indent.length
      unless @tokens.length
        # Indentation before any token establishes the file's base level.
        @baseIndent = @indent = size
        @indentLiteral = newIndentLiteral
        return indent.length
      diff = size - @indent + @outdebt
      @token 'INDENT', diff, offset: offset + indent.length - size, length: size
      @indents.push diff
      @ends.push {tag: 'OUTDENT'}
      @outdebt = @continuationLineAdditionalIndent = 0
      @indent = size
      @indentLiteral = newIndentLiteral
    else if size < @baseIndent
      @error 'missing indentation', offset: offset + indent.length
    else
      endsContinuationLineIndentation = @continuationLineAdditionalIndent > 0
      @continuationLineAdditionalIndent = 0
      @outdentToken {moveOut: @indent - size, noNewlines, outdentLength: indent.length, offset, indentSize: size, endsContinuationLineIndentation}
    indent.length
  # Record an outdent token or multiple tokens, if we happen to be moving back
  # inwards past several recorded indents. Sets new @indent value.
  # Returns `this`.
  outdentToken: ({moveOut, noNewlines, outdentLength = 0, offset = 0, indentSize, endsContinuationLineIndentation}) ->
    decreasedIndent = @indent - moveOut
    while moveOut > 0
      lastIndent = @indents[@indents.length - 1]
      if not lastIndent
        # No indents left to close: clear any debt and stop.
        @outdebt = moveOut = 0
      else if @outdebt and moveOut <= @outdebt
        # The whole move is covered by previously accumulated outdent debt.
        @outdebt -= moveOut
        moveOut = 0
      else
        dent = @indents.pop() + @outdebt
        if outdentLength and @chunk[outdentLength] in INDENTABLE_CLOSERS
          decreasedIndent -= dent - moveOut
          moveOut = dent
        @outdebt = 0
        # pair might call outdentToken, so preserve decreasedIndent
        @pair 'OUTDENT'
        @token 'OUTDENT', moveOut, length: outdentLength, indentSize: indentSize + moveOut - dent
        moveOut -= dent
    @outdebt -= moveOut if dent
    @suppressSemicolons()

    unless @tag() is 'TERMINATOR' or noNewlines
      terminatorToken = @token 'TERMINATOR', '\n', offset: offset + outdentLength, length: 0
      terminatorToken.endsContinuationLineIndentation = {preContinuationLineIndent: @indent} if endsContinuationLineIndentation
    @indent = decreasedIndent
    @indentLiteral = @indentLiteral[...decreasedIndent]
    this
  # Matches and consumes non-meaningful whitespace. Tags the previous token
  # as `spaced` (or `newLine` when the chunk starts with a newline), because
  # there are some cases where that distinction makes a difference.
  whitespaceToken: ->
    match = WHITESPACE.exec @chunk
    startsWithNewline = @chunk.charAt(0) is '\n'
    return 0 unless match or startsWithNewline
    if previous = @prev()
      flag = if match then 'spaced' else 'newLine'
      previous[flag] = true
    # Newlines themselves are consumed by `lineToken`, so report 0 for them.
    if match then match[0].length else 0
  # Generate a newline token. Consecutive newlines get merged together.
  newlineToken: (offset) ->
    @suppressSemicolons()
    unless @tag() is 'TERMINATOR'
      @token 'TERMINATOR', '\n', {offset, length: 0}
    this
  # Use a `\` at a line-ending to suppress the newline.
  # The slash is removed here once its job is done.
  suppressNewlines: ->
    previous = @prev()
    return this unless previous[1] is '\\'
    if previous.comments and @tokens.length > 1
      # `@tokens.length` should be at least 2 (some code, then `\`).
      # If something puts a `\` after nothing, they deserve to lose any
      # comments that trail it.
      attachCommentsToNode previous.comments, @tokens[@tokens.length - 2]
    @tokens.pop()
    this
  # Matches JSX: opening tags, self-closing tags, closing tags, and attribute
  # expression braces. JSX elements are compiled as calls, so this emits
  # generated CALL_START/CALL_END and bracket tokens around tag contents.
  # Returns the number of characters consumed (0 when not JSX).
  jsxToken: ->
    firstChar = @chunk[0]
    # Check the previous token to detect if attribute is spread.
    prevChar = if @tokens.length > 0 then @tokens[@tokens.length - 1][0] else ''
    if firstChar is '<'
      match = JSX_IDENTIFIER.exec(@chunk[1...]) or JSX_FRAGMENT_IDENTIFIER.exec(@chunk[1...])
      return 0 unless match and (
        @jsxDepth > 0 or
        # Not the right hand side of an unspaced comparison (i.e. `a<b`).
        not (prev = @prev()) or
        prev.spaced or
        prev[0] not in COMPARABLE_LEFT_SIDE
      )
      [input, id] = match
      fullId = id
      # Dotted tag names like `<A.B.C>` split into an identifier plus
      # property accesses.
      if '.' in id
        [id, properties...] = id.split '.'
      else
        properties = []
      tagToken = @token 'JSX_TAG', id,
        length: id.length + 1
        data:
          openingBracketToken: @makeToken '<', '<'
          tagNameToken: @makeToken 'IDENTIFIER', id, offset: 1
      offset = id.length + 1
      for property in properties
        @token '.', '.', {offset}
        offset += 1
        @token 'PROPERTY', property, {offset}
        offset += property.length
      @token 'CALL_START', '(', generated: yes
      @token '[', '[', generated: yes
      @ends.push {tag: '/>', origin: tagToken, name: id, properties}
      @jsxDepth++
      return fullId.length + 1
    else if jsxTag = @atJSXTag()
      if @chunk[...2] is '/>' # Self-closing tag.
        @pair '/>'
        @token ']', ']',
          length: 2
          generated: yes
        @token 'CALL_END', ')',
          length: 2
          generated: yes
          data:
            selfClosingSlashToken: @makeToken '/', '/'
            closingBracketToken: @makeToken '>', '>', offset: 1
        @jsxDepth--
        return 2
      else if firstChar is '{'
        if prevChar is ':'
          # This token represents the start of a JSX attribute value
          # that's an expression (e.g. the `{b}` in `<div a={b} />`).
          # Our grammar represents the beginnings of expressions as `(`
          # tokens, so make this into a `(` token that displays as `{`.
          token = @token '(', '{'
          @jsxObjAttribute[@jsxDepth] = no
          # tag attribute name as JSX
          addTokenData @tokens[@tokens.length - 3],
            jsx: yes
        else
          token = @token '{', '{'
          @jsxObjAttribute[@jsxDepth] = yes
        @ends.push {tag: '}', origin: token}
        return 1
      else if firstChar is '>' # end of opening tag
        # Ignore terminators inside a tag.
        {origin: openingTagToken} = @pair '/>' # As if the current tag was self-closing.
        @token ']', ']',
          generated: yes
          data:
            closingBracketToken: @makeToken '>', '>'
        @token ',', 'JSX_COMMA', generated: yes
        {tokens, index: end} =
          @matchWithInterpolations INSIDE_JSX, '>', '</', JSX_INTERPOLATION
        @mergeInterpolationTokens tokens, {endOffset: end, jsx: yes}, (value) =>
          @validateUnicodeCodePointEscapes value, delimiter: '>'
        # The closing tag name must match the opening one (including any
        # dotted properties).
        match = JSX_IDENTIFIER.exec(@chunk[end...]) or JSX_FRAGMENT_IDENTIFIER.exec(@chunk[end...])
        if not match or match[1] isnt "#{jsxTag.name}#{(".#{property}" for property in jsxTag.properties).join ''}"
          @error "expected corresponding JSX closing tag for #{jsxTag.name}",
            jsxTag.origin.data.tagNameToken[2]
        [, fullTagName] = match
        afterTag = end + fullTagName.length
        if @chunk[afterTag] isnt '>'
          @error "missing closing > after tag name", offset: afterTag, length: 1
        # -2/+2 for the opening `</` and +1 for the closing `>`.
        endToken = @token 'CALL_END', ')',
          offset: end - 2
          length: fullTagName.length + 3
          generated: yes
          data:
            closingTagOpeningBracketToken: @makeToken '<', '<', offset: end - 2
            closingTagSlashToken: @makeToken '/', '/', offset: end - 1
            # TODO: individual tokens for complex tag name? eg < / A . B >
            closingTagNameToken: @makeToken 'IDENTIFIER', fullTagName, offset: end
            closingTagClosingBracketToken: @makeToken '>', '>', offset: end + fullTagName.length
        # make the closing tag location data more easily accessible to the grammar
        addTokenData openingTagToken, endToken.data
        @jsxDepth--
        return afterTag + 1
      else
        return 0
    else if @atJSXTag 1
      if firstChar is '}'
        @pair firstChar
        if @jsxObjAttribute[@jsxDepth]
          @token '}', '}'
          @jsxObjAttribute[@jsxDepth] = no
        else
          @token ')', '}'
        @token ',', ',', generated: yes
        return 1
      else
        return 0
    else
      return 0
  # Whether we are currently inside an (unclosed) JSX opening tag; returns the
  # matching `@ends` entry when so, `no` otherwise. `depth` skips that many
  # enclosing levels (OUTDENT entries are always skipped).
  atJSXTag: (depth = 0) ->
    return no if @jsxDepth is 0
    index = @ends.length - 1
    while @ends[index]?.tag is 'OUTDENT' or depth-- > 0 # Ignore indents.
      index--
    candidate = @ends[index]
    candidate?.tag is '/>' and candidate
# We treat all other single characters as a token. E.g.: `( ) , . !`
# Multi-character operators are also literal tokens, so that Jison can assign
# the proper order of operations. There are some symbols that we tag specially
# here. `;` and newlines are both treated as a `TERMINATOR`, we distinguish
# parentheses that indicate a method call from regular parentheses, and so on.
# Returns the number of characters consumed from `@chunk`.
literalToken: ->
  if match = OPERATOR.exec @chunk
    [value] = match
    # `->`/`=>` means the tokens just consumed were function parameters.
    @tagParameters() if CODE.test value
  else
    value = @chunk.charAt 0
  tag = value
  prev = @prev()

  if prev and value in ['=', COMPOUND_ASSIGN...]
    skipToken = false
    # Merge an unspaced `||`/`&&` followed by `=` into one `COMPOUND_ASSIGN`
    # token (`||=`/`&&=`), stretching its location data to cover the `=`.
    if value is '=' and prev[1] in ['||', '&&'] and not prev.spaced
      prev[0] = 'COMPOUND_ASSIGN'
      prev[1] += '='
      prev.data.original += '=' if prev.data?.original
      prev[2].range = [
        prev[2].range[0]
        prev[2].range[1] + 1
      ]
      prev[2].last_column += 1
      prev[2].last_column_exclusive += 1
      # The assignment target now sits one token further back; validate that.
      prev = @tokens[@tokens.length - 2]
      skipToken = true
    # Assigning to a keyword or reserved word is a compile-time error
    # (properties named like keywords are fine).
    if prev and prev[0] isnt 'PROPERTY'
      origin = prev.origin ? prev
      message = isUnassignable prev[1], origin[1]
      @error message, origin[2] if message
    return value.length if skipToken

  # `import(` is a dynamic import call, not the `import` statement keyword.
  if value is '(' and prev?[0] is 'IMPORT'
    prev[0] = 'DYNAMIC_IMPORT'

  # Track `{ ... }` specifier lists of `import`/`export` statements.
  if value is '{' and @seenImport
    @importSpecifierList = yes
  else if @importSpecifierList and value is '}'
    @importSpecifierList = no
  else if value is '{' and prev?[0] is 'EXPORT'
    @exportSpecifierList = yes
  else if @exportSpecifierList and value is '}'
    @exportSpecifierList = no

  if value is ';'
    @error 'unexpected ;' if prev?[0] in ['=', UNFINISHED...]
    @seenFor = @seenImport = @seenExport = no
    tag = 'TERMINATOR'
  else if value is '*' and prev?[0] is 'EXPORT'
    tag = 'EXPORT_ALL'
  else if value in MATH then tag = 'MATH'
  else if value in COMPARE then tag = 'COMPARE'
  else if value in COMPOUND_ASSIGN then tag = 'COMPOUND_ASSIGN'
  else if value in UNARY then tag = 'UNARY'
  else if value in UNARY_MATH then tag = 'UNARY_MATH'
  else if value in SHIFT then tag = 'SHIFT'
  else if value is '?' and prev?.spaced then tag = 'BIN?'
  else if prev
    # An unspaced `(` after a callable token is a function call, not grouping.
    if value is '(' and not prev.spaced and prev[0] in CALLABLE
      prev[0] = 'FUNC_EXIST' if prev[0] is '?'
      tag = 'CALL_START'
    else if value is '[' and ((prev[0] in INDEXABLE and not prev.spaced) or
           (prev[0] is '::')) # `.prototype` can't be a method you can call.
      tag = 'INDEX_START'
      switch prev[0]
        when '?' then prev[0] = 'INDEX_SOAK'
  token = @makeToken tag, value
  switch value
    when '(', '{', '[' then @ends.push {tag: INVERSES[value], origin: token}
    when ')', '}', ']' then @pair value
  # NOTE(review): a second token is created here rather than pushing `token`;
  # `@ends[...].origin` therefore refers to a distinct (unpushed) token object.
  # Looks intentional for error reporting — confirm before changing.
  @tokens.push @makeToken tag, value
  value.length
# Token Manipulators
# ------------------

# A source of ambiguity in our grammar used to be parameter lists in function
# definitions versus argument lists in function calls. Walk backwards, tagging
# parameters specially in order to make things easier for the parser.
# Called when `->`/`=>` is lexed: retags the preceding balanced `( ... )`
# group as `PARAM_START`/`PARAM_END`. Returns `this` (or `@tagDoIife`'s result).
tagParameters: ->
  return @tagDoIife() if @tag() isnt ')'
  stack = []
  {tokens} = this
  i = tokens.length
  # The token just before `->` must be the closing paren of the param list.
  paramEndToken = tokens[--i]
  paramEndToken[0] = 'PARAM_END'
  while tok = tokens[--i]
    switch tok[0]
      when ')'
        # A nested `)`: remember it so its matching `(` is skipped below.
        stack.push tok
      when '(', 'CALL_START'
        if stack.length then stack.pop()
        else if tok[0] is '('
          # Found the opening paren of the parameter list.
          tok[0] = 'PARAM_START'
          return @tagDoIife i - 1
        else
          # The group was a call (`CALL_START`), not a parameter list; undo.
          paramEndToken[0] = 'CALL_END'
          return this
  this
  781. # Tag `do` followed by a function differently than `do` followed by eg an
  782. # identifier to allow for different grammar precedence
  783. tagDoIife: (tokenIndex) ->
  784. tok = @tokens[tokenIndex ? @tokens.length - 1]
  785. return this unless tok?[0] is 'DO'
  786. tok[0] = 'DO_IIFE'
  787. this
# Close up all remaining open blocks at the end of the file, by emitting the
# outdents needed to return to indentation level zero.
closeIndentation: ->
  @outdentToken moveOut: @indent, indentSize: 0
# Match the contents of a delimited token and expand variables and expressions
# inside it using Ruby-like notation for substitution of arbitrary
# expressions.
#
#     "Hello #{name.capitalize()}."
#
# If it encounters an interpolation, this method will recursively create a new
# Lexer and tokenize until the `{` of `#{` is balanced with a `}`.
#
#  - `regex` matches the contents of a token (but not `delimiter`, and not
#    `#{` if interpolations are desired).
#  - `delimiter` is the delimiter of the token. Examples are `'`, `"`, `'''`,
#    `"""` and `///`.
#  - `closingDelimiter` is different from `delimiter` only in JSX.
#  - `interpolators` matches the start of an interpolation, for JSX it's both
#    `{` and `<` (i.e. nested JSX tag).
#
# This method allows us to have strings within interpolations within strings,
# ad infinitum. Returns `null` on no match, otherwise `{tokens, index}` where
# `tokens` mixes fake `'NEOSTRING'`/`'TOKENS'` tokens and `index` is how many
# characters of `@chunk` were consumed.
matchWithInterpolations: (regex, delimiter, closingDelimiter = delimiter, interpolators = /^#\{/) ->
  tokens = []
  offsetInChunk = delimiter.length
  return null unless @chunk[...offsetInChunk] is delimiter
  str = @chunk[offsetInChunk..]
  loop
    [strPart] = regex.exec str

    @validateEscapes strPart, {isRegex: delimiter.charAt(0) is '/', offsetInChunk}

    # Push a fake `'NEOSTRING'` token, which will get turned into a real string later.
    tokens.push @makeToken 'NEOSTRING', strPart, offset: offsetInChunk

    str = str[strPart.length..]
    offsetInChunk += strPart.length

    break unless match = interpolators.exec str
    [interpolator] = match

    # To remove the `#` in `#{`.
    interpolationOffset = interpolator.length - 1
    [line, column, offset] = @getLineAndColumnFromChunk offsetInChunk + interpolationOffset
    rest = str[interpolationOffset..]
    # Lex the interpolated expression with a fresh Lexer, stopping once the
    # opening brace/tag is balanced (`untilBalanced`).
    {tokens: nested, index} =
      new Lexer().tokenize rest, {line, column, offset, untilBalanced: on, @locationDataCompensations}
    # Account for the `#` in `#{`.
    index += interpolationOffset

    braceInterpolator = str[index - 1] is '}'
    if braceInterpolator
      # Turn the leading and trailing `{` and `}` into parentheses. Unnecessary
      # parentheses will be removed later.
      [open, ..., close] = nested
      open[0] = 'INTERPOLATION_START'
      open[1] = '('
      open[2].first_column -= interpolationOffset
      open[2].range = [
        open[2].range[0] - interpolationOffset
        open[2].range[1]
      ]
      close[0] = 'INTERPOLATION_END'
      close[1] = ')'
      close.origin = ['', 'end of interpolation', close[2]]

    # Remove leading `'TERMINATOR'` (if any).
    nested.splice 1, 1 if nested[1]?[0] is 'TERMINATOR'
    # Remove trailing `'INDENT'/'OUTDENT'` pair (if any).
    nested.splice -3, 2 if nested[nested.length - 3]?[0] is 'INDENT' and nested[nested.length - 2][0] is 'OUTDENT'

    unless braceInterpolator
      # We are not using `{` and `}`, so wrap the interpolated tokens instead.
      open = @makeToken 'INTERPOLATION_START', '(', offset: offsetInChunk, length: 0, generated: yes
      close = @makeToken 'INTERPOLATION_END', ')', offset: offsetInChunk + index, length: 0, generated: yes
      nested = [open, nested..., close]

    # Push a fake `'TOKENS'` token, which will get turned into real tokens later.
    tokens.push ['TOKENS', nested]

    str = str[index..]
    offsetInChunk += index

  unless str[...closingDelimiter.length] is closingDelimiter
    @error "missing #{closingDelimiter}", length: delimiter.length

  {tokens, index: offsetInChunk + closingDelimiter.length}
# Merge the array `tokens` of the fake token types `'TOKENS'` and `'NEOSTRING'`
# (as returned by `matchWithInterpolations`) into the token stream. The value
# of `'NEOSTRING'`s are converted using `fn` and turned into strings using
# `options` first. When there is more than one piece, the whole group is
# wrapped in `STRING_START`/`STRING_END` tokens.
mergeInterpolationTokens: (tokens, options, fn) ->
  {quote, indent, double, heregex, endOffset, jsx} = options

  if tokens.length > 1
    lparen = @token 'STRING_START', '(', length: quote?.length ? 0, data: {quote}, generated: not quote?.length

  # NOTE(review): `firstIndex` appears unused below — looks vestigial; confirm.
  firstIndex = @tokens.length
  $ = tokens.length - 1
  for token, i in tokens
    [tag, value] = token
    switch tag
      when 'TOKENS'
        # There are comments (and nothing else) in this interpolation.
        if value.length is 2 and (value[0].comments or value[1].comments)
          placeholderToken = @makeToken 'JS', '', generated: yes
          # Use the same location data as the first parenthesis.
          placeholderToken[2] = value[0][2]
          for val in value when val.comments
            placeholderToken.comments ?= []
            placeholderToken.comments.push val.comments...
          value.splice 1, 0, placeholderToken
        # Push all the tokens in the fake `'TOKENS'` token. These already have
        # sane location data.
        locationToken = value[0]
        tokensToPush = value
      when 'NEOSTRING'
        # Convert `'NEOSTRING'` into `'STRING'`.
        converted = fn.call this, token[1], i
        addTokenData token, initialChunk: yes if i is 0
        addTokenData token, finalChunk: yes if i is $
        addTokenData token, {indent, quote, double}
        addTokenData token, {heregex} if heregex
        addTokenData token, {jsx} if jsx
        token[0] = 'STRING'
        token[1] = '"' + converted + '"'
        # A single-piece quoted string: widen its location data to include
        # the quotes themselves.
        if tokens.length is 1 and quote?
          token[2].first_column -= quote.length
          if token[1].substr(-2, 1) is '\n'
            token[2].last_line += 1
            token[2].last_column = quote.length - 1
          else
            token[2].last_column += quote.length
            token[2].last_column -= 1 if token[1].length is 2
          token[2].last_column_exclusive += quote.length
          token[2].range = [
            token[2].range[0] - quote.length
            token[2].range[1] + quote.length
          ]
        locationToken = token
        tokensToPush = [token]
    @tokens.push tokensToPush...

  if lparen
    # Give the wrapping parens location data spanning the whole string.
    [..., lastToken] = tokens
    lparen.origin = ['STRING', null,
      first_line: lparen[2].first_line
      first_column: lparen[2].first_column
      last_line: lastToken[2].last_line
      last_column: lastToken[2].last_column
      last_line_exclusive: lastToken[2].last_line_exclusive
      last_column_exclusive: lastToken[2].last_column_exclusive
      range: [
        lparen[2].range[0]
        lastToken[2].range[1]
      ]
    ]
    lparen[2] = lparen.origin[2] unless quote?.length
    # NOTE(review): `rparen` is assigned but not read afterwards; the `@token`
    # call's side effect (pushing STRING_END) is what matters here.
    rparen = @token 'STRING_END', ')', offset: endOffset - (quote ? '').length, length: quote?.length ? 0, generated: not quote?.length
# Pairs up a closing token, ensuring that all listed pairs of tokens are
# correctly balanced throughout the course of the token stream.
# When the expected closer is an `OUTDENT`, the indent is auto-closed first
# and pairing retried recursively.
pair: (tag) ->
  [..., prev] = @ends
  unless tag is wanted = prev?.tag
    @error "unmatched #{tag}" unless 'OUTDENT' is wanted
    # Auto-close `INDENT` to support syntax like this:
    #
    #     el.click((event) ->
    #       el.hide())
    #
    [..., lastIndent] = @indents
    @outdentToken moveOut: lastIndent, noNewlines: true
    return @pair tag
  @ends.pop()
# Helpers
# -------

# Compensate for the things we strip out initially (e.g. carriage returns)
# so that location data stays accurate with respect to the original source file.
# Sums the compensations recorded in `@locationDataCompensations` over the
# offset range `[start, end]`; each compensation found extends `end`, since
# the stripped characters push the real end position further out.
getLocationDataCompensation: (start, end) ->
  totalCompensation = 0
  initialEnd = end
  current = start
  while current <= end
    # NOTE(review): skip compensating at the final (extended) offset unless the
    # requested range was empty to begin with — confirm intent before changing.
    break if current is end and start isnt initialEnd
    compensation = @locationDataCompensations[current]
    if compensation?
      totalCompensation += compensation
      end += compensation
    current++
  return totalCompensation
# Returns the line and column number from an offset into the current chunk.
#
# `offset` is a number of characters into `@chunk`.
# Returns `[line, column, sourceOffset]`, all adjusted by the recorded
# location-data compensations for stripped characters.
getLineAndColumnFromChunk: (offset) ->
  compensation = @getLocationDataCompensation @chunkOffset, @chunkOffset + offset

  if offset is 0
    return [@chunkLine, @chunkColumn + compensation, @chunkOffset + compensation]

  if offset >= @chunk.length
    string = @chunk
  else
    string = @chunk[..offset-1]

  lineCount = count string, '\n'

  column = @chunkColumn
  if lineCount > 0
    # The offset crosses newlines: the column restarts on the last line, and
    # the compensation is split into the part before and after that line.
    [..., lastLine] = string.split '\n'
    column = lastLine.length
    previousLinesCompensation = @getLocationDataCompensation @chunkOffset, @chunkOffset + offset - column
    # Don't recompensate for initially inserted newline.
    previousLinesCompensation = 0 if previousLinesCompensation < 0
    columnCompensation = @getLocationDataCompensation(
      @chunkOffset + offset + previousLinesCompensation - column
      @chunkOffset + offset + previousLinesCompensation
    )
  else
    column += string.length
    columnCompensation = compensation

  [@chunkLine + lineCount, column + columnCompensation, @chunkOffset + offset + compensation]
# Build the `locationData` object (first/last line & column, exclusive
# variants, and `range` of absolute offsets) for a token that starts at
# `offsetInChunk` within the current chunk and spans `length` characters.
makeLocationData: ({ offsetInChunk, length }) ->
  locationData = range: []
  [locationData.first_line, locationData.first_column, locationData.range[0]] =
    @getLineAndColumnFromChunk offsetInChunk

  # Use length - 1 for the final offset - we're supplying the last_line and the last_column,
  # so if last_column == first_column, then we're looking at a character of length 1.
  lastCharacter = if length > 0 then (length - 1) else 0
  [locationData.last_line, locationData.last_column, endOffset] =
    @getLineAndColumnFromChunk offsetInChunk + lastCharacter
  [locationData.last_line_exclusive, locationData.last_column_exclusive] =
    @getLineAndColumnFromChunk offsetInChunk + lastCharacter + (if length > 0 then 1 else 0)
  locationData.range[1] = if length > 0 then endOffset + 1 else endOffset

  locationData
# Same as `token`, except this just returns the token without adding it
# to the results. Optional `origin`, `generated` and `indentSize` are copied
# onto the token array as properties when provided.
makeToken: (tag, value, {offset: offsetInChunk = 0, length = value.length, origin, generated, indentSize} = {}) ->
  token = [tag, value, @makeLocationData {offsetInChunk, length}]
  token.origin = origin if origin
  token.generated = yes if generated
  token.indentSize = indentSize if indentSize?
  token
# Add a token to the results.
# `offset` is the offset into the current `@chunk` where the token starts.
# `length` is the length of the token in the `@chunk`, after the offset. If
# not specified, the length of `value` will be used.
# `data`, when given, is merged into the token's `data` property.
#
# Returns the new token.
token: (tag, value, {offset, length, origin, data, generated, indentSize} = {}) ->
  token = @makeToken tag, value, {offset, length, origin, generated, indentSize}
  addTokenData token, data if data
  @tokens.push token
  token
  1022. # Peek at the last tag in the token stream.
  1023. tag: ->
  1024. [..., token] = @tokens
  1025. token?[0]
  1026. # Peek at the last value in the token stream.
  1027. value: (useOrigin = no) ->
  1028. [..., token] = @tokens
  1029. if useOrigin and token?.origin?
  1030. token.origin[1]
  1031. else
  1032. token?[1]
  1033. # Get the previous token in the token stream.
  1034. prev: ->
  1035. @tokens[@tokens.length - 1]
  1036. # Are we in the midst of an unfinished expression?
  1037. unfinished: ->
  1038. LINE_CONTINUER.test(@chunk) or
  1039. @tag() in UNFINISHED
# Validate Unicode code point escapes (`\u{...}`) in `str`, delegating to the
# shared helper; `@error` is merged into `options` (it is bound — see
# `error:` below) so the helper can report errors at the right location.
validateUnicodeCodePointEscapes: (str, options) ->
  replaceUnicodeCodePointEscapes str, merge options, {@error}
# Validates escapes in strings and regexes. Raises a compile error for octal
# escapes and for malformed hex/unicode escape sequences; `options.isRegex`
# selects the regex variant, and `options.offsetInChunk` anchors the error.
validateEscapes: (str, options = {}) ->
  invalidEscapeRegex =
    if options.isRegex
      REGEX_INVALID_ESCAPE
    else
      STRING_INVALID_ESCAPE
  match = invalidEscapeRegex.exec str
  return unless match
  # Capture groups: prefix before the escape, then one of octal / hex /
  # unicode-code-point / unicode, matching the regex's alternatives in order.
  [[], before, octal, hex, unicodeCodePoint, unicode] = match
  message =
    if octal
      "octal escape sequences are not allowed"
    else
      "invalid escape sequence"
  invalidEscape = "\\#{octal or hex or unicodeCodePoint or unicode}"
  @error "#{message} #{invalidEscape}",
    offset: (options.offsetInChunk ? 0) + match.index + before.length
    length: invalidEscape.length
# Pop any trailing `;` tokens just emitted, erroring when a semicolon follows
# a token that leaves the expression unfinished (e.g. a dangling `=`).
suppressSemicolons: ->
  while @value() is ';'
    @tokens.pop()
    @error 'unexpected ;' if @prev()?[0] in ['=', UNFINISHED...]
# Throws an error at either a given offset from the current chunk or at the
# location of a token (`token[2]`). Defined with `=>` so it stays bound to
# the lexer instance when passed as a callback (e.g. merged into options by
# `validateUnicodeCodePointEscapes` above).
error: (message, options = {}) =>
  location =
    if 'first_line' of options
      # `options` already is a location-data object; use it as-is.
      options
    else
      [first_line, first_column] = @getLineAndColumnFromChunk options.offset ? 0
      {first_line, first_column, last_column: first_column + (options.length ? 1) - 1}
  throwSyntaxError message, location
  1075. # Helper functions
  1076. # ----------------
  1077. isUnassignable = (name, displayName = name) -> switch
  1078. when name in [JS_KEYWORDS..., COFFEE_KEYWORDS...]
  1079. "keyword '#{displayName}' can't be assigned"
  1080. when name in STRICT_PROSCRIBED
  1081. "'#{displayName}' can't be assigned"
  1082. when name in RESERVED
  1083. "reserved word '#{displayName}' can't be assigned"
  1084. else
  1085. false
  1086. exports.isUnassignable = isUnassignable
  1087. # `from` isnt a CoffeeScript keyword, but it behaves like one in `import` and
  1088. # `export` statements (handled above) and in the declaration line of a `for`
  1089. # loop. Try to detect when `from` is a variable identifier and when it is this
  1090. # sometimes keyword.
  1091. isForFrom = (prev) ->
  1092. # `for i from iterable`
  1093. if prev[0] is 'IDENTIFIER'
  1094. yes
  1095. # `for from…`
  1096. else if prev[0] is 'FOR'
  1097. no
  1098. # `for {from}…`, `for [from]…`, `for {a, from}…`, `for {a: from}…`
  1099. else if prev[1] in ['{', '[', ',', ':']
  1100. no
  1101. else
  1102. yes
  1103. addTokenData = (token, data) ->
  1104. Object.assign (token.data ?= {}), data
# Constants
# ---------

# Keywords that CoffeeScript shares in common with JavaScript.
JS_KEYWORDS = [
  'true', 'false', 'null', 'this'
  'new', 'delete', 'typeof', 'in', 'instanceof'
  'return', 'throw', 'break', 'continue', 'debugger', 'yield', 'await'
  'if', 'else', 'switch', 'for', 'while', 'do', 'try', 'catch', 'finally'
  'class', 'extends', 'super'
  'import', 'export', 'default'
]

# CoffeeScript-only keywords.
COFFEE_KEYWORDS = [
  'undefined', 'Infinity', 'NaN'
  'then', 'unless', 'until', 'loop', 'of', 'by', 'when'
]

# CoffeeScript aliases and the JavaScript tokens they compile to.
COFFEE_ALIAS_MAP =
  and : '&&'
  or : '||'
  is : '=='
  isnt : '!='
  not : '!'
  yes : 'true'
  no : 'false'
  on : 'true'
  off : 'false'

COFFEE_ALIASES = (key for key of COFFEE_ALIAS_MAP)
# The aliases are keywords too; extend the list in place.
COFFEE_KEYWORDS = COFFEE_KEYWORDS.concat COFFEE_ALIASES

# The list of keywords that are reserved by JavaScript, but not used, or are
# used by CoffeeScript internally. We throw an error when these are encountered,
# to avoid having a JavaScript error at runtime.
RESERVED = [
  'case', 'function', 'var', 'void', 'with', 'const', 'let', 'enum'
  'native', 'implements', 'interface', 'package', 'private'
  'protected', 'public', 'static'
]

# Names proscribed as assignment targets in strict mode.
STRICT_PROSCRIBED = ['arguments', 'eval']

# The superset of both JavaScript keywords and reserved words, none of which may
# be used as identifiers or properties.
exports.JS_FORBIDDEN = JS_KEYWORDS.concat(RESERVED).concat(STRICT_PROSCRIBED)

# The character code of the nasty Microsoft madness otherwise known as the BOM.
BOM = 65279
# Token matching regexes.
IDENTIFIER = /// ^
  (?!\d)
  ( (?: (?!\s)[$\w\x7f-\uffff] )+ )
  ( [^\n\S]* : (?!:) )? # Is this a property name?
///

# Like `IDENTIFIER`, but includes `-`s
JSX_IDENTIFIER_PART = /// (?: (?!\s)[\-$\w\x7f-\uffff] )+ ///.source

# In https://facebook.github.io/jsx/ spec, JSXElementName can be
# JSXIdentifier, JSXNamespacedName (JSXIdentifier : JSXIdentifier), or
# JSXMemberExpression (two or more JSXIdentifier connected by `.`s).
JSX_IDENTIFIER = /// ^
  (?![\d<]) # Must not start with `<`.
  ( #{JSX_IDENTIFIER_PART}
    (?: \s* : \s* #{JSX_IDENTIFIER_PART} # JSXNamespacedName
    | (?: \s* \. \s* #{JSX_IDENTIFIER_PART} )+ # JSXMemberExpression
    )? )
///

# Fragment: <></>
JSX_FRAGMENT_IDENTIFIER = /// ^
  ()> # Ends immediately with `>`.
///

# In https://facebook.github.io/jsx/ spec, JSXAttributeName can be either
# JSXIdentifier or JSXNamespacedName which is JSXIdentifier : JSXIdentifier
JSX_ATTRIBUTE = /// ^
  (?!\d)
  ( #{JSX_IDENTIFIER_PART}
    (?: \s* : \s* #{JSX_IDENTIFIER_PART} # JSXNamespacedName
    )? )
  ( [^\S]* = (?!=) )? # Is this an attribute with a value?
///

NUMBER = ///
  ^ 0b[01](?:_?[01])*n? | # binary
  ^ 0o[0-7](?:_?[0-7])*n? | # octal
  ^ 0x[\da-f](?:_?[\da-f])*n? | # hex
  ^ \d+n | # decimal bigint
  ^ (?:\d(?:_?\d)*)? \.? (?:\d(?:_?\d)*)+ # decimal
  (?:e[+-]? (?:\d(?:_?\d)*)+ )?
  # decimal without support for numeric literal separators for reference:
  # \d*\.?\d+ (?:e[+-]?\d+)?
///i

OPERATOR = /// ^ (
  ?: [-=]> # function
   | [-+*/%<>&|^!?=]= # compound assign / compare
   | >>>=? # zero-fill right shift
   | ([-+:])\1 # doubles
   | ([&|<>*/%])\2=? # logic / shift / power / floor division / modulo
   | \?(\.|::) # soak access
   | \.{2,3} # range or splat
) ///

WHITESPACE = /^[^\n\S]+/

COMMENT = /^(\s*)###([^#][\s\S]*?)(?:###([^\n\S]*)|###$)|^((?:\s*#(?!##[^#]).*)+)/

# `->` or `=>`; signals that the preceding parens were function parameters.
CODE = /^[-=]>/

MULTI_DENT = /^(?:\n[^\n\S]*)+/

JSTOKEN = ///^ `(?!``) ((?: [^`\\] | \\[\s\S] )*) ` ///
HERE_JSTOKEN = ///^ ``` ((?: [^`\\] | \\[\s\S] | `(?!``) )*) ``` ///

# String-matching-regexes.
STRING_START = /^(?:'''|"""|'|")/

STRING_SINGLE = /// ^(?: [^\\'] | \\[\s\S] )* ///
STRING_DOUBLE = /// ^(?: [^\\"#] | \\[\s\S] | \#(?!\{) )* ///
HEREDOC_SINGLE = /// ^(?: [^\\'] | \\[\s\S] | '(?!'') )* ///
HEREDOC_DOUBLE = /// ^(?: [^\\"#] | \\[\s\S] | "(?!"") | \#(?!\{) )* ///

INSIDE_JSX = /// ^(?:
  [^
    \{ # Start of CoffeeScript interpolation.
    < # Maybe JSX tag (`<` not allowed even if bare).
  ]
)* /// # Similar to `HEREDOC_DOUBLE` but there is no escaping.
JSX_INTERPOLATION = /// ^(?:
    \{ # CoffeeScript interpolation.
  | <(?!/) # JSX opening tag.
)///

HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g

# Regex-matching-regexes.
REGEX = /// ^
  / (?!/) ((
  ?: [^ [ / \n \\ ] # Every other thing.
   | \\[^\n] # Anything but newlines escaped.
   | \[ # Character class.
       (?: \\[^\n] | [^ \] \n \\ ] )*
     \]
  )*) (/)?
///

REGEX_FLAGS = /^\w*/
VALID_FLAGS = /^(?!.*(.).*\1)[gimsuy]*$/

HEREGEX = /// ^
  (?:
      # Match any character, except those that need special handling below.
      [^\\/#\s]
      # Match `\` followed by any character.
    | \\[\s\S]
      # Match any `/` except `///`.
    | /(?!//)
      # Match `#` which is not part of interpolation, e.g. `#{}`.
    | \#(?!\{)
      # Comments consume everything until the end of the line, including `///`.
    | \s+(?:#(?!\{).*)?
  )*
///

HEREGEX_COMMENT = /(\s+)(#(?!{).*)/gm

REGEX_ILLEGAL = /// ^ ( / | /{3}\s*) (\*) ///

POSSIBLY_DIVISION = /// ^ /=?\s ///

# Other regexes.
HERECOMMENT_ILLEGAL = /\*\//

LINE_CONTINUER = /// ^ \s* (?: , | \??\.(?![.\d]) | \??:: ) ///

STRING_INVALID_ESCAPE = ///
  ( (?:^|[^\\]) (?:\\\\)* ) # Make sure the escape isn't escaped.
  \\ (
     ?: (0\d|[1-7]) # octal escape
      | (x(?![\da-fA-F]{2}).{0,2}) # hex escape
      | (u\{(?![\da-fA-F]{1,}\})[^}]*\}?) # unicode code point escape
      | (u(?!\{|[\da-fA-F]{4}).{0,4}) # unicode escape
  )
///
REGEX_INVALID_ESCAPE = ///
  ( (?:^|[^\\]) (?:\\\\)* ) # Make sure the escape isn't escaped.
  \\ (
     ?: (0\d) # octal escape
      | (x(?![\da-fA-F]{2}).{0,2}) # hex escape
      | (u\{(?![\da-fA-F]{1,}\})[^}]*\}?) # unicode code point escape
      | (u(?!\{|[\da-fA-F]{4}).{0,4}) # unicode escape
  )
///

TRAILING_SPACES = /\s+$/
# Compound assignment tokens.
COMPOUND_ASSIGN = [
  '-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>='
  '&=', '^=', '|=', '**=', '//=', '%%='
]

# Unary tokens.
UNARY = ['NEW', 'TYPEOF', 'DELETE']

UNARY_MATH = ['!', '~']

# Bit-shifting tokens.
SHIFT = ['<<', '>>', '>>>']

# Comparison tokens.
COMPARE = ['==', '!=', '<', '>', '<=', '>=']

# Mathematical tokens.
MATH = ['*', '/', '%', '//', '%%']

# Relational tokens that are negatable with `not` prefix.
RELATION = ['IN', 'OF', 'INSTANCEOF']

# Boolean tokens.
BOOL = ['TRUE', 'FALSE']

# Tokens which could legitimately be invoked or indexed. An opening
# parentheses or bracket following these tokens will be recorded as the start
# of a function invocation or indexing operation.
CALLABLE = ['IDENTIFIER', 'PROPERTY', ')', ']', '?', '@', 'THIS', 'SUPER', 'DYNAMIC_IMPORT']
INDEXABLE = CALLABLE.concat [
  'NUMBER', 'INFINITY', 'NAN', 'STRING', 'STRING_END', 'REGEX', 'REGEX_END'
  'BOOL', 'NULL', 'UNDEFINED', '}', '::'
]

# Tokens which can be the left-hand side of a less-than comparison, i.e. `a<b`.
COMPARABLE_LEFT_SIDE = ['IDENTIFIER', ')', ']', 'NUMBER']

# Tokens which a regular expression will never immediately follow (except spaced
# CALLABLEs in some cases), but which a division operator can.
#
# See: http://www-archive.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
NOT_REGEX = INDEXABLE.concat ['++', '--']

# Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN`
# occurs at the start of a line. We disambiguate these from trailing whens to
# avoid an ambiguity in the grammar.
LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR']

# Additional indent in front of these is ignored.
INDENTABLE_CLOSERS = [')', '}', ']']