/src/lexer.coffee

http://github.com/jashkenas/coffee-script

# The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
# matches against the beginning of the source code. When a match is found,
# a token is produced, the match is consumed, and we start again. Tokens are
# in the form:
#
#     [tag, value, locationData]
#
# where locationData is {first_line, first_column, last_line, last_column,
# last_line_exclusive, last_column_exclusive}, which is a format that can be
# fed directly into [Jison](https://github.com/zaach/jison). These are read by
# Jison in the `parser.lexer` function defined in coffeescript.coffee.
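#
# For example (an illustrative sketch, not literal output from this version),
# lexing `answer = 42` yields tokens along the lines of:
#
#     ['IDENTIFIER', 'answer', {first_line: 0, first_column: 0, ...}]
#     ['=',          '=',      {first_line: 0, first_column: 7, ...}]
#     ['NUMBER',     '42',     {first_line: 0, first_column: 9, ...}]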

{Rewriter, INVERSES, UNFINISHED} = require './rewriter'

# Import the helpers we need.
{count, starts, compact, repeat, invertLiterate, merge,
attachCommentsToNode, locationDataToString, throwSyntaxError,
replaceUnicodeCodePointEscapes, flatten, parseNumber} = require './helpers'

# The Lexer Class
# ---------------

# The Lexer class reads a stream of CoffeeScript and divvies it up into tagged
# tokens. Some potential ambiguity in the grammar has been avoided by
# pushing some extra smarts into the Lexer.
exports.Lexer = class Lexer

  # **tokenize** is the Lexer's main method. Scan by attempting to match tokens
  # one at a time, using a regular expression anchored at the start of the
  # remaining code, or a custom recursive token-matching method
  # (for interpolations). When the next token has been recorded, we move forward
  # within the code past the token, and begin again.
  #
  # Each tokenizing method is responsible for returning the number of characters
  # it has consumed.
  #
  # Before returning the token stream, run it through the [Rewriter](rewriter.html).
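  #
  # A minimal usage sketch (assuming this module is required as `./lexer`):
  #
  #     {Lexer} = require './lexer'
  #     tokens = new Lexer().tokenize 'square = (x) -> x * x'
  #     console.log (tag for [tag] in tokens)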
  tokenize: (code, opts = {}) ->
    @literate   = opts.literate  # Are we lexing literate CoffeeScript?
    @indent     = 0              # The current indentation level.
    @baseIndent = 0              # The overall minimum indentation level.
    @continuationLineAdditionalIndent = 0 # The over-indentation at the current level.
    @outdebt    = 0              # The under-outdentation at the current level.
    @indents    = []             # The stack of all current indentation levels.
    @indentLiteral = ''          # The indentation.
    @ends       = []             # The stack for pairing up tokens.
    @tokens     = []             # Stream of parsed tokens in the form `['TYPE', value, location data]`.
    @seenFor    = no             # Used to recognize `FORIN`, `FOROF` and `FORFROM` tokens.
    @seenImport = no             # Used to recognize `IMPORT FROM? AS?` tokens.
    @seenExport = no             # Used to recognize `EXPORT FROM? AS?` tokens.
    @importSpecifierList = no    # Used to identify when in an `IMPORT {...} FROM? ...`.
    @exportSpecifierList = no    # Used to identify when in an `EXPORT {...} FROM? ...`.
    @jsxDepth = 0                # Used to optimize JSX checks, how deep in JSX we are.
    @jsxObjAttribute = {}        # Used to detect if a JSX attribute is wrapped in {} (<div {props...} />).

    @chunkLine =
      opts.line or 0             # The start line for the current @chunk.
    @chunkColumn =
      opts.column or 0           # The start column of the current @chunk.
    @chunkOffset =
      opts.offset or 0           # The start offset for the current @chunk.
    @locationDataCompensations =
      opts.locationDataCompensations or {} # The location data compensations for the current @chunk.
    code = @clean code           # The stripped, cleaned original source code.

    # At every position, run through this list of attempted matches,
    # short-circuiting if any of them succeed. Their order determines precedence:
    # `@literalToken` is the fallback catch-all.
    i = 0
    while @chunk = code[i..]
      consumed = \
           @identifierToken() or
           @commentToken()    or
           @whitespaceToken() or
           @lineToken()       or
           @stringToken()     or
           @numberToken()     or
           @jsxToken()        or
           @regexToken()      or
           @jsToken()         or
           @literalToken()

      # Update position.
      [@chunkLine, @chunkColumn, @chunkOffset] = @getLineAndColumnFromChunk consumed

      i += consumed

      return {@tokens, index: i} if opts.untilBalanced and @ends.length is 0

    @closeIndentation()
    @error "missing #{end.tag}", (end.origin ? end)[2] if end = @ends.pop()
    return @tokens if opts.rewrite is off
    (new Rewriter).rewrite @tokens

  # Preprocess the code to remove leading and trailing whitespace, carriage
  # returns, etc. If we’re lexing literate CoffeeScript, strip external Markdown
  # by removing all lines that aren’t indented by at least four spaces or a tab.
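  #
  # For instance (illustrative), in a literate source only a line like
  #
  #         square = (x) -> x * x
  #
  # (indented four spaces) survives as code; the surrounding Markdown prose
  # is stripped by `invertLiterate`.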
  clean: (code) ->
    thusFar = 0
    if code.charCodeAt(0) is BOM
      code = code.slice 1
      @locationDataCompensations[0] = 1
      thusFar += 1
    if WHITESPACE.test code
      code = "\n#{code}"
      @chunkLine--
      @locationDataCompensations[0] ?= 0
      @locationDataCompensations[0] -= 1
    code = code
      .replace /\r/g, (match, offset) =>
        @locationDataCompensations[thusFar + offset] = 1
        ''
      .replace TRAILING_SPACES, ''
    code = invertLiterate code if @literate
    code

  # Tokenizers
  # ----------

  # Matches identifying literals: variables, keywords, method names, etc.
  # Check to ensure that JavaScript reserved words aren’t being used as
  # identifiers. Because CoffeeScript reserves a handful of keywords that are
  # allowed in JavaScript, we’re careful not to tag them as keywords when
  # referenced as property names here, so you can still do `jQuery.is()` even
  # though `is` means `===` otherwise.
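  #
  # For example (illustrative):
  #
  #     a is b       # `is` is lexed as the COMPARE token `==`
  #     jQuery.is()  # `is` is lexed as a PROPERTY, not a keyword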
  identifierToken: ->
    inJSXTag = @atJSXTag()
    regex = if inJSXTag then JSX_ATTRIBUTE else IDENTIFIER
    return 0 unless match = regex.exec @chunk
    [input, id, colon] = match

    # Preserve length of id for location data
    idLength = id.length
    poppedToken = undefined
    if id is 'own' and @tag() is 'FOR'
      @token 'OWN', id
      return id.length
    if id is 'from' and @tag() is 'YIELD'
      @token 'FROM', id
      return id.length
    if id is 'as' and @seenImport
      if @value() is '*'
        @tokens[@tokens.length - 1][0] = 'IMPORT_ALL'
      else if @value(yes) in COFFEE_KEYWORDS
        prev = @prev()
        [prev[0], prev[1]] = ['IDENTIFIER', @value(yes)]
      if @tag() in ['DEFAULT', 'IMPORT_ALL', 'IDENTIFIER']
        @token 'AS', id
        return id.length
    if id is 'as' and @seenExport
      if @tag() in ['IDENTIFIER', 'DEFAULT']
        @token 'AS', id
        return id.length
      if @value(yes) in COFFEE_KEYWORDS
        prev = @prev()
        [prev[0], prev[1]] = ['IDENTIFIER', @value(yes)]
        @token 'AS', id
        return id.length
    if id is 'default' and @seenExport and @tag() in ['EXPORT', 'AS']
      @token 'DEFAULT', id
      return id.length
    if id is 'do' and regExSuper = /^(\s*super)(?!\(\))/.exec @chunk[3...]
      @token 'SUPER', 'super'
      @token 'CALL_START', '('
      @token 'CALL_END', ')'
      [input, sup] = regExSuper
      return sup.length + 3

    prev = @prev()

    tag =
      if colon or prev? and
         (prev[0] in ['.', '?.', '::', '?::'] or
         not prev.spaced and prev[0] is '@')
        'PROPERTY'
      else
        'IDENTIFIER'

    tokenData = {}
    if tag is 'IDENTIFIER' and (id in JS_KEYWORDS or id in COFFEE_KEYWORDS) and
       not (@exportSpecifierList and id in COFFEE_KEYWORDS)
      tag = id.toUpperCase()
      if tag is 'WHEN' and @tag() in LINE_BREAK
        tag = 'LEADING_WHEN'
      else if tag is 'FOR'
        @seenFor = {endsLength: @ends.length}
      else if tag is 'UNLESS'
        tag = 'IF'
      else if tag is 'IMPORT'
        @seenImport = yes
      else if tag is 'EXPORT'
        @seenExport = yes
      else if tag in UNARY
        tag = 'UNARY'
      else if tag in RELATION
        if tag isnt 'INSTANCEOF' and @seenFor
          tag = 'FOR' + tag
          @seenFor = no
        else
          tag = 'RELATION'
          if @value() is '!'
            poppedToken = @tokens.pop()
            tokenData.invert = poppedToken.data?.original ? poppedToken[1]
    else if tag is 'IDENTIFIER' and @seenFor and id is 'from' and
       isForFrom(prev)
      tag = 'FORFROM'
      @seenFor = no
    # Throw an error on attempts to use `get` or `set` as keywords, or
    # what CoffeeScript would normally interpret as calls to functions named
    # `get` or `set`, i.e. `get({foo: function () {}})`.
    else if tag is 'PROPERTY' and prev
      if prev.spaced and prev[0] in CALLABLE and /^[gs]et$/.test(prev[1]) and
         @tokens.length > 1 and @tokens[@tokens.length - 2][0] not in ['.', '?.', '@']
        @error "'#{prev[1]}' cannot be used as a keyword, or as a function call
        without parentheses", prev[2]
      else if prev[0] is '.' and @tokens.length > 1 and (prevprev = @tokens[@tokens.length - 2])[0] is 'UNARY' and prevprev[1] is 'new'
        prevprev[0] = 'NEW_TARGET'
      else if @tokens.length > 2
        prevprev = @tokens[@tokens.length - 2]
        if prev[0] in ['@', 'THIS'] and prevprev and prevprev.spaced and
           /^[gs]et$/.test(prevprev[1]) and
           @tokens[@tokens.length - 3][0] not in ['.', '?.', '@']
          @error "'#{prevprev[1]}' cannot be used as a keyword, or as a
          function call without parentheses", prevprev[2]

    if tag is 'IDENTIFIER' and id in RESERVED and not inJSXTag
      @error "reserved word '#{id}'", length: id.length

    unless tag is 'PROPERTY' or @exportSpecifierList or @importSpecifierList
      if id in COFFEE_ALIASES
        alias = id
        id = COFFEE_ALIAS_MAP[id]
        tokenData.original = alias
      tag = switch id
        when '!'                 then 'UNARY'
        when '==', '!='          then 'COMPARE'
        when 'true', 'false'     then 'BOOL'
        when 'break', 'continue', \
             'debugger'          then 'STATEMENT'
        when '&&', '||'          then id
        else  tag

    tagToken = @token tag, id, length: idLength, data: tokenData
    tagToken.origin = [tag, alias, tagToken[2]] if alias
    if poppedToken
      [tagToken[2].first_line, tagToken[2].first_column, tagToken[2].range[0]] =
        [poppedToken[2].first_line, poppedToken[2].first_column, poppedToken[2].range[0]]
    if colon
      colonOffset = input.lastIndexOf if inJSXTag then '=' else ':'
      colonToken = @token ':', ':', offset: colonOffset
      colonToken.jsxColon = yes if inJSXTag # used by rewriter
    if inJSXTag and tag is 'IDENTIFIER' and prev[0] isnt ':'
      @token ',', ',', length: 0, origin: tagToken, generated: yes

    input.length

  # Matches numbers, including decimals, hex, and exponential notation.
  # Be careful not to interfere with ranges in progress.
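  #
  # A few illustrative cases:
  #
  #     1e3, 0b101, 0o17, 0x9A   # valid: lowercase radix prefixes and `e`
  #     0XFF                     # error: radix prefix must be lowercase
  #     017                      # error: octal literals need the `0o` prefix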
  numberToken: ->
    return 0 unless match = NUMBER.exec @chunk

    number = match[0]
    lexedLength = number.length

    switch
      when /^0[BOX]/.test number
        @error "radix prefix in '#{number}' must be lowercase", offset: 1
      when /^(?!0x).*E/.test number
        @error "exponential notation in '#{number}' must be indicated with a lowercase 'e'",
          offset: number.indexOf('E')
      when /^0\d*[89]/.test number
        @error "decimal literal '#{number}' must not be prefixed with '0'", length: lexedLength
      when /^0\d+/.test number
        @error "octal literal '#{number}' must be prefixed with '0o'", length: lexedLength

    parsedValue = parseNumber number
    tokenData = {parsedValue}

    tag = if parsedValue is Infinity then 'INFINITY' else 'NUMBER'
    if tag is 'INFINITY'
      tokenData.original = number
    @token tag, number,
      length: lexedLength
      data: tokenData
    lexedLength

  # Matches strings, including multiline strings, as well as heredocs, with or without
  # interpolation.
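  #
  # For example (illustrative), in the heredoc
  #
  #     """
  #       Hello #{name}.
  #     """
  #
  # the smallest indentation found (two spaces) is later stripped from every
  # line of the string.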
  stringToken: ->
    [quote] = STRING_START.exec(@chunk) || []
    return 0 unless quote

    # If the preceding token is `from` and this is an import or export statement,
    # properly tag the `from`.
    prev = @prev()
    if prev and @value() is 'from' and (@seenImport or @seenExport)
      prev[0] = 'FROM'

    regex = switch quote
      when "'"   then STRING_SINGLE
      when '"'   then STRING_DOUBLE
      when "'''" then HEREDOC_SINGLE
      when '"""' then HEREDOC_DOUBLE

    {tokens, index: end} = @matchWithInterpolations regex, quote

    heredoc = quote.length is 3
    if heredoc
      # Find the smallest indentation. It will be removed from all lines later.
      indent = null
      doc = (token[1] for token, i in tokens when token[0] is 'NEOSTRING').join '#{}'
      while match = HEREDOC_INDENT.exec doc
        attempt = match[1]
        indent = attempt if indent is null or 0 < attempt.length < indent.length

    delimiter = quote.charAt(0)
    @mergeInterpolationTokens tokens, {quote, indent, endOffset: end}, (value) =>
      @validateUnicodeCodePointEscapes value, delimiter: quote

    if @atJSXTag()
      @token ',', ',', length: 0, origin: @prev(), generated: yes

    end

  # Matches and consumes comments. The comments are taken out of the token
  # stream and saved for later, to be reinserted into the output after
  # everything has been parsed and the JavaScript code generated.
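  #
  # Both comment styles are matched here (illustrative):
  #
  #     x = 1  # a line comment, attached to the preceding token
  #     ### a here-comment, passed through as a block comment ###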
  commentToken: (chunk = @chunk, {heregex, returnCommentTokens = no, offsetInChunk = 0} = {}) ->
    return 0 unless match = chunk.match COMMENT
    [commentWithSurroundingWhitespace, hereLeadingWhitespace, hereComment, hereTrailingWhitespace, lineComment] = match
    contents = null
    # Does this comment follow code on the same line?
    leadingNewline = /^\s*\n+\s*#/.test commentWithSurroundingWhitespace
    if hereComment
      matchIllegal = HERECOMMENT_ILLEGAL.exec hereComment
      if matchIllegal
        @error "block comments cannot contain #{matchIllegal[0]}",
          offset: '###'.length + matchIllegal.index, length: matchIllegal[0].length

      # Parse indentation or outdentation as if this block comment didn’t exist.
      chunk = chunk.replace "####{hereComment}###", ''
      # Remove leading newlines, like `Rewriter::removeLeadingNewlines`, to
      # avoid the creation of unwanted `TERMINATOR` tokens.
      chunk = chunk.replace /^\n+/, ''
      @lineToken {chunk}

      # Pull out the ###-style comment’s content, and format it.
      content = hereComment
      contents = [{
        content
        length: commentWithSurroundingWhitespace.length - hereLeadingWhitespace.length - hereTrailingWhitespace.length
        leadingWhitespace: hereLeadingWhitespace
      }]
    else
      # The `COMMENT` regex captures successive line comments as one token.
      # Remove any leading newlines before the first comment, but preserve
      # blank lines between line comments.
      leadingNewlines = ''
      content = lineComment.replace /^(\n*)/, (leading) ->
        leadingNewlines = leading
        ''
      precedingNonCommentLines = ''
      hasSeenFirstCommentLine = no
      contents =
        content.split '\n'
        .map (line, index) ->
          unless line.indexOf('#') > -1
            precedingNonCommentLines += "\n#{line}"
            return
          leadingWhitespace = ''
          content = line.replace /^([ |\t]*)#/, (_, whitespace) ->
            leadingWhitespace = whitespace
            ''
          comment = {
            content
            length: '#'.length + content.length
            leadingWhitespace: "#{unless hasSeenFirstCommentLine then leadingNewlines else ''}#{precedingNonCommentLines}#{leadingWhitespace}"
            precededByBlankLine: !!precedingNonCommentLines
          }
          hasSeenFirstCommentLine = yes
          precedingNonCommentLines = ''
          comment
        .filter (comment) -> comment

    getIndentSize = ({leadingWhitespace, nonInitial}) ->
      lastNewlineIndex = leadingWhitespace.lastIndexOf '\n'
      if hereComment? or not nonInitial
        return null unless lastNewlineIndex > -1
      else
        lastNewlineIndex ?= -1
      leadingWhitespace.length - 1 - lastNewlineIndex
    commentAttachments = for {content, length, leadingWhitespace, precededByBlankLine}, i in contents
      nonInitial = i isnt 0
      leadingNewlineOffset = if nonInitial then 1 else 0
      offsetInChunk += leadingNewlineOffset + leadingWhitespace.length
      indentSize = getIndentSize {leadingWhitespace, nonInitial}
      noIndent = not indentSize? or indentSize is -1
      commentAttachment = {
        content
        here: hereComment?
        newLine: leadingNewline or nonInitial # Line comments after the first one start new lines, by definition.
        locationData: @makeLocationData {offsetInChunk, length}
        precededByBlankLine
        indentSize
        indented:  not noIndent and indentSize > @indent
        outdented: not noIndent and indentSize < @indent
      }
      commentAttachment.heregex = yes if heregex
      offsetInChunk += length
      commentAttachment

    prev = @prev()
    unless prev
      # If there’s no previous token, create a placeholder token to attach
      # this comment to, and follow it with a newline.
      commentAttachments[0].newLine = yes
      @lineToken chunk: @chunk[commentWithSurroundingWhitespace.length..], offset: commentWithSurroundingWhitespace.length # Set the indent.
      placeholderToken = @makeToken 'JS', '', offset: commentWithSurroundingWhitespace.length, generated: yes
      placeholderToken.comments = commentAttachments
      @tokens.push placeholderToken
      @newlineToken commentWithSurroundingWhitespace.length
    else
      attachCommentsToNode commentAttachments, prev

    return commentAttachments if returnCommentTokens
    commentWithSurroundingWhitespace.length

  # Matches JavaScript interpolated directly into the source via backticks.
  jsToken: ->
    return 0 unless @chunk.charAt(0) is '`' and
      (match = (matchedHere = HERE_JSTOKEN.exec(@chunk)) or JSTOKEN.exec(@chunk))
    # Convert escaped backticks to backticks, and escaped backslashes
    # just before escaped backticks to backslashes
    script = match[1].replace /\\+(`|$)/g, (string) ->
      string[-Math.ceil(string.length / 2)..]
    {length} = match[0]
    @token 'JS', script, {length, data: {here: !!matchedHere}}
    length

  # Matches regular expression literals, as well as multiline extended ones.
  # Regular expressions are difficult to distinguish from division when lexing,
  # so we borrow some basic heuristics from JavaScript and Ruby.
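  #
  # Illustrative cases for the division-versus-regex heuristics:
  #
  #     x = a / b / c    # division: spaces on both sides of `/`
  #     x = foo /bar/g   # regex: `foo` is a spaced callable before `/bar/g`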
  regexToken: ->
    switch
      when match = REGEX_ILLEGAL.exec @chunk
        @error "regular expressions cannot begin with #{match[2]}",
          offset: match.index + match[1].length
      when match = @matchWithInterpolations HEREGEX, '///'
        {tokens, index} = match
        comments = []
        while matchedComment = HEREGEX_COMMENT.exec @chunk[0...index]
          {index: commentIndex} = matchedComment
          [fullMatch, leadingWhitespace, comment] = matchedComment
          comments.push {comment, offsetInChunk: commentIndex + leadingWhitespace.length}
        commentTokens = flatten(
          for commentOpts in comments
            @commentToken commentOpts.comment, Object.assign commentOpts, heregex: yes, returnCommentTokens: yes
        )
      when match = REGEX.exec @chunk
        [regex, body, closed] = match
        @validateEscapes body, isRegex: yes, offsetInChunk: 1
        index = regex.length
        prev = @prev()
        if prev
          if prev.spaced and prev[0] in CALLABLE
            return 0 if not closed or POSSIBLY_DIVISION.test regex
          else if prev[0] in NOT_REGEX
            return 0
        @error 'missing / (unclosed regex)' unless closed
      else
        return 0

    [flags] = REGEX_FLAGS.exec @chunk[index..]
    end = index + flags.length
    origin = @makeToken 'REGEX', null, length: end
    switch
      when not VALID_FLAGS.test flags
        @error "invalid regular expression flags #{flags}", offset: index, length: flags.length
      when regex or tokens.length is 1
        delimiter = if body then '/' else '///'
        body ?= tokens[0][1]
        @validateUnicodeCodePointEscapes body, {delimiter}
        @token 'REGEX', "/#{body}/#{flags}", {length: end, origin, data: {delimiter}}
      else
        @token 'REGEX_START', '(',    {length: 0, origin, generated: yes}
        @token 'IDENTIFIER', 'RegExp', length: 0, generated: yes
        @token 'CALL_START', '(',      length: 0, generated: yes
        @mergeInterpolationTokens tokens, {double: yes, heregex: {flags}, endOffset: end - flags.length, quote: '///'}, (str) =>
          @validateUnicodeCodePointEscapes str, {delimiter}
        if flags
          @token ',', ',',                    offset: index - 1, length: 0, generated: yes
          @token 'STRING', '"' + flags + '"', offset: index,     length: flags.length
        @token ')', ')',                      offset: end,       length: 0, generated: yes
        @token 'REGEX_END', ')',              offset: end,       length: 0, generated: yes

    # Explicitly attach any heregex comments to the REGEX/REGEX_END token.
    if commentTokens?.length
      addTokenData @tokens[@tokens.length - 1],
        heregexCommentTokens: commentTokens

    end

  # Matches newlines, indents, and outdents, and determines which is which.
  # If we can detect that the current line is continued onto the next line,
  # then the newline is suppressed:
  #
  #     elements
  #       .each( ... )
  #       .map( ... )
  #
  # Keeps track of the level of indentation, because a single outdent token
  # can close multiple indents, so we need to know how far in we happen to be.
  lineToken: ({chunk = @chunk, offset = 0} = {}) ->
    return 0 unless match = MULTI_DENT.exec chunk
    indent = match[0]

    prev = @prev()
    backslash = prev?[0] is '\\'
    @seenFor = no unless (backslash or @seenFor?.endsLength < @ends.length) and @seenFor
    @seenImport = no unless (backslash and @seenImport) or @importSpecifierList
    @seenExport = no unless (backslash and @seenExport) or @exportSpecifierList

    size = indent.length - 1 - indent.lastIndexOf '\n'
    noNewlines = @unfinished()

    newIndentLiteral = if size > 0 then indent[-size..] else ''
    unless /^(.?)\1*$/.exec newIndentLiteral
      @error 'mixed indentation', offset: indent.length
      return indent.length

    minLiteralLength = Math.min newIndentLiteral.length, @indentLiteral.length
    if newIndentLiteral[...minLiteralLength] isnt @indentLiteral[...minLiteralLength]
      @error 'indentation mismatch', offset: indent.length
      return indent.length

    if size - @continuationLineAdditionalIndent is @indent
      if noNewlines then @suppressNewlines() else @newlineToken offset
      return indent.length

    if size > @indent
      if noNewlines
        @continuationLineAdditionalIndent = size - @indent unless backslash
        if @continuationLineAdditionalIndent
          prev.continuationLineIndent = @indent + @continuationLineAdditionalIndent
        @suppressNewlines()
        return indent.length
      unless @tokens.length
        @baseIndent = @indent = size
        @indentLiteral = newIndentLiteral
        return indent.length
      diff = size - @indent + @outdebt
      @token 'INDENT', diff, offset: offset + indent.length - size, length: size
      @indents.push diff
      @ends.push {tag: 'OUTDENT'}
      @outdebt = @continuationLineAdditionalIndent = 0
      @indent = size
      @indentLiteral = newIndentLiteral
    else if size < @baseIndent
      @error 'missing indentation', offset: offset + indent.length
    else
      endsContinuationLineIndentation = @continuationLineAdditionalIndent > 0
      @continuationLineAdditionalIndent = 0
      @outdentToken {moveOut: @indent - size, noNewlines, outdentLength: indent.length, offset, indentSize: size, endsContinuationLineIndentation}
    indent.length

  # Record an outdent token or multiple tokens, if we happen to be moving back
  # inwards past several recorded indents. Sets new @indent value.
  outdentToken: ({moveOut, noNewlines, outdentLength = 0, offset = 0, indentSize, endsContinuationLineIndentation}) ->
    decreasedIndent = @indent - moveOut
    while moveOut > 0
      lastIndent = @indents[@indents.length - 1]
      if not lastIndent
        @outdebt = moveOut = 0
      else if @outdebt and moveOut <= @outdebt
        @outdebt -= moveOut
        moveOut   = 0
      else
        dent = @indents.pop() + @outdebt
        if outdentLength and @chunk[outdentLength] in INDENTABLE_CLOSERS
          decreasedIndent -= dent - moveOut
          moveOut = dent
        @outdebt = 0
        # pair might call outdentToken, so preserve decreasedIndent
        @pair 'OUTDENT'
        @token 'OUTDENT', moveOut, length: outdentLength, indentSize: indentSize + moveOut - dent
        moveOut -= dent
    @outdebt -= moveOut if dent
    @suppressSemicolons()

    unless @tag() is 'TERMINATOR' or noNewlines
      terminatorToken = @token 'TERMINATOR', '\n', offset: offset + outdentLength, length: 0
      terminatorToken.endsContinuationLineIndentation = {preContinuationLineIndent: @indent} if endsContinuationLineIndentation
    @indent = decreasedIndent
    @indentLiteral = @indentLiteral[...decreasedIndent]
    this

  # Matches and consumes non-meaningful whitespace. Tag the previous token
  # as being “spaced”, because there are some cases where it makes a difference.
  whitespaceToken: ->
    return 0 unless (match = WHITESPACE.exec @chunk) or
                    (nline = @chunk.charAt(0) is '\n')
    prev = @prev()
    prev[if match then 'spaced' else 'newLine'] = true if prev
    if match then match[0].length else 0

  # Generate a newline token. Consecutive newlines get merged together.
  newlineToken: (offset) ->
    @suppressSemicolons()
    @token 'TERMINATOR', '\n', {offset, length: 0} unless @tag() is 'TERMINATOR'
    this

  # Use a `\` at a line-ending to suppress the newline.
  # The slash is removed here once its job is done.
  suppressNewlines: ->
    prev = @prev()
    if prev[1] is '\\'
      if prev.comments and @tokens.length > 1
        # `@tokens.length` should be at least 2 (some code, then `\`).
        # If something puts a `\` after nothing, they deserve to lose any
        # comments that trail it.
        attachCommentsToNode prev.comments, @tokens[@tokens.length - 2]
      @tokens.pop()
    this

  jsxToken: ->
    firstChar = @chunk[0]
    # Check the previous token to detect if attribute is spread.
    prevChar = if @tokens.length > 0 then @tokens[@tokens.length - 1][0] else ''
    if firstChar is '<'
      match = JSX_IDENTIFIER.exec(@chunk[1...]) or JSX_FRAGMENT_IDENTIFIER.exec(@chunk[1...])
      return 0 unless match and (
        @jsxDepth > 0 or
        # Not the right hand side of an unspaced comparison (i.e. `a<b`).
        not (prev = @prev()) or
        prev.spaced or
        prev[0] not in COMPARABLE_LEFT_SIDE
      )
      [input, id] = match
      fullId = id
      if '.' in id
        [id, properties...] = id.split '.'
      else
        properties = []
      tagToken = @token 'JSX_TAG', id,
        length: id.length + 1
        data:
          openingBracketToken: @makeToken '<', '<'
          tagNameToken: @makeToken 'IDENTIFIER', id, offset: 1
      offset = id.length + 1
      for property in properties
        @token '.', '.', {offset}
        offset += 1
        @token 'PROPERTY', property, {offset}
        offset += property.length
      @token 'CALL_START', '(', generated: yes
      @token '[', '[', generated: yes
      @ends.push {tag: '/>', origin: tagToken, name: id, properties}
      @jsxDepth++
      return fullId.length + 1
    else if jsxTag = @atJSXTag()
      if @chunk[...2] is '/>' # Self-closing tag.
        @pair '/>'
        @token ']', ']',
          length: 2
          generated: yes
        @token 'CALL_END', ')',
          length: 2
          generated: yes
          data:
            selfClosingSlashToken: @makeToken '/', '/'
            closingBracketToken: @makeToken '>', '>', offset: 1
        @jsxDepth--
        return 2
      else if firstChar is '{'
        if prevChar is ':'
          # This token represents the start of a JSX attribute value
          # that’s an expression (e.g. the `{b}` in `<div a={b} />`).
          # Our grammar represents the beginnings of expressions as `(`
          # tokens, so make this into a `(` token that displays as `{`.
          token = @token '(', '{'
          @jsxObjAttribute[@jsxDepth] = no
          # tag attribute name as JSX
          addTokenData @tokens[@tokens.length - 3],
            jsx: yes
        else
          token = @token '{', '{'
          @jsxObjAttribute[@jsxDepth] = yes
        @ends.push {tag: '}', origin: token}
        return 1
      else if firstChar is '>' # end of opening tag
        # Ignore terminators inside a tag.
        {origin: openingTagToken} = @pair '/>' # As if the current tag was self-closing.
        @token ']', ']',
          generated: yes
          data:
            closingBracketToken: @makeToken '>', '>'
        @token ',', 'JSX_COMMA', generated: yes
        {tokens, index: end} =
          @matchWithInterpolations INSIDE_JSX, '>', '</', JSX_INTERPOLATION
        @mergeInterpolationTokens tokens, {endOffset: end, jsx: yes}, (value) =>
          @validateUnicodeCodePointEscapes value, delimiter: '>'
        match = JSX_IDENTIFIER.exec(@chunk[end...]) or JSX_FRAGMENT_IDENTIFIER.exec(@chunk[end...])
        if not match or match[1] isnt "#{jsxTag.name}#{(".#{property}" for property in jsxTag.properties).join ''}"
          @error "expected corresponding JSX closing tag for #{jsxTag.name}",
            jsxTag.origin.data.tagNameToken[2]
        [, fullTagName] = match
        afterTag = end + fullTagName.length
        if @chunk[afterTag] isnt '>'
          @error "missing closing > after tag name", offset: afterTag, length: 1
        # -2/+2 for the opening `</` and +1 for the closing `>`.
        endToken = @token 'CALL_END', ')',
          offset: end - 2
          length: fullTagName.length + 3
          generated: yes
          data:
            closingTagOpeningBracketToken: @makeToken '<', '<', offset: end - 2
            closingTagSlashToken: @makeToken '/', '/', offset: end - 1
            # TODO: individual tokens for complex tag name? eg < / A . B >
            closingTagNameToken: @makeToken 'IDENTIFIER', fullTagName, offset: end
            closingTagClosingBracketToken: @makeToken '>', '>', offset: end + fullTagName.length
        # make the closing tag location data more easily accessible to the grammar
        addTokenData openingTagToken, endToken.data
        @jsxDepth--
        return afterTag + 1
      else
        return 0
    else if @atJSXTag 1
      if firstChar is '}'
        @pair firstChar
        if @jsxObjAttribute[@jsxDepth]
          @token '}', '}'
          @jsxObjAttribute[@jsxDepth] = no
        else
          @token ')', '}'
        @token ',', ',', generated: yes
        return 1
      else
        return 0
    else
      return 0

  atJSXTag: (depth = 0) ->
    return no if @jsxDepth is 0
    i = @ends.length - 1
    i-- while @ends[i]?.tag is 'OUTDENT' or depth-- > 0 # Ignore indents.
    last = @ends[i]
    last?.tag is '/>' and last

  # We treat all other single characters as a token. E.g.: `( ) , . !`
  # Multi-character operators are also literal tokens, so that Jison can assign
  # the proper order of operations. There are some symbols that we tag specially
  # here: `;` and newlines are both treated as a `TERMINATOR`; we distinguish
  # parentheses that indicate a method call from regular parentheses; and so on.
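  #
  # For example (illustrative):
  #
  #     print('hi')    # unspaced `(` after a callable becomes CALL_START
  #     print ('hi')   # spaced `(` remains a plain grouping parenthesis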
  literalToken: ->
    if match = OPERATOR.exec @chunk
      [value] = match
      @tagParameters() if CODE.test value
    else
      value = @chunk.charAt 0
    tag  = value
    prev = @prev()

    if prev and value in ['=', COMPOUND_ASSIGN...]
      skipToken = false
      if value is '=' and prev[1] in ['||', '&&'] and not prev.spaced
        prev[0] = 'COMPOUND_ASSIGN'
        prev[1] += '='
        prev.data.original += '=' if prev.data?.original
        prev[2].range = [
          prev[2].range[0]
          prev[2].range[1] + 1
        ]
        prev[2].last_column += 1
        prev[2].last_column_exclusive += 1
        prev = @tokens[@tokens.length - 2]
        skipToken = true
      if prev and prev[0] isnt 'PROPERTY'
        origin = prev.origin ? prev
        message = isUnassignable prev[1], origin[1]
        @error message, origin[2] if message
      return value.length if skipToken

    if value is '(' and prev?[0] is 'IMPORT'
      prev[0] = 'DYNAMIC_IMPORT'

    if value is '{' and @seenImport
      @importSpecifierList = yes
    else if @importSpecifierList and value is '}'
      @importSpecifierList = no
    else if value is '{' and prev?[0] is 'EXPORT'
      @exportSpecifierList = yes
    else if @exportSpecifierList and value is '}'
      @exportSpecifierList = no

    if value is ';'
      @error 'unexpected ;' if prev?[0] in ['=', UNFINISHED...]
      @seenFor = @seenImport = @seenExport = no
      tag = 'TERMINATOR'
    else if value is '*' and prev?[0] is 'EXPORT'
      tag = 'EXPORT_ALL'
    else if value in MATH            then tag = 'MATH'
    else if value in COMPARE         then tag = 'COMPARE'
    else if value in COMPOUND_ASSIGN then tag = 'COMPOUND_ASSIGN'
    else if value in UNARY           then tag = 'UNARY'
    else if value in UNARY_MATH      then tag = 'UNARY_MATH'
    else if value in SHIFT           then tag = 'SHIFT'
    else if value is '?' and prev?.spaced then tag = 'BIN?'
    else if prev
      if value is '(' and not prev.spaced and prev[0] in CALLABLE
        prev[0] = 'FUNC_EXIST' if prev[0] is '?'
        tag = 'CALL_START'
      else if value is '[' and ((prev[0] in INDEXABLE and not prev.spaced) or
         (prev[0] is '::')) # `.prototype` can’t be a method you can call.
        tag = 'INDEX_START'
        switch prev[0]
          when '?'  then prev[0] = 'INDEX_SOAK'
    token = @makeToken tag, value
    switch value
      when '(', '{', '[' then @ends.push {tag: INVERSES[value], origin: token}
      when ')', '}', ']' then @pair value
    @tokens.push token
    value.length

  # Token Manipulators
  # ------------------

  # A source of ambiguity in our grammar used to be parameter lists in function
  # definitions versus argument lists in function calls. Walk backwards, tagging
  # parameters specially in order to make things easier for the parser.
  tagParameters: ->
    return @tagDoIife() if @tag() isnt ')'
    stack = []
    {tokens} = this
    i = tokens.length
    paramEndToken = tokens[--i]
    paramEndToken[0] = 'PARAM_END'
    while tok = tokens[--i]
      switch tok[0]
        when ')'
          stack.push tok
        when '(', 'CALL_START'
          if stack.length then stack.pop()
          else if tok[0] is '('
            tok[0] = 'PARAM_START'
            return @tagDoIife i - 1
          else
            paramEndToken[0] = 'CALL_END'
            return this
    this

  # Tag `do` followed by a function differently than `do` followed by e.g. an
  # identifier, to allow for different grammar precedence.
  tagDoIife: (tokenIndex) ->
    tok = @tokens[tokenIndex ? @tokens.length - 1]
    return this unless tok?[0] is 'DO'
    tok[0] = 'DO_IIFE'
    this

  # Close up all remaining open blocks at the end of the file.
  closeIndentation: ->
    @outdentToken moveOut: @indent, indentSize: 0

  # Match the contents of a delimited token and expand variables and expressions
  # inside it using Ruby-like notation for substitution of arbitrary
  # expressions.
  #
  #     "Hello #{name.capitalize()}."
  #
  # If it encounters an interpolation, this method will recursively create a new
  # Lexer and tokenize until the `{` of `#{` is balanced with a `}`.
  #
  #  - `regex` matches the contents of a token (but not `delimiter`, and not
  #    `#{` if interpolations are desired).
  #  - `delimiter` is the delimiter of the token. Examples are `'`, `"`, `'''`,
  #    `"""` and `///`.
  #  - `closingDelimiter` is different from `delimiter` only in JSX.
  #  - `interpolators` matches the start of an interpolation; for JSX it’s both
  #    `{` and `<` (i.e. a nested JSX tag).
  #
  # This method allows us to have strings within interpolations within strings,
  # ad infinitum.
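  #
  # A rough sketch of the result for the chunk `"a#{b}c"` (illustrative):
  #
  #     tokens: [['NEOSTRING', 'a'], ['TOKENS', <tokens for (b)>], ['NEOSTRING', 'c']]
  #     index: 8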
  matchWithInterpolations: (regex, delimiter, closingDelimiter = delimiter, interpolators = /^#\{/) ->
    tokens = []
    offsetInChunk = delimiter.length
    return null unless @chunk[...offsetInChunk] is delimiter
    str = @chunk[offsetInChunk..]
    loop
      [strPart] = regex.exec str

      @validateEscapes strPart, {isRegex: delimiter.charAt(0) is '/', offsetInChunk}

      # Push a fake `'NEOSTRING'` token, which will get turned into a real string later.
      tokens.push @makeToken 'NEOSTRING', strPart, offset: offsetInChunk

      str = str[strPart.length..]
      offsetInChunk += strPart.length

      break unless match = interpolators.exec str
      [interpolator] = match

      # To remove the `#` in `#{`.
      interpolationOffset = interpolator.length - 1
      [line, column, offset] = @getLineAndColumnFromChunk offsetInChunk + interpolationOffset
      rest = str[interpolationOffset..]
      {tokens: nested, index} =
        new Lexer().tokenize rest, {line, column, offset, untilBalanced: on, @locationDataCompensations}
      # Account for the `#` in `#{`.
      index += interpolationOffset

      braceInterpolator = str[index - 1] is '}'
      if braceInterpolator
        # Turn the leading and trailing `{` and `}` into parentheses. Unnecessary
        # parentheses will be removed later.
        [open, ..., close] = nested
        open[0] = 'INTERPOLATION_START'
        open[1] = '('
        open[2].first_column -= interpolationOffset
        open[2].range = [
          open[2].range[0] - interpolationOffset
          open[2].range[1]
        ]
        close[0] = 'INTERPOLATION_END'
        close[1] = ')'
        close.origin = ['', 'end of interpolation', close[2]]

      # Remove leading `'TERMINATOR'` (if any).
      nested.splice 1, 1 if nested[1]?[0] is 'TERMINATOR'
      # Remove trailing `'INDENT'/'OUTDENT'` pair (if any).
      nested.splice -3, 2 if nested[nested.length - 3]?[0] is 'INDENT' and nested[nested.length - 2][0] is 'OUTDENT'

      unless braceInterpolator
        # We are not using `{` and `}`, so wrap the interpolated tokens instead.
        open = @makeToken 'INTERPOLATION_START', '(', offset: offsetInChunk,         length: 0, generated: yes
        close = @makeToken 'INTERPOLATION_END', ')',  offset: offsetInChunk + index, length: 0, generated: yes
        nested = [open, nested..., close]

      # Push a fake `'TOKENS'` token, which will get turned into real tokens later.
      tokens.push ['TOKENS', nested]

      str = str[index..]
      offsetInChunk += index

    unless str[...closingDelimiter.length] is closingDelimiter
      @error "missing #{closingDelimiter}", length: delimiter.length

    {tokens, index: offsetInChunk + closingDelimiter.length}

  # Merge the array `tokens` of the fake token types `'TOKENS'` and `'NEOSTRING'`
  # (as returned by `matchWithInterpolations`) into the token stream. The values
  # of `'NEOSTRING'`s are converted using `fn` and turned into strings using
  # `options` first.
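  #
  # E.g. the tokens for `"a#{b}c"` are merged into roughly (illustrative):
  #
  #     STRING_START 'a' INTERPOLATION_START b INTERPOLATION_END 'c' STRING_END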
  mergeInterpolationTokens: (tokens, options, fn) ->
    {quote, indent, double, heregex, endOffset, jsx} = options

    if tokens.length > 1
      lparen = @token 'STRING_START', '(', length: quote?.length ? 0, data: {quote}, generated: not quote?.length

    firstIndex = @tokens.length
    $ = tokens.length - 1
    for token, i in tokens
      [tag, value] = token
      switch tag
        when 'TOKENS'
          # There are comments (and nothing else) in this interpolation.
          if value.length is 2 and (value[0].comments or value[1].comments)
            placeholderToken = @makeToken 'JS', '', generated: yes
            # Use the same location data as the first parenthesis.
            placeholderToken[2] = value[0][2]
            for val in value when val.comments
              placeholderToken.comments ?= []
              placeholderToken.comments.push val.comments...
            value.splice 1, 0, placeholderToken
          # Push all the tokens in the fake `'TOKENS'` token. These already have
          # sane location data.
          locationToken = value[0]
          tokensToPush = value
        when 'NEOSTRING'
          # Convert `'NEOSTRING'` into `'STRING'`.
          converted = fn.call this, token[1], i
          addTokenData token, initialChunk: yes if i is 0
          addTokenData token, finalChunk: yes   if i is $
          addTokenData token, {indent, quote, double}
          addTokenData token, {heregex} if heregex
          addTokenData token, {jsx} if jsx
          token[0] = 'STRING'
          token[1] = '"' + converted + '"'
          if tokens.length is 1 and quote?
            token[2].first_column -= quote.length
            if token[1].substr(-2, 1) is '\n'
              token[2].last_line += 1
              token[2].last_column = quote.length - 1
            else
              token[2].last_column += quote.length
              token[2].last_column -= 1 if token[1].length is 2
            token[2].last_column_exclusive += quote.length
            token[2].range = [
              token[2].range[0] - quote.length
              token[2].range[1] + quote.length
            ]
          locationToken = token
          tokensToPush = [token]
      @tokens.push tokensToPush...

    if lparen
      [..., lastToken] = tokens
      lparen.origin = ['STRING', null,
        first_line:            lparen[2].first_line
        first_column:          lparen[2].first_column
        last_line:             lastToken[2].last_line
        last_column:           lastToken[2].last_column
        last_line_exclusive:   lastToken[2].last_line_exclusive
        last_column_exclusive: lastToken[2].last_column_exclusive
        range: [
          lparen[2].range[0]
          lastToken[2].range[1]
        ]
      ]
      lparen[2] = lparen.origin[2] unless quote?.length
      rparen = @token 'STRING_END', ')', offset: endOffset - (quote ? '').length, length: quote?.length ? 0, generated: not quote?.length

  # Pairs up a closing token, ensuring that all listed pairs of tokens are
  # correctly balanced throughout the course of the token stream.
  pair: (tag) ->
    [..., prev] = @ends
    unless tag is wanted = prev?.tag
      @error "unmatched #{tag}" unless 'OUTDENT' is wanted
      # Auto-close `INDENT` to support syntax like this:
      #
      #     el.click((event) ->
      #       el.hide())
      #
      [..., lastIndent] = @indents
      @outdentToken moveOut: lastIndent, noNewlines: true
      return @pair tag
    @ends.pop()

  # Helpers
  # -------

  # Compensate for the things we strip out initially (e.g. carriage returns)
  # so that location data stays accurate with respect to the original source file.
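  #
  # E.g. if a carriage return was removed at offset 5 during `clean`, then
  # `@locationDataCompensations[5] is 1`, and offsets past that point must be
  # shifted by one to line up with the original source.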
  getLocationDataCompensation: (start, end) ->
    totalCompensation = 0
    initialEnd = end
    current = start
    while current <= end
      break if current is end and start isnt initialEnd
      compensation = @locationDataCompensations[current]
      if compensation?
        totalCompensation += compensation
        end += compensation
      current++
    return totalCompensation

  # Returns the line and column number from an offset into the current chunk.
  #
  # `offset` is a number of characters into `@chunk`.
  getLineAndColumnFromChunk: (offset) ->
    compensation = @getLocationDataCompensation @chunkOffset, @chunkOffset + offset

    if offset is 0
      return [@chunkLine, @chunkColumn + compensation, @chunkOffset + compensation]

    if offset >= @chunk.length
      string = @chunk
    else
      string = @chunk[..offset-1]

    lineCount = count string, '\n'

    column = @chunkColumn
    if lineCount > 0
      [..., lastLine] = string.split '\n'
      column = lastLine.length
      previousLinesCompensation = @getLocationDataCompensation @chunkOffset, @chunkOffset + offset - column
      # Don't recompensate for initially inserted newline.
      previousLinesCompensation = 0 if previousLinesCompensation < 0
      columnCompensation = @getLocationDataCompensation(
        @chunkOffset + offset + previousLinesCompensation - column
        @chunkOffset + offset + previousLinesCompensation
      )
    else
      column += string.length
      columnCompensation = compensation

    [@chunkLine + lineCount, column + columnCompensation, @chunkOffset + offset + compensation]

  makeLocationData: ({ offsetInChunk, length }) ->
    locationData = range: []
    [locationData.first_line, locationData.first_column, locationData.range[0]] =
      @getLineAndColumnFromChunk offsetInChunk

    # Use length - 1 for the final offset; we’re supplying the last_line and the last_column,
    # so if last_column == first_column, then we’re looking at a character of length 1.
    lastCharacter = if length > 0 then (length - 1) else 0
    [locationData.last_line, locationData.last_column, endOffset] =
      @getLineAndColumnFromChunk offsetInChunk + lastCharacter
    [locationData.last_line_exclusive, locationData.last_column_exclusive] =
      @getLineAndColumnFromChunk offsetInChunk + lastCharacter + (if length > 0 then 1 else 0)
    locationData.range[1] = if length > 0 then endOffset + 1 else endOffset

    locationData

  # Same as `token`, except this just returns the token without adding it
  # to the results.
  makeToken: (tag, value, {offset: offsetInChunk = 0, length = value.length, origin, generated, indentSize} = {}) ->
    token = [tag, value, @makeLocationData {offsetInChunk, length}]
    token.origin = origin if origin
    token.generated = yes if generated
    token.indentSize = indentSize if indentSize?
    token

  # Add a token to the results.
  # `offset` is the offset into the current `@chunk` where the token starts.
  # `length` is the length of the token in the `@chunk`, after the offset.  If
  # not specified, the length of `value` will be used.
  #
  # Returns the new token.
  token: (tag, value, {offset, length, origin, data, generated, indentSize} = {}) ->
    token = @makeToken tag, value, {offset, length, origin, generated, indentSize}
    addTokenData token, data if data
    @tokens.push token
    token

  # Peek at the last tag in the token stream.
  tag: ->
    [..., token] = @tokens
    token?[0]

  # Peek at the last value in the token stream.
  value: (useOrigin = no) ->
    [..., token] = @tokens
    if useOrigin and token?.origin?
      token.origin[1]
    else
      token?[1]

  # Get the previous token in the token stream.
  prev: ->
    @tokens[@tokens.length - 1]

  # Are we in the midst of an unfinished expression?
  unfinished: ->
    LINE_CONTINUER.test(@chunk) or
    @tag() in UNFINISHED

  validateUnicodeCodePointEscapes: (str, options) ->
    replaceUnicodeCodePointEscapes str, merge options, {@error}

  # Validates escapes in strings and regexes.
  validateEscapes: (str, options = {}) ->
    invalidEscapeRegex =
      if options.isRegex
        REGEX_INVALID_ESCAPE
      else
        STRING_INVALID_ESCAPE
    match = invalidEscapeRegex.exec str
    return unless match
    [[], before, octal, hex, unicodeCodePoint, unicode] = match
    message =
      if octal
        "octal escape sequences are not allowed"
      else
        "invalid escape sequence"
    invalidEscape = "\\#{octal or hex or unicodeCodePoint or unicode}"
    @error "#{message} #{invalidEscape}",
      offset: (options.offsetInChunk ? 0) + match.index + before.length
      length: invalidEscape.length

  suppressSemicolons: ->
    while @value() is ';'
      @tokens.pop()
      @error 'unexpected ;' if @prev()?[0] in ['=', UNFINISHED...]

  # Throws an error at either a given offset from the current chunk or at the
  # location of a token (`token[2]`).
  error: (message, options = {}) =>
    location =
      if 'first_line' of options
        options
      else
        [first_line, first_column] = @getLineAndColumnFromChunk options.offset ? 0
        {first_line, first_column, last_column: first_column + (options.length ? 1) - 1}
    throwSyntaxError message, location

# Helper functions
# ----------------

isUnassignable = (name, displayName = name) -> switch
  when name in [JS_KEYWORDS..., COFFEE_KEYWORDS...]
    "keyword '#{displayName}' can't be assigned"
  when name in STRICT_PROSCRIBED
    "'#{displayName}' can't be assigned"
  when name in RESERVED
    "reserved word '#{displayName}' can't be assigned"
  else
    false

exports.isUnassignable = isUnassignable

# `from` isn’t a CoffeeScript keyword, but it behaves like one in `import` and
# `export` statements (handled above) and in the declaration line of a `for`
# loop. Try to detect when `from` is a variable identifier and when it is this
# “sometimes” keyword.
isForFrom = (prev) ->
  # `for i from iterable`
  if prev[0] is 'IDENTIFIER'
    yes
  # `for from…`
  else if prev[0] is 'FOR'
    no
  # `for {from}…`, `for [from]…`, `for {a, from}…`, `for {a: from}…`
  else if prev[1] in ['{', '[', ',', ':']
    no
  else
    yes

addTokenData = (token, data) ->
  Object.assign (token.data ?= {}), data

# Constants
# ---------

# Keywords that CoffeeScript shares in common with JavaScript.
JS_KEYWORDS = [
  'true', 'false', 'null', 'this'
  'new', 'delete', 'typeof', 'in', 'instanceof'
  'return', 'throw', 'break', 'continue', 'debugger', 'yield', 'await'
  'if', 'else', 'switch', 'for', 'while', 'do', 'try', 'catch', 'finally'
  'class', 'extends', 'super'
  'import', 'export', 'default'
]

# CoffeeScript-only keywords.
COFFEE_KEYWORDS = [
  'undefined', 'Infinity', 'NaN'
  'then', 'unless', 'until', 'loop', 'of', 'by', 'when'
]

COFFEE_ALIAS_MAP =
  and  : '&&'
  or   : '||'
  is   : '=='

(This large file is truncated; the full source is available at the URL above.)