PageRenderTime 81ms CodeModel.GetById 14ms app.highlight 59ms RepoModel.GetById 1ms app.codeStats 0ms

/src/rewriter.coffee

http://github.com/jashkenas/coffee-script
CoffeeScript | 858 lines | 580 code | 81 blank | 197 comment | 144 complexity | 41625954476546c002f19e86b664b04a MD5 | raw file
  1# The CoffeeScript language has a good deal of optional syntax, implicit syntax,
  2# and shorthand syntax. This can greatly complicate a grammar and bloat
  3# the resulting parse table. Instead of making the parser handle it all, we take
  4# a series of passes over the token stream, using this **Rewriter** to convert
  5# shorthand into the unambiguous long form, add implicit indentation and
  6# parentheses, and generally clean things up.
  7
  8{throwSyntaxError, extractAllCommentTokens} = require './helpers'
  9
 10# Move attached comments from one token to another.
 11moveComments = (fromToken, toToken) ->
 12  return unless fromToken.comments
 13  if toToken.comments and toToken.comments.length isnt 0
 14    unshiftedComments = []
 15    for comment in fromToken.comments
 16      if comment.unshift
 17        unshiftedComments.push comment
 18      else
 19        toToken.comments.push comment
 20    toToken.comments = unshiftedComments.concat toToken.comments
 21  else
 22    toToken.comments = fromToken.comments
 23  delete fromToken.comments
 24
 25# Create a generated token: one that exists due to a use of implicit syntax.
 26# Optionally have this new token take the attached comments from another token.
 27generate = (tag, value, origin, commentsToken) ->
 28  token = [tag, value]
 29  token.generated = yes
 30  token.origin = origin if origin
 31  moveComments commentsToken, token if commentsToken
 32  token
 33
 34# The **Rewriter** class is used by the [Lexer](lexer.html), directly against
 35# its internal array of tokens.
 36exports.Rewriter = class Rewriter
 37
 38  # Rewrite the token stream in multiple passes, one logical filter at
 39  # a time. This could certainly be changed into a single pass through the
 40  # stream, with a big ol’ efficient switch, but it’s much nicer to work with
 41  # like this. The order of these passes matters—indentation must be
 42  # corrected before implicit parentheses can be wrapped around blocks of code.
 43  rewrite: (@tokens) ->
 44    # Set environment variable `DEBUG_TOKEN_STREAM` to `true` to output token
 45    # debugging info. Also set `DEBUG_REWRITTEN_TOKEN_STREAM` to `true` to
 46    # output the token stream after it has been rewritten by this file.
 47    if process?.env?.DEBUG_TOKEN_STREAM
 48      console.log 'Initial token stream:' if process.env.DEBUG_REWRITTEN_TOKEN_STREAM
 49      console.log (t[0] + '/' + t[1] + (if t.comments then '*' else '') for t in @tokens).join ' '
 50    @removeLeadingNewlines()
 51    @closeOpenCalls()
 52    @closeOpenIndexes()
 53    @normalizeLines()
 54    @tagPostfixConditionals()
 55    @addImplicitBracesAndParens()
 56    @rescueStowawayComments()
 57    @addLocationDataToGeneratedTokens()
 58    @enforceValidJSXAttributes()
 59    @fixIndentationLocationData()
 60    @exposeTokenDataToGrammar()
 61    if process?.env?.DEBUG_REWRITTEN_TOKEN_STREAM
 62      console.log 'Rewritten token stream:' if process.env.DEBUG_TOKEN_STREAM
 63      console.log (t[0] + '/' + t[1] + (if t.comments then '*' else '') for t in @tokens).join ' '
 64    @tokens
 65
 66  # Rewrite the token stream, looking one token ahead and behind.
 67  # Allow the return value of the block to tell us how many tokens to move
 68  # forwards (or backwards) in the stream, to make sure we don’t miss anything
 69  # as tokens are inserted and removed, and the stream changes length under
 70  # our feet.
 71  scanTokens: (block) ->
 72    {tokens} = this
 73    i = 0
 74    i += block.call this, token, i, tokens while token = tokens[i]
 75    true
 76
  # Walk the token stream starting at index `i` until `condition` holds at the
  # same expression-nesting level we started on, then invoke `action` there.
  # `levels` counts balanced `EXPRESSION_START`/`EXPRESSION_END` pairs passed
  # along the way; when it drops below zero we have escaped the expression
  # that contained `i`. With `opts.returnOnNegativeLevel` set, escaping
  # returns silently instead of firing `action`.
  detectEnd: (i, condition, action, opts = {}) ->
    {tokens} = this
    levels = 0
    while token = tokens[i]
      # Only consult `condition` when back at the starting nesting level.
      return action.call this, token, i if levels is 0 and condition.call this, token, i
      if token[0] in EXPRESSION_START
        levels += 1
      else if token[0] in EXPRESSION_END
        levels -= 1
      if levels < 0
        # Stepped out of the enclosing expression without a match.
        return if opts.returnOnNegativeLevel
        return action.call this, token, i
      i += 1
    # Ran off the end of the stream: return the index of the last token.
    i - 1
 91
 92  # Leading newlines would introduce an ambiguity in the grammar, so we
 93  # dispatch them here.
 94  removeLeadingNewlines: ->
 95    # Find the index of the first non-`TERMINATOR` token.
 96    break for [tag], i in @tokens when tag isnt 'TERMINATOR'
 97    return if i is 0
 98    # If there are any comments attached to the tokens we’re about to discard,
 99    # shift them forward to what will become the new first token.
100    for leadingNewlineToken in @tokens[0...i]
101      moveComments leadingNewlineToken, @tokens[i]
102    # Discard all the leading newline tokens.
103    @tokens.splice 0, i
104
105  # The lexer has tagged the opening parenthesis of a method call. Match it with
106  # its paired close.
107  closeOpenCalls: ->
108    condition = (token, i) ->
109      token[0] in [')', 'CALL_END']
110
111    action = (token, i) ->
112      token[0] = 'CALL_END'
113
114    @scanTokens (token, i) ->
115      @detectEnd i + 1, condition, action if token[0] is 'CALL_START'
116      1
117
118  # The lexer has tagged the opening bracket of an indexing operation call.
119  # Match it with its paired close.
120  closeOpenIndexes: ->
121    startToken = null
122    condition = (token, i) ->
123      token[0] in [']', 'INDEX_END']
124
125    action = (token, i) ->
126      if @tokens.length >= i and @tokens[i + 1][0] is ':'
127        startToken[0] = '['
128        token[0] = ']'
129      else
130        token[0] = 'INDEX_END'
131
132    @scanTokens (token, i) ->
133      if token[0] is 'INDEX_START'
134        startToken = token
135        @detectEnd i + 1, condition, action
136      1
137
  # Match tags in token stream starting at `i` with `pattern`.
  # `pattern` may consist of strings (equality), an array of strings (one of)
  # or null (wildcard). Returns the index of the match or -1 if no match.
  indexOfTag: (i, pattern...) ->
    # NOTE(review): `fuzz` is never incremented anywhere in this method, so it
    # is always 0 — it appears to be vestigial from an earlier implementation.
    fuzz = 0
    for j in [0 ... pattern.length]
      # `null`/`undefined` entries act as wildcards: any tag matches there.
      continue if not pattern[j]?
      # Normalize a bare string to a one-element list so `in` works below.
      pattern[j] = [pattern[j]] if typeof pattern[j] is 'string'
      return -1 if @tag(i + j + fuzz) not in pattern[j]
    # Index of the token that matched the final pattern element.
    i + j + fuzz - 1
148
  # Returns `yes` if standing in front of something looking like
  # `@<x>:`, `<x>:` or `<EXPRESSION_START><x>...<EXPRESSION_END>:`.
  looksObjectish: (j) ->
    # Simple cases: a (possibly `@`-prefixed) key followed directly by `:`.
    return yes if @indexOfTag(j, '@', null, ':') isnt -1 or @indexOfTag(j, null, ':') isnt -1
    # Bracketed/parenthesized key: scan to the matching closer and check
    # whether a `:` follows it.
    index = @indexOfTag j, EXPRESSION_START
    if index isnt -1
      end = null
      @detectEnd index + 1, ((token) -> token[0] in EXPRESSION_END), ((token, i) -> end = i)
      return yes if @tag(end + 1) is ':'
    no
159
  # Returns `yes` if current line of tokens contain an element of tags on same
  # expression level. Stop searching at `LINEBREAKS` or explicit start of
  # containing balanced expression.
  findTagsBackwards: (i, tags) ->
    # `backStack` records closers we walk past while scanning backwards, so
    # that their matching openers don't terminate the scan — we only stop on
    # an opener that belongs to the expression *containing* position `i`.
    backStack = []
    while i >= 0 and (backStack.length or
          @tag(i) not in tags and
          (@tag(i) not in EXPRESSION_START or @tokens[i].generated) and
          @tag(i) not in LINEBREAKS)
      backStack.push @tag(i) if @tag(i) in EXPRESSION_END
      backStack.pop() if @tag(i) in EXPRESSION_START and backStack.length
      i -= 1
    # Either we stopped on one of `tags`, or on a boundary/start of stream.
    @tag(i) in tags
173
  # Look for signs of implicit calls and objects in the token stream and
  # add them. This is the heart of the rewriter: it inserts generated
  # `CALL_START`/`CALL_END` and `{`/`}` tokens so the grammar only ever sees
  # fully parenthesized calls and fully braced object literals.
  addImplicitBracesAndParens: ->
    # Track current balancing depth (both implicit and explicit) on stack.
    # Each entry is `[tag, tokenIndex, properties]`; entries created by the
    # rewriter itself are marked `ours: yes` in the properties object.
    stack = []
    start = null

    @scanTokens (token, i, tokens) ->
      [tag]     = token
      [prevTag] = prevToken = if i > 0 then tokens[i - 1] else []
      [nextTag] = nextToken = if i < tokens.length - 1 then tokens[i + 1] else []
      stackTop  = -> stack[stack.length - 1]
      startIdx  = i

      # Helper function, used for keeping track of the number of tokens consumed
      # and spliced, when returning for getting a new token.
      forward   = (n) -> i - startIdx + n

      # Helper functions
      isImplicit        = (stackItem) -> stackItem?[2]?.ours
      isImplicitObject  = (stackItem) -> isImplicit(stackItem) and stackItem?[0] is '{'
      isImplicitCall    = (stackItem) -> isImplicit(stackItem) and stackItem?[0] is '('
      inImplicit        = -> isImplicit stackTop()
      inImplicitCall    = -> isImplicitCall stackTop()
      inImplicitObject  = -> isImplicitObject stackTop()
      # Unclosed control statement inside implicit parens (like
      # class declaration or if-conditionals).
      inImplicitControl = -> inImplicit() and stackTop()?[0] is 'CONTROL'

      startImplicitCall = (idx) ->
        stack.push ['(', idx, ours: yes]
        tokens.splice idx, 0, generate 'CALL_START', '(', ['', 'implicit function call', token[2]], prevToken

      endImplicitCall = ->
        stack.pop()
        tokens.splice i, 0, generate 'CALL_END', ')', ['', 'end of input', token[2]], prevToken
        i += 1

      startImplicitObject = (idx, {startsLine = yes, continuationLineIndent} = {}) ->
        stack.push ['{', idx, sameLine: yes, startsLine: startsLine, ours: yes, continuationLineIndent: continuationLineIndent]
        val = new String '{'
        val.generated = yes
        tokens.splice idx, 0, generate '{', val, token, prevToken

      endImplicitObject = (j) ->
        j = j ? i
        stack.pop()
        tokens.splice j, 0, generate '}', '}', token, prevToken
        i += 1

      # Does the object continue on the line after the next `TERMINATOR`?
      implicitObjectContinues = (j) =>
        nextTerminatorIdx = null
        @detectEnd j,
          (token) -> token[0] is 'TERMINATOR'
          (token, i) -> nextTerminatorIdx = i
          returnOnNegativeLevel: yes
        return no unless nextTerminatorIdx?
        @looksObjectish nextTerminatorIdx + 1

      # Don’t end an implicit call/object on next indent if any of these are in an argument/value.
      if (
        (inImplicitCall() or inImplicitObject()) and tag in CONTROL_IN_IMPLICIT or
        inImplicitObject() and prevTag is ':' and tag is 'FOR'
      )
        stack.push ['CONTROL', i, ours: yes]
        return forward(1)

      if tag is 'INDENT' and inImplicit()

        # An `INDENT` closes an implicit call unless
        #
        #  1. We have seen a `CONTROL` argument on the line.
        #  2. The last token before the indent is part of the list below.
        if prevTag not in ['=>', '->', '[', '(', ',', '{', 'ELSE', '=']
          while inImplicitCall() or inImplicitObject() and prevTag isnt ':'
            if inImplicitCall()
              endImplicitCall()
            else
              endImplicitObject()
        stack.pop() if inImplicitControl()
        stack.push [tag, i]
        return forward(1)

      # Straightforward start of explicit expression.
      if tag in EXPRESSION_START
        stack.push [tag, i]
        return forward(1)

      # Close all implicit expressions inside of explicitly closed expressions.
      if tag in EXPRESSION_END
        while inImplicit()
          if inImplicitCall()
            endImplicitCall()
          else if inImplicitObject()
            endImplicitObject()
          else
            stack.pop()
        # Remember the explicit opener just closed; the `:` handling below
        # uses it to locate the start of a bracketed (computed) key.
        start = stack.pop()

      # Are we on the condition line of a `for`/`while`/`until`/`when`
      # construct (and not inside a same-line function literal)?
      inControlFlow = =>
        seenFor = @findTagsBackwards(i, ['FOR']) and @findTagsBackwards(i, ['FORIN', 'FOROF', 'FORFROM'])
        controlFlow = seenFor or @findTagsBackwards i, ['WHILE', 'UNTIL', 'LOOP', 'LEADING_WHEN']
        return no unless controlFlow
        isFunc = no
        tagCurrentLine = token[2].first_line
        @detectEnd i,
          (token, i) -> token[0] in LINEBREAKS
          (token, i) ->
            [prevTag, ,{first_line}] = tokens[i - 1] || []
            isFunc = tagCurrentLine is first_line and prevTag in ['->', '=>']
          returnOnNegativeLevel: yes
        isFunc

      # Recognize standard implicit calls like
      # f a, f() b, f? c, h[0] d etc.
      # Added support for spread dots on the left side: f ...a
      if (tag in IMPLICIT_FUNC and token.spaced or
          tag is '?' and i > 0 and not tokens[i - 1].spaced) and
         (nextTag in IMPLICIT_CALL or
         (nextTag is '...' and @tag(i + 2) in IMPLICIT_CALL and not @findTagsBackwards(i, ['INDEX_START', '['])) or
          nextTag in IMPLICIT_UNSPACED_CALL and
          not nextToken.spaced and not nextToken.newLine) and
          not inControlFlow()
        tag = token[0] = 'FUNC_EXIST' if tag is '?'
        startImplicitCall i + 1
        return forward(2)

      # Implicit call taking an implicit indented object as first argument.
      #
      #     f
      #       a: b
      #       c: d
      #
      # Don’t accept implicit calls of this type, when on the same line
      # as the control structures below as that may misinterpret constructs like:
      #
      #     if f
      #        a: 1
      # as
      #
      #     if f(a: 1)
      #
      # which is probably always unintended.
      # Furthermore don’t allow this in literal arrays, as
      # that creates grammatical ambiguities.
      if tag in IMPLICIT_FUNC and
         @indexOfTag(i + 1, 'INDENT') > -1 and @looksObjectish(i + 2) and
         not @findTagsBackwards(i, ['CLASS', 'EXTENDS', 'IF', 'CATCH',
          'SWITCH', 'LEADING_WHEN', 'FOR', 'WHILE', 'UNTIL'])
        startImplicitCall i + 1
        stack.push ['INDENT', i + 2]
        return forward(3)

      # Implicit objects start here.
      if tag is ':'
        # Go back to the (implicit) start of the object.
        s = switch
          when @tag(i - 1) in EXPRESSION_END
            # The key was a bracketed expression; `start` was set when that
            # explicit closer was handled above.
            [startTag, startIndex] = start
            if startTag is '[' and startIndex > 0 and @tag(startIndex - 1) is '@' and not tokens[startIndex - 1].spaced
              startIndex - 1
            else
              startIndex
          when @tag(i - 2) is '@' then i - 2
          else i - 1

        startsLine = s <= 0 or @tag(s - 1) in LINEBREAKS or tokens[s - 1].newLine
        # Are we just continuing an already declared object?
        if stackTop()
          [stackTag, stackIdx] = stackTop()
          if (stackTag is '{' or stackTag is 'INDENT' and @tag(stackIdx - 1) is '{') and
             (startsLine or @tag(s - 1) is ',' or @tag(s - 1) is '{') and
             @tag(s - 1) not in UNFINISHED
            return forward(1)

        preObjectToken = if i > 1 then tokens[i - 2] else []
        startImplicitObject(s, {startsLine: !!startsLine, continuationLineIndent: preObjectToken.continuationLineIndent})
        return forward(2)

      # End implicit calls when chaining method calls
      # like e.g.:
      #
      #     f ->
      #       a
      #     .g b, ->
      #       c
      #     .h a
      #
      # and also
      #
      #     f a
      #     .g b
      #     .h a

      # Mark all enclosing objects as not sameLine
      if tag in LINEBREAKS
        for stackItem in stack by -1
          break unless isImplicit stackItem
          stackItem[2].sameLine = no if isImplicitObject stackItem

      # End indented-continuation-line implicit objects once that indentation is over.
      if tag is 'TERMINATOR' and token.endsContinuationLineIndentation
        {preContinuationLineIndent} = token.endsContinuationLineIndentation
        while inImplicitObject() and (implicitObjectIndent = stackTop()[2].continuationLineIndent)? and implicitObjectIndent > preContinuationLineIndent
          endImplicitObject()

      newLine = prevTag is 'OUTDENT' or prevToken.newLine
      if tag in IMPLICIT_END or
          (tag in CALL_CLOSERS and newLine) or
          (tag in ['..', '...'] and @findTagsBackwards(i, ["INDEX_START"]))
        while inImplicit()
          [stackTag, stackIdx, {sameLine, startsLine}] = stackTop()
          # Close implicit calls when reached end of argument list
          if inImplicitCall() and prevTag isnt ',' or
              (prevTag is ',' and tag is 'TERMINATOR' and not nextTag?)
            endImplicitCall()
          # Close implicit objects such as:
          # return a: 1, b: 2 unless true
          else if inImplicitObject() and sameLine and
                  tag isnt 'TERMINATOR' and prevTag isnt ':' and
                  not (tag in ['POST_IF', 'FOR', 'WHILE', 'UNTIL'] and startsLine and implicitObjectContinues(i + 1))
            endImplicitObject()
          # Close implicit objects when at end of line, line didn't end with a comma
          # and the implicit object didn't start the line or the next line doesn’t look like
          # the continuation of an object.
          else if inImplicitObject() and tag is 'TERMINATOR' and prevTag isnt ',' and
                  not (startsLine and @looksObjectish(i + 1))
            endImplicitObject()
          else if inImplicitControl() and tokens[stackTop()[1]][0] is 'CLASS' and tag is 'TERMINATOR'
            stack.pop()
          else
            break

      # Close implicit object if comma is the last character
      # and what comes after doesn’t look like it belongs.
      # This is used for trailing commas and calls, like:
      #
      #     x =
      #         a: b,
      #         c: d,
      #     e = 2
      #
      # and
      #
      #     f a, b: c, d: e, f, g: h: i, j
      #
      if tag is ',' and not @looksObjectish(i + 1) and inImplicitObject() and not (@tag(i + 2) in ['FOROF', 'FORIN']) and
         (nextTag isnt 'TERMINATOR' or not @looksObjectish(i + 2))
        # When nextTag is OUTDENT the comma is insignificant and
        # should just be ignored so embed it in the implicit object.
        #
        # When it isn’t the comma go on to play a role in a call or
        # array further up the stack, so give it a chance.
        offset = if nextTag is 'OUTDENT' then 1 else 0
        while inImplicitObject()
          endImplicitObject i + offset
      return forward(1)
431
432  # Make sure only strings and wrapped expressions are used in JSX attributes.
433  enforceValidJSXAttributes: ->
434    @scanTokens (token, i, tokens) ->
435      if token.jsxColon
436        next = tokens[i + 1]
437        if next[0] not in ['STRING_START', 'STRING', '(']
438          throwSyntaxError 'expected wrapped or quoted JSX attribute', next[2]
439      return 1
440
  # Not all tokens survive processing by the parser. To avoid comments getting
  # lost into the ether, find comments attached to doomed tokens and move them
  # to a token that will make it to the other side.
  rescueStowawayComments: ->
    # Append/prepend a surviving `JS` token (preceded by a `TERMINATOR` when
    # one isn’t already there) at one end of the stream, and hang `token`’s
    # comments on it. `method` is `'push'` or `'unshift'`.
    insertPlaceholder = (token, j, tokens, method) ->
      tokens[method] generate 'TERMINATOR', '\n', tokens[j] unless tokens[j][0] is 'TERMINATOR'
      tokens[method] generate 'JS', '', tokens[j], token

    # Returns `yes` when the run of discarded tokens following `i` reaches an
    # `INTERPOLATION_END` — comments inside an interpolation should stay put.
    dontShiftForward = (i, tokens) ->
      j = i + 1
      while j isnt tokens.length and tokens[j][0] in DISCARDED
        return yes if tokens[j][0] is 'INTERPOLATION_END'
        j++
      no

    shiftCommentsForward = (token, i, tokens) ->
      # Find the next surviving token and attach this token’s comments to it,
      # with a flag that we know to output such comments *before* that
      # token’s own compilation. (Otherwise comments are output following
      # the token they’re attached to.)
      j = i
      j++ while j isnt tokens.length and tokens[j][0] in DISCARDED
      # NOTE(review): the second disjunct is redundant — the loop above only
      # stops on a non-`DISCARDED` token or at the end of the stream.
      unless j is tokens.length or tokens[j][0] in DISCARDED
        comment.unshift = yes for comment in token.comments
        moveComments token, tokens[j]
        return 1
      else # All following tokens are doomed!
        j = tokens.length - 1
        insertPlaceholder token, j, tokens, 'push'
        # The generated tokens were added to the end, not inline, so we don’t skip.
        return 1

    shiftCommentsBackward = (token, i, tokens) ->
      # Find the last surviving token and attach this token’s comments to it.
      j = i
      j-- while j isnt -1 and tokens[j][0] in DISCARDED
      unless j is -1 or tokens[j][0] in DISCARDED
        moveComments token, tokens[j]
        return 1
      else # All previous tokens are doomed!
        insertPlaceholder token, 0, tokens, 'unshift'
        # We added two tokens, so shift forward to account for the insertion.
        return 3

    @scanTokens (token, i, tokens) ->
      return 1 unless token.comments
      ret = 1
      if token[0] in DISCARDED
        # This token won’t survive passage through the parser, so we need to
        # rescue its attached tokens and redistribute them to nearby tokens.
        # Comments that don’t start a new line can shift backwards to the last
        # safe token, while other tokens should shift forward.
        dummyToken = comments: []
        j = token.comments.length - 1
        until j is -1
          if token.comments[j].newLine is no and token.comments[j].here is no
            dummyToken.comments.unshift token.comments[j]
            token.comments.splice j, 1
          j--
        if dummyToken.comments.length isnt 0
          ret = shiftCommentsBackward dummyToken, i - 1, tokens
        if token.comments.length isnt 0
          shiftCommentsForward token, i, tokens
      else unless dontShiftForward i, tokens
        # If any of this token’s comments start a line—there’s only
        # whitespace between the preceding newline and the start of the
        # comment—and this isn’t one of the special `JS` tokens, then
        # shift this comment forward to precede the next valid token.
        # `Block.compileComments` also has logic to make sure that
        # “starting new line” comments follow or precede the nearest
        # newline relative to the token that the comment is attached to,
        # but that newline might be inside a `}` or `)` or other generated
        # token that we really want this comment to output after. Therefore
        # we need to shift the comments here, avoiding such generated and
        # discarded tokens.
        dummyToken = comments: []
        j = token.comments.length - 1
        until j is -1
          if token.comments[j].newLine and not token.comments[j].unshift and
             not (token[0] is 'JS' and token.generated)
            dummyToken.comments.unshift token.comments[j]
            token.comments.splice j, 1
          j--
        if dummyToken.comments.length isnt 0
          ret = shiftCommentsForward dummyToken, i + 1, tokens
      # Drop the now-empty comments array so later passes treat this token
      # as comment-free.
      delete token.comments if token.comments?.length is 0
      ret
528
  # Add location data to all tokens generated by the rewriter.
  addLocationDataToGeneratedTokens: ->
    @scanTokens (token, i, tokens) ->
      return 1 if     token[2]
      return 1 unless token.generated or token.explicit
      # An `INDENT` produced from a `then` inherits its origin’s location.
      if token.fromThen and token[0] is 'INDENT'
        token[2] = token.origin[2]
        return 1
      # A generated `{` borrows the start position of the token that follows
      # it; every other generated token borrows the end of the one before it.
      if token[0] is '{' and nextLocation=tokens[i + 1]?[2]
        {first_line: line, first_column: column, range: [rangeIndex]} = nextLocation
      else if prevLocation = tokens[i - 1]?[2]
        {last_line: line, last_column: column, range: [, rangeIndex]} = prevLocation
        column += 1
      else
        # No neighbor with location data: fall back to the very start.
        line = column = 0
        rangeIndex = 0
      # Generated tokens are treated as zero-width: first and last coincide.
      token[2] = {
        first_line:            line
        first_column:          column
        last_line:             line
        last_column:           column
        last_line_exclusive:   line
        last_column_exclusive: column
        range: [rangeIndex, rangeIndex]
      }
      return 1
555
  # `OUTDENT` tokens should always be positioned at the last character of the
  # previous token, so that AST nodes ending in an `OUTDENT` token end up with a
  # location corresponding to the last “real” token under the node.
  fixIndentationLocationData: ->
    @allComments ?= extractAllCommentTokens @tokens
    # Find a comment lying strictly between `afterPosition` and `token`,
    # subject to the indentation constraints carried in the options; with
    # `first` set, return the earliest such comment instead of the latest.
    findPrecedingComment = (token, {afterPosition, indentSize, first, indented}) =>
      tokenStart = token[2].range[0]
      matches = (comment) ->
        if comment.outdented
          # Only match an outdented comment if it’s indented deeper than
          # the block we’re positioning.
          return no unless indentSize? and comment.indentSize > indentSize
        return no if indented and not comment.indented
        return no unless comment.locationData.range[0] < tokenStart
        return no unless comment.locationData.range[0] > afterPosition
        yes
      if first
        # Walk backwards and keep the last run of consecutive matches, so we
        # return the *first* comment of the matching run.
        lastMatching = null
        for comment in @allComments by -1
          if matches comment
            lastMatching = comment
          else if lastMatching
            return lastMatching
        return lastMatching
      for comment in @allComments when matches comment by -1
        return comment
      null

    @scanTokens (token, i, tokens) ->
      return 1 unless token[0] in ['INDENT', 'OUTDENT'] or
        (token.generated and token[0] is 'CALL_END' and not token.data?.closingTagNameToken) or
        (token.generated and token[0] is '}')
      isIndent = token[0] is 'INDENT'
      prevToken = token.prevToken ? tokens[i - 1]
      prevLocationData = prevToken[2]
      # addLocationDataToGeneratedTokens() set the outdent’s location data
      # to the preceding token’s, but in order to detect comments inside an
      # empty "block" we want to look for comments preceding the next token.
      useNextToken = token.explicit or token.generated
      if useNextToken
        # Skip over any run of generated/explicit tokens to reach a real one.
        nextToken = token
        nextTokenIndex = i
        nextToken = tokens[nextTokenIndex++] while (nextToken.explicit or nextToken.generated) and nextTokenIndex isnt tokens.length - 1
      precedingComment = findPrecedingComment(
        if useNextToken
          nextToken
        else
          token
        afterPosition: prevLocationData.range[0]
        indentSize: token.indentSize
        first: isIndent
        indented: useNextToken
      )
      if isIndent
        return 1 unless precedingComment?.newLine
      # We don’t want e.g. an implicit call at the end of an `if` condition to
      # include a following indented comment.
      return 1 if token.generated and token[0] is 'CALL_END' and precedingComment?.indented
      prevLocationData = precedingComment.locationData if precedingComment?
      token[2] =
        first_line:
          if precedingComment?
            prevLocationData.first_line
          else
            prevLocationData.last_line
        first_column:
          if precedingComment?
            if isIndent
              0
            else
              prevLocationData.first_column
          else
            prevLocationData.last_column
        last_line:              prevLocationData.last_line
        last_column:            prevLocationData.last_column
        last_line_exclusive:    prevLocationData.last_line_exclusive
        last_column_exclusive:  prevLocationData.last_column_exclusive
        range:
          if isIndent and precedingComment?
            # Widen the range so it starts at the comment’s indentation.
            [
              prevLocationData.range[0] - precedingComment.indentSize
              prevLocationData.range[1]
            ]
          else
            prevLocationData.range
      return 1
640
  # Because our grammar is LALR(1), it can’t handle some single-line
  # expressions that lack ending delimiters. The **Rewriter** adds the implicit
  # blocks, so it doesn’t need to. To keep the grammar clean and tidy, trailing
  # newlines within expressions are removed and the indentation tokens of empty
  # blocks are added.
  normalizeLines: ->
    starter = indent = outdent = null
    leading_switch_when = null
    leading_if_then = null
    # Count `THEN` tags
    ifThens = []

    # `condition`/`action` are used with `detectEnd` below to find where the
    # generated `INDENT` for a single-line body should be closed.
    condition = (token, i) ->
      token[1] isnt ';' and token[0] in SINGLE_CLOSERS and
      not (token[0] is 'TERMINATOR' and @tag(i + 1) in EXPRESSION_CLOSE) and
      not (token[0] is 'ELSE' and
           (starter isnt 'THEN' or (leading_if_then or leading_switch_when))) and
      not (token[0] in ['CATCH', 'FINALLY'] and starter in ['->', '=>']) or
      token[0] in CALL_CLOSERS and
      (@tokens[i - 1].newLine or @tokens[i - 1][0] is 'OUTDENT')

    action = (token, i) ->
      ifThens.pop() if token[0] is 'ELSE' and starter is 'THEN'
      # Place the closing `OUTDENT` before a trailing comma if there is one.
      @tokens.splice (if @tag(i - 1) is ',' then i - 1 else i), 0, outdent

    # Close all `IF…THEN` bodies still open when an unindented `ELSE` arrives.
    closeElseTag = (tokens, i) =>
      tlen = ifThens.length
      return i unless tlen > 0
      lastThen = ifThens.pop()
      [, outdentElse] = @indentation tokens[lastThen]
      # Insert `OUTDENT` to close inner `IF`.
      outdentElse[1] = tlen*2
      tokens.splice(i, 0, outdentElse)
      # Insert `OUTDENT` to close outer `IF`.
      outdentElse[1] = 2
      tokens.splice(i + 1, 0, outdentElse)
      # Remove outdents from the end.
      @detectEnd i + 2,
        (token, i) -> token[0] in ['OUTDENT', 'TERMINATOR']
        (token, i) ->
            if @tag(i) is 'OUTDENT' and @tag(i + 1) is 'OUTDENT'
              tokens.splice i, 2
      i + 2

    @scanTokens (token, i, tokens) ->
      [tag] = token
      # A `->`/`=>` on the condition line of a control construct (but not
      # after `THEN` or a range) should not get an implicit block here.
      conditionTag = tag in ['->', '=>'] and
        @findTagsBackwards(i, ['IF', 'WHILE', 'FOR', 'UNTIL', 'SWITCH', 'WHEN', 'LEADING_WHEN', '[', 'INDEX_START']) and
        not (@findTagsBackwards i, ['THEN', '..', '...'])

      if tag is 'TERMINATOR'
        if @tag(i + 1) is 'ELSE' and @tag(i - 1) isnt 'OUTDENT'
          # Give the dangling `ELSE` an empty indented block to attach to.
          tokens.splice i, 1, @indentation()...
          return 1
        if @tag(i + 1) in EXPRESSION_CLOSE
          # Drop the redundant terminator, preserving its comments.
          if token[1] is ';' and @tag(i + 1) is 'OUTDENT'
            tokens[i + 1].prevToken = token
            moveComments token, tokens[i + 1]
          tokens.splice i, 1
          return 0
      if tag is 'CATCH'
        # A bodiless `catch` gets an empty indented block.
        for j in [1..2] when @tag(i + j) in ['OUTDENT', 'TERMINATOR', 'FINALLY']
          tokens.splice i + j, 0, @indentation()...
          return 2 + j
      if tag in ['->', '=>'] and (@tag(i + 1) in [',', ']'] or @tag(i + 1) is '.' and token.newLine)
        # A bodiless function literal gets an empty indented block.
        [indent, outdent] = @indentation tokens[i]
        tokens.splice i + 1, 0, indent, outdent
        return 1
      if tag in SINGLE_LINERS and @tag(i + 1) isnt 'INDENT' and
         not (tag is 'ELSE' and @tag(i + 1) is 'IF') and
         not conditionTag
        starter = tag
        [indent, outdent] = @indentation tokens[i]
        indent.fromThen   = true if starter is 'THEN'
        if tag is 'THEN'
          leading_switch_when = @findTagsBackwards(i, ['LEADING_WHEN']) and @tag(i + 1) is 'IF'
          leading_if_then = @findTagsBackwards(i, ['IF']) and @tag(i + 1) is 'IF'
        ifThens.push i if tag is 'THEN' and @findTagsBackwards(i, ['IF'])
        # `ELSE` tag is not closed.
        if tag is 'ELSE' and @tag(i - 1) isnt 'OUTDENT'
          i = closeElseTag tokens, i
        tokens.splice i + 1, 0, indent
        @detectEnd i + 2, condition, action
        tokens.splice i, 1 if tag is 'THEN'
        return 1
      return 1
727
728  # Tag postfix conditionals as such, so that we can parse them with a
729  # different precedence.
730  tagPostfixConditionals: ->
731    original = null
732
733    condition = (token, i) ->
734      [tag] = token
735      [prevTag] = @tokens[i - 1]
736      tag is 'TERMINATOR' or (tag is 'INDENT' and prevTag not in SINGLE_LINERS)
737
738    action = (token, i) ->
739      if token[0] isnt 'INDENT' or (token.generated and not token.fromThen)
740        original[0] = 'POST_' + original[0]
741
742    @scanTokens (token, i) ->
743      return 1 unless token[0] is 'IF'
744      original = token
745      @detectEnd i + 1, condition, action
746      return 1
747
748  # For tokens with extra data, we want to make that data visible to the grammar
749  # by wrapping the token value as a String() object and setting the data as
750  # properties of that object. The grammar should then be responsible for
751  # cleaning this up for the node constructor: unwrapping the token value to a
752  # primitive string and separately passing any expected token data properties
753  exposeTokenDataToGrammar: ->
754    @scanTokens (token, i) ->
755      if token.generated or (token.data and Object.keys(token.data).length isnt 0)
756        token[1] = new String token[1]
757        token[1][key] = val for own key, val of (token.data ? {})
758        token[1].generated = yes if token.generated
759      1
760
761  # Generate the indentation tokens, based on another token on the same line.
762  indentation: (origin) ->
763    indent  = ['INDENT', 2]
764    outdent = ['OUTDENT', 2]
765    if origin
766      indent.generated = outdent.generated = yes
767      indent.origin = outdent.origin = origin
768    else
769      indent.explicit = outdent.explicit = yes
770    [indent, outdent]
771
  # Expose the module-level `generate` helper as an instance method, so the
  # rewriting passes can create implicit tokens via `@generate`.
  generate: generate

  # Look up a tag by token index; returns `undefined` when there is no token
  # at that index (the existential soak guards out-of-range lookups).
  tag: (i) -> @tokens[i]?[0]
776
777# Constants
778# ---------
779
# List of the token pairs that must be balanced: each entry is an
# `[opener, closer]` pair used below to build `INVERSES`, `EXPRESSION_START`
# and `EXPRESSION_END`.
# (Removed the stray trailing comma after the INDENT/OUTDENT entry so every
# entry is formatted consistently.)
BALANCED_PAIRS = [
  ['(', ')']
  ['[', ']']
  ['{', '}']
  ['INDENT', 'OUTDENT']
  ['CALL_START', 'CALL_END']
  ['PARAM_START', 'PARAM_END']
  ['INDEX_START', 'INDEX_END']
  ['STRING_START', 'STRING_END']
  ['INTERPOLATION_START', 'INTERPOLATION_END']
  ['REGEX_START', 'REGEX_END']
]
793
# The inverse mappings of `BALANCED_PAIRS` we’re trying to fix up, so we can
# look things up from either end.
exports.INVERSES = INVERSES = {}

# The tokens that signal the start/end of a balanced pair.
EXPRESSION_START = []
EXPRESSION_END   = []

# Populate the lookup tables from each balanced pair: map opener ↔ closer in
# `INVERSES`, and collect openers/closers into their respective lists.
for [left, right] in BALANCED_PAIRS
  INVERSES[right] = left
  INVERSES[left]  = right
  EXPRESSION_START.push left
  EXPRESSION_END.push right

# Tokens that indicate the close of a clause of an expression.
EXPRESSION_CLOSE = ['CATCH', 'THEN', 'ELSE', 'FINALLY'].concat EXPRESSION_END
808
# Tokens that, if followed by an `IMPLICIT_CALL`, indicate a function invocation.
IMPLICIT_FUNC    = ['IDENTIFIER', 'PROPERTY', 'SUPER', ')', 'CALL_END', ']', 'INDEX_END', '@', 'THIS']

# If preceded by an `IMPLICIT_FUNC`, indicates a function invocation.
IMPLICIT_CALL    = [
  'IDENTIFIER', 'JSX_TAG', 'PROPERTY', 'NUMBER', 'INFINITY', 'NAN'
  'STRING', 'STRING_START', 'REGEX', 'REGEX_START', 'JS'
  'NEW', 'PARAM_START', 'CLASS', 'IF', 'TRY', 'SWITCH', 'THIS'
  'UNDEFINED', 'NULL', 'BOOL'
  'UNARY', 'DO', 'DO_IIFE', 'YIELD', 'AWAIT', 'UNARY_MATH', 'SUPER', 'THROW'
  '@', '->', '=>', '[', '(', '{', '--', '++'
]

# Operators that indicate a function invocation only in the unspaced case
# (e.g. `f +x`) — NOTE(review): the spacing check lives in the implicit-call
# pass earlier in this file; confirm against that pass.
IMPLICIT_UNSPACED_CALL = ['+', '-']

# Tokens that always mark the end of an implicit call for single-liners.
IMPLICIT_END     = ['POST_IF', 'FOR', 'WHILE', 'UNTIL', 'WHEN', 'BY',
  'LOOP', 'TERMINATOR']

# Single-line flavors of block expressions that have unclosed endings.
# The grammar can’t disambiguate them, so we insert the implicit indentation.
SINGLE_LINERS    = ['ELSE', '->', '=>', 'TRY', 'FINALLY', 'THEN']
SINGLE_CLOSERS   = ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN']

# Tokens that end a line.
LINEBREAKS       = ['TERMINATOR', 'INDENT', 'OUTDENT']

# Tokens that close open calls when they follow a newline.
CALL_CLOSERS     = ['.', '?.', '::', '?::']

# Tokens that prevent a subsequent indent from ending implicit calls/objects
CONTROL_IN_IMPLICIT = ['IF', 'TRY', 'FINALLY', 'CATCH', 'CLASS', 'SWITCH']

# Tokens that are swallowed up by the parser, never leading to code generation.
# You can spot these in `grammar.coffee` because the `o` function second
# argument doesn’t contain a `new` call for these tokens.
# `STRING_START` isn’t on this list because its `locationData` matches that of
# the node that becomes `StringWithInterpolations`, and therefore
# `addDataToNode` attaches `STRING_START`’s tokens to that node.
DISCARDED = ['(', ')', '[', ']', '{', '}', ':', '.', '..', '...', ',', '=', '++', '--', '?',
  'AS', 'AWAIT', 'CALL_START', 'CALL_END', 'DEFAULT', 'DO', 'DO_IIFE', 'ELSE',
  'EXTENDS', 'EXPORT', 'FORIN', 'FOROF', 'FORFROM', 'IMPORT', 'INDENT', 'INDEX_SOAK',
  'INTERPOLATION_START', 'INTERPOLATION_END', 'LEADING_WHEN', 'OUTDENT', 'PARAM_END',
  'REGEX_START', 'REGEX_END', 'RETURN', 'STRING_END', 'THROW', 'UNARY', 'YIELD'
].concat IMPLICIT_UNSPACED_CALL.concat IMPLICIT_END.concat CALL_CLOSERS.concat CONTROL_IN_IMPLICIT

# Tokens that, when appearing at the end of a line, suppress a following TERMINATOR/INDENT token
exports.UNFINISHED = UNFINISHED = ['\\', '.', '?.', '?::', 'UNARY', 'DO', 'DO_IIFE', 'MATH', 'UNARY_MATH', '+', '-',
           '**', 'SHIFT', 'RELATION', 'COMPARE', '&', '^', '|', '&&', '||',
           'BIN?', 'EXTENDS']