/src/rewriter.coffee


# The CoffeeScript language has a good deal of optional syntax, implicit syntax,
# and shorthand syntax. This can greatly complicate a grammar and bloat
# the resulting parse table. Instead of making the parser handle it all, we take
# a series of passes over the token stream, using this **Rewriter** to convert
# shorthand into the unambiguous long form, add implicit indentation and
# parentheses, and generally clean things up.
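#
# For example (illustrative; token tags abbreviated): the shorthand call
# `f a: 1` is rewritten into the token-stream equivalent of `f({a: 1})` by
# inserting generated `CALL_START`/`CALL_END` and `{`/`}` tokens.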

{throwSyntaxError, extractAllCommentTokens} = require './helpers'

# Move attached comments from one token to another.
moveComments = (fromToken, toToken) ->
  return unless fromToken.comments
  if toToken.comments and toToken.comments.length isnt 0
    unshiftedComments = []
    for comment in fromToken.comments
      if comment.unshift
        unshiftedComments.push comment
      else
        toToken.comments.push comment
    toToken.comments = unshiftedComments.concat toToken.comments
  else
    toToken.comments = fromToken.comments
  delete fromToken.comments

# Create a generated token: one that exists due to a use of implicit syntax.
# Optionally have this new token take the attached comments from another token.
generate = (tag, value, origin, commentsToken) ->
  token = [tag, value]
  token.generated = yes
  token.origin = origin if origin
  moveComments commentsToken, token if commentsToken
  token
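
# For example (illustrative): `generate 'CALL_END', ')'` yields the token
# `['CALL_END', ')']` with a `generated = yes` property and no location data
# yet; `addLocationDataToGeneratedTokens` fills that in later.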

# The **Rewriter** class is used by the [Lexer](lexer.html), directly against
# its internal array of tokens.
exports.Rewriter = class Rewriter

  # Rewrite the token stream in multiple passes, one logical filter at
  # a time. This could certainly be changed into a single pass through the
  # stream, with a big ol' efficient switch, but it's much nicer to work with
  # like this. The order of these passes matters: indentation must be
  # corrected before implicit parentheses can be wrapped around blocks of code.
  rewrite: (@tokens) ->
    # Set environment variable `DEBUG_TOKEN_STREAM` to `true` to output token
    # debugging info. Also set `DEBUG_REWRITTEN_TOKEN_STREAM` to `true` to
    # output the token stream after it has been rewritten by this file.
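    #
    # For example (from a shell; illustrative):
    #
    #     DEBUG_TOKEN_STREAM=true DEBUG_REWRITTEN_TOKEN_STREAM=true coffee --compile app.coffee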
    if process?.env?.DEBUG_TOKEN_STREAM
      console.log 'Initial token stream:' if process.env.DEBUG_REWRITTEN_TOKEN_STREAM
      console.log (t[0] + '/' + t[1] + (if t.comments then '*' else '') for t in @tokens).join ' '
    @removeLeadingNewlines()
    @closeOpenCalls()
    @closeOpenIndexes()
    @normalizeLines()
    @tagPostfixConditionals()
    @addImplicitBracesAndParens()
    @rescueStowawayComments()
    @addLocationDataToGeneratedTokens()
    @enforceValidJSXAttributes()
    @fixIndentationLocationData()
    @exposeTokenDataToGrammar()
    if process?.env?.DEBUG_REWRITTEN_TOKEN_STREAM
      console.log 'Rewritten token stream:' if process.env.DEBUG_TOKEN_STREAM
      console.log (t[0] + '/' + t[1] + (if t.comments then '*' else '') for t in @tokens).join ' '
    @tokens

  # Rewrite the token stream, looking one token ahead and behind.
  # Allow the return value of the block to tell us how many tokens to move
  # forwards (or backwards) in the stream, to make sure we don't miss anything
  # as tokens are inserted and removed, and the stream changes length under
  # our feet.
  scanTokens: (block) ->
    {tokens} = this
    i = 0
    i += block.call this, token, i, tokens while token = tokens[i]
    true
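
  # For example (illustrative), a pass that deletes every `TERMINATOR` token:
  #
  #     @scanTokens (token, i, tokens) ->
  #       return 1 unless token[0] is 'TERMINATOR'
  #       tokens.splice i, 1
  #       0  # Re-scan index `i`, which now holds the following token.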

  # Walk the token stream from `i`, tracking nesting depth via
  # `EXPRESSION_START`/`EXPRESSION_END`, and fire `action` on the first token
  # at the current nesting level for which `condition` holds.
  detectEnd: (i, condition, action, opts = {}) ->
    {tokens} = this
    levels = 0
    while token = tokens[i]
      return action.call this, token, i if levels is 0 and condition.call this, token, i
      if token[0] in EXPRESSION_START
        levels += 1
      else if token[0] in EXPRESSION_END
        levels -= 1
      if levels < 0
        return if opts.returnOnNegativeLevel
        return action.call this, token, i
      i += 1
    i - 1
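
  # For example (illustrative): called with `i` just past a `CALL_START`, a
  # condition matching `')'` fires `action` on the paired close, skipping any
  # balanced pairs opened and closed in between.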

  # Leading newlines would introduce an ambiguity in the grammar, so we
  # dispatch them here.
  removeLeadingNewlines: ->
    # Find the index of the first non-`TERMINATOR` token.
    break for [tag], i in @tokens when tag isnt 'TERMINATOR'
    return if i is 0
    # If there are any comments attached to the tokens we're about to discard,
    # shift them forward to what will become the new first token.
    for leadingNewlineToken in @tokens[0...i]
      moveComments leadingNewlineToken, @tokens[i]
    # Discard all the leading newline tokens.
    @tokens.splice 0, i

  # The lexer has tagged the opening parenthesis of a method call. Match it with
  # its paired close.
  closeOpenCalls: ->
    condition = (token, i) ->
      token[0] in [')', 'CALL_END']

    action = (token, i) ->
      token[0] = 'CALL_END'

    @scanTokens (token, i) ->
      @detectEnd i + 1, condition, action if token[0] is 'CALL_START'
      1

  # The lexer has tagged the opening bracket of an indexing operation call.
  # Match it with its paired close.
  closeOpenIndexes: ->
    startToken = null
    condition = (token, i) ->
      token[0] in [']', 'INDEX_END']

    action = (token, i) ->
      if @tokens.length >= i and @tokens[i + 1][0] is ':'
        startToken[0] = '['
        token[0] = ']'
      else
        token[0] = 'INDEX_END'

    @scanTokens (token, i) ->
      if token[0] is 'INDEX_START'
        startToken = token
        @detectEnd i + 1, condition, action
      1

  # Match tags in token stream starting at `i` with `pattern`.
  # `pattern` may consist of strings (equality), an array of strings (one of)
  # or null (wildcard). Returns the index of the match or -1 if no match.
  indexOfTag: (i, pattern...) ->
    fuzz = 0
    for j in [0 ... pattern.length]
      continue if not pattern[j]?
      pattern[j] = [pattern[j]] if typeof pattern[j] is 'string'
      return -1 if @tag(i + j + fuzz) not in pattern[j]
    i + j + fuzz - 1
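
  # For example (illustrative): `@indexOfTag(i, '@', null, ':')` matches the
  # three tokens of `@name:` starting at `i`; `null` accepts any single tag.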

  # Returns `yes` if standing in front of something looking like
  # `@<x>:`, `<x>:` or `<EXPRESSION_START><x>...<EXPRESSION_END>:`.
  looksObjectish: (j) ->
    return yes if @indexOfTag(j, '@', null, ':') isnt -1 or @indexOfTag(j, null, ':') isnt -1
    index = @indexOfTag j, EXPRESSION_START
    if index isnt -1
      end = null
      @detectEnd index + 1, ((token) -> token[0] in EXPRESSION_END), ((token, i) -> end = i)
      return yes if @tag(end + 1) is ':'
    no

  # Returns `yes` if the current line of tokens contains an element of `tags`
  # at the same expression level. Stops searching at `LINEBREAKS` or the
  # explicit start of the containing balanced expression.
  findTagsBackwards: (i, tags) ->
    backStack = []
    while i >= 0 and (backStack.length or
          @tag(i) not in tags and
          (@tag(i) not in EXPRESSION_START or @tokens[i].generated) and
          @tag(i) not in LINEBREAKS)
      backStack.push @tag(i) if @tag(i) in EXPRESSION_END
      backStack.pop() if @tag(i) in EXPRESSION_START and backStack.length
      i -= 1
    @tag(i) in tags

  # Look for signs of implicit calls and objects in the token stream and
  # add them.
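  #
  # For example (illustrative): `f a: 1, b: 2` becomes the token equivalent
  # of `f({a: 1, b: 2})` via generated `CALL_START`/`CALL_END` and `{`/`}`
  # tokens around the arguments.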
  addImplicitBracesAndParens: ->
    # Track current balancing depth (both implicit and explicit) on stack.
    stack = []
    start = null

    @scanTokens (token, i, tokens) ->
      [tag]     = token
      [prevTag] = prevToken = if i > 0 then tokens[i - 1] else []
      [nextTag] = nextToken = if i < tokens.length - 1 then tokens[i + 1] else []
      stackTop  = -> stack[stack.length - 1]
      startIdx  = i

      # Helper function, used for keeping track of the number of tokens consumed
      # and spliced, when returning for getting a new token.
      forward = (n) -> i - startIdx + n

      # Helper functions
      isImplicit        = (stackItem) -> stackItem?[2]?.ours
      isImplicitObject  = (stackItem) -> isImplicit(stackItem) and stackItem?[0] is '{'
      isImplicitCall    = (stackItem) -> isImplicit(stackItem) and stackItem?[0] is '('
      inImplicit        = -> isImplicit stackTop()
      inImplicitCall    = -> isImplicitCall stackTop()
      inImplicitObject  = -> isImplicitObject stackTop()
      # Unclosed control statement inside implicit parens (like
      # class declaration or if-conditionals).
      inImplicitControl = -> inImplicit() and stackTop()?[0] is 'CONTROL'

      startImplicitCall = (idx) ->
        stack.push ['(', idx, ours: yes]
        tokens.splice idx, 0, generate 'CALL_START', '(', ['', 'implicit function call', token[2]], prevToken

      endImplicitCall = ->
        stack.pop()
        tokens.splice i, 0, generate 'CALL_END', ')', ['', 'end of input', token[2]], prevToken
        i += 1

      startImplicitObject = (idx, {startsLine = yes, continuationLineIndent} = {}) ->
        stack.push ['{', idx, sameLine: yes, startsLine: startsLine, ours: yes, continuationLineIndent: continuationLineIndent]
        val = new String '{'
        val.generated = yes
        tokens.splice idx, 0, generate '{', val, token, prevToken

      endImplicitObject = (j) ->
        j = j ? i
        stack.pop()
        tokens.splice j, 0, generate '}', '}', token, prevToken
        i += 1

      implicitObjectContinues = (j) =>
        nextTerminatorIdx = null
        @detectEnd j,
          (token) -> token[0] is 'TERMINATOR'
          (token, i) -> nextTerminatorIdx = i
          returnOnNegativeLevel: yes
        return no unless nextTerminatorIdx?
        @looksObjectish nextTerminatorIdx + 1

      # Don't end an implicit call/object on next indent if any of these are in an argument/value.
      if (
        (inImplicitCall() or inImplicitObject()) and tag in CONTROL_IN_IMPLICIT or
        inImplicitObject() and prevTag is ':' and tag is 'FOR'
      )
        stack.push ['CONTROL', i, ours: yes]
        return forward(1)

      if tag is 'INDENT' and inImplicit()
        # An `INDENT` closes an implicit call unless
        #
        #  1. We have seen a `CONTROL` argument on the line.
        #  2. The last token before the indent is part of the list below.
        if prevTag not in ['=>', '->', '[', '(', ',', '{', 'ELSE', '=']
          while inImplicitCall() or inImplicitObject() and prevTag isnt ':'
            if inImplicitCall()
              endImplicitCall()
            else
              endImplicitObject()
        stack.pop() if inImplicitControl()
        stack.push [tag, i]
        return forward(1)

      # Straightforward start of explicit expression.
      if tag in EXPRESSION_START
        stack.push [tag, i]
        return forward(1)

      # Close all implicit expressions inside of explicitly closed expressions.
      if tag in EXPRESSION_END
        while inImplicit()
          if inImplicitCall()
            endImplicitCall()
          else if inImplicitObject()
            endImplicitObject()
          else
            stack.pop()
        start = stack.pop()

      inControlFlow = =>
        seenFor = @findTagsBackwards(i, ['FOR']) and @findTagsBackwards(i, ['FORIN', 'FOROF', 'FORFROM'])
        controlFlow = seenFor or @findTagsBackwards i, ['WHILE', 'UNTIL', 'LOOP', 'LEADING_WHEN']
        return no unless controlFlow
        isFunc = no
        tagCurrentLine = token[2].first_line
        @detectEnd i,
          (token, i) -> token[0] in LINEBREAKS
          (token, i) ->
            [prevTag, , {first_line}] = tokens[i - 1] || []
            isFunc = tagCurrentLine is first_line and prevTag in ['->', '=>']
          returnOnNegativeLevel: yes
        isFunc

      # Recognize standard implicit calls like
      # `f a`, `f() b`, `f? c`, `h[0] d` etc.
      # Added support for spread dots on the left side: `f ...a`
      if (tag in IMPLICIT_FUNC and token.spaced or
          tag is '?' and i > 0 and not tokens[i - 1].spaced) and
         (nextTag in IMPLICIT_CALL or
          (nextTag is '...' and @tag(i + 2) in IMPLICIT_CALL and not @findTagsBackwards(i, ['INDEX_START', '['])) or
          nextTag in IMPLICIT_UNSPACED_CALL and
          not nextToken.spaced and not nextToken.newLine) and
         not inControlFlow()
        tag = token[0] = 'FUNC_EXIST' if tag is '?'
        startImplicitCall i + 1
        return forward(2)

      # Implicit call taking an implicit indented object as first argument.
      #
      #     f
      #       a: b
      #       c: d
      #
      # Don't accept implicit calls of this type, when on the same line
      # as the control structures below, as that may misinterpret constructs like:
      #
      #     if f
      #        a: 1
      #
      # as
      #
      #     if f(a: 1)
      #
      # which is probably always unintended.
      # Furthermore don't allow this in literal arrays, as
      # that creates grammatical ambiguities.
      if tag in IMPLICIT_FUNC and
         @indexOfTag(i + 1, 'INDENT') > -1 and @looksObjectish(i + 2) and
         not @findTagsBackwards(i, ['CLASS', 'EXTENDS', 'IF', 'CATCH',
           'SWITCH', 'LEADING_WHEN', 'FOR', 'WHILE', 'UNTIL'])
        startImplicitCall i + 1
        stack.push ['INDENT', i + 2]
        return forward(3)

      # Implicit objects start here.
      if tag is ':'
        # Go back to the (implicit) start of the object.
        s = switch
          when @tag(i - 1) in EXPRESSION_END
            [startTag, startIndex] = start
            if startTag is '[' and startIndex > 0 and @tag(startIndex - 1) is '@' and not tokens[startIndex - 1].spaced
              startIndex - 1
            else
              startIndex
          when @tag(i - 2) is '@' then i - 2
          else i - 1

        startsLine = s <= 0 or @tag(s - 1) in LINEBREAKS or tokens[s - 1].newLine
        # Are we just continuing an already declared object?
        if stackTop()
          [stackTag, stackIdx] = stackTop()
          if (stackTag is '{' or stackTag is 'INDENT' and @tag(stackIdx - 1) is '{') and
             (startsLine or @tag(s - 1) is ',' or @tag(s - 1) is '{') and
             @tag(s - 1) not in UNFINISHED
            return forward(1)

        preObjectToken = if i > 1 then tokens[i - 2] else []
        startImplicitObject(s, {startsLine: !!startsLine, continuationLineIndent: preObjectToken.continuationLineIndent})
        return forward(2)

      # End implicit calls when chaining method calls
      # like e.g.:
      #
      #     f ->
      #       a
      #     .g b, ->
      #       c
      #     .h a
      #
      # and also
      #
      #     f a
      #     .g b
      #     .h a

      # Mark all enclosing objects as not sameLine.
      if tag in LINEBREAKS
        for stackItem in stack by -1
          break unless isImplicit stackItem
          stackItem[2].sameLine = no if isImplicitObject stackItem

      # End indented-continuation-line implicit objects once that indentation is over.
      if tag is 'TERMINATOR' and token.endsContinuationLineIndentation
        {preContinuationLineIndent} = token.endsContinuationLineIndentation
        while inImplicitObject() and (implicitObjectIndent = stackTop()[2].continuationLineIndent)? and implicitObjectIndent > preContinuationLineIndent
          endImplicitObject()

      newLine = prevTag is 'OUTDENT' or prevToken.newLine
      if tag in IMPLICIT_END or
          (tag in CALL_CLOSERS and newLine) or
          (tag in ['..', '...'] and @findTagsBackwards(i, ['INDEX_START']))
        while inImplicit()
          [stackTag, stackIdx, {sameLine, startsLine}] = stackTop()
          # Close implicit calls when reached end of argument list.
          if inImplicitCall() and prevTag isnt ',' or
             (prevTag is ',' and tag is 'TERMINATOR' and not nextTag?)
            endImplicitCall()
          # Close implicit objects such as:
          #     return a: 1, b: 2 unless true
          else if inImplicitObject() and sameLine and
                  tag isnt 'TERMINATOR' and prevTag isnt ':' and
                  not (tag in ['POST_IF', 'FOR', 'WHILE', 'UNTIL'] and startsLine and implicitObjectContinues(i + 1))
            endImplicitObject()
          # Close implicit objects at the end of a line that didn't end with a
          # comma, if the implicit object didn't start the line or the next
          # line doesn't look like the continuation of an object.
          else if inImplicitObject() and tag is 'TERMINATOR' and prevTag isnt ',' and
                  not (startsLine and @looksObjectish(i + 1))
            endImplicitObject()
          else if inImplicitControl() and tokens[stackTop()[1]][0] is 'CLASS' and tag is 'TERMINATOR'
            stack.pop()
          else
            break

      # Close implicit object if comma is the last character
      # and what comes after doesn't look like it belongs.
      # This is used for trailing commas and calls, like:
      #
      #     x =
      #         a: b,
      #         c: d,
      #     e = 2
      #
      # and
      #
      #     f a, b: c, d: e, f, g: h: i, j
      #
      if tag is ',' and not @looksObjectish(i + 1) and inImplicitObject() and not (@tag(i + 2) in ['FOROF', 'FORIN']) and
         (nextTag isnt 'TERMINATOR' or not @looksObjectish(i + 2))
        # When nextTag is OUTDENT the comma is insignificant and
        # should just be ignored, so embed it in the implicit object.
        #
        # When it isn't, the comma goes on to play a role in a call or
        # array further up the stack, so give it a chance.
        offset = if nextTag is 'OUTDENT' then 1 else 0
        while inImplicitObject()
          endImplicitObject i + offset
      return forward(1)

  # Make sure only strings and wrapped expressions are used in JSX attributes.
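  #
  # For example (illustrative): `<div a="b" />` and `<div a={b} />` are
  # accepted, while an unquoted, unwrapped value such as `<div a=b />`
  # raises the syntax error below.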
  enforceValidJSXAttributes: ->
    @scanTokens (token, i, tokens) ->
      if token.jsxColon
        next = tokens[i + 1]
        if next[0] not in ['STRING_START', 'STRING', '(']
          throwSyntaxError 'expected wrapped or quoted JSX attribute', next[2]
      return 1

  # Not all tokens survive processing by the parser. To avoid comments getting
  # lost into the ether, find comments attached to doomed tokens and move them
  # to a token that will make it to the other side.
  rescueStowawayComments: ->
    insertPlaceholder = (token, j, tokens, method) ->
      tokens[method] generate 'TERMINATOR', '\n', tokens[j] unless tokens[j][0] is 'TERMINATOR'
      tokens[method] generate 'JS', '', tokens[j], token

    dontShiftForward = (i, tokens) ->
      j = i + 1
      while j isnt tokens.length and tokens[j][0] in DISCARDED
        return yes if tokens[j][0] is 'INTERPOLATION_END'
        j++
      no

    shiftCommentsForward = (token, i, tokens) ->
      # Find the next surviving token and attach this token's comments to it,
      # with a flag that we know to output such comments *before* that
      # token's own compilation. (Otherwise comments are output following
      # the token they're attached to.)
      j = i
      j++ while j isnt tokens.length and tokens[j][0] in DISCARDED
      unless j is tokens.length or tokens[j][0] in DISCARDED
        comment.unshift = yes for comment in token.comments
        moveComments token, tokens[j]
        return 1
      else # All following tokens are doomed!
        j = tokens.length - 1
        insertPlaceholder token, j, tokens, 'push'
        # The generated tokens were added to the end, not inline, so we don't skip.
        return 1

    shiftCommentsBackward = (token, i, tokens) ->
      # Find the last surviving token and attach this token's comments to it.
      j = i
      j-- while j isnt -1 and tokens[j][0] in DISCARDED
      unless j is -1 or tokens[j][0] in DISCARDED
        moveComments token, tokens[j]
        return 1
      else # All previous tokens are doomed!
        insertPlaceholder token, 0, tokens, 'unshift'
        # We added two tokens, so shift forward to account for the insertion.
        return 3

    @scanTokens (token, i, tokens) ->
      return 1 unless token.comments
      ret = 1
      if token[0] in DISCARDED
        # This token won't survive passage through the parser, so we need to
        # rescue its attached tokens and redistribute them to nearby tokens.
        # Comments that don't start a new line can shift backwards to the last
        # safe token, while other tokens should shift forward.
        dummyToken = comments: []
        j = token.comments.length - 1
        until j is -1
          if token.comments[j].newLine is no and token.comments[j].here is no
            dummyToken.comments.unshift token.comments[j]
            token.comments.splice j, 1
          j--
        if dummyToken.comments.length isnt 0
          ret = shiftCommentsBackward dummyToken, i - 1, tokens
        if token.comments.length isnt 0
          shiftCommentsForward token, i, tokens
      else unless dontShiftForward i, tokens
        # If any of this token's comments start a line (there's only
        # whitespace between the preceding newline and the start of the
        # comment) and this isn't one of the special `JS` tokens, then
        # shift this comment forward to precede the next valid token.
        # `Block.compileComments` also has logic to make sure that
        # starting new line comments follow or precede the nearest
        # newline relative to the token that the comment is attached to,
        # but that newline might be inside a `}` or `)` or other generated
        # token that we really want this comment to output after. Therefore
        # we need to shift the comments here, avoiding such generated and
        # discarded tokens.
        dummyToken = comments: []
        j = token.comments.length - 1
        until j is -1
          if token.comments[j].newLine and not token.comments[j].unshift and
             not (token[0] is 'JS' and token.generated)
            dummyToken.comments.unshift token.comments[j]
            token.comments.splice j, 1
          j--
        if dummyToken.comments.length isnt 0
          ret = shiftCommentsForward dummyToken, i + 1, tokens
      delete token.comments if token.comments?.length is 0
      ret

  # Add location data to all tokens generated by the rewriter.
  addLocationDataToGeneratedTokens: ->
    @scanTokens (token, i, tokens) ->
      return 1 if token[2]
      return 1 unless token.generated or token.explicit
      if token.fromThen and token[0] is 'INDENT'
        token[2] = token.origin[2]
        return 1
      if token[0] is '{' and nextLocation = tokens[i + 1]?[2]
        {first_line: line, first_column: column, range: [rangeIndex]} = nextLocation
      else if prevLocation = tokens[i - 1]?[2]
        {last_line: line, last_column: column, range: [, rangeIndex]} = prevLocation
        column += 1
      else
        line = column = 0
        rangeIndex = 0
      token[2] = {
        first_line: line
        first_column: column
        last_line: line
        last_column: column
        last_line_exclusive: line
        last_column_exclusive: column
        range: [rangeIndex, rangeIndex]
      }
      return 1

  # `OUTDENT` tokens should always be positioned at the last character of the
  # previous token, so that AST nodes ending in an `OUTDENT` token end up with a
  # location corresponding to the last real token under the node.
  fixIndentationLocationData: ->
    @allComments ?= extractAllCommentTokens @tokens
    findPrecedingComment = (token, {afterPosition, indentSize, first, indented}) =>
      tokenStart = token[2].range[0]
      matches = (comment) ->
        if comment.outdented
          return no unless indentSize? and comment.indentSize > indentSize
        return no if indented and not comment.indented
        return no unless comment.locationData.range[0] < tokenStart
        return no unless comment.locationData.range[0] > afterPosition
        yes
      if first
        lastMatching = null
        for comment in @allComments by -1
          if matches comment
            lastMatching = comment
          else if lastMatching
            return lastMatching
        return lastMatching
      for comment in @allComments by -1 when matches comment
        return comment
      null

    @scanTokens (token, i, tokens) ->
      return 1 unless token[0] in ['INDENT', 'OUTDENT'] or
        (token.generated and token[0] is 'CALL_END' and not token.data?.closingTagNameToken) or
        (token.generated and token[0] is '}')
      isIndent = token[0] is 'INDENT'
      prevToken = token.prevToken ? tokens[i - 1]
      prevLocationData = prevToken[2]
      # `addLocationDataToGeneratedTokens()` set the outdent's location data
      # to the preceding token's, but in order to detect comments inside an
      # empty "block" we want to look for comments preceding the next token.
      useNextToken = token.explicit or token.generated
      if useNextToken
        nextToken = token
        nextTokenIndex = i
        nextToken = tokens[nextTokenIndex++] while (nextToken.explicit or nextToken.generated) and nextTokenIndex isnt tokens.length - 1
      precedingComment = findPrecedingComment(
        if useNextToken
          nextToken
        else
          token
        afterPosition: prevLocationData.range[0]
        indentSize: token.indentSize
        first: isIndent
        indented: useNextToken
      )
      if isIndent
        return 1 unless precedingComment?.newLine
      # We don't want e.g. an implicit call at the end of an `if` condition to
      # include a following indented comment.
      return 1 if token.generated and token[0] is 'CALL_END' and precedingComment?.indented
      prevLocationData = precedingComment.locationData if precedingComment?
      token[2] =
        first_line:
          if precedingComment?
            prevLocationData.first_line
          else
            prevLocationData.last_line
        first_column:
          if precedingComment?
            if isIndent
              0
            else
              prevLocationData.first_column
          else
            prevLocationData.last_column
        last_line: prevLocationData.last_line
        last_column: prevLocationData.last_column
        last_line_exclusive: prevLocationData.last_line_exclusive
        last_column_exclusive: prevLocationData.last_column_exclusive
        range:
          if isIndent and precedingComment?
            [
              prevLocationData.range[0] - precedingComment.indentSize
              prevLocationData.range[1]
            ]
          else
            prevLocationData.range
      return 1

  # Because our grammar is LALR(1), it can't handle some single-line
  # expressions that lack ending delimiters. The **Rewriter** adds the implicit
  # blocks, so it doesn't need to. To keep the grammar clean and tidy, trailing
  # newlines within expressions are removed and the indentation tokens of empty
  # blocks are added.
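  #
  # For example (illustrative): `if a then b` has no real indentation, so
  # generated `INDENT`/`OUTDENT` tokens are spliced around `b`, making it
  # equivalent to the multi-line form of the `if`.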
  normalizeLines: ->
    starter = indent = outdent = null
    leading_switch_when = null
    leading_if_then = null
    # Count `THEN` tags.
    ifThens = []

    condition = (token, i) ->
      token[1] isnt ';' and token[0] in SINGLE_CLOSERS and
      not (token[0] is 'TERMINATOR' and @tag(i + 1) in EXPRESSION_CLOSE) and
      not (token[0] is 'ELSE' and
           (starter isnt 'THEN' or (leading_if_then or leading_switch_when))) and
      not (token[0] in ['CATCH', 'FINALLY'] and starter in ['->', '=>']) or
      token[0] in CALL_CLOSERS and
      (@tokens[i - 1].newLine or @tokens[i - 1][0] is 'OUTDENT')

    action = (token, i) ->
      ifThens.pop() if token[0] is 'ELSE' and starter is 'THEN'
      @tokens.splice (if @tag(i - 1) is ',' then i - 1 else i), 0, outdent

    closeElseTag = (tokens, i) =>
      tlen = ifThens.length
      return i unless tlen > 0
      lastThen = ifThens.pop()
      [, outdentElse] = @indentation tokens[lastThen]
      # Insert `OUTDENT` to close inner `IF`.
      outdentElse[1] = tlen * 2
      tokens.splice(i, 0, outdentElse)
      # Insert `OUTDENT` to close outer `IF`.
      outdentElse[1] = 2
      tokens.splice(i + 1, 0, outdentElse)
      # Remove outdents from the end.
      @detectEnd i + 2,
        (token, i) -> token[0] in ['OUTDENT', 'TERMINATOR']
        (token, i) ->
          if @tag(i) is 'OUTDENT' and @tag(i + 1) is 'OUTDENT'
            tokens.splice i, 2
      i + 2

    @scanTokens (token, i, tokens) ->
      [tag] = token
      conditionTag = tag in ['->', '=>'] and
        @findTagsBackwards(i, ['IF', 'WHILE', 'FOR', 'UNTIL', 'SWITCH', 'WHEN', 'LEADING_WHEN', '[', 'INDEX_START']) and
        not (@findTagsBackwards i, ['THEN', '..', '...'])

      if tag is 'TERMINATOR'
        if @tag(i + 1) is 'ELSE' and @tag(i - 1) isnt 'OUTDENT'
          tokens.splice i, 1, @indentation()...
          return 1
        if @tag(i + 1) in EXPRESSION_CLOSE
          if token[1] is ';' and @tag(i + 1) is 'OUTDENT'
            tokens[i + 1].prevToken = token
            moveComments token, tokens[i + 1]
          tokens.splice i, 1
          return 0
      if tag is 'CATCH'
        for j in [1..2] when @tag(i + j) in ['OUTDENT', 'TERMINATOR', 'FINALLY']
          tokens.splice i + j, 0, @indentation()...
          return 2 + j
      if tag in ['->', '=>'] and (@tag(i + 1) in [',', ']'] or @tag(i + 1) is '.' and token.newLine)
        [indent, outdent] = @indentation tokens[i]
        tokens.splice i + 1, 0, indent, outdent
        return 1
      if tag in SINGLE_LINERS and @tag(i + 1) isnt 'INDENT' and
         not (tag is 'ELSE' and @tag(i + 1) is 'IF') and
         not conditionTag
        starter = tag
        [indent, outdent] = @indentation tokens[i]
        indent.fromThen = true if starter is 'THEN'
        if tag is 'THEN'
          leading_switch_when = @findTagsBackwards(i, ['LEADING_WHEN']) and @tag(i + 1) is 'IF'
          leading_if_then = @findTagsBackwards(i, ['IF']) and @tag(i + 1) is 'IF'
        ifThens.push i if tag is 'THEN' and @findTagsBackwards(i, ['IF'])
        # `ELSE` tag is not closed.
        if tag is 'ELSE' and @tag(i - 1) isnt 'OUTDENT'
          i = closeElseTag tokens, i
        tokens.splice i + 1, 0, indent
        @detectEnd i + 2, condition, action
        tokens.splice i, 1 if tag is 'THEN'
        return 1
      return 1

  # Tag postfix conditionals as such, so that we can parse them with a
  # different precedence.
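  #
  # For example (illustrative): in `a if b`, the `IF` is followed by a
  # `TERMINATOR` rather than an indented block, so it is retagged `POST_IF`.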
  tagPostfixConditionals: ->
    original = null

    condition = (token, i) ->
      [tag] = token
      [prevTag] = @tokens[i - 1]
      tag is 'TERMINATOR' or (tag is 'INDENT' and prevTag not in SINGLE_LINERS)

    action = (token, i) ->
      if token[0] isnt 'INDENT' or (token.generated and not token.fromThen)
        original[0] = 'POST_' + original[0]

    @scanTokens (token, i) ->
      return 1 unless token[0] is 'IF'
      original = token
      @detectEnd i + 1, condition, action
      return 1

  # For tokens with extra data, we want to make that data visible to the grammar
  # by wrapping the token value as a String() object and setting the data as
  # properties of that object. The grammar should then be responsible for
  # cleaning this up for the node constructor: unwrapping the token value to a
  # primitive string and separately passing any expected token data properties.
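  #
  # For example (illustrative): a generated `{` token's value becomes
  # `new String('{')` carrying a `generated = yes` property, which the
  # grammar later unwraps back to a primitive string.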
  exposeTokenDataToGrammar: ->
    @scanTokens (token, i) ->
      if token.generated or (token.data and Object.keys(token.data).length isnt 0)
        token[1] = new String token[1]
        token[1][key] = val for own key, val of (token.data ? {})
        token[1].generated = yes if token.generated
      1

  # Generate the indentation tokens, based on another token on the same line.
  indentation: (origin) ->
    indent  = ['INDENT', 2]
    outdent = ['OUTDENT', 2]
    if origin
      indent.generated = outdent.generated = yes
      indent.origin = outdent.origin = origin
    else
      indent.explicit = outdent.explicit = yes
    [indent, outdent]

  generate: generate

  # Look up a tag by token index.
  tag: (i) -> @tokens[i]?[0]

# Constants
# ---------

# List of the token pairs that must be balanced.
BALANCED_PAIRS = [
  ['(', ')']
  ['[', ']']
  ['{', '}']
  ['INDENT', 'OUTDENT']
  ['CALL_START', 'CALL_END']
  ['PARAM_START', 'PARAM_END']
  ['INDEX_START', 'INDEX_END']
  ['STRING_START', 'STRING_END']
  ['INTERPOLATION_START', 'INTERPOLATION_END']
  ['REGEX_START', 'REGEX_END']
]

# The inverse mappings of `BALANCED_PAIRS` we're trying to fix up, so we can
# look things up from either end.
exports.INVERSES = INVERSES = {}

# The tokens that signal the start/end of a balanced pair.
EXPRESSION_START = []
EXPRESSION_END   = []

for [left, right] in BALANCED_PAIRS
  EXPRESSION_START.push INVERSES[right] = left
  EXPRESSION_END.push INVERSES[left] = right
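
# For example, after this loop `INVERSES['(']` is `')'` and `INVERSES[')']` is
# `'('`, so either end of a pair can be looked up from the other.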

# Tokens that indicate the close of a clause of an expression.
EXPRESSION_CLOSE = ['CATCH', 'THEN', 'ELSE', 'FINALLY'].concat EXPRESSION_END

# Tokens that, if followed by an `IMPLICIT_CALL`, indicate a function invocation.
IMPLICIT_FUNC = ['IDENTIFIER', 'PROPERTY', 'SUPER', ')', 'CALL_END', ']', 'INDEX_END', '@', 'THIS']

# If preceded by an `IMPLICIT_FUNC`, indicates a function invocation.
IMPLICIT_CALL = [
  'IDENTIFIER', 'JSX_TAG', 'PROPERTY', 'NUMBER', 'INFINITY', 'NAN'
  'STRING', 'STRING_START', 'REGEX', 'REGEX_START', 'JS'
  'NEW', 'PARAM_START', 'CLASS', 'IF', 'TRY', 'SWITCH', 'THIS'
  'UNDEFINED', 'NULL', 'BOOL'
  'UNARY', 'DO', 'DO_IIFE', 'YIELD', 'AWAIT', 'UNARY_MATH', 'SUPER', 'THROW'
  '@', '->', '=>', '[', '(', '{', '--', '++'
]

IMPLICIT_UNSPACED_CALL = ['+', '-']

# Tokens that always mark the end of an implicit call for single-liners.
IMPLICIT_END = ['POST_IF', 'FOR', 'WHILE', 'UNTIL', 'WHEN', 'BY',
  'LOOP', 'TERMINATOR']

# Single-line flavors of block expressions that have unclosed endings.
# The grammar can't disambiguate them, so we insert the implicit indentation.
SINGLE_LINERS = ['ELSE', '->', '=>', 'TRY', 'FINALLY', 'THEN']
SINGLE_CLOSERS = ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN']

# Tokens that end a line.
LINEBREAKS = ['TERMINATOR', 'INDENT', 'OUTDENT']

# Tokens that close open calls when they follow a newline.
CALL_CLOSERS = ['.', '?.', '::', '?::']

# Tokens that prevent a subsequent indent from ending implicit calls/objects.
CONTROL_IN_IMPLICIT = ['IF', 'TRY', 'FINALLY', 'CATCH', 'CLASS', 'SWITCH']

# Tokens that are swallowed up by the parser, never leading to code generation.
# You can spot these in `grammar.coffee` because the `o` function's second
# argument doesn't contain a `new` call for these tokens.
# `STRING_START` isn't on this list because its `locationData` matches that of
# the node that becomes `StringWithInterpolations`, and therefore
# `addDataToNode` attaches `STRING_START`'s tokens to that node.
DISCARDED = ['(', ')', '[', ']', '{', '}', ':', '.', '..', '...', ',', '=', '++', '--', '?',
  'AS', 'AWAIT', 'CALL_START', 'CALL_END', 'DEFAULT', 'DO', 'DO_IIFE', 'ELSE',
  'EXTENDS', 'EXPORT', 'FORIN', 'FOROF', 'FORFROM', 'IMPORT', 'INDENT', 'INDEX_SOAK',
  'INTERPOLATION_START', 'INTERPOLATION_END', 'LEADING_WHEN', 'OUTDENT', 'PARAM_END',
  'REGEX_START', 'REGEX_END', 'RETURN', 'STRING_END', 'THROW', 'UNARY', 'YIELD'
].concat IMPLICIT_UNSPACED_CALL.concat IMPLICIT_END.concat CALL_CLOSERS.concat CONTROL_IN_IMPLICIT

# Tokens that, when appearing at the end of a line, suppress a following
# `TERMINATOR`/`INDENT` token.
exports.UNFINISHED = UNFINISHED = ['\\', '.', '?.', '?::', 'UNARY', 'DO', 'DO_IIFE', 'MATH', 'UNARY_MATH', '+', '-',
  '**', 'SHIFT', 'RELATION', 'COMPARE', '&', '^', '|', '&&', '||',
  'BIN?', 'EXTENDS']