PageRenderTime 52ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/src/parser.js

https://github.com/marcuswestin/fun
JavaScript | 646 lines | 499 code | 95 blank | 52 comment | 118 complexity | d32aa691399470c37ce3bbe7046cc274 MD5 | raw file
  1. var util = require('./util'),
  2. curry = require('std/curry'),
  3. isArray = require('std/isArray'),
  4. q = util.q,
  5. halt = util.halt,
  6. assert = util.assert
  // Bracket-like symbols, named so the grammar code reads clearly.
  var L_PAREN = '('
  var R_PAREN = ')'
  var L_CURLY = '{'
  var R_CURLY = '}'
  var L_BRACKET = '['
  var R_BRACKET = ']'
  // Shared parser cursor: gTokens is the token list being parsed, gIndex the
  // index of the most recently consumed token and gToken that token itself.
  // gState is declared but not referenced by the code visible in this file.
  var gToken
  var gIndex
  var gTokens
  var gState
  11. exports.parse = function(tokens) {
  12. gTokens = tokens
  13. gIndex = -1
  14. gToken = null
  15. var ast = []
  16. var setupAST
  17. while (setupAST = parseImports()) { ast.push(setupAST) }
  18. while (peek()) { ast.push(parseTemplateBlock()) }
  19. return util.cleanup(ast)
  20. }
  21. /************************************************
  22. * Imports come before any of the emitting code *
  23. ************************************************/
  // Parses one import statement if the next token is the `import` keyword;
  // otherwise returns undefined without consuming anything.
  function parseImports() {
    if (!peek('keyword', 'import')) { return }
    return _parseImportStatement()
  }
  // Parses a single import statement. Two forms are accepted:
  //   import "./foo/bar"                  -> { type:'IMPORT_FILE', path }
  //   import foo/bar, import ../foo/bar   -> { type:'IMPORT', path }
  // In the second form the path pieces must follow each other without
  // whitespace (they are read with peekNoWhitespace).
  var _parseImportStatement = astGenerator(function() {
    advance('keyword', 'import')
    if (peek('string')) {
      // back compat import "./foo/bar"
      // The string token's value already is the path text.
      return { type:'IMPORT_FILE', path:advance('string').value }
    }
    // `import` as the very last token, or followed by a line break, has no path
    if (!peek() || peekNewline()) { halt(gToken, 'Expected an import path') }
    if (!peek('symbol', ['.', '/']) && !peek('name')) {
      halt(peek(), 'Expected an import path')
    }
    var first = advance(['symbol', 'name'])
    var path = first.value
    if (first.type == 'name' && peekNoWhitespace('symbol', '/')) {
      path += advance().value
    } else if (first.value == '.') {
      // relative prefix such as ./ or ../../
      while (peekNoWhitespace('symbol', ['.', '/'])) {
        path += advance().value
      }
    }
    assert(gToken, path[path.length-1] != '.', 'Bad import path')
    // remaining name segments, each optionally followed by a slash
    while (peekNoWhitespace('name')) {
      path += advance().value
      if (peekNoWhitespace('symbol', '/')) {
        path += advance().value
      }
    }
    return { type:'IMPORT', path:path }
  })
  57. /*************
  58. * Templates *
  59. *************/
  // template(<args>) { <template statements> }  ->  TEMPLATE node
  var parseTemplateLiteral = astGenerator(function() {
    var parts = parseSignatureAndBlock('template', parseTemplateBlock)
    var signature = parts[0]
    var block = parts[1]
    return { type:'TEMPLATE', signature:signature, block:block }
  })
  // Parses one statement of template code: a control statement, an inline
  // <script> tag, an XML literal, or (failing all of those) an expression.
  var parseTemplateBlock = function() {
    var statement = tryParseControlStatement(parseTemplateBlock)
      || tryParseInlineScript(parseTemplateBlock)
    if (statement) { return statement }
    return peek('symbol', '<') ? parseXML() : parseExpression()
  }
  72. /*************
  73. * Functions *
  74. *************/
  // function(<args>) { <function statements> }  ->  FUNCTION node
  var parseFunctionLiteral = astGenerator(function() {
    var parts = parseSignatureAndBlock('function', _parseFunctionBlock)
    var signature = parts[0]
    var block = parts[1]
    return { type:'FUNCTION', signature:signature, block:block }
  })
  // Parses one statement of a function body: a control statement, an inline
  // <script> tag or a return statement. Anything else is reported via halt.
  var _parseFunctionBlock = function() {
    var statement = tryParseControlStatement(_parseFunctionBlock)
      || tryParseInlineScript(_parseFunctionBlock)
    if (statement) { return statement }
    if (!peek('keyword', 'return')) {
      halt(peek(), 'Expected either a return statement or a control statement in this function block.')
      return
    }
    return _parseReturnStatement()
  }
  // return <expression>  ->  RETURN node
  var _parseReturnStatement = astGenerator(function() {
    advance('keyword', 'return')
    return { type:'RETURN', value:parseExpression() }
  })
  92. /************
  93. * Handlers *
  94. ************/
  // handler(<args>) { <handler statements> }  ->  HANDLER node
  var parseHandlerLiteral = astGenerator(function() {
    var parts = parseSignatureAndBlock('handler', _parseHandlerBlock)
    var signature = parts[0]
    var block = parts[1]
    return { type:'HANDLER', signature:signature, block:block }
  })
  // Parses one statement of a handler body: a control statement, an inline
  // <script> tag, or otherwise a mutation / plain expression.
  var _parseHandlerBlock = function() {
    var statement = tryParseControlStatement(_parseHandlerBlock)
      || tryParseInlineScript(_parseHandlerBlock)
    return statement ? statement : _parseMutationOrInvocation()
  }
  // Parses an expression and, if it is followed by `<name>: <arg>, <arg> ...`,
  // turns it into a MUTATION node with the expression as operand. Otherwise
  // the expression itself is returned.
  var _parseMutationOrInvocation = astGenerator(function() {
    var operand = parseExpression()
    var isMutation = peek('name') && peek('symbol', ':', 2)
    if (!isMutation) { return operand }
    var operator = advance('name').value
    advance('symbol', ':')
    var args = []
    while (true) {
      args.push(parseExpression())
      if (!peek('symbol', ',')) { break }
      advance('symbol', ',')
    }
    return { type:'MUTATION', operand:operand, operator:operator, arguments:args }
  })
  120. /***************************
  121. * Control flow statements *
  122. ***************************/
  // Tries to parse a statement that is legal in every kind of block:
  // a declaration (`name = ...`), for, if, switch or debugger.
  // blockParseFunction is forwarded to the statement parsers that contain
  // nested blocks. Returns undefined, consuming nothing, when the upcoming
  // tokens do not start one of these statements.
  var tryParseControlStatement = function(blockParseFunction) {
    if (peek('name') && peek('symbol', '=', 2)) {
      return _parseDeclaration()
    }
    // Only dispatch on real keyword tokens. Looking at the value alone would
    // also match e.g. the string literal "if" and then halt in advance().
    var keywordToken = peek('keyword', ['for', 'if', 'switch', 'debugger'])
    if (!keywordToken) { return }
    switch (keywordToken.value) {
      case 'for': return _parseForLoopStatement(blockParseFunction)
      case 'if': return _parseIfStatement(blockParseFunction)
      case 'switch': return _parseSwitchStatement(blockParseFunction)
      case 'debugger': return _parseDebuggerLiteral()
    }
  }
  // for <name> in <iterable> { ... }  ->  FOR_LOOP node.
  // The iterator name may be wrapped in parentheses, e.g. `for (item) in list`.
  var _parseForLoopStatement = astGenerator(function(statementParseFunction) {
    advance('keyword', 'for')
    var iterator
    _allowParens(function() {
      var nameToken = advance('name', null, 'for_loop\'s iterator reference')
      iterator = createAST({ type:'REFERENCE', name:nameToken.value })
    })
    advance('keyword', 'in', 'for_loop\'s "in" keyword')
    var iterable = parseExpression()
    var block = parseBlock(statementParseFunction, 'for_loop')
    return { type:'FOR_LOOP', iterable:iterable, iterator:iterator, block:block }
  })
  // Runs fn, allowing it to be wrapped in any number of balanced parentheses:
  // consumes the opening parens, calls fn once, then requires one closing
  // paren per opening paren.
  var _allowParens = function(fn) {
    var depth = 0
    while (peek('symbol', L_PAREN)) {
      advance()
      depth += 1
    }
    fn()
    for (; depth > 0; depth -= 1) {
      advance('symbol', R_PAREN)
    }
  }
  // <name> = <expression>  ->  DECLARATION node.
  // The name must start with a lowercase ASCII letter.
  var _parseDeclaration = astGenerator(function() {
    var name = advance('name').value
    assert(gToken, 'a' <= name[0] && name[0] <= 'z', 'Variable names must start with a lowercase letter')
    advance('symbol', '=')
    // parseExpression takes no arguments; the stray one passed here was dropped
    var initialValue = parseExpression()
    return { type:'DECLARATION', name:name, initialValue:initialValue }
  })
  // if <condition> { ... } [else { ... } | else if ...]  ->  IF_STATEMENT node.
  // An `else if` is stored as an else block holding a single nested
  // IF_STATEMENT; elseBlock is null when there is no else.
  var _parseIfStatement = astGenerator(function(statementParseFunction) {
    advance('keyword', 'if')
    var condition = parseExpression()
    var ifBlock = parseBlock(statementParseFunction, 'if statement')
    var elseBlock = null
    if (peek('keyword', 'else')) {
      advance('keyword', 'else')
      elseBlock = peek('keyword', 'if')
        ? [_parseIfStatement(statementParseFunction)]
        : parseBlock(statementParseFunction, 'else statement')
    }
    return { type:'IF_STATEMENT', condition:condition, ifBlock:ifBlock, elseBlock:elseBlock }
  })
  // switch <value> { case ...: ... default: ... }  ->  SWITCH_STATEMENT node
  var _parseSwitchStatement = astGenerator(function(statementParseFunction) {
    advance('keyword', 'switch')
    var controlValue = parseExpression()
    // every entry of the switch block is parsed as one case
    var parseOneCase = curry(_parseCase, statementParseFunction)
    var cases = parseBlock(parseOneCase, 'switch case statement')
    return { type:'SWITCH_STATEMENT', controlValue:controlValue, cases:cases }
  })
  // case <value>, <value> ...: <statements>   or   default: <statements>
  // Statements are collected until the next case/default label or the
  // closing curly of the switch block.
  var _parseCase = astGenerator(function(statementParseFunction) {
    var label = advance('keyword', ['case', 'default']).value
    var values = []
    var statements = []
    if (label == 'case') {
      values.push(parseExpression())
      while (peek('symbol', ',')) {
        advance('symbol', ',')
        values.push(parseExpression())
      }
    }
    advance('symbol', ':')
    while (!(peek('keyword', ['case', 'default']) || peek('symbol', R_CURLY))) {
      statements.push(statementParseFunction())
    }
    return { type:'SWITCH_CASE', values:values, statements:statements, isDefault:(label == 'default') }
  })
  // debugger  ->  DEBUGGER node
  var _parseDebuggerLiteral = astGenerator(function() {
    var node = { type:'DEBUGGER' }
    advance('keyword', 'debugger')
    return node
  })
  205. /**********************
  206. * Inline script tags *
  207. **********************/
  // Parses an inline script tag if the next two tokens are `<` and `script`;
  // otherwise returns undefined without consuming anything.
  var tryParseInlineScript = function() {
    var opensScriptTag = peek('symbol', '<') && peek('name', 'script', 2)
    if (opensScriptTag) { return _parseInlineScript() }
  }
  // <script attr=...> raw javascript </script>  ->  SCRIPT_TAG node.
  // The javascript is rebuilt as text from the tokens between the tags: a
  // newline and/or space is re-inserted where a token reports one before it,
  // and string tokens get their quotes back (single or double).
  var _parseInlineScript = astGenerator(function() {
    advance('symbol', '<', 'Script tag open')
    advance('name', 'script', 'Script tag name')
    var attributes = _parseXMLAttributes(false)
    advance('symbol', '>', 'end of the script tag')
    var atClosingTag = function() {
      return peek('symbol', '</', 1) && peek('name', 'script', 2) && peek('symbol', '>', 3)
    }
    var pieces = []
    while (!atClosingTag()) {
      var token = advance()
      if (token.hadNewline) { pieces.push('\n') }
      if (token.hadSpace) { pieces.push(' ') }
      if (token.type == 'string') {
        var quote = token.annotations.single ? "'" : '"'
        pieces.push(quote + token.value + quote)
      } else {
        pieces.push(token.value)
      }
    }
    advance('symbol', '</')
    advance('name', 'script')
    advance('symbol', '>')
    return { type:'SCRIPT_TAG', tagName:'script', attributes:attributes, inlineJavascript:pieces.join('') }
  })
  232. /******************************************************************
  233. * Expressions (literals, references, invocations, operators ...) *
  234. ******************************************************************/
  // Operator symbols recognised by the expression parser.
  var prefixOperators = ['-', '!']
  var binaryOperators = ['+', '-', '*', '/', '%', '?']
  // Comparison symbols; these are only looked for after the `is` keyword.
  var conditionalOperators = ['<', '>', '<=', '>=', '==', '=', '!=', '!']
  var conditionalJoiners = ['and', 'or']
  // Binding power per operator; a higher number binds tighter.
  var bindingPowers = {
    '?': 10,
    'and': 20,
    'or': 20,
    '<': 30,
    '>': 30,
    '<=': 30,
    '>=': 30,
    '==': 30,
    '=': 30,
    '!=': 30,
    '!': 30,
    '+': 40,
    '-': 40,
    '*': 50,
    '/': 50,
    '%': 50
  }
  // Parses a complete expression, starting with the lowest binding power (0).
  var parseExpression = function() {
    var lowestBinding = 0
    return _parseMore(lowestBinding)
  }
  // Operator-precedence expression parser.
  // leftOperatorBinding is the binding power of the operator to the left of
  // the expression being parsed (0 at the start of an expression). Parsing
  // stops, returning what has been built so far, at the first operator that
  // does not bind tighter than that. Operators of equal binding power
  // therefore group left to right: `10 - 5 - 2` is `(10 - 5) - 2`.
  // Throws if leftOperatorBinding is null or undefined.
  var _parseMore = astGenerator(function(leftOperatorBinding) {
    if (leftOperatorBinding == null) {
      throw new Error("leftOperatorBinding should be defined: ")
    }

    if (peek('symbol', prefixOperators)) {
      // Prefix operators simply apply to the next expression
      // and do not modify the left operator binding
      var prefixOperator = advance('symbol').value
      return { type:'UNARY_OP', operator:prefixOperator, value:_parseMore(leftOperatorBinding) }
    }

    var expression
    if (peek('symbol', L_PAREN)) {
      // There are no value literals with parentheses.
      // If we see a paren, group the inside expression.
      advance('symbol', L_PAREN)
      expression = _parseMore(0)
      advance('symbol', R_PAREN)
      expression = _addTightOperators(expression)
    } else {
      expression = _addTightOperators(_parseAtomicExpressions())
    }

    while (true) {
      var rightOperatorToken
      // Recomputed on every pass so that an implied `=` found earlier cannot
      // suppress consuming a later, explicitly written operator.
      var impliedEqualityOp = false
      // All conditional comparisons require the is keyword (e.g. `foo is < 10`)
      // to avoid ambiguity between the conditional operator < and the beginning of XML
      if (peek('keyword', 'is')) {
        rightOperatorToken = peek('symbol', conditionalOperators, 2)
        // It is OK to skip the comparative operator, and simply say `foo is "bar"` in place of `foo is = "bar"`
        if (!rightOperatorToken) {
          rightOperatorToken = { value:'=' }
          impliedEqualityOp = true
        }
      } else {
        rightOperatorToken = peek('symbol', binaryOperators)
      }

      var rightOperator = rightOperatorToken && rightOperatorToken.value
      var rightOperatorBinding = (bindingPowers[rightOperator] || 0)
      // `>=` (not `>`): an operator of the same power belongs to the caller,
      // which makes equal-precedence chains left-associative.
      if (!rightOperator || leftOperatorBinding >= rightOperatorBinding) {
        return expression
      }

      if (peek('symbol', '?')) {
        // <condition> ? <ifValue> : <elseValue>
        advance()
        var ifValue = _parseMore(0)
        advance('symbol',':')
        return { type:'TERNARY_OP', condition:expression, ifValue:ifValue, elseValue:_parseMore(0) }
      }

      if (peek('keyword', 'is')) {
        advance() // the "is" keyword
      }
      if (!impliedEqualityOp) {
        advance() // the operator
      }

      expression = { type:'BINARY_OP', left:expression, operator:rightOperator, right:_parseMore(rightOperatorBinding) }
    }
  })
  // Parses a single value with no operators around it: a reference, a
  // text/number/null/logic literal, a list or dictionary literal, or a
  // template/handler/function literal. Any other token is reported via halt;
  // running out of tokens is reported as an unexpected end of file.
  var _parseAtomicExpressions = function() {
    var token = peek()
    if (!token) {
      // same report that advance() gives when the tokens run out
      halt(null, 'Unexpected end of file')
      return
    }
    switch (token.type) {
      case 'string': return _parseTextLiteral()
      case 'number': return _parseNumberLiteral()
      case 'name': return _parseReference()
      case 'symbol':
        switch (token.value) {
          case L_BRACKET: return _parseListLiteral()
          case L_CURLY: return _parseObjectLiteral()
          default: halt(token, 'Unexpected symbol "'+token.value+'" while looking for a value')
        }
      case 'keyword':
        switch (token.value) {
          case 'null': return _parseNullLiteral()
          case 'true': return _parseTrueLiteral()
          case 'false': return _parseFalseLiteral()
          case 'template': return parseTemplateLiteral()
          case 'handler': return parseHandlerLiteral()
          case 'function': return parseFunctionLiteral()
          default: halt(token, 'Unexpected keyword "'+token.value+'" while looking for a value')
        }
      default: halt(token, 'Unexpected token type "'+token.type+'" while looking for a value')
    }
  }
  // <name>  ->  REFERENCE node
  var _parseReference = astGenerator(function() {
    var nameToken = advance('name')
    return { type:'REFERENCE', name:nameToken.value }
  })
  // null  ->  NULL_LITERAL node
  var _parseNullLiteral = astGenerator(function() {
    var node = { type:'NULL_LITERAL', value:null }
    advance('keyword', 'null')
    return node
  })
  // true  ->  LOGIC_LITERAL node with value true
  var _parseTrueLiteral = astGenerator(function() {
    var node = { type:'LOGIC_LITERAL', value:true }
    advance('keyword', 'true')
    return node
  })
  // false  ->  LOGIC_LITERAL node with value false
  var _parseFalseLiteral = astGenerator(function() {
    var node = { type:'LOGIC_LITERAL', value:false }
    advance('keyword', 'false')
    return node
  })
  // <string token>  ->  TEXT_LITERAL node
  var _parseTextLiteral = astGenerator(function() {
    var stringToken = advance('string')
    return { type:'TEXT_LITERAL', value:stringToken.value }
  })
  // <number token>  ->  NUMBER_LITERAL node
  var _parseNumberLiteral = astGenerator(function() {
    var numberToken = advance('number')
    return { type:'NUMBER_LITERAL', value:numberToken.value }
  })
  // Operators that attach directly to the value on their left, without
  // whitespace in between: member access, index access and invocation.
  var tightOperators = ['.', L_BRACKET, L_PAREN]
  // Wraps `expression` in DEREFERENCE / INVOCATION nodes for as long as a
  // tight operator follows it, so that chains such as `a.b[0](x).c` nest
  // from left to right. Returns `expression` unchanged if none follows.
  var _addTightOperators = astGenerator(function(expression) {
    if (!peekNoWhitespace('symbol', tightOperators)) { return expression }
    switch (advance().value) {
      case '.':
        var memberKey = { type:'TEXT_LITERAL', value:advance('name').value }
        return _addTightOperators({ type:'DEREFERENCE', key:memberKey, value:expression })
      case L_BRACKET:
        var indexKey = parseExpression()
        // Consume the closing bracket BEFORE looking for further tight
        // operators; otherwise the chain always stops at `]`.
        advance('symbol', R_BRACKET)
        return _addTightOperators({ type:'DEREFERENCE', key:indexKey, value:expression })
      case L_PAREN:
        var args = parseList(R_PAREN, parseExpression)
        return _addTightOperators({ type:'INVOCATION', operand:expression, arguments:args })
      default:
        throw new Error("Bad tight operator")
    }
  })
  // [ <expression>, <expression> ... ]  ->  LIST_LITERAL node
  var _parseListLiteral = astGenerator(function() {
    advance('symbol', L_BRACKET)
    return { type:'LIST_LITERAL', content:parseList(R_BRACKET, parseExpression) }
  })
  // { <name or string> : <expression>, ... }  ->  DICTIONARY_LITERAL node.
  // `=` is accepted in place of `:` between a key and its value.
  var _parseObjectLiteral = astGenerator(function() {
    var parseEntry = astGenerator(function() {
      var keyToken = advance(['name','string'])
      parseSemiOrEqual()
      return { name:keyToken.value, value:parseExpression() }
    })
    advance('symbol', L_CURLY)
    return { type:'DICTIONARY_LITERAL', content:parseList(R_CURLY, parseEntry) }
  })
  // Consumes the separator between a key and its value: `=` if that is the
  // next symbol, otherwise `:` is required.
  var parseSemiOrEqual = function() {
    var separator = peek('symbol', '=') ? '=' : ':'
    advance('symbol', separator)
  }
  389. /****************
  390. * XML literals *
  391. ****************/
  // <tag attrs/>   or   <tag attrs> ...template statements... </tag attrs>
  // ->  XML node. Attributes written on the closing tag are appended to
  // those of the opening tag.
  var parseXML= astGenerator(function() {
    advance('symbol', '<', 'XML tag opening')
    var tagName = advance('name', null, 'XML tag name').value
    var attributes = _parseXMLAttributes(true)
    var tagEnd = advance('symbol', ['/>', '>'], 'end of the XML tag')
    if (tagEnd.value == '/>') {
      // self-closing tag: no children
      return { type:'XML', tagName:tagName, attributes:attributes, block:[] }
    }
    var statements = []
    while (!peek('symbol', '</')) {
      statements.push(parseTemplateBlock())
    }
    advance('symbol', '</')
    advance('name', tagName, 'matching XML tags')
    // allow for attributes on closing tag, e.g. <button>"Click"</button onClick=handler(){ ... }>
    attributes = attributes.concat(_parseXMLAttributes(true))
    advance('symbol', '>')
    return { type:'XML', tagName:tagName, attributes:attributes, block:statements }
  })
  // Collects attributes until the end of the tag (`>` or `/>`) is next.
  // allowHashExpand is forwarded to _parseXMLAttribute.
  var _parseXMLAttributes = function(allowHashExpand) {
    var attributes = []
    for (;;) {
      if (peek('symbol', ['/>','>'])) { return attributes }
      attributes.push(_parseXMLAttribute(allowHashExpand))
      // Allow for <div foo="bar", cat="qwe"/>
      if (peek('symbol', ',')) { advance() }
    }
  }
  // Parses one attribute: either `name=value` / `name:value`, or a hash
  // expansion `#expression`. Hash expansions halt when allowHashExpand is
  // falsy.
  var _parseXMLAttribute = astGenerator(function(allowHashExpand) {
    if (!peek('symbol', '#')) {
      var name = advance(['name', 'keyword']).value
      parseSemiOrEqual()
      return { name:name, value:parseExpression() }
    }
    if (!allowHashExpand) {
      halt(peek(), "Hash expanded attributes are not allowed in script tags - trust me, it would be messy")
    }
    advance()
    return { expand:parseExpression() }
  })
  435. /****************************
  436. * Shared parsing functions *
  437. ****************************/
  // Parses items with statementParseFunction until <breakSymbol> is
  // encountered (e.g. R_PAREN or R_BRACKET), then consumes the break symbol.
  // A comma after an item is optional, so both "foo", "bar", "key" and
  // "foo" "bar" "key" are accepted. Returns the parsed items as an array.
  var parseList = function(breakSymbol, statementParseFunction) {
    var items = []
    while (!peek('symbol', breakSymbol)) {
      items.push(statementParseFunction())
      if (peek('symbol', ',')) { advance() }
    }
    advance('symbol', breakSymbol)
    return items
  }
  // Parses a series of statements enclosed by curlies, e.g.
  // { <statement> <statement> <statement> }, calling statementParseFn once
  // per statement. statementType is only used in error descriptions.
  // Returns the parsed statements as an array.
  var parseBlock = function(statementParseFn, statementType) {
    advance('symbol', L_CURLY, 'beginning of the '+statementType+'\'s block')
    var statements = []
    while (!peek('symbol', R_CURLY)) {
      statements.push(statementParseFn())
    }
    advance('symbol', R_CURLY, 'end of the '+statementType+' statement\'s block')
    return statements
  }
  // Parses `<keyword>(<arg>, <arg> ...) { <block> }`, the shape shared by
  // template, handler and function literals. Returns [signature, block]
  // where signature is an array of ARGUMENT nodes.
  function parseSignatureAndBlock(keyword, blockParseFn) {
    advance('keyword', keyword)
    advance('symbol', L_PAREN)
    var parseArgument = function() {
      var nameToken = advance('name')
      return createAST({ type:'ARGUMENT', name:nameToken.value })
    }
    var signature = parseList(R_PAREN, parseArgument)
    return [signature, parseBlock(blockParseFn, keyword)]
  }
  469. /****************
  470. * Token stream *
  471. ****************/
  // Consumes the next token, makes it the current token (gToken) and
  // returns it.
  //   type            optional token type, or array of accepted types
  //   value           optional token value, or array of accepted values
  //   expressionType  optional description of what is being parsed, used
  //                   only in the error message
  // Calls halt when there is no next token, or when the next token does
  // not match the requested type/value.
  function advance(type, value, expressionType) {
    var nextToken = peek()
    if (!nextToken) { halt(null, 'Unexpected end of file') }
    // findInArray yields the actual value when it is acceptable, so a
    // mismatch between v1 and v2 means the token is not what was expected.
    function check(v1, v2) {
      if (v1 == v2) { return }
      // The whole description is ONE message argument: expected part plus
      // what was actually found.
      halt(nextToken, 'Expected a ' + q(type)
        + (value ? ' of value ' + (value instanceof Array ? value.join(' or ') : value) : ',')
        + (expressionType ? ' for the ' + expressionType : '')
        + ' but found a ' + q(nextToken.type)
        + ' of value ' + q(nextToken.value))
    }
    if (type) { check(findInArray(type, nextToken.type), nextToken.type) }
    if (value) { check(findInArray(value, nextToken.value), nextToken.value) }
    gToken = gTokens[++gIndex]
    return gToken
  }
  // Looks ahead without consuming anything. Returns the token `steps`
  // positions ahead (the next token when steps is omitted) if it matches the
  // optional type and value filters, each a single value or an array of
  // accepted values. Returns false otherwise, or when there is no such token.
  var peek = function(type, value, steps) {
    var candidate = gTokens[gIndex + (steps || 1)]
    if (!candidate) { return false }
    var typeMatches = !type || findInArray(type, candidate.type) == candidate.type
    var valueMatches = !value || findInArray(value, candidate.value) == candidate.value
    return (typeMatches && valueMatches) ? candidate : false
  }
  // Like peek, but returns null when the inspected token was preceded by
  // whitespace (its hadSpace flag is set). `steps` selects how far ahead to
  // look and defaults to the next token.
  var peekNoWhitespace = function(type, value, steps) {
    if (peekWhitespace(steps)) { return null }
    return peek(type, value, steps)
  }
  // Returns the hadSpace flag of the token `steps` positions ahead
  // (default: the next token); falsy when there is no such token.
  var peekWhitespace = function(steps) {
    var token = gTokens[gIndex + (steps || 1)]
    return token && token.hadSpace
  }
  // Returns the hadNewline flag of the token `steps` positions ahead
  // (default: the next token); false when there is no such token, instead
  // of throwing at the end of the input.
  var peekNewline = function(steps) {
    var token = gTokens[gIndex + (steps || 1)]
    return token ? token.hadNewline : false
  }
  // Find an item in an array and return it
  //  if target is in array, return target
  //  if target is not in array, return array
  //  if array is not an array, return array
  // Callers compare the result with the target to decide whether it matched.
  function findInArray(array, target) {
    if (!(array instanceof Array)) { return array }
    // Index-based loop: iterating with `item = array[i]` as the condition
    // would stop at the first falsy element ('' or 0) and miss later ones.
    for (var i = 0; i < array.length; i++) {
      // loose equality is kept on purpose, to match how callers compare
      if (array[i] == target) { return array[i] }
    }
    return array
  }
  517. /*********************
  518. * Utility functions *
  519. *********************/
  // Wraps a function that builds a plain AST object so that its result is
  // passed through createAST together with the token that was next before
  // the function ran and the token that is next after it ran. `this` and
  // all arguments are forwarded to generatorFn.
  function astGenerator(generatorFn) {
    return function() {
      var startToken = peek()
      var ast = generatorFn.apply(this, arguments)
      return createAST(ast, startToken, peek())
    }
  }
  // Creates an AST object annotated with where in the source it appeared.
  // The own properties of astObj are copied onto a new object whose
  // prototype carries an `info` record (input file/string, line, column and
  // span) derived from startToken and endToken. Either token defaults to
  // the current token (gToken) when missing. Arrays are returned untouched.
  function createAST(astObj, startToken, endToken) {
    startToken = startToken || gToken
    endToken = endToken || gToken
    if (isArray(astObj)) { return astObj }
    var sameLine = (startToken.line == endToken.line)
    var info = {
      inputFile: startToken.inputFile,
      inputString: startToken.inputString,
      line: startToken.line,
      column: startToken.column,
      // on one line the span reaches to the end of endToken; otherwise only
      // the start token's own span is used
      span: sameLine
        ? endToken.column - startToken.column + endToken.span
        : startToken.span
    }
    // `info` lives on the prototype, so it is not an own property of the node
    var ast = Object.create({ info:info })
    for (var key in astObj) {
      if (astObj.hasOwnProperty(key)) { ast[key] = astObj[key] }
    }
    return ast
  }