PageRenderTime 56ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/src/compiler.txt

https://bitbucket.org/Spivey/geomlab
Plain Text | 930 lines | 774 code | 156 blank | 0 comment | 0 complexity | 9a5cce858924b921052a52b3072eb70b MD5 | raw file
  1. {*
  2. * compiler.txt
  3. *
  4. * This file is part of GeomLab
  5. * Copyright (c) 2008 J. M. Spivey
  6. * All rights reserved
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions are met:
  10. *
  11. * 1. Redistributions of source code must retain the above copyright notice,
  12. * this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright notice,
  14. * this list of conditions and the following disclaimer in the documentation
  15. * and/or other materials provided with the distribution.
  16. * 3. The name of the author may not be used to endorse or promote products
  17. * derived from this software without specific prior written permission.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  20. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  21. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  22. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  24. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  25. * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26. * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  27. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  28. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. *}
  30. { LEXICAL TOKENS }
  31. _update(_syntax, #define, [#define, 0, 0]);
  32. _update(_syntax, #=, [#=, 3, 4]);
  33. define _token(tag, tok, p, rp) = _update(_syntax, tag, [tok, p, rp]);
  34. define _tok(tag) = _token(tag, tag, 0, 0);
  35. _tok(#if); _tok(#then); _tok(#else); _tok(#let); _tok(#in);
  36. _tok(#function); _tok(#when); _tok(#_); _tok(#>>); _tok(#..); _tok(#<-);
  37. _token(#+, #+, 5, 6);
  38. _token(#-, #-, 5, 6);
  39. _token(#:, #:, 7, 7);
  40. define _infixl(tag, p) = _token(tag, #binop, p, p+1);
  41. define _infixr(tag, p) = _token(tag, #binop, p, p);
  42. _infixl(#or, 1);
  43. _infixl(#and, 2);
  44. _infixl(#<, 3); _infixl(#<=, 3); _infixl(#<>, 3);
  45. _infixl(#>, 3); _infixl(#>=, 3);
  46. _infixr(#++, 4);
  47. _infixl(#^, 5);
  48. _infixl(#*, 6); _infixl(#/, 6);
  49. _token(#not, #monop, 0, 0);
  50. define _priority(op) =
  51. let tk = _lookup(_syntax, op) in
  52. if tk <> [] then tail(tk) else [0, 0];
  53. { BASIC DEFINITIONS }
  54. { Some of these are redefined in the prelude with better error handling. }
  55. define true = numeric(0);
  56. define false = numeric(true);
  57. define not (p) = if p then false else true;
  58. define ++ ([], ys) = ys
  59. | ++ (x:xs, ys) = x:(xs ++ ys);
  60. define concat([]) = []
  61. | concat(xs:xss) = xs ++ concat(xss);
  62. define reverse(xs) =
  63. let reva([], vs) = vs | reva(u:us, vs) = reva(us, u:vs) in
  64. reva(xs, []);
  65. define length([]) = 0
  66. | length(x:xs) = length(xs)+1;
  67. define assoc(x, []) = []
  68. | assoc(x, [u,v]:zs) = if x = u then v else assoc(x, zs);
  69. define map(f, []) = [] | map(f, x:xs) = f(x) : map(f, xs);
  70. define filter(p, []) = []
  71. | filter(p, x:xs) = if p(x) then x:filter(p, xs) else filter(p, xs);
  72. define foldr(f, a, []) = a | foldr(f, a, x:xs) = f(x, foldr(f, a, xs));
  73. define foldl(f, a, []) = a | foldl(f, a, x:xs) = foldl(f, f(a, x), xs);
  74. { Helper function for list comprehensions }
  75. define _mapa(f, [], acc) = acc
  76. | _mapa(f, x:xs, acc) = f(x, _mapa(f, xs, acc));
  77. { Helper function for lists [a..b] }
  78. define _range(a, b) = if a > b then [] else a:_range(a+1, b);
  79. { Helper functions for operator sections }
  80. define _lsect(f, x) = function (y) f(x, y);
  81. define _rsect(f, y) = function (x) f(x, y);
  82. { All definitions that are specific to the compiler are made local to it:
  83. this avoids 'polluting the name space' and makes bootstrapping easier.
  84. But the bootstrap loader does not support closures that have free variables,
  85. so here we create a function with no arguments and no free variables
  86. that (when it is called) builds a network of closures and returns the main
  87. compiling function. Later (at the start of the prelude) we can call the
  88. no-argument function one last time and save the resulting closure. }
  89. define __top() =
  90. let debug(n, x) = if _debug() > n then _print(x) else [] in
  91. let member(x, []) = false | member(x, y:ys) = (x = y) or member(x, ys) in
  92. let number(_, []) = []
  93. | number(n, x:xs) = [n, x] : number(n+1, xs) in
  94. let fst(x:_) = x in
  95. let snd(_:y:_) = y in
  96. let max(x, y) = if x > y then x else y in
  97. { PARSER -- a slightly hacked-up recursive descent parser. The global
  98. variable tok contains the next token, and each parser routine p_thing
  99. recognises an instance of thing and returns the AST, expecting tok
  100. to contain the first token of the thing on entry, and leaving it with
  101. the first token after the thing. The trickiness surrounds operator
  102. sections like (2*x+), where we discover that it is a section only after
  103. parsing the 2*x. }
  104. let synerror(tag) = _synerror(tag, []) in
  105. { describe -- string describing a token for error messages }
  106. let describe(#ident) = "an identifier"
  107. | describe(#number) = "a number"
  108. | describe(#atom) = "an atom"
  109. | describe(#lpar) = "'('"
  110. | describe(#rpar) = "')'"
  111. | describe(#comma) = "','"
  112. | describe(#semi) = "';'"
  113. | describe(#bra) = "'['"
  114. | describe(#ket) = "']'"
  115. | describe(#vbar) = "'|'"
  116. | describe(#>>) = "'>>'"
  117. | describe(#..) = "'..'"
  118. | describe(#string) = "a string constant"
  119. | describe(#binop) = "a binary operator"
  120. | describe(#monop) = "a unary operator"
  121. | describe(#lbrace) = "'{'"
  122. | describe(#rbrace) = "'}'"
  123. | describe(#eol) = "end of line"
  124. | describe(#eof) = "end of input"
  125. | describe(x) = "'" ^ _spelling(x) ^ "'" in
  126. { tok -- latest lexer token }
  127. let tok = _new(0) in
  128. { val -- value associated with latest token }
  129. let val = _new(0) in
  130. { scan -- call the lexer and set tok and val }
  131. let scan() =
  132. let t = _scan() in
  133. let case(#ident) =
  134. let tk = _lookup(_syntax, snd(t)) in
  135. _set(tok, if tk <> [] then fst(tk) else #ident)
  136. | case(#op) =
  137. let tk = _lookup(_syntax, snd(t)) in
  138. if tk <> [] then _set(tok, fst(tk)) else synerror("#badtok")
  139. | case(x) =
  140. _set(tok, x) in
  141. case(fst(t)) >> _set(val, snd(t)) in
  142. let isbinop(t) = member(t, [#binop, #=, #-, #+, #:]) in
  143. { see -- test for possible token }
  144. let see(t) = (_get(tok) = t) in
  145. { eat -- match and consume token or report syntax error }
  146. let eat(t) =
  147. if see(t) then scan() else _synerror("#eat", [describe(t)]) in
  148. { can_eat -- match and consume token or return false }
  149. let can_eat(t) =
  150. if see(t) then (scan() >> true) else false in
  151. { A switch "case !tok of x -> ... | y -> ..." is rendered below as
  152. "let case(x) = ... | case(y) = ... in whichever(case)". }
  153. { whichever -- case branch on next token }
  154. let whichever(case) = case(_get(tok)) in
  155. { p_sym -- match and consume a token and return its value }
  156. let p_sym(t) =
  157. let v = _get(val) in eat(t) >> v in
  158. { brack -- parse a phrase between brackets }
  159. let brack(open, p, close) =
  160. eat(open) >> let x = p() in eat(close) >> x in
  161. { brack1 -- parse phrase between brackets, passing close as argument }
  162. let brack1(open, p, close) =
  163. eat(open) >> let x = p(close) in eat(close) >> x in
  164. { p_tail -- parse tail of list with separator }
  165. let p_tail(p, sep) =
  166. if can_eat(sep) then (let e1 = p() in e1 : p_tail(p, sep)) else [] in
  167. { p_list1 -- parse non-empty list separated by commas }
  168. let p_list1(p) =
  169. let e1 = p() in e1 : p_tail(p, #comma) in
  170. { p_list -- parse optional list }
  171. let p_list(p, endtok) =
  172. if see(endtok) or see(#eof) then [] else p_list1(p) in
  173. { We allow mutual recursion by tying the knot with reference cells.
  174. Each cell made here has a function f that fetches the contents of
  175. the cell and calls it. Later, we'll see a function f_body and an
  176. assignment _set(knot, f_body) that sets the cell. }
  177. let eknot = _new(0) in
  178. let p_expr0(secok) = let p = _get(eknot) in p(secok) in
  179. let p_expr() = p_expr0(false) in
  180. let tknot = _new(0) in
  181. let p_term(min, secok) = let p = _get(tknot) in p(min, secok) in
  182. let pknot = _new(0) in
  183. let p_pattern() = let p = _get(pknot) in p() in
  184. let dknot = _new(0) in
  185. let p_defn() = let p = _get(dknot) in p() in
  186. { listify -- form list expression or pattern }
  187. let listify(es) = foldr(function (h, t) [#cons, h, t], [#nil], es) in
  188. let p_patterns(endtok) = p_list(p_pattern, endtok) in
  189. let p_patprim() =
  190. let case(#ident) =
  191. let x = p_sym(#ident) in
  192. if not see(#lpar) then [#var, x] else
  193. #prim:x:brack1(#lpar, p_patterns, #rpar)
  194. | case(#atom) = [#const, p_sym(#atom)]
  195. | case(#_) = eat(#_) >> [#anon]
  196. | case(#number) = [#const, p_sym(#number)]
  197. | case(#-) = eat(#-) >> [#const, -p_sym(#number)]
  198. | case(#string) = [#const, p_sym(#string)]
  199. | case(#lpar) = brack(#lpar, p_pattern, #rpar)
  200. | case(#bra) = #list:brack1(#bra, p_patterns, #ket)
  201. | case(_) = synerror("#pattern") in
  202. whichever(case) in
  203. let p_patfactor() =
  204. let p = p_patprim() in
  205. if can_eat(#:) then [#cons, p, p_patfactor()] else p in
  206. let p_pattern_body() =
  207. let chain(p) =
  208. if can_eat(#+) then chain([#plus, p, p_sym(#number)]) else p in
  209. chain(p_patfactor()) in
  210. { p_formals -- parse formal parameters }
  211. let p_formals() = brack1(#lpar, p_patterns, #rpar) in
  212. { p_exprs -- parse optional list of expressions }
  213. let p_exprs(endtok) = p_list(p_expr, endtok) in
  214. { expand -- expand list comprehension into code with accumulator }
  215. let expand(e1, [], a) =
  216. { Base case: "[ e1 | ] ++ a" = e1:a }
  217. [#cons, e1, a]
  218. | expand(e1, [#gen, [#var, x], e2]:gs, a) =
  219. { Simple generator: "[ e1 | x <- e2, ... ] ++ a" =
  220. _mapa(function (x, b) "[ e1 | ... ] ++ b", e2, a) }
  221. let b = [#var, _gensym()] in
  222. [#apply, [#var, #_mapa], [#function, 2,
  223. [[[[#var, x], b], expand(e1, gs, b)]]], e2, a]
  224. | expand(e1, [#gen, patt, e2]:gs, a) =
  225. { Generator : "[ e1 | patt <- e2, ... ] ++ a" =
  226. _mapa((function (patt, b) -> "[ e1 | gs ] ++ b" | (_, b) -> b),
  227. e2, a) }
  228. let b = [#var, _gensym()] in
  229. [#apply, [#var, #_mapa], [#function, 2,
  230. [[[patt, b], expand(e1, gs, b)], [[[#anon], b], b]]], e2, a]
  231. | expand(e1, [#when, e2]:gs, a) =
  232. { Test: "[ e1 | when e2, ...] ++ a" =
  233. if e2 then "[e1 | ...] ++ a" else a }
  234. [#if, e2, expand(e1, gs, a), a] in
  235. let p_gen() =
  236. let p = p_pattern() in eat(#<-) >> [#gen, p, p_expr()] in
  237. { p_gens -- parse generators for a list comprehension }
  238. let p_gens() =
  239. let p_tail() =
  240. let case(#when) =
  241. eat(#when) >> let e = p_expr() in [#when, e] : p_tail()
  242. | case(#comma) =
  243. eat(#comma) >> let g = p_gen() in g : p_tail()
  244. | case(_) = [] in
  245. whichever(case) in
  246. let g = p_gen() in g : p_tail() in
  247. { p_listexp -- parse contents of [ ... ] }
  248. let p_listexp() =
  249. if see(#ket) then
  250. { An empty list }
  251. [#nil]
  252. else
  253. (let e1 = p_expr() in
  254. let case(#comma) =
  255. { A display of two or more items }
  256. #list:e1:p_tail(p_expr, #comma)
  257. | case(#..) =
  258. { A range [e1 .. e2] }
  259. eat(#..) >> [#apply, [#var, #_range], e1, p_expr()]
  260. | case(#vbar) =
  261. { A list comprehension }
  262. eat(#vbar) >> expand(e1, p_gens(), [#nil])
  263. | case(_) =
  264. { A singleton list }
  265. [#list, e1] in
  266. whichever(case)) in
  267. { p_parenexp -- parse expression after left paren }
  268. let p_parenexp() =
  269. if not isbinop(_get(tok)) then
  270. p_expr0(true)
  271. else
  272. (let w = p_sym(_get(tok)) in
  273. let prio = _priority(w) in
  274. if see(#rpar) then
  275. { An operator name (+) }
  276. [#var, w]
  277. else
  278. { A right section (+1) }
  279. [#apply, [#var, #_rsect], [#var, w], p_term(snd(prio), false)]) in
  280. let p_primary() =
  281. let case(#number) = [#const, p_sym(#number)]
  282. | case(#atom) = [#const, p_sym(#atom)]
  283. | case(#string) = [#const, p_sym(#string)]
  284. | case(#ident) =
  285. let x = p_sym(#ident) in
  286. if not see(#lpar) then [#var, x] else
  287. #apply:[#var, x]:brack1(#lpar, p_exprs, #rpar)
  288. | case(#lpar) = brack(#lpar, p_parenexp, #rpar)
  289. | case(#bra) = brack(#bra, p_listexp, #ket)
  290. | case(#eof) = synerror("#exp")
  291. | case(_) = synerror("#badexp") in
  292. whichever(case) in
  293. let p_factor() =
  294. let case(#monop) =
  295. let w = [#var, p_sym(#monop)] in [#apply, w, p_factor()]
  296. | case(#-) =
  297. eat(#-) >>
  298. if see(#number) then
  299. [#const, - p_sym(#number)]
  300. else
  301. [#apply, [#var, #_uminus], p_factor()]
  302. | case(_) = p_primary() in
  303. whichever(case) in
  304. { makebin -- create binary operator, treating 'and' and 'or' as special }
  305. let makebin(w, e1, e2) =
  306. let case(#and) = [#if, e1, e2, [#const, false]]
  307. | case(#or) = [#if, e1, [#const, true], e2]
  308. | case(_) = [#apply, [#var, w], e1, e2] in
  309. case(w) in
  310. { p_term -- parse a term containing operators with priority >= min.
  311. If secok is true, allow a left section (1+) }
  312. let p_term_body(min, secok) =
  313. { p_termcont -- loop to parse a sequence of operators and operands }
  314. let p_termcont(e1, min) =
  315. let t = _get(tok) in
  316. if not isbinop(t) then e1 else
  317. (let w = _get(val) in
  318. let prio = _priority(w) in
  319. if fst(prio) < min then e1 else
  320. (eat(t) >>
  321. if secok and see(#rpar) then
  322. { A left section }
  323. [#apply, [#var, #_lsect], [#var, w], e1]
  324. else
  325. { Got an operator: look for its right operand }
  326. (let e2 = p_term(snd(prio), false) in
  327. { Continue by looking for the next operator }
  328. p_termcont(makebin(w, e1, e2), min)))) in
  329. p_termcont(p_factor(), min) in
  330. { p_cond -- parse a conditional, maybe also allowing a left section }
  331. let p_cond(secok) =
  332. if can_eat(#if) then
  333. (let e1 = p_cond(false) in
  334. eat(#then) >> let e2 = p_cond(false) in
  335. eat(#else) >> let e3 = p_cond(false) in [#if, e1, e2, e3])
  336. else
  337. p_term(1, secok) in
  338. { p_expr -- parse an expression or perhaps a left section }
  339. let p_expr_body(secok) =
  340. let case(#let) =
  341. eat(#let) >> let d = p_defn() in
  342. eat(#in) >> [#let, d, p_expr()]
  343. | case(#function) =
  344. eat(#function) >> let formals = p_formals() in
  345. [#function, length(formals), [[formals, p_expr()]]]
  346. | case(_) =
  347. let e = p_cond(secok) in
  348. if can_eat(#>>) then [#seq, e, p_expr()] else e in
  349. whichever(case) in
  350. { p_name -- parse the name on the LHS of a definition }
  351. let p_name() =
  352. if isbinop(_get(tok)) or see(#monop) then
  353. p_sym(_get(tok))
  354. else
  355. p_sym(#ident) in
  356. { p_rhs -- parse right hand side of equation }
  357. let p_rhs(lhs) =
  358. eat(#=) >> let e = p_expr() in
  359. if can_eat(#when) then [lhs, p_expr(), e] else [lhs, e] in
  360. { p_rule -- parse one clause of a function definition }
  361. let p_rule(x, arity) =
  362. let y = p_name() in
  363. if x = y then [] else synerror("#names") >>
  364. let lhs = p_formals() in
  365. if length(lhs) = arity then [] else synerror("#arity") >>
  366. p_rhs(lhs) in
  367. { p_defn -- parse a definition }
  368. let p_defn_body() =
  369. let x = p_name() in
  370. if not see(#lpar) then
  371. (eat(#=) >> [#val, x, p_expr()])
  372. else
  373. (let lhs = p_formals() in
  374. let arity = length(lhs) in
  375. let rule = p_rhs(lhs) in
  376. [#fun, x, arity, rule :
  377. p_tail(function () p_rule(x, arity), #vbar)]) in
  378. { p_para -- parse a top-level paragraph }
  379. let p_para() =
  380. if see(#eof) then #eof else
  381. (let p = if can_eat(#define) then p_defn() else p_expr() in
  382. if see(#rpar) then synerror("#parenmatch")
  383. else if see(#ket) then synerror("#bramatch")
  384. else if not see(#semi) and not see(#eof) then synerror("#junk")
  385. else [] >>
  386. p) in
  387. _set(eknot, p_expr_body) >>
  388. _set(tknot, p_term_body) >>
  389. _set(pknot, p_pattern_body) >>
  390. _set(dknot, p_defn_body) >>
  391. let parser() = scan() >> p_para() in
  392. { CODE LISTS -- The compiler puts together the object code as a tree,
  393. with instructions as the leaves, and internal nodes (marked with #seq)
  394. that signify concatenation of their children. The function flatten
  395. makes the tree into a list in linear time. }
  396. { flatten -- arrange instruction tree into a list }
  397. let flatten(c) =
  398. let flat([], a) = a
  399. | flat(#seq:cs, a) = foldr(flat, a, cs)
  400. | flat(c, a) = c:a in
  401. flat(c, []) in
  402. let assemble(f, n, code) =
  403. { Determine stack and frame sizes }
  404. let ssize = _new(0) in
  405. let fsize = _new(0) in
  406. let labdepth = _hash() in
  407. let setlab(lab, d) = _update(labdepth, lab, d) in
  408. let visit(d, n) =
  409. let d1 = _lookup(labdepth, n) in if d1 <> [] then d1 else d
  410. when numeric(n)
  411. | visit(d, [#GLOBAL, _]) = d+1
  412. | visit(d, [#LOCAL, _]) = d+1
  413. | visit(d, [#ARG, _]) = d+1
  414. | visit(d, [#FVAR, _]) = d+1
  415. | visit(d, [#BIND, n]) = _set(fsize, max(n+1, _get(fsize))) >> d-1
  416. | visit(d, [#POP]) = d-1
  417. | visit(d, [#QUOTE, _]) = d+1
  418. | visit(d, [#NIL]) = d+1
  419. | visit(d, [#CONS]) = d-1
  420. | visit(d, [#TRAP, lab]) = setlab(lab, d-1) >> d
  421. | visit(d, [#FAIL]) = d
  422. | visit(d, [#JFALSE, lab]) = setlab(lab, d-1) >> d-1
  423. | visit(d, [#JUMP, lab]) = setlab(lab, d) >> d
  424. | visit(d, [#PREP, _]) = d
  425. | visit(d, [#CLOPREP, _]) = d
  426. | visit(d, [#RETURN]) = d-1
  427. | visit(d, [#MPLUS, _]) = d
  428. | visit(d, [#MEQ]) = d-2
  429. | visit(d, [#MNIL]) = d-1
  430. | visit(d, [#MCONS]) = d+1
  431. | visit(d, [#GETTAIL]) = d
  432. | visit(d, [#TCALL, _]) = d
  433. | visit(d, [#PUTARG, _]) = d
  434. | visit(d, [#PUTFVAR, _]) = d
  435. | visit(d, [#CALL, n]) = d-n
  436. | visit(d, [#CLOSURE, n]) = d-n
  437. | visit(d, [#MPRIM, n]) = d+n-2 in
  438. foldl((function (d, i) let d1 = visit(d, i) in
  439. _set(ssize, max(d1, _get(ssize))) >> d1), 0, code) >>
  440. { Fix up labels }
  441. let ltab = _hash() in
  442. let fixlab(lab) = _lookup(ltab, lab) in
  443. let pass1(n, [], a) = a
  444. | pass1(n, lab:code, a) =
  445. _update(ltab, lab, n) >> pass1(n, code, a) when numeric(lab)
  446. | pass1(n, inst:code, a) = pass1(n+1, code, inst:a) in
  447. let fixup([#JUMP, lab]) = [#JUMP, fixlab(lab)]
  448. | fixup([#JFALSE, lab]) = [#JFALSE, fixlab(lab)]
  449. | fixup([#TRAP, lab]) = [#TRAP, fixlab(lab)]
  450. | fixup(inst) = inst in
  451. let pass2(code) =
  452. foldl((function (a, inst) fixup(inst):a), [], code) in
  453. _assemble(f, n, _get(fsize), _get(ssize), pass2(pass1(0, code, []))) in
  454. { ENVIRONMENTS -- An environment is a 5-list [lev, arity, dict, fvs, size],
  455. where
  456. lev is the integer level.
  457. arity is the number of arguments of the current function.
  458. dict is a cell containing an a-list of variables,
  459. each mapped to information needed to load it. The triple
  460. [n, i, a] represents a definition at level n that requires
  461. the instruction [i, a] to load it.
  462. fvs is a cell containing a list of free variables that will be
  463. present in the closure
  464. size is a cell containing the current frame size
  465. For an inner function, fvs includes the name of the function as its
  466. first element. Each closure has itself as the first free variable
  467. as a way of implementing local recursion. }
  468. let lookup(x, [_, _, dict, _, _]) = assoc(x, _get(dict)) in
  469. { empty -- empty environment }
  470. let empty() = [0, 0, _new([]), _new([]), _new(0)] in
  471. { newblock -- create new block for nested function }
  472. let newblock(f, arity, [lev, _, dict, _, _]) =
  473. let d = if f = "<function>" then [] else [[f, [lev+1, #FVAR, 0]]] in
  474. [lev+1, arity, _new(d++_get(dict)), _new([]), _new(0)] in
  475. { e_level -- get level of nesting }
  476. let e_level([lev, _, _, _, _]) = lev in
  477. { e_arity -- get arity }
  478. let e_arity([_, arity, _, _, _]) = arity in
  479. { e_fvars -- get list of free variables }
  480. let e_fvars([_, _, _, fvs, _]) = _get(fvs) in
  481. { e_size -- get size of local frame }
  482. let e_size([_, _, _, _, size]) = _get(size) in
  483. { inc_size -- adjust size of local frame }
  484. let inc_size([_, _, _, _, size], delta) =
  485. _set(size, _get(size)+delta) in
  486. { bind -- define name as local variable }
  487. let bind(x, i, a, [lev, _, dict, _, _]) =
  488. _set(dict, [x, [lev, i, a]] : _get(dict)) in
  489. { unbind -- remove local binding }
  490. let unbind(x, [_, _, dict, _, _]) =
  491. let h([y, _] : d) = d when x = y
  492. | h(v : d) = v : h(d)
  493. | h([]) = [] in
  494. _set(dict, h(_get(dict))) in
  495. { alloc -- allocate space in frame }
  496. let alloc(x, env) =
  497. let a = e_size(env) in
  498. bind(x, #LOCAL, a, env) >> inc_size(env, 1) >> a in
  499. { dealloc -- remove local variable and shrink frame }
  500. let dealloc(x, env) =
  501. unbind(x, env) >> inc_size(env, -1) in
  502. { alloc_fv -- allocate free variable slot }
  503. let alloc_fv(x, [_, _, _, fvs, _]) =
  504. let a = length(_get(fvs)) + 1 in
  505. _set(fvs, _get(fvs) ++ [x]) >> a in
  506. { islocal -- test if name is a local variable }
  507. let islocal(x, env) =
  508. let case([n, i, _]) = (n = e_level(env)) when i = #LOCAL or i = #ARG
  509. | case(_) = false in
  510. case(lookup(x, env)) in
  511. { selfrec -- test if name is a recursive call of the same function }
  512. let selfrec(x, env) =
  513. lookup(x, env) = [e_level(env), #FVAR, 0] in
  514. { reset -- delete local variables at end of clause }
  515. let reset([lev, _, dict, _, size]) =
  516. let h([_, [n, i, _]]) = (n < lev) when i = #LOCAL or i = #ARG
  517. | h(_) = true in
  518. _set(dict, filter(h, _get(dict))) >> _set(size, 0) in
  519. { CODE GENERATOR -- Translate AST into funcode }
  520. let labcount = _new(0) in
  521. let label() = _set(labcount, _get(labcount)+1) in
  522. { c_ref -- compile a variable reference }
  523. let c_ref(x, env) =
  524. let case([n, i, a]) =
  525. { x is a local or constant or known free variable }
  526. [i, a] when n = e_level(env) or i = #QUOTE
  527. | case([_, _, _]) =
  528. { x is local to an enclosing scope -- make it a free variable }
  529. let a = alloc_fv(x, env) in
  530. bind(x, #FVAR, a, env) >> [#FVAR, a]
  531. | case([]) =
  532. { x is not bound at all -- treat it as global }
  533. [#GLOBAL, x] in
  534. case(lookup(x, env)) in
  535. { trapsort -- sort trap list by increasing depth }
  536. let trapsort(traps) =
  537. let insert([f, d], []) = [[f, d]]
  538. | insert([f, d], [f1, d1]:ys) =
  539. if d <= d1 then [f, d]:[f1, d1]:ys
  540. else [f1, d1]:insert([f, d], ys) in
  541. foldr(insert, [], traps) in
  542. { The pattern matching compiler c_patt returns a pair [code, traps] where
  543. code is code to match the pattern, and traps is a list of pairs
  544. [f, d] consisting of a failure label f and a stack depth d when
  545. that label is reached. The function pgen combines several such pairs into
  546. one pair corresponding to a compound pattern. }
  547. { pgen -- accumulate code for pattern matching }
  548. let pgen(root, kids, traps) =
  549. [[#seq, root, #seq:map(fst, kids)], traps ++ concat(map(snd, kids))] in
  550. { c_patt -- compile a pattern, assuming d+1 things on the stack }
  551. let c_patt([#const, v], d, env) =
  552. let f = label() in
  553. pgen([#seq, [#TRAP, f], [#QUOTE, v], [#MEQ]], [], [[f, d]])
  554. | c_patt([#var, x], d, env) =
  555. let f = label() in
  556. if islocal(x, env) then
  557. pgen([#seq, [#TRAP, f], c_ref(x, env), [#MEQ]], [], [[f, d]])
  558. else
  559. pgen([#seq, [#BIND, alloc(x, env)]], [], [])
  560. | c_patt([#anon], d, env) = pgen([#POP], [], [])
  561. | c_patt(#prim:cn:args, d, env) =
  562. { A constructor pattern h(args) }
  563. let f = label() in
  564. let n = length(args) in
  565. pgen([#seq, [#TRAP, f], c_ref(cn, env), [#MPRIM, n]],
  566. reverse([ c_patt(p1, d1, env) | [d1, p1] <- number(d, args) ]),
  567. [[f, d]])
  568. | c_patt([#cons, h, [#anon]], d, env) =
  569. { A cons pattern h : _ }
  570. let f = label() in
  571. pgen([#seq, [#TRAP, f], [#MCONS]],
  572. [c_patt(h, d+1, env), [[#POP], []]],
  573. [[f, d]])
  574. | c_patt([#cons, h, t], d, env) =
  575. { A cons pattern h : t }
  576. let f = label() in
  577. pgen([#seq, [#TRAP, f], [#MCONS]],
  578. [c_patt(h, d+1, env), [[#GETTAIL], []], c_patt(t, d, env)],
  579. [[f, d]])
  580. | c_patt([#nil], d, env) =
  581. { A nil pattern [] }
  582. let f = label() in
  583. pgen([#seq, [#TRAP, f], [#MNIL]], [], [[f, d]])
  584. | c_patt(#list:es, d, env) =
  585. c_patt(listify(es), d, env)
  586. | c_patt([#plus, p1, n], d, env) =
  587. { A plus pattern p + n }
  588. let f = label() in
  589. pgen([#seq, [#TRAP, f], [#MPLUS, n]],
  590. [c_patt(p1, d, env)], [[f, d]]) in
  591. { c_arg -- compile code to match an argument }
  592. let c_arg(i, [#var, x], env) =
  593. { variable matches whole argument }
  594. bind(x, #ARG, i, env) >> pgen([], [], []) when not islocal(x, env)
  595. | c_arg(i, [#anon], env) =
  596. { anon matches whole argument }
  597. pgen([], [], [])
  598. | c_arg(i, p, env) =
  599. pgen([#seq, [#ARG, i]], [c_patt(p, 0, env)], []) in
  600. { c_match -- compile code to match a list of arguments }
  601. let c_match(ps, env) =
  602. { Carefully evaluate from left to right }
  603. let compile(_, []) = []
  604. | compile(i, p:patts) =
  605. let x = c_arg(i, p, env) in x : compile(i+1, patts) in
  606. let code = compile(0, ps) in
  607. [#seq:map(fst, code), concat(map(snd, code))] in
  608. { The functions c_rule and c_body are mutually recursive with c_exp.
  609. Since our language does not support mutual recursion for local
  610. functions, we fake it by tying the knot with a reference cell. }
  611. let knot = _new(0) in
  612. { c_rule -- compile code for one rule in a function }
  613. let c_rule([patts, body], env) =
  614. let c_exp = _get(knot) in
  615. let match = c_match(patts, env) in
  616. let eval = c_exp(body, env, true) in
  617. reset(env) >>
  618. [[#seq, fst(match), eval], snd(match)]
  619. | c_rule([patts, guard, body], env) =
  620. let f = label() in
  621. let c_exp = _get(knot) in
  622. let match = c_match(patts, env) in
  623. let test = c_exp(guard, env, false) in
  624. let eval = c_exp(body, env, true) in
  625. reset(env) >>
  626. [[#seq, fst(match), test, [#JFALSE, f], eval], [f, 0]:snd(match)] in
  627. { A list of traps is accumulated for the whole of a rule, each containing
  628. a label and the stack depth when control reaches it. On the JVM, we
  629. have to pop all the junk from the stack explicitly, so we sort the
  630. traps in decreasing order of depth and intersperse the labels with
  631. the right number of POP instructions. }
  632. { c_traps -- compile popping code for traps }
  633. let c_traps(traps) =
  634. let h(d0, [], acc) = acc
  635. | h(d0, [f, d1]:ys, acc) =
  636. if d0 = d1 then h(d1, ys, f:acc)
  637. else h(d0+1, [f, d1]:ys, [#POP]:acc) in
  638. #seq:h(0, traps, []) in
  639. { c_body -- compile code for a function body }
  640. let c_body([], env) = [#FAIL]
  641. | c_body(r:rs, env) =
  642. let rcode = c_rule(r, env) in
  643. let flabs = map(fst, snd(rcode)) in
  644. let traps = trapsort(snd(rcode)) in
  645. [#seq, fst(rcode), if traps = [] then [] else
  646. [#seq, c_traps(traps), c_body(rs, env)]] in
  647. { c_closure -- compile code to form a closure }
  648. let c_closure(f, n, body, env) =
  649. let env1 = newblock(f, n, env) in
  650. let code = flatten(c_body(body, env1)) in
  651. let fvs = e_fvars(env1) in
  652. let nfvs = length(fvs) in
  653. debug(1, code) >>
  654. [#seq, [#QUOTE, assemble(f, n, code)], [#CLOPREP, nfvs],
  655. #seq:[ [#seq, c_ref(x, env), [#PUTFVAR, i]] | [i, x] <- number(1, fvs) ],
  656. [#CLOSURE, nfvs]] in
  657. { yield -- append RETURN instuction if needed }
  658. let yield(code, tl) =
  659. if tl then [#seq, code, [#RETURN]] else code in
  660. { c_exp -- compile code for an expression, including a RETURN if tl is true }
  661. let c_exp([#const, v], env, tl) = yield([#QUOTE, v], tl)
  662. | c_exp([#var, x], env, tl) = yield(c_ref(x, env), tl)
  663. | c_exp(#apply:[#var, f]:args, env, tl) =
  664. { Tail call to the same function }
  665. [#seq, #seq:[ c_exp(e, env, false) | e <- args ],
  666. [#TCALL, length(args)]]
  667. when tl and selfrec(f, env) and length(args) = e_arity(env)
  668. | c_exp(#apply:f:args, env, tl) =
  669. { A general function call -- PREP and PUTARG provide hooks
  670. for the back end to do inlining of primitives }
  671. let nargs = length(args) in
  672. yield([#seq, c_exp(f, env, false),
  673. [#seq, [#PREP, nargs],
  674. #seq:[ [#seq, c_exp(e, env, false), [#PUTARG, i]]
  675. | [i, e] <- number(0, args) ],
  676. [#CALL, nargs]]], tl)
  677. | c_exp([#if, e1, e2, e3], env, tl) =
  678. let l1 = label() in let l2 = label() in
  679. if tl then
  680. [#seq, c_exp(e1, env, false), [#JFALSE, l1],
  681. c_exp(e2, env, true), l1, c_exp(e3, env, true)]
  682. else
  683. [#seq, c_exp(e1, env, false), [#JFALSE, l1],
  684. c_exp(e2, env, false), [#JUMP, l2],
  685. l1, c_exp(e3, env, false), l2]
  686. | c_exp([#let, [#val, x, [#const, v]], e2], env, tl) =
  687. { Special case: treat constants by substituting them }
  688. bind(x, #QUOTE, v, env) >>
  689. let c2 = c_exp(e2, env, tl) in
  690. unbind(x, env) >> c2
  691. | c_exp([#let, [#val, x, e1], e2], env, tl) =
  692. { Local value definition let x = e1 in e2 }
  693. let c1 = c_exp(e1, env, false) in
  694. let a = alloc(x, env) in
  695. let c2 = c_exp(e2, env, tl) in
  696. dealloc(x, env) >> [#seq, c1, [#BIND, a], c2]
  697. | c_exp([#let, [#fun, f, n, rules], e2], env, tl) =
  698. { Local function definition }
  699. let c1 = c_closure(f, n, rules, env) in
  700. let a = alloc(f, env) in
  701. let c2 = c_exp(e2, env, tl) in
  702. dealloc(f, env) >>
  703. [#seq, c1, [#BIND, a], c2]
  704. | c_exp([#function, n, rules], env, tl) =
  705. { A lambda expression function (patt_1, ..., patt_n) e1 ... }
  706. yield(c_closure("<function>", n, rules, env), tl)
  707. | c_exp([#cons, e1, e2], env, tl) =
  708. yield([#seq, c_exp(e1, env, false),
  709. c_exp(e2, env, false), [#CONS]], tl)
  710. | c_exp([#nil], env, tl) = yield([#NIL], tl)
  711. | c_exp(#list:es, env, tl) = c_exp(listify(es), env, tl)
  712. | c_exp([#seq, e1, e2], env, tl) =
  713. { Sequential composition e1 >> e2 }
  714. [#seq, c_exp(e1, env, false), [#POP], c_exp(e2, env, tl)] in
  715. _set(knot, c_exp) >>
  716. { i_func -- compile a function for the interpreter }
  717. let i_func(f, n, body, env) =
  718. let code = flatten(c_body(body, newblock(f, n, env))) in
  719. debug(1, code) >> _closure(assemble(f, n, code)) in
  720. { Function bodies are compiled, but expressions typed at the top-level
  721. prompt are evaluated by a little metacircular interpreter, which
  722. is itself compiled. }
  723. { interp -- interpret an expression, compiling any embedded functions }
  724. let interp([#const, v], env) = v
  725. | interp([#var, x], env) =
  726. let case([_, #QUOTE, v]) = v
  727. | case([]) = _glodef(x) in
  728. case(lookup(x, env))
  729. | interp(#apply:f:args, env) =
  730. _apply(interp(f, env), [ interp(e, env) | e <- args ])
  731. | interp([#if, e1, e2, e3], env) =
  732. if interp(e1, env) then interp(e2, env) else interp(e3, env)
  733. | interp([#let, [#val, x, e1], e2], env) =
  734. bind(x, #QUOTE, interp(e1, env), env) >>
  735. let v = interp(e2, env) in
  736. unbind(x, env) >> v
  737. | interp([#let, [#fun, f, n, rules], e2], env) =
  738. bind(f, #QUOTE, i_func(f, n, rules, env), env) >>
  739. let v = interp(e2, env) in
  740. unbind(f, env) >> v
  741. | interp([#function, n, rules], env) =
  742. i_func("<function>", n, rules, env)
  743. | interp([#cons, e1, e2], env) =
  744. interp(e1, env) : interp(e2, env)
  745. | interp([#nil], env) =
  746. []
  747. | interp(#list:es, env) =
  748. [interp(e, env) | e <- es ]
  749. | interp([#seq, e1, e2], env) =
  750. interp(e1, env) >> interp(e2, env) in
  751. { exec -- execute a top-level phrase }
  752. let exec([#val, x, e]) =
  753. { A global value definition }
  754. _redefine(x) >>
  755. _topdef(x, interp(e, empty()))
  756. | exec([#fun, f, n, rules]) =
  757. { A global function definition }
  758. _redefine(f) >>
  759. _topdef(f, i_func(f, n, rules, empty()))
  760. | exec(exp) =
  761. { A top-level expression }
  762. _topval(interp(exp, empty())) in
  763. { The read-eval-print routine }
  764. let read_eval_print() =
  765. let p = parser() in
  766. if p = #eof then false else
  767. (_toptext() >> debug(0, p) >>
  768. _set(labcount, 0) >> _setroot(interp) >>
  769. exec(p) >> true) in
  770. read_eval_print;
  771. define _top() =
  772. if not _defined(#_syntax) then _topdef(#_syntax, _hash()) else [] >>
  773. (let t = __top() in t());
  774. { After bootstrapping, redefine _top = __top() for efficiency }