PageRenderTime 46ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/lime.php

https://bitbucket.org/_richardJ/lime
PHP | 1316 lines | 925 code | 232 blank | 159 comment | 77 complexity | 828c6e63991900359f9f5689715ea226 MD5 | raw file
  1. #!/usr/bin/php -q
  2. <?php
  3. /*
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU Library General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17. */
  // Directory containing this script; companion files are loaded relative to it.
  const LIME_DIR = __DIR__;
  // Prefix prepended to each line when generated code is indented one level.
  const INDENT = ' ';
  /**
   * Write one diagnostic line to standard error.
   *
   * @param string $str Message text; the platform line ending is appended.
   */
  function emit($str) {
      $line = $str . PHP_EOL;
      fwrite(STDERR, $line);
  }
  /**
   * Exception raised by bug(), bug_if() and bug_unless() when an internal
   * sanity check fails.
   */
  class Bug extends Exception
  {
  }
  /**
   * Unconditionally raise a Bug exception.
   *
   * @param string $gripe Message carried by the exception.
   * @throws Bug always
   */
  function bug($gripe = 'Bug found.') {
      $failure = new Bug($gripe);
      throw $failure;
  }
  /**
   * Raise a Bug exception when the given condition is truthy.
   *
   * @param mixed  $fallacy Condition that must NOT hold.
   * @param string $gripe   Message carried by the exception.
   * @throws Bug when $fallacy is truthy
   */
  function bug_if($fallacy, $gripe = 'Bug found.') {
      if (!$fallacy) {
          return;
      }
      throw new Bug($gripe);
  }
  /**
   * Raise a Bug exception unless the given condition is truthy.
   *
   * @param mixed  $assertion Condition that must hold.
   * @param string $gripe     Message carried by the exception.
   * @throws Bug when $assertion is falsy
   */
  function bug_unless($assertion, $gripe = 'Bug found.') {
      if ($assertion) {
          return;
      }
      throw new Bug($gripe);
  }
  38. require LIME_DIR . '/parse_engine.php';
  39. require LIME_DIR . '/set.so.php';
  40. require LIME_DIR . '/flex_token_stream.php';
  /**
   * Build the PHP source text that refers to one slot of the $tokens array
   * inside generated action code.
   *
   * @param int|string $pos Index into $tokens.
   * @return string e.g. '$tokens[2]'
   */
  function lime_token_reference($pos) {
      return "\$tokens[{$pos}]";
  }
  /**
   * Regex callback translating a "$$" or "$N" placeholder into PHP source.
   *
   * @param array $foo Match array; element 1 is either '$' or a run of digits.
   * @return string '$result' for "$$", otherwise a reference to token N-1
   *                (placeholders are 1-based, the $tokens array is 0-based).
   */
  function lime_token_reference_callback($foo) {
      $marker = $foo[1];
      return $marker === '$'
          ? '$result'
          : lime_token_reference($marker - 1);
  }
  /**
   * Render a value as PHP source code.
   *
   * Supports booleans, ints, floats, strings and (recursively) arrays.
   * Arrays whose keys are exactly 0..n-1 are written without keys. Strings
   * are written single-quoted unless a double-quoted form is strictly
   * shorter; the double-quoted form is never used when the string contains
   * a '$'.
   *
   * @param mixed $var Value to export.
   * @return string PHP source text.
   * @throws Bug for any other type (via bug()).
   */
  function lime_export($var) {
      if (is_bool($var)) {
          return $var ? 'true' : 'false';
      }
      if (is_int($var) || is_float($var)) {
          return (string)$var;
      }
      if (is_string($var)) {
          $single = '\'' . str_replace(array('\\', '\''), array('\\\\', '\\\''), $var) . '\'';
          $double = $single;
          if (strpos($var, '$') === false) {
              $double = '"' . str_replace(array('\\', '"'), array('\\\\', '\"'), $var) . '"';
          }
          // Ties go to the single-quoted form.
          return strlen($double) < strlen($single) ? $double : $single;
      }
      if (is_array($var)) {
          $indexed = is_indexed($var);
          $items = array();
          foreach ($var as $k => $v) {
              $items[] = $indexed
                  ? lime_export($v)
                  : lime_export($k) . ' => ' . lime_export($v);
          }
          // Indent every line of the joined items by one level.
          $body = preg_replace('~^~m', INDENT, implode(',' . PHP_EOL, $items));
          return 'array(' . PHP_EOL . $body . PHP_EOL . ')';
      }
      bug('Wrong type: ' . gettype($var));
  }
  /**
   * Tell whether an array's keys are exactly 0, 1, 2, ... in that order.
   *
   * @param array $array Array to inspect.
   * @return bool True for an empty array or a sequential zero-based list.
   */
  function is_indexed(array $array) {
      $expected = 0;
      foreach (array_keys($array) as $key) {
          if ($key !== $expected) {
              return false;
          }
          ++$expected;
      }
      return true;
  }
  /**
   * Strip a common leading-whitespace prefix from every line of a text.
   *
   * The prefix is taken from the first non-empty line. It is only removed
   * when the right-trimmed text matches the pattern below, i.e. when each
   * following non-empty line begins with that same run of spaces/tabs;
   * otherwise the text is returned unchanged.
   *
   * @param string $text Possibly indented block of text.
   * @return string The text with the shared prefix removed, or the input.
   */
  function unindent($text) {
      $shape = '{\A[\r\n]*([ \t]+)[^\r\n]*+(?:[\r\n]++(?>\1[^\r\n]*+(?:[\r\n]+|\z)|[\r\n]+)+)?\z}';
      if (!preg_match($shape, rtrim($text), $match)) {
          return $text;
      }
      // $match[1] is the shared prefix (spaces/tabs only).
      return preg_replace('{^' . $match[1] . '}m', '', $text);
  }
  /**
   * Holder for a piece of action code attached to a rule.
   *
   * lime::add_rule_with_actions() treats every non-string element of a
   * right-hand side as one of these and reads its code directly.
   */
  class cf_action {
      /**
       * Action source code. Public because lime::add_rule_with_actions()
       * reads `$it->code` from outside this class; the former protected
       * visibility made that access a fatal error.
       */
      public $code;

      /**
       * @param string $code Action source code.
       */
      public function __construct($code) {
          $this->code = $code;
      }
  }
  /**
   * Base class for parse table instructions.
   *
   * Subclasses are responsible for resolving conflicts among themselves
   * (see decide()), and together they form the interface through which the
   * parse table is produced.
   */
  abstract class step
  {
      /** Lookahead symbol this instruction applies to. */
      public $sym;

      /**
       * @param sym $sym Lookahead symbol.
       */
      public function __construct(sym $sym)
      {
          $this->sym = $sym;
      }

      /**
       * @return string Name of the lookahead symbol.
       */
      public function glyph()
      {
          return $this->sym->name;
      }

      /**
       * @return bool Whether this step should appear in the emitted table.
       */
      public function sane()
      {
          return true;
      }

      /**
       * @return string Encoded instruction for the parse table.
       */
      abstract public function instruction();

      /**
       * Choose between this step and a conflicting one for the same lookahead.
       *
       * @param step $that The competing step.
       * @return step The step to keep.
       */
      abstract public function decide($that);
  }
  /**
   * Parse table entry meaning "syntax error here". It is never emitted
   * (sane() is false) and wins every conflict it takes part in.
   */
  class error extends step
  {
      /**
       * @return bool Always false, so the entry is left out of the table.
       */
      public function sane()
      {
          return false;
      }

      /**
       * Never expected to be called; raises a Bug through bug().
       */
      public function instruction()
      {
          bug('This should not happen.');
      }

      /**
       * An error shall remain one.
       *
       * @param step $that Ignored.
       * @return step This instance.
       */
      public function decide($that)
      {
          return $this;
      }
  }
  /**
   * Shift instruction: consume the lookahead and move to state $q.
   */
  class shift extends step
  {
      /** Id of the destination state. */
      public $q;

      /**
       * @param sym   $sym Lookahead symbol.
       * @param mixed $q   Destination state id.
       */
      public function __construct(sym $sym, $q)
      {
          parent::__construct($sym);
          $this->q = $q;
      }

      /**
       * @return string 's ' followed by the destination state id.
       */
      public function instruction()
      {
          return "s {$this->q}";
      }

      /**
       * Resolve a shift/reduce conflict by precedence.
       *
       * Shift-shift conflicts are impossible and shift-accept conflicts are
       * a bug, so the opponent must be a reduce.
       *
       * @param step $that The competing step; must be a reduce.
       * @return step This shift, the reduce, or a new error step.
       */
      public function decide($that)
      {
          bug_unless($that instanceof reduce);

          $shift_prec = $this->sym->right_prec;
          $reduce_prec = $that->rule->prec;

          // Without a precedence level on both sides, default to shifting.
          if (!$shift_prec || !$reduce_prec) {
              return $this;
          }
          // Otherwise the higher precedence wins.
          if ($reduce_prec > $shift_prec) {
              return $that;
          }
          if ($shift_prec > $reduce_prec) {
              return $this;
          }
          // Equal precedence is how "nonassoc" is expressed: the table gets
          // an error instruction for this lookahead.
          return new error($this->sym);
      }
  }
  /**
   * Reduce instruction: replace the right-hand side of $rule on the stack
   * with its left-hand side.
   */
  class reduce extends step {
      /**
       * Rule to reduce by. Declared explicitly: it used to be created as a
       * dynamic property, which is deprecated as of PHP 8.2.
       */
      public $rule;

      /**
       * @param sym  $sym  Lookahead symbol (type enforced by step).
       * @param rule $rule Rule to reduce by.
       */
      public function __construct($sym, rule $rule) {
          parent::__construct($sym);
          $this->rule = $rule;
      }

      /**
       * @return string 'r ' followed by the rule id.
       */
      public function instruction() {
          return 'r ' . $this->rule->id;
      }

      /**
       * Called when this reduce conflicts with a later step for the same
       * lookahead. Because shifts are added first, that opponent is another
       * reduce, which the input grammar is not allowed to contain.
       *
       * @param step $that The competing step.
       * @throws RRC always
       */
      public function decide($that) {
          // BISON would instead keep the first reduce encountered
          // (i.e. return $this).
          throw new RRC($this, $that);
      }
  }
  /**
   * Accept instruction: the input has been recognised.
   */
  class accept extends step
  {
      /**
       * @param sym $sym Symbol whose name is written into the instruction.
       */
      public function __construct(sym $sym)
      {
          parent::__construct($sym);
      }

      /**
       * @return string 'a ' followed by the symbol name.
       */
      public function instruction()
      {
          return "a {$this->sym->name}";
      }

      /**
       * Accept always wins a conflict.
       *
       * @param step $that Ignored.
       * @return step This instance.
       */
      public function decide($that)
      {
          return $this;
      }
  }
  /**
   * Reduce-Reduce Conflict: two reduce steps compete for one lookahead.
   */
  class RRC extends Exception {
      /**
       * The two conflicting reduce steps. Declared explicitly: they used to
       * be dynamic properties, which is deprecated as of PHP 8.2.
       */
      public $a;
      public $b;

      /**
       * @param reduce $a First reduce step.
       * @param reduce $b Second reduce step.
       */
      public function __construct($a, $b) {
          parent::__construct('Reduce-Reduce Conflict');
          $this->a = $a;
          $this->b = $b;
      }

      /**
       * Report both rules and the lookahead on standard error via emit().
       */
      public function make_noise() {
          emit(sprintf(
              'Reduce-Reduce Conflict:' . PHP_EOL . '%s' . PHP_EOL . '%s' . PHP_EOL . 'Lookahead is (%s)',
              $this->a->rule->text(),
              $this->b->rule->text(),
              $this->a->glyph()
          ));
      }
  }
  /**
   * One state of the parser automaton: a closed set of configurations plus
   * the instructions (shift / reduce / accept) collected for it.
   */
  class state
  {
      /** Sequential state number. */
      public $id;
      /** Space-joined, sorted keys of the basis configurations. */
      public $key;
      /** Closure of the state: config key => config object, sorted by key. */
      public $close;
      /** Every instruction added so far, in insertion order. */
      public $action = array();

      /**
       * @param int    $id    State number.
       * @param string $key   Identifying key of the basis.
       * @param array  $close Closure, config key => config object.
       */
      public function __construct($id, $key, $close)
      {
          ksort($close);
          $this->id = $id;
          $this->key = $key;
          $this->close = $close;
      }

      /**
       * Print this state and each of its configurations to standard output.
       */
      public function dump()
      {
          echo ' * ' . $this->id . ' / ' . $this->key . PHP_EOL;
          foreach ($this->close as $member) {
              $member->dump();
          }
      }

      /**
       * @param sym   $sym   Lookahead symbol.
       * @param state $state Destination state; only its id is recorded.
       */
      public function add_shift(sym $sym, $state)
      {
          $this->add_instruction(new shift($sym, $state->id));
      }

      /**
       * @param sym  $sym  Lookahead symbol.
       * @param rule $rule Rule to reduce by.
       */
      public function add_reduce(sym $sym, $rule)
      {
          $this->add_instruction(new reduce($sym, $rule));
      }

      /**
       * @param sym $sym Symbol the accept instruction is filed under.
       */
      public function add_accept(sym $sym)
      {
          $this->add_instruction(new accept($sym));
      }

      /**
       * @param step $step Instruction to append to the action list.
       */
      public function add_instruction(step $step)
      {
          $this->action[] = $step;
      }

      /**
       * Add a reduce for every symbol in the follow set of each rightmost
       * (dot at the end) configuration.
       *
       * @param lime $lime Used to look symbols up by name.
       */
      public function find_reductions($lime)
      {
          foreach ($this->close as $c) {
              if (!$c->rightmost) {
                  continue;
              }
              foreach ($c->follow->all() as $glyph) {
                  $this->add_reduce($lime->sym($glyph), $c->rule);
              }
          }
      }

      /**
       * For each possible lookahead, find one (and only one) step to take,
       * then encode the survivors for the parse engine.
       *
       * @return array Lookahead name => instruction string; steps whose
       *               sane() is false are left out.
       */
      public function resolve_conflicts()
      {
          $table = array();
          foreach ($this->action as $step) {
              $glyph = $step->glyph();
              if (!isset($table[$glyph])) {
                  // First step seen for this lookahead: our best guess so far.
                  $table[$glyph] = $step;
                  continue;
              }
              // A conflict. The shifts all came first, which simplifies the
              // decide() implementations.
              try {
                  $table[$glyph] = $table[$glyph]->decide($step);
              } catch (RRC $e) {
                  emit('State ' . $this->id . ':');
                  $e->make_noise();
              }
          }

          // This routine is, oddly, also responsible for turning the table
          // into the form eventually passed to the parse engine. (FIXME?)
          $out = array();
          foreach ($table as $glyph => $chosen) {
              if ($chosen->sane()) {
                  $out[$glyph] = $chosen->instruction();
              }
          }
          return $out;
      }

      /**
       * Group the closure's configurations by the symbol after the dot.
       * Configurations with nothing after the dot are skipped.
       *
       * @return array Symbol name => list of config objects.
       */
      public function segment_config()
      {
          $groups = array();
          foreach ($this->close as $c) {
              $next = $c->symbol_after_the_dot;
              if ($next) {
                  $groups[$next->name][] = $c;
              }
          }
          return $groups;
      }
  }
  /**
   * A grammar symbol (terminal or non-terminal).
   *
   * All properties are declared explicitly; they used to be created
   * dynamically in the constructor, which is deprecated as of PHP 8.2.
   */
  class sym {
      /** Symbol name (glyph). */
      public $name;
      /** Sequential id assigned by the lime object. */
      public $id;
      /** True while the symbol is considered a terminal. */
      public $term = true;
      /** Rules having this symbol on the left-hand side. */
      public $rule = array();
      /** Initialised to an empty array; not otherwise used in this class. */
      public $config = array();
      /** Whether the symbol can derive the empty string. */
      public $lambda = false;
      /** FIRST set of the symbol (a `set` object). */
      public $first;
      /** Precedence used when a rule ending in this terminal is reduced. */
      public $left_prec = 0;
      /** Precedence used when this terminal is the lookahead of a shift. */
      public $right_prec = 0;

      /**
       * @param string $name Symbol name.
       * @param int    $id   Sequential id.
       */
      public function __construct($name, $id) {
          $this->name = $name;
          $this->id = $id;
          $this->term = true; // Until proven otherwise.
          $this->rule = array();
          $this->config = array();
          $this->lambda = false;
          $this->first = new set();
          $this->left_prec = $this->right_prec = 0;
      }

      /**
       * @return string The text of every rule for this symbol, one per line.
       */
      public function summary() {
          $out = '';
          foreach ($this->rule as $rule) {
              $out .= $rule->text() . PHP_EOL;
          }
          return $out;
      }
  }
  /**
   * One production rule of the grammar.
   *
   * All properties are declared explicitly; they used to be created
   * dynamically in the constructor, which is deprecated as of PHP 8.2.
   */
  class rule {
      /** Rule number (index in lime::$rule). */
      public $id;
      /** Left-hand side symbol. */
      public $sym;
      /** Right-hand side: list of sym objects. */
      public $rhs;
      /** Action code attached to the rule. */
      public $code;
      /** Emitted as 'len' in the rule table. */
      public $look;
      /** Emitted as 'replace' in the rule table. */
      public $replace;
      /** Precedence level; 0 means none. */
      public $prec = 0;
      /** FIRST sets indexed by dot position (filled by find_first()). */
      public $first = array();
      /** Smallest dot position from which the rest of the rhs is nullable. */
      public $epsilon;

      /**
       * @param int    $id      Rule number.
       * @param sym    $sym     Left-hand side symbol.
       * @param array  $rhs     List of sym objects.
       * @param string $code    Action code.
       * @param int    $look    Must be an int; stored as-is.
       * @param bool   $replace Stored as-is.
       * @throws Bug if $look is not an int
       */
      public function __construct($id, $sym, $rhs, $code, $look, $replace) {
          bug_unless(is_int($look));
          $this->id = $id;
          $this->sym = $sym;
          $this->rhs = $rhs;
          $this->code = $code;
          $this->look = $look;
          $this->replace = $replace;
          $this->prec = 0;
          $this->first = array();
          $this->epsilon = count($rhs);
      }

      /**
       * @return string Name of the left-hand side symbol.
       */
      public function lhs_glyph() {
          return $this->sym->name;
      }

      /**
       * Take the rule's precedence from its rightmost terminal, which is
       * what BISON does. Rules without a terminal keep precedence 0.
       */
      public function determine_precedence() {
          $prec_sym = $this->rightmost_terminal();
          if (!$prec_sym) {
              return;
          }
          $this->prec = $prec_sym->left_prec;
      }

      /**
       * Find the last terminal on the right-hand side.
       *
       * The previous loop fell through with the last symbol it had popped,
       * so a rhs made only of non-terminals yielded its leftmost
       * non-terminal instead of "none".
       *
       * @return sym|null The rightmost terminal, or null if there is none.
       */
      private function rightmost_terminal() {
          foreach (array_reverse($this->rhs) as $symbol) {
              if ($symbol->term) {
                  return $symbol;
              }
          }
          return null;
      }

      /**
       * @return string Human-readable form: "(id) lhs := sym sym ...".
       */
      public function text() {
          $t = '(' . $this->id . ') ' . $this->lhs_glyph() . ' :=';
          foreach ($this->rhs as $s) {
              $t .= ' ' . $s->name;
          }
          return $t;
      }

      /**
       * @param lime_language $lang Supplies fixup() for the action code.
       * @return array Rule table entry with keys symbol, len, replace,
       *               code and text.
       */
      public function table(lime_language $lang) {
          return array(
              'symbol' => $this->lhs_glyph(),
              'len' => $this->look,
              'replace' => $this->replace,
              'code' => $lang->fixup($this->code),
              'text' => $this->text(),
          );
      }

      /**
       * @return bool True when every rhs symbol is nullable (also true for
       *              an empty rhs).
       */
      public function lambda() {
          foreach ($this->rhs as $sym) {
              if (!$sym->lambda) {
                  return false;
              }
          }
          return true;
      }

      /**
       * Compute the FIRST set for every dot position, working from the end
       * of the rhs towards the start, and lower $epsilon for as long as the
       * trailing symbols are nullable.
       */
      public function find_first() {
          $dot = count($this->rhs);
          $last = $this->first[$dot] = new set();
          while ($dot--) {
              $symbol_after_the_dot = $this->rhs[$dot];
              $first = $symbol_after_the_dot->first->all();
              bug_if(empty($first) and !$symbol_after_the_dot->lambda);
              $set = new set($first);
              if ($symbol_after_the_dot->lambda) {
                  // A nullable symbol lets whatever follows show through.
                  $set->union($last);
                  if ($this->epsilon == $dot + 1) {
                      $this->epsilon = $dot;
                  }
              }
              $last = $this->first[$dot] = $set;
          }
      }

      /**
       * Merge the FIRST sets of the leading rhs symbols into the lhs symbol's
       * FIRST set, stopping after the first non-nullable symbol.
       *
       * @return bool True if any union() call reported a change.
       */
      public function teach_symbol_of_first_set() {
          $go = false;
          foreach ($this->rhs as $sym) {
              if ($this->sym->first->union($sym->first)) {
                  $go = true;
              }
              if (!$sym->lambda) {
                  break;
              }
          }
          return $go;
      }

      /**
       * @param int $dot Dot position.
       * @return bool Whether everything from $dot onward is nullable.
       */
      public function lambda_from($dot) {
          return $this->epsilon <= $dot;
      }

      /**
       * @param array $follow Initial follow symbols.
       * @return config Configuration with the dot at position 0.
       */
      public function leftmost($follow) {
          return new config($this, 0, $follow);
      }

      /**
       * @param int $dot Dot position.
       * @return string The rule text with " ." inserted at the dot.
       */
      public function dotted_text($dot) {
          $out = $this->lhs_glyph() . ' :=';
          $idx = -1;
          foreach ($this->rhs as $idx => $s) {
              if ($idx == $dot) {
                  $out .= ' .';
              }
              $out .= ' ' . $s->name;
          }
          // Dot past the last symbol (or an empty rhs).
          if ($dot > $idx) {
              $out .= ' .';
          }
          return $out;
      }
  }
  /**
   * A configuration (item): a rule, a dot position and a follow set,
   * together with the links along which follow sets are propagated.
   *
   * All properties are declared explicitly; they used to be created
   * dynamically in the constructor, which is deprecated as of PHP 8.2.
   */
  class config {
      /** The rule this configuration belongs to. */
      public $rule;
      /** Dot position within the rule's rhs. */
      public $dot;
      /** "<rule id>.<dot>" - identifies the configuration within a state. */
      public $key;
      /** True when the dot is at (or past) the end of the rhs. */
      public $rightmost;
      /** The sym after the dot, or null when rightmost. */
      public $symbol_after_the_dot;
      /** Backward links: configs whose follow set flows into this one. */
      public $_blink = array();
      /** Follow set (a `set` object). */
      public $follow;
      /** Forward links, filled from the backward links by fixlinks(). */
      public $_flink = array();

      /**
       * @param rule  $rule   Owning rule.
       * @param int   $dot    Dot position.
       * @param array $follow Initial follow symbols.
       */
      public function __construct($rule, $dot, $follow) {
          $this->rule = $rule;
          $this->dot = $dot;
          $this->key = $rule->id . '.' . $dot;
          $this->rightmost = count($rule->rhs) <= $dot;
          $this->symbol_after_the_dot = $this->rightmost ? null : $rule->rhs[$dot];
          $this->_blink = array();
          $this->follow = new set($follow);
          $this->_flink = array();
          // Count the rhs, not the rule object: count() on a non-Countable
          // object throws a TypeError on PHP 8.
          bug_unless($this->rightmost or count($rule->rhs));
      }

      /**
       * @return string Dotted rule text followed by the follow set in brackets.
       */
      public function text() {
          return $this->rule->dotted_text($this->dot)
              . ' [ ' . implode(' ', $this->follow->all()) . ' ]';
      }

      /**
       * @param config $config Configuration to record as a backward link.
       */
      public function blink($config) {
          $this->_blink[] = $config;
      }

      /**
       * @return config The configuration with the dot advanced by one.
       * @throws Bug if the dot is already at the end
       */
      public function next() {
          bug_if($this->rightmost);
          $c = new config($this->rule, $this->dot+1, array());
          // Anything in the follow set for this config will also be in the next.
          // However, we link it backwards because we might wind up selecting a
          // pre-existing state, and the housekeeping is easier in the first half
          // of the program. We'll fix it before doing the propagation.
          $c->blink($this);
          return $c;
      }

      /**
       * @param config $that Configuration whose backward links are copied here.
       */
      public function copy_links_from($that) {
          foreach ($that->_blink as $c) {
              $this->blink($c);
          }
      }

      /**
       * @return bool Whether everything from the dot onward is nullable.
       */
      public function lambda() {
          return $this->rule->lambda_from($this->dot);
      }

      /**
       * @return array Members of the rule's FIRST set at the position just
       *               after the symbol following the dot.
       */
      public function simple_follow() {
          return $this->rule->first[$this->dot + 1]->all();
      }

      /**
       * @return bool Whether everything after the symbol following the dot
       *              is nullable.
       */
      public function epsilon_follows() {
          return $this->rule->lambda_from($this->dot + 1);
      }

      /**
       * Turn every backward link into a forward link on its target, then
       * drop the backward links. The old code assigned to `$this->blink`
       * (no underscore), so `_blink` was never cleared and a second call
       * would have duplicated the forward links.
       */
      public function fixlinks() {
          foreach ($this->_blink as $that) {
              $that->_flink[] = $this;
          }
          $this->_blink = array();
      }

      /**
       * Print key, dotted rule, follow set and forward links to standard output.
       */
      public function dump() {
          echo ' * ';
          echo $this->key . ' : ';
          echo $this->rule->dotted_text($this->dot);
          echo $this->follow->text();
          foreach ($this->_flink as $c) {
              echo $c->key . ' / ';
          }
          echo PHP_EOL;
      }
  }
  /**
   * The grammar under construction and the parser generator proper.
   *
   * Rules, symbols and pragmas are added first; build_parser() then computes
   * nullability, FIRST sets, the state graph and follow sets, resolves
   * conflicts, and asks the language back-end for the parser source code.
   *
   * All properties are declared explicitly; most used to be created
   * dynamically in the constructor, which is deprecated as of PHP 8.2.
   */
  class lime {
      /** Name of the generated parser class. */
      public $parser_class = 'parser';
      /** Counter from which the next precedence level is derived. */
      public $p_next = 1;
      /** Symbol table: name => sym. */
      public $sym = array();
      /** Rules indexed by rule id. */
      public $rule = array();
      /** Arguments of the 'start' pragma (symbol names). */
      public $start_symbol_set = array();
      /** States indexed by their basis key. */
      public $state = array();
      /** The end-of-input symbol '#'. */
      public $stop;
      /** Language back-end used to emit code. */
      public $lang;

      public function __construct() {
          $this->p_next = 1;
          $this->sym = array();
          $this->rule = array();
          $this->start_symbol_set = array();
          $this->state = array();
          $this->stop = $this->sym('#');
          // 'error' is registered up front and marked as a non-terminal.
          if ($err = $this->sym('error')) {
              $err->term = false;
          }
          $this->lang = new lime_language_php();
      }

      /**
       * @return lime_language The code-emitting back-end.
       */
      public function language() {
          return $this->lang;
      }

      /**
       * Run the whole generation pipeline.
       *
       * @return string Source code of the parser class.
       */
      public function build_parser() {
          $this->add_start_rule();
          foreach ($this->rule as $r) {
              $r->determine_precedence();
          }
          $this->find_sym_lambda();
          $this->find_sym_first();
          foreach ($this->rule as $rule) {
              $rule->find_first();
          }
          $initial = $this->find_states();
          $this->fixlinks();
          // $this->dump_configurations();
          $this->find_follow_sets();
          foreach ($this->state as $s) {
              $s->find_reductions($this);
          }
          $i = $this->resolve_conflicts();
          $a = $this->rule_table();
          $qi = $initial->id;
          return $this->lang->ptab_to_class(
              $this->parser_class,
              array('a' => $a, 'qi' => $qi, 'i' => $i)
          );
      }

      /**
       * @return array Rule id => table entry (see rule::table()).
       */
      public function rule_table() {
          $s = array();
          foreach ($this->rule as $i => $r) {
              $s[$i] = $r->table($this->lang);
          }
          return $s;
      }

      /**
       * Add a plain rule whose action sees the whole rhs.
       *
       * @param string $symbol Left-hand side name.
       * @param array  $rhs    List of symbol names.
       * @param string $code   Action code.
       */
      public function add_rule($symbol, $rhs, $code) {
          $this->add_raw_rule($symbol, $rhs, $code, count($rhs), true);
      }

      /**
       * Invent a left-hand side name for a sub-rule created for an embedded
       * action: the quoted real name followed by the current rule count.
       *
       * @param string $real Name of the real left-hand side.
       * @return string
       */
      public function trump_up_bogus_lhs($real) {
          return "'{$real}'" . count($this->rule);
      }

      /**
       * Register a rule and mark its left-hand side as a non-terminal
       * (and as nullable when the rhs is empty).
       *
       * @param string $lhs     Left-hand side name.
       * @param array  $rhs     List of symbol names.
       * @param string $code    Action code.
       * @param int    $look    Passed through to rule.
       * @param bool   $replace Passed through to rule.
       */
      public function add_raw_rule($lhs, $rhs, $code, $look, $replace) {
          $sym = $this->sym($lhs);
          $sym->term = false;
          if (!$rhs) {
              $sym->lambda = true;
          }
          $rs = array();
          foreach ($rhs as $str) {
              $rs[] = $this->sym($str);
          }
          $rid = count($this->rule);
          $r = new rule($rid, $sym, $rs, $code, $look, $replace);
          $this->rule[$rid] = $r;
          $sym->rule[] = $r;
      }

      /**
       * Look a symbol up by name, creating it on first use.
       *
       * @param string $str Symbol name.
       * @return sym
       */
      public function sym($str) {
          if (!isset($this->sym[$str])) {
              $this->sym[$str] = new sym($str, count($this->sym));
          }
          return $this->sym[$str];
      }

      /**
       * @return string Text of every rule, grouped by non-terminal.
       */
      public function summary() {
          $out = '';
          foreach ($this->sym as $sym) {
              if (!$sym->term) {
                  $out .= $sym->summary();
              }
          }
          return $out;
      }

      /**
       * Mark nullable symbols, repeating until no new one is found.
       */
      private function find_sym_lambda() {
          do {
              $go = false;
              foreach ($this->sym as $sym) {
                  if (!$sym->lambda) {
                      foreach ($sym->rule as $rule) {
                          if ($rule->lambda()) {
                              $go = true;
                              $sym->lambda = true;
                          }
                      }
                  }
              }
          } while ($go);
      }

      /**
       * Seed each terminal's FIRST set with the terminal itself.
       */
      private function teach_terminals_first_set() {
          foreach ($this->sym as $sym) {
              if ($sym->term) {
                  $sym->first->add($sym->name);
              }
          }
      }

      /**
       * Compute FIRST sets of all symbols, repeating until nothing changes.
       */
      private function find_sym_first() {
          $this->teach_terminals_first_set();
          do {
              $go = false;
              foreach ($this->rule as $r) {
                  if ($r->teach_symbol_of_first_set()) {
                      $go = true;
                  }
              }
          } while ($go);
      }

      /**
       * Add the augmenting rule 'start' := <start symbol>.
       */
      public function add_start_rule() {
          $rewrite = new lime_rewrite("'start'");
          $rhs = new lime_rhs();
          $rhs->add(new lime_glyph($this->deduce_start_symbol()->name, null));
          //$rhs->add(new lime_glyph($this->stop->name, null));
          $rewrite->add_rhs($rhs);
          $rewrite->update($this);
      }

      /**
       * Pick the start symbol: the first name given to the 'start' pragma
       * if it is a known non-terminal, otherwise the lhs of the first rule.
       *
       * The candidate is a symbol NAME, so it has to be looked up in the
       * symbol table before its `term` flag can be tested; the old code read
       * a non-existent `terminal` property off the string, which never
       * triggered the fallback.
       *
       * @return sym
       */
      private function deduce_start_symbol() {
          $candidate = current($this->start_symbol_set);
          // Did the person try to set a start symbol at all?
          if (!$candidate) {
              return $this->first_rule_lhs();
          }
          // Do we actually have such a symbol on the left of a rule?
          if (!isset($this->sym[$candidate]) || $this->sym[$candidate]->term) {
              return $this->first_rule_lhs();
          }
          // Ok, it's a decent choice.
          return $this->sym[$candidate];
      }

      /**
       * @return sym Left-hand side of the first rule.
       * @throws Bug when no rule has been defined
       */
      private function first_rule_lhs() {
          reset($this->rule);
          $r = current($this->rule);
          if ($r === false) {
              bug('The grammar defines no rules, so there is no start symbol.');
          }
          return $r->sym;
      }

      /**
       * Build the initial state. Building it recursively digs out the whole
       * LR(0) state graph.
       *
       * @return state The initial state.
       */
      public function find_states() {
          $start_glyph = "'start'";
          $sym = $this->sym($start_glyph);
          $basis = array();
          foreach ($sym->rule as $rule) {
              $c = $rule->leftmost(array('#'));
              $basis[$c->key] = $c;
          }
          $initial = $this->get_state($basis);
          $initial->add_accept($sym);
          return $initial;
      }

      /**
       * Return the state for a basis, creating it (and, through
       * build_shifts(), its successors) when it does not exist yet.
       *
       * @param array $basis Config key => config.
       * @return state
       */
      public function get_state($basis) {
          $key = array_keys($basis);
          sort($key);
          $key = implode(' ', $key);
          if (isset($this->state[$key])) {
              // Copy all the links around...
              $state = $this->state[$key];
              foreach ($basis as $config) {
                  $state->close[$config->key]->copy_links_from($config);
              }
              return $state;
          }
          $close = $this->state_closure($basis);
          $this->state[$key] = $state = new state(count($this->state), $key, $close);
          $this->build_shifts($state);
          return $state;
      }

      /**
       * Compute the closure of a list of configurations.
       *
       * @param array $q Work list of config objects.
       * @return array Config key => config.
       */
      private function state_closure($q) {
          $close = array();
          while ($config = array_pop($q)) {
              if (isset($close[$config->key])) {
                  // Already present: merge links and follow set into it.
                  $close[$config->key]->copy_links_from($config);
                  $close[$config->key]->follow->union($config->follow);
                  continue;
              }
              $close[$config->key] = $config;
              $symbol_after_the_dot = $config->symbol_after_the_dot;
              if (!$symbol_after_the_dot) {
                  continue;
              }
              if (!$symbol_after_the_dot->term) {
                  foreach ($symbol_after_the_dot->rule as $r) {
                      $station = $r->leftmost($config->simple_follow());
                      if ($config->epsilon_follows()) {
                          $station->blink($config);
                      }
                      $q[] = $station;
                  }
                  // The following turned out to be wrong. Don't do it.
                  //if ($symbol_after_the_dot->lambda) {
                  //    $q[] = $config->next();
                  //}
              }
          }
          return $close;
      }

      /**
       * For every symbol that can follow the dot in $state, find or create
       * the destination state and record the shift.
       *
       * @param state $state
       */
      public function build_shifts($state) {
          foreach ($state->segment_config() as $glyph => $segment) {
              $basis = array();
              foreach ($segment as $preshift) {
                  $postshift = $preshift->next();
                  $basis[$postshift->key] = $postshift;
              }
              $dest = $this->get_state($basis);
              $state->add_shift($this->sym($glyph), $dest);
          }
      }

      /**
       * Call fixlinks() on every configuration of every state.
       */
      public function fixlinks() {
          foreach ($this->state as $s) {
              foreach ($s->close as $c) {
                  $c->fixlinks();
              }
          }
      }

      /**
       * Propagate follow sets along the forward links, re-queueing a
       * configuration whenever its follow set grows.
       */
      public function find_follow_sets() {
          $q = array();
          foreach ($this->state as $s) {
              foreach ($s->close as $c) {
                  $q[] = $c;
              }
          }
          while ($q) {
              $c = array_shift($q);
              foreach ($c->_flink as $d) {
                  if ($d->follow->union($c->follow)) {
                      $q[] = $d;
                  }
              }
          }
      }

      /**
       * Give a group of symbols a new precedence level.
       *
       * @param array $ss Symbol names.
       * @param int   $l  Offset added to the level for left_prec.
       * @param int   $r  Offset added to the level for right_prec.
       */
      private function set_assoc($ss, $l, $r) {
          $p = ($this->p_next++) * 2;
          foreach ($ss as $glyph) {
              $s = $this->sym($glyph);
              $s->left_prec = $p + $l;
              $s->right_prec = $p + $r;
          }
      }

      /** @param array $ss Symbol names to make left-associative. */
      public function left_assoc($ss) {
          $this->set_assoc($ss, 1, 0);
      }

      /** @param array $ss Symbol names to make right-associative. */
      public function right_assoc($ss) {
          $this->set_assoc($ss, 0, 1);
      }

      /** @param array $ss Symbol names to make non-associative. */
      public function non_assoc($ss) {
          $this->set_assoc($ss, 0, 0);
      }

      /**
       * For each state, try to find one and only one thing to do for any
       * given lookahead.
       *
       * @return array State id => (lookahead name => instruction).
       */
      private function resolve_conflicts() {
          $i = array();
          foreach ($this->state as $s) {
              $i[$s->id] = $s->resolve_conflicts();
          }
          return $i;
      }

      /**
       * Print every state to standard output.
       */
      public function dump_configurations() {
          foreach ($this->state as $q) {
              $q->dump();
          }
      }

      /**
       * Print every symbol's FIRST set to standard output.
       */
      public function dump_first_sets() {
          foreach ($this->sym as $s) {
              echo ' * ';
              echo $s->name . ' : ';
              echo $s->first->text();
              echo PHP_EOL;
          }
      }

      /**
       * Add a rule whose rhs mixes symbol names (strings) and action objects
       * exposing a `code` property. Each action that is not last becomes a
       * sub-rule with an invented left-hand side.
       *
       * @param string $lhs Left-hand side name.
       * @param array  $rhs Strings and action objects.
       */
      public function add_rule_with_actions($lhs, $rhs) {
          // First, make sure this thing is well-formed.
          if (!is_object(end($rhs))) {
              $rhs[] = new cf_action('');
          }
          // Now, split it into chunks based on the actions.
          $look = -1;
          $subrule = array();
          $subsymbol = '';
          while ($rhs) {
              $it = array_shift($rhs);
              ++$look;
              if (is_string($it)) {
                  $subrule[] = $it;
              } else {
                  // It's an action.
                  $code = $it->code;
                  if ($rhs) {
                      // Not the last one.
                      $subsymbol = $this->trump_up_bogus_lhs($lhs);
                      $this->add_raw_rule($subsymbol, $subrule, $code, $look, false);
                      $subrule = array($subsymbol);
                  } else {
                      // The last one.
                      $this->add_raw_rule($lhs, $subrule, $code, $look, true);
                  }
              }
          }
      }

      /**
       * Apply a grammar pragma. An unknown pragma is reported on standard
       * error and terminates the program with exit status 1.
       *
       * @param string $type One of left, right, nonassoc, start, class.
       * @param array  $args Pragma arguments.
       */
      public function pragma($type, $args) {
          switch ($type) {
              case 'left':
                  $this->left_assoc($args);
                  break;
              case 'right':
                  $this->right_assoc($args);
                  break;
              case 'nonassoc':
                  $this->non_assoc($args);
                  break;
              case 'start':
                  $this->start_symbol_set = $args;
                  break;
              case 'class':
                  $this->parser_class = $args[0];
                  break;
              default:
                  emit(sprintf('Bad Parser Pragma: (%s)', $type));
                  exit(1);
          }
      }
  }
  /**
   * Base type for code-emitting back-ends. It declares nothing itself;
   * rule::table() uses it as a parameter type.
   */
  class lime_language
  {
  }
  /**
   * Back-end that emits the parser as PHP source code.
   */
  class lime_language_php extends lime_language
  {
      /**
       * @param string $expr PHP expression text.
       * @return string A statement assigning that expression to $result.
       */
      protected function result_code($expr)
      {
          return "\$result = {$expr};" . PHP_EOL;
      }

      /**
       * @return string Statement making the first token the default result.
       */
      public function default_result()
      {
          return $this->result_code('reset($tokens)');
      }

      /**
       * @param int $pos Token position.
       * @return string Statement making the token at $pos the result.
       */
      public function result_pos($pos)
      {
          return $this->result_code(lime_token_reference($pos));
      }

      /**
       * @param string $name Variable name (without the dollar sign).
       * @param int    $pos  Token position.
       * @return string Statement binding $name by reference to that token.
       */
      public function bind($name, $pos)
      {
          return "\${$name} = &\$tokens[{$pos}];" . PHP_EOL;
      }

      /**
       * Replace "$$" with $result and "$N" with a reference to token N-1.
       *
       * @param string $code Action code containing placeholders.
       * @return string Code with the placeholders substituted.
       */
      public function fixup($code)
      {
          $substitute = function ($match) {
              return $match[1] === '$'
                  ? '$result'
                  : lime_token_reference($match[1] - 1);
          };
          return preg_replace_callback('~\$(\d+|\$)~', $substitute, $code);
      }

      /**
       * @param string $code Action code.
       * @return string The same code, unchanged.
       */
      function to_php($code)
      {
          return $code;
      }

      /**
       * Emit the parser class: the tables as public properties and one
       * reduce_* method per rule.
       *
       * @param string $parser_class Name of the class to generate.
       * @param array  $ptab         Keys: 'qi' (initial state id), 'i'
       *                             (instruction table), 'a' (rule table).
       * @return string PHP source of a class extending lime_parser.
       */
      public function ptab_to_class($parser_class, $ptab)
      {
          $code = 'public $qi = ' . lime_export($ptab['qi']) . ';' . PHP_EOL
              . 'public $i = ' . lime_export($ptab['i']) . ';' . PHP_EOL;

          $seen = array();    // sanitised symbol => rules emitted so far
          $method = array();  // rule id => method name
          $rules = array();   // rule id => table entry without code/text
          foreach ($ptab['a'] as $k => $a) {
              $symbol = preg_replace('/[^\w]/', '', $a['symbol']);
              $seen[$symbol] = isset($seen[$symbol]) ? $seen[$symbol] + 1 : 1;
              $mn = 'reduce_' . $k . '_' . $symbol . '_' . $seen[$symbol];
              $method[$k] = $mn;

              $comment = '// ' . $a['text'] . PHP_EOL;
              $php = $this->to_php($a['code']);
              $body = rtrim(preg_replace('~^~m', INDENT, $comment . $php));
              $code .= 'function ' . $mn . '(' . LIME_CALL_PROTOCOL . ') {' . PHP_EOL
                  . $body . PHP_EOL
                  . '}' . PHP_EOL
                  . PHP_EOL;

              unset($a['code'], $a['text']);
              $rules[$k] = $a;
          }
          $code .= 'public $method = ' . lime_export($method) . ';' . PHP_EOL;
          $code .= 'public $a = ' . lime_export($rules) . ';' . PHP_EOL;

          // Indent the class body one level, then blank out lines that
          // consist of horizontal whitespace only.
          $members = preg_replace(array('~^~m', '~^\h+$~m'), array(INDENT, ''), $code);
          return 'class ' . $parser_class . ' extends lime_parser {' . PHP_EOL
              . $members
              . '}' . PHP_EOL;
      }
  }
  /**
   * One alternative (right-hand side) of a rewrite: an ordered list of
   * glyph and action slots.
   *
   * Construct it, add glyphs and actions in whatever order, then add it to
   * a lime_rewrite. Don't call install_rule() yourself; the rewrite does
   * that when you "update" with it.
   */
  class lime_rhs {
      /**
       * The slots, in order. Declared explicitly: it used to be a dynamic
       * property, which is deprecated as of PHP 8.2.
       */
      public $rhs = array();

      public function __construct() {
          $this->rhs = array();
      }

      /**
       * @param lime_slot $slot Glyph or action to append.
       */
      public function add(lime_slot $slot) {
          $this->rhs[] = $slot;
      }

      /**
       * Register this alternative with the grammar, breaking it into
       * sub-rules wherever an action appears before the end.
       *
       * @param lime   $lime Grammar being built.
       * @param string $lhs  Left-hand side name.
       * @throws Bug if a slot is neither a lime_glyph nor a lime_action
       */
      public function install_rule(lime $lime, $lhs) {
          $rhs = $this->rhs;
          // First, make sure this thing is well-formed.
          if (!(end($rhs) instanceof lime_action)) {
              $rhs[] = new lime_action('', null);
          }
          // Now, split it into chunks based on the actions.
          $lang = $lime->language();
          $result_code = $lang->default_result();
          $look = -1;
          $subrule = array();
          $subsymbol = '';
          $preamble = '';
          while ($rhs) {
              $it = array_shift($rhs);
              ++$look;
              if ($it instanceof lime_glyph) {
                  $subrule[] = $it->data;
              } elseif ($it instanceof lime_action) {
                  $code = unindent($it->data);
                  if ($rhs) {
                      // Not the last action: it gets a sub-rule of its own.
                      $subsymbol = $lime->trump_up_bogus_lhs($lhs);
                      $action = $lang->default_result() . $preamble . $code;
                      $lime->add_raw_rule($subsymbol, $subrule, $action, $look, false);
                      $subrule = array($subsymbol);
                  } else {
                      // The last action completes the real rule.
                      $action = $result_code . $preamble . $code;
                      $lime->add_raw_rule($lhs, $subrule, $action, $look, true);
                  }
              } else {
                  // add() accepts any lime_slot, so a bare slot can get here.
                  // This used to call an undefined function impossible().
                  bug('Right-hand side slot is neither a glyph nor an action.');
              }
              // A slot named '$' supplies the result; any other name is
              // bound as a variable for the actions that follow.
              if ($it->name == '$') {
                  $result_code = $lang->result_pos($look);
              } elseif ($it->name) {
                  $preamble .= $lang->bind($it->name, $look);
              }
          }
      }
  }
  /**
   * All alternatives for one left-hand side.
   *
   * Construct it with the name of the lhs, add some rhs-es to it, and
   * finally "update" the lime you're building.
   */
  class lime_rewrite {
      /**
       * Left-hand side name and its alternatives. Declared explicitly: they
       * used to be dynamic properties, which is deprecated as of PHP 8.2.
       */
      public $glyph;
      public $rhs = array();

      /**
       * @param string $glyph Name of the left-hand side.
       */
      public function __construct($glyph) {
          $this->glyph = $glyph;
          $this->rhs = array();
      }

      /**
       * @param lime_rhs $rhs Alternative to append.
       */
      public function add_rhs(lime_rhs $rhs) {
          $this->rhs[] = $rhs;
      }

      /**
       * Install every alternative into the grammar.
       *
       * @param lime $lime Grammar being built.
       */
      public function update(lime $lime) {
          foreach ($this->rhs as $rhs) {
              $rhs->install_rule($lime, $this->glyph);
          }
      }
  }
  /**
   * This keeps track of one position in an rhs.
   * We specialize to handle actions and glyphs.
   *
   * If there is a name for the slot, we store it here.
   * Later on, this structure will be consulted in the formation of
   * actual production rules.
   */
  class lime_slot {
      /**
       * Payload and optional name. Declared explicitly: they used to be
       * dynamic properties, which is deprecated as of PHP 8.2.
       */
      public $data;
      public $name;

      /**
       * @param mixed       $data Payload (lime_rhs reads it as a symbol
       *                          name for glyphs and as code for actions).
       * @param string|null $name Optional slot name.
       */
      public function __construct($data, $name) {
          $this->data = $data;
          $this->name = $name;
      }

      /**
       * @param int $pos Token position.
       * @return string|null Statement binding the slot's name by reference
       *                     to that token, or null when the slot is unnamed.
       */
      public function preamble($pos) {
          // Cast first: the name is often null, and strlen(null) is
          // deprecated as of PHP 8.1.
          if ((string)$this->name === '') {
              return null;
          }
          return '$' . $this->name . ' = &$tokens[' . $pos . '];' . PHP_EOL;
      }
  }
  /**
   * A slot holding a grammar symbol; lime_rhs::install_rule() reads its
   * data as the symbol name.
   */
  class lime_glyph extends lime_slot
  {
  }
  /**
   * A slot holding action code; lime_rhs::install_rule() reads its data as
   * the code text.
   */
  class lime_action extends lime_slot
  {
  }
  /**
   * Build and load the parser for the Lime grammar language itself.
   *
   * This function isn't too terribly interesting to the casual observer;
   * parse_lime_grammar() is the better place to start reading.
   *
   * The input to Lime is a CFG parser definition written in the Lime
   * language, so that language has to be parsed too - and Lime parses it
   * with a Lime-generated parser. A restricted subset of the language was
   * used to write a metagrammar, which was then hand-translated into a
   * simplified form (the lime.bootstrap file) that is easy to read line by
   * line. Reading it builds the same data structure an ordinary grammar
   * would; the parser generation algorithm is then run on it and the
   * resulting PHP code is eval()'d, defining the lime_metaparser class.
   *
   * File format, as read below: a line containing ':' is
   * "<space-separated glyphs>:<action code>" and is queued as one
   * alternative; a line of the form "to <name>" turns all queued
   * alternatives into rules for <name>. Other lines are ignored.
   *
   * @throws Bug if the bootstrap file is not readable
   */
  function lime_bootstrap() {
      $bootstrap = LIME_DIR . '/lime.bootstrap';
      $lime = new lime();
      $lime->parser_class = 'lime_metaparser';
      $pending = array();
      bug_unless(is_readable($bootstrap));
      foreach (file($bootstrap) as $line) {
          $parts = explode(':', $line, 2);
          if (count($parts) != 2) {
              // No colon: only "to <name>" lines mean anything.
              if (preg_match('~^to (\w+)$~', $line, $found)) {
                  $rewrite = new lime_rewrite($found[1]);
                  foreach ($pending as $alternative) {
                      $rewrite->add_rhs($alternative);
                  }
                  $rewrite->update($lime);
                  $pending = array();
              }
              continue;
          }
          list($pattern, $code) = $parts;
          $alternative = new lime_rhs();
          $pattern = trim($pattern);
          if ($pattern !== '') {
              foreach (explode(' ', $pattern) as $glyph) {
                  $alternative->add(new lime_glyph($glyph, null));
              }
          }
          $alternative->add(new lime_action($code, NULL));
          $pending[] = $alternative;
      }
      // NOTE(review): eval() runs code generated from the lime.bootstrap
      // file next to this script; keep that file trusted.
      eval($lime->build_parser());
  }
  /**
   * Scanner for grammar definition files.
   *
   * Those files contain embedded bits of PHP, so the lexer must keep track
   * of strings, comments and matched braces. That job is done by a small
   * scanner written in flex and C; its tokens reach PHP through a simple
   * binary wrapper that also carries line-number information, which helps
   * when a grammar surprises the parser.
   */
  class voodoo_scanner extends flex_scanner
  {
      /**
       * @return string Path of the external scanner program.
       */
      function executable()
      {
          return LIME_DIR . '/lime_scan_tokens';
      }
  }
  /**
   * Turn one Lime grammar file into parser source code.
   *
   * This is a good function to read because it shows how to interface with
   * a Lime parser; the less instructive parts are kept elsewhere.
   *
   * On a parse error the message, path and line number are printed and the
   * program terminates.
   *
   * @param string $path Path of the grammar file.
   * @return string Generated PHP code.
   */
  function parse_lime_grammar($path) {
      // The metaparser class is generated on first use.
      if (!class_exists('lime_metaparser', false)) {
          lime_bootstrap();
      }
      $metaparser = new lime_metaparser();
      $parse_engine = new parse_engine($metaparser);
      $scanner = new voodoo_scanner($path);
      try {
          // The result of parsing a Lime grammar is a Lime object...
          $grammar = $scanner->feed($parse_engine);
          // ...whose build_parser() method yields the output PHP code.
          return $grammar->build_parser();
      } catch (parse_error $e) {
          $complaint = $e->getMessage() . " in {$path} line {$scanner->lineno}." . PHP_EOL;
          die ($complaint);
      }
  }
  // Command-line entry point: every argument is a grammar file. The generated
  // code for all of them is written to standard output after a banner.
  if ($_SERVER['argv']) {
      array_shift($_SERVER['argv']); // Strip out the program name.
      $code = '';
      foreach ($_SERVER['argv'] as $path) {
          $code .= parse_lime_grammar($path);
      }
      $banner = array(
          '<?php',
          '/*',
          '*** DON\'T EDIT THIS FILE! ***',
          '*',
          '* This file was automatically generated by the Lime parser generator.',
          '* The real source code you should be looking at is in one or more',
          '* grammar files in the Lime format.',
          '*',
          '* THE ONLY REASON TO LOOK AT THIS FILE is to see where in the grammar',
          '* file that your error happened, because there are enough comments to',
          '* help you debug your grammar.',
          '* If you ignore this warning, you\'re shooting yourself in the brain,',
          '* not the foot.',
          '*/',
      );
      // No newline is added after the generated code.
      echo implode("\n", $banner) . "\n" . $code;
  }