/lime.php
PHP | 1316 lines | 925 code | 232 blank | 159 comment | 77 complexity | 828c6e63991900359f9f5689715ea226 MD5 | raw file
- #!/usr/bin/php -q
- <?php
- /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Library General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
// Directory holding this script; the companion files are required relative to it.
define('LIME_DIR', __DIR__);

// Indentation unit prefixed to every line of nested generated code.
define('INDENT', ' ');
/**
 * Write a diagnostic line to standard error.
 *
 * @param string $str Message text; a platform newline is appended.
 */
function emit($str)
{
    $line = $str . PHP_EOL;
    fwrite(STDERR, $line);
}
/**
 * Exception raised by bug(), bug_if() and bug_unless() when an internal
 * assumption of the generator is violated.
 */
class Bug extends Exception
{
}
/**
 * Unconditionally signal an internal error.
 *
 * @param string $gripe Description of what went wrong.
 * @throws Bug always.
 */
function bug($gripe = 'Bug found.')
{
    $failure = new Bug($gripe);
    throw $failure;
}
/**
 * Signal an internal error when a condition that must be false is truthy.
 *
 * @param mixed  $fallacy Condition expected to be falsy.
 * @param string $gripe   Description used for the exception message.
 * @throws Bug when $fallacy is truthy.
 */
function bug_if($fallacy, $gripe = 'Bug found.')
{
    if (!$fallacy) {
        return;
    }
    throw new Bug($gripe);
}
/**
 * Signal an internal error when a condition that must hold is falsy.
 *
 * @param mixed  $assertion Condition expected to be truthy.
 * @param string $gripe     Description used for the exception message.
 * @throws Bug when $assertion is falsy.
 */
function bug_unless($assertion, $gripe = 'Bug found.')
{
    if ($assertion) {
        return;
    }
    throw new Bug($gripe);
}
- require LIME_DIR . '/parse_engine.php';
- require LIME_DIR . '/set.so.php';
- require LIME_DIR . '/flex_token_stream.php';
/**
 * Build the PHP source expression that addresses one entry of $tokens.
 *
 * @param int|string $pos Index into the $tokens array.
 * @return string Source text such as '$tokens[2]'.
 */
function lime_token_reference($pos)
{
    return sprintf('$tokens[%s]', $pos);
}
/**
 * Regex-callback form of lime_token_reference().
 *
 * @param array $foo Match array; element 1 is either '$' or a 1-based
 *                   token number.
 * @return string '$result' for '$', otherwise the reference to the
 *                zero-based token slot.
 */
function lime_token_reference_callback($foo)
{
    $captured = $foo[1];
    // "$$" names the result of the rule; "$N" names the N-th token.
    return ($captured === '$')
        ? '$result'
        : lime_token_reference($captured - 1);
}
/**
 * Render a value as PHP source code.
 *
 * Supports arrays (recursively), integers, floats, strings and booleans.
 * Arrays whose keys are 0..n-1 in order are written without keys.
 * Strings are written in whichever quoting style is shorter; double quotes
 * are only considered when the string contains no '$'.
 *
 * @param mixed $var Value to render.
 * @return string PHP source text.
 * @throws Bug for any other type.
 */
function lime_export($var)
{
    if (is_bool($var)) {
        return $var ? 'true' : 'false';
    }

    if (is_int($var) || is_float($var)) {
        return (string)$var;
    }

    if (is_string($var)) {
        $single = '\'' . str_replace(array('\\', '\''), array('\\\\', '\\\''), $var) . '\'';
        if (strpos($var, '$') !== false) {
            // A '$' would start interpolation inside double quotes.
            return $single;
        }
        $double = '"' . str_replace(array('\\', '"'), array('\\\\', '\"'), $var) . '"';
        // Ties go to the single-quoted form.
        return strlen($double) < strlen($single) ? $double : $single;
    }

    if (is_array($var)) {
        $sequential = is_indexed($var);
        $items = array();
        foreach ($var as $key => $value) {
            $prefix = $sequential ? '' : lime_export($key) . ' => ';
            $items[] = $prefix . lime_export($value);
        }
        $body = implode(',' . PHP_EOL, $items);
        // Indent every line of the element list by one unit.
        return 'array(' . PHP_EOL . preg_replace('~^~m', INDENT, $body) . PHP_EOL . ')';
    }

    bug('Wrong type: ' . gettype($var));
}
/**
 * Tell whether an array's keys are exactly 0, 1, 2, ... in iteration order.
 *
 * @param array $array Array to inspect.
 * @return bool True for an empty array or a plain sequential list.
 */
function is_indexed(array $array)
{
    $expected = 0;
    foreach (array_keys($array) as $key) {
        if ($key !== $expected) {
            return false;
        }
        $expected++;
    }
    return true;
}
/**
 * Strip a common leading indentation from a block of text.
 *
 * The indentation of the first non-empty line is taken as the candidate.
 * It is removed from the start of every line only when every subsequent
 * non-empty line begins with that same whitespace; otherwise the text is
 * returned unchanged.
 *
 * @param string $text Text to process.
 * @return string
 */
function unindent($text)
{
    $uniformly_indented = '{\A[\r\n]*([ \t]+)[^\r\n]*+(?:[\r\n]++(?>\1[^\r\n]*+(?:[\r\n]+|\z)|[\r\n]+)+)?\z}';
    if (!preg_match($uniformly_indented, rtrim($text), $match)) {
        return $text;
    }
    $strip = '{^' . $match[1] . '}m';
    return preg_replace($strip, '', $text);
}
/**
 * Wraps the source code of an action embedded in a rule's right-hand side.
 *
 * lime::add_rule_with_actions() tells actions from glyphs with is_object()
 * and then reads ->code directly, so the property has to be readable from
 * outside the class.
 */
class cf_action
{
    /** @var string Action source code. */
    public $code;

    /**
     * @param string $code Action source code.
     */
    public function __construct($code)
    {
        $this->code = $code;
    }
}
- /**
- * Base class for parse table instructions. The main idea is to make the
- * subclasses responsible for conflict resolution among themselves. It also
- * forms a sort of interface to the parse table.
- */
abstract class step
{
    /** @var sym Lookahead symbol this instruction is filed under. */
    public $sym;

    /**
     * @param sym $sym Lookahead symbol.
     */
    public function __construct(sym $sym)
    {
        $this->sym = $sym;
    }

    /**
     * Encode this step as the instruction string stored in the parse table.
     *
     * @return string
     */
    abstract public function instruction();

    /**
     * Resolve a conflict between this step and another one filed under the
     * same lookahead.
     *
     * @param step $that The competing step.
     * @return step The step that should stay in the table.
     */
    abstract public function decide($that);

    /**
     * Name of the lookahead symbol.
     *
     * @return string
     */
    public function glyph()
    {
        return $this->sym->name;
    }

    /**
     * Whether this step may be written to the parse table.
     *
     * @return bool
     */
    public function sane()
    {
        return true;
    }
}
/**
 * Placeholder step meaning "this lookahead is a syntax error here".
 *
 * It is never written to the parse table (sane() is false) and, once
 * chosen, wins every further conflict.
 *
 * NOTE(review): PHP class names are case-insensitive and PHP 7+ ships a
 * built-in `Error` class, so declaring `error` is expected to fail there.
 * A rename must be coordinated with shift::decide(), which instantiates it.
 */
class error extends step
{
    /**
     * An error step has no table encoding; asking for one is a bug.
     *
     * @throws Bug always.
     */
    public function instruction()
    {
        bug('This should not happen.');
    }

    /**
     * @param step $that Ignored.
     * @return step Always $this: an error shall remain one.
     */
    public function decide($that)
    {
        return $this;
    }

    /**
     * @return bool Always false, so the step is omitted from the table.
     */
    public function sane()
    {
        return false;
    }
}
/**
 * Step that consumes the lookahead and moves to state $q.
 */
class shift extends step
{
    /** @var int Id of the destination state. */
    public $q;

    /**
     * @param sym $sym Lookahead symbol.
     * @param int $q   Id of the destination state.
     */
    public function __construct(sym $sym, $q)
    {
        parent::__construct($sym);
        $this->q = $q;
    }

    /**
     * @return string 's ' followed by the destination state id.
     */
    public function instruction()
    {
        return 's ' . $this->q;
    }

    /**
     * Resolve a shift-reduce conflict by precedence.
     *
     * @param step $that The competing step; must be a reduce.
     * @return step The winner, or an error step when both sides have the
     *              same non-zero precedence.
     * @throws Bug if $that is not a reduce.
     */
    public function decide($that)
    {
        // Shift-shift cannot happen and shift-accept would be a bug, so the
        // opponent has to be a reduction.
        bug_unless($that instanceof reduce);

        $shift_prec = $this->sym->right_prec;
        $reduce_prec = $that->rule->prec;

        // Without a declared precedence on both sides, shifting is the default.
        if (!$shift_prec || !$reduce_prec) {
            return $this;
        }

        // Higher precedence wins.
        if ($shift_prec > $reduce_prec) {
            return $this;
        }

        // "nonassoc" gives both options equal precedence, which must yield
        // an error entry in the parse table.
        return ($reduce_prec > $shift_prec) ? $that : new error($this->sym);
    }
}
/**
 * Step that reduces by a rule when the lookahead is $sym.
 */
class reduce extends step
{
    /**
     * Declared explicitly: creating it dynamically in the constructor is
     * deprecated as of PHP 8.2.
     *
     * @var rule Rule to reduce by.
     */
    public $rule;

    /**
     * @param sym  $sym  Lookahead symbol (type-checked by the parent constructor).
     * @param rule $rule Rule to reduce by.
     */
    public function __construct($sym, rule $rule)
    {
        parent::__construct($sym);
        $this->rule = $rule;
    }

    /**
     * @return string 'r ' followed by the rule id.
     */
    public function instruction()
    {
        return 'r ' . $this->rule->id;
    }

    /**
     * A second candidate for the same lookahead means the input grammar has
     * a reduce-reduce conflict, which is treated as an error in the input.
     * (BISON would instead keep the first reduction encountered.)
     *
     * @param step $that The competing step.
     * @throws RRC always.
     */
    public function decide($that)
    {
        throw new RRC($this, $that);
    }
}
/**
 * Step that accepts the input. Construction is inherited unchanged from
 * step, which stores the symbol.
 */
class accept extends step
{
    /**
     * @return string 'a ' followed by the symbol name.
     */
    public function instruction()
    {
        $name = $this->sym->name;
        return 'a ' . $name;
    }

    /**
     * @param step $that Ignored.
     * @return step Always $this.
     */
    public function decide($that)
    {
        return $this;
    }
}
/**
 * Raised when two reductions compete for the same lookahead.
 */
class RRC extends Exception
{
    /** @var reduce The reduction that was already in the table. */
    public $a;

    /** @var step The step that collided with it. */
    public $b;

    /**
     * The properties are declared above rather than created here, because
     * dynamic property creation is deprecated as of PHP 8.2.
     *
     * @param reduce $a The reduction that was already in the table.
     * @param step   $b The step that collided with it.
     */
    public function __construct($a, $b)
    {
        parent::__construct('Reduce-Reduce Conflict');
        $this->a = $a;
        $this->b = $b;
    }

    /**
     * Report both conflicting rules and the lookahead on standard error.
     */
    public function make_noise()
    {
        emit(sprintf(
            'Reduce-Reduce Conflict:' . PHP_EOL . '%s' . PHP_EOL . '%s' . PHP_EOL . 'Lookahead is (%s)',
            $this->a->rule->text(),
            $this->b->rule->text(),
            $this->a->glyph()
        ));
    }
}
/**
 * One state of the parser automaton: a closed set of configurations plus
 * the candidate steps collected for it.
 */
class state
{
    /** @var int Sequence number of the state. */
    public $id;

    /** @var string Space-separated, sorted keys of the basis configurations. */
    public $key;

    /** @var config[] Closure of the basis, keyed by configuration key. */
    public $close;

    /** @var step[] Candidate steps, in the order they were added. */
    public $action = array();

    /**
     * @param int      $id    Sequence number.
     * @param string   $key   Basis key.
     * @param config[] $close Closure, config key -> object.
     */
    public function __construct($id, $key, $close)
    {
        ksort($close);
        $this->id = $id;
        $this->key = $key;
        $this->close = $close;
    }

    /**
     * Print the state header and each configuration to standard output.
     */
    public function dump()
    {
        echo ' * ' . $this->id . ' / ' . $this->key . PHP_EOL;
        foreach ($this->close as $config) {
            $config->dump();
        }
    }

    /**
     * @param sym   $sym   Symbol being shifted.
     * @param state $state Destination state.
     */
    public function add_shift(sym $sym, $state)
    {
        $step = new shift($sym, $state->id);
        $this->add_instruction($step);
    }

    /**
     * @param sym  $sym  Lookahead symbol.
     * @param rule $rule Rule to reduce by.
     */
    public function add_reduce(sym $sym, $rule)
    {
        $step = new reduce($sym, $rule);
        $this->add_instruction($step);
    }

    /**
     * @param sym $sym Symbol to accept on.
     */
    public function add_accept(sym $sym)
    {
        $step = new accept($sym);
        $this->add_instruction($step);
    }

    /**
     * Append a candidate step; conflicts are settled later.
     *
     * @param step $step
     */
    public function add_instruction(step $step)
    {
        $this->action[] = $step;
    }

    /**
     * Add a reduce step for every follow symbol of every configuration
     * whose dot is at the right end.
     *
     * @param lime $lime Grammar, used to look up symbols by glyph.
     */
    public function find_reductions($lime)
    {
        foreach ($this->close as $c) {
            if (!$c->rightmost) {
                continue;
            }
            foreach ($c->follow->all() as $glyph) {
                $this->add_reduce($lime->sym($glyph), $c->rule);
            }
        }
    }

    /**
     * Pick one step per lookahead and encode the survivors.
     *
     * The shifts were all added first, which simplifies the coding of the
     * step->decide() methods. A reduce-reduce conflict is reported and the
     * entry already chosen is kept.
     *
     * @return array glyph => instruction string, without error entries.
     */
    public function resolve_conflicts()
    {
        $chosen = array();
        foreach ($this->action as $step) {
            $glyph = $step->glyph();
            if (!isset($chosen[$glyph])) {
                // First step seen for this glyph: best guess so far.
                $chosen[$glyph] = $step;
                continue;
            }
            try {
                $chosen[$glyph] = $chosen[$glyph]->decide($step);
            } catch (RRC $e) {
                emit('State ' . $this->id . ':');
                $e->make_noise();
            }
        }

        // This routine is oddly also responsible for producing the form
        // that is eventually handed to the parse engine. (So FIXME?)
        $out = array();
        foreach ($chosen as $glyph => $step) {
            if ($step->sane()) {
                $out[$glyph] = $step->instruction();
            }
        }
        return $out;
    }

    /**
     * Group the configurations by the symbol that follows their dot.
     * Configurations with nothing after the dot are left out.
     *
     * @return array glyph => list of config.
     */
    public function segment_config()
    {
        $groups = array();
        foreach ($this->close as $c) {
            $after = $c->symbol_after_the_dot;
            if ($after) {
                $groups[$after->name][] = $c;
            }
        }
        return $groups;
    }
}
/**
 * A grammar symbol (terminal or non-terminal).
 *
 * All properties are declared up front; creating them dynamically in the
 * constructor is deprecated as of PHP 8.2.
 */
class sym
{
    /** @var string Glyph of the symbol. */
    public $name;

    /** @var int Sequence number assigned by the grammar. */
    public $id;

    /** @var bool True until the symbol is used as the left side of a rule. */
    public $term;

    /** @var rule[] Rules having this symbol on the left. */
    public $rule;

    /** @var array Initialised empty; not otherwise used in this file. */
    public $config;

    /** @var bool Whether the symbol can derive the empty string. */
    public $lambda;

    /** @var set FIRST set of the symbol. */
    public $first;

    /** @var int Precedence used when the symbol decides a rule's precedence. */
    public $left_prec;

    /** @var int Precedence used when the symbol is the lookahead of a shift. */
    public $right_prec;

    /**
     * @param string $name Glyph of the symbol.
     * @param int    $id   Sequence number.
     */
    public function __construct($name, $id)
    {
        $this->name = $name;
        $this->id = $id;
        $this->term = true; // Until proven otherwise.
        $this->rule = array();
        $this->config = array();
        $this->lambda = false;
        $this->first = new set();
        $this->left_prec = $this->right_prec = 0;
    }

    /**
     * @return string The text of every rule for this symbol, one per line.
     */
    public function summary()
    {
        $out = '';
        foreach ($this->rule as $rule) {
            $out .= $rule->text() . PHP_EOL;
        }
        return $out;
    }
}
/**
 * One production of the grammar.
 *
 * Properties are declared up front; creating them dynamically in the
 * constructor is deprecated as of PHP 8.2.
 */
class rule
{
    /** @var int Rule number. */
    public $id;

    /** @var sym Left-hand-side symbol. */
    public $sym;

    /** @var sym[] Right-hand-side symbols, indexed from 0. */
    public $rhs;

    /** @var string Action code attached to the rule. */
    public $code;

    /** @var int Emitted as 'len' in the rule table. */
    public $look;

    /** @var bool Emitted as 'replace' in the rule table. */
    public $replace;

    /** @var int Precedence of the rule; 0 means none. */
    public $prec;

    /** @var set[] FIRST set of the rhs suffix starting at each dot position. */
    public $first;

    /** @var int Smallest dot position from which the rest of the rhs is nullable. */
    public $epsilon;

    /**
     * @param int    $id      Rule number.
     * @param sym    $sym     Left-hand-side symbol.
     * @param sym[]  $rhs     Right-hand-side symbols.
     * @param string $code    Action code.
     * @param int    $look    Value for the 'len' table entry.
     * @param bool   $replace Value for the 'replace' table entry.
     * @throws Bug if $look is not an integer.
     */
    public function __construct($id, $sym, $rhs, $code, $look, $replace)
    {
        bug_unless(is_int($look));
        $this->id = $id;
        $this->sym = $sym;
        $this->rhs = $rhs;
        $this->code = $code;
        $this->look = $look;
        $this->replace = $replace;
        $this->prec = 0;
        $this->first = array();
        $this->epsilon = count($rhs);
    }

    /**
     * @return string Glyph of the left-hand side.
     */
    public function lhs_glyph()
    {
        return $this->sym->name;
    }

    /**
     * Give the rule the left precedence of its rightmost terminal, which is
     * what BISON does. Explicit precedence symbols are not supported yet.
     * A rule without any terminal keeps precedence 0.
     */
    public function determine_precedence()
    {
        $prec_sym = $this->rightmost_terminal();
        if (!$prec_sym) {
            return;
        }
        $this->prec = $prec_sym->left_prec;
    }

    /**
     * Find the last terminal on the right-hand side.
     *
     * The original loop kept the last symbol it popped, so a rule made only
     * of non-terminals returned its leftmost non-terminal; it now returns
     * null in that case.
     *
     * @return sym|null
     */
    private function rightmost_terminal()
    {
        foreach (array_reverse($this->rhs) as $symbol) {
            if ($symbol->term) {
                return $symbol;
            }
        }
        return null;
    }

    /**
     * @return string Human-readable form, e.g. "(3) expr := expr + term".
     */
    public function text()
    {
        $t = '(' . $this->id . ') ' . $this->lhs_glyph() . ' :=';
        foreach ($this->rhs as $s) {
            $t .= ' ' . $s->name;
        }
        return $t;
    }

    /**
     * @param lime_language $lang Target language; its fixup() rewrites the action code.
     * @return array Rule-table entry for this rule.
     */
    public function table(lime_language $lang)
    {
        return array(
            'symbol' => $this->lhs_glyph(),
            'len' => $this->look,
            'replace' => $this->replace,
            'code' => $lang->fixup($this->code),
            'text' => $this->text(),
        );
    }

    /**
     * @return bool True when every rhs symbol is nullable (also for an empty rhs).
     */
    public function lambda()
    {
        foreach ($this->rhs as $sym) {
            if (!$sym->lambda) {
                return false;
            }
        }
        return true;
    }

    /**
     * Compute $first[dot] for every dot position, working right to left,
     * and lower $epsilon while the suffix stays nullable.
     *
     * @throws Bug if a non-nullable symbol has an empty FIRST set.
     */
    public function find_first()
    {
        $dot = count($this->rhs);
        $last = $this->first[$dot] = new set();
        while ($dot--) {
            $symbol_after_the_dot = $this->rhs[$dot];
            $first = $symbol_after_the_dot->first->all();
            bug_if(empty($first) and !$symbol_after_the_dot->lambda);
            $set = new set($first);
            if ($symbol_after_the_dot->lambda) {
                // A nullable symbol lets whatever follows it show through.
                $set->union($last);
                if ($this->epsilon == $dot + 1) {
                    $this->epsilon = $dot;
                }
            }
            $last = $this->first[$dot] = $set;
        }
    }

    /**
     * Merge the FIRST sets of the leading rhs symbols into the lhs symbol,
     * stopping after the first non-nullable one.
     *
     * @return bool Whether any union reported a change.
     */
    public function teach_symbol_of_first_set()
    {
        $go = false;
        foreach ($this->rhs as $sym) {
            if ($this->sym->first->union($sym->first)) {
                $go = true;
            }
            if (!$sym->lambda) {
                break;
            }
        }
        return $go;
    }

    /**
     * @param int $dot Dot position.
     * @return bool Whether everything from $dot onward is nullable.
     */
    public function lambda_from($dot)
    {
        return $this->epsilon <= $dot;
    }

    /**
     * @param array $follow Initial follow glyphs.
     * @return config Configuration with the dot at the far left.
     */
    public function leftmost($follow)
    {
        return new config($this, 0, $follow);
    }

    /**
     * @param int $dot Dot position.
     * @return string The rule text with ' .' inserted at the dot.
     */
    public function dotted_text($dot)
    {
        $out = $this->lhs_glyph() . ' :=';
        $idx = -1;
        foreach ($this->rhs as $idx => $s) {
            if ($idx == $dot) {
                $out .= ' .';
            }
            $out .= ' ' . $s->name;
        }
        // Dot past the last symbol (or an empty rhs).
        if ($dot > $idx) {
            $out .= ' .';
        }
        return $out;
    }
}
/**
 * A rule with a dot position and a follow set.
 *
 * Properties are declared up front; creating them dynamically in the
 * constructor is deprecated as of PHP 8.2.
 */
class config
{
    /** @var rule The rule this configuration belongs to. */
    public $rule;

    /** @var int Position of the dot within the rule's rhs. */
    public $dot;

    /** @var string "<rule id>.<dot>". */
    public $key;

    /** @var bool True when the dot is at or past the end of the rhs. */
    public $rightmost;

    /** @var sym|null Symbol right after the dot, or null when rightmost. */
    public $symbol_after_the_dot;

    /** @var config[] Backward links, converted to forward links by fixlinks(). */
    public $_blink;

    /** @var set Follow set. */
    public $follow;

    /** @var config[] Configurations that receive this one's follow set. */
    public $_flink;

    /**
     * @param rule  $rule   The rule.
     * @param int   $dot    Dot position.
     * @param array $follow Initial follow glyphs.
     * @throws Bug if the dot is not rightmost yet the rule has an empty rhs.
     */
    public function __construct($rule, $dot, $follow)
    {
        $this->rule = $rule;
        $this->dot = $dot;
        $this->key = $rule->id . '.' . $dot;
        $this->rightmost = count($rule->rhs) <= $dot;
        $this->symbol_after_the_dot = $this->rightmost ? null : $rule->rhs[$dot];
        $this->_blink = array();
        $this->follow = new set($follow);
        $this->_flink = array();
        // The original counted the rule object itself, which is not
        // Countable and makes count() throw a TypeError on PHP 8.
        bug_unless($this->rightmost or count($rule->rhs));
    }

    /**
     * @return string Dotted rule text followed by the follow set in brackets.
     */
    public function text()
    {
        return $this->rule->dotted_text($this->dot)
            . ' [ ' . implode(' ', $this->follow->all()) . ' ]';
    }

    /**
     * Record a backward link to $config.
     *
     * @param config $config
     */
    public function blink($config)
    {
        $this->_blink[] = $config;
    }

    /**
     * Build the configuration with the dot advanced by one symbol.
     *
     * Anything in the follow set for this config will also be in the next.
     * It is linked backwards because a pre-existing state might be selected
     * and the housekeeping is easier that way; fixlinks() turns the links
     * around before propagation.
     *
     * @return config
     * @throws Bug if the dot is already rightmost.
     */
    public function next()
    {
        bug_if($this->rightmost);
        $c = new config($this->rule, $this->dot + 1, array());
        $c->blink($this);
        return $c;
    }

    /**
     * Append every backward link of $that to this configuration.
     *
     * @param config $that
     */
    public function copy_links_from($that)
    {
        foreach ($that->_blink as $c) {
            $this->blink($c);
        }
    }

    /**
     * @return bool Whether everything from the dot onward is nullable.
     */
    public function lambda()
    {
        return $this->rule->lambda_from($this->dot);
    }

    /**
     * @return array FIRST glyphs of what follows the symbol after the dot.
     */
    public function simple_follow()
    {
        return $this->rule->first[$this->dot + 1]->all();
    }

    /**
     * @return bool Whether everything after the symbol after the dot is nullable.
     */
    public function epsilon_follows()
    {
        return $this->rule->lambda_from($this->dot + 1);
    }

    /**
     * Turn every backward link into a forward link on its target, then
     * clear the backward links. The original assigned to a misspelled
     * `blink` property, leaving `_blink` populated.
     */
    public function fixlinks()
    {
        foreach ($this->_blink as $that) {
            $that->_flink[] = $this;
        }
        $this->_blink = array();
    }

    /**
     * Print the key, dotted rule, follow set and forward-link keys to
     * standard output.
     */
    public function dump()
    {
        echo ' * ';
        echo $this->key . ' : ';
        echo $this->rule->dotted_text($this->dot);
        echo $this->follow->text();
        foreach ($this->_flink as $c) {
            echo $c->key . ' / ';
        }
        echo PHP_EOL;
    }
}
/**
 * A grammar under construction and the algorithm that turns it into parser
 * source code.
 *
 * Properties are declared up front; creating them dynamically in the
 * constructor is deprecated as of PHP 8.2.
 */
class lime
{
    /** @var string Name of the class emitted by build_parser(). */
    public $parser_class = 'parser';

    /** @var int Next precedence level handed out by set_assoc(). */
    public $p_next;

    /** @var sym[] Symbol table, keyed by glyph. */
    public $sym;

    /** @var rule[] Rules, keyed by rule id. */
    public $rule;

    /** @var array Arguments of the "start" pragma. */
    public $start_symbol_set;

    /** @var state[] States, keyed by basis key. */
    public $state;

    /** @var sym The '#' symbol. */
    public $stop;

    /** @var lime_language_php Target-language helper. */
    public $lang;

    public function __construct()
    {
        $this->p_next = 1;
        $this->sym = array();
        $this->rule = array();
        $this->start_symbol_set = array();
        $this->state = array();
        $this->stop = $this->sym('#');
        // 'error' is pre-registered and marked as not being a terminal.
        $this->sym('error')->term = false;
        $this->lang = new lime_language_php();
    }

    /**
     * @return lime_language_php The target-language helper.
     */
    public function language()
    {
        return $this->lang;
    }

    /**
     * Run the whole generation pipeline.
     *
     * @return string Source code of the parser class.
     */
    public function build_parser()
    {
        $this->add_start_rule();
        foreach ($this->rule as $r) {
            $r->determine_precedence();
        }
        $this->find_sym_lamdba();
        $this->find_sym_first();
        foreach ($this->rule as $rule) {
            $rule->find_first();
        }
        $initial = $this->find_states();
        $this->fixlinks();
        // $this->dump_configurations();
        $this->find_follow_sets();
        foreach ($this->state as $s) {
            $s->find_reductions($this);
        }
        $i = $this->resolve_conflicts();
        $a = $this->rule_table();
        $qi = $initial->id;
        return $this->lang->ptab_to_class(
            $this->parser_class,
            array('a' => $a, 'qi' => $qi, 'i' => $i)
        );
    }

    /**
     * @return array Rule id => rule-table entry.
     */
    public function rule_table()
    {
        $s = array();
        foreach ($this->rule as $i => $r) {
            $s[$i] = $r->table($this->lang);
        }
        return $s;
    }

    /**
     * Add a plain rule whose 'len' equals the number of rhs glyphs.
     *
     * @param string   $symbol Left-hand-side glyph.
     * @param string[] $rhs    Right-hand-side glyphs.
     * @param string   $code   Action code.
     */
    public function add_rule($symbol, $rhs, $code)
    {
        $this->add_raw_rule($symbol, $rhs, $code, count($rhs), true);
    }

    /**
     * Invent a left-hand-side glyph for a sub-rule created from a mid-rule
     * action: the real glyph in quotes followed by the current rule count.
     *
     * @param string $real Glyph of the rule being split.
     * @return string
     */
    public function trump_up_bogus_lhs($real)
    {
        return "'{$real}'" . count($this->rule);
    }

    /**
     * Register a rule and mark its left-hand side as a non-terminal.
     *
     * @param string   $lhs     Left-hand-side glyph.
     * @param string[] $rhs     Right-hand-side glyphs.
     * @param string   $code    Action code.
     * @param int      $look    Value for the 'len' table entry.
     * @param bool     $replace Value for the 'replace' table entry.
     */
    public function add_raw_rule($lhs, $rhs, $code, $look, $replace)
    {
        $sym = $this->sym($lhs);
        $sym->term = false;
        if (!$rhs) {
            $sym->lambda = true;
        }
        $rs = array();
        foreach ($rhs as $str) {
            $rs[] = $this->sym($str);
        }
        $rid = count($this->rule);
        $r = new rule($rid, $sym, $rs, $code, $look, $replace);
        $this->rule[$rid] = $r;
        $sym->rule[] = $r;
    }

    /**
     * Look up a symbol by glyph, creating it on first use.
     *
     * @param string $str Glyph.
     * @return sym
     */
    public function sym($str)
    {
        if (!isset($this->sym[$str])) {
            $this->sym[$str] = new sym($str, count($this->sym));
        }
        return $this->sym[$str];
    }

    /**
     * @return string Text of every rule of every non-terminal.
     */
    public function summary()
    {
        $out = '';
        foreach ($this->sym as $sym) {
            if (!$sym->term) {
                $out .= $sym->summary();
            }
        }
        return $out;
    }

    /**
     * Mark symbols as nullable until a full pass changes nothing.
     */
    private function find_sym_lamdba()
    {
        do {
            $go = false;
            foreach ($this->sym as $sym) {
                if (!$sym->lambda) {
                    foreach ($sym->rule as $rule) {
                        if ($rule->lambda()) {
                            $go = true;
                            $sym->lambda = true;
                        }
                    }
                }
            }
        } while ($go);
    }

    /**
     * Seed the FIRST set of each terminal with its own name.
     */
    private function teach_terminals_first_set()
    {
        foreach ($this->sym as $sym) {
            if ($sym->term) {
                $sym->first->add($sym->name);
            }
        }
    }

    /**
     * Compute FIRST sets of all symbols by iterating to a fixed point.
     */
    private function find_sym_first()
    {
        $this->teach_terminals_first_set();
        do {
            $go = false;
            foreach ($this->rule as $r) {
                if ($r->teach_symbol_of_first_set()) {
                    $go = true;
                }
            }
        } while ($go);
    }

    /**
     * Add the rule "'start' := <start symbol>".
     */
    public function add_start_rule()
    {
        $rewrite = new lime_rewrite("'start'");
        $rhs = new lime_rhs();
        $rhs->add(new lime_glyph($this->deduce_start_symbol()->name, null));
        //$rhs->add(new lime_glyph($this->stop->name, null));
        $rewrite->add_rhs($rhs);
        $rewrite->update($this);
    }

    /**
     * Pick the start symbol: the first argument of the "start" pragma when
     * it names a known non-terminal, otherwise the left side of the first
     * rule.
     *
     * The original tested `$candidate->terminal` on the glyph itself, so the
     * fallback for an unusable pragma never triggered. The check is now
     * made on the symbol-table entry.
     * NOTE(review): assumes pragma arguments are glyph strings, as the
     * other pragmas treat them — confirm against the metagrammar.
     *
     * @return sym
     */
    private function deduce_start_symbol()
    {
        $candidate = current($this->start_symbol_set);
        // Did the person try to set a start symbol at all?
        if (!$candidate) {
            return $this->first_rule_lhs();
        }
        // Do we actually have such a symbol on the left of a rule?
        // (isset avoids registering an unknown glyph as a side effect.)
        if (!isset($this->sym[$candidate]) || $this->sym[$candidate]->term) {
            return $this->first_rule_lhs();
        }
        // Ok, it's a decent choice.
        return $this->sym[$candidate];
    }

    /**
     * @return sym Left-hand side of the first rule.
     * @throws Bug if the grammar has no rules.
     */
    private function first_rule_lhs()
    {
        $r = reset($this->rule);
        bug_unless($r, 'The grammar has no rules.');
        return $r->sym;
    }

    /**
     * Build an initial state. This is a recursive process which digs out
     * the LR(0) state graph.
     *
     * @return state The initial state.
     */
    public function find_states()
    {
        $start_glyph = "'start'";
        $sym = $this->sym($start_glyph);
        $basis = array();
        foreach ($sym->rule as $rule) {
            $c = $rule->leftmost(array('#'));
            $basis[$c->key] = $c;
        }
        $initial = $this->get_state($basis);
        $initial->add_accept($sym);
        return $initial;
    }

    /**
     * Return the state for a basis, creating it (and, recursively, its
     * successors) when it does not exist yet.
     *
     * @param config[] $basis Basis configurations keyed by config key.
     * @return state
     */
    public function get_state($basis)
    {
        $key = array_keys($basis);
        sort($key);
        $key = implode(' ', $key);
        if (isset($this->state[$key])) {
            // Copy all the links around...
            $state = $this->state[$key];
            foreach ($basis as $config) {
                $state->close[$config->key]->copy_links_from($config);
            }
            return $state;
        }
        $close = $this->state_closure($basis);
        $this->state[$key] = $state = new state(count($this->state), $key, $close);
        $this->build_shifts($state);
        return $state;
    }

    /**
     * Compute the closure of a list of configurations.
     *
     * @param config[] $q Work list of configurations.
     * @return config[] Closure keyed by config key.
     */
    private function state_closure($q)
    {
        $close = array();
        while ($config = array_pop($q)) {
            if (isset($close[$config->key])) {
                // Already present: merge links and follow set.
                $close[$config->key]->copy_links_from($config);
                $close[$config->key]->follow->union($config->follow);
                continue;
            }
            $close[$config->key] = $config;
            $symbol_after_the_dot = $config->symbol_after_the_dot;
            if (!$symbol_after_the_dot) {
                continue;
            }
            if (!$symbol_after_the_dot->term) {
                foreach ($symbol_after_the_dot->rule as $r) {
                    $station = $r->leftmost($config->simple_follow());
                    if ($config->epsilon_follows()) {
                        $station->blink($config);
                    }
                    $q[] = $station;
                }
                // The following turned out to be wrong. Don't do it.
                //if ($symbol_after_the_dot->lambda) {
                //    $q[] = $config->next();
                //}
            }
        }
        return $close;
    }

    /**
     * Create the successor state and a shift step for every symbol that
     * follows a dot in $state.
     *
     * @param state $state
     */
    public function build_shifts($state)
    {
        foreach ($state->segment_config() as $glyph => $segment) {
            $basis = array();
            foreach ($segment as $preshift) {
                $postshift = $preshift->next();
                $basis[$postshift->key] = $postshift;
            }
            $dest = $this->get_state($basis);
            $state->add_shift($this->sym($glyph), $dest);
        }
    }

    /**
     * Convert the backward links of every configuration into forward links.
     */
    public function fixlinks()
    {
        foreach ($this->state as $s) {
            foreach ($s->close as $c) {
                $c->fixlinks();
            }
        }
    }

    /**
     * Propagate follow sets along the forward links until nothing changes.
     */
    public function find_follow_sets()
    {
        $q = array();
        foreach ($this->state as $s) {
            foreach ($s->close as $c) {
                $q[] = $c;
            }
        }
        while ($q) {
            $c = array_shift($q);
            foreach ($c->_flink as $d) {
                // Re-queue a target whenever its follow set grew.
                if ($d->follow->union($c->follow)) {
                    $q[] = $d;
                }
            }
        }
    }

    /**
     * Assign the next precedence level to a group of symbols.
     *
     * @param string[] $ss Glyphs.
     * @param int      $l  Offset added to the left precedence.
     * @param int      $r  Offset added to the right precedence.
     */
    private function set_assoc($ss, $l, $r)
    {
        $p = ($this->p_next++) * 2;
        foreach ($ss as $glyph) {
            $s = $this->sym($glyph);
            $s->left_prec = $p + $l;
            $s->right_prec = $p + $r;
        }
    }

    /** @param string[] $ss Glyphs to declare left-associative. */
    public function left_assoc($ss)
    {
        $this->set_assoc($ss, 1, 0);
    }

    /** @param string[] $ss Glyphs to declare right-associative. */
    public function right_assoc($ss)
    {
        $this->set_assoc($ss, 0, 1);
    }

    /** @param string[] $ss Glyphs to declare non-associative. */
    public function non_assoc($ss)
    {
        $this->set_assoc($ss, 0, 0);
    }

    /**
     * For each state, find one and only one thing to do for any given
     * lookahead.
     *
     * @return array State id => (glyph => instruction string).
     */
    private function resolve_conflicts()
    {
        $i = array();
        foreach ($this->state as $s) {
            $i[$s->id] = $s->resolve_conflicts();
        }
        return $i;
    }

    /**
     * Print every state to standard output.
     */
    public function dump_configurations()
    {
        foreach ($this->state as $q) {
            $q->dump();
        }
    }

    /**
     * Print the FIRST set of every symbol to standard output.
     */
    public function dump_first_sets()
    {
        foreach ($this->sym as $s) {
            echo ' * ';
            echo $s->name . ' : ';
            echo $s->first->text();
            echo PHP_EOL;
        }
    }

    /**
     * Add a rule whose rhs mixes glyph strings and action objects, splitting
     * it into sub-rules at every action that is not the last element.
     *
     * @param string $lhs Left-hand-side glyph.
     * @param array  $rhs Glyph strings and objects exposing ->code.
     */
    public function add_rule_with_actions($lhs, $rhs)
    {
        // First, make sure this thing is well-formed.
        if (!is_object(end($rhs))) {
            $rhs[] = new cf_action('');
        }
        // Now, split it into chunks based on the actions.
        $look = -1;
        $subrule = array();
        while ($rhs) {
            $it = array_shift($rhs);
            ++$look;
            if (is_string($it)) {
                $subrule[] = $it;
                continue;
            }
            // It's an action.
            $code = $it->code;
            if ($rhs) {
                // Not the last one: hoist it into a sub-rule.
                $subsymbol = $this->trump_up_bogus_lhs($lhs);
                $this->add_raw_rule($subsymbol, $subrule, $code, $look, false);
                $subrule = array($subsymbol);
            } else {
                // The last one completes the real rule.
                $this->add_raw_rule($lhs, $subrule, $code, $look, true);
            }
        }
    }

    /**
     * Apply a grammar pragma. An unknown type is reported on standard
     * error and terminates the process with status 1.
     *
     * @param string $type One of left, right, nonassoc, start, class.
     * @param array  $args Pragma arguments.
     */
    public function pragma($type, $args)
    {
        switch ($type) {
            case 'left':
                $this->left_assoc($args);
                break;
            case 'right':
                $this->right_assoc($args);
                break;
            case 'nonassoc':
                $this->non_assoc($args);
                break;
            case 'start':
                $this->start_symbol_set = $args;
                break;
            case 'class':
                $this->parser_class = $args[0];
                break;
            default:
                emit(sprintf('Bad Parser Pragma: (%s)', $type));
                exit(1);
        }
    }
}
/**
 * Base type for target-language helpers. It declares nothing itself;
 * rule::table() uses it as a type hint.
 */
class lime_language
{
}
/**
 * Emits PHP: action-code fragments and the final parser class.
 */
class lime_language_php extends lime_language
{
    /**
     * @param string $expr PHP expression source.
     * @return string Statement assigning $expr to $result.
     */
    protected function result_code($expr)
    {
        return '$result = ' . $expr . ';' . PHP_EOL;
    }

    /**
     * @return string Statement making the first token the default result.
     */
    public function default_result()
    {
        return $this->result_code('reset($tokens)');
    }

    /**
     * @param int $pos Token index.
     * @return string Statement making token $pos the result.
     */
    public function result_pos($pos)
    {
        return $this->result_code(lime_token_reference($pos));
    }

    /**
     * @param string $name Variable name (without '$').
     * @param int    $pos  Token index.
     * @return string Statement binding $name by reference to token $pos.
     */
    public function bind($name, $pos)
    {
        return '$' . $name . ' = &$tokens[' . $pos . '];' . PHP_EOL;
    }

    /**
     * Replace "$$" with "$result" and "$N" with the reference to token N-1.
     * Uses the shared lime_token_reference_callback() instead of a private
     * copy of the same logic.
     *
     * @param string $code Action code.
     * @return string
     */
    public function fixup($code)
    {
        return preg_replace_callback('~\$(\d+|\$)~', 'lime_token_reference_callback', $code);
    }

    /**
     * @param string $code Action code.
     * @return string The code unchanged; it is already PHP.
     */
    public function to_php($code)
    {
        return $code;
    }

    /**
     * Render the parse table as the source of a class extending lime_parser.
     *
     * @param string $parser_class Name of the class to emit.
     * @param array  $ptab         Keys: 'qi' initial state id, 'i' per-state
     *                             instruction table, 'a' rule table.
     * @return string PHP source of the class.
     */
    public function ptab_to_class($parser_class, $ptab)
    {
        $code = '';
        $code .= 'public $qi = ' . lime_export($ptab['qi']) . ';' . PHP_EOL;
        $code .= 'public $i = ' . lime_export($ptab['i']) . ';' . PHP_EOL;
        $rc = array();
        $method = array();
        $rules = array();
        foreach ($ptab['a'] as $k => $a) {
            $symbol = preg_replace('/[^\w]/', '', $a['symbol']);
            // Per-symbol running count; explicit initialisation replaces the
            // former error-suppressed increment of an undefined index.
            $rc[$symbol] = isset($rc[$symbol]) ? $rc[$symbol] + 1 : 1;
            $rn = $rc[$symbol];
            $mn = 'reduce_' . $k . '_' . $symbol . '_' . $rn;
            $method[$k] = $mn;
            $comment = '// ' . $a['text'] . PHP_EOL;
            $php = $this->to_php($a['code']);
            $code .= 'function ' . $mn . '(' . LIME_CALL_PROTOCOL . ') {' . PHP_EOL .
                rtrim(preg_replace('~^~m', INDENT, $comment . $php)) . PHP_EOL .
                '}' .
                PHP_EOL .
                PHP_EOL;
            // Code and text live in the method; the table keeps the rest.
            unset($a['code']);
            unset($a['text']);
            $rules[$k] = $a;
        }
        $code .= 'public $method = ' . lime_export($method) . ';' . PHP_EOL;
        $code .= 'public $a = ' . lime_export($rules) . ';' . PHP_EOL;
        // Indent the class body, then blank out whitespace-only lines.
        return 'class ' . $parser_class . ' extends lime_parser {' . PHP_EOL .
            preg_replace(array('~^~m', '~^\h+$~m'), array(INDENT, ''), $code) .
            '}' . PHP_EOL;
    }
}
/**
 * One alternative (right-hand side) of a rewrite.
 *
 * Construct and add glyphs and actions in whatever order, then add this to
 * a lime_rewrite. Don't call install_rule() yourself: the rewrite does that
 * when you "update" with it.
 */
class lime_rhs
{
    /**
     * Declared explicitly: dynamic property creation is deprecated as of
     * PHP 8.2.
     *
     * @var lime_slot[] Slots in the order they were added.
     */
    public $rhs;

    public function __construct()
    {
        $this->rhs = array();
    }

    /**
     * @param lime_slot $slot Glyph or action to append.
     */
    public function add(lime_slot $slot)
    {
        $this->rhs[] = $slot;
    }

    /**
     * Install this alternative into the grammar, breaking it into sub-rules
     * at every action that is not the last slot.
     *
     * @param lime   $lime Grammar to update.
     * @param string $lhs  Left-hand-side glyph.
     * @throws Bug if a slot is neither a lime_glyph nor a lime_action.
     */
    public function install_rule(lime $lime, $lhs)
    {
        $rhs = $this->rhs;
        // First, make sure this thing is well-formed.
        if (!(end($rhs) instanceof lime_action)) {
            $rhs[] = new lime_action('', null);
        }
        // Now, split it into chunks based on the actions.
        $lang = $lime->language();
        $result_code = $lang->default_result();
        $look = -1;
        $subrule = array();
        $preamble = '';
        while ($rhs) {
            $it = array_shift($rhs);
            ++$look;
            if ($it instanceof lime_glyph) {
                $subrule[] = $it->data;
            } elseif ($it instanceof lime_action) {
                $code = unindent($it->data);
                if ($rhs) {
                    // Not the last action: hoist it into a sub-rule.
                    $subsymbol = $lime->trump_up_bogus_lhs($lhs);
                    $action = $lang->default_result() . $preamble . $code;
                    $lime->add_raw_rule($subsymbol, $subrule, $action, $look, false);
                    $subrule = array($subsymbol);
                } else {
                    // The last action completes the real rule.
                    $action = $result_code . $preamble . $code;
                    $lime->add_raw_rule($lhs, $subrule, $action, $look, true);
                }
            } else {
                // The original called an undefined impossible() here.
                bug('Slot is neither a glyph nor an action.');
            }
            // A slot named '$' supplies the result; any other name is bound
            // to the slot's token for use by later actions.
            if ($it->name === '$') {
                $result_code = $lang->result_pos($look);
            } elseif ($it->name) {
                $preamble .= $lang->bind($it->name, $look);
            }
        }
    }
}
/**
 * All alternatives for one left-hand side.
 *
 * Construct one of these with the name of the lhs, add some rhs-es to it,
 * and finally "update" the lime you're building.
 *
 * Properties are declared explicitly because dynamic property creation is
 * deprecated as of PHP 8.2.
 */
class lime_rewrite
{
    /** @var string Left-hand-side glyph. */
    public $glyph;

    /** @var lime_rhs[] Alternatives, in the order they were added. */
    public $rhs;

    /**
     * @param string $glyph Left-hand-side glyph.
     */
    public function __construct($glyph)
    {
        $this->glyph = $glyph;
        $this->rhs = array();
    }

    /**
     * @param lime_rhs $rhs Alternative to append.
     */
    public function add_rhs(lime_rhs $rhs)
    {
        $this->rhs[] = $rhs;
    }

    /**
     * Install every alternative into the grammar.
     *
     * @param lime $lime Grammar to update.
     */
    public function update(lime $lime)
    {
        foreach ($this->rhs as $rhs) {
            $rhs->install_rule($lime, $this->glyph);
        }
    }
}
- /**
- * This keeps track of one position in an rhs.
- * We specialize to handle actions and glyphs.
- *
- * If there is a name for the slot, we store it here.
- * Later on, this structure will be consulted in the formation of
- * actual production rules.
- */
class lime_slot
{
    /**
     * Properties are declared explicitly because dynamic property creation
     * is deprecated as of PHP 8.2.
     *
     * @var mixed Payload: the glyph for a lime_glyph, the code for a lime_action.
     */
    public $data;

    /** @var string|null Optional name of the slot. */
    public $name;

    /**
     * @param mixed       $data Payload of the slot.
     * @param string|null $name Optional name.
     */
    public function __construct($data, $name)
    {
        $this->data = $data;
        $this->name = $name;
    }

    /**
     * Build the statement binding this slot's name to a token.
     *
     * @param int $pos Token index.
     * @return string|null The statement, or null when the slot has no name.
     */
    public function preamble($pos)
    {
        // Cast first: passing null to strlen() is deprecated in PHP 8.1.
        if (strlen((string)$this->name) > 0) {
            return '$' . $this->name . ' = &$tokens[' . $pos . '];' . PHP_EOL;
        }
        return null;
    }
}
/**
 * Slot holding a grammar symbol; lime_rhs::install_rule() appends its data
 * to the rule's right-hand side.
 */
class lime_glyph extends lime_slot
{
}
/**
 * Slot holding action code; lime_rhs::install_rule() unindents its data
 * and attaches it to a rule.
 */
class lime_action extends lime_slot
{
}
- /**
- * This function isn't too terribly interesting to the casual observer.
- * You're probably better off looking at parse_lime_grammar() instead.
- *
- * Ok, if you insist, I'll explain.
- *
- * The input to Lime is a CFG parser definition. That definition is
- * written in some language. (The Lime language, to be exact.)
- * Anyway, I have to parse the Lime language and compile it into a
- * very complex data structure from which a parser is eventually
- * built. What better way than to use Lime itself to parse its own
- * language? Well, it's almost that simple, but not quite.
- * The Lime language is fairly potent, but a restricted subset of
- * its features was used to write a metagrammar. Then, I hand-translated
- * that metagrammar into another form which is easy to snarf up.
- * In the process of reading that simplified form, this function
- * builds the same sort of data structure that later gets turned into
- * a parser. The last step is to run the parser generation algorithm,
- * eval() the resulting PHP code, and voila! With no hard work, I can
- * suddenly read and comprehend the full range of the Lime language
- * without ever having written an algorithm to do so. It feels like magic.
- */
function lime_bootstrap()
{
    $bootstrap = LIME_DIR . '/lime.bootstrap';
    $lime = new lime();
    $lime->parser_class = 'lime_metaparser';
    // Alternatives collected since the last "to <symbol>" line.
    $pending = array();
    bug_unless(is_readable($bootstrap));

    foreach (file($bootstrap) as $line) {
        $parts = explode(':', $line, 2);

        if (count($parts) != 2) {
            // No colon: a "to <symbol>" line closes the pending alternatives
            // and installs them under that symbol. Other lines are ignored.
            if (preg_match('~^to (\w+)$~', $line, $found)) {
                $rewrite = new lime_rewrite($found[1]);
                foreach ($pending as $alternative) {
                    $rewrite->add_rhs($alternative);
                }
                $rewrite->update($lime);
                $pending = array();
            }
            continue;
        }

        // "<glyphs> : <code>" describes one alternative.
        $pattern = trim($parts[0]);
        $code = $parts[1];
        $alternative = new lime_rhs();
        if (strlen($pattern) > 0) {
            foreach (explode(' ', $pattern) as $glyph) {
                $alternative->add(new lime_glyph($glyph, null));
            }
        }
        $alternative->add(new lime_action($code, NULL));
        $pending[] = $alternative;
    }

    // Generate the metaparser class and define it in this process.
    eval($lime->build_parser());
}
- /**
- * The voodoo is in the way I do lexical processing on grammar definition
- * files. They contain embedded bits of PHP, and it's important to keep
- * track of things like strings, comments, and matched braces. It seemed
- * like an ideal problem to solve with GNU flex, so I wrote a little
- * scanner in flex and C to dig out the tokens for me. Of course, I need
- * the tokens in PHP, so I designed a simple binary wrapper for them which
- * also contains line-number information, guaranteed to help out if you
- * write a grammar which surprises the parser in any manner.
- */
class voodoo_scanner extends flex_scanner
{
    /**
     * @return string Path of the token-scanner program, next to this script.
     */
    public function executable()
    {
        return LIME_DIR . '/lime_scan_tokens';
    }
}
- /**
- * This is a good function to read because it teaches you how to interface
- * with a Lime parser. I've tried to isolate out the bits that aren't
- * instructive in that regard.
- */
/**
 * Translate one grammar file into parser source code.
 *
 * On a parse error the message is written to standard error and the
 * process exits with status 1, matching how lime::pragma() reports bad
 * input. The previous die() printed to standard output, where it mixed into
 * the generated code, and exited with status 0.
 *
 * @param string $path Path of the grammar file.
 * @return string PHP source of the generated parser class.
 */
function parse_lime_grammar($path)
{
    // The metaparser class is generated on first use.
    if (!class_exists('lime_metaparser', false)) {
        lime_bootstrap();
    }
    $parse_engine = new parse_engine(new lime_metaparser());
    $scanner = new voodoo_scanner($path);
    try {
        // The result of parsing a Lime grammar is a Lime object.
        $lime = $scanner->feed($parse_engine);
        // Calling its build_parser() method gets the output PHP code.
        return $lime->build_parser();
    } catch (parse_error $e) {
        emit($e->getMessage() . " in {$path} line {$scanner->lineno}.");
        exit(1);
    }
}
// Command-line entry point: translate every grammar file named on the
// command line and print the combined parser source after a warning banner.
if ($_SERVER['argv']) {
    // Strip out the program name.
    array_shift($_SERVER['argv']);
    $code = implode('', array_map('parse_lime_grammar', $_SERVER['argv']));
    echo <<<CODE
<?php
/*
*** DON'T EDIT THIS FILE! ***
*
* This file was automatically generated by the Lime parser generator.
* The real source code you should be looking at is in one or more
* grammar files in the Lime format.
*
* THE ONLY REASON TO LOOK AT THIS FILE is to see where in the grammar
* file that your error happened, because there are enough comments to
* help you debug your grammar.
* If you ignore this warning, you're shooting yourself in the brain,
* not the foot.
*/
{$code}
CODE;
}