PageRenderTime 65ms CodeModel.GetById 2ms app.highlight 54ms RepoModel.GetById 1ms app.codeStats 1ms

/lime.php

https://bitbucket.org/_richardJ/lime
PHP | 1316 lines | 925 code | 232 blank | 159 comment | 77 complexity | 828c6e63991900359f9f5689715ea226 MD5 | raw file
   1#!/usr/bin/php -q
   2<?php
   3/*
   4 *  This program is free software; you can redistribute it and/or modify
   5 *  it under the terms of the GNU General Public License as published by
   6 *  the Free Software Foundation; either version 2 of the License, or
   7 *  (at your option) any later version.
   8 *
   9 *  This program is distributed in the hope that it will be useful,
  10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 *  GNU Library General Public License for more details.
  13 *
  14 *  You should have received a copy of the GNU General Public License
  15 *  along with this program; if not, write to the Free Software
  16 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 */
  18
  19define('LIME_DIR', __DIR__);
  20define('INDENT', '  ');
  21
  22function emit($str) {
  23	fputs(STDERR, $str . PHP_EOL);
  24}
  25
  26class Bug extends Exception {
  27}
  28
  29function bug($gripe = 'Bug found.') {
  30	throw new Bug($gripe);
  31}
  32
  33function bug_if($fallacy, $gripe = 'Bug found.') {
  34	if ($fallacy) {
  35		throw new Bug($gripe);
  36	}
  37}
  38
  39function bug_unless($assertion, $gripe = 'Bug found.') {
  40	if (!$assertion) {
  41		throw new Bug($gripe);
  42	}
  43}
  44
  45require LIME_DIR . '/parse_engine.php';
  46require LIME_DIR . '/set.so.php';
  47require LIME_DIR . '/flex_token_stream.php';
  48
  49function lime_token_reference($pos) {
  50	return '$tokens[' . $pos . ']';
  51}
  52
  53function lime_token_reference_callback($foo) {
  54	if ($foo[1] === '$') {
  55		// always
  56		return '$result';
  57	}
  58
  59	return lime_token_reference($foo[1] - 1);
  60}
  61
  62function lime_export($var) {
  63	if (is_array($var)) {
  64		$i = is_indexed($var);
  65		$out = array();
  66		foreach($var as $k => $v) {
  67			$out[] = (!$i ? lime_export($k).' => ' : '') . lime_export($v);
  68		}
  69
  70		$result = 'array(' . PHP_EOL . preg_replace('~^~m', INDENT, implode(',' . PHP_EOL, $out)) . PHP_EOL . ')';
  71	} elseif (is_int($var) || is_float($var)) {
  72		$result = (string)$var;
  73	} elseif (is_string($var)) {
  74		$opt1 = '\'' . str_replace(array('\\', '\''), array('\\\\', '\\\''), $var) . '\'';
  75		$opt2 = $opt1;
  76
  77		if (strpos($var, '$') === false) {
  78			$opt2 = '"' . str_replace(array('\\', '"'), array('\\\\', '\"'), $var) . '"';
  79		}
  80
  81		if (strlen($opt1) <= strlen($opt2)) {
  82			$result = $opt1;
  83		} else {
  84			$result = $opt2;
  85		}
  86	} elseif (is_bool($var)) {
  87		$result = $var ? 'true' : 'false';
  88	} else {
  89		bug('Wrong type: ' . gettype($var));
  90	}
  91
  92	return $result;
  93}
  94
  95function is_indexed(array $array) {
  96	$i = 0;
  97	foreach($array as $k => $v) {
  98		if ($k !== $i++) {
  99			return false;
 100		}
 101	}
 102
 103	return true;
 104}
 105
 106function unindent($text) {
 107    if (preg_match('{\A[\r\n]*([ \t]+)[^\r\n]*+(?:[\r\n]++(?>\1[^\r\n]*+(?:[\r\n]+|\z)|[\r\n]+)+)?\z}', rtrim($text), $match)) {
 108        $text = preg_replace('{^' . $match[1] . '}m', '', $text);
 109    }
 110
 111    return $text;
 112}
 113
 114class cf_action {
 115	protected $code;
 116
 117	public function __construct($code) {
 118		$this->code = $code;
 119	}
 120}
 121
 122/**
 123 * Base class for parse table instructions. The main idea is to make the
 124 * subclasses responsible for conflict resolution among themselves. It also
 125 * forms a sort of interface to the parse table.
 126 */
 127abstract class step {
 128	public $sym;
 129
 130	public function __construct(sym $sym) {
 131		$this->sym = $sym;
 132	}
 133
 134	public function glyph() {
 135		return $this->sym->name;
 136	}
 137
 138	public function sane() {
 139		return true;
 140	}
 141
 142	abstract public function instruction();
 143	abstract public function decide($that);
 144}
 145
 146class error extends step {
 147	public function sane() {
 148		return false;
 149	}
 150
 151	public function instruction() {
 152		bug('This should not happen.');
 153	}
 154
 155	public function decide($that) {
 156		// An error shall remain one
 157		return $this;
 158	}
 159}
 160
 161class shift extends step {
 162	public $q;
 163
 164	public function __construct(sym $sym, $q) {
 165		parent::__construct($sym);
 166
 167		$this->q = $q;
 168	}
 169
 170	public function instruction() {
 171		return 's ' . $this->q;
 172	}
 173
 174	public function decide($that) {
 175		// shift-shift conflicts are impossible.
 176		// shift-accept conflicts are a bug.
 177		// so we can infer:
 178		bug_unless($that instanceof reduce);
 179
 180		// That being said, the resolution is a matter of precedence.
 181		$shift_prec = $this->sym->right_prec;
 182		$reduce_prec = $that->rule->prec;
 183
 184		// If we don't have defined precedence levels for both options,
 185		// then we default to shifting:
 186		if (!($shift_prec and $reduce_prec)) {
 187			return $this;
 188		}
 189
 190		// Otherwise, use the step with higher precedence.
 191		if ($shift_prec > $reduce_prec) {
 192			return $this;
 193		}
 194
 195		if ($reduce_prec > $shift_prec) {
 196			return $that;
 197		}
 198
 199		// The "nonassoc" works by giving equal precedence to both options,
 200		// which means to put an error instruction in the parse table.
 201		return new error($this->sym);
 202	}
 203}
 204
 205class reduce extends step {
 206	public function __construct($sym, rule $rule) {
 207		parent::__construct($sym);
 208		$this->rule = $rule;
 209	}
 210
 211	public function instruction() {
 212		return 'r ' . $this->rule->id;
 213	}
 214
 215	function decide($that) {
 216		// This means that the input grammar has a reduce-reduce conflict.
 217		// Such things are considered an error in the input.
 218		throw new RRC($this, $that);
 219
 220		// BISON would go with the first encountered reduce thus:
 221		// return $this;
 222	}
 223}
 224
 225class accept extends step {
 226	public function __construct(sym $sym) {
 227		parent::__construct($sym);
 228	}
 229
 230	public function instruction() {
 231		return 'a ' . $this->sym->name;
 232	}
 233
 234	public function decide($that) {
 235		return $this;
 236	}
 237}
 238
 239class RRC extends Exception {
 240	public function __construct($a, $b) {
 241		parent::__construct('Reduce-Reduce Conflict');
 242
 243		$this->a = $a;
 244		$this->b = $b;
 245	}
 246
 247	function make_noise() {
 248		emit(sprintf(
 249			'Reduce-Reduce Conflict:' . PHP_EOL . '%s' . PHP_EOL . '%s' . PHP_EOL . 'Lookahead is (%s)',
 250			$this->a->rule->text(),
 251			$this->b->rule->text(),
 252			$this->a->glyph()
 253		));
 254	}
 255}
 256
 257class state {
 258	public $id;
 259	public $key;
 260	public $close;
 261	public $action = array();
 262
 263	public function __construct($id, $key, $close) {
 264		$this->id = $id;
 265		$this->key = $key;
 266		$this->close = $close; // config key -> object
 267		ksort($this->close);
 268	}
 269
 270	public function dump() {
 271		echo ' * ' . $this->id . ' / ' . $this->key . PHP_EOL;
 272		foreach ($this->close as $config) {
 273			$config->dump();
 274		}
 275	}
 276
 277	public function add_shift(sym $sym, $state) {
 278		$this->add_instruction(new shift($sym, $state->id));
 279	}
 280
 281	public function add_reduce(sym $sym, $rule) {
 282		$this->add_instruction(new reduce($sym, $rule));
 283	}
 284
 285	public function add_accept(sym $sym) {
 286		$this->add_instruction(new accept($sym));
 287	}
 288
 289	public function add_instruction(step $step) {
 290		$this->action[] = $step;
 291	}
 292
 293	function find_reductions($lime) {
 294		// rightmost configurations followset yields reduce.
 295		foreach($this->close as $c) {
 296			if ($c->rightmost) {
 297				foreach ($c->follow->all() as $glyph) {
 298					$this->add_reduce($lime->sym($glyph), $c->rule);
 299				}
 300			}
 301		}
 302	}
 303
 304	function resolve_conflicts() {
 305		// For each possible lookahead, find one (and only one) step to take.
 306		$table = array();
 307		foreach ($this->action as $step) {
 308			$glyph = $step->glyph();
 309			if (isset($table[$glyph])) {
 310				// There's a conflict. The shifts all came first, which
 311				// simplifies the coding for the step->decide() methods.
 312				try {
 313					$table[$glyph] = $table[$glyph]->decide($step);
 314				} catch (RRC $e) {
 315					emit('State ' . $this->id . ':');
 316					$e->make_noise();
 317				}
 318			} else {
 319				// This glyph is yet unprocessed, so the step at hand is
 320				// our best current guess at what the grammar indicates.
 321				$table[$glyph] = $step;
 322			}
 323		}
 324
 325		// Now that we have the correct steps chosen, this routine is oddly
 326		// also responsible for turning that table into the form that will
 327		// eventually be passed to the parse engine. (So FIXME?)
 328		$out = array();
 329		foreach ($table as $glyph => $step) {
 330			if ($step->sane()) {
 331				$out[$glyph] = $step->instruction();
 332			}
 333		}
 334
 335		return $out;
 336	}
 337
 338	function segment_config() {
 339		// Filter $this->close into categories based on the symbol_after_the_dot.
 340		$f = array();
 341
 342		foreach ($this->close as $c) {
 343			$p = $c->symbol_after_the_dot;
 344			if (!$p) {
 345				continue;
 346			}
 347
 348			$f[$p->name][] = $c;
 349		}
 350
 351		return $f;
 352	}
 353}
 354
 355class sym {
 356	public function __construct($name, $id) {
 357		$this->name = $name;
 358		$this->id = $id;
 359		$this->term = true;	// Until proven otherwise.
 360		$this->rule = array();
 361		$this->config = array();
 362		$this->lambda = false;
 363		$this->first = new set();
 364		$this->left_prec = $this->right_prec = 0;
 365	}
 366
 367	public function summary() {
 368		$out = '';
 369		foreach ($this->rule as $rule) {
 370			$out .= $rule->text() . PHP_EOL;
 371		}
 372
 373		return $out;
 374	}
 375}
 376
 377class rule {
 378	public function __construct($id, $sym, $rhs, $code, $look, $replace) {
 379		bug_unless(is_int($look));
 380
 381		$this->id = $id;
 382		$this->sym = $sym;
 383		$this->rhs = $rhs;
 384		$this->code = $code;
 385		$this->look = $look;
 386		$this->replace = $replace;
 387		//$this->prec_sym = $prec_sym;
 388		$this->prec = 0;
 389		$this->first = array();
 390		$this->epsilon = count($rhs);
 391	}
 392
 393	public function lhs_glyph() {
 394		return $this->sym->name;
 395	}
 396
 397	public function determine_precedence() {
 398		// We may eventually expand to allow explicit prec_symbol declarations.
 399		// Until then, we'll go with the rightmost terminal, which is what
 400		// BISON does. People probably expect that. The leftmost terminal
 401		// is a reasonable alternative behaviour, but I don't see the big
 402		// deal just now.
 403
 404		//$prec_sym = $this->prec_sym;
 405		//if (!$prec_sym)
 406		$prec_sym = $this->rightmost_terminal();
 407
 408		if (!$prec_sym) {
 409			return;
 410		}
 411
 412		$this->prec = $prec_sym->left_prec;
 413	}
 414
 415	private function rightmost_terminal() {
 416		$symbol = null;
 417		$rhs = $this->rhs;
 418
 419		while ($rhs) {
 420			$symbol = array_pop($rhs);
 421			if ($symbol->term) {
 422				break;
 423			}
 424		}
 425
 426		return $symbol;
 427	}
 428
 429	public function text() {
 430		$t = '(' . $this->id . ') ' . $this->lhs_glyph() . ' :=';
 431
 432		foreach($this->rhs as $s) {
 433			$t .= '  ' . $s->name;
 434		}
 435
 436		return $t;
 437	}
 438
 439	public function table(lime_language $lang) {
 440		return array(
 441			'symbol' => $this->lhs_glyph(),
 442			'len' => $this->look,
 443			'replace' => $this->replace,
 444			'code' => $lang->fixup($this->code),
 445			'text' => $this->text(),
 446		);
 447	}
 448
 449	public function lambda() {
 450		foreach ($this->rhs as $sym) {
 451			if (!$sym->lambda) {
 452				return false;
 453			}
 454		}
 455
 456		return true;
 457	}
 458
 459	public function find_first() {
 460		$dot = count($this->rhs);
 461		$last = $this->first[$dot] = new set();
 462		while ($dot--) {
 463			$symbol_after_the_dot = $this->rhs[$dot];
 464			$first = $symbol_after_the_dot->first->all();
 465
 466			bug_if(empty($first) and !$symbol_after_the_dot->lambda);
 467
 468			$set = new set($first);
 469			if ($symbol_after_the_dot->lambda) {
 470				$set->union($last);
 471				if ($this->epsilon == $dot + 1) {
 472					$this->epsilon = $dot;
 473				}
 474			}
 475
 476			$last = $this->first[$dot] = $set;
 477		}
 478	}
 479
 480	public function teach_symbol_of_first_set() {
 481		$go = false;
 482		foreach ($this->rhs as $sym) {
 483			if ($this->sym->first->union($sym->first)) {
 484				$go = true;
 485			}
 486
 487			if (!$sym->lambda) {
 488				break;
 489			}
 490		}
 491
 492		return $go;
 493	}
 494
 495	public function lambda_from($dot) {
 496		return $this->epsilon <= $dot;
 497	}
 498
 499	public function leftmost($follow) {
 500		return new config($this, 0, $follow);
 501	}
 502
 503	public function dotted_text($dot) {
 504		$out = $this->lhs_glyph() . ' :=';
 505		$idx = -1;
 506		foreach($this->rhs as $idx => $s) {
 507			if ($idx == $dot) {
 508				$out .= ' .';
 509			}
 510
 511			$out .= '  ' . $s->name;
 512		}
 513
 514		if ($dot > $idx) {
 515			$out .= ' .';
 516		}
 517
 518		return $out;
 519	}
 520}
 521
 522class config {
 523	public function __construct($rule, $dot, $follow) {
 524		$this->rule = $rule;
 525		$this->dot = $dot;
 526		$this->key = $rule->id . '.' . $dot;
 527		$this->rightmost = count($rule->rhs) <= $dot;
 528		$this->symbol_after_the_dot = $this->rightmost ? null : $rule->rhs[$dot];
 529		$this->_blink = array();
 530		$this->follow = new set($follow);
 531		$this->_flink = array();
 532
 533		bug_unless($this->rightmost or count($rule));
 534	}
 535
 536	public function text() {
 537		return $this->rule->dotted_text($this->dot)
 538			. ' [ ' . implode(' ', $this->follow->all()) . ' ]';
 539	}
 540
 541	public function blink($config) {
 542		$this->_blink[] = $config;
 543	}
 544
 545	public function next() {
 546		bug_if($this->rightmost);
 547
 548		$c = new config($this->rule, $this->dot+1, array());
 549		// Anything in the follow set for this config will also be in the next.
 550		// However, we link it backwards because we might wind up selecting a
 551		// pre-existing state, and the housekeeping is easier in the first half
 552		// of the program. We'll fix it before doing the propagation.
 553		$c->blink($this);
 554
 555		return $c;
 556	}
 557
 558	public function copy_links_from($that) {
 559		foreach($that->_blink as $c) {
 560			$this->blink($c);
 561		}
 562	}
 563
 564	public function lambda() {
 565		return $this->rule->lambda_from($this->dot);
 566	}
 567
 568	public function simple_follow() {
 569		return $this->rule->first[$this->dot + 1]->all();
 570	}
 571
 572	public function epsilon_follows() {
 573		return $this->rule->lambda_from($this->dot + 1);
 574	}
 575
 576	public function fixlinks() {
 577		foreach ($this->_blink as $that) {
 578			$that->_flink[] = $this;
 579		}
 580
 581		$this->blink = array();
 582	}
 583
 584	public function dump() {
 585		echo '   * ';
 586		echo $this->key . ' : ';
 587		echo $this->rule->dotted_text($this->dot);
 588		echo $this->follow->text();
 589		foreach ($this->_flink as $c) {
 590			echo $c->key . ' / ';
 591		}
 592
 593		echo PHP_EOL;
 594	}
 595}
 596
 597class lime {
 598	public $parser_class = 'parser';
 599
 600	public function __construct() {
 601		$this->p_next = 1;
 602		$this->sym = array();
 603		$this->rule = array();
 604		$this->start_symbol_set = array();
 605		$this->state = array();
 606		$this->stop = $this->sym('#');
 607
 608		if ($err = $this->sym('error')) {
 609			$err->term = false;
 610		}
 611
 612		$this->lang = new lime_language_php();
 613	}
 614
 615	function language() {
 616		return $this->lang;
 617	}
 618
 619	function build_parser() {
 620		$this->add_start_rule();
 621
 622		foreach ($this->rule as $r) {
 623			$r->determine_precedence();
 624		}
 625
 626		$this->find_sym_lamdba();
 627		$this->find_sym_first();
 628
 629		foreach ($this->rule as $rule) {
 630			$rule->find_first();
 631		}
 632
 633		$initial = $this->find_states();
 634		$this->fixlinks();
 635		// $this->dump_configurations();
 636		$this->find_follow_sets();
 637
 638		foreach($this->state as $s) {
 639			$s->find_reductions($this);
 640		}
 641
 642		$i = $this->resolve_conflicts();
 643		$a = $this->rule_table();
 644		$qi = $initial->id;
 645
 646		return $this->lang->ptab_to_class($this->parser_class, compact('a', 'qi', 'i'));
 647	}
 648
 649	function rule_table() {
 650		$s = array();
 651
 652		foreach ($this->rule as $i => $r) {
 653			$s[$i] = $r->table($this->lang);
 654		}
 655
 656		return $s;
 657	}
 658
 659	function add_rule($symbol, $rhs, $code) {
 660		$this->add_raw_rule($symbol, $rhs, $code, count($rhs), true);
 661	}
 662
 663	function trump_up_bogus_lhs($real) {
 664		return "'{$real}'" . count($this->rule);
 665	}
 666
 667	function add_raw_rule($lhs, $rhs, $code, $look, $replace) {
 668		$sym = $this->sym($lhs);
 669		$sym->term = false;
 670
 671		if (!$rhs) {
 672			$sym->lambda = true;
 673		}
 674
 675		$rs = array();
 676
 677		foreach ($rhs as $str) {
 678			$rs[] = $this->sym($str);
 679		}
 680
 681		$rid = count($this->rule);
 682		$r = new rule($rid, $sym, $rs, $code, $look, $replace);
 683		$this->rule[$rid] = $r;
 684		$sym->rule[] = $r;
 685	}
 686
 687	function sym($str) {
 688		if (!isset($this->sym[$str])) {
 689			$this->sym[$str] = new sym($str, count($this->sym));
 690		}
 691
 692		return $this->sym[$str];
 693	}
 694
 695	function summary() {
 696		$out = '';
 697
 698		foreach ($this->sym as $sym) {
 699			if (!$sym->term) {
 700				$out .= $sym->summary();
 701			}
 702		}
 703
 704		return $out;
 705	}
 706
 707	private function find_sym_lamdba() {
 708		do {
 709			$go = false;
 710			foreach ($this->sym as $sym) {
 711				if (!$sym->lambda) {
 712					foreach ($sym->rule as $rule) {
 713						if ($rule->lambda()) {
 714							$go = true;
 715							$sym->lambda = true;
 716						}
 717					}
 718				}
 719			}
 720		} while ($go);
 721	}
 722
 723	private function teach_terminals_first_set() {
 724		foreach ($this->sym as $sym) {
 725			if ($sym->term) {
 726				$sym->first->add($sym->name);
 727			}
 728		}
 729	}
 730
 731	private function find_sym_first() {
 732		$this->teach_terminals_first_set();
 733
 734		do {
 735			$go = false;
 736			foreach ($this->rule as $r) {
 737				if ($r->teach_symbol_of_first_set()) {
 738					$go = true;
 739				}
 740			}
 741		} while ($go);
 742	}
 743
 744	function add_start_rule() {
 745		$rewrite = new lime_rewrite("'start'");
 746		$rhs = new lime_rhs();
 747		$rhs->add(new lime_glyph($this->deduce_start_symbol()->name, null));
 748		//$rhs->add(new lime_glyph($this->stop->name, null));
 749		$rewrite->add_rhs($rhs);
 750		$rewrite->update($this);
 751	}
 752
 753	private function deduce_start_symbol() {
 754		$candidate = current($this->start_symbol_set);
 755
 756		// Did the person try to set a start symbol at all?
 757		if (!$candidate) {
 758			return $this->first_rule_lhs();
 759		}
 760
 761		// Do we actually have such a symbol on the left of a rule?
 762		if ($candidate->terminal) {
 763			return $this->first_rule_lhs();
 764		}
 765
 766		// Ok, it's a decent choice. We need to return the symbol entry.
 767		return $this->sym($candidate);
 768	}
 769
 770	private function first_rule_lhs() {
 771		reset($this->rule);
 772		$r = current($this->rule);
 773		return $r->sym;
 774	}
 775
 776	/**
 777	 * Build an initial state. This is a recursive process which digs out
 778	 * the LR(0) state graph.
 779	 */
 780	function find_states() {
 781		$start_glyph = "'start'";
 782		$sym = $this->sym($start_glyph);
 783		$basis = array();
 784
 785		foreach($sym->rule as $rule) {
 786			$c = $rule->leftmost(array('#'));
 787			$basis[$c->key] = $c;
 788		}
 789
 790		$initial = $this->get_state($basis);
 791		$initial->add_accept($sym);
 792
 793		return $initial;
 794	}
 795
 796	function get_state($basis) {
 797		$key = array_keys($basis);
 798		sort($key);
 799		$key = implode(' ', $key);
 800
 801		if (isset($this->state[$key])) {
 802			// Copy all the links around...
 803			$state = $this->state[$key];
 804
 805			foreach($basis as $config) {
 806				$state->close[$config->key]->copy_links_from($config);
 807			}
 808
 809			return $state;
 810		} else {
 811			$close = $this->state_closure($basis);
 812			$this->state[$key] = $state = new state(count($this->state), $key, $close);
 813			$this->build_shifts($state);
 814
 815			return $state;
 816		}
 817	}
 818
 819	private function state_closure($q) {
 820		// $q is a list of config.
 821		$close = array();
 822		while ($config = array_pop($q)) {
 823			if (isset($close[$config->key])) {
 824				$close[$config->key]->copy_links_from($config);
 825				$close[$config->key]->follow->union($config->follow);
 826				continue;
 827			}
 828
 829			$close[$config->key] = $config;
 830
 831			$symbol_after_the_dot = $config->symbol_after_the_dot;
 832			if (!$symbol_after_the_dot) {
 833				continue;
 834			}
 835
 836			if (!$symbol_after_the_dot->term) {
 837				foreach ($symbol_after_the_dot->rule as $r) {
 838					$station = $r->leftmost($config->simple_follow());
 839
 840					if ($config->epsilon_follows()) {
 841						$station->blink($config);
 842					}
 843
 844					$q[] = $station;
 845				}
 846				// The following turned out to be wrong. Don't do it.
 847				//if ($symbol_after_the_dot->lambda) {
 848				//	$q[] = $config->next();
 849				//}
 850			}
 851		}
 852
 853		return $close;
 854	}
 855
 856	function build_shifts($state) {
 857		foreach ($state->segment_config() as $glyph => $segment) {
 858			$basis = array();
 859			foreach ($segment as $preshift) {
 860				$postshift = $preshift->next();
 861				$basis[$postshift->key] = $postshift;
 862			}
 863
 864			$dest = $this->get_state($basis);
 865			$state->add_shift($this->sym($glyph), $dest);
 866		}
 867	}
 868
 869	function fixlinks() {
 870		foreach ($this->state as $s) {
 871			foreach ($s->close as $c) {
 872				$c->fixlinks();
 873			}
 874		}
 875	}
 876
 877	function find_follow_sets() {
 878		$q = array();
 879
 880		foreach ($this->state as $s) {
 881			foreach ($s->close as $c) {
 882				$q[] = $c;
 883			}
 884		}
 885
 886		while ($q) {
 887			$c = array_shift($q);
 888
 889			foreach ($c->_flink as $d) {
 890				if ($d->follow->union($c->follow)) {
 891					$q[] = $d;
 892				}
 893			}
 894		}
 895	}
 896
 897	private function set_assoc($ss, $l, $r) {
 898		$p = ($this->p_next++) * 2;
 899		foreach ($ss as $glyph) {
 900			$s = $this->sym($glyph);
 901
 902			$s->left_prec = $p + $l;
 903			$s->right_prec = $p + $r;
 904		}
 905	}
 906
 907	function left_assoc($ss) {
 908		$this->set_assoc($ss, 1, 0);
 909	}
 910
 911	function right_assoc($ss) {
 912		$this->set_assoc($ss, 0, 1);
 913	}
 914
 915	function non_assoc($ss) {
 916		$this->set_assoc($ss, 0, 0);
 917	}
 918
 919	private function resolve_conflicts() {
 920		// For each state, try to find one and only one
 921		// thing to do for any given lookahead.
 922		$i = array();
 923
 924		foreach ($this->state as $s) {
 925			$i[$s->id] = $s->resolve_conflicts();
 926		}
 927
 928		return $i;
 929	}
 930
 931	function dump_configurations() {
 932		foreach ($this->state as $q) {
 933			$q->dump();
 934		}
 935	}
 936
 937	function dump_first_sets() {
 938		foreach ($this->sym as $s) {
 939			echo ' * ';
 940			echo $s->name . ' : ';
 941			echo $s->first->text();
 942			echo PHP_EOL;
 943		}
 944	}
 945
 946	function add_rule_with_actions($lhs, $rhs) {
 947		// First, make sure this thing is well-formed.
 948		if(!is_object(end($rhs))) {
 949			$rhs[] = new cf_action('');
 950		}
 951
 952		// Now, split it into chunks based on the actions.
 953		$look = -1;
 954		$subrule = array();
 955		$subsymbol = '';
 956
 957		while ($rhs) {
 958			$it = array_shift($rhs);
 959			++$look;
 960
 961			if (is_string($it)) {
 962				$subrule[] = $it;
 963			} else {
 964				$code = $it->code;
 965				// It's an action.
 966				// Is it the last one?
 967				if ($rhs) {
 968					// no.
 969					$subsymbol = $this->trump_up_bogus_lhs($lhs);
 970					$this->add_raw_rule($subsymbol, $subrule, $code, $look, false);
 971					$subrule = array($subsymbol);
 972				} else {
 973					// yes.
 974					$this->add_raw_rule($lhs, $subrule, $code, $look, true);
 975				}
 976			}
 977		}
 978	}
 979
 980	function pragma($type, $args) {
 981		switch ($type) {
 982		case 'left':
 983			$this->left_assoc($args);
 984			break;
 985		case 'right':
 986			$this->right_assoc($args);
 987			break;
 988		case 'nonassoc':
 989			$this->non_assoc($args);
 990			break;
 991		case 'start':
 992			$this->start_symbol_set = $args;
 993			break;
 994		case 'class':
 995			$this->parser_class = $args[0];
 996			break;
 997		default:
 998			emit(sprintf('Bad Parser Pragma: (%s)', $type));
 999			exit(1);
1000		}
1001	}
1002}
1003
/**
 * Base type for code-generation back ends. It declares nothing itself;
 * rule::table() uses it as the parameter type of the back end.
 */
class lime_language {}
1006
/**
 * Back end that emits the generated parser as PHP source.
 */
class lime_language_php extends lime_language {
	/**
	 * @param string $expr PHP expression text.
	 * @return string Statement assigning the expression to $result.
	 */
	protected function result_code($expr) {
		return '$result = ' . $expr . ';' . PHP_EOL;
	}

	/**
	 * @return string Statement setting $result to reset($tokens).
	 */
	public function default_result() {
		return $this->result_code('reset($tokens)');
	}

	/**
	 * @param int $pos Token position.
	 * @return string Statement setting $result to the token at $pos.
	 */
	public function result_pos($pos) {
		return $this->result_code(lime_token_reference($pos));
	}

	/**
	 * @param string $name Variable name (without the dollar sign).
	 * @param int    $pos  Token position.
	 * @return string Statement binding the variable by reference to the token.
	 */
	public function bind($name, $pos) {
		return '$' . $name . ' = &$tokens[' . $pos . '];' . PHP_EOL;
	}

	/**
	 * Translate the placeholders in action code: "$$" becomes $result
	 * and "$N" becomes a reference to token N - 1.
	 *
	 * @param string $code Action code as written in the grammar.
	 * @return string Action code with the placeholders replaced.
	 */
	public function fixup($code) {
		return preg_replace_callback('~\$(\d+|\$)~', 'lime_token_reference_callback', $code);
	}

	/**
	 * @param string $code Action code.
	 * @return string The code unchanged.
	 */
	public function to_php($code) {
		return $code;
	}

	/**
	 * Render the parse tables as the source of a parser class.
	 *
	 * @param string $parser_class Name of the class to generate.
	 * @param array  $ptab         Keys 'qi', 'i' and 'a' (rule table).
	 * @return string Source of a class extending lime_parser.
	 */
	public function ptab_to_class($parser_class, $ptab) {
		$body = 'public $qi = ' . lime_export($ptab['qi']) . ';' . PHP_EOL
			. 'public $i = ' . lime_export($ptab['i']) . ';' . PHP_EOL;

		$seen = array();	// sanitised symbol => number of rules so far
		$method = array();
		$rules = array();

		foreach ($ptab['a'] as $k => $entry) {
			$symbol = preg_replace('/[^\w]/', '', $entry['symbol']);
			$seen[$symbol] = isset($seen[$symbol]) ? $seen[$symbol] + 1 : 1;

			$name = 'reduce_' . $k . '_' . $symbol . '_' . $seen[$symbol];
			$method[$k] = $name;

			$comment = '// ' . $entry['text'] . PHP_EOL;
			$php = $this->to_php($entry['code']);

			// LIME_CALL_PROTOCOL is not defined in this file; presumably
			// it comes from one of the required files.
			$body .= 'function ' . $name . '(' . LIME_CALL_PROTOCOL . ') {' . PHP_EOL
				. rtrim(preg_replace('~^~m', INDENT, $comment . $php)) . PHP_EOL
				. '}' . PHP_EOL
				. PHP_EOL;

			// Code and text live in the method; the table keeps the rest.
			unset($entry['code'], $entry['text']);
			$rules[$k] = $entry;
		}

		$body .= 'public $method = ' . lime_export($method) . ';' . PHP_EOL;
		$body .= 'public $a = ' . lime_export($rules) . ';' . PHP_EOL;

		// Indent the class body, then blank out whitespace-only lines.
		$indented = preg_replace(array('~^~m', '~^\h+$~m'), array(INDENT, ''), $body);

		return 'class ' . $parser_class . ' extends lime_parser {' . PHP_EOL
			. $indented
			. '}' . PHP_EOL;
	}
}
1075
/**
 * One right-hand side alternative: an ordered list of glyph and action
 * slots.
 *
 * Construct it, add glyphs and actions in whatever order, then hand it to
 * a lime_rewrite. Don't call install_rule() yourself; the rewrite does
 * that when you "update" with it.
 */
class lime_rhs {
	/**
	 * Slots in the order they were added. Declared explicitly: creating
	 * the property dynamically is deprecated since PHP 8.2.
	 *
	 * @var array
	 */
	public $rhs = array();

	public function __construct() {
		$this->rhs = array();
	}

	/**
	 * Append a glyph or action slot.
	 */
	public function add(lime_slot $slot) {
		$this->rhs[] = $slot;
	}

	/**
	 * Install this alternative into the grammar, breaking it into
	 * subrules wherever an action appears before the end.
	 *
	 * @param lime   $lime Grammar to add the rule(s) to.
	 * @param string $lhs  Left-hand side glyph.
	 * @throws Bug When a slot is neither a glyph nor an action.
	 */
	public function install_rule(lime $lime, $lhs) {
		$rhs = $this->rhs;
		// First, make sure this thing is well-formed.
		if (!(end($rhs) instanceof lime_action)) {
			$rhs[] = new lime_action('', null);
		}

		// Now, split it into chunks based on the actions.

		$lang = $lime->language();
		$result_code = $lang->default_result();
		$look = -1;
		$subrule = array();
		$subsymbol = '';
		$preamble = '';

		while ($rhs) {
			$it = array_shift($rhs);
			++$look;

			if ($it instanceof lime_glyph) {
				$subrule[] = $it->data;
			} elseif ($it instanceof lime_action) {
				$code = unindent($it->data);
				// It's an action.
				// Is it the last one?
				if ($rhs) {
					// no.
					$subsymbol = $lime->trump_up_bogus_lhs($lhs);
					$action = $lang->default_result() . $preamble . $code;
					$lime->add_raw_rule($subsymbol, $subrule, $action, $look, false);
					$subrule = array($subsymbol);
				} else {
					// yes.
					$action = $result_code . $preamble . $code;
					$lime->add_raw_rule($lhs, $subrule, $action, $look, true);
				}
			} else {
				// Was a call to an undefined function impossible().
				bug('Slot is neither a glyph nor an action.');
			}

			// A slot named '$' supplies the rule's result; any other
			// name becomes a variable bound to the slot's token.
			if ($it->name === '$') {
				$result_code = $lang->result_pos($look);
			} elseif ($it->name) {
				$preamble .= $lang->bind($it->name, $look);
			}
		}
	}
}
1140
/**
 * All alternatives for one left-hand side.
 *
 * Construct one with the name of the lhs, add some rhs-es to it, and
 * finally "update" the lime you're building.
 */
class lime_rewrite {
	// Both properties are declared explicitly: creating them dynamically
	// in the constructor is deprecated since PHP 8.2.

	/** @var string Left-hand side glyph. */
	public $glyph;

	/** @var array Alternatives (lime_rhs objects) in insertion order. */
	public $rhs = array();

	/**
	 * @param string $glyph Left-hand side glyph.
	 */
	public function __construct($glyph) {
		$this->glyph = $glyph;
		$this->rhs = array();
	}

	/**
	 * Append an alternative.
	 */
	public function add_rhs(lime_rhs $rhs) {
		$this->rhs[] = $rhs;
	}

	/**
	 * Install every alternative into the grammar.
	 */
	public function update(lime $lime) {
		foreach ($this->rhs as $rhs) {
			$rhs->install_rule($lime, $this->glyph);
		}
	}
}
1160
/**
 * This keeps track of one position in an rhs.
 * We specialize to handle actions and glyphs.
 *
 * If there is a name for the slot, we store it here.
 * Later on, this structure will be consulted in the formation of
 * actual production rules.
 */
class lime_slot {
	// Both properties are declared explicitly: creating them dynamically
	// in the constructor is deprecated since PHP 8.2.

	/** @var string Glyph name or action code. */
	public $data;

	/** @var string|null Name given to the slot, if any. */
	public $name;

	/**
	 * @param string      $data Glyph name or action code.
	 * @param string|null $name Name for the slot, or null.
	 */
	public function __construct($data, $name) {
		$this->data = $data;
		$this->name = $name;
	}

	/**
	 * Build the statement that binds the slot's name to its token.
	 *
	 * @param int $pos Token position.
	 * @return string|null The binding statement, or null for an unnamed slot.
	 */
	public function preamble($pos) {
		// Compare as a string instead of calling strlen(): passing null
		// to strlen() is deprecated since PHP 8.1.
		if ((string)$this->name === '') {
			return null;
		}

		return '$' . $this->name . ' = &$tokens[' . $pos . '];' . PHP_EOL;
	}
}
1181
/**
 * Slot holding a grammar symbol; its data is the glyph.
 */
class lime_glyph extends lime_slot {}
/**
 * Slot holding embedded action code; its data is the code text.
 */
class lime_action extends lime_slot {}
1186
1187
/**
 * Build the parser for the Lime grammar language itself.
 *
 * The input to Lime is a parser definition written in the Lime language,
 * so that language has to be parsed before anything else can happen, and
 * Lime is used to parse it. A restricted subset of the language was used
 * to write a metagrammar, which was then hand-translated into a simpler
 * line-based form stored in the file lime.bootstrap.
 *
 * This function reads that file, builds the same sort of data structure
 * that a real grammar would produce, runs the parser generator on it and
 * eval()s the resulting PHP code, which defines the lime_metaparser class.
 *
 * File format as read here: a line containing ':' is one alternative,
 * "glyph glyph ... : action code"; a line of the form "to NAME" assigns
 * every alternative collected so far to the left-hand side NAME. Any
 * other line is ignored.
 *
 * You're probably better off looking at parse_lime_grammar() instead.
 *
 * @throws Bug When the bootstrap file is not readable.
 */
function lime_bootstrap() {
	$bootstrap = LIME_DIR . '/lime.bootstrap';
	$lime = new lime();
	$lime->parser_class = 'lime_metaparser';
	$pending = array();	// alternatives waiting for their "to NAME" line

	bug_unless(is_readable($bootstrap));

	foreach (file($bootstrap) as $line) {
		$parts = explode(':', $line, 2);

		if (count($parts) != 2) {
			// No colon: the only meaningful form is "to NAME".
			if (preg_match('~^to (\w+)$~', $line, $found)) {
				$rewrite = new lime_rewrite($found[1]);

				foreach ($pending as $alternative) {
					$rewrite->add_rhs($alternative);
				}

				$rewrite->update($lime);
				$pending = array();
			}

			continue;
		}

		$pattern = trim($parts[0]);
		$action = $parts[1];
		$alternative = new lime_rhs();

		if ($pattern !== '') {
			foreach (explode(' ', $pattern) as $glyph) {
				$alternative->add(new lime_glyph($glyph, null));
			}
		}

		$alternative->add(new lime_action($action, NULL));
		$pending[] = $alternative;
	}

	// The code evaluated here is the generator's own output for the
	// bundled bootstrap file.
	eval($lime->build_parser());
}
1253
/**
 * Scanner for grammar definition files.
 *
 * Grammar files contain embedded bits of PHP, so the lexical processing
 * has to keep track of strings, comments and matched braces. That job is
 * done by a small scanner written with GNU flex and C; its tokens come
 * back in a simple binary wrapper that also carries line-number
 * information, which helps when a grammar surprises the parser.
 */
class voodoo_scanner extends flex_scanner {
	/**
	 * @return string Path of the lime_scan_tokens program in LIME_DIR.
	 */
	public function executable() {
		return LIME_DIR . '/lime_scan_tokens';
	}
}
1267
/**
 * Turn one grammar file into the PHP source of its parser.
 *
 * This is a good function to read because it teaches you how to interface
 * with a Lime parser. I've tried to isolate out the bits that aren't
 * instructive in that regard.
 *
 * A parse error is reported on standard error and ends the program with
 * status 1. (It used to go through die(), which prints to standard
 * output, where the generated code is written, and exits with status 0.)
 *
 * @param string $path Path of the grammar file.
 * @return string Generated PHP code.
 */
function parse_lime_grammar($path) {
	if (!class_exists('lime_metaparser', false)) {
		lime_bootstrap();
	}

	$parse_engine = new parse_engine(new lime_metaparser());
	$scanner = new voodoo_scanner($path);

	try {
		// The result of parsing a Lime grammar is a Lime object.
		$lime = $scanner->feed($parse_engine);
		// Calling its build_parser() method gets the output PHP code.
		return $lime->build_parser();
	} catch (parse_error $e) {
		emit($e->getMessage() . " in {$path} line {$scanner->lineno}.");
		exit(1);
	}
}
1290
// Command-line entry point: every argument is the path of a grammar file.
// The generated parsers are concatenated and written to standard output
// after a fixed warning header.
if ($_SERVER['argv']) {
	array_shift($_SERVER['argv']); // Strip out the program name.

	$generated = implode('', array_map('parse_lime_grammar', $_SERVER['argv']));

	echo <<<CODE
<?php
/*
 *** DON'T EDIT THIS FILE! ***
 *
 * This file was automatically generated by the Lime parser generator.
 * The real source code you should be looking at is in one or more
 * grammar files in the Lime format.
 *
 * THE ONLY REASON TO LOOK AT THIS FILE is to see where in the grammar
 * file that your error happened, because there are enough comments to
 * help you debug your grammar.

 * If you ignore this warning, you're shooting yourself in the brain,
 * not the foot.
 */
{$generated}
CODE;
}