PageRenderTime 63ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/sergiosgc/Structures/Grammar.php

https://github.com/sergiosgc/Structures_Grammar
PHP | 407 lines | 197 code | 4 blank | 206 comment | 56 complexity | 8c17bf3a5f92f984f606c0bfc47e3149 MD5 | raw file
  1. <?php
  2. /* vim: set expandtab tabstop=4 shiftwidth=4 foldmethod=marker: */
  3. namespace sergiosgc;
  4. /**
  5. * Structures_Grammar is a representation of a formal generative grammar. The
  6. * data structure represents grammars as proposed by Noam Chomsky, as:
  7. * a) A set of non-terminal symbols (N)
  8. * b) A set of terminal symbols (T)
  9. * c) A set of production rules of the form (N U T)*N(N U T)* -> (N U T)*
  10. * where U is the set union operator and * is the Kleene star operator (the one
  11. * used in regexps).
  12. * d) A start symbol S that is a member of N
  13. * The class automatically restricts sets N and T to be disjoint.
  14. *
  15. * The data structure allows for the grammar to be restricted to a context-free grammar
  16. * and to a regular grammar. You can also test any grammar to check that it is context-free
  17. * or regular.
  18. */
  19. class Structures_Grammar
  20. {
  21. protected $nonTerminals = null;
  22. protected $terminals = null;
  23. protected $rules = array();
  24. protected $startSymbol = null;
  25. protected $contextFree = false;
  26. protected $regular = false;
  27. protected $nullableSymbolSet = null;
  28. /* getStartSymbol {{{ */
  29. /**
  30. * Start symbol getter
  31. *
  32. * @return Structures_Grammar_Symbol The start symbol
  33. */
  34. public function getStartSymbol()
  35. {
  36. if (is_null($this->startSymbol) && $this->isContextFree() && count($this->rules) > 0) {
  37. $this->startSymbol = $this->rules[0]->getLeftSymbol(0);
  38. }
  39. return $this->startSymbol;
  40. }
  41. /* }}} */
  42. /* computeTerminals {{{ */
  43. public function computeTerminals()
  44. {
  45. $this->nonTerminals = array();
  46. $this->terminals = array();
  47. foreach ($this->rules as $rule) {
  48. for ($i=0; $i < $rule->leftCount(); $i++) $this->nonTerminals[] = $rule->getLeftSymbol($i);
  49. }
  50. foreach ($this->rules as $rule) {
  51. for ($i=0; $i < $rule->rightCount(); $i++) if (!$this->isNonTerminal($rule->getRightSymbol($i)) && !$this->isTerminal($rule->getRightSymbol($i))) $this->terminals[] = $rule->getRightSymbol($i);
  52. }
  53. for ($i=count($this->nonTerminals) - 1; $i>=0; $i--) $this->nonTerminals[$i]->setNonTerminal();
  54. for ($i=count($this->terminals) - 1; $i>=0; $i--) $this->terminals[$i]->setTerminal();
  55. }
  56. /* }}} */
  57. /* addNonTerminal {{{ */
  58. /**
  59. * NonTerminals addition accessor
  60. *
  61. * @param Structures_Grammar_Symbol New value
  62. * @return Structures_Grammar_Symbol The added symbol
  63. */
  64. public function addNonTerminal($value)
  65. {
  66. $this->nonTerminals[] = $value;
  67. $value->setTerminal(false);
  68. }
  69. /* }}} */
  70. /* isNonTerminal {{{ */
  71. /**
  72. * Test value for NonTerminals membership
  73. *
  74. * @param Structures_Grammar_Symbol Value to be tested
  75. * @return boolean True iff value is a member of nonTerminals
  76. */
  77. public function isNonTerminal($value)
  78. {
  79. if (is_null($this->nonTerminals)) $this->computeTerminals();
  80. if (!$value instanceof Structures_Grammar_Symbol) $value = Structures_Grammar_Symbol::create($value);
  81. foreach ($this->nonTerminals as $cursor) if ($value->__equals($cursor)) return true;
  82. return false;
  83. }
  84. /* }}} */
  85. /* getNonTerminals {{{ */
  86. /**
  87. * NonTerminals getter
  88. *
  89. * @return boolean True iff value is a member of nonTerminals
  90. */
  91. public function getNonTerminals()
  92. {
  93. return $this->nonTerminals;
  94. }
  95. /* }}} */
  96. /* getNonTerminalSymbolset {{{ */
  97. /**
  98. * NonTerminals getter (as a symbol set
  99. *
  100. * @return boolean True iff value is a member of nonTerminals
  101. */
  102. public function getNonTerminalSymbolSet()
  103. {
  104. return new Structures_Grammar_Symbol_Set($this->getNonTerminals());
  105. }
  106. /* }}} */
  107. /* addTerminal {{{ */
  108. /**
  109. * Terminals addition accessor
  110. *
  111. * @param Structures_Grammar_Symbol New value
  112. * @return Structures_Grammar_Symbol The added symbol
  113. */
  114. public function addTerminal($value)
  115. {
  116. $this->terminals[] = $value;
  117. $value->setTerminal(true);
  118. }
  119. /* }}} */
  120. /* isTerminal {{{ */
  121. /**
  122. * Test value for Terminals membership
  123. *
  124. * @param Structures_Grammar_Symbol|string Value to be tested
  125. * @return boolean True iff value is a member of Terminals
  126. */
  127. public function isTerminal($value)
  128. {
  129. if (is_null($this->terminals)) $this->computeTerminals();
  130. if (!$value instanceof Structures_Grammar_Symbol) $value = Structures_Grammar_Symbol::create($value);
  131. foreach ($this->terminals as $cursor) if ($value == $cursor) return true;
  132. return false;
  133. }
  134. /* }}} */
  135. /* getTerminals {{{ */
  136. /**
  137. * Terminals getter
  138. *
  139. */
  140. public function getTerminals()
  141. {
  142. if (is_null($this->terminals)) $this->computeTerminals();
  143. return $this->terminals;
  144. }
  145. /* }}} */
  146. /* getTerminalSymbolset {{{ */
  147. /**
  148. * Terminals getter (as a symbol set
  149. *
  150. * @return boolean True iff value is a member of nonTerminals
  151. */
  152. public function getTerminalSymbolSet()
  153. {
  154. return new Structures_Grammar_Symbol_Set($this->getTerminals());
  155. }
  156. /* }}} */
  157. /* addRule {{{ */
  158. /**
  159. * Rules addition accessor
  160. *
  161. * @param Structures_Grammar_Symbol New value
  162. */
  163. public function addRule($value)
  164. {
  165. if ($this->isRegular() && !$value->isRegular()) throw new Structures_Grammar_RestrictionException(sprintf(
  166. 'Trying to add non regular rule to regular grammar (%s)', (string) $value));
  167. if ($this->isContextFree() && !$value->isContextFree()) throw new Structures_Grammar_RestrictionException(sprintf(
  168. 'Trying to add non context-free rule to context-free grammar (%s)', (string) $value));
  169. $this->rules[] = $value;
  170. }
  171. /* }}} */
  172. /* addContextFreeRule {{{ */
  173. public function &addContextFreeRule()
  174. {
  175. $symbols = func_get_args();
  176. if (count($symbols) == 0) throw new Structures_Grammar_Exception('At least one symbol is needed in a context-free grammar rule');
  177. foreach($symbols as $i => $symbol) if (!($symbol instanceof Structures_Grammar_Symbol)) $symbols[$i] = Structures_Grammar_Symbol::create($symbol);
  178. $rule = new Structures_Grammar_Rule();
  179. $rule->addSymbolToLeft($symbols[0]);
  180. for($i=1; $i<count($symbols); $i++) $rule->addSymbolToRight($symbols[$i]);
  181. $this->addRule($rule);
  182. return $rule;
  183. }
  184. /* }}} */
  185. /* getRules {{{ */
  186. /**
  187. * Rules getter
  188. *
  189. * @param Structures_Grammar_Symbol Value to be tested
  190. * @return boolean True iff value is a member of Rules
  191. */
  192. public function getRules()
  193. {
  194. return $this->rules;
  195. }
  196. /* }}} */
  197. /* getRule {{{ */
  198. /**
  199. * Rules getter
  200. *
  201. * @param int Rule index to get
  202. * @return Structures_Grammar_Rule|null Rule at index, or null if not found
  203. */
  204. public function getRule($i)
  205. {
  206. if (!array_key_exists($i, $this->rules)) return null;
  207. return $this->rules[$i];
  208. }
  209. /* }}} */
  210. /* getRuleIndex {{{ */
  211. /**
  212. * Find the index of a given rule
  213. *
  214. * @param Structures_Grammar_Rule Rule to find
  215. * @return int Rule index
  216. */
  217. public function getRuleIndex($right)
  218. {
  219. foreach ($this->rules as $index => $left) if ($left == $right) return $index;
  220. return false;
  221. }
  222. /* }}} */
  223. /* getRulesByLeftSymbol {{{ */
  224. /**
  225. * For context-free grammars, find the set of rules whose left-side production symbol is equal to the parameter
  226. *
  227. * @param Structures_Grammar_Symbol The symbol to search
  228. * @return array An array of Structures_Grammar_Rule instances
  229. */
  230. public function getRulesByLeftSymbol($left)
  231. {
  232. $result = array();
  233. for($i=0; $i < count($this->rules); $i++) if ($left == $this->rules[$i]->getLeftSymbol(0)) $result[] = $this->rules[$i];
  234. return $result;
  235. }
  236. /* }}} */
  237. /* isContextFree {{{ */
  238. /**
  239. * contextFree getter
  240. *
  241. * @param boolean True if grammar is restricted to a context-free grammar
  242. */
  243. public function isContextFree()
  244. {
  245. return $this->contextFree;
  246. }
  247. /* }}} */
  248. /* testContextFree {{{ */
  249. /**
  250. * Test the grammar to check if it is context-free.
  251. *
  252. * @return boolean True if the grammar is context-free
  253. */
  254. public function testContextFree()
  255. {
  256. if ($this->isContextFree()) return true;
  257. foreach ($this->rules as $rule) if (!$rule->isContextFree()) return false;
  258. return true;
  259. }
  260. /* }}} */
  261. /* setContextFree {{{ */
  262. /**
  263. * Set grammar restriction to context-free.
  264. *
  265. * If the grammar was not restricted before this call, the method will test if the
  266. * grammar is context-free before setting the restriction. It will throw an exception
  267. * if trying to restrict a non-context-free grammar
  268. *
  269. * @param boolean True if the grammar should be restricted to being context-free
  270. */
  271. public function setContextFree($value)
  272. {
  273. if ($value && !$this->testContextFree()) throw new Structures_Grammar_RestrictionException('Grammar is not context-free. Unable to introduce restriction');
  274. $this->contextFree = $value;
  275. if (!$value) $this->setRegular(false);
  276. }
  277. /* }}} */
  278. /* isRegular {{{ */
  279. /**
  280. * regular getter
  281. *
  282. * @param boolean True if grammar is restricted to a regular grammar
  283. */
  284. public function isRegular()
  285. {
  286. return $this->regular;
  287. }
  288. /* }}} */
  289. /* testRegular {{{ */
  290. /**
  291. * Test the grammar to check if it is regular.
  292. *
  293. * @return boolean True if the grammar is regular
  294. */
  295. public function testRegular()
  296. {
  297. if ($this->isRegular()) return true;
  298. foreach ($this->rules as $rule) if (!$rule->isRegular()) return false;
  299. return true;
  300. }
  301. /* }}} */
  302. /* setRegular {{{ */
  303. /**
  304. * Set grammar restriction to regular.
  305. *
  306. * If the grammar was not restricted before this call, the method will test if the
  307. * grammar is regular before setting the restriction. It will throw an exception
  308. * if trying to restrict a non-regular grammar
  309. *
  310. * @param boolean True if the grammar should be restricted to being regular
  311. */
  312. public function setRegular($value)
  313. {
  314. if ($value && !$this->testRegular()) foreach ($this->rules as $rule) if (!$rule->isRegular()) throw new Structures_Grammar_RestrictionException(sprintf('Grammar is not regular. Unable to introduce restriction. Rule \'%s\' is not regular', (string) $rule));
  315. $this->regular = $value;
  316. if ($value) $this->setContextFree(true);
  317. }
  318. /* }}} */
  319. /* isSymbolNullable {{{ */
  320. /**
  321. * A symbol is nullable if it is non-terminal and there is a nullable rule representing a production for that non-terminal
  322. *
  323. * @return boolean true iff symbol is nullable
  324. */
  325. public function isSymbolNullable($symbol)
  326. {
  327. if (is_null($this->nullableSymbolSet)) $this->computeNullableSymbolSet();
  328. return $this->nullableSymbolSet->symbolExists($symbol);
  329. }
  330. /* }}} */
  331. /* computeNullableSymbolSet {{{ */
  332. protected function computeNullableSymbolSet()
  333. {
  334. if (!$this->testContextFree()) throw new Structures_Grammar_Exception('isSymbolNullable is implemented for context-free grammars only, and this is not a context-free grammar');
  335. $this->nullableSymbolSet = new Structures_Grammar_Symbol_Set();
  336. do {
  337. $cardinality = $this->nullableSymbolSet->getSymbolCount();
  338. foreach ($this->rules as $rule) if ($rule->isNullable($this->nullableSymbolSet)) $this->nullableSymbolSet->addSymbol($rule->getLeftSymbol(0));
  339. } while ($cardinality < $this->nullableSymbolSet->getSymbolCount());
  340. }
  341. /* }}} */
  342. protected $firstSet = array();
  343. /* symbolFirstSet {{{ */
  344. /**
  345. * The grammatical first set for a non-terminal symbol is the set of
  346. * terminal symbols that appear at position 0 on the right hand side of
  347. * all the symbol's productions.
  348. *
  349. * @param Structures_Grammar_Symbol Symbol whose first set is sought
  350. * @return Structures_Grammar_Symbol_Set First set for symbol
  351. */
  352. public function symbolFirstSet($symbol)
  353. {
  354. $result = new Structures_Grammar_Symbol_Set();
  355. if ($symbol->isTerminal()) {
  356. $result->addSymbol($symbol);
  357. } else {
  358. if (array_key_exists($symbol->getId(), $this->firstSet)) return $this->firstSet[$symbol->getId()];
  359. $this->firstSet[$symbol->getId()] = new Structures_Grammar_Symbol_Set();
  360. foreach ($this->rules as $rule) if ($rule->getLeftSymbol(0)->__equals($symbol)) {
  361. for ($i=0; $i<$rule->rightCount(); $i++) {
  362. $result->union($this->symbolFirstSet($rule->getRightSymbol($i)));
  363. if (!$this->isSymbolNullable($rule->getRightSymbol($i))) break;
  364. }
  365. }
  366. $this->firstSet[$symbol->getId()] = $result;
  367. }
  368. return $result;
  369. }
  370. /* }}} */
  371. /* constructor {{{ */
  372. /**
  373. * Create a new Structures_Grammar
  374. *
  375. * @param boolean Should the grammar be context free (defaults to true)
  376. * @param boolean Should the grammar be regular (defaults to false)
  377. */
  378. public function __construct($contextFree = true, $regular = false)
  379. {
  380. $this->setContextFree($contextFree);
  381. $this->setRegular($regular);
  382. }
  383. /* }}} */
  384. /* __toString {{{ */
  385. public function __toString()
  386. {
  387. $result = '';
  388. foreach ($this->rules as $index => $value) $result .= sprintf("[%d] %s\n", $index, (string) $value);
  389. return $result;
  390. }
  391. /* }}} */
  392. /* __equals {{{ */
  393. public function __equals($other)
  394. {
  395. if (!($other instanceof Structures_Grammar)) return false;
  396. $otherRules = $other->getRules();
  397. if (count($otherRules) != count($this->rules)) return false;
  398. foreach ($this->rules as $rule) if ($other->getRuleIndex($rule) === false) return false;
  399. return true;
  400. }
  401. /* }}} */
  402. }
  403. ?>