PageRenderTime 41ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/lexer.php

https://bitbucket.org/kudutest1/moodlegit
PHP | 413 lines | 185 code | 24 blank | 204 comment | 33 complexity | 6e60f4d6c66ac70dbe26ac5eaace3d79 MD5 | raw file
  1. <?php
  2. /**
  3. * PHP lexer code snarfed from the CVS tree for the lamplib project at
  4. * http://sourceforge.net/projects/lamplib
  5. * This project is administered by Markus Baker, Harry Fuecks and Matt
  6. * Mitchell, and the project code is in the public domain.
  7. *
  8. * Thanks, guys!
  9. *
  10. * @package moodlecore
  11. * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
  12. * @license Public Domain {@link http://sourceforge.net/projects/lamplib}
  13. */
  /** Token type passed to the parser when a match pushes a new lexer mode (value 1). */
  define('LEXER_ENTER', 1);
  /** Token type passed to the parser for a match that leaves the mode unchanged (value 2). */
  define('LEXER_MATCHED', 2);
  /** Token type passed to the parser for text that no pattern recognised (value 3). */
  define('LEXER_UNMATCHED', 3);
  /** Token type passed to the parser when a match pops the current lexer mode (value 4). */
  define('LEXER_EXIT', 4);
  /** Token type passed to the parser for a single token handled in a temporary mode (value 5). */
  define('LEXER_SPECIAL', 5);
  /**
   * Compounded regular expression. Any of the contained patterns
   * could match, and when one does its label is returned.
   *
   * @package moodlecore
   * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
   * @license Public Domain {@link http://sourceforge.net/projects/lamplib}
   */
  class ParallelRegex {
      /** @var array Raw patterns exactly as passed to addPattern(). */
      var $_patterns;
      /** @var array Label for each pattern, stored at the same index. */
      var $_labels;
      /** @var string|null Cached compound regex; null until (re)built. */
      var $_regex;
      /** @var bool True for case sensitive matching. */
      var $_case;

      /**
       * Constructor. Starts with no patterns.
       *
       * @param bool $case True for case sensitive, false
       *                   for insensitive.
       * @access public
       */
      function __construct($case) {
          $this->_case = $case;
          $this->_patterns = array();
          $this->_labels = array();
          $this->_regex = null;
      }

      /**
       * Old-style (PHP4) constructor name, kept so that code calling it
       * explicitly keeps working. PHP 8 no longer treats a method named
       * after the class as a constructor, so the real initialisation
       * lives in __construct().
       *
       * @param bool $case True for case sensitive, false
       *                   for insensitive.
       * @access public
       */
      function ParallelRegex($case) {
          self::__construct($case);
      }

      /**
       * Adds a pattern with an optional label. Invalidates the cached
       * compound regex so it is rebuilt on the next match.
       *
       * @param string $pattern Perl style regex, but ( and )
       *                        lose the usual meaning.
       * @param string $label   Label of regex to be returned
       *                        on a match.
       * @access public
       */
      function addPattern($pattern, $label = true) {
          $index = count($this->_patterns);
          $this->_patterns[$index] = $pattern;
          $this->_labels[$index] = $label;
          $this->_regex = null;
      }

      /**
       * Attempts to match all patterns at once against
       * a string.
       *
       * @param string $subject String to match against.
       * @param string $match   Receives the first matched portion
       *                        of the subject, or "" on failure.
       * @return mixed False when nothing matched (or no patterns are
       *               registered); otherwise the label of the pattern
       *               that matched, or true if no capture group was
       *               non-empty.
       * @access public
       */
      function match($subject, &$match) {
          $match = "";
          if (count($this->_patterns) == 0) {
              return false;
          }
          if (!preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
              return false;
          }
          $match = $matches[0];
          // Group N wraps pattern N-1. Groups of alternatives that did not
          // take part are empty strings, so compare against "" explicitly:
          // a plain truthiness test would skip a token that is exactly "0".
          $groups = count($matches);
          for ($i = 1; $i < $groups; $i++) {
              if ($matches[$i] !== "") {
                  return $this->_labels[$i - 1];
              }
          }
          return true;
      }

      /**
       * Compounds the patterns into a single
       * regular expression separated with the
       * "or" operator. Caches the regex.
       * Will automatically escape (, ) and / tokens.
       *
       * The escaped, wrapped alternatives are built in a local array
       * so that the stored patterns stay untouched; otherwise a rebuild
       * after a later addPattern() would escape them a second time.
       *
       * @return string The compound regex including delimiters and flags.
       * @access private
       */
      function _getCompoundedRegex() {
          if ($this->_regex == null) {
              $alternatives = array();
              foreach ($this->_patterns as $pattern) {
                  $escaped = str_replace(
                          array('/', '(', ')'),
                          array('\/', '\(', '\)'),
                          $pattern);
                  $alternatives[] = '(' . $escaped . ')';
              }
              $this->_regex = "/" . implode("|", $alternatives) . "/" . $this->_getPerlMatchingFlags();
          }
          return $this->_regex;
      }

      /**
       * Accessor for perl regex mode flags to use.
       *
       * @return string Flags as string; "i" is appended when matching
       *                is case insensitive.
       * @access private
       */
      function _getPerlMatchingFlags() {
          return ($this->_case ? "msS" : "msSi");
      }
  }
  /**
   * States for a stack machine.
   *
   * @package moodlecore
   * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
   * @license Public Domain {@link http://sourceforge.net/projects/lamplib}
   */
  class StateStack {
      /** @var array State names; the last element is the current state. */
      var $_stack;

      /**
       * Constructor. Starts in named state.
       *
       * @param string $start Starting state name.
       * @access public
       */
      function __construct($start) {
          $this->_stack = array($start);
      }

      /**
       * Old-style (PHP4) constructor name, kept so that code calling it
       * explicitly keeps working. PHP 8 no longer treats a method named
       * after the class as a constructor, so the real initialisation
       * lives in __construct().
       *
       * @param string $start Starting state name.
       * @access public
       */
      function StateStack($start) {
          self::__construct($start);
      }

      /**
       * Accessor for current state.
       *
       * @return string State as string.
       * @access public
       */
      function getCurrent() {
          $top = count($this->_stack) - 1;
          return $this->_stack[$top];
      }

      /**
       * Adds a state to the stack and sets it
       * to be the current state.
       *
       * @param string $state New state.
       * @access public
       */
      function enter($state) {
          $this->_stack[] = $state;
      }

      /**
       * Leaves the current state and reverts
       * to the previous one. The starting state is never removed.
       *
       * @return bool False if we drop off
       *              the bottom of the list.
       * @access public
       */
      function leave() {
          if (count($this->_stack) == 1) {
              return false;
          }
          array_pop($this->_stack);
          return true;
      }
  }
  /**
   * Accepts text and breaks it into tokens.
   * Some optimisation to make sure the
   * content is only scanned by the PHP regex
   * parser once. Lexer modes must not start
   * with leading underscores, because a leading underscore is how
   * special and exit actions are encoded in pattern labels.
   *
   * @package moodlecore
   * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
   * @license Public Domain {@link http://sourceforge.net/projects/lamplib}
   */
  class Lexer {
      /** @var array ParallelRegex objects keyed by mode name. */
      var $_regexes;
      /** @var object Parser whose methods receive the tokens. */
      var $_parser;
      /** @var StateStack Stack of lexer modes. */
      var $_mode;
      /** @var array Optional mode name => handler method name overrides. */
      var $_mode_handlers;
      /** @var bool True for case sensitive matching. */
      var $_case;

      /**
       * Sets up the lexer in case insensitive matching
       * by default.
       *
       * @param object $parser Handling strategy by
       *                       reference.
       * @param string $start  Starting handler.
       * @param bool $case     True for case sensitive.
       * @access public
       */
      function __construct(&$parser, $start = "accept", $case = false) {
          $this->_case = $case;
          $this->_regexes = array();
          $this->_parser = &$parser;
          $this->_mode = new StateStack($start);
          $this->_mode_handlers = array();
      }

      /**
       * Old-style (PHP4) constructor name, kept so that code calling it
       * explicitly keeps working. PHP 8 no longer treats a method named
       * after the class as a constructor, so the real initialisation
       * lives in __construct().
       *
       * @param object $parser Handling strategy by
       *                       reference.
       * @param string $start  Starting handler.
       * @param bool $case     True for case sensitive.
       * @access public
       */
      function Lexer(&$parser, $start = "accept", $case = false) {
          self::__construct($parser, $start, $case);
      }

      /**
       * Adds a token search pattern for a particular
       * parsing mode. The pattern does not change the
       * current mode.
       *
       * @param string $pattern Perl style regex, but ( and )
       *                        lose the usual meaning.
       * @param string $mode    Should only apply this
       *                        pattern when dealing with
       *                        this type of input.
       * @access public
       */
      function addPattern($pattern, $mode = "accept") {
          $this->_getRegexForMode($mode)->addPattern($pattern);
      }

      /**
       * Adds a pattern that will enter a new parsing
       * mode. Useful for entering parenthesis, strings,
       * tags, etc.
       *
       * @param string $pattern  Perl style regex, but ( and )
       *                         lose the usual meaning.
       * @param string $mode     Should only apply this
       *                         pattern when dealing with
       *                         this type of input.
       * @param string $new_mode Change parsing to this new
       *                         nested mode.
       * @access public
       */
      function addEntryPattern($pattern, $mode, $new_mode) {
          $this->_getRegexForMode($mode)->addPattern($pattern, $new_mode);
      }

      /**
       * Adds a pattern that will exit the current mode
       * and re-enter the previous one.
       *
       * @param string $pattern Perl style regex, but ( and )
       *                        lose the usual meaning.
       * @param string $mode    Mode to leave.
       * @access public
       */
      function addExitPattern($pattern, $mode) {
          $this->_getRegexForMode($mode)->addPattern($pattern, "__exit");
      }

      /**
       * Adds a pattern that has a special mode.
       * Acts as an entry and exit pattern in one go.
       *
       * @param string $pattern Perl style regex, but ( and )
       *                        lose the usual meaning.
       * @param string $mode    Should only apply this
       *                        pattern when dealing with
       *                        this type of input.
       * @param string $special Use this mode for this one token.
       * @access public
       */
      function addSpecialPattern($pattern, $mode, $special) {
          // The leading underscore marks the label as "special" for
          // _dispatchTokens(), which strips it again.
          $this->_getRegexForMode($mode)->addPattern($pattern, "_$special");
      }

      /**
       * Adds a mapping from a mode to another handler.
       *
       * @param string $mode    Mode to be remapped.
       * @param string $handler New target handler.
       * @access public
       */
      function mapHandler($mode, $handler) {
          $this->_mode_handlers[$mode] = $handler;
      }

      /**
       * Splits the page text into tokens. Will fail
       * if the handlers report an error or if no
       * content is consumed. If successful then each
       * unparsed and parsed token invokes a call to the
       * held listener.
       *
       * @param string $raw Raw HTML text.
       * @return bool True on success, else false.
       * @access public
       */
      function parse($raw) {
          if (!isset($this->_parser)) {
              return false;
          }
          $remaining = strlen($raw);
          while (is_array($parsed = $this->_reduce($raw))) {
              list($unmatched, $matched, $mode) = $parsed;
              if (!$this->_dispatchTokens($unmatched, $matched, $mode)) {
                  return false;
              }
              // A match that consumed nothing would loop forever.
              $left = strlen($raw);
              if ($left == $remaining) {
                  return false;
              }
              $remaining = $left;
          }
          if (!$parsed) {
              // _reduce() reported that the current mode has no patterns.
              return false;
          }
          // Whatever is left was not recognised by any pattern.
          return $this->_invokeParser($raw, LEXER_UNMATCHED);
      }

      /**
       * Returns the ParallelRegex holding the patterns of a mode,
       * creating it on first use.
       *
       * @param string $mode Mode name.
       * @return ParallelRegex Regex collection for that mode.
       * @access private
       */
      function _getRegexForMode($mode) {
          if (!isset($this->_regexes[$mode])) {
              $this->_regexes[$mode] = new ParallelRegex($this->_case);
          }
          return $this->_regexes[$mode];
      }

      /**
       * Sends the matched token and any leading unmatched
       * text to the parser changing the lexer to a new
       * mode if one is listed.
       *
       * @param string $unmatched Unmatched leading portion.
       * @param string $matched   Actual token match.
       * @param mixed $mode       Action after the match. "__exit"
       *                          causes a stack pop, a string with one
       *                          leading underscore is a special mode
       *                          used for this token only, any other
       *                          string is entered as a new mode and a
       *                          non-string value causes no change.
       * @return bool False if there was any error
       *              from the parser.
       * @access private
       */
      function _dispatchTokens($unmatched, $matched, $mode = false) {
          if (!$this->_invokeParser($unmatched, LEXER_UNMATCHED)) {
              return false;
          }
          // Plain patterns carry a non-string label: no mode change.
          if (!is_string($mode)) {
              return $this->_invokeParser($matched, LEXER_MATCHED);
          }
          if ($mode === "__exit") {
              if (!$this->_invokeParser($matched, LEXER_EXIT)) {
                  return false;
              }
              return $this->_mode->leave();
          }
          if (strncmp($mode, "_", 1) == 0) {
              // Special pattern: enter the mode for this one token only.
              $this->_mode->enter(substr($mode, 1));
              if (!$this->_invokeParser($matched, LEXER_SPECIAL)) {
                  return false;
              }
              return $this->_mode->leave();
          }
          $this->_mode->enter($mode);
          return $this->_invokeParser($matched, LEXER_ENTER);
      }

      /**
       * Calls the parser method named after the current
       * mode, or after the handler mapped to it with mapHandler().
       * Empty content will be ignored.
       *
       * @param string $content Text parsed.
       * @param int $is_match   One of the LEXER_* constants saying
       *                        what kind of token this is.
       * @return mixed True for empty content, otherwise whatever
       *               the parser method returns.
       * @access private
       */
      function _invokeParser($content, $is_match) {
          if (($content === "") || ($content === false)) {
              return true;
          }
          $handler = $this->_mode->getCurrent();
          if (isset($this->_mode_handlers[$handler])) {
              $handler = $this->_mode_handlers[$handler];
          }
          return $this->_parser->$handler($content, $is_match);
      }

      /**
       * Tries to match a chunk of text and if successful
       * removes the recognised chunk and any leading
       * unparsed data. Empty strings will not be matched.
       *
       * @param string $raw The subject to parse. This is the
       *                    content that will be eaten.
       * @return bool|array Three item list of unparsed
       *                    content followed by the
       *                    recognised token and finally the
       *                    action the parser is to take.
       *                    True if no match, false if there
       *                    is a parsing error.
       * @access private
       */
      function _reduce(&$raw) {
          $current = $this->_mode->getCurrent();
          if (!isset($this->_regexes[$current])) {
              return false;
          }
          if ($raw === "") {
              return true;
          }
          $action = $this->_regexes[$current]->match($raw, $match);
          if (!$action) {
              return true;
          }
          if ($match === "") {
              // Zero-length match: nothing to consume. Return it unchanged
              // (parse() then stops because no content was eaten) without
              // calling strpos() with an empty needle.
              return array("", "", $action);
          }
          // NOTE(review): this locates the first textual occurrence of the
          // matched text, which could precede the position the regex actually
          // matched at for context-sensitive patterns (e.g. \b or ^ anchors).
          $offset = strpos($raw, $match);
          $unparsed = substr($raw, 0, $offset);
          $raw = substr($raw, $offset + strlen($match));
          return array($unparsed, $match, $action);
      }
  }
  389. ?>