/vendor/nette/nette/Nette/Utils/Tokenizer.php

https://bitbucket.org/iiic/iszp · PHP · 301 lines · 152 code · 64 blank · 85 comment · 19 complexity · e1756cb1fd5fa26954aa1dc5f5850fda MD5 · raw file

  1. <?php
  2. /**
  3. * This file is part of the Nette Framework (http://nette.org)
  4. *
  5. * Copyright (c) 2004 David Grudl (http://davidgrudl.com)
  6. *
  7. * For the full copyright and license information, please view
  8. * the file license.txt that was distributed with this source code.
  9. */
  10. namespace Nette\Utils;
  11. use Nette;
  /**
   * Simple lexical analyser.
   *
   * Builds one alternation regular expression from the patterns passed to the
   * constructor, splits an input string into tokens with it, and offers a small
   * cursor API (fetch*, is*, has*) for walking the resulting token list.
   *
   * @author David Grudl
   */
  class Tokenizer extends Nette\Object
  {
      /** @var array tokens produced by the last tokenize() call */
      public $tokens;

      /** @var int index of the next token to be read */
      public $position = 0;

      /** @var array token types (or plain values for untyped tokens) that scan() may skip */
      public $ignored = array();

      /** @var string input passed to the last tokenize() call */
      private $input;

      /** @var string regular expression built from the patterns */
      private $re;

      /** @var array|bool pattern keys used as token types; FALSE when the keys are just 0..n-1 */
      private $types;

      /** @var array|string token most recently consumed by an advancing scan */
      public $current;


      /**
       * @param array of [(int) symbol type => pattern]
       * @param string regular expression flag
       */
      public function __construct(array $patterns, $flags = '')
      {
          // Each pattern becomes its own capture group; 'A' anchors every match at the current offset.
          $this->re = '~(' . implode(')|(', $patterns) . ')~A' . $flags;
          $keys = array_keys($patterns);
          $this->types = $keys === range(0, count($patterns) - 1) ? FALSE : $keys;
      }


      /**
       * Tokenize string.
       *
       * With typed patterns every token is an array ['value', 'type', 'line'];
       * otherwise the tokens are whatever Strings::split() returns for the input.
       *
       * @param string
       * @return array
       * @throws TokenizerException when part of the input is not covered by the patterns
       */
      public function tokenize($input)
      {
          $this->input = $input;
          if ($this->types) {
              $this->tokens = Strings::matchAll($input, $this->re);
              $len = 0;
              $count = count($this->types);
              $line = 1;
              foreach ($this->tokens as & $match) {
                  // The first non-empty capture group tells which pattern matched.
                  $type = NULL;
                  for ($i = 1; $i <= $count; $i++) {
                      if (!isset($match[$i])) {
                          break;
                      } elseif ($match[$i] != NULL) {
                          $type = $this->types[$i - 1];
                          break;
                      }
                  }
                  $match = self::createToken($match[0], $type, $line);
                  $len += strlen($match['value']);
                  $line += substr_count($match['value'], "\n");
              }
              unset($match); // release the reference to the last element of $this->tokens

              // Matched tokens must add up to the whole input; otherwise matching stopped early.
              if ($len !== strlen($input)) {
                  $errorOffset = $len;
              }

          } else {
              $this->tokens = Strings::split($input, $this->re, PREG_SPLIT_NO_EMPTY);
              // A trailing piece that does not match the expression is unrecognised input.
              if ($this->tokens && !Strings::match(end($this->tokens), $this->re)) {
                  $tmp = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
                  list(, $errorOffset) = end($tmp);
              }
          }

          if (isset($errorOffset)) {
              list($line, $col) = self::coordinates($this->input, $errorOffset);
              $token = str_replace("\n", '\n', substr($input, $errorOffset, 10));
              throw new TokenizerException("Unexpected '$token' on line $line, column $col.");
          }
          return $this->tokens;
      }


      /**
       * Creates a token structure.
       * @param string token text
       * @param mixed  token type
       * @param int    line number
       * @return array ['value' => ..., 'type' => ..., 'line' => ...]
       */
      public static function createToken($value, $type = NULL, $line = NULL)
      {
          return array('value' => $value, 'type' => $type, 'line' => $line);
      }


      /**
       * Returns position of token in input string.
       *
       * Line and column are both 1-based. An index past the last token yields
       * the position just after the end of the input.
       *
       * @param int token number
       * @return array [offset, line, column]
       */
      public function getOffset($i)
      {
          $tokens = Strings::split($this->input, $this->re, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
          $offset = isset($tokens[$i]) ? $tokens[$i][1] : strlen($this->input);
          list($line, $col) = self::coordinates($this->input, $offset);
          return array($offset, $line, $col);
      }


      /**
       * Returns next token as string.
       * @param desired token
       * @return string|bool value of the token, or FALSE when nothing matched
       */
      public function fetch()
      {
          $args = func_get_args();
          return $this->scan($args, TRUE);
      }


      /**
       * Returns next token.
       * @param desired token
       * @return array|string|bool the token, or FALSE when nothing matched
       */
      public function fetchToken()
      {
          $args = func_get_args();
          return $this->scan($args, TRUE) === FALSE ? FALSE : $this->current;
      }


      /**
       * Returns concatenation of all next tokens.
       * @param desired token
       * @return string|bool concatenated values, or FALSE when nothing matched
       */
      public function fetchAll()
      {
          $args = func_get_args();
          return $this->scan($args, FALSE);
      }


      /**
       * Returns concatenation of all next tokens until it sees a token with the given value.
       * The terminating token itself is not consumed.
       * @param tokens
       * @return string|bool concatenated values, or FALSE when nothing was consumed
       */
      public function fetchUntil($arg)
      {
          $args = func_get_args();
          return $this->scan($args, FALSE, TRUE, TRUE);
      }


      /**
       * Checks the next token.
       * @param token
       * @return bool
       */
      public function isNext($arg)
      {
          $args = func_get_args();
          // Compare against FALSE instead of casting: a matched token valued '0' is still a match.
          return $this->scan($args, TRUE, FALSE) !== FALSE;
      }


      /**
       * Checks the previous token.
       * @param token
       * @return bool
       */
      public function isPrev($arg)
      {
          $args = func_get_args();
          // Compare against FALSE instead of casting: a matched token valued '0' is still a match.
          return $this->scan($args, TRUE, FALSE, FALSE, TRUE) !== FALSE;
      }


      /**
       * Checks existence of next token.
       * @return bool
       */
      public function hasNext()
      {
          return isset($this->tokens[$this->position]);
      }


      /**
       * Checks existence of previous token.
       * @return bool
       */
      public function hasPrev()
      {
          // $position points one past the current token, so a previous one needs at least two consumed.
          return $this->position > 1;
      }


      /**
       * Checks the current token.
       * @param token
       * @return bool
       */
      public function isCurrent($arg)
      {
          $args = func_get_args();
          if (is_array($this->current)) {
              return in_array($this->current['value'], $args, TRUE)
                  || in_array($this->current['type'], $args, TRUE);
          } else {
              return in_array($this->current, $args, TRUE);
          }
      }


      /**
       * Moves the cursor back to the first token and forgets the current one.
       * @return void
       */
      public function reset()
      {
          $this->position = 0;
          $this->current = NULL;
      }


      /**
       * Looks for (first) (not) wanted tokens.
       *
       * Walks the token list from the cursor and concatenates the values of accepted
       * tokens. A token is accepted when $wanted is empty, or when its value or type
       * is in $wanted (inverted when $neg is set). A rejected token ends the scan
       * unless $neg is off and its type is listed in $this->ignored.
       *
       * @param array  wanted token values or types
       * @param bool   stop after the first accepted token?
       * @param bool   move the cursor and update $current on accepted tokens?
       * @param bool   accept tokens that are NOT in $wanted?
       * @param bool   walk backwards, starting at the token before the current one?
       * @return string|bool concatenated values of accepted tokens, or FALSE when none was accepted
       */
      private function scan($wanted, $first, $advance = TRUE, $neg = FALSE, $prev = FALSE)
      {
          $res = FALSE;
          $pos = $this->position + ($prev ? -2 : 0);
          while (isset($this->tokens[$pos])) {
              $token = $this->tokens[$pos];
              $pos += $prev ? -1 : 1;
              // Untyped tokens are plain strings; they serve as both value and type.
              $value = is_array($token) ? $token['value'] : $token;
              $type = is_array($token) ? $token['type'] : $token;
              if (!$wanted || (in_array($value, $wanted, TRUE) || in_array($type, $wanted, TRUE)) ^ $neg) {
                  if ($advance) {
                      $this->position = $pos;
                      $this->current = $token;
                  }
                  $res .= $value;
                  if ($first) {
                      break;
                  }

              } elseif ($neg || !in_array($type, $this->ignored, TRUE)) {
                  break;
              }
          }
          return $res;
      }


      /**
       * Converts a byte offset into a 1-based line and column.
       * @param string text the offset refers to
       * @param int    byte offset
       * @return array [line, column]
       */
      private static function coordinates($text, $offset)
      {
          $before = (string) substr($text, 0, $offset);
          return array(
              substr_count($before, "\n") + 1,
              // The prepended "\n" makes strrpos() always find a line start, including on the first line.
              $offset - strrpos("\n" . $before, "\n") + 1,
          );
      }

  }
  /**
   * Exception signalling a tokenizer error.
   *
   * Adds no members of its own; it exists so that callers can catch
   * tokenizer failures separately from other exceptions.
   */
  class TokenizerException extends \Exception {}