PageRenderTime 42ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/vendor/Twig/Lexer.php

https://gitlab.com/dcnf/dcbase.org
PHP | 497 lines | 384 code | 62 blank | 51 comment | 48 complexity | c80fbb1bc4e54c86d152da56b00e310d MD5 | raw file
  1. <?php
  2. /*
  3. * This file is part of Twig.
  4. *
  5. * (c) Fabien Potencier
  6. * (c) Armin Ronacher
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11. namespace Twig;
  12. use Twig\Error\SyntaxError;
  13. /**
  14. * @author Fabien Potencier <fabien@symfony.com>
  15. */
  16. class Lexer
  17. {
  18. private $tokens;
  19. private $code;
  20. private $cursor;
  21. private $lineno;
  22. private $end;
  23. private $state;
  24. private $states;
  25. private $brackets;
  26. private $env;
  27. private $source;
  28. private $options;
  29. private $regexes;
  30. private $position;
  31. private $positions;
  32. private $currentVarBlockLine;
  33. const STATE_DATA = 0;
  34. const STATE_BLOCK = 1;
  35. const STATE_VAR = 2;
  36. const STATE_STRING = 3;
  37. const STATE_INTERPOLATION = 4;
  38. const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
  39. const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?([Ee][\+\-][0-9]+)?/A';
  40. const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
  41. const REGEX_DQ_STRING_DELIM = '/"/A';
  42. const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
  43. const PUNCTUATION = '()[]{}?:.,|';
  44. public function __construct(Environment $env, array $options = [])
  45. {
  46. $this->env = $env;
  47. $this->options = array_merge([
  48. 'tag_comment' => ['{#', '#}'],
  49. 'tag_block' => ['{%', '%}'],
  50. 'tag_variable' => ['{{', '}}'],
  51. 'whitespace_trim' => '-',
  52. 'whitespace_line_trim' => '~',
  53. 'whitespace_line_chars' => ' \t\0\x0B',
  54. 'interpolation' => ['#{', '}'],
  55. ], $options);
  56. // when PHP 7.3 is the min version, we will be able to remove the '#' part in preg_quote as it's part of the default
  57. $this->regexes = [
  58. // }}
  59. 'lex_var' => '{
  60. \s*
  61. (?:'.
  62. preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '#').'\s*'. // -}}\s*
  63. '|'.
  64. preg_quote($this->options['whitespace_line_trim'].$this->options['tag_variable'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~}}[ \t\0\x0B]*
  65. '|'.
  66. preg_quote($this->options['tag_variable'][1], '#'). // }}
  67. ')
  68. }Ax',
  69. // %}
  70. 'lex_block' => '{
  71. \s*
  72. (?:'.
  73. preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*\n?'. // -%}\s*\n?
  74. '|'.
  75. preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
  76. '|'.
  77. preg_quote($this->options['tag_block'][1], '#').'\n?'. // %}\n?
  78. ')
  79. }Ax',
  80. // {% endverbatim %}
  81. 'lex_raw_data' => '{'.
  82. preg_quote($this->options['tag_block'][0], '#'). // {%
  83. '('.
  84. $this->options['whitespace_trim']. // -
  85. '|'.
  86. $this->options['whitespace_line_trim']. // ~
  87. ')?\s*endverbatim\s*'.
  88. '(?:'.
  89. preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}
  90. '|'.
  91. preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
  92. '|'.
  93. preg_quote($this->options['tag_block'][1], '#'). // %}
  94. ')
  95. }sx',
  96. 'operator' => $this->getOperatorRegex(),
  97. // #}
  98. 'lex_comment' => '{
  99. (?:'.
  100. preg_quote($this->options['whitespace_trim']).preg_quote($this->options['tag_comment'][1], '#').'\s*\n?'. // -#}\s*\n?
  101. '|'.
  102. preg_quote($this->options['whitespace_line_trim'].$this->options['tag_comment'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~#}[ \t\0\x0B]*
  103. '|'.
  104. preg_quote($this->options['tag_comment'][1], '#').'\n?'. // #}\n?
  105. ')
  106. }sx',
  107. // verbatim %}
  108. 'lex_block_raw' => '{
  109. \s*verbatim\s*
  110. (?:'.
  111. preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}\s*
  112. '|'.
  113. preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
  114. '|'.
  115. preg_quote($this->options['tag_block'][1], '#'). // %}
  116. ')
  117. }Asx',
  118. 'lex_block_line' => '{\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '#').'}As',
  119. // {{ or {% or {#
  120. 'lex_tokens_start' => '{
  121. ('.
  122. preg_quote($this->options['tag_variable'][0], '#'). // {{
  123. '|'.
  124. preg_quote($this->options['tag_block'][0], '#'). // {%
  125. '|'.
  126. preg_quote($this->options['tag_comment'][0], '#'). // {#
  127. ')('.
  128. preg_quote($this->options['whitespace_trim'], '#'). // -
  129. '|'.
  130. preg_quote($this->options['whitespace_line_trim'], '#'). // ~
  131. ')?
  132. }sx',
  133. 'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
  134. 'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
  135. ];
  136. }
  137. public function tokenize(Source $source): TokenStream
  138. {
  139. $this->source = $source;
  140. $this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode());
  141. $this->cursor = 0;
  142. $this->lineno = 1;
  143. $this->end = \strlen($this->code);
  144. $this->tokens = [];
  145. $this->state = self::STATE_DATA;
  146. $this->states = [];
  147. $this->brackets = [];
  148. $this->position = -1;
  149. // find all token starts in one go
  150. preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, PREG_OFFSET_CAPTURE);
  151. $this->positions = $matches;
  152. while ($this->cursor < $this->end) {
  153. // dispatch to the lexing functions depending
  154. // on the current state
  155. switch ($this->state) {
  156. case self::STATE_DATA:
  157. $this->lexData();
  158. break;
  159. case self::STATE_BLOCK:
  160. $this->lexBlock();
  161. break;
  162. case self::STATE_VAR:
  163. $this->lexVar();
  164. break;
  165. case self::STATE_STRING:
  166. $this->lexString();
  167. break;
  168. case self::STATE_INTERPOLATION:
  169. $this->lexInterpolation();
  170. break;
  171. }
  172. }
  173. $this->pushToken(/* Token::EOF_TYPE */ -1);
  174. if (!empty($this->brackets)) {
  175. list($expect, $lineno) = array_pop($this->brackets);
  176. throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
  177. }
  178. return new TokenStream($this->tokens, $this->source);
  179. }
  180. private function lexData(): void
  181. {
  182. // if no matches are left we return the rest of the template as simple text token
  183. if ($this->position == \count($this->positions[0]) - 1) {
  184. $this->pushToken(/* Token::TEXT_TYPE */ 0, substr($this->code, $this->cursor));
  185. $this->cursor = $this->end;
  186. return;
  187. }
  188. // Find the first token after the current cursor
  189. $position = $this->positions[0][++$this->position];
  190. while ($position[1] < $this->cursor) {
  191. if ($this->position == \count($this->positions[0]) - 1) {
  192. return;
  193. }
  194. $position = $this->positions[0][++$this->position];
  195. }
  196. // push the template text first
  197. $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);
  198. // trim?
  199. if (isset($this->positions[2][$this->position][0])) {
  200. if ($this->options['whitespace_trim'] === $this->positions[2][$this->position][0]) {
  201. // whitespace_trim detected ({%-, {{- or {#-)
  202. $text = rtrim($text);
  203. } elseif ($this->options['whitespace_line_trim'] === $this->positions[2][$this->position][0]) {
  204. // whitespace_line_trim detected ({%~, {{~ or {#~)
  205. // don't trim \r and \n
  206. $text = rtrim($text, " \t\0\x0B");
  207. }
  208. }
  209. $this->pushToken(/* Token::TEXT_TYPE */ 0, $text);
  210. $this->moveCursor($textContent.$position[0]);
  211. switch ($this->positions[1][$this->position][0]) {
  212. case $this->options['tag_comment'][0]:
  213. $this->lexComment();
  214. break;
  215. case $this->options['tag_block'][0]:
  216. // raw data?
  217. if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
  218. $this->moveCursor($match[0]);
  219. $this->lexRawData();
  220. // {% line \d+ %}
  221. } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
  222. $this->moveCursor($match[0]);
  223. $this->lineno = (int) $match[1];
  224. } else {
  225. $this->pushToken(/* Token::BLOCK_START_TYPE */ 1);
  226. $this->pushState(self::STATE_BLOCK);
  227. $this->currentVarBlockLine = $this->lineno;
  228. }
  229. break;
  230. case $this->options['tag_variable'][0]:
  231. $this->pushToken(/* Token::VAR_START_TYPE */ 2);
  232. $this->pushState(self::STATE_VAR);
  233. $this->currentVarBlockLine = $this->lineno;
  234. break;
  235. }
  236. }
  237. private function lexBlock(): void
  238. {
  239. if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
  240. $this->pushToken(/* Token::BLOCK_END_TYPE */ 3);
  241. $this->moveCursor($match[0]);
  242. $this->popState();
  243. } else {
  244. $this->lexExpression();
  245. }
  246. }
  247. private function lexVar(): void
  248. {
  249. if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
  250. $this->pushToken(/* Token::VAR_END_TYPE */ 4);
  251. $this->moveCursor($match[0]);
  252. $this->popState();
  253. } else {
  254. $this->lexExpression();
  255. }
  256. }
  257. private function lexExpression(): void
  258. {
  259. // whitespace
  260. if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
  261. $this->moveCursor($match[0]);
  262. if ($this->cursor >= $this->end) {
  263. throw new SyntaxError(sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
  264. }
  265. }
  266. // arrow function
  267. if ('=' === $this->code[$this->cursor] && '>' === $this->code[$this->cursor + 1]) {
  268. $this->pushToken(Token::ARROW_TYPE, '=>');
  269. $this->moveCursor('=>');
  270. }
  271. // operators
  272. elseif (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
  273. $this->pushToken(/* Token::OPERATOR_TYPE */ 8, preg_replace('/\s+/', ' ', $match[0]));
  274. $this->moveCursor($match[0]);
  275. }
  276. // names
  277. elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
  278. $this->pushToken(/* Token::NAME_TYPE */ 5, $match[0]);
  279. $this->moveCursor($match[0]);
  280. }
  281. // numbers
  282. elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
  283. $number = (float) $match[0]; // floats
  284. if (ctype_digit($match[0]) && $number <= PHP_INT_MAX) {
  285. $number = (int) $match[0]; // integers lower than the maximum
  286. }
  287. $this->pushToken(/* Token::NUMBER_TYPE */ 6, $number);
  288. $this->moveCursor($match[0]);
  289. }
  290. // punctuation
  291. elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
  292. // opening bracket
  293. if (false !== strpos('([{', $this->code[$this->cursor])) {
  294. $this->brackets[] = [$this->code[$this->cursor], $this->lineno];
  295. }
  296. // closing bracket
  297. elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
  298. if (empty($this->brackets)) {
  299. throw new SyntaxError(sprintf('Unexpected "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
  300. }
  301. list($expect, $lineno) = array_pop($this->brackets);
  302. if ($this->code[$this->cursor] != strtr($expect, '([{', ')]}')) {
  303. throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
  304. }
  305. }
  306. $this->pushToken(/* Token::PUNCTUATION_TYPE */ 9, $this->code[$this->cursor]);
  307. ++$this->cursor;
  308. }
  309. // strings
  310. elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
  311. $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes(substr($match[0], 1, -1)));
  312. $this->moveCursor($match[0]);
  313. }
  314. // opening double quoted string
  315. elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
  316. $this->brackets[] = ['"', $this->lineno];
  317. $this->pushState(self::STATE_STRING);
  318. $this->moveCursor($match[0]);
  319. }
  320. // unlexable
  321. else {
  322. throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
  323. }
  324. }
  325. private function lexRawData(): void
  326. {
  327. if (!preg_match($this->regexes['lex_raw_data'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
  328. throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.', $this->lineno, $this->source);
  329. }
  330. $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
  331. $this->moveCursor($text.$match[0][0]);
  332. // trim?
  333. if (isset($match[1][0])) {
  334. if ($this->options['whitespace_trim'] === $match[1][0]) {
  335. // whitespace_trim detected ({%-, {{- or {#-)
  336. $text = rtrim($text);
  337. } else {
  338. // whitespace_line_trim detected ({%~, {{~ or {#~)
  339. // don't trim \r and \n
  340. $text = rtrim($text, " \t\0\x0B");
  341. }
  342. }
  343. $this->pushToken(/* Token::TEXT_TYPE */ 0, $text);
  344. }
  345. private function lexComment(): void
  346. {
  347. if (!preg_match($this->regexes['lex_comment'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
  348. throw new SyntaxError('Unclosed comment.', $this->lineno, $this->source);
  349. }
  350. $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
  351. }
  352. private function lexString(): void
  353. {
  354. if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
  355. $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
  356. $this->pushToken(/* Token::INTERPOLATION_START_TYPE */ 10);
  357. $this->moveCursor($match[0]);
  358. $this->pushState(self::STATE_INTERPOLATION);
  359. } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && \strlen($match[0]) > 0) {
  360. $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes($match[0]));
  361. $this->moveCursor($match[0]);
  362. } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
  363. list($expect, $lineno) = array_pop($this->brackets);
  364. if ('"' != $this->code[$this->cursor]) {
  365. throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
  366. }
  367. $this->popState();
  368. ++$this->cursor;
  369. } else {
  370. // unlexable
  371. throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
  372. }
  373. }
  374. private function lexInterpolation(): void
  375. {
  376. $bracket = end($this->brackets);
  377. if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
  378. array_pop($this->brackets);
  379. $this->pushToken(/* Token::INTERPOLATION_END_TYPE */ 11);
  380. $this->moveCursor($match[0]);
  381. $this->popState();
  382. } else {
  383. $this->lexExpression();
  384. }
  385. }
  386. private function pushToken($type, $value = ''): void
  387. {
  388. // do not push empty text tokens
  389. if (/* Token::TEXT_TYPE */ 0 === $type && '' === $value) {
  390. return;
  391. }
  392. $this->tokens[] = new Token($type, $value, $this->lineno);
  393. }
  394. private function moveCursor($text): void
  395. {
  396. $this->cursor += \strlen($text);
  397. $this->lineno += substr_count($text, "\n");
  398. }
  399. private function getOperatorRegex(): string
  400. {
  401. $operators = array_merge(
  402. ['='],
  403. array_keys($this->env->getUnaryOperators()),
  404. array_keys($this->env->getBinaryOperators())
  405. );
  406. $operators = array_combine($operators, array_map('strlen', $operators));
  407. arsort($operators);
  408. $regex = [];
  409. foreach ($operators as $operator => $length) {
  410. // an operator that ends with a character must be followed by
  411. // a whitespace or a parenthesis
  412. if (ctype_alpha($operator[$length - 1])) {
  413. $r = preg_quote($operator, '/').'(?=[\s()])';
  414. } else {
  415. $r = preg_quote($operator, '/');
  416. }
  417. // an operator with a space can be any amount of whitespaces
  418. $r = preg_replace('/\s+/', '\s+', $r);
  419. $regex[] = $r;
  420. }
  421. return '/'.implode('|', $regex).'/A';
  422. }
  423. private function pushState($state): void
  424. {
  425. $this->states[] = $this->state;
  426. $this->state = $state;
  427. }
  428. private function popState(): void
  429. {
  430. if (0 === \count($this->states)) {
  431. throw new \LogicException('Cannot pop state without a previous state.');
  432. }
  433. $this->state = array_pop($this->states);
  434. }
  435. }