/lib/mustache/src/Mustache/Tokenizer.php

https://github.com/jfilip/moodle · PHP · 331 lines · 226 code · 32 blank · 73 comment · 25 complexity · b39a4fa1151f6b5c01552eafdd9b2998 MD5 · raw file

  1. <?php
  2. /*
  3. * This file is part of Mustache.php.
  4. *
  5. * (c) 2010-2014 Justin Hileman
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  /**
   * Mustache Tokenizer class.
   *
   * This class is responsible for turning raw template source into a set of Mustache tokens.
   *
   * Internally it is a small finite state machine that walks the template one
   * byte at a time: plain text is buffered until an opening delimiter is seen,
   * the character following the delimiter selects the tag type, and the tag
   * contents are buffered until the matching closing delimiter.
   */
  class Mustache_Tokenizer
  {
      // Finite state machine states
      const IN_TEXT     = 0;
      const IN_TAG_TYPE = 1;
      const IN_TAG      = 2;

      // Token types
      const T_SECTION      = '#';
      const T_INVERTED     = '^';
      const T_END_SECTION  = '/';
      const T_COMMENT      = '!';
      const T_PARTIAL      = '>';
      const T_PARENT       = '<';
      const T_DELIM_CHANGE = '=';
      const T_ESCAPED      = '_v';
      const T_UNESCAPED    = '{';
      const T_UNESCAPED_2  = '&';
      const T_TEXT         = '_t';
      const T_PRAGMA       = '%';
      const T_BLOCK_VAR    = '$';
      const T_BLOCK_ARG    = '$arg';

      // Valid token types
      private static $tagTypes = array(
          self::T_SECTION      => true,
          self::T_INVERTED     => true,
          self::T_END_SECTION  => true,
          self::T_COMMENT      => true,
          self::T_PARTIAL      => true,
          self::T_PARENT       => true,
          self::T_DELIM_CHANGE => true,
          self::T_ESCAPED      => true,
          self::T_UNESCAPED    => true,
          self::T_UNESCAPED_2  => true,
          self::T_PRAGMA       => true,
          self::T_BLOCK_VAR    => true,
      );

      // Interpolated tags
      private static $interpolatedTags = array(
          self::T_ESCAPED     => true,
          self::T_UNESCAPED   => true,
          self::T_UNESCAPED_2 => true,
      );

      // Token properties
      const TYPE    = 'type';
      const NAME    = 'name';
      const OTAG    = 'otag';
      const CTAG    = 'ctag';
      const LINE    = 'line';
      const INDEX   = 'index';
      const END     = 'end';
      const INDENT  = 'indent';
      const NODES   = 'nodes';
      const VALUE   = 'value';
      const FILTERS = 'filters';

      private $state;
      private $tagType;
      private $tag;
      private $buffer;
      private $tokens;
      private $seenTag;
      private $line;
      private $otag;
      private $ctag;
      private $otagLen;
      private $ctagLen;

      /**
       * Scan and tokenize template source.
       *
       * @throws Mustache_Exception_SyntaxException when a tag is left unclosed, when
       *         triple-mustache delimiters are mismatched, or when a delimiter
       *         definition is malformed.
       *
       * @param string $text       Mustache template source to tokenize
       * @param string $delimiters Optionally, pass initial opening and closing delimiters (default: null)
       *
       * @return array Set of Mustache tokens
       */
      public function scan($text, $delimiters = null)
      {
          // Setting mbstring.func_overload makes things *really* slow.
          // Let's do everyone a favor and scan this string as ASCII instead.
          $encoding = null;
          if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload') & 2)) {
              $encoding = mb_internal_encoding();
              mb_internal_encoding('ASCII');
          }

          try {
              $tokens = $this->tokenize((string) $text, $delimiters);
          } catch (Exception $e) {
              // Never leave the caller stuck with our temporary ASCII encoding.
              // (catch + rethrow rather than `finally`, to stay PHP 5.2 compatible.)
              if (is_string($encoding)) {
                  mb_internal_encoding($encoding);
              }

              throw $e;
          }

          // Restore the user's encoding...
          if (is_string($encoding)) {
              mb_internal_encoding($encoding);
          }

          return $tokens;
      }

      /**
       * Run the tokenizer state machine over the template source.
       *
       * @throws Mustache_Exception_SyntaxException on unclosed tags, mismatched
       *         triple-mustache delimiters or malformed delimiter definitions.
       *
       * @param string      $text       Mustache template source
       * @param string|null $delimiters Initial delimiters, e.g. "<% %>", or null for the defaults
       *
       * @return array Set of Mustache tokens
       */
      private function tokenize($text, $delimiters)
      {
          $this->reset();

          // Only strings are meaningful here; trim(null) is deprecated on PHP 8.1+.
          // An empty value (or "0", which the original truthiness check also
          // ignored) keeps the default delimiters.
          $delimiters = is_string($delimiters) ? trim($delimiters) : '';
          if ($delimiters !== '' && $delimiters !== '0') {
              $this->setDelimiters($delimiters);
          }

          $len = strlen($text);
          for ($i = 0; $i < $len; $i++) {
              switch ($this->state) {
                  case self::IN_TEXT:
                      if ($this->tagChange($this->otag, $this->otagLen, $text, $i)) {
                          // Step back one so the next iteration lands on the
                          // first character of the opening delimiter again.
                          $i--;
                          $this->flushBuffer();
                          $this->state = self::IN_TAG_TYPE;
                      } else {
                          $char = $text[$i];
                          $this->buffer .= $char;
                          // Text tokens are emitted per line.
                          if ($char === "\n") {
                              $this->flushBuffer();
                              $this->line++;
                          }
                      }
                      break;

                  case self::IN_TAG_TYPE:
                      // Move to the last character of the opening delimiter.
                      $i += $this->otagLen - 1;

                      // The template may end right after the opening delimiter;
                      // do not read past the end of the string.
                      $char = ($i + 1 < $len) ? $text[$i + 1] : '';
                      if (isset(self::$tagTypes[$char])) {
                          $tag = $char;
                          $this->tagType = $tag;
                      } else {
                          // No sigil: a plain (escaped) variable tag.
                          $tag = null;
                          $this->tagType = self::T_ESCAPED;
                      }

                      if ($this->tagType === self::T_DELIM_CHANGE) {
                          $i = $this->changeDelimiters($text, $i);
                          $this->state = self::IN_TEXT;
                      } elseif ($this->tagType === self::T_PRAGMA) {
                          $i = $this->addPragma($text, $i);
                          $this->state = self::IN_TEXT;
                      } else {
                          if ($tag !== null) {
                              // Skip over the tag type character.
                              $i++;
                          }
                          $this->state = self::IN_TAG;
                      }
                      $this->seenTag = $i;
                      break;

                  default:
                      // IN_TAG: buffer the tag contents until the closing delimiter.
                      if ($this->tagChange($this->ctag, $this->ctagLen, $text, $i)) {
                          $token = array(
                              self::TYPE  => $this->tagType,
                              self::NAME  => trim($this->buffer),
                              self::OTAG  => $this->otag,
                              self::CTAG  => $this->ctag,
                              self::LINE  => $this->line,
                              self::INDEX => ($this->tagType === self::T_END_SECTION)
                                  ? $this->seenTag - $this->otagLen
                                  : $i + $this->ctagLen,
                          );

                          if ($this->tagType === self::T_UNESCAPED) {
                              // Clean up `{{{ tripleStache }}}` style tokens.
                              if ($this->ctag === '}}') {
                                  // With default delimiters a third `}` must follow.
                                  if (($i + 2 < $len) && $text[$i + 2] === '}') {
                                      $i++;
                                  } else {
                                      $this->throwMismatchedDelimitersException($token);
                                  }
                              } else {
                                  // With custom delimiters the `}` is the last
                                  // character of the buffered tag name.
                                  $lastName = $token[self::NAME];
                                  if (substr($lastName, -1) === '}') {
                                      $token[self::NAME] = trim(substr($lastName, 0, -1));
                                  } else {
                                      $this->throwMismatchedDelimitersException($token);
                                  }
                              }
                          }

                          $this->buffer = '';
                          $i += $this->ctagLen - 1;
                          $this->state = self::IN_TEXT;
                          $this->tokens[] = $token;
                      } else {
                          $this->buffer .= $text[$i];
                      }
                      break;
              }
          }

          // Running out of input while still inside a tag means it was never closed.
          if ($this->state !== self::IN_TEXT) {
              $this->throwUnclosedTagException();
          }

          $this->flushBuffer();

          return $this->tokens;
      }

      /**
       * Helper function to reset tokenizer internal state.
       */
      private function reset()
      {
          $this->state   = self::IN_TEXT;
          $this->tagType = null;
          $this->tag     = null;
          $this->buffer  = '';
          $this->tokens  = array();
          $this->seenTag = false;
          $this->line    = 0;
          $this->otag    = '{{';
          $this->ctag    = '}}';
          $this->otagLen = 2;
          $this->ctagLen = 2;
      }

      /**
       * Flush the current buffer to a text token (no-op when the buffer is empty).
       */
      private function flushBuffer()
      {
          if (strlen($this->buffer) > 0) {
              $this->tokens[] = array(
                  self::TYPE  => self::T_TEXT,
                  self::LINE  => $this->line,
                  self::VALUE => $this->buffer,
              );
              $this->buffer = '';
          }
      }

      /**
       * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
       *
       * @throws Mustache_Exception_SyntaxException when the tag is never closed or
       *         the delimiter definition is malformed.
       *
       * @param string $text  Mustache template source
       * @param int    $index Current tokenizer index (last character of the opening delimiter)
       *
       * @return int New index value
       */
      private function changeDelimiters($text, $index)
      {
          // $text[$index + 1] is the `=` sigil; the definition starts right after it.
          $startIndex = $index + 2;
          $close      = '=' . $this->ctag;

          // Search after the sigil so the opening `=` can never be taken for the closer.
          $closeIndex = strpos($text, $close, $startIndex);
          if ($closeIndex === false) {
              $this->throwUnclosedTagException();
          }

          // $close was built from the *old* closing delimiter, before this call replaces it.
          $this->setDelimiters(trim(substr($text, $startIndex, $closeIndex - $startIndex)));

          $this->tokens[] = array(
              self::TYPE => self::T_DELIM_CHANGE,
              self::LINE => $this->line,
          );

          return $closeIndex + strlen($close) - 1;
      }

      /**
       * Set the current Mustache `otag` and `ctag` delimiters.
       *
       * The definition must be exactly two non-whitespace tokens separated by
       * any amount of whitespace, e.g. "<% %>".
       *
       * @throws Mustache_Exception_SyntaxException when the definition is malformed.
       *
       * @param string $delimiters
       */
      private function setDelimiters($delimiters)
      {
          if (!preg_match('/^\s*(\S+)\s+(\S+)\s*$/', $delimiters, $matches)) {
              throw new Mustache_Exception_SyntaxException(
                  sprintf('Invalid delimiters: %s', $delimiters),
                  array(
                      self::TYPE => self::T_DELIM_CHANGE,
                      self::LINE => $this->line,
                  )
              );
          }

          $this->otag    = $matches[1];
          $this->ctag    = $matches[2];
          $this->otagLen = strlen($this->otag);
          $this->ctagLen = strlen($this->ctag);
      }

      /**
       * Add pragma token.
       *
       * Pragmas are hoisted to the front of the template, so all pragma tokens
       * will appear at the front of the token list.
       *
       * @throws Mustache_Exception_SyntaxException when the pragma tag is never closed.
       *
       * @param string $text
       * @param int    $index Current tokenizer index (last character of the opening delimiter)
       *
       * @return int New index value
       */
      private function addPragma($text, $index)
      {
          // $text[$index + 1] is the `%` sigil; the pragma name starts after it.
          $nameStart = $index + 2;

          $end = strpos($text, $this->ctag, $nameStart);
          if ($end === false) {
              $this->throwUnclosedTagException();
          }

          $pragma = trim(substr($text, $nameStart, $end - $nameStart));

          // Pragmas are hoisted to the front of the template.
          array_unshift($this->tokens, array(
              self::TYPE => self::T_PRAGMA,
              self::NAME => $pragma,
              self::LINE => 0,
          ));

          return $end + $this->ctagLen - 1;
      }

      /**
       * Test whether the given delimiter occurs at the given position.
       *
       * @param string $tag    Delimiter to look for (current `otag` or `ctag`)
       * @param int    $tagLen Length of the delimiter
       * @param string $text   Mustache template source
       * @param int    $index  Current tokenizer index
       *
       * @return boolean True if $text contains $tag starting at $index
       */
      private function tagChange($tag, $tagLen, $text, $index)
      {
          return substr($text, $index, $tagLen) === $tag;
      }

      /**
       * Throw the "mismatched delimiters" error for a malformed `{{{ triple }}}` tag.
       *
       * @throws Mustache_Exception_SyntaxException always.
       *
       * @param array $token Token that was being built when the mismatch was found
       */
      private function throwMismatchedDelimitersException(array $token)
      {
          $msg = sprintf(
              'Mismatched tag delimiters: %s on line %d',
              $token[self::NAME],
              $token[self::LINE]
          );

          throw new Mustache_Exception_SyntaxException($msg, $token);
      }

      /**
       * Throw the "unclosed tag" error for the tag currently being scanned.
       *
       * @throws Mustache_Exception_SyntaxException always.
       */
      private function throwUnclosedTagException()
      {
          $name = trim($this->buffer);
          if ($name !== '') {
              $msg = sprintf('Unclosed tag: %s on line %d', $name, $this->line);
          } else {
              $msg = sprintf('Unclosed tag on line %d', $this->line);
          }

          throw new Mustache_Exception_SyntaxException($msg, array(
              self::TYPE => $this->tagType,
              self::NAME => $name,
              self::OTAG => $this->otag,
              self::CTAG => $this->ctag,
              self::LINE => $this->line,
          ));
      }
  }