PageRenderTime 50ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/mustache/src/Mustache/Tokenizer.php

https://github.com/pauln/moodle
PHP | 322 lines | 219 code | 31 blank | 72 comment | 25 complexity | 98fa1659d3d7bcaa260d01de55812ee0 MD5 | raw file
  1. <?php
  2. /*
  3. * This file is part of Mustache.php.
  4. *
  5. * (c) 2010-2015 Justin Hileman
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. /**
  11. * Mustache Tokenizer class.
  12. *
  13. * This class is responsible for turning raw template source into a set of Mustache tokens.
  14. */
  class Mustache_Tokenizer
  {
      // Finite state machine states
      const IN_TEXT     = 0;
      const IN_TAG_TYPE = 1;
      const IN_TAG      = 2;

      // Token types
      const T_SECTION      = '#';
      const T_INVERTED     = '^';
      const T_END_SECTION  = '/';
      const T_COMMENT      = '!';
      const T_PARTIAL      = '>';
      const T_PARENT       = '<';
      const T_DELIM_CHANGE = '=';
      const T_ESCAPED      = '_v';
      const T_UNESCAPED    = '{';
      const T_UNESCAPED_2  = '&';
      const T_TEXT         = '_t';
      const T_PRAGMA       = '%';
      const T_BLOCK_VAR    = '$';
      const T_BLOCK_ARG    = '$arg';

      // Valid token types. Used as a lookup table keyed by the single
      // character that follows an opening delimiter.
      private static $tagTypes = array(
          self::T_SECTION      => true,
          self::T_INVERTED     => true,
          self::T_END_SECTION  => true,
          self::T_COMMENT      => true,
          self::T_PARTIAL      => true,
          self::T_PARENT       => true,
          self::T_DELIM_CHANGE => true,
          self::T_ESCAPED      => true,
          self::T_UNESCAPED    => true,
          self::T_UNESCAPED_2  => true,
          self::T_PRAGMA       => true,
          self::T_BLOCK_VAR    => true,
      );

      // Token properties
      const TYPE    = 'type';
      const NAME    = 'name';
      const OTAG    = 'otag';
      const CTAG    = 'ctag';
      const LINE    = 'line';
      const INDEX   = 'index';
      const END     = 'end';
      const INDENT  = 'indent';
      const NODES   = 'nodes';
      const VALUE   = 'value';
      const FILTERS = 'filters';

      private $state;
      private $tagType;
      private $buffer;
      private $tokens;
      private $seenTag;
      private $line;
      private $otag;
      private $ctag;
      private $otagLen;
      private $ctagLen;

      /**
       * Scan and tokenize template source.
       *
       * @throws Mustache_Exception_SyntaxException when mismatched or unclosed tags are encountered,
       *                                            or when a delimiter change tag is malformed.
       * @throws InvalidArgumentException           when the initial $delimiters string is not two
       *                                            whitespace-separated tokens.
       *
       * @param string $text       Mustache template source to tokenize
       * @param string $delimiters Optionally, pass initial opening and closing delimiters (default: null)
       *
       * @return array Set of Mustache tokens
       */
      public function scan($text, $delimiters = null)
      {
          // Setting mbstring.func_overload makes things *really* slow.
          // Let's do everyone a favor and scan this string as ASCII instead.
          $encoding = null;
          if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
              $encoding = mb_internal_encoding();
              mb_internal_encoding('ASCII');
          }

          try {
              $tokens = $this->tokenize((string) $text, $delimiters);
          } catch (Exception $e) {
              // Restore the user's encoding even when tokenizing fails, so a
              // syntax error does not leave the process stuck in ASCII mode.
              if ($encoding !== null) {
                  mb_internal_encoding($encoding);
              }

              throw $e;
          }

          // Restore the user's encoding...
          if ($encoding !== null) {
              mb_internal_encoding($encoding);
          }

          return $tokens;
      }

      /**
       * Run the tokenizer state machine over the template source.
       *
       * @throws Mustache_Exception_SyntaxException when mismatched or unclosed tags are encountered.
       * @throws InvalidArgumentException           when the initial $delimiters string is invalid.
       *
       * @param string      $text       Mustache template source to tokenize
       * @param string|null $delimiters Initial opening and closing delimiters, or null for the defaults
       *
       * @return array Set of Mustache tokens
       */
      private function tokenize($text, $delimiters)
      {
          $this->reset();

          // Guard against null before trim(): passing null to trim() is
          // deprecated as of PHP 8.1.
          if ($delimiters !== null && ($delimiters = trim($delimiters))) {
              $this->setDelimiters($delimiters);
          }

          $len = strlen($text);
          for ($i = 0; $i < $len; $i++) {
              switch ($this->state) {
                  case self::IN_TEXT:
                      if ($this->tagChange($this->otag, $this->otagLen, $text, $i)) {
                          // Step back so the next iteration revisits this
                          // index in the IN_TAG_TYPE state.
                          $i--;
                          $this->flushBuffer();
                          $this->state = self::IN_TAG_TYPE;
                      } else {
                          $char = $text[$i];
                          $this->buffer .= $char;
                          // Text is emitted one token per line.
                          if ($char === "\n") {
                              $this->flushBuffer();
                              $this->line++;
                          }
                      }
                      break;

                  case self::IN_TAG_TYPE:
                      // Move to the last character of the opening delimiter.
                      $i += $this->otagLen - 1;

                      if ($i + 1 >= $len) {
                          // The template ends right after the opening
                          // delimiter; there is no tag type character to read.
                          $this->tagType = self::T_ESCAPED;
                          $this->seenTag = $i;
                          $this->throwUnclosedTagException();
                      }

                      $char = $text[$i + 1];
                      if (isset(self::$tagTypes[$char])) {
                          $tag = $char;
                          $this->tagType = $tag;
                      } else {
                          $tag = null;
                          $this->tagType = self::T_ESCAPED;
                      }

                      if ($this->tagType === self::T_DELIM_CHANGE) {
                          $i = $this->changeDelimiters($text, $i);
                          $this->state = self::IN_TEXT;
                      } elseif ($this->tagType === self::T_PRAGMA) {
                          $i = $this->addPragma($text, $i);
                          $this->state = self::IN_TEXT;
                      } else {
                          // Skip over the tag type character, if there was one.
                          if ($tag !== null) {
                              $i++;
                          }
                          $this->state = self::IN_TAG;
                      }
                      $this->seenTag = $i;
                      break;

                  default:
                      if ($this->tagChange($this->ctag, $this->ctagLen, $text, $i)) {
                          $token = array(
                              self::TYPE  => $this->tagType,
                              self::NAME  => trim($this->buffer),
                              self::OTAG  => $this->otag,
                              self::CTAG  => $this->ctag,
                              self::LINE  => $this->line,
                              self::INDEX => ($this->tagType === self::T_END_SECTION) ? $this->seenTag - $this->otagLen : $i + $this->ctagLen,
                          );

                          if ($this->tagType === self::T_UNESCAPED) {
                              // Clean up `{{{ tripleStache }}}` style tokens.
                              if ($this->ctag === '}}') {
                                  // The extra `}` must directly follow the closing delimiter.
                                  $matched = ($i + 2 < $len) && $text[$i + 2] === '}';
                                  if ($matched) {
                                      $i++;
                                  }
                              } else {
                                  // With custom delimiters the extra `}` ends up in the tag name.
                                  $lastName = $token[self::NAME];
                                  $matched  = substr($lastName, -1) === '}';
                                  if ($matched) {
                                      $token[self::NAME] = trim(substr($lastName, 0, -1));
                                  }
                              }

                              if (!$matched) {
                                  $msg = sprintf(
                                      'Mismatched tag delimiters: %s on line %d',
                                      $token[self::NAME],
                                      $token[self::LINE]
                                  );

                                  throw new Mustache_Exception_SyntaxException($msg, $token);
                              }
                          }

                          $this->buffer = '';
                          $i += $this->ctagLen - 1;
                          $this->state = self::IN_TEXT;
                          $this->tokens[] = $token;
                      } else {
                          $this->buffer .= $text[$i];
                      }
                      break;
              }
          }

          // Running out of input while still inside a tag means the tag was
          // never closed. Flushing the buffer here would silently turn the
          // tag's content into template text.
          if ($this->state !== self::IN_TEXT) {
              $this->throwUnclosedTagException();
          }

          $this->flushBuffer();

          return $this->tokens;
      }

      /**
       * Helper function to reset tokenizer internal state.
       */
      private function reset()
      {
          $this->state   = self::IN_TEXT;
          $this->tagType = null;
          $this->buffer  = '';
          $this->tokens  = array();
          $this->seenTag = false;
          $this->line    = 0;
          $this->otag    = '{{';
          $this->ctag    = '}}';
          $this->otagLen = 2;
          $this->ctagLen = 2;
      }

      /**
       * Flush the current buffer to a text token. Does nothing when the buffer is empty.
       */
      private function flushBuffer()
      {
          if (strlen($this->buffer) > 0) {
              $this->tokens[] = array(
                  self::TYPE  => self::T_TEXT,
                  self::LINE  => $this->line,
                  self::VALUE => $this->buffer,
              );
              $this->buffer = '';
          }
      }

      /**
       * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
       *
       * @throws Mustache_Exception_SyntaxException when the tag is not closed or the new delimiters are invalid.
       *
       * @param string $text  Mustache template source
       * @param int    $index Current tokenizer index
       *
       * @return int New index value
       */
      private function changeDelimiters($text, $index)
      {
          $startIndex = strpos($text, '=', $index) + 1;
          $close      = '=' . $this->ctag;
          // Search from after the opening `=` so that it cannot be mistaken
          // for the start of the closing `=` + ctag sequence.
          $closeIndex = strpos($text, $close, $startIndex);

          if ($closeIndex === false) {
              $this->throwUnclosedTagException();
          }

          $token = array(
              self::TYPE => self::T_DELIM_CHANGE,
              self::LINE => $this->line,
          );

          try {
              $this->setDelimiters(trim(substr($text, $startIndex, $closeIndex - $startIndex)));
          } catch (InvalidArgumentException $e) {
              // Inside a template, bad delimiters are a template syntax error.
              throw new Mustache_Exception_SyntaxException(
                  sprintf('Invalid delimiters on line %d', $this->line),
                  $token
              );
          }

          $this->tokens[] = $token;

          return $closeIndex + strlen($close) - 1;
      }

      /**
       * Set the current Mustache `otag` and `ctag` delimiters.
       *
       * The two delimiters may be separated (and surrounded) by any amount of whitespace.
       *
       * @throws InvalidArgumentException when $delimiters is not exactly two whitespace-separated tokens.
       *
       * @param string $delimiters
       */
      private function setDelimiters($delimiters)
      {
          if (!preg_match('/^\s*(\S+)\s+(\S+)\s*$/', (string) $delimiters, $matches)) {
              throw new InvalidArgumentException(sprintf('Invalid delimiters: %s', $delimiters));
          }

          $this->otag    = $matches[1];
          $this->ctag    = $matches[2];
          $this->otagLen = strlen($this->otag);
          $this->ctagLen = strlen($this->ctag);
      }

      /**
       * Add pragma token.
       *
       * Pragmas are hoisted to the front of the template, so all pragma tokens
       * will appear at the front of the token list.
       *
       * @throws Mustache_Exception_SyntaxException when the pragma tag is not closed.
       *
       * @param string $text
       * @param int    $index
       *
       * @return int New index value
       */
      private function addPragma($text, $index)
      {
          $end = strpos($text, $this->ctag, $index);

          if ($end === false) {
              $this->throwUnclosedTagException();
          }

          // $index + 1 is the `%` tag type character; the pragma name starts after it.
          $pragma = trim(substr($text, $index + 2, $end - $index - 2));

          // Pragmas are hoisted to the front of the template.
          array_unshift($this->tokens, array(
              self::TYPE => self::T_PRAGMA,
              self::NAME => $pragma,
              self::LINE => 0,
          ));

          return $end + $this->ctagLen - 1;
      }

      /**
       * Throw a syntax exception describing the tag that is currently open.
       *
       * @throws Mustache_Exception_SyntaxException always.
       */
      private function throwUnclosedTagException()
      {
          $name = trim($this->buffer);
          if ($name !== '') {
              $msg = sprintf('Unclosed tag: %s on line %d', $name, $this->line);
          } else {
              $msg = sprintf('Unclosed tag on line %d', $this->line);
          }

          throw new Mustache_Exception_SyntaxException($msg, array(
              self::TYPE  => $this->tagType,
              self::NAME  => $name,
              self::OTAG  => $this->otag,
              self::CTAG  => $this->ctag,
              self::LINE  => $this->line,
              self::INDEX => $this->seenTag - $this->otagLen,
          ));
      }

      /**
       * Test whether the given delimiter occurs at the current index.
       *
       * @param string $tag    Delimiter to look for (current `otag` or `ctag`)
       * @param int    $tagLen Length of the delimiter
       * @param string $text   Mustache template source
       * @param int    $index  Current tokenizer index
       *
       * @return bool True if $text contains $tag starting at $index
       */
      private function tagChange($tag, $tagLen, $text, $index)
      {
          return substr($text, $index, $tagLen) === $tag;
      }
  }