PageRenderTime 43ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/vendor/symfony/symfony/src/Symfony/Component/CssSelector/Tokenizer.php

https://bitbucket.org/hill2steve/mobileroom
PHP | 201 lines | 155 code | 20 blank | 26 comment | 21 complexity | 7e0fa480ab99e2183d9729cecba6f44e MD5 | raw file
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\CssSelector;
  11. use Symfony\Component\CssSelector\Exception\ParseException;
  12. /**
  13. * Tokenizer lexes a CSS Selector to tokens.
  14. *
  15. * This component is a port of the Python lxml library,
  16. * which is copyright Infrae and distributed under the BSD license.
  17. *
  18. * @author Fabien Potencier <fabien@symfony.com>
  19. */
  20. class Tokenizer
  21. {
  22. /**
  23. * Takes a CSS selector and returns an array holding the Tokens
  24. * it contains.
  25. *
  26. * @param string $s The selector to lex.
  27. *
  28. * @return array Token[]
  29. */
  30. public function tokenize($s)
  31. {
  32. if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
  33. $mbEncoding = mb_internal_encoding();
  34. mb_internal_encoding('ASCII');
  35. }
  36. $tokens = array();
  37. $pos = 0;
  38. $s = preg_replace('#/\*.*?\*/#s', '', $s);
  39. while (true) {
  40. if (preg_match('#\s+#A', $s, $match, 0, $pos)) {
  41. $precedingWhitespacePos = $pos;
  42. $pos += strlen($match[0]);
  43. } else {
  44. $precedingWhitespacePos = 0;
  45. }
  46. if ($pos >= strlen($s)) {
  47. if (isset($mbEncoding)) {
  48. mb_internal_encoding($mbEncoding);
  49. }
  50. return $tokens;
  51. }
  52. if (preg_match('#[+-]?\d*n(?:[+-]\d+)?#A', $s, $match, 0, $pos) && 'n' !== $match[0]) {
  53. $sym = substr($s, $pos, strlen($match[0]));
  54. $tokens[] = new Token('Symbol', $sym, $pos);
  55. $pos += strlen($match[0]);
  56. continue;
  57. }
  58. $c = $s[$pos];
  59. $c2 = substr($s, $pos, 2);
  60. if (in_array($c2, array('~=', '|=', '^=', '$=', '*=', '::', '!='))) {
  61. $tokens[] = new Token('Token', $c2, $pos);
  62. $pos += 2;
  63. continue;
  64. }
  65. if (in_array($c, array('>', '+', '~', ',', '.', '*', '=', '[', ']', '(', ')', '|', ':', '#'))) {
  66. if (in_array($c, array('.', '#', '[')) && $precedingWhitespacePos > 0) {
  67. $tokens[] = new Token('Token', ' ', $precedingWhitespacePos);
  68. }
  69. $tokens[] = new Token('Token', $c, $pos);
  70. ++$pos;
  71. continue;
  72. }
  73. if ('"' === $c || "'" === $c) {
  74. // Quoted string
  75. $oldPos = $pos;
  76. list($sym, $pos) = $this->tokenizeEscapedString($s, $pos);
  77. $tokens[] = new Token('String', $sym, $oldPos);
  78. continue;
  79. }
  80. $oldPos = $pos;
  81. list($sym, $pos) = $this->tokenizeSymbol($s, $pos);
  82. $tokens[] = new Token('Symbol', $sym, $oldPos);
  83. continue;
  84. }
  85. }
  86. /**
  87. * Tokenizes a quoted string (i.e. 'A string quoted with \' characters'),
  88. * and returns an array holding the unquoted string contained by $s and
  89. * the new position from which tokenizing should take over.
  90. *
  91. * @param string $s The selector string containing the quoted string.
  92. * @param integer $pos The starting position for the quoted string.
  93. *
  94. * @return array
  95. *
  96. * @throws ParseException When expected closing is not found
  97. */
  98. private function tokenizeEscapedString($s, $pos)
  99. {
  100. $quote = $s[$pos];
  101. $pos = $pos + 1;
  102. $start = $pos;
  103. while (true) {
  104. $next = strpos($s, $quote, $pos);
  105. if (false === $next) {
  106. throw new ParseException(sprintf('Expected closing %s for string in: %s', $quote, substr($s, $start)));
  107. }
  108. $result = substr($s, $start, $next - $start);
  109. if (strlen($result) > 0 && '\\' === $result[strlen($result) - 1]) {
  110. // next quote character is escaped
  111. $pos = $next + 1;
  112. continue;
  113. }
  114. if (false !== strpos($result, '\\')) {
  115. $result = $this->unescapeStringLiteral($result);
  116. }
  117. return array($result, $next + 1);
  118. }
  119. }
  120. /**
  121. * Unescapes a string literal and returns the unescaped string.
  122. *
  123. * @param string $literal The string literal to unescape.
  124. *
  125. * @return string
  126. *
  127. * @throws ParseException When invalid escape sequence is found
  128. */
  129. private function unescapeStringLiteral($literal)
  130. {
  131. return preg_replace_callback('#(\\\\(?:[A-Fa-f0-9]{1,6}(?:\r\n|\s)?|[^A-Fa-f0-9]))#', function ($matches) use ($literal) {
  132. if ($matches[0][0] == '\\' && strlen($matches[0]) > 1) {
  133. $matches[0] = substr($matches[0], 1);
  134. if (in_array($matches[0][0], array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'))) {
  135. return chr(trim($matches[0]));
  136. }
  137. } else {
  138. throw new ParseException(sprintf('Invalid escape sequence %s in string %s', $matches[0], $literal));
  139. }
  140. }, $literal);
  141. }
  142. /**
  143. * Lexes selector $s and returns an array holding the name of the symbol
  144. * contained in it and the new position from which tokenizing should take
  145. * over.
  146. *
  147. * @param string $s The selector string.
  148. * @param integer $pos The position in $s at which the symbol starts.
  149. *
  150. * @return array
  151. *
  152. * @throws ParseException When Unexpected symbol is found
  153. */
  154. private function tokenizeSymbol($s, $pos)
  155. {
  156. $start = $pos;
  157. if (!preg_match('#[^\w\-]#', $s, $match, PREG_OFFSET_CAPTURE, $pos)) {
  158. // Goes to end of s
  159. return array(substr($s, $start), strlen($s));
  160. }
  161. $matchStart = $match[0][1];
  162. if ($matchStart == $pos) {
  163. throw new ParseException(sprintf('Unexpected symbol: %s at %s', $s[$pos], $pos));
  164. }
  165. $result = substr($s, $start, $matchStart - $start);
  166. $pos = $matchStart;
  167. return array($result, $pos);
  168. }
  169. }