PageRenderTime 44ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/vendor/symfony/src/Symfony/Component/CssSelector/Parser.php

https://github.com/tumf/tepco
PHP | 307 lines | 180 code | 37 blank | 90 comment | 64 complexity | 9d0f970236601bd5fde62bcaae5c549e MD5 | raw file
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\CssSelector;
  11. /**
  12. * Parser is the main entry point of the component and can convert CSS
  13. * selectors to XPath expressions.
  14. *
  15. * $xpath = Parser::cssToXpath('h1.foo');
  16. *
  17. * This component is a port of the Python lxml library,
  18. * which is copyright Infrae and distributed under the BSD license.
  19. *
  20. * @author Fabien Potencier <fabien@symfony.com>
  21. */
  class Parser
  {
      /**
       * Translates a CSS expression to its XPath equivalent.
       *
       * Optionally, a prefix can be added to the resulting XPath
       * expression with the $prefix parameter.
       *
       * Three very common selector shapes (a bare element name, "elem#id" and
       * "elem.class") are converted directly with regular expressions; every
       * other string is handed to the full parser. A non-string argument is
       * used as an already parsed node: toXpath() is called on it directly.
       *
       * @throws SyntaxError When got None for xpath expression
       *
       * @param mixed  $cssExpr The CSS expression (string or parsed node).
       * @param string $prefix  An optional prefix for the XPath expression.
       *
       * @return string
       */
      public static function cssToXpath($cssExpr, $prefix = 'descendant-or-self::')
      {
          if (is_string($cssExpr)) {
              // Shortcut: bare element name, e.g. "h1"
              if (preg_match('#^\w+\s*$#u', $cssExpr, $match)) {
                  return $prefix.trim($match[0]);
              }

              // Shortcut: "#id" or "element#id"
              if (preg_match('~^(\w*)#(\w+)\s*$~u', $cssExpr, $match)) {
                  return sprintf("%s%s[@id = '%s']", $prefix, $match[1] ? $match[1] : '*', $match[2]);
              }

              // Shortcut: ".class" or "element.class"
              if (preg_match('#^(\w*)\.(\w+)\s*$#u', $cssExpr, $match)) {
                  return sprintf("%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]", $prefix, $match[1] ? $match[1] : '*', $match[2]);
              }

              // Anything else goes through the full tokenizer/parser
              $parser = new self();
              $cssExpr = $parser->parse($cssExpr);
          }

          $expr = $cssExpr->toXpath();

          // @codeCoverageIgnoreStart
          if (!$expr) {
              throw new SyntaxError(sprintf('Got None for xpath expression from %s.', $cssExpr));
          }
          // @codeCoverageIgnoreEnd

          if ($prefix) {
              $expr->addPrefix($prefix);
          }

          return (string) $expr;
      }

      /**
       * Parses an expression and returns the Node object that represents
       * the parsed expression.
       *
       * Any exception raised while parsing is re-thrown as a new exception of
       * the same class whose message is extended with the tokens consumed so
       * far and the next pending token; the original exception is attached as
       * the previous one.
       *
       * @throws \Exception When tokenizer throws it while parsing
       *
       * @param string $string The expression to parse
       *
       * @return Node\NodeInterface
       */
      public function parse($string)
      {
          $tokenizer = new Tokenizer();
          $stream = new TokenStream($tokenizer->tokenize($string), $string);

          try {
              return $this->parseSelectorGroup($stream);
          } catch (\Exception $e) {
              $class = get_class($e);

              // The separator must be the first argument of implode(): the
              // reversed legacy order is no longer accepted as of PHP 8.0.
              throw new $class(sprintf('%s at %s -> %s', $e->getMessage(), implode('', $stream->getUsed()), $stream->peek()), 0, $e);
          }
      }

      /**
       * Parses a selector group contained in $stream and returns
       * the Node object that represents the expression.
       *
       * A group is one or more selectors separated by commas. A single
       * selector is returned as is; several are wrapped in an OrNode.
       *
       * @param TokenStream $stream The stream to parse.
       *
       * @return Node\NodeInterface
       */
      protected function parseSelectorGroup($stream)
      {
          $result = array();

          while (true) {
              $result[] = $this->parseSelector($stream);

              if ($stream->peek() == ',') {
                  // Consume the comma and parse the next selector of the group
                  $stream->next();
              } else {
                  break;
              }
          }

          if (count($result) == 1) {
              return $result[0];
          }

          return new Node\OrNode($result);
      }

      /**
       * Parses a selector contained in $stream and returns the Node
       * object that represents it.
       *
       * A selector is a chain of simple selectors joined by combinators
       * ("+", ">", "~" or the implicit descendant combinator " "). Parsing
       * stops at a comma or at the end of the stream.
       *
       * @throws SyntaxError When expected selector but got something else
       *
       * @param TokenStream $stream The stream containing the selector.
       *
       * @return Node\NodeInterface
       */
      protected function parseSelector($stream)
      {
          $result = $this->parseSimpleSelector($stream);

          while (true) {
              $peek = $stream->peek();

              if (',' == $peek || null === $peek) {
                  return $result;
              }

              if (in_array((string) $peek, array('+', '>', '~'), true)) {
                  // A combinator
                  $combinator = (string) $stream->next();
              } else {
                  // No explicit combinator: descendant relationship
                  $combinator = ' ';
              }

              // Remember how many tokens were used so that a simple selector
              // which consumes nothing can be detected and reported.
              $consumed = count($stream->getUsed());
              $nextSelector = $this->parseSimpleSelector($stream);
              if ($consumed == count($stream->getUsed())) {
                  throw new SyntaxError(sprintf("Expected selector, got '%s'", $stream->peek()));
              }

              $result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
          }
      }

      /**
       * Parses a simple selector (the current token) from $stream and returns
       * the resulting Node object.
       *
       * A simple selector is an optional (possibly namespaced) element name
       * followed by any number of "#id", ".class", "[attribute]" and
       * ":pseudo" / ":function(...)" parts.
       *
       * @throws SyntaxError When expected symbol but got something else
       *
       * @param TokenStream $stream The stream containing the selector.
       *
       * @return Node\NodeInterface
       */
      protected function parseSimpleSelector($stream)
      {
          $peek = $stream->peek();

          // peek() may be null once the stream is exhausted (parseSelector()
          // tests for that); treat it like "no element name given" instead of
          // calling a method on null.
          if (null === $peek || ('*' != $peek && !$peek->isType('Symbol'))) {
              $element = $namespace = '*';
          } else {
              $next = $stream->next();
              if ('*' != $next && !$next->isType('Symbol')) {
                  throw new SyntaxError(sprintf("Expected symbol, got '%s'", $next));
              }

              if ($stream->peek() == '|') {
                  // "namespace|element" form
                  $namespace = $next;
                  $stream->next();
                  $element = $stream->next();

                  // Validate the element token itself (not the namespace token)
                  if (null === $element || ('*' != $element && !$element->isType('Symbol'))) {
                      throw new SyntaxError(sprintf("Expected symbol, got '%s'", $element));
                  }
              } else {
                  $namespace = '*';
                  $element = $next;
              }
          }

          $result = new Node\ElementNode($namespace, $element);
          $hasHash = false;

          while (true) {
              $peek = $stream->peek();

              if ('#' == $peek) {
                  if ($hasHash) {
                      /* You can't have two hashes
                          (FIXME: is there some more general rule I'm missing?) */
                      // @codeCoverageIgnoreStart
                      break;
                      // @codeCoverageIgnoreEnd
                  }

                  $stream->next();
                  $result = new Node\HashNode($result, $stream->next());
                  $hasHash = true;

                  continue;
              } elseif ('.' == $peek) {
                  $stream->next();
                  $result = new Node\ClassNode($result, $stream->next());

                  continue;
              } elseif ('[' == $peek) {
                  $stream->next();
                  $result = $this->parseAttrib($result, $stream);

                  $next = $stream->next();
                  if (']' != $next) {
                      throw new SyntaxError(sprintf("] expected, got '%s'", $next));
                  }

                  continue;
              } elseif (':' == $peek || '::' == $peek) {
                  $type = $stream->next();
                  $ident = $stream->next();
                  if (!$ident || !$ident->isType('Symbol')) {
                      throw new SyntaxError(sprintf("Expected symbol, got '%s'", $ident));
                  }

                  if ($stream->peek() == '(') {
                      // Functional pseudo-class, e.g. ":name(argument)"
                      $stream->next();
                      $peek = $stream->peek();

                      if (null !== $peek && $peek->isType('String')) {
                          $selector = $stream->next();
                      } elseif (null !== $peek && $peek->isType('Symbol') && is_int($peek)) {
                          // NOTE(review): $peek is a token object here, so
                          // is_int() looks like it can never be true and this
                          // branch is presumably dead; left as is because the
                          // consumer of $selector is not visible from here.
                          $selector = intval($stream->next());
                      } else {
                          // FIXME: parseSimpleSelector, or selector, or...?
                          $selector = $this->parseSimpleSelector($stream);
                      }

                      $next = $stream->next();
                      if (')' != $next) {
                          throw new SyntaxError(sprintf("Expected ')', got '%s' and '%s'", $next, $selector));
                      }

                      $result = new Node\FunctionNode($result, $type, $ident, $selector);
                  } else {
                      $result = new Node\PseudoNode($result, $type, $ident);
                  }

                  continue;
              } else {
                  // End of this simple selector; swallow one separating space
                  if (' ' == $peek) {
                      $stream->next();
                  }

                  break;
              }
              // FIXME: not sure what "negation" is
          }

          return $result;
      }

      /**
       * Parses an attribute from a selector contained in $stream and returns
       * the resulting AttribNode object.
       *
       * The opening "[" must already have been consumed by the caller and the
       * closing "]" is left in the stream for the caller to check.
       *
       * @throws SyntaxError When encountered unexpected selector
       *
       * @param Node\NodeInterface $selector The selector object whose attribute
       *                                     is to be parsed.
       * @param TokenStream        $stream   The container token stream.
       *
       * @return Node\AttribNode
       */
      protected function parseAttrib($selector, $stream)
      {
          $attrib = $stream->next();

          if ($stream->peek() == '|') {
              // "namespace|attribute" form
              $namespace = $attrib;
              $stream->next();
              $attrib = $stream->next();
          } else {
              $namespace = '*';
          }

          // "[attr]": only the presence of the attribute is tested
          if ($stream->peek() == ']') {
              return new Node\AttribNode($selector, $namespace, $attrib, 'exists', null);
          }

          $op = $stream->next();
          if (!in_array((string) $op, array('^=', '$=', '*=', '=', '~=', '|=', '!='), true)) {
              throw new SyntaxError(sprintf("Operator expected, got '%s'", $op));
          }

          $value = $stream->next();

          // A missing value (stream ended right after the operator) is a
          // syntax error, not a method call on null.
          if (null === $value || (!$value->isType('Symbol') && !$value->isType('String'))) {
              throw new SyntaxError(sprintf("Expected string or symbol, got '%s'", $value));
          }

          return new Node\AttribNode($selector, $namespace, $attrib, $op, $value);
      }
  }