PageRenderTime 48ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/src/Symfony/Component/CssSelector/CssSelector.php

https://github.com/outlawscumbag/symfony
PHP | 313 lines | 181 code | 38 blank | 94 comment | 64 complexity | 41f926bb6c50d496603a825790cc0d9c MD5 | raw file
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\CssSelector;
  11. use Symfony\Component\CssSelector\Exception\ParseException;
  12. /**
  13. * CssSelector is the main entry point of the component and can convert CSS
  14. * selectors to XPath expressions.
  15. *
  16. * $xpath = CssSelector::toXpath('h1.foo');
  17. *
  18. * This component is a port of the Python lxml library,
  19. * which is copyright Infrae and distributed under the BSD license.
  20. *
  21. * @author Fabien Potencier <fabien@symfony.com>
  22. *
  23. * @api
  24. */
  class CssSelector
  {
      /**
       * Translates a CSS expression to its XPath equivalent.
       *
       * Three very common shapes ("tag", "tag#id" / "#id" and "tag.class" /
       * ".class") are converted directly with a regular expression. Any other
       * string is parsed into a node tree first, and the tree is then asked
       * for its XPath representation.
       *
       * @param mixed  $cssExpr The CSS expression: a selector string, or an
       *                        already parsed node object exposing toXpath().
       * @param string $prefix  An optional prefix for the XPath expression.
       *
       * @return string
       *
       * @throws ParseException When the selector cannot be parsed, or when the
       *                        node tree yields no XPath expression
       *
       * @api
       */
      public static function toXPath($cssExpr, $prefix = 'descendant-or-self::')
      {
          if (is_string($cssExpr)) {
              // Fast path: a bare element name.
              if (preg_match('#^\w+\s*$#u', $cssExpr, $match)) {
                  return $prefix.trim($match[0]);
              }

              // Fast path: "element#id" or "#id".
              if (preg_match('~^(\w*)#(\w+)\s*$~u', $cssExpr, $match)) {
                  return sprintf("%s%s[@id = '%s']", $prefix, $match[1] ? $match[1] : '*', $match[2]);
              }

              // Fast path: "element.class" or ".class".
              if (preg_match('#^(\w*)\.(\w+)\s*$#u', $cssExpr, $match)) {
                  return sprintf("%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]", $prefix, $match[1] ? $match[1] : '*', $match[2]);
              }

              $parser = new self();
              $cssExpr = $parser->parse($cssExpr);
          }

          $expr = $cssExpr->toXpath();

          // @codeCoverageIgnoreStart
          if (!$expr) {
              throw new ParseException(sprintf('Got None for xpath expression from %s.', $cssExpr));
          }
          // @codeCoverageIgnoreEnd

          if ($prefix) {
              $expr->addPrefix($prefix);
          }

          return (string) $expr;
      }

      /**
       * Parses an expression and returns the Node object that represents
       * the parsed expression.
       *
       * Any exception raised while parsing is re-thrown as an exception of the
       * same class whose message also shows the tokens consumed so far and the
       * token at which parsing stopped; the original exception is attached as
       * the "previous" exception.
       *
       * @param string $string The expression to parse
       *
       * @return Node\NodeInterface
       *
       * @throws \Exception When tokenizing or parsing fails
       */
      public function parse($string)
      {
          $tokenizer = new Tokenizer();
          $stream = new TokenStream($tokenizer->tokenize($string), $string);

          try {
              return $this->parseSelectorGroup($stream);
          } catch (\Exception $e) {
              $class = get_class($e);

              // implode() must receive the separator first: the reversed
              // (legacy) argument order was removed in PHP 8.0.
              throw new $class(sprintf('%s at %s -> %s', $e->getMessage(), implode('', $stream->getUsed()), $stream->peek()), 0, $e);
          }
      }

      /**
       * Parses a selector group contained in $stream and returns
       * the Node object that represents the expression.
       *
       * A group is one or more selectors separated by commas. A single
       * selector is returned as is; several are wrapped in an OrNode.
       *
       * @param TokenStream $stream The stream to parse.
       *
       * @return Node\NodeInterface
       *
       * @throws ParseException When one of the selectors is malformed
       */
      private function parseSelectorGroup($stream)
      {
          $result = array($this->parseSelector($stream));

          // Tokens are compared loosely on purpose: they are objects that are
          // matched against plain strings through their string form.
          while ($stream->peek() == ',') {
              $stream->next();
              $result[] = $this->parseSelector($stream);
          }

          if (1 === count($result)) {
              return $result[0];
          }

          return new Node\OrNode($result);
      }

      /**
       * Parses a selector contained in $stream and returns the Node
       * object that represents it.
       *
       * A selector is a chain of simple selectors joined by combinators
       * ("+", ">", "~", or " " when no explicit combinator token is found).
       *
       * @param TokenStream $stream The stream containing the selector.
       *
       * @return Node\NodeInterface
       *
       * @throws ParseException When expected selector but got something else
       */
      private function parseSelector($stream)
      {
          $result = $this->parseSimpleSelector($stream);

          // The loop only ends by returning (end of selector) or throwing.
          while (true) {
              $peek = $stream->peek();
              if (',' == $peek || null === $peek) {
                  return $result;
              }

              // Loose in_array() is required here: $peek is a token object.
              if (in_array($peek, array('+', '>', '~'))) {
                  // A combinator
                  $combinator = (string) $stream->next();
              } else {
                  $combinator = ' ';
              }

              // Parsing the next simple selector must consume at least one
              // token, otherwise the loop would never make progress.
              $consumed = count($stream->getUsed());
              $nextSelector = $this->parseSimpleSelector($stream);
              if ($consumed == count($stream->getUsed())) {
                  throw new ParseException(sprintf("Expected selector, got '%s'", $stream->peek()));
              }

              $result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
          }
      }

      /**
       * Parses a simple selector (the current token) from $stream and returns
       * the resulting Node object.
       *
       * A simple selector is an optional "namespace|element" (or "element",
       * or "*") followed by any number of "#id", ".class", "[attribute]" and
       * ":pseudo" / ":function(...)" parts.
       *
       * @param TokenStream $stream The stream containing the selector.
       *
       * @return Node\NodeInterface
       *
       * @throws ParseException When the stream is exhausted, or when a symbol,
       *                        "]" or ")" was expected but something else was found
       */
      private function parseSimpleSelector($stream)
      {
          $peek = $stream->peek();
          if (null === $peek) {
              // End of input where a selector has to start (e.g. after a
              // trailing combinator or comma): report it as a parse error
              // instead of calling a method on null.
              throw new ParseException(sprintf("Expected selector, got '%s'", $peek));
          }

          if ('*' != $peek && !$peek->isType('Symbol')) {
              // No element name given: match any element in any namespace.
              $element = $namespace = '*';
          } else {
              $next = $stream->next();
              if ('*' != $next && !$next->isType('Symbol')) {
                  throw new ParseException(sprintf("Expected symbol, got '%s'", $next));
              }

              if ($stream->peek() == '|') {
                  // "namespace|element" form.
                  $namespace = $next;
                  $stream->next();
                  $element = $stream->next();
                  // Validate the element token itself (not the namespace token
                  // that was already checked above).
                  if (!$element || ('*' != $element && !$element->isType('Symbol'))) {
                      throw new ParseException(sprintf("Expected symbol, got '%s'", $element));
                  }
              } else {
                  $namespace = '*';
                  $element = $next;
              }
          }

          $result = new Node\ElementNode($namespace, $element);
          $hasHash = false;

          while (true) {
              $peek = $stream->peek();

              if ('#' == $peek) {
                  if ($hasHash) {
                      /* You can't have two hashes
                         (FIXME: is there some more general rule I'm missing?) */
                      // @codeCoverageIgnoreStart
                      break;
                      // @codeCoverageIgnoreEnd
                  }

                  $stream->next();
                  $result = new Node\HashNode($result, $stream->next());
                  $hasHash = true;

                  continue;
              }

              if ('.' == $peek) {
                  $stream->next();
                  $result = new Node\ClassNode($result, $stream->next());

                  continue;
              }

              if ('[' == $peek) {
                  $stream->next();
                  $result = $this->parseAttrib($result, $stream);
                  $next = $stream->next();
                  if (']' != $next) {
                      throw new ParseException(sprintf("] expected, got '%s'", $next));
                  }

                  continue;
              }

              if (':' == $peek || '::' == $peek) {
                  $type = $stream->next();
                  $ident = $stream->next();
                  if (!$ident || !$ident->isType('Symbol')) {
                      throw new ParseException(sprintf("Expected symbol, got '%s'", $ident));
                  }

                  if ($stream->peek() == '(') {
                      // Functional pseudo-class, e.g. ":name(argument)".
                      $stream->next();
                      $peek = $stream->peek();
                      if (null !== $peek && $peek->isType('String')) {
                          $selector = $stream->next();
                      } elseif (null !== $peek && $peek->isType('Symbol') && is_int($peek)) {
                          // NOTE(review): $peek is an object here, so is_int()
                          // is always false and this branch is never taken;
                          // numeric arguments go through the branch below.
                          // Kept as is to preserve behavior.
                          $selector = intval($stream->next());
                      } else {
                          // FIXME: parseSimpleSelector, or selector, or...?
                          $selector = $this->parseSimpleSelector($stream);
                      }

                      $next = $stream->next();
                      if (')' != $next) {
                          throw new ParseException(sprintf("Expected ')', got '%s' and '%s'", $next, $selector));
                      }

                      $result = new Node\FunctionNode($result, $type, $ident, $selector);
                  } else {
                      $result = new Node\PseudoNode($result, $type, $ident);
                  }

                  continue;
              }

              // Anything else ends the simple selector; a whitespace token is
              // swallowed so the caller sees the token that follows it.
              if (' ' == $peek) {
                  $stream->next();
              }

              break;
              // FIXME: not sure what "negation" is
          }

          return $result;
      }

      /**
       * Parses an attribute from a selector contained in $stream and returns
       * the resulting AttribNode object.
       *
       * Handles "[attr]", "[ns|attr]" and "[attr OP value]" where OP is one
       * of ^=, $=, *=, =, ~=, |= or !=. The closing "]" is left in the stream
       * for the caller to consume.
       *
       * @param Node\NodeInterface $selector The selector object whose attribute
       *                                     is to be parsed.
       * @param TokenStream        $stream   The container token stream.
       *
       * @return Node\AttribNode
       *
       * @throws ParseException When the operator is unknown, or when the value
       *                        is missing or is neither a string nor a symbol
       */
      private function parseAttrib($selector, $stream)
      {
          $attrib = $stream->next();
          if ($stream->peek() == '|') {
              // "namespace|attribute" form.
              $namespace = $attrib;
              $stream->next();
              $attrib = $stream->next();
          } else {
              $namespace = '*';
          }

          if ($stream->peek() == ']') {
              // Bare "[attr]": only tests that the attribute is present.
              return new Node\AttribNode($selector, $namespace, $attrib, 'exists', null);
          }

          $op = $stream->next();
          // Loose in_array() is required here: $op is a token object.
          if (!in_array($op, array('^=', '$=', '*=', '=', '~=', '|=', '!='))) {
              throw new ParseException(sprintf("Operator expected, got '%s'", $op));
          }

          $value = $stream->next();
          // A missing value (end of input) is a parse error too.
          if (!$value || (!$value->isType('Symbol') && !$value->isType('String'))) {
              throw new ParseException(sprintf("Expected string or symbol, got '%s'", $value));
          }

          return new Node\AttribNode($selector, $namespace, $attrib, $op, $value);
      }
  }