PageRenderTime 45ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/vendor/symfony/lib/util/sfDomCssSelector.class.php

https://github.com/IDCI-Consulting/WebsiteEval
PHP | 626 lines | 494 code | 66 blank | 66 comment | 80 complexity | f625d1b08b8a9fae7f29f9f1d6e0cb6f MD5 | raw file
  1. <?php
  2. /*
  3. * This file is part of the symfony package.
  4. * (c) 2004-2006 Fabien Potencier <fabien.potencier@symfony-project.com>
  5. *
  6. * For the full copyright and license information, please view the LICENSE
  7. * file that was distributed with this source code.
  8. */
  9. /**
  10. * sfDomCssSelector allows navigating a DOM with CSS selectors.
  11. *
  12. * Based on getElementsBySelector version 0.4 - Simon Willison, March 25th 2003
  13. * http://simon.incutio.com/archive/2003/03/25/getElementsBySelector
  14. *
  15. * Some methods based on the jquery library
  16. *
  17. * @package symfony
  18. * @subpackage util
  19. * @author Fabien Potencier <fabien.potencier@symfony-project.com>
  20. * @version SVN: $Id: sfDomCssSelector.class.php 31893 2011-01-24 18:11:45Z fabien $
  21. */
  class sfDomCssSelector implements Countable, Iterator
  {
    /**
     * @var array The DOM nodes currently selected by this object
     */
    public $nodes = array();

    /**
     * @var integer Number of nodes still to be visited by the current iteration (set by rewind())
     */
    private $count;

    /**
     * Constructor.
     *
     * @param mixed $nodes A single DOM node or an array of DOM nodes
     */
    public function __construct($nodes)
    {
      if (!is_array($nodes))
      {
        $nodes = array($nodes);
      }

      $this->nodes = $nodes;
    }

    /**
     * Returns every selected node.
     *
     * @return array An array of DOM nodes
     */
    public function getNodes()
    {
      return $this->nodes;
    }

    /**
     * Returns the first selected node.
     *
     * @return mixed The first node, or null when the selection is empty
     */
    public function getNode()
    {
      return $this->nodes ? $this->nodes[0] : null;
    }

    /**
     * Returns the nodeValue of the first selected node.
     *
     * @return string|null The value, or null when the selection is empty
     */
    public function getValue()
    {
      $node = $this->getNode();

      // an empty selection has no value; do not dereference a missing node
      return $node ? $node->nodeValue : null;
    }

    /**
     * Returns the nodeValue of every selected node.
     *
     * @return array An array of strings, in selection order
     */
    public function getValues()
    {
      $values = array();
      foreach ($this->nodes as $node)
      {
        $values[] = $node->nodeValue;
      }

      return $values;
    }

    /**
     * Returns a new selector object wrapping the first node matching a CSS selector.
     *
     * @param string $selector A CSS selector
     *
     * @return sfDomCssSelector A selector object holding at most one node
     */
    public function matchSingle($selector)
    {
      $nodes = $this->getElements($selector);

      return $nodes ? new sfDomCssSelector($nodes[0]) : new sfDomCssSelector(array());
    }

    /**
     * Returns a new selector object wrapping every node matching a CSS selector.
     *
     * @param string $selector A CSS selector
     *
     * @return sfDomCssSelector A selector object holding the matching nodes
     */
    public function matchAll($selector)
    {
      $nodes = $this->getElements($selector);

      return $nodes ? new sfDomCssSelector($nodes) : new sfDomCssSelector(array());
    }

    /**
     * Runs a CSS selector against every node of the current selection.
     *
     * getElementsForNode() flags each node it returns with a temporary
     * "sf_matched" attribute so that a node is only returned once; the
     * attribute is removed here once all the roots have been processed.
     *
     * @param string $selector A CSS selector
     *
     * @return array The matching DOM nodes
     */
    protected function getElements($selector)
    {
      $nodes = array();
      foreach ($this->nodes as $node)
      {
        $result_nodes = $this->getElementsForNode($selector, $node);
        if ($result_nodes)
        {
          $nodes = array_merge($nodes, $result_nodes);
        }
      }

      foreach ($nodes as $node)
      {
        $node->removeAttribute('sf_matched');
      }

      return $nodes;
    }

    /**
     * Runs a CSS selector (possibly a comma separated list) against one root node.
     *
     * Every node returned is flagged with a "sf_matched" attribute; the caller
     * is responsible for removing it (see getElements()).
     *
     * @param string  $selector  A CSS selector
     * @param DOMNode $root_node The node to search from
     *
     * @return array The matching DOM nodes, without duplicates
     */
    protected function getElementsForNode($selector, $root_node)
    {
      $all_nodes = array();
      foreach ($this->tokenize_selectors($selector) as $group)
      {
        $nodes = array($root_node);
        foreach ($this->tokenize($group) as $token)
        {
          $combinator = $token['combinator'];
          $custom     = $token['selector'];
          $name       = trim($token['name']);

          $pos = strpos($name, '#');
          if (false !== $pos && preg_match('/^[A-Za-z0-9]*$/', substr($name, 0, $pos)))
          {
            // Token is an ID selector; the lookup is document-wide and ignores the combinator
            $element = $this->findElementById($root_node, substr($name, 0, $pos), substr($name, $pos + 1));
            if (!$element)
            {
              // Nothing can match this group any more, but the other comma
              // separated groups (and the nodes they already flagged) must be kept
              $nodes = array();

              break;
            }

            $nodes = $this->matchMultipleCustomSelectors(array($element), $custom);

            continue; // Skip to next token
          }

          $pos = strpos($name, '.');
          if (false !== $pos && preg_match('/^[A-Za-z0-9\*]*$/', substr($name, 0, $pos)))
          {
            // Token contains one or more class selectors (tag.class1.class2)
            $tagName = substr($name, 0, $pos);
            if ('' === $tagName)
            {
              $tagName = '*';
            }

            $founds = $this->getElementsByTagName($nodes, $tagName, $combinator);
            $nodes = $this->filterByClassNames($founds, explode('.', substr($name, $pos + 1)));
            $nodes = $this->matchMultipleCustomSelectors($nodes, $custom);

            continue; // Skip to next token
          }

          // Code to deal with attribute selectors (the tag name is optional)
          if (preg_match('/^(\w+|\*)?(\[.+\])$/', $name, $matches))
          {
            $tagName = '' !== $matches[1] ? $matches[1] : '*';

            preg_match_all('/
              \[
                ([\w\-]+)             # attribute
                ([=~\|\^\$\*]?)       # modifier (optional)
                =?                    # equal (optional)
                (
                  "([^"]*)"           # quoted value (optional)
                  |
                  ([^\]]*)            # non quoted value (optional)
                )
              \]
            /x', $matches[2], $conditions, PREG_SET_ORDER);

            // Grab all of the tagName elements within current nodes
            $founds = $this->getElementsByTagName($nodes, $tagName, $combinator);
            $nodes = array();
            foreach ($founds as $found)
            {
              if ($this->matchAttributeConditions($found, $conditions))
              {
                $nodes[] = $found;
              }
            }
            $nodes = $this->matchMultipleCustomSelectors($nodes, $custom);

            continue; // Skip to next token
          }

          // If we get here, token is JUST an element (not a class or ID selector)
          $nodes = $this->getElementsByTagName($nodes, $name, $combinator);
          $nodes = $this->matchMultipleCustomSelectors($nodes, $custom);
        }

        foreach ($nodes as $node)
        {
          if (!$node->getAttribute('sf_matched'))
          {
            $node->setAttribute('sf_matched', true);
            $all_nodes[] = $node;
          }
        }
      }

      return $all_nodes;
    }

    /**
     * Finds the first element of the document carrying a given id.
     *
     * @param DOMNode $root_node A document, or a node belonging to the document to search
     * @param string  $tagName   The expected tag name (an empty string accepts any tag)
     * @param string  $id        The id to look for
     *
     * @return DOMElement|null The element, or null if there is none or if its tag differs
     */
    protected function findElementById($root_node, $tagName, $id)
    {
      // DOMXPath only accepts a document, but the root is an element when selections are chained
      $document = $root_node instanceof DOMDocument ? $root_node : $root_node->ownerDocument;
      if (!$document)
      {
        return null;
      }

      $xpath = new DOMXPath($document);
      $found = $xpath->query(sprintf('//*[@id = %s]', $this->quoteForXPath($id)));
      $element = $found ? $found->item(0) : null;

      if (!$element || ('' !== $tagName && strtolower($element->nodeName) != strtolower($tagName)))
      {
        return null;
      }

      return $element;
    }

    /**
     * Converts a string to an XPath 1.0 string literal.
     *
     * XPath 1.0 has no escape character, so a value holding both kinds of
     * quotes is built with concat().
     *
     * @param string $value The raw value
     *
     * @return string An XPath expression evaluating to the value
     */
    protected function quoteForXPath($value)
    {
      if (false === strpos($value, "'"))
      {
        return "'".$value."'";
      }

      if (false === strpos($value, '"'))
      {
        return '"'.$value.'"';
      }

      return "concat('".str_replace("'", "', \"'\", '", $value)."')";
    }

    /**
     * Keeps the elements whose class attribute holds all the given class names.
     *
     * @param array $elements   An array of DOM elements
     * @param array $classNames The class names that must all be present
     *
     * @return array The matching elements
     */
    protected function filterByClassNames($elements, $classNames)
    {
      $matching = array();
      foreach ($elements as $element)
      {
        $classes = preg_split('/\s+/', $element->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);

        // array_diff() lists the requested names missing from the element
        if (!array_diff($classNames, $classes))
        {
          $matching[] = $element;
        }
      }

      return $matching;
    }

    /**
     * Checks an element against all the conditions of an attribute selector.
     *
     * @param DOMElement $element    The element to test
     * @param array      $conditions The matches of the attribute regex (PREG_SET_ORDER)
     *
     * @return boolean true if there is at least one condition and all of them hold
     */
    protected function matchAttributeConditions($element, $conditions)
    {
      if (!$conditions)
      {
        return false;
      }

      foreach ($conditions as $condition)
      {
        // group 4 is the quoted value, group 5 the non quoted one
        $attrValue = '' === $condition[4] ? (isset($condition[5]) ? $condition[5] : '') : $condition[4];

        if (!$this->matchAttribute($element, $condition[1], $condition[2], $attrValue))
        {
          return false;
        }
      }

      return true;
    }

    /**
     * Checks one attribute condition against an element.
     *
     * @param DOMElement $element      The element to test
     * @param string     $attrName     The attribute name
     * @param string     $attrOperator One of "=", "~", "|", "^", "$", "*" or an empty string
     * @param string     $attrValue    The value to compare with
     *
     * @return boolean true if the condition holds
     */
    protected function matchAttribute($element, $attrName, $attrOperator, $attrValue)
    {
      $actual = $element->getAttribute($attrName);

      switch ($attrOperator)
      {
        case '=': // Equality (strict, so that numeric looking strings are not compared as numbers)
          return $actual === $attrValue;
        case '~': // Match one of space separated words
          return (bool) preg_match('/\b'.preg_quote($attrValue, '/').'\b/', $actual);
        case '|': // Match start with value followed by optional hyphen
          return (bool) preg_match('/^'.preg_quote($attrValue, '/').'-?/', $actual);
        case '^': // Match starts with value
          return 0 === strpos($actual, $attrValue);
        case '$': // Match ends with value
          return $attrValue === (string) substr($actual, -strlen($attrValue));
        case '*': // Match contains value
          return false !== strpos($actual, $attrValue);
        default:
          // Just test for existence of attribute
          return $element->hasAttribute($attrName);
      }
    }

    /**
     * Collects the elements related to some nodes through a combinator.
     *
     * @param array  $nodes      The context nodes
     * @param string $tagName    A tag name, or "*" for any element
     * @param string $combinator " " (descendant), ">" (child) or "+" (adjacent sibling)
     *
     * @return array The elements found
     *
     * @throws Exception If the combinator is not recognized
     */
    protected function getElementsByTagName($nodes, $tagName, $combinator = ' ')
    {
      $founds = array();
      foreach ($nodes as $node)
      {
        switch ($combinator)
        {
          case ' ':
            // Descendant selector
            foreach ($node->getElementsByTagName($tagName) as $element)
            {
              $founds[] = $element;
            }
            break;
          case '>':
            // Child selector
            foreach ($node->childNodes as $element)
            {
              if ($this->isElementNamed($element, $tagName))
              {
                $founds[] = $element;
              }
            }
            break;
          case '+':
            // Adjacent selector: text and comment nodes in between are ignored
            $element = $node->nextSibling;
            while ($element && XML_ELEMENT_NODE !== $element->nodeType)
            {
              $element = $element->nextSibling;
            }
            if ($element && $this->isElementNamed($element, $tagName))
            {
              $founds[] = $element;
            }
            break;
          default:
            throw new Exception(sprintf('Unrecognized combinator "%s".', $combinator));
        }
      }

      return $founds;
    }

    /**
     * Tells whether a node is an element with the given tag name.
     *
     * @param DOMNode $node    The node to test
     * @param string  $tagName A tag name, or "*" for any element
     *
     * @return boolean
     */
    protected function isElementNamed($node, $tagName)
    {
      return XML_ELEMENT_NODE === $node->nodeType && ('*' === $tagName || $tagName === $node->nodeName);
    }

    /**
     * Splits a selector list on commas, except inside double quotes.
     *
     * @param string $selector A comma separated list of CSS selectors
     *
     * @return array The trimmed, non empty selectors
     */
    protected function tokenize_selectors($selector)
    {
      $tokens = array();
      $quoted = false;
      $token = '';
      for ($i = 0, $max = strlen($selector); $i < $max; $i++)
      {
        $char = $selector[$i];
        if (',' === $char && !$quoted)
        {
          // an empty group ("a,,b" or "a,") selects nothing, so it is dropped
          if ('' !== trim($token))
          {
            $tokens[] = trim($token);
          }
          $token = '';

          continue;
        }

        if ('"' === $char)
        {
          $quoted = !$quoted;
        }
        $token .= $char;
      }

      if ('' !== trim($token))
      {
        $tokens[] = trim($token);
      }

      return $tokens;
    }

    /**
     * Splits a single selector into tokens on combinators, except inside double quotes.
     *
     * @param string $selector A CSS selector without any comma separated alternative
     *
     * @return array An array of tokens; each one has the keys "combinator",
     *               "name" and "selector" (the custom ":..." part of the name)
     */
    protected function tokenize($selector)
    {
      $tokens = array();
      $combinators = array(' ', '>', '+');
      $quoted = false;
      $token = array('combinator' => ' ', 'name' => '');
      for ($i = 0, $max = strlen($selector); $i < $max; $i++)
      {
        $char = $selector[$i];
        if (!$quoted && in_array($char, $combinators, true))
        {
          // remove all whitespaces around the combinator
          // (the bound check stops the look-ahead at the end of the string)
          $combinator = $char;
          while ($i + 1 < $max && in_array($selector[$i + 1], $combinators, true))
          {
            if (' ' !== $selector[++$i])
            {
              $combinator = $selector[$i];
            }
          }

          $tokens[] = $token;
          $token = array('combinator' => $combinator, 'name' => '');

          continue;
        }

        if ('"' === $char)
        {
          $quoted = !$quoted;
        }
        $token['name'] .= $char;
      }

      if ('' !== $token['name'])
      {
        $tokens[] = $token;
      }

      foreach ($tokens as $index => $parsed)
      {
        list($name, $custom) = $this->tokenize_selector_name($parsed['name']);
        $tokens[$index]['name'] = $name;
        $tokens[$index]['selector'] = $custom;
      }

      return $tokens;
    }

    /**
     * Separates a token name from its custom selectors.
     *
     * Everything from the first ":" found outside double quotes belongs to
     * the custom selector part.
     *
     * @param string $token_name A token name, e.g. 'li:first' or 'a[href="x:y"]'
     *
     * @return array An array made of the name and of the custom selector string
     */
    protected function tokenize_selector_name($token_name)
    {
      $quoted = false;
      $name = '';
      $selector = '';
      $in_selector = false;
      for ($i = 0, $max = strlen($token_name); $i < $max; $i++)
      {
        $char = $token_name[$i];
        if ('"' === $char)
        {
          $quoted = !$quoted;
        }

        if (!$quoted && ':' === $char)
        {
          $in_selector = true;
        }

        if ($in_selector)
        {
          $selector .= $char;
        }
        else
        {
          $name .= $char;
        }
      }

      return array($name, $selector);
    }

    /**
     * Filters nodes through a chain of custom selectors (e.g. ":first:contains(foo)").
     *
     * @param array  $nodes    An array of DOM nodes
     * @param string $selector The custom selectors, or an empty string for none
     *
     * @return array The nodes accepted by every custom selector
     */
    protected function matchMultipleCustomSelectors($nodes, $selector)
    {
      if (!$selector)
      {
        return $nodes;
      }

      foreach ($this->split_custom_selector($selector) as $custom)
      {
        $nodes = $this->matchCustomSelector($nodes, $custom);
      }

      return $nodes;
    }

    /**
     * Filters nodes through one custom selector.
     *
     * Positional selectors (lt, gt, odd, even, nth, eq, first, last) use the
     * zero based position of the node in $nodes.
     *
     * @param array  $nodes    A zero indexed array of DOM nodes
     * @param string $selector A custom selector, leading colon included
     *
     * @return array The matching nodes
     *
     * @throws Exception If the selector is not recognized
     */
    protected function matchCustomSelector($nodes, $selector)
    {
      if (!$selector)
      {
        return $nodes;
      }

      $parsed = $this->tokenize_custom_selector($selector);
      $type = $parsed['selector'];
      $parameter = $parsed['parameter'];

      $matchingNodes = array();
      for ($i = 0, $max = count($nodes); $i < $max; $i++)
      {
        $node = $nodes[$i];
        switch ($type)
        {
          case 'contains':
            $keep = false !== strpos($node->textContent, $parameter);
            break;
          case 'nth-child':
            $keep = $node === $this->nth($node->parentNode->firstChild, (int) $parameter);
            break;
          case 'first-child':
            $keep = $node === $this->nth($node->parentNode->firstChild);
            break;
          case 'last-child':
            $keep = $node === $this->nth($node->parentNode->lastChild, 1, 'previousSibling');
            break;
          case 'lt':
            $keep = $i < (int) $parameter;
            break;
          case 'gt':
            $keep = $i > (int) $parameter;
            break;
          case 'odd':
            $keep = 1 === $i % 2;
            break;
          case 'even':
            $keep = 0 === $i % 2;
            break;
          case 'nth':
          case 'eq':
            $keep = $i === (int) $parameter;
            break;
          case 'first':
            $keep = 0 === $i;
            break;
          case 'last':
            $keep = $i === $max - 1;
            break;
          default:
            throw new Exception(sprintf('Unrecognized selector "%s".', $type));
        }

        if ($keep)
        {
          $matchingNodes[] = $node;
        }
      }

      return $matchingNodes;
    }

    /**
     * Splits a chain of custom selectors into individual ones.
     *
     * @param string $selectors The custom selectors, e.g. ':first:contains("foo")'
     *
     * @return array The custom selectors, each with its leading colon
     *
     * @throws Exception If no custom selector can be found
     */
    protected function split_custom_selector($selectors)
    {
      if (!preg_match_all('/
        :
        (?:[a-zA-Z0-9\-]+)
        (?:
          \(
            (?:
              ("|\')(?:.*?)?\1
              |
              (?:.*?)
            )
          \)
        )?
      /x', $selectors, $matches, PREG_PATTERN_ORDER))
      {
        throw new Exception(sprintf('Unable to split custom selector "%s".', $selectors));
      }

      return $matches[0];
    }

    /**
     * Parses one custom selector into its name and its parameter.
     *
     * @param string $selector A custom selector, leading colon included
     *
     * @return array An array with the keys "selector" (the name) and
     *               "parameter" (an empty string when there is none)
     *
     * @throws Exception If the selector cannot be parsed
     */
    protected function tokenize_custom_selector($selector)
    {
      if (!preg_match('/
        ([a-zA-Z0-9\-]+)
        (?:
          \(
            (?:
              ("|\')(.*)?\2
              |
              (.*?)
            )
          \)
        )?
      /x', substr($selector, 1), $matches))
      {
        throw new Exception(sprintf('Unable to parse custom selector "%s".', $selector));
      }

      // group 3 is the quoted parameter, group 4 the non quoted one; both are
      // compared with '' so that a "0" parameter is not mistaken for a missing one
      $parameter = '';
      if (isset($matches[3]) && '' !== $matches[3])
      {
        $parameter = $matches[3];
      }
      else if (isset($matches[4]))
      {
        $parameter = $matches[4];
      }

      return array('selector' => $matches[1], 'parameter' => $parameter);
    }

    /**
     * Walks siblings and returns the node reached when the n-th element is counted.
     *
     * @param DOMNode|null $cur    The node to start from
     * @param integer      $result The number of elements to count
     * @param string       $dir    The property to follow: "nextSibling" or "previousSibling"
     *
     * @return DOMNode|null The node reached, or null if the siblings run out first
     */
    protected function nth($cur, $result = 1, $dir = 'nextSibling')
    {
      $num = 0;
      for (; $cur; $cur = $cur->$dir)
      {
        if (XML_ELEMENT_NODE == $cur->nodeType)
        {
          ++$num;
        }

        if ($num == $result)
        {
          return $cur;
        }
      }

      return null;
    }

    /**
     * Reset the array to the beginning (as required for the Iterator interface).
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
      reset($this->nodes);

      $this->count = count($this->nodes);
    }

    /**
     * Get the key associated with the current value (as required by the Iterator interface).
     *
     * @return mixed The key
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
      return key($this->nodes);
    }

    /**
     * Returns the current node (as required by the Iterator interface).
     *
     * @return mixed The current node
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
      return current($this->nodes);
    }

    /**
     * Moves to the next element (as required by the Iterator interface).
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
      next($this->nodes);

      $this->count--;
    }

    /**
     * Returns true if the current element is valid (as required by the Iterator interface).
     *
     * The current element will not be valid if {@link next()} has fallen off the
     * end of the array or if there are no elements in the array and {@link
     * rewind()} was called.
     *
     * @return bool The validity of the current element; true if it is valid
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
      return $this->count > 0;
    }

    /**
     * Returns the number of matching nodes (implements Countable).
     *
     * @return integer The number of matching nodes
     */
    #[\ReturnTypeWillChange]
    public function count()
    {
      return count($this->nodes);
    }
  }