PageRenderTime 50ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/util/sfDomCssSelector.class.php

https://github.com/theodo/symfony1.0-backports
PHP | 303 lines | 241 code | 29 blank | 33 comment | 45 complexity | f3537a5da3fcb9409d8b1fad5b4d7069 MD5 | raw file
Possible License(s): LGPL-2.1, AGPL-3.0, BSD-3-Clause
  1. <?php
  2. /*
  3. * This file is part of the symfony package.
  4. * (c) 2004-2006 Fabien Potencier <fabien.potencier@symfony-project.com>
  5. *
  6. * For the full copyright and license information, please view the LICENSE
  7. * file that was distributed with this source code.
  8. */
  9. /**
  10. * sfDomCssSelector allows to navigate a DOM with CSS selector.
  11. *
  12. * based on getElementsBySelector version 0.4 - Simon Willison, March 25th 2003
  13. * http://simon.incutio.com/archive/2003/03/25/getElementsBySelector
  14. *
  15. * @package symfony
  16. * @subpackage util
  17. * @author Fabien Potencier <fabien.potencier@symfony-project.com>
  18. * @version SVN: $Id$
  19. */
  20. class sfDomCssSelector
  21. {
  22. protected $dom = null;
  23. public function __construct($dom)
  24. {
  25. $this->dom = $dom;
  26. }
  27. public function getTexts($selector)
  28. {
  29. $texts = array();
  30. foreach ($this->getElements($selector) as $element)
  31. {
  32. $texts[] = $element->nodeValue;
  33. }
  34. return $texts;
  35. }
  36. public function getElements($selector)
  37. {
  38. $all_nodes = array();
  39. foreach ($this->tokenize_selectors($selector) as $selector)
  40. {
  41. $nodes = array($this->dom);
  42. foreach ($this->tokenize($selector) as $token)
  43. {
  44. $combinator = $token['combinator'];
  45. $token = trim($token['name']);
  46. $pos = strpos($token, '#');
  47. if (false !== $pos && preg_match('/^[A-Za-z0-9]*$/', substr($token, 0, $pos)))
  48. {
  49. // Token is an ID selector
  50. $tagName = substr($token, 0, $pos);
  51. $id = substr($token, $pos + 1);
  52. $xpath = new DomXPath($this->dom);
  53. $element = $xpath->query(sprintf("//*[@id = '%s']", $id))->item(0);
  54. if (!$element || ($tagName && strtolower($element->nodeName) != $tagName))
  55. {
  56. // tag with that ID not found
  57. return array();
  58. }
  59. // Set nodes to contain just this element
  60. $nodes = array($element);
  61. continue; // Skip to next token
  62. }
  63. $pos = strpos($token, '.');
  64. if (false !== $pos && preg_match('/^[A-Za-z0-9]*$/', substr($token, 0, $pos)))
  65. {
  66. // Token contains a class selector
  67. $tagName = substr($token, 0, $pos);
  68. if (!$tagName)
  69. {
  70. $tagName = '*';
  71. }
  72. $className = substr($token, $pos + 1);
  73. // Get elements matching tag, filter them for class selector
  74. $founds = $this->getElementsByTagName($nodes, $tagName, $combinator);
  75. $nodes = array();
  76. foreach ($founds as $found)
  77. {
  78. if (preg_match('/\b'.$className.'\b/', $found->getAttribute('class')))
  79. {
  80. $nodes[] = $found;
  81. }
  82. }
  83. continue; // Skip to next token
  84. }
  85. // Code to deal with attribute selectors
  86. if (preg_match('/^(\w*)(\[.+\])$/', $token, $matches))
  87. {
  88. $tagName = $matches[1] ? $matches[1] : '*';
  89. preg_match_all('/
  90. \[
  91. ([\w\-]+) # attribute
  92. ([=~\|\^\$\*]?) # modifier (optional)
  93. =? # equal (optional)
  94. (
  95. "([^"]*)" # quoted value (optional)
  96. |
  97. ([^\]]*) # non quoted value (optional)
  98. )
  99. \]
  100. /x', $matches[2], $matches, PREG_SET_ORDER);
  101. // Grab all of the tagName elements within current node
  102. $founds = $this->getElementsByTagName($nodes, $tagName, $combinator);
  103. $nodes = array();
  104. foreach ($founds as $found)
  105. {
  106. $ok = false;
  107. foreach ($matches as $match)
  108. {
  109. $attrName = $match[1];
  110. $attrOperator = $match[2];
  111. $attrValue = $match[4];
  112. switch ($attrOperator)
  113. {
  114. case '=': // Equality
  115. $ok = $found->getAttribute($attrName) == $attrValue;
  116. break;
  117. case '~': // Match one of space seperated words
  118. $ok = preg_match('/\b'.preg_quote($attrValue, '/').'\b/', $found->getAttribute($attrName));
  119. break;
  120. case '|': // Match start with value followed by optional hyphen
  121. $ok = preg_match('/^'.preg_quote($attrValue, '/').'-?/', $found->getAttribute($attrName));
  122. break;
  123. case '^': // Match starts with value
  124. $ok = 0 === strpos($found->getAttribute($attrName), $attrValue);
  125. break;
  126. case '$': // Match ends with value
  127. $ok = $attrValue == substr($found->getAttribute($attrName), -strlen($attrValue));
  128. break;
  129. case '*': // Match ends with value
  130. $ok = false !== strpos($found->getAttribute($attrName), $attrValue);
  131. break;
  132. default :
  133. // Just test for existence of attribute
  134. $ok = $found->hasAttribute($attrName);
  135. }
  136. if (false == $ok)
  137. {
  138. break;
  139. }
  140. }
  141. if ($ok)
  142. {
  143. $nodes[] = $found;
  144. }
  145. }
  146. continue; // Skip to next token
  147. }
  148. // If we get here, token is JUST an element (not a class or ID selector)
  149. $nodes = $this->getElementsByTagName($nodes, $token, $combinator);
  150. }
  151. foreach ($nodes as $node)
  152. {
  153. if (!$node->getAttribute('sf_matched'))
  154. {
  155. $node->setAttribute('sf_matched', true);
  156. $all_nodes[] = $node;
  157. }
  158. }
  159. }
  160. foreach ($all_nodes as $node)
  161. {
  162. $node->removeAttribute('sf_matched');
  163. }
  164. return $all_nodes;
  165. }
  166. protected function getElementsByTagName($nodes, $tagName, $combinator = ' ')
  167. {
  168. $founds = array();
  169. foreach ($nodes as $node)
  170. {
  171. switch ($combinator)
  172. {
  173. case ' ':
  174. // Descendant selector
  175. foreach ($node->getElementsByTagName($tagName) as $element)
  176. {
  177. $founds[] = $element;
  178. }
  179. break;
  180. case '>':
  181. // Child selector
  182. foreach ($node->childNodes as $element)
  183. {
  184. if ($tagName == $element->nodeName)
  185. {
  186. $founds[] = $element;
  187. }
  188. }
  189. break;
  190. case '+':
  191. // Adjacent selector
  192. $element = $node->nextSibling;
  193. if ($element && '#text' == $element->nodeName)
  194. {
  195. $element = $element->nextSibling;
  196. }
  197. if ($element && $tagName == $element->nodeName)
  198. {
  199. $founds[] = $element;
  200. }
  201. break;
  202. }
  203. }
  204. return $founds;
  205. }
  206. protected function tokenize_selectors($selector)
  207. {
  208. // split tokens by , except in an attribute selector
  209. $tokens = array();
  210. $quoted = false;
  211. $token = '';
  212. for ($i = 0, $max = strlen($selector); $i < $max; $i++)
  213. {
  214. if (',' == $selector[$i] && !$quoted)
  215. {
  216. $tokens[] = trim($token);
  217. $token = '';
  218. }
  219. else if ('"' == $selector[$i])
  220. {
  221. $token .= $selector[$i];
  222. $quoted = $quoted ? false : true;
  223. }
  224. else
  225. {
  226. $token .= $selector[$i];
  227. }
  228. }
  229. if ($token)
  230. {
  231. $tokens[] = trim($token);
  232. }
  233. return $tokens;
  234. }
  235. protected function tokenize($selector)
  236. {
  237. // split tokens by space except if space is in an attribute selector
  238. $tokens = array();
  239. $combinators = array(' ', '>', '+');
  240. $quoted = false;
  241. $token = array('combinator' => ' ', 'name' => '');
  242. for ($i = 0, $max = strlen($selector); $i < $max; $i++)
  243. {
  244. if (in_array($selector[$i], $combinators) && !$quoted)
  245. {
  246. // remove all whitespaces around the combinator
  247. $combinator = $selector[$i];
  248. while (in_array($selector[$i + 1], $combinators))
  249. {
  250. if (' ' != $selector[++$i])
  251. {
  252. $combinator = $selector[$i];
  253. }
  254. }
  255. $tokens[] = $token;
  256. $token = array('combinator' => $combinator, 'name' => '');
  257. }
  258. else if ('"' == $selector[$i])
  259. {
  260. $token['name'] .= $selector[$i];
  261. $quoted = $quoted ? false : true;
  262. }
  263. else
  264. {
  265. $token['name'] .= $selector[$i];
  266. }
  267. }
  268. if ($token['name'])
  269. {
  270. $tokens[] = $token;
  271. }
  272. return $tokens;
  273. }
  274. }