PageRenderTime 47ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/simpletest/extensions/dom_tester/css_selector.php

http://skeleton.googlecode.com/
PHP | 311 lines | 249 code | 30 blank | 32 comment | 47 complexity | a508f1882b6a38dfd0ad02eab141021d MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
  1. <?php
  2. /**
  3. * @package SimpleTest
  4. * @subpackage DomTestCase
  5. * @author Perrick Penet <perrick@noparking.net>
  6. * @version $Id: css_selector.php 1538 2007-06-08 20:37:35Z pp11 $
  7. */
  8. /**
  9. * CssSelector
  10. *
  11. * Allows navigating a DOM document using CSS selectors.
  12. *
  13. * based on getElementsBySelector version 0.4 - Simon Willison, 2003-03-25
  14. * http://simon.incutio.com/archive/2003/03/25/getElementsBySelector
  15. *
  16. * derived from sfDomCssSelector Id 3053 (Symfony version 1.0.2) - Fabien Potencier, 2006-12-16
  17. * http://www.symfony-project.com/api/symfony/util/sfDomCssSelector.html
  18. *
  19. * @param DomDocument $dom
  20. *
  21. */
  class CssSelector {
      /**
       * Document the selectors are evaluated against.
       *
       * @var DOMDocument
       */
      protected $dom = null;

      /**
       * @param DOMDocument $dom document to query
       */
      public function __construct($dom)
      {
          $this->dom = $dom;
      }

      /**
       * Returns the text content (nodeValue) of every element matching the selector.
       *
       * @param string $selector CSS selector, possibly a comma separated group
       * @return array list of strings, in match order
       */
      public function getTexts($selector)
      {
          $texts = array();
          foreach ($this->getElements($selector) as $element) {
              $texts[] = $element->nodeValue;
          }
          return $texts;
      }

      /**
       * Returns the elements matching the selector, without duplicates.
       *
       * Supported syntax: tag names, "*", "#id" / "tag#id", ".class" / "tag.class"
       * (several classes may be chained), attribute selectors with the
       * = ~= |= ^= $= *= operators, the ":first-child" pseudo class, the
       * descendant (space), child (">") and adjacent sibling ("+") combinators,
       * and comma separated groups.
       *
       * An ID token restarts the match from the element carrying that id,
       * regardless of the tokens that precede it.
       *
       * The document is never modified by a query.
       *
       * @param string $selector CSS selector
       * @return array list of DOMElement, in order of first match
       */
      public function getElements($selector)
      {
          $all_nodes = array();
          $seen = array();
          foreach ($this->tokenize_selectors($selector) as $single) {
              $tokens = $this->tokenize($single);
              if (!$tokens) {
                  // Nothing to match: never hand the document itself back as a result.
                  continue;
              }
              $nodes = array($this->dom);
              foreach ($tokens as $token) {
                  $nodes = $this->applyToken($nodes, trim($token['name']), $token['combinator']);
              }
              foreach ($nodes as $node) {
                  // The DOM extension hands out one PHP object per node while it is
                  // referenced, so the object hash identifies the node.
                  $key = spl_object_hash($node);
                  if (!isset($seen[$key])) {
                      $seen[$key] = true;
                      $all_nodes[] = $node;
                  }
              }
          }
          return $all_nodes;
      }

      /**
       * Applies one simple selector (with its combinator) to a set of context nodes.
       *
       * @param array  $nodes      context nodes
       * @param string $name       simple selector, e.g. "div", "p.intro", "a[href]"
       * @param string $combinator ' ', '>' or '+'
       * @return array matching elements
       */
      protected function applyToken($nodes, $name, $combinator)
      {
          $pos = strpos($name, '#');
          if (false !== $pos && preg_match('/^[A-Za-z0-9]*$/', (string) substr($name, 0, $pos))) {
              // Token is an ID selector
              $tagName = (string) substr($name, 0, $pos);
              $element = $this->findById((string) substr($name, $pos + 1));
              if (!$element || ('' !== $tagName && strtolower($element->nodeName) !== strtolower($tagName))) {
                  // No element with that id (or not of the requested tag)
                  return array();
              }
              return array($element);
          }

          $pos = strpos($name, '.');
          if (false !== $pos && preg_match('/^[A-Za-z0-9]*$/', (string) substr($name, 0, $pos))) {
              // Token contains a class selector
              $tagName = (string) substr($name, 0, $pos);
              if ('' === $tagName) {
                  $tagName = '*';
              }
              $wanted = array();
              foreach (explode('.', (string) substr($name, $pos + 1)) as $className) {
                  if ('' !== $className) {
                      $wanted[] = $className;
                  }
              }
              $selected = array();
              foreach ($this->getElementsByTagName($nodes, $tagName, $combinator) as $found) {
                  if ($wanted && $this->hasAllClasses($found, $wanted)) {
                      $selected[] = $found;
                  }
              }
              return $selected;
          }

          if (preg_match('/^(\w*)(\[.+\])$/', $name, $matches)) {
              // Token contains one or more attribute selectors
              $tagName = '' !== $matches[1] ? $matches[1] : '*';
              preg_match_all('/
                  \[
                  (\w+)             # attribute
                  ([=~\|\^\$\*]?)   # modifier (optional)
                  =?                # equal (optional)
                  (
                  "([^"]*)"         # quoted value (optional)
                  |
                  ([^\]]*)          # non quoted value (optional)
                  )
                  \]
                  /x', $matches[2], $conditions, PREG_SET_ORDER);
              $selected = array();
              foreach ($this->getElementsByTagName($nodes, $tagName, $combinator) as $found) {
                  $ok = false;
                  foreach ($conditions as $condition) {
                      // Group 4 holds a quoted value, group 5 an unquoted one.
                      if (isset($condition[4]) && '' !== $condition[4]) {
                          $value = $condition[4];
                      } else {
                          $value = isset($condition[5]) ? $condition[5] : '';
                      }
                      $ok = $this->attributeMatches($found, $condition[1], $condition[2], $value);
                      if (!$ok) {
                          break;
                      }
                  }
                  if ($ok) {
                      $selected[] = $found;
                  }
              }
              return $selected;
          }

          if (preg_match('/^(\w*):first-child$/', $name, $matches)) {
              // Keep the real combinator and only retain first element children.
              $tagName = '' !== $matches[1] ? $matches[1] : '*';
              $selected = array();
              foreach ($this->getElementsByTagName($nodes, $tagName, $combinator) as $found) {
                  if ($this->isFirstElementChild($found)) {
                      $selected[] = $found;
                  }
              }
              return $selected;
          }

          // If we get here, token is JUST an element (not a class or ID selector)
          return $this->getElementsByTagName($nodes, $name, $combinator);
      }

      /**
       * Finds the first element whose id attribute equals $id.
       *
       * The comparison is done in PHP so that quotes in $id cannot break
       * (or alter) the XPath expression.
       *
       * @param string $id
       * @return DOMElement|null
       */
      protected function findById($id)
      {
          $xpath = new DOMXPath($this->dom);
          foreach ($xpath->query('//*[@id]') as $candidate) {
              if ($candidate->getAttribute('id') === $id) {
                  return $candidate;
              }
          }
          return null;
      }

      /**
       * Splits a string into its whitespace separated words.
       *
       * @param string $text
       * @return array
       */
      protected function splitWords($text)
      {
          return preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
      }

      /**
       * Tells whether the element's class attribute contains every given class name.
       *
       * @param DOMElement $element
       * @param array      $classNames
       * @return bool
       */
      protected function hasAllClasses($element, $classNames)
      {
          $present = $this->splitWords($element->getAttribute('class'));
          foreach ($classNames as $className) {
              if (!in_array($className, $present, true)) {
                  return false;
              }
          }
          return true;
      }

      /**
       * Evaluates a single attribute condition against an element.
       *
       * @param DOMElement $element
       * @param string     $attrName
       * @param string     $operator '', '=', '~', '|', '^', '$' or '*'
       * @param string     $value    expected value (ignored for '')
       * @return bool
       */
      protected function attributeMatches($element, $attrName, $operator, $value)
      {
          if (!$element->hasAttribute($attrName)) {
              return false;
          }
          $actual = $element->getAttribute($attrName);
          switch ($operator) {
              case '=': // Equality
                  return $actual === $value;
              case '~': // One of the whitespace separated words equals the value
                  return '' !== $value
                      && !preg_match('/\s/', $value)
                      && in_array($value, $this->splitWords($actual), true);
              case '|': // Exactly the value, or the value followed by a hyphen
                  return $actual === $value || 0 === strncmp($actual, $value.'-', strlen($value) + 1);
              case '^': // Starts with the value
                  return '' !== $value && 0 === strncmp($actual, $value, strlen($value));
              case '$': // Ends with the value
                  return '' !== $value && substr($actual, -strlen($value)) === $value;
              case '*': // Contains the value
                  return '' !== $value && false !== strpos($actual, $value);
              default: // Attribute merely has to exist
                  return true;
          }
      }

      /**
       * Tells whether a node is an element named $tagName ("*" matches any element).
       *
       * @param DOMNode $node
       * @param string  $tagName
       * @return bool
       */
      protected function matchesTagName($node, $tagName)
      {
          return XML_ELEMENT_NODE === $node->nodeType
              && ('*' === $tagName || $tagName === $node->nodeName);
      }

      /**
       * Tells whether no element precedes the node among its siblings.
       *
       * @param DOMNode $node
       * @return bool
       */
      protected function isFirstElementChild($node)
      {
          for ($sibling = $node->previousSibling; null !== $sibling; $sibling = $sibling->previousSibling) {
              if (XML_ELEMENT_NODE === $sibling->nodeType) {
                  return false;
              }
          }
          return true;
      }

      /**
       * Collects the elements named $tagName that are related to the context
       * nodes through the given combinator.
       *
       * @param array  $nodes      context nodes (DOMDocument or DOMElement)
       * @param string $tagName    element name, or "*" for any element
       * @param string $combinator ' ' (descendant), '>' (child), '+' (next element
       *                           sibling) or ':first-child' (descendants that are
       *                           the first element child of their parent)
       * @return array list of DOMElement; empty for an unknown combinator
       */
      protected function getElementsByTagName($nodes, $tagName, $combinator = ' ')
      {
          $founds = array();
          foreach ($nodes as $node) {
              switch ($combinator) {
                  case ' ':
                      foreach ($node->getElementsByTagName($tagName) as $element) {
                          $founds[] = $element;
                      }
                      break;
                  case '>':
                      foreach ($node->childNodes as $element) {
                          if ($this->matchesTagName($element, $tagName)) {
                              $founds[] = $element;
                          }
                      }
                      break;
                  case '+':
                      // Skip text, comments, ... up to the next element sibling, if any.
                      $element = $node->nextSibling;
                      while (null !== $element && XML_ELEMENT_NODE !== $element->nodeType) {
                          $element = $element->nextSibling;
                      }
                      if (null !== $element && $this->matchesTagName($element, $tagName)) {
                          $founds[] = $element;
                      }
                      break;
                  case ':first-child':
                      foreach ($node->getElementsByTagName($tagName) as $element) {
                          if ($this->isFirstElementChild($element)) {
                              $founds[] = $element;
                          }
                      }
                      break;
              }
          }
          return $founds;
      }

      /**
       * Splits a selector group on commas, ignoring commas inside double quotes.
       *
       * @param string $selector
       * @return array list of trimmed, non-empty selectors
       */
      protected function tokenize_selectors($selector)
      {
          $tokens = array();
          $quoted = false;
          $token = '';
          for ($i = 0, $max = strlen($selector); $i < $max; $i++) {
              $char = $selector[$i];
              if (',' === $char && !$quoted) {
                  if ('' !== trim($token)) {
                      $tokens[] = trim($token);
                  }
                  $token = '';
                  continue;
              }
              if ('"' === $char) {
                  $quoted = !$quoted;
              }
              $token .= $char;
          }
          if ('' !== trim($token)) {
              $tokens[] = trim($token);
          }
          return $tokens;
      }

      /**
       * Splits a single selector into simple selectors and their combinators.
       *
       * Combinators inside double quotes are part of the token. Leading and
       * trailing combinators never produce an empty token.
       *
       * @param string $selector
       * @return array list of array('combinator' => string, 'name' => string)
       */
      protected function tokenize($selector)
      {
          $tokens = array();
          $combinators = array(' ', '>', '+');
          $quoted = false;
          $token = array('combinator' => ' ', 'name' => '');
          for ($i = 0, $max = strlen($selector); $i < $max; $i++) {
              $char = $selector[$i];
              if (!$quoted && in_array($char, $combinators, true)) {
                  // Collapse the whole run of combinator characters; a non-space wins.
                  $combinator = $char;
                  while ($i + 1 < $max && in_array($selector[$i + 1], $combinators, true)) {
                      if (' ' !== $selector[++$i]) {
                          $combinator = $selector[$i];
                      }
                  }
                  if ('' !== $token['name']) {
                      $tokens[] = $token;
                  }
                  $token = array('combinator' => $combinator, 'name' => '');
                  continue;
              }
              if ('"' === $char) {
                  $quoted = !$quoted;
              }
              $token['name'] .= $char;
          }
          if ('' !== $token['name']) {
              $tokens[] = $token;
          }
          return $tokens;
      }
  }