PageRenderTime 60ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/script/lib/PHPUnit/Util/XML.php

https://bitbucket.org/renaatdemuynck/chamilo
PHP | 1072 lines | 677 code | 146 blank | 249 comment | 129 complexity | be21ae5699699357ffb73e17cc424573 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, LGPL-3.0, GPL-3.0, MIT, GPL-2.0
  1. <?php
  2. /**
  3. * PHPUnit
  4. *
  5. * Copyright (c) 2002-2011, Sebastian Bergmann <sebastian@phpunit.de>.
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * * Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * * Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * * Neither the name of Sebastian Bergmann nor the names of his
  21. * contributors may be used to endorse or promote products derived
  22. * from this software without specific prior written permission.
  23. *
  24. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  27. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  28. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  29. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  30. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  31. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  32. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  33. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  34. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  35. * POSSIBILITY OF SUCH DAMAGE.
  36. *
  37. * @package PHPUnit
  38. * @subpackage Util
  39. * @author Sebastian Bergmann <sebastian@phpunit.de>
  40. * @copyright 2002-2011 Sebastian Bergmann <sebastian@phpunit.de>
  41. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  42. * @link http://www.phpunit.de/
  43. * @since File available since Release 3.2.0
  44. */
  45. /**
  46. * XML helpers.
  47. *
  48. * @package PHPUnit
  49. * @subpackage Util
  50. * @author Sebastian Bergmann <sebastian@phpunit.de>
  51. * @copyright 2002-2011 Sebastian Bergmann <sebastian@phpunit.de>
  52. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  53. * @version Release: 3.5.9
  54. * @link http://www.phpunit.de/
  55. * @since Class available since Release 3.2.0
  56. */
  57. class PHPUnit_Util_XML
  58. {
  /**
   * Escapes a string so that it can be embedded in XML output.
   *
   * The string is first converted to UTF-8, then its HTML special
   * characters are escaped with htmlspecialchars() (ENT_COMPAT), and finally
   * the control characters 0x00-0x04, 0x0b, 0x0c, 0x0e-0x1f and 0x7f are
   * replaced by hexadecimal character references such as "&#x1b;".
   *
   * @param string $string
   * @return string
   * @author Kore Nordmann <mail@kore-nordmann.de>
   * @since Method available since Release 3.4.6
   */
  public static function prepareString($string)
  {
      // Lookup table "control character => character reference", built on
      // first use. It replaces a preg_replace() call that relied on the "e"
      // (eval) pattern modifier, which is deprecated since PHP 5.5 and no
      // longer supported by PHP 7.
      static $replacements = NULL;

      if ($replacements === NULL) {
          $codes = array_merge(range(0x00, 0x04), array(0x0b, 0x0c), range(0x0e, 0x1f), array(0x7f));
          $replacements = array();

          foreach ($codes as $code) {
              $replacements[chr($code)] = sprintf('&#x%02x;', $code);
          }
      }

      $escaped = htmlspecialchars(self::convertToUtf8($string), ENT_COMPAT, 'UTF-8');

      return strtr($escaped, $replacements);
  }
  /**
   * Returns the given string encoded as UTF-8.
   *
   * A string that isUtf8() already accepts is handed back untouched.
   * Anything else is passed through mb_convert_encoding() when that
   * function exists, and through utf8_encode() otherwise.
   *
   * @param string $string
   * @return string
   * @since Method available since Release 3.2.19
   */
  protected static function convertToUtf8($string)
  {
      if (self::isUtf8($string)) {
          return $string;
      }

      return function_exists('mb_convert_encoding')
          ? mb_convert_encoding($string, 'UTF-8')
          : utf8_encode($string);
  }
  /**
   * Checks whether a string is structurally valid UTF-8.
   *
   * Every lead byte must announce a sequence of one to four bytes and be
   * followed by the matching number of continuation bytes (10xxxxxx).
   * Only these bit patterns are inspected; overlong encodings and
   * surrogate code points are not rejected.
   *
   * @param string $string
   * @return boolean TRUE when all byte sequences are well formed
   * @since Method available since Release 3.3.0
   */
  protected static function isUtf8($string)
  {
      $length = strlen($string);

      for ($i = 0; $i < $length; $i++) {
          $byte = ord($string[$i]);

          if ($byte < 0x80) {
              // 0xxxxxxx: single byte
              $n = 0;
          } elseif (($byte & 0xE0) == 0xC0) {
              // 110xxxxx: one continuation byte follows
              $n = 1;
          } elseif (($byte & 0xF0) == 0xE0) {
              // 1110xxxx: two continuation bytes follow
              $n = 2;
          } elseif (($byte & 0xF8) == 0xF0) {
              // 11110xxx: three continuation bytes follow. The mask is 0xF8
              // so that 0xF8-0xFF, which never start a UTF-8 sequence, are
              // rejected instead of being taken for four-byte lead bytes.
              $n = 3;
          } else {
              return FALSE;
          }

          for ($j = 0; $j < $n; $j++) {
              // the sequence must neither be truncated nor contain a byte
              // that is not a continuation byte
              if ((++$i == $length) || ((ord($string[$i]) & 0xC0) != 0x80)) {
                  return FALSE;
              }
          }
      }

      return TRUE;
  }
  /**
   * Reads a file and parses its contents into a DOMDocument.
   *
   * The file is read with error reporting switched off; a failed read is
   * signalled by an exception instead. Parsing is delegated to load().
   *
   * @param string $filename
   * @param boolean $isHtml Parse the contents as HTML instead of XML
   * @return DOMDocument
   * @throws PHPUnit_Framework_Exception when the file cannot be read
   * @since Method available since Release 3.3.0
   */
  public static function loadFile($filename, $isHtml = FALSE)
  {
      $previousLevel = error_reporting(0);
      $markup = file_get_contents($filename);
      error_reporting($previousLevel);

      if ($markup !== FALSE) {
          return self::load($markup, $isHtml, $filename);
      }

      throw new PHPUnit_Framework_Exception(sprintf('Could not read "%s".', $filename));
  }
  /**
   * Load an $actual document into a DOMDocument. This is called
   * from the selector assertions.
   *
   * If $actual is already a DOMDocument, it is returned with
   * no changes. Otherwise, $actual is loaded into a new DOMDocument
   * as either HTML or XML, depending on the value of $isHtml.
   *
   * Note: prior to PHPUnit 3.3.0, this method loaded a file and
   * not a string as it currently does. To load a file into a
   * DOMDocument, use loadFile() instead.
   *
   * @param string|DOMDocument $actual
   * @param boolean $isHtml
   * @param string $filename Only used to build the exception message
   * @return DOMDocument
   * @throws PHPUnit_Framework_Exception when the markup cannot be parsed;
   *         the message carries the libxml error messages of this parse
   * @since Method available since Release 3.3.0
   * @author Mike Naberezny <mike@maintainable.com>
   * @author Derek DeVries <derek@maintainable.com>
   */
  public static function load($actual, $isHtml = FALSE, $filename = '')
  {
      if ($actual instanceof DOMDocument) {
          return $actual;
      }

      $dom = new DOMDocument();
      $loaded = FALSE;
      $message = '';

      if ($actual === NULL || $actual === '') {
          // Empty input can never be parsed. Report it through the usual
          // exception rather than handing it to the DOM extension.
          $message = 'Empty string supplied as input';
      } else {
          $internal = libxml_use_internal_errors(TRUE);
          $reporting = error_reporting(0);

          // Errors that were buffered before this call belong to the caller
          // and must not leak into our message.
          $alreadyBuffered = count(libxml_get_errors());

          $loaded = $isHtml ? $dom->loadHTML($actual) : $dom->loadXML($actual);

          if ($loaded === FALSE) {
              // Collect the messages BEFORE the previous libxml state is
              // restored: switching internal error handling back off
              // empties the error buffer, which used to leave the
              // exception message blank.
              foreach (array_slice(libxml_get_errors(), $alreadyBuffered) as $error) {
                  $message .= $error->message;
              }
          }

          libxml_use_internal_errors($internal);
          error_reporting($reporting);
      }

      if ($loaded === FALSE) {
          if ($filename != '') {
              throw new PHPUnit_Framework_Exception(sprintf('Could not load "%s".%s',
                  $filename, $message != '' ? "\n" . $message : ''));
          }

          throw new PHPUnit_Framework_Exception($message);
      }

      return $dom;
  }
  /**
   * Returns the content of a node as a string.
   *
   * A node with exactly one child yields its nodeValue. In every other
   * case the children are serialized one after another with the owner
   * document's saveXML() and the pieces are concatenated.
   *
   * @param DOMNode $node
   * @return string
   * @since Method available since Release 3.4.0
   */
  public static function nodeToText(DOMNode $node)
  {
      $children = $node->childNodes;

      if ($children->length == 1) {
          return $node->nodeValue;
      }

      $serialized = '';

      for ($index = 0; $index < $children->length; $index++) {
          $serialized .= $node->ownerDocument->saveXML($children->item($index));
      }

      return $serialized;
  }
  /**
   * Removes every direct child of the node that is a DOMCharacterData
   * instance. Deeper descendants are left alone.
   *
   * @param DOMNode $node
   * @since Method available since Release 3.3.0
   * @author Mattis Stordalen Flister <mattis@xait.no>
   */
  public static function removeCharacterDataNodes(DOMNode $node)
  {
      if (!$node->hasChildNodes()) {
          return;
      }

      // Walk from the last child to the first so that removing a child
      // does not shift the positions that are still to be visited.
      $position = $node->childNodes->length;

      while ($position-- > 0) {
          $candidate = $node->childNodes->item($position);

          if ($candidate instanceof DOMCharacterData) {
              $node->removeChild($candidate);
          }
      }
  }
  /**
   * "Convert" a DOMElement object into a PHP variable.
   *
   * Supported tag names:
   *  - array:   one entry per direct <element> child; the entry value is the
   *             converted first child element of that <element>, the
   *             optional "key" attribute is used as the array key
   *  - object:  instance of the class named by the "class" attribute; the
   *             child elements of its first child element are converted and
   *             passed to the constructor
   *  - boolean: TRUE when the node value is 'true', FALSE otherwise
   *  - integer, double, string: the node value cast with settype()
   * Any other tag name yields NULL.
   *
   * @param DOMElement $element
   * @return mixed
   * @since Method available since Release 3.4.0
   */
  public static function xmlToVariable(DOMElement $element)
  {
      $variable = NULL;

      switch ($element->tagName) {
          case 'array':
              $variable = array();

              // Only direct <element> children belong to this array.
              // getElementsByTagName() would also return the entries of
              // nested arrays and flatten them into this one.
              foreach ($element->childNodes as $entry) {
                  if (!$entry instanceof DOMElement || $entry->tagName != 'element') {
                      continue;
                  }

                  $item = self::firstChildElement($entry);
                  $value = $item !== NULL ? self::xmlToVariable($item) : NULL;

                  if ($entry->hasAttribute('key')) {
                      $variable[(string) $entry->getAttribute('key')] = $value;
                  } else {
                      $variable[] = $value;
                  }
              }
              break;

          case 'object':
              $className = $element->getAttribute('class');
              $argumentList = self::firstChildElement($element);

              if ($argumentList !== NULL) {
                  $constructorArgs = array();

                  foreach ($argumentList->childNodes as $argument) {
                      if ($argument instanceof DOMElement) {
                          $constructorArgs[] = self::xmlToVariable($argument);
                      }
                  }

                  $class = new ReflectionClass($className);
                  $variable = $class->newInstanceArgs($constructorArgs);
              } else {
                  $variable = new $className();
              }
              break;

          case 'boolean':
              $variable = $element->nodeValue == 'true' ? TRUE : FALSE;
              break;

          case 'integer':
          case 'double':
          case 'string':
              $variable = $element->nodeValue;
              settype($variable, $element->tagName);
              break;
      }

      return $variable;
  }

  /**
   * Returns the first child of a node that is an element, or NULL when
   * there is none. Looking the element up (rather than reading child
   * index 1) makes the conversion independent of whitespace text nodes.
   *
   * @param DOMNode $node
   * @return DOMElement|NULL
   */
  private static function firstChildElement(DOMNode $node)
  {
      foreach ($node->childNodes as $child) {
          if ($child instanceof DOMElement) {
              return $child;
          }
      }

      return NULL;
  }
  /**
   * Validate list of keys in the associative array.
   *
   * $validKeys may mix two notations: an entry with an integer key names a
   * valid key whose default is NULL, an entry with a string key maps a
   * valid key to its default value. The returned array is $hash with the
   * default filled in for every valid key that is not set (or is NULL).
   *
   * @param array $hash
   * @param array $validKeys
   * @return array
   * @throws InvalidArgumentException when $hash contains a key that is not valid
   * @since Method available since Release 3.3.0
   * @author Mike Naberezny <mike@maintainable.com>
   * @author Derek DeVries <derek@maintainable.com>
   */
  public static function assertValidKeys(array $hash, array $validKeys)
  {
      // Normalize validation keys so that we can use both indexed and
      // associative arrays.
      $valids = array();

      foreach ($validKeys as $key => $val) {
          if (is_int($key)) {
              $valids[$val] = NULL;
          } else {
              $valids[$key] = $val;
          }
      }

      // Check for invalid keys. Keys are compared exactly: a loose
      // in_array() lets the integer key 0 compare equal to any
      // non-numeric valid key on PHP versions before 8.
      $unknown = array_keys(array_diff_key($hash, $valids));

      if (!empty($unknown)) {
          throw new InvalidArgumentException('Unknown key(s): ' . implode(', ', $unknown));
      }

      // Add default values for any valid keys that are empty.
      foreach ($valids as $key => $value) {
          if (!isset($hash[$key])) {
              $hash[$key] = $value;
          }
      }

      return $hash;
  }
  /**
   * Parse a CSS selector into an associative array suitable for
   * use with findNodes().
   *
   * The selector is split on spaces. Each part may contain a tag name,
   * one "#id", any number of ".class" parts and any number of
   * [attribute] parts using "=" (exact), "~=" (whole word) or
   * "*=" (substring). Parts are nested from right to left: a part
   * becomes the 'descendant' of the part to its left, or its 'child' when
   * the two are separated by ">".
   *
   * @param string $selector
   * @param mixed $content Stored as 'content' on every part when it is a string
   * @return array
   * @since Method available since Release 3.3.0
   * @author Mike Naberezny <mike@maintainable.com>
   * @author Derek DeVries <derek@maintainable.com>
   */
  public static function convertSelectToTag($selector, $content = TRUE)
  {
      $selector = trim(preg_replace("/\s+/", " ", $selector));

      // substitute spaces within attribute value so that splitting the
      // selector on spaces does not cut a quoted value in two
      while (preg_match('/\[[^\]]+"[^"]+\s[^"]+"\]/', $selector)) {
          $selector = preg_replace('/(\[[^\]]+"[^"]+)\s([^"]+"\])/', "$1__SPACE__$2", $selector);
      }

      $elements = explode(' ', $selector);
      $previousTag = array();
      $tag = array();

      foreach (array_reverse($elements) as $element) {
          $element = str_replace('__SPACE__', ' ', $element);

          // child selector: the part seen so far becomes a direct child
          if ($element == '>') {
              $previousTag = array(
                  'child' => isset($previousTag['descendant']) ? $previousTag['descendant'] : array()
              );
              continue;
          }

          $tag = array();

          // match element tag
          preg_match("/^([^\.#\[]*)/", $element, $eltMatches);

          if (!empty($eltMatches[1])) {
              $tag['tag'] = $eltMatches[1];
          }

          // match attributes (\[[^\]]*\]*), ids (#[^\.#\[]*),
          // and classes (\.[^\.#\[]*))
          preg_match_all("/(\[[^\]]*\]*|#[^\.#\[]*|\.[^\.#\[]*)/", $element, $matches);

          if (!empty($matches[1])) {
              $classes = array();
              $attrs = array();

              foreach ($matches[1] as $match) {
                  $first = substr($match, 0, 1);

                  if ($first == '#') {
                      // id matched
                      $tag['id'] = substr($match, 1);
                  } elseif ($first == '.') {
                      // class matched
                      $classes[] = substr($match, 1);
                  } elseif ($first == '[' && substr($match, -1, 1) == ']') {
                      // attribute matched
                      $attribute = substr($match, 1, strlen($match) - 2);
                      $attribute = str_replace('"', '', $attribute);

                      // The value is split off with a limit of two so that
                      // a "=" inside the value is kept, and it is quoted
                      // before being embedded in a regular expression so
                      // that characters such as "." or "/" are matched
                      // literally.
                      if (strstr($attribute, '~=')) {
                          // match single word
                          list($key, $value) = explode('~=', $attribute, 2);
                          $value = 'regexp:/.*\b' . preg_quote($value, '/') . '\b.*/';
                      } elseif (strstr($attribute, '*=')) {
                          // match substring
                          list($key, $value) = explode('*=', $attribute, 2);
                          $value = 'regexp:/.*' . preg_quote($value, '/') . '.*/';
                      } else {
                          // exact match; a bare [name] has no value (NULL)
                          list($key, $value) = array_pad(explode('=', $attribute, 2), 2, NULL);
                      }

                      $attrs[$key] = $value;
                  }
              }

              if ($classes) {
                  $tag['class'] = join(' ', $classes);
              }

              if ($attrs) {
                  $tag['attributes'] = $attrs;
              }
          }

          // tag content
          if (is_string($content)) {
              $tag['content'] = $content;
          }

          // determine previous child/descendants
          if (!empty($previousTag['descendant'])) {
              $tag['descendant'] = $previousTag['descendant'];
          } elseif (!empty($previousTag['child'])) {
              $tag['child'] = $previousTag['child'];
          }

          $previousTag = array('descendant' => $tag);
      }

      return $tag;
  }
  /**
   * Finds the nodes of a document that match a CSS selector.
   *
   * The selector (together with $content) is translated by
   * convertSelectToTag(), the document is obtained through load(), and
   * the lookup itself is done by findNodes(), whose result is returned
   * unchanged.
   *
   * Pass a string as $content to keep only nodes containing that
   * content; TRUE keeps every node matching $selector.
   *
   * The $actual document may be a DOMDocument or a string
   * containing XML or HTML, identified by $isHtml.
   *
   * @param array $selector
   * @param string $content
   * @param mixed $actual
   * @param boolean $isHtml
   * @return false|array
   * @since Method available since Release 3.3.0
   * @author Mike Naberezny <mike@maintainable.com>
   * @author Derek DeVries <derek@maintainable.com>
   * @author Tobias Schlitt <toby@php.net>
   */
  public static function cssSelect($selector, $content, $actual, $isHtml = TRUE)
  {
      $criteria = self::convertSelectToTag($selector, $content);
      $document = self::load($actual, $isHtml);

      return self::findNodes($document, $criteria, $isHtml);
  }
  /**
   * Finds the elements of a document that satisfy a set of criteria.
   *
   * Recognised $options keys:
   *  - tag:        tag name of the candidates; without it, candidates are
   *                taken from a fixed list of HTML tag names
   *  - id, class:  shortcuts for the attributes of the same name
   *  - attributes: name => value pairs; a value "regexp:/.../" is matched
   *                as a regular expression, 'class' matches when every
   *                listed class is present, other values must be equal
   *  - content:    text that the node text must contain, or "regexp:/.../"
   *  - parent:     criteria; the node's parent must be the first match
   *  - child:      criteria; a direct child must be among the matches
   *  - ancestor:   criteria; an ancestor must be the first match
   *  - descendant: criteria; a descendant must be among the matches
   *  - children:   array with 'count', 'greater_than', 'less_than', 'only'
   *                constraining the non-text children of the node
   *
   * In the child and descendant filters a node is added once per matching
   * child/descendant, so it can occur several times in the result.
   *
   * @param DOMDocument $dom
   * @param array $options
   * @param boolean $isHtml
   * @return array|false The matching nodes, or FALSE when nothing matches
   * @throws InvalidArgumentException when $options contains an unknown key
   * @since Method available since Release 3.3.0
   * @author Mike Naberezny <mike@maintainable.com>
   * @author Derek DeVries <derek@maintainable.com>
   * @author Tobias Schlitt <toby@php.net>
   */
  public static function findNodes(DOMDocument $dom, array $options, $isHtml = TRUE)
  {
      $valid = array('id', 'class', 'tag', 'content', 'attributes', 'parent', 'child', 'ancestor', 'descendant',
          'children');
      $options = self::assertValidKeys($options, $valid);
      $nodes = array();
      $filtered = array();

      // id and class are matched like any other attribute
      if ($options['id']) {
          $options['attributes']['id'] = $options['id'];
      }

      if ($options['class']) {
          $options['attributes']['class'] = $options['class'];
      }

      // collect the candidates: by the given tag type, or, when no tag is
      // selected, by every tag name of the list below
      if ($options['tag']) {
          $tags = array($options['tag']);
      } else {
          $tags = array('a', 'abbr', 'acronym', 'address', 'area', 'b', 'base', 'bdo', 'big', 'blockquote', 'body',
              'br', 'button', 'caption', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'div', 'dfn', 'dl', 'dt',
              'em', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr',
              'html', 'i', 'iframe', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'link', 'map', 'meta',
              'noframes', 'noscript', 'object', 'ol', 'optgroup', 'option', 'p', 'param', 'pre', 'q', 'samp',
              'script', 'select', 'small', 'span', 'strong', 'style', 'sub', 'sup', 'table', 'tbody', 'td',
              'textarea', 'tfoot', 'th', 'thead', 'title', 'tr', 'tt', 'ul', 'var');
      }

      foreach ($tags as $tag) {
          if ($isHtml) {
              $elements = self::getElementsByCaseInsensitiveTagName($dom, $tag);
          } else {
              $elements = $dom->getElementsByTagName($tag);
          }

          foreach ($elements as $element) {
              $nodes[] = $element;
          }
      }

      if (empty($nodes)) {
          return FALSE;
      }

      // filter by attributes
      if ($options['attributes']) {
          foreach ($nodes as $node) {
              $invalid = FALSE;

              foreach ($options['attributes'] as $name => $value) {
                  if (preg_match('/^regexp\s*:\s*(.*)/i', $value, $matches)) {
                      // match by regexp if like "regexp:/foo/i"
                      if (!preg_match($matches[1], $node->getAttribute($name))) {
                          $invalid = TRUE;
                      }
                  } elseif ($name == 'class') {
                      // class can match only a part: split to individual
                      // classes and make sure each class given is in the
                      // actual node
                      $findClasses = explode(' ', preg_replace("/\s+/", " ", $value));
                      $allClasses = explode(' ', preg_replace("/\s+/", " ", $node->getAttribute($name)));

                      foreach ($findClasses as $findClass) {
                          if (!in_array($findClass, $allClasses)) {
                              $invalid = TRUE;
                          }
                      }
                  } elseif ($node->getAttribute($name) != $value) {
                      // match by exact string
                      $invalid = TRUE;
                  }
              }

              // if every attribute given matched
              if (!$invalid) {
                  $filtered[] = $node;
              }
          }

          $nodes = $filtered;
          $filtered = array();

          if (empty($nodes)) {
              return FALSE;
          }
      }

      // filter by content
      if ($options['content'] !== NULL) {
          foreach ($nodes as $node) {
              $invalid = FALSE;

              if (preg_match('/^regexp\s*:\s*(.*)/i', $options['content'], $matches)) {
                  // match by regexp if like "regexp:/foo/i"
                  if (!preg_match($matches[1], self::getNodeText($node))) {
                      $invalid = TRUE;
                  }
              } elseif (strstr(self::getNodeText($node), $options['content']) === FALSE) {
                  // match by exact string
                  $invalid = TRUE;
              }

              if (!$invalid) {
                  $filtered[] = $node;
              }
          }

          $nodes = $filtered;
          $filtered = array();

          if (empty($nodes)) {
              return FALSE;
          }
      }

      // filter by parent node
      if ($options['parent']) {
          $parentNodes = self::findNodes($dom, $options['parent'], $isHtml);
          $parentNode = isset($parentNodes[0]) ? $parentNodes[0] : NULL;

          foreach ($nodes as $node) {
              // skip only this node; a "break" here would also discard
              // every later node that does have the wanted parent
              if ($parentNode !== $node->parentNode) {
                  continue;
              }

              $filtered[] = $node;
          }

          $nodes = $filtered;
          $filtered = array();

          if (empty($nodes)) {
              return FALSE;
          }
      }

      // filter by child node
      if ($options['child']) {
          $childNodes = self::findNodes($dom, $options['child'], $isHtml);
          $childNodes = !empty($childNodes) ? $childNodes : array();

          foreach ($nodes as $node) {
              foreach ($node->childNodes as $child) {
                  foreach ($childNodes as $childNode) {
                      if ($childNode === $child) {
                          $filtered[] = $node;
                      }
                  }
              }
          }

          $nodes = $filtered;
          $filtered = array();

          if (empty($nodes)) {
              return FALSE;
          }
      }

      // filter by ancestor
      if ($options['ancestor']) {
          $ancestorNodes = self::findNodes($dom, $options['ancestor'], $isHtml);
          $ancestorNode = isset($ancestorNodes[0]) ? $ancestorNodes[0] : NULL;

          foreach ($nodes as $node) {
              $parent = $node->parentNode;

              // Walk up until the document itself is reached. Testing for
              // XML_HTML_DOCUMENT_NODE alone never ends the walk for a
              // document that was loaded as XML.
              while ($parent !== NULL && !($parent instanceof DOMDocument)) {
                  if ($parent === $ancestorNode) {
                      $filtered[] = $node;
                  }

                  $parent = $parent->parentNode;
              }
          }

          $nodes = $filtered;
          $filtered = array();

          if (empty($nodes)) {
              return FALSE;
          }
      }

      // filter by descendant
      if ($options['descendant']) {
          $descendantNodes = self::findNodes($dom, $options['descendant'], $isHtml);
          $descendantNodes = !empty($descendantNodes) ? $descendantNodes : array();

          foreach ($nodes as $node) {
              foreach (self::getDescendants($node) as $descendant) {
                  foreach ($descendantNodes as $descendantNode) {
                      if ($descendantNode === $descendant) {
                          $filtered[] = $node;
                      }
                  }
              }
          }

          $nodes = $filtered;
          $filtered = array();

          if (empty($nodes)) {
              return FALSE;
          }
      }

      // filter by children
      if ($options['children']) {
          $validChild = array('count', 'greater_than', 'less_than', 'only');
          $childOptions = self::assertValidKeys($options['children'], $validChild);

          foreach ($nodes as $node) {
              // start from scratch for every node, otherwise the children
              // of the nodes inspected before would be counted as well
              $children = array();

              foreach ($node->childNodes as $childNode) {
                  if ($childNode->nodeType !== XML_CDATA_SECTION_NODE && $childNode->nodeType !== XML_TEXT_NODE) {
                      $children[] = $childNode;
                  }
              }

              // we must have children to pass this filter
              if (empty($children)) {
                  continue;
              }

              // A node that fails a constraint is skipped ("continue");
              // the remaining nodes are still examined.
              if ($childOptions['count'] !== NULL) {
                  // exact count of children
                  if (count($children) !== $childOptions['count']) {
                      continue;
                  }
              } elseif ($childOptions['less_than'] !== NULL && $childOptions['greater_than'] !== NULL) {
                  // range count of children
                  if (count($children) >= $childOptions['less_than'] || count($children) <= $childOptions['greater_than']) {
                      continue;
                  }
              } elseif ($childOptions['less_than'] !== NULL) {
                  // less than a given count
                  if (count($children) >= $childOptions['less_than']) {
                      continue;
                  }
              } elseif ($childOptions['greater_than'] !== NULL) {
                  // more than a given count
                  if (count($children) <= $childOptions['greater_than']) {
                      continue;
                  }
              }

              // match each child against a specific tag
              if ($childOptions['only']) {
                  $onlyNodes = self::findNodes($dom, $childOptions['only'], $isHtml);

                  // nothing matches the 'only' criteria, so the children
                  // of this node cannot match either
                  if (empty($onlyNodes)) {
                      continue;
                  }

                  // every child has to be one of the 'only' nodes
                  foreach ($children as $child) {
                      if (!in_array($child, $onlyNodes, TRUE)) {
                          continue 2;
                      }
                  }
              }

              $filtered[] = $node;
          }

          $nodes = $filtered;
          $filtered = array();

          if (empty($nodes)) {
              return FALSE;
          }
      }

      // return all nodes that match all criteria
      return !empty($nodes) ? $nodes : array();
  }
  /**
   * Builds a flat list of all descendants of a node, skipping text and
   * CDATA nodes. For every child, the child's own descendants are listed
   * first and the child itself follows them.
   *
   * @param DOMNode $node
   * @return array
   * @since Method available since Release 3.3.0
   * @author Mike Naberezny <mike@maintainable.com>
   * @author Derek DeVries <derek@maintainable.com>
   */
  protected static function getDescendants(DOMNode $node)
  {
      $collected = array();
      $directChildren = $node->childNodes ? $node->childNodes : array();

      foreach ($directChildren as $child) {
          $isText = $child->nodeType === XML_CDATA_SECTION_NODE || $child->nodeType === XML_TEXT_NODE;

          if (!$isText) {
              foreach (self::getDescendants($child) as $nested) {
                  $collected[] = $nested;
              }

              $collected[] = $child;
          }
      }

      return $collected;
  }
  /**
   * Looks elements up by tag name, trying the lower-case spelling of the
   * name first and the upper-case spelling only when the first lookup
   * finds nothing. Mixed-case spellings are not tried.
   *
   * @param DOMDocument $dom
   * @param string $tag
   * @return DOMNodeList
   * @since Method available since Release 3.4.0
   */
  protected static function getElementsByCaseInsensitiveTagName(DOMDocument $dom, $tag)
  {
      $lowerCaseMatches = $dom->getElementsByTagName(strtolower($tag));

      if ($lowerCaseMatches->length != 0) {
          return $lowerCaseMatches;
      }

      return $dom->getElementsByTagName(strtoupper($tag));
  }
  /**
   * Get the text of this node, gathered recursively from its child nodes.
   *
   * The data of every text node is trimmed and followed by one space;
   * other children contribute their own node text. Doubled spaces in the
   * result, as left behind by whitespace-only text nodes, are collapsed
   * into a single space.
   *
   * @param DOMNode $node
   * @return string
   * @since Method available since Release 3.3.0
   * @author Mike Naberezny <mike@maintainable.com>
   * @author Derek DeVries <derek@maintainable.com>
   */
  protected static function getNodeText(DOMNode $node)
  {
      if (!$node->childNodes instanceof DOMNodeList) {
          return '';
      }

      $result = '';

      foreach ($node->childNodes as $childNode) {
          if ($childNode->nodeType === XML_TEXT_NODE) {
              $result .= trim($childNode->data) . ' ';
          } else {
              $result .= self::getNodeText($childNode);
          }
      }

      // NOTE(review): the search string used to be a single space, which
      // made this call a no-op. Collapsing two spaces into one is assumed
      // to be the intent - confirm against the upstream PHPUnit source.
      return str_replace('  ', ' ', $result);
  }
  926. }