PageRenderTime 48ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/PHPUnit/Util/XML.php

https://github.com/tswicegood/phpunit
PHP | 956 lines | 564 code | 145 blank | 247 comment | 137 complexity | 16cc2fc419870f96f216ab5b051c2742 MD5 | raw file
  1. <?php
  2. /**
  3. * PHPUnit
  4. *
  5. * Copyright (c) 2002-2010, Sebastian Bergmann <sebastian@phpunit.de>.
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * * Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * * Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * * Neither the name of Sebastian Bergmann nor the names of his
  21. * contributors may be used to endorse or promote products derived
  22. * from this software without specific prior written permission.
  23. *
  24. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  27. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  28. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  29. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  30. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  31. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  32. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  33. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  34. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  35. * POSSIBILITY OF SUCH DAMAGE.
  36. *
  37. * @package PHPUnit
  38. * @subpackage Util
  39. * @author Sebastian Bergmann <sebastian@phpunit.de>
  40. * @copyright 2002-2010 Sebastian Bergmann <sebastian@phpunit.de>
  41. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  42. * @link http://www.phpunit.de/
  43. * @since File available since Release 3.2.0
  44. */
  45. /**
  46. * XML helpers.
  47. *
  48. * @package PHPUnit
  49. * @subpackage Util
  50. * @author Sebastian Bergmann <sebastian@phpunit.de>
  51. * @copyright 2002-2010 Sebastian Bergmann <sebastian@phpunit.de>
  52. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  53. * @version Release: @package_version@
  54. * @link http://www.phpunit.de/
  55. * @since Class available since Release 3.2.0
  56. */
  57. class PHPUnit_Util_XML
  58. {
  59. /**
  60. * @param string $string
  61. * @return string
  62. * @author Kore Nordmann <mail@kore-nordmann.de>
  63. * @since Method available since Release 3.4.6
  64. */
  65. public static function prepareString($string)
  66. {
  67. return preg_replace(
  68. '([\\x00-\\x04\\x0b\\x0c\\x0e-\\x1f\\x7f])e',
  69. 'sprintf( "&#x%02x;", ord( "\\1" ) )',
  70. htmlspecialchars(
  71. self::convertToUtf8($string), ENT_COMPAT, 'UTF-8'
  72. )
  73. );
  74. }
  75. /**
  76. * Converts a string to UTF-8 encoding.
  77. *
  78. * @param string $string
  79. * @return string
  80. * @since Method available since Release 3.2.19
  81. */
  82. protected static function convertToUtf8($string)
  83. {
  84. if (!self::isUtf8($string)) {
  85. if (function_exists('mb_convert_encoding')) {
  86. $string = mb_convert_encoding($string, 'UTF-8');
  87. } else {
  88. $string = utf8_encode($string);
  89. }
  90. }
  91. return $string;
  92. }
  93. /**
  94. * Checks a string for UTF-8 encoding.
  95. *
  96. * @param string $string
  97. * @return boolean
  98. * @since Method available since Release 3.3.0
  99. */
  100. protected static function isUtf8($string)
  101. {
  102. $length = strlen($string);
  103. for ($i = 0; $i < $length; $i++) {
  104. if (ord($string[$i]) < 0x80) {
  105. $n = 0;
  106. }
  107. else if ((ord($string[$i]) & 0xE0) == 0xC0) {
  108. $n = 1;
  109. }
  110. else if ((ord($string[$i]) & 0xF0) == 0xE0) {
  111. $n = 2;
  112. }
  113. else if ((ord($string[$i]) & 0xF0) == 0xF0) {
  114. $n = 3;
  115. }
  116. else {
  117. return FALSE;
  118. }
  119. for ($j = 0; $j < $n; $j++) {
  120. if ((++$i == $length) || ((ord($string[$i]) & 0xC0) != 0x80)) {
  121. return FALSE;
  122. }
  123. }
  124. }
  125. return TRUE;
  126. }
  127. /**
  128. * Loads an XML (or HTML) file into a DOMDocument object.
  129. *
  130. * @param string $filename
  131. * @param boolean $isHtml
  132. * @return DOMDocument
  133. * @since Method available since Release 3.3.0
  134. */
  135. public static function loadFile($filename, $isHtml = FALSE)
  136. {
  137. $reporting = error_reporting(0);
  138. $contents = file_get_contents($filename);
  139. error_reporting($reporting);
  140. if ($contents === FALSE) {
  141. throw new PHPUnit_Framework_Exception(
  142. sprintf(
  143. 'Could not read "%s".',
  144. $filename
  145. )
  146. );
  147. }
  148. return self::load($contents, $isHtml, $filename);
  149. }
  150. /**
  151. * Load an $actual document into a DOMDocument. This is called
  152. * from the selector assertions.
  153. *
  154. * If $actual is already a DOMDocument, it is returned with
  155. * no changes. Otherwise, $actual is loaded into a new DOMDocument
  156. * as either HTML or XML, depending on the value of $isHtml.
  157. *
  158. * Note: prior to PHPUnit 3.3.0, this method loaded a file and
  159. * not a string as it currently does. To load a file into a
  160. * DOMDocument, use loadFile() instead.
  161. *
  162. * @param string|DOMDocument $actual
  163. * @param boolean $isHtml
  164. * @param string $filename
  165. * @return DOMDocument
  166. * @since Method available since Release 3.3.0
  167. * @author Mike Naberezny <mike@maintainable.com>
  168. * @author Derek DeVries <derek@maintainable.com>
  169. */
  170. public static function load($actual, $isHtml = FALSE, $filename = '')
  171. {
  172. if ($actual instanceof DOMDocument) {
  173. return $actual;
  174. }
  175. $internal = libxml_use_internal_errors(TRUE);
  176. $reporting = error_reporting(0);
  177. $dom = new DOMDocument;
  178. if ($isHtml) {
  179. $loaded = $dom->loadHTML($actual);
  180. } else {
  181. $loaded = $dom->loadXML($actual);
  182. }
  183. libxml_use_internal_errors($internal);
  184. error_reporting($reporting);
  185. if ($loaded === FALSE) {
  186. $message = '';
  187. foreach (libxml_get_errors() as $error) {
  188. $message .= $error->message;
  189. }
  190. if ($filename != '') {
  191. throw new PHPUnit_Framework_Exception(
  192. sprintf(
  193. 'Could not load "%s".%s',
  194. $filename,
  195. $message != '' ? "\n" . $message : ''
  196. )
  197. );
  198. } else {
  199. throw new PHPUnit_Framework_Exception($message);
  200. }
  201. }
  202. return $dom;
  203. }
  204. /**
  205. *
  206. *
  207. * @param DOMNode $node
  208. * @return string
  209. * @since Method available since Release 3.4.0
  210. */
  211. public static function nodeToText(DOMNode $node)
  212. {
  213. if ($node->childNodes->length == 1) {
  214. return $node->nodeValue;
  215. }
  216. $result = '';
  217. foreach ($node->childNodes as $childNode) {
  218. $result .= $node->ownerDocument->saveXML($childNode);
  219. }
  220. return $result;
  221. }
  222. /**
  223. *
  224. *
  225. * @param DOMNode $node
  226. * @since Method available since Release 3.3.0
  227. * @author Mattis Stordalen Flister <mattis@xait.no>
  228. */
  229. public static function removeCharacterDataNodes(DOMNode $node)
  230. {
  231. if ($node->hasChildNodes()) {
  232. for ($i = $node->childNodes->length - 1; $i >= 0; $i--) {
  233. if (($child = $node->childNodes->item($i)) instanceof DOMCharacterData) {
  234. $node->removeChild($child);
  235. }
  236. }
  237. }
  238. }
  239. /**
  240. * "Convert" a DOMElement object into a PHP variable.
  241. *
  242. * @param DOMElement $element
  243. * @return mixed
  244. * @since Method available since Release 3.4.0
  245. */
  246. public static function xmlToVariable(DOMElement $element)
  247. {
  248. $variable = NULL;
  249. switch ($element->tagName) {
  250. case 'array': {
  251. $variable = array();
  252. foreach ($element->getElementsByTagName('element') as $element) {
  253. $value = self::xmlToVariable($element->childNodes->item(1));
  254. if ($element->hasAttribute('key')) {
  255. $variable[(string)$element->getAttribute('key')] = $value;
  256. } else {
  257. $variable[] = $value;
  258. }
  259. }
  260. }
  261. break;
  262. case 'object': {
  263. $className = $element->getAttribute('class');
  264. if ($element->hasChildNodes()) {
  265. $arguments = $element->childNodes->item(1)->childNodes;
  266. $constructorArgs = array();
  267. foreach ($arguments as $argument) {
  268. if ($argument instanceof DOMElement) {
  269. $constructorArgs[] = self::xmlToVariable($argument);
  270. }
  271. }
  272. $class = new ReflectionClass($className);
  273. $variable = $class->newInstanceArgs($constructorArgs);
  274. } else {
  275. $variable = new $className;
  276. }
  277. }
  278. break;
  279. case 'boolean': {
  280. $variable = $element->nodeValue == 'true' ? TRUE : FALSE;
  281. }
  282. break;
  283. case 'integer':
  284. case 'double':
  285. case 'string': {
  286. $variable = $element->nodeValue;
  287. settype($variable, $element->tagName);
  288. }
  289. break;
  290. }
  291. return $variable;
  292. }
  293. /**
  294. * Validate list of keys in the associative array.
  295. *
  296. * @param array $hash
  297. * @param array $validKeys
  298. * @return array
  299. * @throws InvalidArgumentException
  300. * @since Method available since Release 3.3.0
  301. * @author Mike Naberezny <mike@maintainable.com>
  302. * @author Derek DeVries <derek@maintainable.com>
  303. */
  304. public static function assertValidKeys(array $hash, array $validKeys)
  305. {
  306. $valids = array();
  307. // Normalize validation keys so that we can use both indexed and
  308. // associative arrays.
  309. foreach ($validKeys as $key => $val) {
  310. is_int($key) ? $valids[$val] = NULL : $valids[$key] = $val;
  311. }
  312. $validKeys = array_keys($valids);
  313. // Check for invalid keys.
  314. foreach ($hash as $key => $value) {
  315. if (!in_array($key, $validKeys)) {
  316. $unknown[] = $key;
  317. }
  318. }
  319. if (!empty($unknown)) {
  320. throw new InvalidArgumentException(
  321. 'Unknown key(s): ' . implode(', ', $unknown)
  322. );
  323. }
  324. // Add default values for any valid keys that are empty.
  325. foreach ($valids as $key => $value) {
  326. if (!isset($hash[$key])) {
  327. $hash[$key] = $value;
  328. }
  329. }
  330. return $hash;
  331. }
  332. /**
  333. * Parse a CSS selector into an associative array suitable for
  334. * use with findNodes().
  335. *
  336. * @param string $selector
  337. * @param mixed $content
  338. * @return array
  339. * @since Method available since Release 3.3.0
  340. * @author Mike Naberezny <mike@maintainable.com>
  341. * @author Derek DeVries <derek@maintainable.com>
  342. */
  343. public static function convertSelectToTag($selector, $content = TRUE)
  344. {
  345. $selector = trim(preg_replace("/\s+/", " ", $selector));
  346. // substitute spaces within attribute value
  347. while (preg_match('/\[[^\]]+"[^"]+\s[^"]+"\]/', $selector)) {
  348. $selector = preg_replace(
  349. '/(\[[^\]]+"[^"]+)\s([^"]+"\])/', "$1__SPACE__$2", $selector
  350. );
  351. }
  352. if (strstr($selector, ' ')) {
  353. $elements = explode(' ', $selector);
  354. } else {
  355. $elements = array($selector);
  356. }
  357. $previousTag = array();
  358. foreach (array_reverse($elements) as $element) {
  359. $element = str_replace('__SPACE__', ' ', $element);
  360. // child selector
  361. if ($element == '>') {
  362. $previousTag = array('child' => $previousTag['descendant']);
  363. continue;
  364. }
  365. $tag = array();
  366. // match element tag
  367. preg_match("/^([^\.#\[]*)/", $element, $eltMatches);
  368. if (!empty($eltMatches[1])) {
  369. $tag['tag'] = $eltMatches[1];
  370. }
  371. // match attributes (\[[^\]]*\]*), ids (#[^\.#\[]*),
  372. // and classes (\.[^\.#\[]*))
  373. preg_match_all(
  374. "/(\[[^\]]*\]*|#[^\.#\[]*|\.[^\.#\[]*)/", $element, $matches
  375. );
  376. if (!empty($matches[1])) {
  377. $classes = array();
  378. $attrs = array();
  379. foreach ($matches[1] as $match) {
  380. // id matched
  381. if (substr($match, 0, 1) == '#') {
  382. $tag['id'] = substr($match, 1);
  383. }
  384. // class matched
  385. else if (substr($match, 0, 1) == '.') {
  386. $classes[] = substr($match, 1);
  387. }
  388. // attribute matched
  389. else if (substr($match, 0, 1) == '[' &&
  390. substr($match, -1, 1) == ']') {
  391. $attribute = substr($match, 1, strlen($match) - 2);
  392. $attribute = str_replace('"', '', $attribute);
  393. // match single word
  394. if (strstr($attribute, '~=')) {
  395. list($key, $value) = explode('~=', $attribute);
  396. $value = "regexp:/.*\b$value\b.*/";
  397. }
  398. // match substring
  399. else if (strstr($attribute, '*=')) {
  400. list($key, $value) = explode('*=', $attribute);
  401. $value = "regexp:/.*$value.*/";
  402. }
  403. // exact match
  404. else {
  405. list($key, $value) = explode('=', $attribute);
  406. }
  407. $attrs[$key] = $value;
  408. }
  409. }
  410. if ($classes) {
  411. $tag['class'] = join(' ', $classes);
  412. }
  413. if ($attrs) {
  414. $tag['attributes'] = $attrs;
  415. }
  416. }
  417. // tag content
  418. if (is_string($content)) {
  419. $tag['content'] = $content;
  420. }
  421. // determine previous child/descendants
  422. if (!empty($previousTag['descendant'])) {
  423. $tag['descendant'] = $previousTag['descendant'];
  424. }
  425. else if (!empty($previousTag['child'])) {
  426. $tag['child'] = $previousTag['child'];
  427. }
  428. $previousTag = array('descendant' => $tag);
  429. }
  430. return $tag;
  431. }
  432. /**
  433. * Parse an $actual document and return an array of DOMNodes
  434. * matching the CSS $selector. If an error occurs, it will
  435. * return FALSE.
  436. *
  437. * To only return nodes containing a certain content, give
  438. * the $content to match as a string. Otherwise, setting
  439. * $content to TRUE will return all nodes matching $selector.
  440. *
  441. * The $actual document may be a DOMDocument or a string
  442. * containing XML or HTML, identified by $isHtml.
  443. *
  444. * @param array $selector
  445. * @param string $content
  446. * @param mixed $actual
  447. * @param boolean $isHtml
  448. * @return false|array
  449. * @since Method available since Release 3.3.0
  450. * @author Mike Naberezny <mike@maintainable.com>
  451. * @author Derek DeVries <derek@maintainable.com>
  452. */
  453. public static function cssSelect($selector, $content, $actual, $isHtml = TRUE)
  454. {
  455. $matcher = self::convertSelectToTag($selector, $content);
  456. $dom = self::load($actual, $isHtml);
  457. $tags = self::findNodes($dom, $matcher);
  458. return $tags;
  459. }
  460. /**
  461. * Parse out the options from the tag using DOM object tree.
  462. *
  463. * @param DOMDocument $dom
  464. * @param array $options
  465. * @param boolean $isHtml
  466. * @return array
  467. * @since Method available since Release 3.3.0
  468. * @author Mike Naberezny <mike@maintainable.com>
  469. * @author Derek DeVries <derek@maintainable.com>
  470. */
  471. public static function findNodes(DOMDocument $dom, array $options, $isHtml = TRUE)
  472. {
  473. $valid = array(
  474. 'id', 'class', 'tag', 'content', 'attributes', 'parent',
  475. 'child', 'ancestor', 'descendant', 'children'
  476. );
  477. $filtered = array();
  478. $options = self::assertValidKeys($options, $valid);
  479. // find the element by id
  480. if ($options['id']) {
  481. $options['attributes']['id'] = $options['id'];
  482. }
  483. if ($options['class']) {
  484. $options['attributes']['class'] = $options['class'];
  485. }
  486. // find the element by a tag type
  487. if ($options['tag']) {
  488. if ($isHtml) {
  489. $elements = self::getElementsByCaseInsensitiveTagName(
  490. $dom, $options['tag']
  491. );
  492. } else {
  493. $elements = $dom->getElementsByTagName($options['tag']);
  494. }
  495. foreach ($elements as $element) {
  496. $nodes[] = $element;
  497. }
  498. if (empty($nodes)) {
  499. return FALSE;
  500. }
  501. }
  502. // no tag selected, get them all
  503. else {
  504. $tags = array(
  505. 'a', 'abbr', 'acronym', 'address', 'area', 'b', 'base', 'bdo',
  506. 'big', 'blockquote', 'body', 'br', 'button', 'caption', 'cite',
  507. 'code', 'col', 'colgroup', 'dd', 'del', 'div', 'dfn', 'dl',
  508. 'dt', 'em', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2',
  509. 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'html', 'i', 'iframe',
  510. 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'link',
  511. 'map', 'meta', 'noframes', 'noscript', 'object', 'ol', 'optgroup',
  512. 'option', 'p', 'param', 'pre', 'q', 'samp', 'script', 'select',
  513. 'small', 'span', 'strong', 'style', 'sub', 'sup', 'table',
  514. 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'title',
  515. 'tr', 'tt', 'ul', 'var'
  516. );
  517. foreach ($tags as $tag) {
  518. if ($isHtml) {
  519. $elements = self::getElementsByCaseInsensitiveTagName(
  520. $dom, $tag
  521. );
  522. } else {
  523. $elements = $dom->getElementsByTagName($tag);
  524. }
  525. foreach ($elements as $element) {
  526. $nodes[] = $element;
  527. }
  528. }
  529. if (empty($nodes)) {
  530. return FALSE;
  531. }
  532. }
  533. // filter by attributes
  534. if ($options['attributes']) {
  535. foreach ($nodes as $node) {
  536. $invalid = FALSE;
  537. foreach ($options['attributes'] as $name => $value) {
  538. // match by regexp if like "regexp:/foo/i"
  539. if (preg_match('/^regexp\s*:\s*(.*)/i', $value, $matches)) {
  540. if (!preg_match($matches[1], $node->getAttribute($name))) {
  541. $invalid = TRUE;
  542. }
  543. }
  544. // class can match only a part
  545. else if ($name == 'class') {
  546. // split to individual classes
  547. $findClasses = explode(
  548. ' ', preg_replace("/\s+/", " ", $value)
  549. );
  550. $allClasses = explode(
  551. ' ',
  552. preg_replace("/\s+/", " ", $node->getAttribute($name))
  553. );
  554. // make sure each class given is in the actual node
  555. foreach ($findClasses as $findClass) {
  556. if (!in_array($findClass, $allClasses)) {
  557. $invalid = TRUE;
  558. }
  559. }
  560. }
  561. // match by exact string
  562. else {
  563. if ($node->getAttribute($name) != $value) {
  564. $invalid = TRUE;
  565. }
  566. }
  567. }
  568. // if every attribute given matched
  569. if (!$invalid) {
  570. $filtered[] = $node;
  571. }
  572. }
  573. $nodes = $filtered;
  574. $filtered = array();
  575. if (empty($nodes)) {
  576. return FALSE;
  577. }
  578. }
  579. // filter by content
  580. if ($options['content'] !== NULL) {
  581. foreach ($nodes as $node) {
  582. $invalid = FALSE;
  583. // match by regexp if like "regexp:/foo/i"
  584. if (preg_match('/^regexp\s*:\s*(.*)/i', $options['content'], $matches)) {
  585. if (!preg_match($matches[1], self::getNodeText($node))) {
  586. $invalid = TRUE;
  587. }
  588. }
  589. // match by exact string
  590. else if (strstr(self::getNodeText($node), $options['content']) === FALSE) {
  591. $invalid = TRUE;
  592. }
  593. if (!$invalid) {
  594. $filtered[] = $node;
  595. }
  596. }
  597. $nodes = $filtered;
  598. $filtered = array();
  599. if (empty($nodes)) {
  600. return FALSE;
  601. }
  602. }
  603. // filter by parent node
  604. if ($options['parent']) {
  605. $parentNodes = self::findNodes($dom, $options['parent']);
  606. $parentNode = isset($parentNodes[0]) ? $parentNodes[0] : NULL;
  607. foreach ($nodes as $node) {
  608. if ($parentNode !== $node->parentNode) {
  609. break;
  610. }
  611. $filtered[] = $node;
  612. }
  613. $nodes = $filtered;
  614. $filtered = array();
  615. if (empty($nodes)) {
  616. return FALSE;
  617. }
  618. }
  619. // filter by child node
  620. if ($options['child']) {
  621. $childNodes = self::findNodes($dom, $options['child']);
  622. $childNodes = !empty($childNodes) ? $childNodes : array();
  623. foreach ($nodes as $node) {
  624. foreach ($node->childNodes as $child) {
  625. foreach ($childNodes as $childNode) {
  626. if ($childNode === $child) {
  627. $filtered[] = $node;
  628. }
  629. }
  630. }
  631. }
  632. $nodes = $filtered;
  633. $filtered = array();
  634. if (empty($nodes)) {
  635. return FALSE;
  636. }
  637. }
  638. // filter by ancestor
  639. if ($options['ancestor']) {
  640. $ancestorNodes = self::findNodes($dom, $options['ancestor']);
  641. $ancestorNode = isset($ancestorNodes[0]) ? $ancestorNodes[0] : NULL;
  642. foreach ($nodes as $node) {
  643. $parent = $node->parentNode;
  644. while ($parent->nodeType != XML_HTML_DOCUMENT_NODE) {
  645. if ($parent === $ancestorNode) {
  646. $filtered[] = $node;
  647. }
  648. $parent = $parent->parentNode;
  649. }
  650. }
  651. $nodes = $filtered;
  652. $filtered = array();
  653. if (empty($nodes)) {
  654. return FALSE;
  655. }
  656. }
  657. // filter by descendant
  658. if ($options['descendant']) {
  659. $descendantNodes = self::findNodes($dom, $options['descendant']);
  660. $descendantNodes = !empty($descendantNodes) ? $descendantNodes : array();
  661. foreach ($nodes as $node) {
  662. foreach (self::getDescendants($node) as $descendant) {
  663. foreach ($descendantNodes as $descendantNode) {
  664. if ($descendantNode === $descendant) {
  665. $filtered[] = $node;
  666. }
  667. }
  668. }
  669. }
  670. $nodes = $filtered;
  671. $filtered = array();
  672. if (empty($nodes)) {
  673. return FALSE;
  674. }
  675. }
  676. // filter by children
  677. if ($options['children']) {
  678. $validChild = array('count', 'greater_than', 'less_than', 'only');
  679. $childOptions = self::assertValidKeys(
  680. $options['children'], $validChild
  681. );
  682. foreach ($nodes as $node) {
  683. $childNodes = $node->childNodes;
  684. foreach ($childNodes as $childNode) {
  685. if ($childNode->nodeType !== XML_CDATA_SECTION_NODE &&
  686. $childNode->nodeType !== XML_TEXT_NODE) {
  687. $children[] = $childNode;
  688. }
  689. }
  690. // we must have children to pass this filter
  691. if (!empty($children)) {
  692. // exact count of children
  693. if ($childOptions['count'] !== NULL) {
  694. if (count($children) !== $childOptions['count']) {
  695. break;
  696. }
  697. }
  698. // range count of children
  699. else if ($childOptions['less_than'] !== NULL &&
  700. $childOptions['greater_than'] !== NULL) {
  701. if (count($children) >= $childOptions['less_than'] ||
  702. count($children) <= $childOptions['greater_than']) {
  703. break;
  704. }
  705. }
  706. // less than a given count
  707. else if ($childOptions['less_than'] !== NULL) {
  708. if (count($children) >= $childOptions['less_than']) {
  709. break;
  710. }
  711. }
  712. // more than a given count
  713. else if ($childOptions['greater_than'] !== NULL) {
  714. if (count($children) <= $childOptions['greater_than']) {
  715. break;
  716. }
  717. }
  718. // match each child against a specific tag
  719. if ($childOptions['only']) {
  720. $onlyNodes = self::findNodes(
  721. $dom, $childOptions['only']
  722. );
  723. // try to match each child to one of the 'only' nodes
  724. foreach ($children as $child) {
  725. $matched = FALSE;
  726. foreach ($onlyNodes as $onlyNode) {
  727. if ($onlyNode === $child) {
  728. $matched = TRUE;
  729. }
  730. }
  731. if (!$matched) {
  732. break(2);
  733. }
  734. }
  735. }
  736. $filtered[] = $node;
  737. }
  738. }
  739. $nodes = $filtered;
  740. $filtered = array();
  741. if (empty($nodes)) {
  742. return;
  743. }
  744. }
  745. // return the first node that matches all criteria
  746. return !empty($nodes) ? $nodes : array();
  747. }
  748. /**
  749. * Recursively get flat array of all descendants of this node.
  750. *
  751. * @param DOMNode $node
  752. * @return array
  753. * @since Method available since Release 3.3.0
  754. * @author Mike Naberezny <mike@maintainable.com>
  755. * @author Derek DeVries <derek@maintainable.com>
  756. */
  757. protected static function getDescendants(DOMNode $node)
  758. {
  759. $allChildren = array();
  760. $childNodes = $node->childNodes ? $node->childNodes : array();
  761. foreach ($childNodes as $child) {
  762. if ($child->nodeType === XML_CDATA_SECTION_NODE ||
  763. $child->nodeType === XML_TEXT_NODE) {
  764. continue;
  765. }
  766. $children = self::getDescendants($child);
  767. $allChildren = array_merge($allChildren, $children, array($child));
  768. }
  769. return isset($allChildren) ? $allChildren : array();
  770. }
  771. /**
  772. * Gets elements by case insensitive tagname.
  773. *
  774. * @param DOMDocument $dom
  775. * @param string $tag
  776. * @return DOMNodeList
  777. * @since Method available since Release 3.4.0
  778. */
  779. protected static function getElementsByCaseInsensitiveTagName(DOMDocument $dom, $tag)
  780. {
  781. $elements = $dom->getElementsByTagName(strtolower($tag));
  782. if ($elements->length == 0) {
  783. $elements = $dom->getElementsByTagName(strtoupper($tag));
  784. }
  785. return $elements;
  786. }
  787. /**
  788. * Get the text value of this node's child text node.
  789. *
  790. * @param DOMNode $node
  791. * @return string
  792. * @since Method available since Release 3.3.0
  793. * @author Mike Naberezny <mike@maintainable.com>
  794. * @author Derek DeVries <derek@maintainable.com>
  795. */
  796. protected static function getNodeText(DOMNode $node)
  797. {
  798. if (!$node->childNodes instanceof DOMNodeList) {
  799. return '';
  800. }
  801. $result = '';
  802. foreach ($node->childNodes as $childNode) {
  803. if ($childNode->nodeType === XML_TEXT_NODE) {
  804. $result .= trim($childNode->data) . ' ';
  805. } else {
  806. $result .= self::getNodeText($childNode);
  807. }
  808. }
  809. return str_replace(' ', ' ', $result);
  810. }
  811. }