PageRenderTime 32ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 1ms

/vendor/symfony/symfony/src/Symfony/Component/DomCrawler/Crawler.php

https://bitbucket.org/tippycracker/autokraitis
PHP | 1204 lines | 604 code | 161 blank | 439 comment | 68 complexity | 4bdb967fd4256860eae26cabaec9e852 MD5 | raw file
Possible License(s): BSD-2-Clause, GPL-2.0, GPL-3.0, BSD-3-Clause, Apache-2.0
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\DomCrawler;
  11. use Symfony\Component\CssSelector\CssSelectorConverter;
  12. /**
  13. * Crawler eases navigation of a list of \DOMNode objects.
  14. *
  15. * @author Fabien Potencier <fabien@symfony.com>
  16. */
  17. class Crawler extends \SplObjectStorage
  18. {
  19. protected $uri;
  20. /**
  21. * @var string The default namespace prefix to be used with XPath and CSS expressions
  22. */
  23. private $defaultNamespacePrefix = 'default';
  24. /**
  25. * @var array A map of manually registered namespaces
  26. */
  27. private $namespaces = array();
  28. /**
  29. * @var string The base href value
  30. */
  31. private $baseHref;
  32. /**
  33. * @var \DOMDocument|null
  34. */
  35. private $document;
  36. /**
  37. * Whether the Crawler contains HTML or XML content (used when converting CSS to XPath).
  38. *
  39. * @var bool
  40. */
  41. private $isHtml = true;
  /**
   * Creates a crawler and seeds it with an optional initial node.
   *
   * @param mixed  $node     A node to use as the base for the crawling (anything add() accepts)
   * @param string $uri      The current URI
   * @param string $baseHref The base href value; the URI is used when this is empty
   */
  public function __construct($node = null, $uri = null, $baseHref = null)
  {
      $this->uri = $uri;

      // Fall back to the URI whenever no usable base href was given.
      $this->baseHref = $uri;
      if ($baseHref) {
          $this->baseHref = $baseHref;
      }

      $this->add($node);
  }
  /**
   * Empties the crawler: forgets the tracked document and removes every node.
   */
  public function clear()
  {
      $this->document = null;

      // Call the parent directly so the deprecated removeAll() override is bypassed.
      parent::removeAll($this);
  }
  /**
   * Adds a node to the current list of nodes.
   *
   * Dispatches to the specialized add*() method that matches the
   * argument type; null is accepted and ignored.
   *
   * @param \DOMNodeList|\DOMNode|array|string|null $node A node
   *
   * @throws \InvalidArgumentException when node is not the expected type
   */
  public function add($node)
  {
      if (null === $node) {
          return;
      }

      if ($node instanceof \DOMNodeList) {
          $this->addNodeList($node);

          return;
      }

      if ($node instanceof \DOMNode) {
          $this->addNode($node);

          return;
      }

      if (is_array($node)) {
          $this->addNodes($node);

          return;
      }

      if (is_string($node)) {
          $this->addContent($node);

          return;
      }

      $given = is_object($node) ? get_class($node) : gettype($node);

      throw new \InvalidArgumentException(sprintf('Expecting a DOMNodeList or DOMNode instance, an array, a string, or null, but got "%s".', $given));
  }
  /**
   * Adds HTML/XML content.
   *
   * When no content type is given, content starting with "<?xml" is treated
   * as "application/xml" and everything else as "text/html". Content whose
   * type contains neither "xml" nor "html" is ignored.
   *
   * The charset is taken, in order, from the "charset=" parameter of the
   * content type, from a <meta ... charset=...> declaration in the content,
   * and finally defaults to ISO-8859-1, which is the default charset defined
   * by the HTTP 1.1 specification.
   *
   * @param string      $content A string to parse as HTML/XML
   * @param null|string $type    The content type of the string
   */
  public function addContent($content, $type = null)
  {
      if (empty($type)) {
          $type = 0 === strpos($content, '<?xml') ? 'application/xml' : 'text/html';
      }

      // DOM only for HTML/XML content
      if (!preg_match('/(x|ht)ml/i', $type, $xmlMatches)) {
          return;
      }

      $charset = null;
      if (false !== $pos = stripos($type, 'charset=')) {
          $charset = (string) substr($type, $pos + 8);
          if (false !== $pos = strpos($charset, ';')) {
              $charset = (string) substr($charset, 0, $pos);
          }

          // Parameter values may be quoted or padded with whitespace
          // (e.g. charset="utf-8"); keep only the bare charset name.
          $charset = trim($charset, " \t\r\n\"'");

          // An empty "charset=" carries no information: fall through to the
          // <meta> detection and the default below.
          if ('' === $charset) {
              $charset = null;
          }
      }

      // http://www.w3.org/TR/encoding/#encodings
      // http://www.w3.org/TR/REC-xml/#NT-EncName
      if (null === $charset &&
          preg_match('/\<meta[^\>]+charset *= *["\']?([a-zA-Z\-0-9_:.]+)/i', $content, $matches)) {
          $charset = $matches[1];
      }

      if (null === $charset) {
          $charset = 'ISO-8859-1';
      }

      // The pattern above is case-insensitive, so the captured group may be
      // "X" (e.g. "text/XML"); normalize it before choosing the parser.
      if ('x' === strtolower($xmlMatches[1])) {
          $this->addXmlContent($content, $charset);
      } else {
          $this->addHtmlContent($content, $charset);
      }
  }
  /**
   * Adds an HTML content to the list of nodes.
   *
   * The libxml errors are disabled when the content is parsed.
   *
   * If you want to get parsing errors, be sure to enable
   * internal errors via libxml_use_internal_errors(true)
   * and then, get the errors via libxml_get_errors(). Be
   * sure to clear errors with libxml_clear_errors() afterward.
   *
   * After parsing, a non-empty href of a <base> element found in the
   * content updates the crawler's base href (resolved against the
   * current base href when there is one).
   *
   * @param string $content The HTML content
   * @param string $charset The charset
   */
  public function addHtmlContent($content, $charset = 'UTF-8')
  {
      $internalErrors = libxml_use_internal_errors(true);
      $disableEntities = libxml_disable_entity_loader(true);

      $dom = new \DOMDocument('1.0', $charset);
      $dom->validateOnParse = true;

      // Turn anything mb_convert_encoding() reports into an exception so that
      // a failed conversion simply leaves $content untouched.
      set_error_handler(function () { throw new \Exception(); });

      try {
          // Convert charset to HTML-entities to work around bugs in DOMDocument::loadHTML()
          $content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
      } catch (\Exception $e) {
          // conversion failed: parse the content as given
      } catch (\ValueError $e) {
          // PHP 8 throws ValueError (not an \Exception) for an unknown
          // encoding; without this clause the handler installed above and the
          // libxml settings would never be restored. On older PHP versions
          // this clause simply never matches.
      }

      restore_error_handler();

      if ('' !== trim($content)) {
          @$dom->loadHTML($content);
      }

      libxml_use_internal_errors($internalErrors);
      libxml_disable_entity_loader($disableEntities);

      $this->addDocument($dom);

      $base = $this->filterRelativeXPath('descendant-or-self::base')->extract(array('href'));

      $baseHref = current($base);
      if (count($base) && !empty($baseHref)) {
          if ($this->baseHref) {
              // Resolve the <base> href against the current base href by
              // wrapping it in a temporary link node.
              $linkNode = $dom->createElement('a');
              $linkNode->setAttribute('href', $baseHref);
              $link = new Link($linkNode, $this->baseHref);
              $this->baseHref = $link->getUri();
          } else {
              $this->baseHref = $baseHref;
          }
      }
  }
  /**
   * Adds an XML content to the list of nodes.
   *
   * The libxml errors are disabled when the content is parsed.
   *
   * If you want to get parsing errors, be sure to enable
   * internal errors via libxml_use_internal_errors(true)
   * and then, get the errors via libxml_get_errors(). Be
   * sure to clear errors with libxml_clear_errors() afterward.
   *
   * @param string $content The XML content
   * @param string $charset The charset
   * @param int    $options Bitwise OR of the libxml option constants
   *                        LIBXML_PARSEHUGE is dangerous, see
   *                        http://symfony.com/blog/security-release-symfony-2-0-17-released
   */
  public function addXmlContent($content, $charset = 'UTF-8', $options = LIBXML_NONET)
  {
      // When no prefixed namespace ("xmlns:") is declared, rename every
      // "xmlns" to "ns" so the default namespace disappears and XPath
      // expressions stay simple.
      $hasPrefixedNamespace = false !== strpos($content, 'xmlns:');
      if (!$hasPrefixedNamespace) {
          $content = str_replace('xmlns', 'ns', $content);
      }

      $previousInternalErrors = libxml_use_internal_errors(true);
      $previousEntityLoader = libxml_disable_entity_loader(true);

      $document = new \DOMDocument('1.0', $charset);
      $document->validateOnParse = true;

      if ('' !== trim($content)) {
          @$document->loadXML($content, $options);
      }

      // Put the libxml settings back the way they were found.
      libxml_use_internal_errors($previousInternalErrors);
      libxml_disable_entity_loader($previousEntityLoader);

      $this->addDocument($document);

      $this->isHtml = false;
  }
  /**
   * Adds the root element of a \DOMDocument to the list of nodes.
   *
   * A document without a root element adds nothing.
   *
   * @param \DOMDocument $dom A \DOMDocument instance
   */
  public function addDocument(\DOMDocument $dom)
  {
      $root = $dom->documentElement;
      if (!$root) {
          return;
      }

      $this->addNode($root);
  }
  /**
   * Adds every \DOMNode of a \DOMNodeList to the list of nodes.
   *
   * @param \DOMNodeList $nodes A \DOMNodeList instance
   */
  public function addNodeList(\DOMNodeList $nodes)
  {
      foreach ($nodes as $item) {
          // Skip anything in the list that is not a DOM node.
          if (!$item instanceof \DOMNode) {
              continue;
          }

          $this->addNode($item);
      }
  }
  /**
   * Adds an array of \DOMNode instances to the list of nodes.
   *
   * Each entry is passed through add(), so every entry may be of any
   * type add() accepts.
   *
   * @param \DOMNode[] $nodes An array of \DOMNode instances
   */
  public function addNodes(array $nodes)
  {
      foreach ($nodes as $entry) {
          $this->add($entry);
      }
  }
  /**
   * Adds a \DOMNode instance to the list of nodes.
   *
   * A \DOMDocument is replaced by its root element; a document without a
   * root element adds nothing. The owner document of the first node added
   * is remembered, and attaching a node from another document triggers a
   * deprecation notice.
   *
   * @param \DOMNode $node A \DOMNode instance
   */
  public function addNode(\DOMNode $node)
  {
      if ($node instanceof \DOMDocument) {
          $node = $node->documentElement;

          // An empty document has no root element; there is nothing to
          // attach (same guard as addDocument()).
          if (null === $node) {
              return;
          }
      }

      if (null !== $this->document && $this->document !== $node->ownerDocument) {
          @trigger_error('Attaching DOM nodes from multiple documents in a Crawler is deprecated as of 2.8 and will be forbidden in 3.0.', E_USER_DEPRECATED);
      }

      if (null === $this->document) {
          $this->document = $node->ownerDocument;
      }

      // Call the parent directly so the deprecated attach() override is bypassed.
      parent::attach($node);
  }
  /**
   * Always refuses to unserialize.
   *
   * Serializing and unserializing a crawler creates DOM objects in a
   * corrupted state. DOM elements are not properly serializable.
   *
   * @param string $serialized Ignored
   *
   * @throws \BadMethodCallException always
   */
  public function unserialize($serialized)
  {
      $message = 'A Crawler cannot be serialized.';

      throw new \BadMethodCallException($message);
  }
  /**
   * Always refuses to serialize: DOM elements are not properly serializable.
   *
   * @throws \BadMethodCallException always
   */
  public function serialize()
  {
      $message = 'A Crawler cannot be serialized.';

      throw new \BadMethodCallException($message);
  }
  /**
   * Returns a crawler for the node at the given position in the node list.
   *
   * An empty crawler is returned when no node sits at that position.
   *
   * @param int $position The position
   *
   * @return self
   */
  public function eq($position)
  {
      $match = null;

      foreach ($this as $index => $candidate) {
          if ($index == $position) {
              $match = $candidate;
              break;
          }
      }

      return $this->createSubCrawler($match);
  }
  /**
   * Calls an anonymous function on each node of the list.
   *
   * The anonymous function receives the node wrapped in a Crawler
   * instance and its position as arguments.
   *
   * Example:
   *
   *     $crawler->filter('h1')->each(function ($node, $i) {
   *         return $node->text();
   *     });
   *
   * @param \Closure $closure An anonymous function
   *
   * @return array An array of values returned by the anonymous function
   */
  public function each(\Closure $closure)
  {
      $results = array();

      foreach ($this as $index => $domNode) {
          $wrapped = $this->createSubCrawler($domNode);
          $results[] = $closure($wrapped, $index);
      }

      return $results;
  }
  /**
   * Slices the list of nodes by $offset and $length.
   *
   * @param int $offset Position of the first node to keep
   * @param int $length Number of nodes to keep (-1 means no limit)
   *
   * @return self
   */
  public function slice($offset = 0, $length = -1)
  {
      $window = new \LimitIterator($this, $offset, $length);
      $selected = iterator_to_array($window);

      return $this->createSubCrawler($selected);
  }
  /**
   * Reduces the list of nodes by calling an anonymous function.
   *
   * The function receives each node wrapped in a Crawler and its position.
   * To remove a node from the list, the anonymous function must return false.
   *
   * @param \Closure $closure An anonymous function
   *
   * @return self
   */
  public function reduce(\Closure $closure)
  {
      $kept = array();

      foreach ($this as $index => $candidate) {
          $verdict = $closure($this->createSubCrawler($candidate), $index);

          // Only a strict false drops the node; any other value keeps it.
          if (false === $verdict) {
              continue;
          }

          $kept[] = $candidate;
      }

      return $this->createSubCrawler($kept);
  }
  /**
   * Returns the first node of the current selection.
   *
   * @return self
   */
  public function first()
  {
      $firstPosition = 0;

      return $this->eq($firstPosition);
  }
  /**
   * Returns the last node of the current selection.
   *
   * @return self
   */
  public function last()
  {
      $lastPosition = count($this) - 1;

      return $this->eq($lastPosition);
  }
  /**
   * Returns the siblings nodes of the current selection.
   *
   * The walk starts at the first child of the first node's parent.
   *
   * @return self
   *
   * @throws \InvalidArgumentException When current node is empty
   */
  public function siblings()
  {
      if (0 === count($this)) {
          throw new \InvalidArgumentException('The current node list is empty.');
      }

      $firstOfParent = $this->getNode(0)->parentNode->firstChild;
      $siblings = $this->sibling($firstOfParent);

      return $this->createSubCrawler($siblings);
  }
  /**
   * Returns the next siblings nodes of the current selection.
   *
   * @return self
   *
   * @throws \InvalidArgumentException When current node is empty
   */
  public function nextAll()
  {
      if (0 === count($this)) {
          throw new \InvalidArgumentException('The current node list is empty.');
      }

      $following = $this->sibling($this->getNode(0));

      return $this->createSubCrawler($following);
  }
  /**
   * Returns the previous sibling nodes of the current selection.
   *
   * @return self
   *
   * @throws \InvalidArgumentException When current node is empty
   */
  public function previousAll()
  {
      if (0 === count($this)) {
          throw new \InvalidArgumentException('The current node list is empty.');
      }

      $preceding = $this->sibling($this->getNode(0), 'previousSibling');

      return $this->createSubCrawler($preceding);
  }
  /**
   * Returns the parents nodes of the current selection.
   *
   * Walks up from the first node and collects every ancestor that is an
   * element node, nearest ancestor first.
   *
   * @return self
   *
   * @throws \InvalidArgumentException When current node is empty
   */
  public function parents()
  {
      if (0 === count($this)) {
          throw new \InvalidArgumentException('The current node list is empty.');
      }

      $ancestors = array();

      for ($ancestor = $this->getNode(0)->parentNode; $ancestor; $ancestor = $ancestor->parentNode) {
          if (XML_ELEMENT_NODE === $ancestor->nodeType) {
              $ancestors[] = $ancestor;
          }
      }

      return $this->createSubCrawler($ancestors);
  }
  /**
   * Returns the children nodes of the current selection.
   *
   * @return self
   *
   * @throws \InvalidArgumentException When current node is empty
   */
  public function children()
  {
      if (0 === count($this)) {
          throw new \InvalidArgumentException('The current node list is empty.');
      }

      $firstChild = $this->getNode(0)->firstChild;

      // No child at all: hand back an empty crawler.
      if (!$firstChild) {
          return $this->createSubCrawler(array());
      }

      return $this->createSubCrawler($this->sibling($firstChild));
  }
  /**
   * Returns the attribute value of the first node of the list.
   *
   * @param string $attribute The attribute name
   *
   * @return string|null The attribute value or null if the attribute does not exist
   *
   * @throws \InvalidArgumentException When current node is empty
   */
  public function attr($attribute)
  {
      if (0 === count($this)) {
          throw new \InvalidArgumentException('The current node list is empty.');
      }

      $element = $this->getNode(0);

      if (!$element->hasAttribute($attribute)) {
          return null;
      }

      return $element->getAttribute($attribute);
  }
  /**
   * Returns the node name of the first node of the list.
   *
   * @return string The node name
   *
   * @throws \InvalidArgumentException When current node is empty
   */
  public function nodeName()
  {
      if (0 === count($this)) {
          throw new \InvalidArgumentException('The current node list is empty.');
      }

      $firstNode = $this->getNode(0);

      return $firstNode->nodeName;
  }
  /**
   * Returns the node value of the first node of the list.
   *
   * @return string The node value
   *
   * @throws \InvalidArgumentException When current node is empty
   */
  public function text()
  {
      if (0 === count($this)) {
          throw new \InvalidArgumentException('The current node list is empty.');
      }

      $firstNode = $this->getNode(0);

      return $firstNode->nodeValue;
  }
  /**
   * Returns the first node of the list as HTML.
   *
   * The result is the concatenated HTML of the node's children; the
   * node's own tag is not included.
   *
   * @return string The node html
   *
   * @throws \InvalidArgumentException When current node is empty
   */
  public function html()
  {
      if (0 === count($this)) {
          throw new \InvalidArgumentException('The current node list is empty.');
      }

      $fragments = array();
      foreach ($this->getNode(0)->childNodes as $childNode) {
          $fragments[] = $childNode->ownerDocument->saveHTML($childNode);
      }

      return implode('', $fragments);
  }
  /**
   * Extracts information from the list of nodes.
   *
   * You can extract attributes or/and the node value (_text).
   *
   * Example:
   *
   *     $crawler->filter('h1 a')->extract(array('_text', 'href'));
   *
   * For each node the result holds the single value when exactly one
   * attribute is requested, and an array of values otherwise (an empty
   * array when no attribute is requested).
   *
   * @param array $attributes An array of attributes
   *
   * @return array An array of extracted values
   */
  public function extract($attributes)
  {
      $attributes = (array) $attributes;
      $count = count($attributes);

      $data = array();
      foreach ($this as $node) {
          $elements = array();
          foreach ($attributes as $attribute) {
              if ('_text' === $attribute) {
                  $elements[] = $node->nodeValue;
              } else {
                  $elements[] = $node->getAttribute($attribute);
              }
          }

          // Unwrap only when exactly one value was collected; testing for
          // "more than one" instead would read $elements[0] from an empty
          // array when no attribute is requested.
          $data[] = 1 === $count ? $elements[0] : $elements;
      }

      return $data;
  }
  /**
   * Filters the list of nodes with an XPath expression.
   *
   * The XPath expression is evaluated in the context of the crawler, which
   * is considered as a fake parent of the elements inside it.
   * This means that a child selector "div" or "./div" will match only
   * the div elements of the current crawler, not their children.
   *
   * @param string $xpath An XPath expression
   *
   * @return self
   */
  public function filterXPath($xpath)
  {
      $relative = $this->relativize($xpath);

      if ('' !== $relative) {
          return $this->filterRelativeXPath($relative);
      }

      // If we dropped all expressions in the XPath while preparing it, there would be no match
      return $this->createSubCrawler(null);
  }
  /**
   * Filters the list of nodes with a CSS selector.
   *
   * This method only works if you have installed the CssSelector Symfony Component.
   *
   * @param string $selector A CSS selector
   *
   * @return self
   *
   * @throws \RuntimeException if the CssSelector Component is not available
   */
  public function filter($selector)
  {
      $converterAvailable = class_exists('Symfony\\Component\\CssSelector\\CssSelectorConverter');
      if (!$converterAvailable) {
          throw new \RuntimeException('Unable to filter with a CSS selector as the Symfony CssSelector 2.8+ is not installed (you can use filterXPath instead).');
      }

      $converter = new CssSelectorConverter($this->isHtml);
      $xpath = $converter->toXPath($selector);

      // The CssSelector already prefixes the selector with descendant-or-self::
      return $this->filterRelativeXPath($xpath);
  }
  /**
   * Selects links by name or alt value for clickable images.
   *
   * @param string $value The link text
   *
   * @return self
   */
  public function selectLink($value)
  {
      // Pad with spaces so the text is matched as a whole word sequence.
      $needle = static::xpathLiteral(' '.$value.' ');

      $byText = sprintf('descendant-or-self::a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) ', $needle);
      $byImageAlt = sprintf('or ./img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]]', $needle);

      return $this->filterRelativeXPath($byText.$byImageAlt);
  }
  /**
   * Selects a button by name or alt value for images.
   *
   * Matches input elements of type submit/button by value, image inputs
   * by alt, and button elements by their text; any of them also matches
   * when its id or name equals the given value.
   *
   * @param string $value The button text
   *
   * @return self
   */
  public function selectButton($value)
  {
      // Lower-cases the type attribute inside the XPath expression.
      $translate = 'translate(@type, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")';

      $padded = static::xpathLiteral(' '.$value.' ');
      $exact = static::xpathLiteral($value);

      $submitInputs = sprintf('descendant-or-self::input[((contains(%s, "submit") or contains(%1$s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $padded);
      $imageInputs = sprintf('or (contains(%s, "image") and contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)) or @id=%s or @name=%s] ', $translate, $padded, $exact, $exact);
      $buttons = sprintf('| descendant-or-self::button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id=%s or @name=%s]', $padded, $exact, $exact);

      return $this->filterRelativeXPath($submitInputs.$imageInputs.$buttons);
  }
  /**
   * Returns a Link object for the first node in the list.
   *
   * @param string $method The method for the link (get by default)
   *
   * @return Link A Link instance
   *
   * @throws \InvalidArgumentException If the current node list is empty or the selected node is not instance of DOMElement
   */
  public function link($method = 'get')
  {
      if (0 === count($this)) {
          throw new \InvalidArgumentException('The current node list is empty.');
      }

      $element = $this->getNode(0);

      if ($element instanceof \DOMElement) {
          return new Link($element, $this->baseHref, $method);
      }

      throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', get_class($element)));
  }
  /**
   * Returns an array of Link objects for the nodes in the list.
   *
   * Every link is created with the "get" method and the crawler's base href.
   *
   * @return Link[] An array of Link instances
   *
   * @throws \InvalidArgumentException If the current node list contains non-DOMElement instances
   */
  public function links()
  {
      $collected = array();

      foreach ($this as $element) {
          $isElement = $element instanceof \DOMElement;
          if (!$isElement) {
              throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', get_class($element)));
          }

          $collected[] = new Link($element, $this->baseHref, 'get');
      }

      return $collected;
  }
  /**
   * Returns a Form object for the first node in the list.
   *
   * @param array  $values An array of values for the form fields
   * @param string $method The method for the form
   *
   * @return Form A Form instance
   *
   * @throws \InvalidArgumentException If the current node list is empty or the selected node is not instance of DOMElement
   */
  public function form(array $values = null, $method = null)
  {
      if (0 === count($this)) {
          throw new \InvalidArgumentException('The current node list is empty.');
      }

      $element = $this->getNode(0);

      if (!$element instanceof \DOMElement) {
          throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', get_class($element)));
      }

      $built = new Form($element, $this->uri, $method, $this->baseHref);

      // Field values are only applied when the caller supplied some.
      if (null === $values) {
          return $built;
      }

      $built->setValues($values);

      return $built;
  }
  /**
   * Overloads a default namespace prefix to be used with XPath and CSS expressions.
   *
   * @param string $prefix The prefix that replaces the initial 'default' value
   */
  public function setDefaultNamespacePrefix($prefix)
  {
      $this->defaultNamespacePrefix = $prefix;
  }
  /**
   * Records a namespace in the map of manually registered namespaces.
   *
   * Registering the same prefix again overwrites the earlier namespace.
   *
   * @param string $prefix    The namespace prefix (used as the map key)
   * @param string $namespace The namespace stored under that prefix
   */
  public function registerNamespace($prefix, $namespace)
  {
      $this->namespaces[$prefix] = $namespace;
  }
  /**
   * Converts string for XPath expressions.
   *
   * Escaped characters are: quotes (") and apostrophe (').
   *
   *  Examples:
   *  <code>
   *     echo Crawler::xpathLiteral('foo " bar');
   *     //prints 'foo " bar'
   *
   *     echo Crawler::xpathLiteral("foo ' bar");
   *     //prints "foo ' bar"
   *
   *     echo Crawler::xpathLiteral('a\'b"c');
   *     //prints concat('a', "'", 'b"c')
   *  </code>
   *
   * @param string $s String to be escaped
   *
   * @return string Converted string
   */
  public static function xpathLiteral($s)
  {
      // No apostrophe: an apostrophe-delimited literal is safe.
      if (false === strpos($s, "'")) {
          return "'".$s."'";
      }

      // Apostrophes but no double quote: delimit with double quotes instead.
      if (false === strpos($s, '"')) {
          return '"'.$s.'"';
      }

      // Both kinds of quote: split on apostrophes, wrap each piece in
      // apostrophes, and glue the pieces back together with a
      // double-quoted apostrophe literal inside concat().
      $pieces = array();
      foreach (explode("'", $s) as $piece) {
          $pieces[] = "'".$piece."'";
      }

      return 'concat('.implode(', "\'", ', $pieces).')';
  }
  /**
   * Reports the deprecation, then attaches the object through the parent storage.
   *
   * @param object $object The object to attach
   * @param mixed  $data   Data passed along to the parent implementation
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function attach($object, $data = null)
  {
      $this->triggerDeprecation(__METHOD__);

      parent::attach($object, $data);
  }
  /**
   * Reports the deprecation, then detaches the object through the parent storage.
   *
   * @param object $object The object to detach
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function detach($object)
  {
      $this->triggerDeprecation(__METHOD__);

      parent::detach($object);
  }
  /**
   * Reports the deprecation, then returns the parent storage's contains() result.
   *
   * @param object $object The object to look for
   *
   * @return mixed Whatever the parent implementation returns
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function contains($object)
  {
      $this->triggerDeprecation(__METHOD__);

      $found = parent::contains($object);

      return $found;
  }
  /**
   * Reports the deprecation, then forwards the storage to the parent's addAll().
   *
   * @param \SplObjectStorage $storage The storage handed to the parent implementation
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function addAll($storage)
  {
      $this->triggerDeprecation(__METHOD__);

      parent::addAll($storage);
  }
  /**
   * Reports the deprecation, then forwards the storage to the parent's removeAll().
   *
   * @param \SplObjectStorage $storage The storage handed to the parent implementation
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function removeAll($storage)
  {
      $this->triggerDeprecation(__METHOD__);

      parent::removeAll($storage);
  }
  /**
   * Reports the deprecation, then forwards the storage to the parent's removeAllExcept().
   *
   * @param \SplObjectStorage $storage The storage handed to the parent implementation
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function removeAllExcept($storage)
  {
      $this->triggerDeprecation(__METHOD__);

      parent::removeAllExcept($storage);
  }
  /**
   * Reports the deprecation, then returns the parent storage's getInfo() result.
   *
   * @return mixed Whatever the parent implementation returns
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function getInfo()
  {
      $this->triggerDeprecation(__METHOD__);

      $info = parent::getInfo();

      return $info;
  }
  /**
   * Reports the deprecation, then forwards the data to the parent's setInfo().
   *
   * @param mixed $data The data handed to the parent implementation
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function setInfo($data)
  {
      $this->triggerDeprecation(__METHOD__);

      parent::setInfo($data);
  }
  /**
   * Reports the deprecation, then returns the parent storage's offsetExists() result.
   *
   * @param object $object The object to look for
   *
   * @return mixed Whatever the parent implementation returns
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function offsetExists($object)
  {
      $this->triggerDeprecation(__METHOD__);

      $exists = parent::offsetExists($object);

      return $exists;
  }
  /**
   * Reports the deprecation, then forwards to the parent's offsetSet().
   *
   * @param object $object The object used as offset
   * @param mixed  $data   Data passed along to the parent implementation
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function offsetSet($object, $data = null)
  {
      $this->triggerDeprecation(__METHOD__);

      parent::offsetSet($object, $data);
  }
  /**
   * Reports the deprecation, then forwards to the parent's offsetUnset().
   *
   * @param object $object The object used as offset
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function offsetUnset($object)
  {
      $this->triggerDeprecation(__METHOD__);

      parent::offsetUnset($object);
  }
  /**
   * Reports the deprecation, then returns the parent storage's offsetGet() result.
   *
   * @param object $object The object used as offset
   *
   * @return mixed Whatever the parent implementation returns
   *
   * @deprecated Using the SplObjectStorage API on the Crawler is deprecated as of 2.8 and will be removed in 3.0.
   */
  public function offsetGet($object)
  {
      $this->triggerDeprecation(__METHOD__);

      $value = parent::offsetGet($object);

      return $value;
  }
  /**
   * Filters the list of nodes with an XPath expression.
   *
   * The XPath expression should already be processed to apply it in the context of each node.
   * The expression is run once per node of the crawler, with that node as
   * context, and all matches are gathered in a single new crawler.
   *
   * @param string $xpath
   *
   * @return self
   */
  private function filterRelativeXPath($xpath)
  {
      $result = $this->createSubCrawler(null);
      $prefixes = $this->findNamespacePrefixes($xpath);

      foreach ($this as $contextNode) {
          $evaluator = $this->createDOMXPath($contextNode->ownerDocument, $prefixes);
          $matches = $evaluator->query($xpath, $contextNode);
          $result->add($matches);
      }

      return $result;
  }
  /**
   * Make the XPath relative to the current context.
   *
   * The returned XPath will match elements matching the XPath inside the current crawler
   * when running in the context of a node of the crawler.
   *
   * The expression is split on top-level "|" (outside quotes and square
   * brackets); each member of the union is rewritten separately and the
   * members are joined back with " | ". An expression recognised as
   * invalid is returned unchanged.
   *
   * @param string $xpath
   *
   * @return string
   */
  private function relativize($xpath)
  {
      $rewritten = array();

      // An expression which will never match, used in place of union members
      // that cannot match anything inside the crawler.
      $nonMatchingExpression = 'a[name() = "b"]';

      $length = strlen($xpath);
      $bracketDepth = 0;

      // Skip leading whitespace.
      $startPosition = strspn($xpath, " \t\n\r\0\x0B");

      for ($i = $startPosition; $i <= $length; ++$i) {
          // Jump to the next character that needs attention.
          $i += strcspn($xpath, '"\'[]|', $i);

          if ($i < $length) {
              $char = $xpath[$i];

              if ('"' === $char || "'" === $char) {
                  // Skip the whole quoted string.
                  if (false === $i = strpos($xpath, $char, $i + 1)) {
                      return $xpath; // The XPath expression is invalid
                  }

                  continue;
              }

              if ('[' === $char) {
                  ++$bracketDepth;

                  continue;
              }

              if (']' === $char) {
                  --$bracketDepth;

                  continue;
              }
          }

          // A "|" (or the end of the string) inside a predicate does not end a union member.
          if ($bracketDepth) {
              continue;
          }

          if ($startPosition < $length && '(' === $xpath[$startPosition]) {
              // If the union is inside some braces, we need to preserve the opening braces and apply
              // the change only inside it.
              $j = 1 + strspn($xpath, "( \t\n\r\0\x0B", $startPosition + 1);
              $parenthesis = substr($xpath, $startPosition, $j);
              $startPosition += $j;
          } else {
              $parenthesis = '';
          }

          $expression = rtrim(substr($xpath, $startPosition, $i - $startPosition));

          // BC for Symfony 2.4 and lower, where elements were added in a fake _root parent
          if (0 === strpos($expression, '/_root/')) {
              @trigger_error('XPath expressions referencing the fake root node are deprecated since version 2.8 and will be unsupported in 3.0. Please use "./" instead of "/_root/".', E_USER_DEPRECATED);

              $expression = './'.substr($expression, 7);
          } elseif (0 === strpos($expression, 'self::*/')) {
              $expression = './'.substr($expression, 8);
          }

          // add prefix before absolute element selector
          if ('' === $expression) {
              $expression = $nonMatchingExpression;
          } elseif (0 === strpos($expression, '//')) {
              $expression = 'descendant-or-self::'.substr($expression, 2);
          } elseif (0 === strpos($expression, './/')) {
              $expression = 'descendant-or-self::'.substr($expression, 3);
          } elseif (0 === strpos($expression, './')) {
              $expression = 'self::'.substr($expression, 2);
          } elseif (0 === strpos($expression, 'child::')) {
              $expression = 'self::'.substr($expression, 7);
          } elseif ('/' === $expression[0] || 0 === strpos($expression, 'self::')) {
              // the only direct child in Symfony 2.4 and lower is _root, which is already handled previously
              // so let's drop the expression entirely
              $expression = $nonMatchingExpression;
          } elseif ('.' === $expression[0]) {
              // '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results
              $expression = $nonMatchingExpression;
          } elseif (0 === strpos($expression, 'descendant::')) {
              $expression = 'descendant-or-self::'.substr($expression, 12);
          } elseif (preg_match('/^(ancestor|ancestor-or-self|attribute|following|following-sibling|namespace|parent|preceding|preceding-sibling)::/', $expression)) {
              // the fake root has no parent, preceding or following nodes and also no attributes (even no namespace attributes)
              $expression = $nonMatchingExpression;
          } elseif (0 !== strpos($expression, 'descendant-or-self::')) {
              $expression = 'self::'.$expression;
          }

          $rewritten[] = $parenthesis.$expression;

          // Reached the end of the input: the union is complete.
          if ($i === $length) {
              return implode(' | ', $rewritten);
          }

          // Skip whitespace after the "|" and start the next union member there.
          $i += strspn($xpath, " \t\n\r\0\x0B", $i + 1);
          $startPosition = $i + 1;
      }

      return $xpath; // The XPath expression is invalid
  }
  /**
   * Returns the node stored at the given position.
   *
   * Positions are the iteration keys of this storage. They are compared
   * loosely, so a numeric string matches the corresponding integer key.
   *
   * @param int $position
   *
   * @return \DOMElement|null The matching node, or null when no key matches
   */
  public function getNode($position)
  {
      foreach ($this as $index => $candidate) {
          if ($index != $position) {
              continue;
          }

          return $candidate;
      }

      return null;
  }
  /**
   * Collects the element nodes found while walking a sibling chain.
   *
   * Starting at $node, the chain is followed through the property named by
   * $siblingDir until that property yields a falsy value. Only element nodes
   * are kept, and the first node of this crawler is left out of the result.
   *
   * @param \DOMElement $node       The node to start walking from
   * @param string      $siblingDir Name of the node property to follow ('nextSibling' by default)
   *
   * @return array The collected element nodes, in traversal order
   */
  protected function sibling($node, $siblingDir = 'nextSibling')
  {
      $nodes = array();

      // The crawler's first node does not change during the walk, so it is
      // resolved once instead of re-iterating the storage for every sibling.
      $currentNode = $this->getNode(0);

      do {
          if ($node !== $currentNode && XML_ELEMENT_NODE === $node->nodeType) {
              $nodes[] = $node;
          }
      } while ($node = $node->$siblingDir);

      return $nodes;
  }
  /**
   * Builds a DOMXPath for a document and registers the requested prefixes.
   *
   * Each prefix is resolved through discoverNamespace(); prefixes that do not
   * resolve to a namespace are left unregistered.
   *
   * @param \DOMDocument $document
   * @param array        $prefixes Namespace prefixes to register
   *
   * @return \DOMXPath
   */
  private function createDOMXPath(\DOMDocument $document, array $prefixes = array())
  {
      $xpathEngine = new \DOMXPath($document);

      foreach ($prefixes as $prefix) {
          $namespaceUri = $this->discoverNamespace($xpathEngine, $prefix);
          if (null === $namespaceUri) {
              continue;
          }

          $xpathEngine->registerNamespace($prefix, $namespaceUri);
      }

      return $xpathEngine;
  }
  /**
   * Resolves the namespace bound to a prefix.
   *
   * Manually registered namespaces take precedence. Otherwise the document is
   * queried for a namespace node with that name; the crawler's default
   * namespace prefix is looked up under the empty name instead.
   *
   * @param \DOMXPath $domxpath
   * @param string    $prefix
   *
   * @return string|null The namespace value, or null when none is found
   */
  private function discoverNamespace(\DOMXPath $domxpath, $prefix)
  {
      if (isset($this->namespaces[$prefix])) {
          return $this->namespaces[$prefix];
      }

      $lookupName = $prefix;
      if ($this->defaultNamespacePrefix === $prefix) {
          $lookupName = '';
      }

      // ask for one namespace, otherwise we'd get a collection with an item for each node
      $expression = sprintf('(//namespace::*[name()="%s"])[last()]', $lookupName);
      $namespaceNode = $domxpath->query($expression)->item(0);

      if (!$namespaceNode) {
          return null;
      }

      return $namespaceNode->nodeValue;
  }
  /**
   * Extracts the distinct namespace prefixes that appear in an XPath expression.
   *
   * A prefix is a name followed by a single colon and then a character other
   * than a double quote, a slash or another colon. Matching is case-insensitive.
   *
   * @param string $xpath
   *
   * @return array The distinct prefixes, or an empty array when nothing matches
   */
  private function findNamespacePrefixes($xpath)
  {
      $matches = array();
      $hits = preg_match_all('/(?P<prefix>[a-z_][a-z_0-9\-\.]*+):[^"\/:]/i', $xpath, $matches);

      // array_unique() keeps the keys of the first occurrences, so the result may have gaps
      return $hits ? array_unique($matches['prefix']) : array();
  }
  /**
   * Creates a crawler for some subnodes.
   *
   * The new instance gets this crawler's URI and base href through the
   * constructor, then its HTML flag, document and registered namespaces are
   * copied over.
   *
   * @param \DOMElement|\DOMElement[]|\DOMNodeList|null $nodes
   *
   * @return static
   */
  private function createSubCrawler($nodes)
  {
      $subCrawler = new static($nodes, $this->uri, $this->baseHref);

      // carry over the state that is not passed through the constructor
      $subCrawler->namespaces = $this->namespaces;
      $subCrawler->document = $this->document;
      $subCrawler->isHtml = $this->isHtml;

      return $subCrawler;
  }
  /**
   * Emits a silenced E_USER_DEPRECATED notice for a deprecated method.
   *
   * When $useTrace is true or the HHVM_VERSION constant is defined, the call
   * stack is inspected first and nothing is emitted if the frame at index 2
   * belongs to the SplObjectStorage class.
   *
   * @param string $methodName The method name reported in the notice
   * @param bool   $useTrace   Whether to inspect the call stack before emitting the notice
   */
  private function triggerDeprecation($methodName, $useTrace = false)
  {
      $inspectStack = $useTrace || defined('HHVM_VERSION');

      if ($inspectStack) {
          // the frame limit is only passed on PHP 5.4 and later
          $trace = \PHP_VERSION_ID >= 50400
              ? debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3)
              : debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);

          // The SplObjectStorage class performs calls to its own methods. These
          // method calls must not lead to triggered deprecation notices.
          $callerClass = isset($trace[2]['class']) ? $trace[2]['class'] : null;
          if ('SplObjectStorage' === $callerClass) {
              return;
          }
      }

      @trigger_error('The '.$methodName.' method is deprecated since version 2.8 and will be removed in 3.0.', E_USER_DEPRECATED);
  }
  1037. }