PageRenderTime 25ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/Dom/Query.php

https://github.com/zf/ZendFramework-Library
PHP | 356 lines | 177 code | 28 blank | 151 comment | 17 complexity | bc3e14dd1d7666cf05a5c23d35cae0e2 MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Dom
  17. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /**
  22. * @see Zend_Dom_Query_Css2Xpath
  23. */
  24. require_once 'Zend/Dom/Query/Css2Xpath.php';
  25. /**
  26. * @see Zend_Dom_Query_Result
  27. */
  28. require_once 'Zend/Dom/Query/Result.php';
  29. /** @see Zend_Xml_Security */
  30. require_once 'Zend/Xml/Security.php';
  31. /** @see Zend_Xml_Exception */
  32. require_once 'Zend/Xml/Exception.php';
  33. /**
  34. * Query DOM structures based on CSS selectors and/or XPath
  35. *
  36. * @package Zend_Dom
  37. * @subpackage Query
  38. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  39. * @license http://framework.zend.com/license/new-bsd New BSD License
  40. */
  class Zend_Dom_Query
  {
      /**#@+
       * Document types
       */
      const DOC_DOM   = 'docDom';
      const DOC_XML   = 'docXml';
      const DOC_HTML  = 'docHtml';
      const DOC_XHTML = 'docXhtml';
      /**#@-*/

      /**
       * Registered document: raw markup, or an already-built DOMDocument.
       * Stays null until a non-empty document has been registered.
       *
       * @var null|string|DOMDocument
       */
      protected $_document;

      /**
       * libxml errors collected by the most recent queryXpath() call,
       * or false when that call produced none.
       *
       * @var false|array
       */
      protected $_documentErrors = false;

      /**
       * Document type (one of the DOC_* constants)
       *
       * @var null|string
       */
      protected $_docType;

      /**
       * Document encoding
       *
       * @var null|string
       */
      protected $_encoding;

      /**
       * XPath namespaces; each key is registered as the prefix and each
       * value as the namespace URI (see _getNodeList()).
       *
       * @var array
       */
      protected $_xpathNamespaces = array();

      /**
       * Constructor
       *
       * @param null|string|DOMDocument $document Markup or DOM tree to query
       * @param null|string             $encoding Document encoding
       */
      public function __construct($document = null, $encoding = null)
      {
          $this->setEncoding($encoding);
          $this->setDocument($document);
      }

      /**
       * Set document encoding
       *
       * @param  null|string $encoding Null clears the encoding; anything
       *                               else is cast to string
       * @return Zend_Dom_Query
       */
      public function setEncoding($encoding)
      {
          $this->_encoding = (null === $encoding) ? null : (string) $encoding;
          return $this;
      }

      /**
       * Get document encoding
       *
       * @return null|string
       */
      public function getEncoding()
      {
          return $this->_encoding;
      }

      /**
       * Set document to query
       *
       * A DOMDocument is registered as-is. A string is sniffed: an XML
       * declaration selects XML (or XHTML when an <html xmlns="..."> root is
       * present), the marker "DTD XHTML" selects XHTML, and everything else is
       * treated as HTML. Null or an empty string leaves the object untouched.
       *
       * @param  null|string|DOMDocument $document
       * @param  null|string             $encoding Document encoding
       * @return Zend_Dom_Query
       */
      public function setDocument($document, $encoding = null)
      {
          if ($document instanceof DOMDocument) {
              return $this->setDocumentDom($document);
          }

          // Check null explicitly: strlen(null) is deprecated as of PHP 8.1 and
          // the constructor passes null whenever no document is supplied.
          if (null === $document || 0 === strlen($document)) {
              return $this;
          }

          // breaking XML declaration to make syntax highlighting work
          if ('<' . '?xml' === substr(trim($document), 0, 5)) {
              if (preg_match('/<html[^>]*xmlns="([^"]+)"[^>]*>/i', $document, $matches)) {
                  // The URI is appended, so it ends up under a numeric key.
                  $this->_xpathNamespaces[] = $matches[1];
                  return $this->setDocumentXhtml($document, $encoding);
              }
              return $this->setDocumentXml($document, $encoding);
          }

          if (false !== strpos($document, 'DTD XHTML')) {
              return $this->setDocumentXhtml($document, $encoding);
          }

          return $this->setDocumentHtml($document, $encoding);
      }

      /**
       * Set DOMDocument to query
       *
       * The document's own encoding, when it has one, replaces the encoding
       * currently stored on this object.
       *
       * @param  DOMDocument $document
       * @return Zend_Dom_Query
       */
      public function setDocumentDom(DOMDocument $document)
      {
          $this->_document = $document;
          $this->_docType  = self::DOC_DOM;
          if (null !== $document->encoding) {
              $this->setEncoding($document->encoding);
          }
          return $this;
      }

      /**
       * Register HTML document
       *
       * @param  string      $document
       * @param  null|string $encoding Document encoding; null keeps the current one
       * @return Zend_Dom_Query
       */
      public function setDocumentHtml($document, $encoding = null)
      {
          $this->_document = (string) $document;
          $this->_docType  = self::DOC_HTML;
          if (null !== $encoding) {
              $this->setEncoding($encoding);
          }
          return $this;
      }

      /**
       * Register XHTML document
       *
       * @param  string      $document
       * @param  null|string $encoding Document encoding; null keeps the current one
       * @return Zend_Dom_Query
       */
      public function setDocumentXhtml($document, $encoding = null)
      {
          $this->_document = (string) $document;
          $this->_docType  = self::DOC_XHTML;
          if (null !== $encoding) {
              $this->setEncoding($encoding);
          }
          return $this;
      }

      /**
       * Register XML document
       *
       * @param  string      $document
       * @param  null|string $encoding Document encoding; null keeps the current one
       * @return Zend_Dom_Query
       */
      public function setDocumentXml($document, $encoding = null)
      {
          $this->_document = (string) $document;
          $this->_docType  = self::DOC_XML;
          if (null !== $encoding) {
              $this->setEncoding($encoding);
          }
          return $this;
      }

      /**
       * Retrieve current document
       *
       * @return null|string|DOMDocument
       */
      public function getDocument()
      {
          return $this->_document;
      }

      /**
       * Get document type
       *
       * @return null|string One of the DOC_* constants, or null when no
       *                     document has been registered
       */
      public function getDocumentType()
      {
          return $this->_docType;
      }

      /**
       * Get any DOMDocument errors found by the most recent query
       *
       * @return false|array
       */
      public function getDocumentErrors()
      {
          return $this->_documentErrors;
      }

      /**
       * Perform a CSS selector query
       *
       * The selector is translated with Zend_Dom_Query_Css2Xpath::transform()
       * and then executed through queryXpath().
       *
       * @param  string $query CSS selector
       * @throws Zend_Dom_Exception
       * @return Zend_Dom_Query_Result
       */
      public function query($query)
      {
          $xpathQuery = Zend_Dom_Query_Css2Xpath::transform($query);
          return $this->queryXpath($xpathQuery, $query);
      }

      /**
       * Perform an XPath query
       *
       * String documents are parsed on every call; a registered DOMDocument
       * is queried directly. libxml errors raised while parsing are captured
       * and exposed through getDocumentErrors(); the caller's
       * libxml_use_internal_errors() setting is restored before returning or
       * throwing.
       *
       * @param  string|array $xpathQuery
       * @param  string       $query CSS selector query
       * @throws Zend_Dom_Exception When no document is registered or the
       *                            document cannot be parsed
       * @return Zend_Dom_Query_Result
       */
      public function queryXpath($xpathQuery, $query = null)
      {
          if (null === ($document = $this->getDocument())) {
              require_once 'Zend/Dom/Exception.php';
              throw new Zend_Dom_Exception('Cannot query; no document registered');
          }

          $encoding = $this->getEncoding();
          $type     = $this->getDocumentType();

          // Errors reported here must belong to this run only.
          $this->_documentErrors = false;

          // Remember the caller's setting so it can be put back afterwards.
          $previousSetting = libxml_use_internal_errors(true);
          $scanFailure     = null;
          $success         = false;

          if (self::DOC_DOM === $type) {
              $domDoc  = $document;
              $success = true;
          } else {
              $domDoc = (null === $encoding)
                  ? new DOMDocument('1.0')
                  : new DOMDocument('1.0', $encoding);

              if (self::DOC_XML === $type) {
                  try {
                      $domDoc  = Zend_Xml_Security::scan($document, $domDoc);
                      $success = ($domDoc !== false);
                  } catch (Zend_Xml_Exception $e) {
                      // Rethrown below, once the libxml state has been restored.
                      $scanFailure = $e;
                  }
              } else {
                  // DOC_HTML, DOC_XHTML and any unknown type use the HTML parser.
                  $success = $domDoc->loadHTML($document);
              }
          }

          $errors = libxml_get_errors();
          if (!empty($errors)) {
              $this->_documentErrors = $errors;
              libxml_clear_errors();
          }
          libxml_use_internal_errors($previousSetting);

          if (null !== $scanFailure) {
              require_once 'Zend/Dom/Exception.php';
              // Same message and default code as before; the original exception
              // is chained as "previous" to keep its trace.
              throw new Zend_Dom_Exception($scanFailure->getMessage(), 0, $scanFailure);
          }

          if (!$success) {
              require_once 'Zend/Dom/Exception.php';
              throw new Zend_Dom_Exception(sprintf('Error parsing document (type == %s)', $type));
          }

          $nodeList = $this->_getNodeList($domDoc, $xpathQuery);
          return new Zend_Dom_Query_Result($query, $xpathQuery, $domDoc, $nodeList);
      }

      /**
       * Register XPath namespaces
       *
       * Replaces the whole namespace list.
       *
       * @param  array $xpathNamespaces Prefix => namespace URI
       * @return Zend_Dom_Query
       */
      public function registerXpathNamespaces($xpathNamespaces)
      {
          $this->_xpathNamespaces = $xpathNamespaces;
          return $this;
      }

      /**
       * Prepare node list
       *
       * Registers the configured namespaces and evaluates the expression.
       * For every attribute tested as "contains(@attr, ' word ')" the attribute
       * values in the document are wrapped in single spaces first, so that the
       * first and last words of a value can match as well. NOTE: this modifies
       * the queried document, including a caller-supplied DOMDocument.
       *
       * @param  DOMDocument  $document
       * @param  string|array $xpathQuery
       * @return DOMNodeList|false Whatever DOMXPath::query() returns
       */
      protected function _getNodeList($document, $xpathQuery)
      {
          $xpath = new DOMXPath($document);
          foreach ($this->_xpathNamespaces as $prefix => $namespaceUri) {
              $xpath->registerNamespace($prefix, $namespaceUri);
          }

          $xpathQuery = (string) $xpathQuery;
          if (preg_match_all('|\[contains\((@[a-z0-9_-]+),\s?\' |i', $xpathQuery, $matches)) {
              // An attribute may be tested several times in one expression;
              // it only needs to be padded once.
              foreach (array_unique($matches[1]) as $attribute) {
                  $attributeName = substr($attribute, 1);
                  $nodes         = $xpath->query('//*[' . $attribute . ']');
                  foreach ($nodes as $node) {
                      $attr  = $node->attributes->getNamedItem($attributeName);
                      $value = $attr->value;

                      // Skip values that are already space-delimited so repeated
                      // queries against the same DOM do not pile up whitespace.
                      $alreadyPadded = ('' !== $value)
                          && (' ' === $value[0])
                          && (' ' === substr($value, -1));
                      if (!$alreadyPadded) {
                          $attr->value = ' ' . $value . ' ';
                      }
                  }
              }
          }

          return $xpath->query($xpathQuery);
      }
  }