PageRenderTime 37ms CodeModel.GetById 6ms RepoModel.GetById 1ms app.codeStats 0ms

/Zend/Dom/Query.php

http://grupal.googlecode.com/
PHP | 328 lines | 160 code | 25 blank | 143 comment | 16 complexity | 8ff07dae61fb866c3f60e82ac8deb961 MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-2.0
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Dom
  17. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id: Query.php 25031 2012-08-17 19:42:00Z matthew $
  20. */
  21. /**
  22. * @see Zend_Dom_Query_Css2Xpath
  23. */
  24. require_once 'Zend/Dom/Query/Css2Xpath.php';
  25. /**
  26. * @see Zend_Dom_Query_Result
  27. */
  28. require_once 'Zend/Dom/Query/Result.php';
  29. /**
  30. * Query DOM structures based on CSS selectors and/or XPath
  31. *
  32. * @package Zend_Dom
  33. * @subpackage Query
  34. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  35. * @license http://framework.zend.com/license/new-bsd New BSD License
  36. */
  37. class Zend_Dom_Query
  38. {
  39. /**#@+
  40. * Document types
  41. */
  42. const DOC_XML = 'docXml';
  43. const DOC_HTML = 'docHtml';
  44. const DOC_XHTML = 'docXhtml';
  45. /**#@-*/
  46. /**
  47. * @var string
  48. */
  49. protected $_document;
  50. /**
  51. * DOMDocument errors, if any
  52. * @var false|array
  53. */
  54. protected $_documentErrors = false;
  55. /**
  56. * Document type
  57. * @var string
  58. */
  59. protected $_docType;
  60. /**
  61. * Document encoding
  62. * @var null|string
  63. */
  64. protected $_encoding;
  65. /**
  66. * XPath namespaces
  67. * @var array
  68. */
  69. protected $_xpathNamespaces = array();
  70. /**
  71. * Constructor
  72. *
  73. * @param null|string $document
  74. * @return void
  75. */
  76. public function __construct($document = null, $encoding = null)
  77. {
  78. $this->setEncoding($encoding);
  79. $this->setDocument($document);
  80. }
  81. /**
  82. * Set document encoding
  83. *
  84. * @param string $encoding
  85. * @return Zend_Dom_Query
  86. */
  87. public function setEncoding($encoding)
  88. {
  89. $this->_encoding = (null === $encoding) ? null : (string) $encoding;
  90. return $this;
  91. }
  92. /**
  93. * Get document encoding
  94. *
  95. * @return null|string
  96. */
  97. public function getEncoding()
  98. {
  99. return $this->_encoding;
  100. }
  101. /**
  102. * Set document to query
  103. *
  104. * @param string $document
  105. * @param null|string $encoding Document encoding
  106. * @return Zend_Dom_Query
  107. */
  108. public function setDocument($document, $encoding = null)
  109. {
  110. if (0 === strlen($document)) {
  111. return $this;
  112. }
  113. // breaking XML declaration to make syntax highlighting work
  114. if ('<' . '?xml' == substr(trim($document), 0, 5)) {
  115. if (preg_match('/<html[^>]*xmlns="([^"]+)"[^>]*>/i', $document, $matches)) {
  116. $this->_xpathNamespaces[] = $matches[1];
  117. return $this->setDocumentXhtml($document, $encoding);
  118. }
  119. return $this->setDocumentXml($document, $encoding);
  120. }
  121. if (strstr($document, 'DTD XHTML')) {
  122. return $this->setDocumentXhtml($document, $encoding);
  123. }
  124. return $this->setDocumentHtml($document, $encoding);
  125. }
  126. /**
  127. * Register HTML document
  128. *
  129. * @param string $document
  130. * @param null|string $encoding Document encoding
  131. * @return Zend_Dom_Query
  132. */
  133. public function setDocumentHtml($document, $encoding = null)
  134. {
  135. $this->_document = (string) $document;
  136. $this->_docType = self::DOC_HTML;
  137. if (null !== $encoding) {
  138. $this->setEncoding($encoding);
  139. }
  140. return $this;
  141. }
  142. /**
  143. * Register XHTML document
  144. *
  145. * @param string $document
  146. * @param null|string $encoding Document encoding
  147. * @return Zend_Dom_Query
  148. */
  149. public function setDocumentXhtml($document, $encoding = null)
  150. {
  151. $this->_document = (string) $document;
  152. $this->_docType = self::DOC_XHTML;
  153. if (null !== $encoding) {
  154. $this->setEncoding($encoding);
  155. }
  156. return $this;
  157. }
  158. /**
  159. * Register XML document
  160. *
  161. * @param string $document
  162. * @param null|string $encoding Document encoding
  163. * @return Zend_Dom_Query
  164. */
  165. public function setDocumentXml($document, $encoding = null)
  166. {
  167. $this->_document = (string) $document;
  168. $this->_docType = self::DOC_XML;
  169. if (null !== $encoding) {
  170. $this->setEncoding($encoding);
  171. }
  172. return $this;
  173. }
  174. /**
  175. * Retrieve current document
  176. *
  177. * @return string
  178. */
  179. public function getDocument()
  180. {
  181. return $this->_document;
  182. }
  183. /**
  184. * Get document type
  185. *
  186. * @return string
  187. */
  188. public function getDocumentType()
  189. {
  190. return $this->_docType;
  191. }
  192. /**
  193. * Get any DOMDocument errors found
  194. *
  195. * @return false|array
  196. */
  197. public function getDocumentErrors()
  198. {
  199. return $this->_documentErrors;
  200. }
  201. /**
  202. * Perform a CSS selector query
  203. *
  204. * @param string $query
  205. * @return Zend_Dom_Query_Result
  206. */
  207. public function query($query)
  208. {
  209. $xpathQuery = Zend_Dom_Query_Css2Xpath::transform($query);
  210. return $this->queryXpath($xpathQuery, $query);
  211. }
  212. /**
  213. * Perform an XPath query
  214. *
  215. * @param string|array $xpathQuery
  216. * @param string $query CSS selector query
  217. * @return Zend_Dom_Query_Result
  218. */
  219. public function queryXpath($xpathQuery, $query = null)
  220. {
  221. if (null === ($document = $this->getDocument())) {
  222. require_once 'Zend/Dom/Exception.php';
  223. throw new Zend_Dom_Exception('Cannot query; no document registered');
  224. }
  225. $encoding = $this->getEncoding();
  226. libxml_use_internal_errors(true);
  227. libxml_disable_entity_loader(true);
  228. if (null === $encoding) {
  229. $domDoc = new DOMDocument('1.0');
  230. } else {
  231. $domDoc = new DOMDocument('1.0', $encoding);
  232. }
  233. $type = $this->getDocumentType();
  234. switch ($type) {
  235. case self::DOC_XML:
  236. $success = $domDoc->loadXML($document);
  237. foreach ($domDoc->childNodes as $child) {
  238. if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
  239. require_once 'Zend/Dom/Exception.php';
  240. throw new Zend_Dom_Exception(
  241. 'Invalid XML: Detected use of illegal DOCTYPE'
  242. );
  243. }
  244. }
  245. break;
  246. case self::DOC_HTML:
  247. case self::DOC_XHTML:
  248. default:
  249. $success = $domDoc->loadHTML($document);
  250. break;
  251. }
  252. $errors = libxml_get_errors();
  253. if (!empty($errors)) {
  254. $this->_documentErrors = $errors;
  255. libxml_clear_errors();
  256. }
  257. libxml_disable_entity_loader(false);
  258. libxml_use_internal_errors(false);
  259. if (!$success) {
  260. require_once 'Zend/Dom/Exception.php';
  261. throw new Zend_Dom_Exception(sprintf('Error parsing document (type == %s)', $type));
  262. }
  263. $nodeList = $this->_getNodeList($domDoc, $xpathQuery);
  264. return new Zend_Dom_Query_Result($query, $xpathQuery, $domDoc, $nodeList);
  265. }
  266. /**
  267. * Register XPath namespaces
  268. *
  269. * @param array $xpathNamespaces
  270. * @return void
  271. */
  272. public function registerXpathNamespaces($xpathNamespaces)
  273. {
  274. $this->_xpathNamespaces = $xpathNamespaces;
  275. }
  276. /**
  277. * Prepare node list
  278. *
  279. * @param DOMDocument $document
  280. * @param string|array $xpathQuery
  281. * @return array
  282. */
  283. protected function _getNodeList($document, $xpathQuery)
  284. {
  285. $xpath = new DOMXPath($document);
  286. foreach ($this->_xpathNamespaces as $prefix => $namespaceUri) {
  287. $xpath->registerNamespace($prefix, $namespaceUri);
  288. }
  289. $xpathQuery = (string) $xpathQuery;
  290. if (preg_match_all('|\[contains\((@[a-z0-9_-]+),\s?\' |i', $xpathQuery, $matches)) {
  291. foreach ($matches[1] as $attribute) {
  292. $queryString = '//*[' . $attribute . ']';
  293. $attributeName = substr($attribute, 1);
  294. $nodes = $xpath->query($queryString);
  295. foreach ($nodes as $node) {
  296. $attr = $node->attributes->getNamedItem($attributeName);
  297. $attr->value = ' ' . $attr->value . ' ';
  298. }
  299. }
  300. }
  301. return $xpath->query($xpathQuery);
  302. }
  303. }