PageRenderTime 53ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/base/lib/flourishlib/fXML.php

https://bitbucket.org/thanhtungnguyenphp/monitos
PHP | 691 lines | 318 code | 106 blank | 267 comment | 63 complexity | 22bea34497227d0974163a5296bf6d7d MD5 | raw file
  1. <?php
  2. /**
  3. * Provides functionality for XML files
  4. *
  5. * This class is implemented to use the UTF-8 character encoding. Please see
  6. * http://flourishlib.com/docs/UTF-8 for more information.
  7. *
  8. * @copyright Copyright (c) 2007-2010 Will Bond, others
  9. * @author Will Bond [wb] <will@flourishlib.com>
  10. * @author Craig Ruksznis [cr-imarc] <craigruk@imarc.net>
  11. * @license http://flourishlib.com/license
  12. *
  13. * @package Flourish
  14. * @link http://flourishlib.com/fXML
  15. *
  16. * @version 1.0.0b6
  17. * @changes 1.0.0b6 Updated class to use fCore::startErrorCapture() instead of `error_reporting()` [wb, 2010-08-09]
  18. * @changes 1.0.0b5 Added the `$fix_entities_encoding` parameter to ::__construct() [cr-imarc+wb, 2010-08-08]
  19. * @changes 1.0.0b4 Updated the class to automatically add a `__` prefix for the default namespace and to use that for attribute and child element access [wb, 2010-04-06]
  20. * @changes 1.0.0b3 Added the `$http_timeout` parameter to ::__construct() [wb, 2009-09-16]
  21. * @changes 1.0.0b2 Added instance functionality for reading of XML files [wb, 2009-09-01]
  22. * @changes 1.0.0b The initial implementation [wb, 2008-01-13]
  23. */
  24. class fXML implements ArrayAccess
  25. {
  26. // The following constants allow for nice looking callbacks to static methods
  27. const encode = 'fXML::encode';
  28. const sendHeader = 'fXML::sendHeader';
  29. /**
  30. * Encodes content for display in a UTF-8 encoded XML document
  31. *
  32. * @param string $content The content to encode
  33. * @return string The encoded content
  34. */
  35. static public function encode($content)
  36. {
  37. return htmlspecialchars(html_entity_decode($content, ENT_QUOTES, 'UTF-8'), ENT_QUOTES, 'UTF-8');
  38. }
  39. /**
  40. * Sets the proper `Content-Type` HTTP header for a UTF-8 XML file
  41. *
  42. * @return void
  43. */
  44. static public function sendHeader()
  45. {
  46. header('Content-Type: text/xml; charset=utf-8');
  47. }
  48. /**
  49. * Custom prefix => namespace URI mappings
  50. *
  51. * @var array
  52. */
  53. protected $__custom_prefixes;
  54. /**
  55. * The dom element for this XML
  56. *
  57. * @var DOMElement
  58. */
  59. protected $__dom;
  60. /**
  61. * An XPath object for performing xpath lookups
  62. *
  63. * @var DOMXPath
  64. */
  65. protected $__xpath;
  66. /**
  67. * The XML string for serialization
  68. *
  69. * @var string
  70. */
  71. protected $__xml;
  72. /**
  73. * Create the XML object from a string, fFile or URL
  74. *
  75. * The `$default_namespace` will be used for any sort of methods calls,
  76. * member access or array access when the element or attribute name does
  77. * not include a `:`.
  78. *
  79. * @throws fValidationException When the source XML is invalid or does not exist
  80. *
  81. * @param fFile|string $source The source of the XML, either an fFile object, a string of XML, a file path or a URL
  82. * @param numeric $http_timeout The timeout to use in seconds when requesting an XML file from a URL
  83. * @param boolean $fix_entities_encoding This will fix two common XML authoring errors and should only be used when experiencing decoding issues - HTML entities that haven't been encoded as XML, and XML content published in ISO-8859-1 or Windows-1252 encoding without an explicit encoding attribute
  84. * @param fFile|string :$source
  85. * @param boolean :$fix_entities_encoding
  86. * @return fXML
  87. */
  88. public function __construct($source, $http_timeout=NULL, $fix_entities_encoding=NULL)
  89. {
  90. if (is_bool($http_timeout)) {
  91. $fix_entities_encoding = $http_timeout;
  92. $http_timeout = NULL;
  93. }
  94. // Prevent spitting out errors to we can throw exceptions
  95. $old_setting = libxml_use_internal_errors(TRUE);
  96. $exception_message = NULL;
  97. try {
  98. if ($source instanceof fFile && $fix_entities_encoding) {
  99. $source = $source->read();
  100. }
  101. if ($source instanceof DOMElement) {
  102. $this->__dom = $source;
  103. $xml = TRUE;
  104. } elseif ($source instanceof fFile) {
  105. $xml = simplexml_load_file($source->getPath());
  106. // This handles URLs specially by adding a reasonable timeout
  107. } elseif (preg_match('#^(?P<protocol>http(s)?)://#', $source, $matches)) {
  108. if ($http_timeout === NULL) {
  109. $http_timeout = ini_get('default_socket_timeout');
  110. }
  111. // We use the appropriate protocol here so PHP can supress IIS https:// warnings
  112. $context = stream_context_create(array(
  113. $matches['protocol'] => array('timeout' => $http_timeout)
  114. ));
  115. // If the URL is not loaded in time, this supresses the file_get_contents() warning
  116. fCore::startErrorCapture(E_WARNING);
  117. $xml = file_get_contents($source, 0, $context);
  118. fCore::stopErrorCapture();
  119. if (!$xml) {
  120. throw new fExpectedException('The URL specified, %s, could not be loaded', $source);
  121. }
  122. if ($fix_entities_encoding) {
  123. $xml = $this->fixEntitiesEncoding($xml);
  124. }
  125. $xml = new SimpleXMLElement($xml);
  126. } else {
  127. $is_path = $source && !preg_match('#^\s*<#', $source);
  128. if ($fix_entities_encoding) {
  129. if ($is_path) {
  130. $source = file_get_contents($source);
  131. $is_path = FALSE;
  132. }
  133. $source = $this->fixEntitiesEncoding($source);
  134. }
  135. $xml = new SimpleXMLElement($source, 0, $is_path);
  136. }
  137. } catch (Exception $e) {
  138. $exception_message = $e->getMessage();
  139. $xml = FALSE;
  140. }
  141. // We want it to be clear when XML parsing issues occur
  142. if ($xml === FALSE) {
  143. $errors = libxml_get_errors();
  144. foreach ($errors as $error) {
  145. $exception_message .= "\n" . rtrim($error->message);
  146. }
  147. // If internal errors were off before, turn them back off
  148. if (!$old_setting) {
  149. libxml_use_internal_errors(FALSE);
  150. }
  151. throw new fValidationException(str_replace('%', '%%', $exception_message));
  152. }
  153. if (!$old_setting) {
  154. libxml_use_internal_errors(FALSE);
  155. }
  156. if (!$this->__dom) {
  157. $this->__dom = dom_import_simplexml($xml);
  158. }
  159. if ($this->__dom->namespaceURI && $this->__dom->prefix == '') {
  160. $this->addCustomPrefix('__', $this->__dom->namespaceURI);
  161. }
  162. }
  163. /**
  164. * Allows access to the text content of a child tag
  165. *
  166. * The child element name (`$name`) may start with a namespace prefix and a
  167. * `:` to indicate what namespace it is part of. A blank namespace prefix
  168. * (i.e. an element name starting with `:`) is treated as the XML default
  169. * namespace.
  170. *
  171. * @internal
  172. *
  173. * @param string $name The child element to retrieve
  174. * @return fXML|NULL The child element requested
  175. */
  176. public function __get($name)
  177. {
  178. // Handle nice callback syntax
  179. static $methods = array(
  180. '__construct' => TRUE,
  181. '__get' => TRUE,
  182. '__isset' => TRUE,
  183. '__sleep' => TRUE,
  184. '__toString' => TRUE,
  185. '__wakeup' => TRUE,
  186. 'addCustomPrefix' => TRUE,
  187. 'getName' => TRUE,
  188. 'getNamespace' => TRUE,
  189. 'getPrefix' => TRUE,
  190. 'getText' => TRUE,
  191. 'offsetExists' => TRUE,
  192. 'offsetGet' => TRUE,
  193. 'offsetSet' => TRUE,
  194. 'offsetUnset' => TRUE,
  195. 'toXML' => TRUE,
  196. 'xpath' => TRUE
  197. );
  198. if (isset($methods[$name])) {
  199. return array($this, $name);
  200. }
  201. if ($this->__dom->namespaceURI && $this->__dom->prefix == '' && strpos($name, ':') === FALSE) {
  202. $name = '__:' . $name;
  203. }
  204. $first_child = $this->query($name . '[1]');
  205. if ($first_child->length) {
  206. return $first_child->item(0)->textContent;
  207. }
  208. return NULL;
  209. }
  210. /**
  211. * The child element name (`$name`) may start with a namespace prefix and a
  212. * `:` to indicate what namespace it is part of. A blank namespace prefix
  213. * (i.e. an element name starting with `:`) is treated as the XML default
  214. * namespace.
  215. *
  216. * @internal
  217. *
  218. * @param string $name The child element to check - see method description for details about namespaces
  219. * @return boolean If the child element is set
  220. */
  221. public function __isset($name)
  222. {
  223. if ($this->__dom->namespaceURI && $this->__dom->prefix == '' && strpos($name, ':') === FALSE) {
  224. $name = '__:' . $name;
  225. }
  226. return (boolean) $this->query($name . '[1]')->length;
  227. }
  228. /**
  229. * Prevents users from trying to set elements
  230. *
  231. * @internal
  232. *
  233. * @param string $name The element to set
  234. * @param mixed $value The value to set
  235. * @return void
  236. */
  237. public function __set($name, $value)
  238. {
  239. throw new fProgrammerException('The %s class does not support modifying XML', __CLASS__);
  240. }
  241. /**
  242. * The XML needs to be made into a string before being serialized
  243. *
  244. * @internal
  245. *
  246. * @return array The members to serialize
  247. */
  248. public function __sleep()
  249. {
  250. $this->__xml = $this->toXML();
  251. return array('__custom_prefixes', '__xml');
  252. }
  253. /**
  254. * Gets the string inside the root XML element
  255. *
  256. * @return string The text inside the root element
  257. */
  258. public function __toString()
  259. {
  260. return (string) $this->__dom->textContent;
  261. }
  262. /**
  263. * Prevents users from trying to unset elements
  264. *
  265. * @internal
  266. *
  267. * @param string $name The element to unset
  268. * @return void
  269. */
  270. public function __unset($name)
  271. {
  272. throw new fProgrammerException('The %s class does not support modifying XML', __CLASS__);
  273. }
  274. /**
  275. * The XML needs to be made into a DOMElement when woken up
  276. *
  277. * @internal
  278. *
  279. * @return void
  280. */
  281. public function __wakeup()
  282. {
  283. $this->__dom = dom_import_simplexml(new SimpleXMLElement($this->__xml));
  284. $this->__xml = NULL;
  285. }
  286. /**
  287. * Adds a custom namespace prefix to full namespace mapping
  288. *
  289. * This namespace prefix will be valid for any operation on this object,
  290. * including calls to ::xpath().
  291. *
  292. * @param string $ns_prefix The custom namespace prefix
  293. * @param string $namespace The full namespace it maps to
  294. * @return void
  295. */
  296. public function addCustomPrefix($ns_prefix, $namespace)
  297. {
  298. if (!$this->__custom_prefixes) {
  299. $this->__custom_prefixes = array();
  300. }
  301. $this->__custom_prefixes[$ns_prefix] = $namespace;
  302. if ($this->__xpath) {
  303. $this->__xpath->registerNamespace($ns_prefix, $namespace);
  304. }
  305. }
  306. /**
  307. * Fixes HTML entities that aren't XML encoded and fixes ISO-8859-1/Windows-1252 encoded content that does not have an encoding attribute
  308. *
  309. * @param string $xml The XML to fix
  310. * @return string The fixed XML
  311. */
  312. private function fixEntitiesEncoding($xml)
  313. {
  314. preg_match('#^<\?xml.*? encoding="([^"]+)".*?\?>#i', $xml, $match);
  315. $encoding = empty($match[1]) ? NULL : $match[1];
  316. // Try to detect the encoding via the BOM
  317. if ($encoding === NULL) {
  318. if (substr($xml, 0, 3) == "\x0\x0\xFE\xFF") {
  319. $encoding = 'UTF-32BE';
  320. } elseif (substr($xml, 0, 3) == "\xFF\xFE\x0\x0") {
  321. $encoding = 'UTF-32LE';
  322. } elseif (substr($xml, 0, 2) == "\xFE\xFF") {
  323. $encoding = 'UTF-16BE';
  324. } elseif (substr($xml, 0, 2) == "\xFF\xFE") {
  325. $encoding = 'UTF-16LE';
  326. } else {
  327. $encoding = 'UTF-8';
  328. }
  329. }
  330. // This fixes broken encodings where the XML author puts ISO-8859-1 or
  331. // Windows-1252 into an XML file without an encoding or UTF-8 encoding
  332. if (preg_replace('#[^a-z0-9]#', '', strtolower($encoding)) == 'utf8') {
  333. // Remove the UTF-8 BOM if present
  334. $xml = preg_replace("#^\xEF\xBB\xBF#", '', $xml);
  335. fCore::startErrorCapture(E_NOTICE);
  336. $cleaned = iconv('UTF-8', 'UTF-8', $xml);
  337. if ($cleaned != $xml) {
  338. $xml = iconv('Windows-1252', 'UTF-8', $xml);
  339. }
  340. fCore::stopErrorCapture();
  341. }
  342. $num_matches = preg_match_all('#&(?!gt|lt|amp|quot|apos)\w+;#', $xml, $matches, PREG_SET_ORDER);
  343. if ($num_matches) {
  344. // We convert non-UTF-* content to UTF-8 because some character sets
  345. // don't have characters for all HTML entities
  346. if (substr(strtolower($encoding), 0, 3) != 'utf') {
  347. $xml = iconv($encoding, 'UTF-8', $xml);
  348. $xml = preg_replace('#^(<\?xml.*?) encoding="[^"]+"(.*?\?>)#', '\1 encoding="UTF-8"\2', $xml);
  349. $encoding = 'UTF-8';
  350. }
  351. $entities = array();
  352. foreach ($matches as $match) {
  353. $entities[$match[0]] = html_entity_decode($match[0], ENT_COMPAT, $encoding);
  354. }
  355. $xml = strtr($xml, $entities);
  356. }
  357. return $xml;
  358. }
  359. /**
  360. * Returns the name of the current element
  361. *
  362. * @return string The name of the current element
  363. */
  364. public function getName()
  365. {
  366. return $this->__dom->localName;
  367. }
  368. /**
  369. * Returns the namespace of the current element
  370. *
  371. * @return string The namespace of the current element
  372. */
  373. public function getNamespace()
  374. {
  375. return $this->__dom->namespaceURI;
  376. }
  377. /**
  378. * Returns the namespace prefix of the current element
  379. *
  380. * @return string The namespace prefix of the current element
  381. */
  382. public function getPrefix()
  383. {
  384. return $this->__dom->prefix;
  385. }
  386. /**
  387. * Returns the string text of the current element
  388. *
  389. * @return string The string text of the current element
  390. */
  391. public function getText()
  392. {
  393. return (string) $this->__dom->textContent;
  394. }
  395. /**
  396. * Provides functionality for isset() and empty() (required by arrayaccess interface)
  397. *
  398. * Offsets refers to an attribute name. Attribute may start with a namespace
  399. * prefix and a `:` to indicate what namespace the attribute is part of. A
  400. * blank namespace prefix (i.e. an offset starting with `:`) is treated as
  401. * the XML default namespace.
  402. *
  403. * @internal
  404. *
  405. * @param string $offset The offset to check
  406. * @return boolean If the offset exists
  407. */
  408. public function offsetExists($offset)
  409. {
  410. return (boolean) $this->query('@' . $offset . '[1]')->length;
  411. }
  412. /**
  413. * Provides functionality for get [index] syntax (required by ArrayAccess interface)
  414. *
  415. * Offsets refers to an attribute name. Attribute may start with a namespace
  416. * prefix and a `:` to indicate what namespace the attribute is part of. A
  417. * blank namespace prefix (i.e. an offset starting with `:`) is treated as
  418. * the XML default namespace.
  419. *
  420. * @internal
  421. *
  422. * @param string $offset The attribute to retrieve the value for
  423. * @return string The value of the offset
  424. */
  425. public function offsetGet($offset)
  426. {
  427. $attribute = $this->query('@' . $offset . '[1]');
  428. if ($attribute->length) {
  429. return $attribute->item(0)->nodeValue;
  430. }
  431. return NULL;
  432. }
  433. /**
  434. * Required by ArrayAccess interface
  435. *
  436. * @internal
  437. *
  438. * @param integer|string $offset The offset to set
  439. * @return void
  440. */
  441. public function offsetSet($offset, $value)
  442. {
  443. throw new fProgrammerException('The %s class does not support modifying XML', __CLASS__);
  444. }
  445. /**
  446. * Required by ArrayAccess interface
  447. *
  448. * @internal
  449. *
  450. * @param integer|string $offset The offset to unset
  451. * @return void
  452. */
  453. public function offsetUnset($offset)
  454. {
  455. throw new fProgrammerException('The %s class does not support modifying XML', __CLASS__);
  456. }
  457. /**
  458. * Performs an XPath query on the current element, returning the raw results
  459. *
  460. * @param string $path The XPath path to query
  461. * @return array The matching elements
  462. */
  463. protected function query($path)
  464. {
  465. if (!$this->__xpath) {
  466. $this->__xpath = new DOMXPath($this->__dom->ownerDocument);
  467. if ($this->__custom_prefixes) {
  468. foreach ($this->__custom_prefixes as $prefix => $namespace) {
  469. $this->__xpath->registerNamespace($prefix, $namespace);
  470. }
  471. }
  472. }
  473. // Prevent spitting out errors to we can throw exceptions
  474. $old_setting = libxml_use_internal_errors(TRUE);
  475. $result = $this->__xpath->query($path, $this->__dom);
  476. // We want it to be clear when XML parsing issues occur
  477. if ($result === FALSE) {
  478. $errors = libxml_get_errors();
  479. $exception_message = '';
  480. foreach ($errors as $error) {
  481. $exception_message .= "\n" . $error->message;
  482. }
  483. // If internal errors were off before, turn them back off
  484. if (!$old_setting) {
  485. libxml_use_internal_errors(FALSE);
  486. }
  487. throw new fProgrammerException(str_replace('%', '%%', trim($exception_message)));
  488. }
  489. if (!$old_setting) {
  490. libxml_use_internal_errors(FALSE);
  491. }
  492. return $result;
  493. }
  494. /**
  495. * Returns a well-formed XML string from the current element
  496. *
  497. * @return string The XML
  498. */
  499. public function toXML()
  500. {
  501. return $this->__dom->ownerDocument->saveXML($this->__dom->parentNode === $this->__dom->ownerDocument ? $this->__dom->parentNode : $this->__dom);
  502. }
  503. /**
  504. * Executes an XPath query on the current element, returning an array of matching elements
  505. *
  506. * @param string $path The XPath path to query
  507. * @param boolean $first_only If only the first match should be returned
  508. * @return array|string|fXML An array of matching elements, or a string or fXML object if `$first_only` is `TRUE`
  509. */
  510. public function xpath($path, $first_only=FALSE)
  511. {
  512. $result = $this->query($path);
  513. if ($first_only) {
  514. if (!$result->length) { return NULL; }
  515. $result = array($result->item(0));
  516. } else {
  517. if (!$result->length) { return array(); }
  518. }
  519. $keys_to_remove = array();
  520. $output = array();
  521. foreach ($result as $element) {
  522. if ($element instanceof DOMElement) {
  523. $child = new fXML($element);
  524. $child->__custom_prefixes = $this->__custom_prefixes;
  525. if ($child->__dom->namespaceURI && $child->__dom->prefix == '') {
  526. $child->addCustomPrefix('__', $child->__dom->namespaceURI);
  527. }
  528. $output[] = $child;
  529. } elseif ($element instanceof DOMCharacterData) {
  530. $output[] = $element->data;
  531. } elseif ($element instanceof DOMAttr) {
  532. $key = $element->name;
  533. if ($element->prefix) {
  534. $key = $element->prefix . ':' . $key;
  535. }
  536. // We will create an attrname and attrname[0] key for each
  537. // attribute and if more than one is found we remove the
  538. // key attrname. If only one is found we remove attrname[0].
  539. $key_1 = $key . '[1]';
  540. if (isset($output[$key_1])) {
  541. $i = 1;
  542. while (isset($output[$key . '[' . $i . ']'])) {
  543. $i++;
  544. }
  545. // This removes the key without the array index if more than one was found
  546. unset($output[$key]);
  547. unset($keys_to_remove[$key_1]);
  548. $key = $key . '[' . $i . ']';
  549. } else {
  550. $output[$key_1] = $element->nodeValue;
  551. $keys_to_remove[$key_1] = TRUE;
  552. }
  553. $output[$key] = $element->nodeValue;
  554. }
  555. }
  556. foreach ($keys_to_remove as $key => $trash) {
  557. unset($output[$key]);
  558. }
  559. if ($first_only) {
  560. return current($output);
  561. }
  562. return $output;
  563. }
  564. }
  565. /**
  566. * Copyright (c) 2007-2010 Will Bond <will@flourishlib.com>, others
  567. *
  568. * Permission is hereby granted, free of charge, to any person obtaining a copy
  569. * of this software and associated documentation files (the "Software"), to deal
  570. * in the Software without restriction, including without limitation the rights
  571. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  572. * copies of the Software, and to permit persons to whom the Software is
  573. * furnished to do so, subject to the following conditions:
  574. *
  575. * The above copyright notice and this permission notice shall be included in
  576. * all copies or substantial portions of the Software.
  577. *
  578. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  579. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  580. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  581. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  582. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  583. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  584. * THE SOFTWARE.
  585. */