/src/Pyrus/XMLParser.php

https://github.com/CloCkWeRX/Pyrus · PHP · 257 lines · 145 code · 30 blank · 82 comment · 40 complexity · 776b6af6a137e1fb71f12f65071f49b4 MD5 · raw file

  1. <?php
  2. /**
  3. * Process an XML file, convert it to an array
  4. *
  5. * PHP version 5
  6. *
  7. * @category Pyrus
  8. * @package Pyrus
  9. * @subpackage XML
  10. * @author Greg Beaver <cellog@php.net>
  11. * @author Helgi Þormar Þorbjörnsson <helgi@php.net>
  12. * @copyright 2010 The PEAR Group
  13. * @license http://www.opensource.org/licenses/bsd-license.php New BSD License
  14. * @link https://github.com/pyrus/Pyrus
  15. */
  16. /**
  17. * Process an XML file, convert it to an array
  18. *
  19. * @category Pyrus
  20. * @package Pyrus
  21. * @subpackage XML
  22. * @author Greg Beaver <cellog@php.net>
  23. * @author Helgi Þormar Þorbjörnsson <helgi@php.net>
  24. * @copyright 2010 The PEAR Group
  25. * @license http://www.opensource.org/licenses/bsd-license.php New BSD License
  26. * @link https://github.com/pyrus/Pyrus
  27. */
  28. namespace Pyrus;
  29. class XMLParser extends \XMLReader
  30. {
  31. /**
  32. * Parse a string containing XML
  33. *
  34. * @param string $string The raw XML data
  35. * @param string $schema Path to the xml schema file for validation
  36. *
  37. * @return array
  38. */
  39. function parseString($string, $schema = false)
  40. {
  41. $this->XML($string);
  42. return $this->_parse($schema);
  43. }
  44. /**
  45. * Using XMLReader, unserialize XML into an array
  46. *
  47. * This unserializer has limitations on the XML it can parse, for simplicity:
  48. *
  49. * - Only a single text node (the last one) will be processed, so this code:
  50. * <pre>
  51. * <?xml version="1.0" ?><test>hi<tag/>there</test>
  52. * </pre>
  53. * results in <code>array('test' => array('tag' => '', '_content' => 'there'))</code>
  54. * - tag ordering is not preserved in all cases:
  55. * <pre>
  56. * <?xml version="1.0" ?><test><tag /><another /> <tag /></test>
  57. * </pre>
  58. * results in
  59. * <code>array('test' => array('tag' => array('', ''), 'another' => ''))</code>
  60. *
  61. * @param string $file file URI to process
  62. * @param string $schema path to the xml schema file for validation
  63. *
  64. * @return array
  65. */
  66. function parse($file, $schema = false)
  67. {
  68. if (@$this->open($file) === false) {
  69. throw new XMLParser\Exception('Cannot open ' . $file . ' for parsing');
  70. }
  71. return $this->_parse($schema);
  72. }
  73. /**
  74. * Merge tag into the array
  75. *
  76. * @param array $arr The array representation of the XML
  77. * @param string $tag The tag name
  78. * @param array $attribs Associative array of attributes for this tag
  79. * @param string $name The tag name
  80. * @param int $depth The current depth within the XML document
  81. *
  82. * @return array
  83. */
  84. protected function mergeTag($arr, $tag, $attribs, $name, $depth)
  85. {
  86. if ($attribs) {
  87. // tag has attributes
  88. if (is_string($tag) && $tag !== '') {
  89. $tag = array('attribs' => $attribs, '_content' => $tag);
  90. } else {
  91. if (!is_array($tag)) {
  92. $tag = array();
  93. }
  94. $tag['attribs'] = $attribs;
  95. }
  96. }
  97. if (is_array($arr) && isset($arr[$name]) && is_array($arr[$name]) &&
  98. isset($arr[$name][0])
  99. ) {
  100. // tag exists as a sibling
  101. $where = count($arr[$name]);
  102. if (!isset($arr[$name][$where])) {
  103. $arr[$name][$where] = $tag;
  104. return $arr;
  105. }
  106. if (!is_array($arr[$name][$where])) {
  107. if (strlen($arr[$name][$where])) {
  108. $arr[$name][$where] = array('_content' => $arr[$name][$where]);
  109. } else {
  110. $arr[$name][$where] = array();
  111. }
  112. }
  113. $arr[$name][$where] = $tag;
  114. } else {
  115. if (!is_array($arr)) {
  116. $arr = array();
  117. }
  118. if (isset($arr[$name])) {
  119. // new sibling
  120. $arr[$name] = array($arr[$name], $tag);
  121. return $arr;
  122. }
  123. $arr[$name] = $tag;
  124. }
  125. return $arr;
  126. }
  127. protected function mergeValue($arr, $value)
  128. {
  129. if (is_array($arr) && isset($arr[0])) {
  130. // multiple siblings
  131. $count = count($arr) - 1;
  132. $arr[$count] = $this->mergeActualValue($arr[$count], $value);
  133. } elseif (is_array($arr)) {
  134. $arr = $this->mergeActualValue($arr, $value);
  135. } else {
  136. $arr = $value;
  137. }
  138. return $arr;
  139. }
  140. protected function mergeActualValue($me, $value)
  141. {
  142. if (count($me)) {
  143. $me['_content'] = $value;
  144. } else {
  145. $me = $value;
  146. }
  147. return $me;
  148. }
  149. /**
  150. * Parse XML into an array
  151. *
  152. * @param string $schema Filename of xsd schema file
  153. *
  154. * @return array
  155. *
  156. * @throws XMLParser\Exception
  157. */
  158. private function _parse($schema)
  159. {
  160. libxml_use_internal_errors(true);
  161. libxml_clear_errors();
  162. if ($schema) {
  163. // Workaround to avoid XMLReader wanting to URL-encode schema paths
  164. $this->setSchema('data://application/xsd;base64,'.base64_encode(file_get_contents($schema)));
  165. }
  166. $arr = $this->_recursiveParse();
  167. $this->close();
  168. $causes = new \PEAR2\MultiErrors;
  169. foreach (libxml_get_errors() as $error) {
  170. $causes->E_ERROR[]= new XMLParser\Exception("Line " .
  171. $error->line . ': ' . $error->message);
  172. }
  173. if (count($causes->E_ERROR)) {
  174. throw new XMLParser\Exception('Invalid XML document', $causes);
  175. }
  176. return $arr;
  177. }
  178. private function _recursiveParse($arr = array())
  179. {
  180. while (@$this->read()) {
  181. $depth = $this->depth;
  182. if ($this->nodeType == self::ELEMENT) {
  183. $tag = $this->name;
  184. $attribs = array();
  185. if ($this->isEmptyElement) {
  186. if ($this->hasAttributes) {
  187. $attr = $this->moveToFirstAttribute();
  188. while ($attr) {
  189. $attribs[$this->name] = $this->value;
  190. $attr = $this->moveToNextAttribute();
  191. }
  192. $depth = $this->depth;
  193. $arr = $this->mergeTag($arr, '', $attribs, $tag, $depth);
  194. continue;
  195. }
  196. $depth = $this->depth;
  197. $arr = $this->mergeTag($arr, '', array(), $tag, $depth);
  198. continue;
  199. }
  200. if ($this->hasAttributes) {
  201. $attr = $this->moveToFirstAttribute();
  202. while ($attr) {
  203. $attribs[$this->name] = $this->value;
  204. $attr = $this->moveToNextAttribute();
  205. }
  206. }
  207. $depth = $this->depth;
  208. $arr = $this->mergeTag($arr, '', $attribs, $tag, $depth);
  209. if (is_array($arr[$tag]) && isset($arr[$tag][0])) {
  210. // seek to last sibling
  211. $arr[$tag][count($arr[$tag]) - 1] =
  212. $this->_recursiveParse($arr[$tag][count($arr[$tag]) - 1]);
  213. } else {
  214. $arr[$tag] = $this->_recursiveParse($arr[$tag]);
  215. }
  216. continue;
  217. }
  218. if ($this->nodeType == self::END_ELEMENT) {
  219. return $arr;
  220. }
  221. if ($this->nodeType == self::TEXT || $this->nodeType == self::CDATA) {
  222. $arr = $this->mergeValue($arr, $this->value);
  223. }
  224. }
  225. return $arr;
  226. }
  227. }