PageRenderTime 120ms CodeModel.GetById 33ms RepoModel.GetById 4ms app.codeStats 0ms

/sources/subs/XmlArray.class.php

https://github.com/Arantor/Elkarte
PHP | 717 lines | 389 code | 95 blank | 233 comment | 110 complexity | 5ca7942caae603189c82430dba1a7c06 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-3.0
  1. <?php
  2. /**
  3. * @name ElkArte Forum
  4. * @copyright ElkArte Forum contributors
  5. * @license BSD http://opensource.org/licenses/BSD-3-Clause
  6. *
  7. * This software is a derived product, based on:
  8. *
  9. * Simple Machines Forum (SMF)
  10. * copyright: 2011 Simple Machines (http://www.simplemachines.org)
  11. * license: BSD, See included LICENSE.TXT for terms and conditions.
  12. *
  13. * @version 1.0 Alpha
  14. *
  15. * The Xml_Array class is an xml parser.
  16. *
  17. */
  // Stop right away unless the ELKARTE constant has been defined.
  if (!defined('ELKARTE'))
  {
      die('No access...');
  }
  20. /**
  21. * Class representing an xml array.
  22. * Reads in xml, allows you to access it simply.
  23. * Version 1.1.
  24. */
  25. class Xml_Array
  26. {
  27. /**
  28. * holds xml parsed results
  29. * @var array
  30. */
  31. public $array;
  32. /**
  33. * holds debugging level
  34. * @var int
  35. */
  36. public $debug_level;
  37. /**
  38. * whether textual data is trimmed automatically
  39. * @var bool
  40. */
  41. public $trim;
  /**
   * Set up the parser, either from raw xml or from an array that is already parsed.
   * Example use:
   *  $xml = new Xml_Array(file('data.xml'));
   *
   * @param string|array $data the xml text, an array of pieces to be joined (e.g. from file()),
   *  or - when $is_clone is true - an array that has already been parsed.
   * @param bool $auto_trim default false, used to automatically trim textual data.
   * @param int|null $level default null, the debug level; null means use error_reporting().
   *  It decides whether notices are raised for missing elements and attributes.
   * @param bool $is_clone default false. If true, $data is stored as is - used internally only.
   */
  public function __construct($data, $auto_trim = false, $level = null, $is_clone = false)
  {
      // Ask for some more memory before doing anything else.
      setMemoryLimit('32M');

      // Remember the settings; clones are handed the same ones.
      $this->debug_level = $level === null ? error_reporting() : $level;
      $this->trim = $auto_trim;

      // Already parsed? Then there is nothing left to do but keep it.
      if ($is_clone)
      {
          $this->array = $data;
          return;
      }

      // An array of pieces (ie. from file()) is glued together first.
      $xml = is_array($data) ? implode('', $data) : $data;

      // Drop the xml declaration and doctype, encode the CDATA sections, then drop the comments.
      $prolog = array('/^<\?xml.+?\?' . '>/is', '/<!DOCTYPE[^>]+?' . '>/s');
      $xml = preg_replace($prolog, '', $xml);
      $xml = $this->_to_cdata($xml);
      $xml = preg_replace('/<!--.*?-->/s', '', $xml);

      // What is left gets parsed into the array form.
      $this->array = $this->_parse($xml);
  }
  /**
   * Get the root element's name.
   * Example use:
   *  echo $element->name();
   *
   * @return string the 'name' entry of the parsed array, or an empty string if there is none.
   */
  public function name()
  {
      if (isset($this->array['name']))
          return $this->array['name'];

      return '';
  }
  /**
   * Get a specified element's value or attribute by path.
   * Only the text of the element and its children is returned,
   * unless get_elements is true, in which case the child elements are included as xml.
   * Example use:
   *  $data = $xml->fetch('html/head/title');
   *
   * @param string $path - the path to the element to fetch
   * @param bool $get_elements - whether to include elements
   * @return string|false the text (or attribute value), false if the path was not found.
   */
  public function fetch($path, $get_elements = false)
  {
      $found = $this->path($path);

      // Nothing there at all.
      if ($found === false)
          return false;

      // path() hands attributes back as plain strings; those are returned untouched.
      if (is_string($found))
          return $found;

      // Text only: gather every text value below this element.
      if (!$get_elements)
          return $this->_fetch($found->array);

      // Rebuild the xml of each child; the name and attributes are not arrays, so they are skipped.
      $markup = '';
      foreach ($found->array as $child)
      {
          if (is_array($child))
              $markup .= $this->_xml($child, null);
      }

      // _xml() wraps text in CDATA sections, take those out again.
      return $this->_to_cdata($markup);
  }
  /**
   * Get an element, returns a new Xml_Array.
   * It finds any elements that match the path specified.
   * It will always return a set if there is more than one of the element
   * or return_full is true.
   * Example use:
   *  $element = $xml->path('html/body');
   *
   * Path pieces are separated by '/'. A piece like 'p[2]' skips the first two matches,
   * and a piece starting with '@' returns that attribute's value straight away.
   *
   * @param string $path - the path to the element to get
   * @param bool $return_full - always return full result set
   * @return Xml_Array|string|false a new Xml_Array (of the same class as this one),
   *  the attribute value for an attribute path, or false if nothing was found.
   */
  public function path($path, $return_full = false)
  {
      // Start with a base array and walk down it, one path piece at a time.
      $array = $this->array;

      foreach (explode('/', $path) as $el)
      {
          $lvl = null;
          $bracket = strpos($el, '[');

          // Deal with sets....
          if ($bracket !== false)
          {
              $lvl = (int) substr($el, $bracket + 1);
              $el = substr($el, 0, $bracket);
          }
          // Find an attribute.
          elseif (substr($el, 0, 1) == '@')
          {
              // It simplifies things if the attribute is already there ;).
              if (isset($array[$el]))
                  return $array[$el];

              // Only dig through the backtrace when a notice is really going to be raised.
              if ($this->debug_level & E_NOTICE)
              {
                  // Step over the frames that belong to this object's own class (or a parent of it).
                  $trace = debug_backtrace();
                  $frames = count($trace);
                  $i = 0;
                  while ($i < $frames && isset($trace[$i]['class']))
                  {
                      $frame_class = $trace[$i]['class'];
                      if (!($this instanceof $frame_class))
                          break;
                      $i++;
                  }

                  // The last frame stepped over holds the file and line of the outside call.
                  $frame = $trace[max($i - 1, 0)];
                  $debug = isset($frame['file'], $frame['line']) ? ' from ' . $frame['file'] . ' on line ' . $frame['line'] : '';

                  // Cause an error.
                  trigger_error('Undefined XML attribute: ' . substr($el, 1) . $debug, E_USER_NOTICE);
              }

              return false;
          }

          // Find this element.
          $array = $this->_path($array, $el, $lvl);
      }

      // Clean up after $lvl, for $return_full: anything that is not a set yet gets wrapped in one.
      // Note that a failed lookup (false) is wrapped as well, so the result is then still an object.
      if ($return_full && (!isset($array['name']) || substr($array['name'], -1) != ']'))
          $array = array('name' => $el . '[]', $array);

      if ($array === false)
          return false;

      // Create the right type of class, and return a new instance of it for the result.
      $newClass = get_class($this);

      return new $newClass($array, $this->trim, $this->debug_level, true);
  }
  /**
   * Check if an element (or attribute) exists.
   * Example use,
   *  echo $xml->exists('html/body') ? 'y' : 'n';
   *
   * @param string $path - the path to the element to get.
   * @return boolean
   */
  public function exists($path)
  {
      // Walk down from the base array, one path piece at a time.
      $array = $this->array;

      foreach (explode('/', $path) as $el)
      {
          $bracket = strpos($el, '[');

          // Deal with sets....
          if ($bracket !== false)
          {
              $lvl = (int) substr($el, $bracket + 1);
              $el = substr($el, 0, $bracket);
          }
          // An attribute ends the walk: it is either set on this element or it is not.
          elseif (substr($el, 0, 1) == '@')
              return isset($array[$el]);
          else
              $lvl = null;

          // Look one level down, without raising a notice when nothing is found.
          $array = $this->_path($array, $el, $lvl, true);
      }

      return $array !== false;
  }
  /**
   * Count the number of occurrences of a path.
   * Example use:
   *  echo $xml->count('html/head/meta');
   *
   * @param string $path - the path to search for.
   * @return int, the number of elements the path matches.
   */
  public function count($path)
  {
      // Get the element, always returning a full set.
      $temp = $this->path($path, true);

      // An attribute path yields a string (or false) rather than a set: no elements to count.
      if (!is_object($temp))
          return 0;

      // Start at zero, then count up all the array entries - the set's own name is not one.
      $i = 0;
      foreach ($temp->array as $item)
      {
          if (is_array($item))
              $i++;
      }

      return $i;
  }
  /**
   * Get an array of Xml_Array's matching the specified path.
   * This differs from ->path(path, true) in that instead of an Xml_Array
   * of elements, an array of Xml_Array's is returned for use with foreach.
   * Example use:
   *  foreach ($xml->set('html/body/p') as $p)
   *
   * @param string $path - the path to search for.
   * @return array, an array of Xml_Array objects (empty if nothing matches)
   */
  public function set($path)
  {
      // None as yet, just get the path.
      $array = array();
      $xml = $this->path($path, true);

      // An attribute path yields a string (or false), which holds no elements at all.
      if (!is_object($xml))
          return $array;

      // Create the right type of class for every element.
      $newClass = get_class($this);

      foreach ($xml->array as $val)
      {
          // Skip these, they aren't elements: the set's name, failed lookups and text.
          if (!is_array($val) || (isset($val['name']) && $val['name'] == '!'))
              continue;

          // Create a new Xml_Array and stick it in the array.
          $array[] = new $newClass($val, $this->trim, $this->debug_level, true);
      }

      return $array;
  }
  /**
   * Create an xml file from an Xml_Array, the specified path if any.
   * Example use:
   *  echo $this->create_xml();
   *
   * @param string $path - the path to the element. (optional)
   * @return string|false xml-formatted string, or false if the path was not found.
   */
  public function create_xml($path = null)
  {
      // No path? Just use the current array.
      if ($path === null)
          $source = $this->array;
      else
      {
          $found = $this->path($path);

          // The path was not found
          if ($found === false)
              return false;

          $source = $found->array;
      }

      // Add the xml declaration to the front.
      return '<?xml version="1.0"?' . '>' . $this->_xml($source, 0);
  }
  /**
   * Output the xml in an array form.
   * Example use:
   *  print_r($xml->to_array());
   *
   * @param string $path the path to output. (optional)
   * @return array|string|false child element name => content pairs, the text if there are
   *  no child elements, or false if the path was not found.
   */
  public function to_array($path = null)
  {
      // No specific path, so just use the current array.
      if ($path === null)
          $source = $this->array;
      else
      {
          $found = $this->path($path);

          // The path was not found
          if ($found === false)
              return false;

          $source = $found->array;
      }

      return $this->_array($source);
  }
  /**
   * Parse data into an array. (privately used...)
   *
   * Every element becomes an array holding its 'name', its children under numeric keys
   * and its attributes under '@attribute-name' keys. Text becomes a child named '!'
   * whose 'value' has been decoded by _from_cdata().
   *
   * @param string $data to parse
   * @return array the list of nodes found at this level
   */
  protected function _parse($data)
  {
      // Start with an 'empty' array with no data.
      $current = array();

      // Loop until we're out of data.
      while ($data != '')
      {
          // Is there an opening (or self-closing) tag right at the front?
          $match = array();
          preg_match('/\A<([\w\-:]+)((?:\s+.+?)?)([\s]?\/)?' . '>/', $data, $match);

          // Didn't find a tag? Then take text off the front instead.
          if (!isset($match[1]) || $match[1] == '')
          {
              // The text runs up to the next '<'. A '<' right at the front is a stray
              // (like a </something> with no start), so look for the one after it.
              $next = strpos($data, '<');
              if ($next === 0)
                  $next = strpos($data, '<', 1);

              // If there's no (other) <, the rest is data.
              if ($next === false)
              {
                  $chunk = $data;
                  $data = '';
              }
              else
              {
                  $chunk = substr($data, 0, $next);
                  $data = substr($data, $next);
              }

              $text_value = $this->_from_cdata($chunk);
              if ($text_value != '')
                  $current[] = array(
                      'name' => '!',
                      'value' => $text_value
                  );

              // Wait for an actual occurrence of an element.
              continue;
          }

          // Remove the tag; the pattern is anchored, so it sits at the very start.
          $data = (string) substr($data, strlen($match[0]));

          // Create a new element.
          $el = array('name' => $match[1]);

          // A trailing slash, wherever the pattern happened to capture it, means no content.
          $is_empty = (isset($match[3]) && trim($match[3]) == '/') || (isset($match[2]) && trim($match[2]) == '/');

          // If this ISN'T empty, remove the close tag and parse the inner data.
          if (!$is_empty)
          {
              $close_at = $this->_find_closing_tag($data, $match[1]);

              // Without any close tag the element simply stays empty.
              if ($close_at !== false)
              {
                  // Parse the insides.
                  $inner_match = (string) substr($data, 0, $close_at);

                  // Data now starts from where this section ends.
                  $data = (string) substr($data, $close_at + strlen('</' . $match[1] . '>'));

                  // Compare against '' rather than using empty(), so that a lone "0" survives.
                  if ($inner_match !== '')
                  {
                      // Parse the inner data.
                      if (strpos($inner_match, '<') !== false)
                          $el += $this->_parse($inner_match);
                      elseif (trim($inner_match) != '')
                      {
                          $text_value = $this->_from_cdata($inner_match);
                          if ($text_value != '')
                              $el[] = array(
                                  'name' => '!',
                                  'value' => $text_value
                              );
                      }
                  }
              }
          }

          // If we're dealing with attributes as well, parse them out.
          if (isset($match[2]) && $match[2] != '')
          {
              // Find all the attribute pairs in the string; the value may be empty.
              preg_match_all('/([\w:]+)="([^"]*)"/', $match[2], $attr, PREG_SET_ORDER);

              // Set them as @attribute-name.
              foreach ($attr as $match_attr)
                  $el['@' . $match_attr[1]] = $match_attr[2];
          }

          $current[] = $el;
      }

      // Return the parsed array.
      return $current;
  }

  /**
   * Find the close tag that belongs to an element whose open tag was just removed. (privately used...)
   *
   * Every further open tag of the same name that starts before the candidate close tag
   * pushes the candidate on to the next close tag, so nested elements of the same name
   * stay inside their parent.
   *
   * @param string $data the data following the open tag
   * @param string $tag the element's name
   * @return int|false the offset of the matching close tag, false if there is no close tag at all.
   */
  protected function _find_closing_tag($data, $tag)
  {
      $close = '</' . $tag . '>';
      $close_at = strpos($data, $close);
      if ($close_at === false)
          return false;

      // A real open tag: exactly this name, optional attributes, and not self-closing.
      $open = '~<' . preg_quote($tag, '~') . '(?:\s[^>]*)?(?<!/)>~';
      $offset = 0;

      while (preg_match($open, $data, $found, PREG_OFFSET_CAPTURE, $offset) && $found[0][1] < $close_at)
      {
          // Didn't find another close tag? Then just use the last and sod it.
          $next_close = strpos($data, $close, $close_at + 1);
          if ($next_close === false)
              break;

          $close_at = $next_close;
          $offset = $found[0][1] + 1;
      }

      return $close_at;
  }
  /**
   * Get a specific element's xml. (privately used...)
   *
   * @param array $array the element, or a nameless list of elements, in array form
   * @param int|null $indent how deep to indent (each tag then starts on a new line), or null for no layout at all
   * @return string the xml
   */
  protected function _xml($array, $indent)
  {
      // With an indent level, every tag goes on its own line.
      $indentation = '';
      if ($indent !== null)
          $indentation = "\n" . str_repeat(' ', $indent);

      // This is a set of elements, with no name... just do them one after the other.
      if (is_array($array) && !isset($array['name']))
      {
          $pieces = '';
          foreach ($array as $child)
              $pieces .= $this->_xml($child, $indent);

          return $pieces;
      }

      $tag = $array['name'];

      // This is just text!
      if ($tag == '!')
          return $indentation . '<![CDATA[' . $array['value'] . ']]>';

      // Result sets carry a [] after their name, which is not part of the tag.
      if (substr($tag, -2) == '[]')
          $tag = substr($tag, 0, -2);

      // Run through and collect the attributes and, recursively, the elements inside this.
      $attributes = '';
      $children = '';
      $has_children = false;
      $child_indent = $indent === null ? null : $indent + 1;

      foreach ($array as $key => $item)
      {
          if (substr($key, 0, 1) == '@')
              $attributes .= ' ' . substr($key, 1) . '="' . $item . '"';
          elseif (is_array($item))
          {
              $children .= $this->_xml($item, $child_indent);
              $has_children = true;
          }
      }

      // Start the element, then either close it around its children or close it right away.
      $open = $indentation . '<' . $tag . $attributes;
      if ($has_children)
          return $open . '>' . $children . $indentation . '</' . $tag . '>';

      return $open . ' />';
  }
  /**
   * Return an element as an array. (privately used...)
   *
   * Child elements are keyed by their name, so of several children
   * sharing one name only the last is kept.
   *
   * @param array $array the element in array form
   * @return array|string child element name => content pairs, or the element's text if it has no child elements
   */
  protected function _array($array)
  {
      $children = array();
      $text = '';

      foreach ($array as $value)
      {
          // Only text and elements are of interest - not the name or the attributes.
          if (is_array($value) && isset($value['name']))
          {
              if ($value['name'] == '!')
                  $text .= $value['value'];
              else
                  $children[$value['name']] = $this->_array($value);
          }
      }

      // The text only matters if there were no elements.
      return empty($children) ? $text : $children;
  }
  /**
   * Parse out CDATA tags. (htmlentities them...)
   *
   * Comments are removed, the contents of CDATA sections are run through htmlentities()
   * and the CDATA markers themselves are dropped. Everything else is kept as is.
   * This method has always been public, as it was declared without a visibility keyword.
   *
   * @param string $data
   * @return string
   */
  public function _to_cdata($data)
  {
      $inCdata = false;
      $inComment = false;
      $output = '';

      // Cut the data up around the four markers, keeping the markers as parts of their own.
      $parts = preg_split('~(<!\[CDATA\[|\]\]>|<!--|-->)~', $data, -1, PREG_SPLIT_DELIM_CAPTURE);

      foreach ($parts as $part)
      {
          // Inside a comment everything is dropped, up to and including the closing marker.
          if ($inComment)
          {
              if ($part === '-->')
                  $inComment = false;

              continue;
          }

          // Inside a CDATA block everything is encoded, apart from the CDATA markers.
          if ($inCdata)
          {
              if ($part === ']]>')
                  $inCdata = false;
              elseif ($part !== '<![CDATA[')
                  $output .= htmlentities($part, ENT_QUOTES);

              continue;
          }

          // Out in the open: watch for the start of a comment or of a CDATA block.
          if ($part === '<!--')
              $inComment = true;
          elseif ($part === '<![CDATA[')
              $inCdata = true;
          // Everything else is kept as is.
          else
              $output .= $part;
      }

      return $output;
  }
  /**
   * Turn the CDATAs back to normal text.
   *
   * Numeric references (&#NNN;) are decoded first, then the named html entities.
   * The text is trimmed as well if the trim option is on.
   *
   * @param string $data
   * @return string
   */
  protected function _from_cdata($data)
  {
      // Get the HTML translation table and reverse it.
      $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_QUOTES));

      // Decode the numeric references with a callback: the /e modifier that used to do this
      // no longer exists in PHP 7 and up, where it makes preg_replace() fail outright.
      $data = preg_replace_callback('~&#(\d{1,4});~', array($this, '_numeric_entity_to_char'), $data);

      // Translate all the entities out.
      $data = strtr($data, $trans_tbl);

      return $this->trim ? trim($data) : $data;
  }

  /**
   * Callback for _from_cdata(), turns one numeric reference into its character. (privately used...)
   *
   * @param array $match the regex match, with the digits in $match[1]
   * @return string a single byte, as chr() gives it
   */
  protected function _numeric_entity_to_char($match)
  {
      return chr((int) $match[1]);
  }
  /**
   * Given an array, return the text from that array. (recursive and privately used.)
   *
   * @param array $array the element in array form
   * @return string all the text values in and below the element, in order
   */
  protected function _fetch($array)
  {
      // Don't return anything if this is just a string.
      if (is_string($array))
          return '';

      $text = '';
      foreach ($array as $node)
      {
          // No name means it's most likely an attribute or the name itself - nothing to add.
          if (isset($node['name']))
          {
              // This is text, or it is another element to dive in to ;).
              $text .= $node['name'] == '!' ? $node['value'] : $this->_fetch($node);
          }
      }

      // Return all the bits and pieces we've put together.
      return $text;
  }
  /**
   * Get a specific array by path, one level down. (privately used...)
   *
   * @param array|false $array the element to look in; anything that is not an array gives false
   * @param string $path the name to look for: several names may be separated by '|',
   *  '*' matches any name, and '' or '.' gives back $array itself
   * @param int|null $level how many matches to skip before taking one, or null to take them all
   * @param bool $no_error default false, if true no notice is raised when nothing is found
   * @return array|false the one matching element, a set of elements named "path[]", or false if none
   */
  protected function _path($array, $path, $level, $no_error = false)
  {
      // Is $array even an array? It might be false!
      if (!is_array($array))
          return false;

      // Asking for *no* path?
      if ($path == '' || $path == '.')
          return $array;

      $paths = explode('|', $path);

      // A * means all elements of any name.
      $show_all = in_array('*', $paths);

      // Check each element.
      $results = array();
      foreach ($array as $value)
      {
          // Only named elements can match - not the name, the attributes, or text.
          if (!is_array($value) || !isset($value['name']) || $value['name'] === '!')
              continue;

          if ($show_all || in_array($value['name'], $paths))
          {
              // Skip elements before "the one".
              if ($level !== null && $level > 0)
                  $level--;
              else
                  $results[] = $value;
          }
      }

      // No results found...
      if (empty($results))
      {
          // Only dig through the backtrace when a notice is really going to be raised.
          if (!$no_error && ($this->debug_level & E_NOTICE))
          {
              // Step over the frames that belong to this object's own class (or a parent of it).
              $trace = debug_backtrace();
              $frames = count($trace);
              $i = 0;
              while ($i < $frames && isset($trace[$i]['class']))
              {
                  $frame_class = $trace[$i]['class'];
                  if (!($this instanceof $frame_class))
                      break;
                  $i++;
              }

              // The last frame stepped over holds the file and line of the outside call.
              $frame = $trace[max($i - 1, 0)];
              $debug = isset($frame['file'], $frame['line']) ? ' from ' . $frame['file'] . ' on line ' . $frame['line'] : '';

              // Cause an error.
              trigger_error('Undefined XML element: ' . $path . $debug, E_USER_NOTICE);
          }

          return false;
      }

      // Only one result, or only one was asked for.
      if (count($results) == 1 || $level !== null)
          return $results[0];

      // Return the result set.
      return $results + array('name' => $path . '[]');
  }
  622. }