PageRenderTime 41ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/backup/util/xml/parser/progressive_parser.class.php

https://github.com/200896596/moodle
PHP | 283 lines | 205 code | 13 blank | 65 comment | 11 complexity | 18afbeeb4bcf5a1603a3760927cc16c2 MD5 | raw file
  1. <?php
  2. // This file is part of Moodle - http://moodle.org/
  3. //
  4. // Moodle is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // Moodle is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU General Public License
  15. // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
  16. /**
  17. * @package moodlecore
  18. * @subpackage backup-xml
  19. * @copyright 2010 onwards Eloy Lafuente (stronk7) {@link http://stronk7.com}
  20. * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  21. */
  /**
   * Class implementing one SAX progressive push parser.
   *
   * SAX parser able to process XML content from files/variables. It supports
   * attributes and case folding and works only with UTF-8 content. It is a
   * progressive push parser because, instead of loading big chunks of information
   * in memory, it "publishes" (pushes) small pieces of information, in a
   * "proprietary array format", through the configured processor, which is
   * responsible for turning that information into handy formats for higher levels.
   *
   * Note that, while this progressive parser is able to process any XML file, it is
   * 100% progressive, so it publishes the information in the original order it is
   * parsed (that's the expected behaviour). Information belonging to the same path can
   * therefore be returned in different chunks if there are inner levels/paths in the
   * middle. Be warned!
   *
   * The "proprietary array format" that the parser publishes to the processor is:
   *
   *   array (
   *       'path'  => path where the tags belong to,
   *       'level' => level (1-based) of the tags,
   *       'tags'  => array (
   *           tag name => array (
   *               'name'  => name of the tag,
   *               'attrs' => array (name of the attr => value of the attr), only if present,
   *               'cdata' => cdata of the tag
   *           )
   *       )
   *   )
   *
   * The processor object must provide receive_chunk(), before_path(), after_path()
   * and process_cdata(), which are the methods this class calls on it.
   */
  class progressive_parser {

      /** @var mixed PHP's low level XML SAX parser; set to null once process() has completed */
      protected $xml_parser;

      /** @var string|null full path to the file being parsed (mutually exclusive with $contents) */
      protected $file;

      /** @var string|null contents being parsed (mutually exclusive with $file) */
      protected $contents;

      /** @var object|null processor used to publish the parsed information */
      protected $processor;

      /** @var int level of the current tag */
      protected $level;

      /** @var string path of the current tag */
      protected $path;

      /** @var string accumulated char data of the current tag */
      protected $accum;

      /** @var array attributes of the current tag */
      protected $attrs;

      /** @var array current level information being collected to be "pushed" */
      protected $topush;

      /** @var int level of the previous tag processed, used to detect pushing places */
      protected $prevlevel;

      /** @var array name/cdata/attributes of the tag being processed */
      protected $currtag;

      /**
       * Creates the underlying SAX parser and resets all the internal state.
       *
       * @param bool $case_folding value given to the XML_OPTION_CASE_FOLDING parser option
       */
      public function __construct($case_folding = false) {
          $this->xml_parser = xml_parser_create('UTF-8');
          xml_parser_set_option($this->xml_parser, XML_OPTION_CASE_FOLDING, $case_folding);
          // The handlers are array callables already bound to $this, so xml_set_object()
          // (deprecated since PHP 8.4) is not needed.
          xml_set_element_handler($this->xml_parser, array($this, 'start_tag'), array($this, 'end_tag'));
          xml_set_character_data_handler($this->xml_parser, array($this, 'char_data'));

          $this->file      = null;
          $this->contents  = null;
          $this->processor = null;
          $this->level     = 0;
          $this->path      = '';
          $this->accum     = '';
          $this->attrs     = array();
          $this->topush    = array();
          $this->prevlevel = 0;
          $this->currtag   = array();
      }

      /**
       * Sets the XML file to be processed by the parser, discarding any contents set before.
       *
       * @param string $file full path to the file
       * @throws progressive_parser_exception if the file does not exist or is not readable
       */
      public function set_file($file) {
          if (!file_exists($file) || (!is_readable($file))) {
              throw new progressive_parser_exception('invalid_file_to_parse');
          }
          $this->file = $file;
          $this->contents = null;
      }

      /**
       * Sets the XML contents to be processed by the parser, discarding any file set before.
       *
       * @param string $contents XML contents
       * @throws progressive_parser_exception if the contents are empty
       */
      public function set_contents($contents) {
          if (empty($contents)) {
              throw new progressive_parser_exception('invalid_contents_to_parse');
          }
          $this->contents = $contents;
          $this->file = null;
      }

      /**
       * Defines the processor in charge of processing the parsed chunks.
       *
       * @param progressive_parser_processor $processor
       * @throws progressive_parser_exception if $processor is not a progressive_parser_processor
       */
      public function set_processor($processor) {
          if (!$processor instanceof progressive_parser_processor) {
              throw new progressive_parser_exception('invalid_parser_processor');
          }
          $this->processor = $processor;
      }

      /**
       * Processes the XML, delegating found chunks to the processor.
       *
       * One parser instance can only complete one process() run.
       *
       * @throws progressive_parser_exception if no processor or no XML source has been set,
       *         if the parser has already been used, if the file cannot be opened or if
       *         the XML is not well formed
       */
      public function process() {
          if (empty($this->processor)) {
              throw new progressive_parser_exception('undefined_parser_processor');
          }
          if (empty($this->file) && empty($this->contents)) {
              throw new progressive_parser_exception('undefined_xml_to_parse');
          }
          if (is_null($this->xml_parser)) {
              throw new progressive_parser_exception('progressive_parser_already_used');
          }

          if ($this->file) {
              $this->process_file($this->file);
          } else {
              $this->parse($this->contents, true);
          }

          // Only PHP < 8.0 represents the parser as a resource that needs explicit freeing.
          if (is_resource($this->xml_parser)) {
              xml_parser_free($this->xml_parser);
          }
          $this->xml_parser = null;
      }

      /**
       * Provides one cross-platform dirname function for
       * handling parser paths, see MDL-24381
       *
       * @param string $path parser path
       * @return string parent path, always using forward slashes
       */
      public static function dirname($path) {
          return str_replace('\\', '/', dirname($path));
      }

      // Protected API starts here

      /**
       * Feeds the given file to the parser in 8KB chunks.
       *
       * @param string $file full path to the file
       * @throws progressive_parser_exception if the file cannot be opened or is not well formed
       */
      protected function process_file($file) {
          $fh = fopen($file, 'r');
          if ($fh === false) {
              throw new progressive_parser_exception('invalid_file_to_parse');
          }
          try {
              // Stays true for an empty file, which (as before) is not sent to the parser at all.
              $finished = true;
              // Compare explicitly: one chunk like "0" is falsy but still is valid data.
              while (($buffer = fread($fh, 8192)) !== false && $buffer !== '') {
                  $finished = feof($fh);
                  $this->parse($buffer, $finished);
              }
              // When the file size is an exact multiple of the chunk size, EOF is only
              // noticed by the read that returns no data, so the last chunk was sent
              // as not final. Finish the document explicitly in that case.
              if (!$finished) {
                  $this->parse('', true);
              }
          } catch (Exception $e) {
              // Do not leak the file handle when parsing (or the processor) fails.
              fclose($fh);
              throw $e;
          }
          fclose($fh);
      }

      /**
       * Sends one piece of data to the low level parser.
       *
       * @param string $data piece of XML
       * @param bool $eof true if this is the last piece of the document
       * @throws progressive_parser_exception on XML errors, with the error text,
       *         line and column passed as debug information
       */
      protected function parse($data, $eof) {
          if (!xml_parse($this->xml_parser, $data, $eof)) {
              throw new progressive_parser_exception(
                  'xml_parsing_error', null,
                  sprintf('XML error: %s at line %d, column %d',
                      xml_error_string(xml_get_error_code($this->xml_parser)),
                      xml_get_current_line_number($this->xml_parser),
                      xml_get_current_column_number($this->xml_parser)));
          }
      }

      /**
       * Sends one chunk of collected information to the processor.
       *
       * @param array $data chunk in the array format described in the class documentation
       */
      protected function publish($data) {
          $this->processor->receive_chunk($data);
      }

      /**
       * Inform to the processor that we have started parsing one path
       *
       * @param string $path
       */
      protected function inform_start($path) {
          $this->processor->before_path($path);
      }

      /**
       * Inform to the processor that we have finished parsing one path
       *
       * @param string $path
       */
      protected function inform_end($path) {
          $this->processor->after_path($path);
      }

      /**
       * Lets the processor transform the char data of one tag before it is stored.
       *
       * @param string $data accumulated char data
       * @return mixed whatever the processor returns for that char data
       */
      protected function postprocess_cdata($data) {
          return $this->processor->process_cdata($data);
      }

      /**
       * Stores the current tag in the information to be pushed.
       *
       * We always add the last not-empty repetition of one tag. Empty repetitions of
       * one tag that has been stored already are ignored.
       */
      protected function stash_current_tag() {
          $name = $this->currtag['name'];
          // One tag that has been started but not ended has no cdata yet.
          $cdata = isset($this->currtag['cdata']) ? $this->currtag['cdata'] : '';
          if (isset($this->topush['tags'][$name]) && trim($cdata) === '') {
              // Do nothing, the tag already exists and the repetition is empty
              return;
          }
          $this->topush['tags'][$name] = $this->currtag;
      }

      /**
       * Publishes the collected tags (if any) and starts one new, empty, collection.
       */
      protected function flush_topush() {
          if (!empty($this->topush['tags'])) {
              $this->publish($this->topush);
          }
          $this->currtag = array();
          $this->topush = array();
      }

      /**
       * SAX handler called when one tag is opened.
       *
       * @param mixed $parser low level parser invoking the handler (unused)
       * @param string $tag name of the tag
       * @param array $attributes attributes of the tag (name => value)
       */
      protected function start_tag($parser, $tag, $attributes) {
          // Normal update of parser internals
          $this->level++;
          $this->path .= '/' . $tag;
          $this->accum = '';
          $this->attrs = !empty($attributes) ? $attributes : array();

          // Inform processor we are about to start one tag
          $this->inform_start($this->path);

          // Entering a new inner level, publish all the information available
          if ($this->level > $this->prevlevel) {
              if (!empty($this->currtag) && (!empty($this->currtag['attrs']) || !empty($this->currtag['cdata']))) {
                  $this->stash_current_tag();
              }
              $this->flush_topush();
          }

          // If not set, build to push common header
          if (empty($this->topush)) {
              $this->topush['path']  = progressive_parser::dirname($this->path);
              $this->topush['level'] = $this->level;
              $this->topush['tags']  = array();
          }

          // Handling a new tag, create it
          $this->currtag['name'] = $tag;
          // And add attributes if present
          if ($this->attrs) {
              $this->currtag['attrs'] = $this->attrs;
          }

          // For the records
          $this->prevlevel = $this->level;
      }

      /**
       * SAX handler called when one tag is closed.
       *
       * @param mixed $parser low level parser invoking the handler (unused)
       * @param string $tag name of the tag
       */
      protected function end_tag($parser, $tag) {
          // Ending recently started tag, add value to current tag
          if ($this->level == $this->prevlevel) {
              $this->currtag['cdata'] = $this->postprocess_cdata($this->accum);
              $this->stash_current_tag();
              $this->currtag = array();
          }

          // Leaving one level, publish all the information available
          if ($this->level < $this->prevlevel) {
              $this->flush_topush();
          }

          // For the records
          $this->prevlevel = $this->level;

          // Inform processor we have finished one tag
          $this->inform_end($this->path);

          // Normal update of parser internals
          $this->level--;
          $this->path = progressive_parser::dirname($this->path);
      }

      /**
       * SAX handler called with (pieces of) the char data of the current tag.
       *
       * @param mixed $parser low level parser invoking the handler (unused)
       * @param string $data piece of char data
       */
      protected function char_data($parser, $data) {
          $this->accum .= $data;
      }
  }
  /**
   * Exception class used by all the progressive_parser stuff.
   *
   * Error strings are looked up in the 'error' component.
   */
  class progressive_parser_exception extends moodle_exception {

      /**
       * @param string $errorcode identifier of the error string
       * @param mixed $a extra information used to build the error string
       * @param string|null $debuginfo optional debugging information
       */
      public function __construct($errorcode, $a = null, $debuginfo = null) {
          // moodle_exception takes ($errorcode, $module, $link, $a, $debuginfo): the debug
          // information goes in fifth place, one extra argument before it makes it get lost.
          parent::__construct($errorcode, 'error', '', $a, $debuginfo);
      }
  }