PageRenderTime 46ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/inc/XML/Parser.php

https://github.com/chregu/fluxcms
PHP | 685 lines | 256 code | 67 blank | 362 comment | 39 complexity | cacf261479a5888e82dbd1d4172ad445 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, Apache-2.0, LGPL-2.1
  1. <?php
  2. //
  3. // +----------------------------------------------------------------------+
  4. // | PHP Version 4 |
  5. // +----------------------------------------------------------------------+
  6. // | Copyright (c) 1997-2004 The PHP Group |
  7. // +----------------------------------------------------------------------+
  8. // | This source file is subject to version 3.0 of the PHP license, |
  9. // | that is bundled with this package in the file LICENSE, and is |
  10. // | available at through the world-wide-web at |
  11. // | http://www.php.net/license/3_0.txt. |
  12. // | If you did not receive a copy of the PHP license and are unable to |
  13. // | obtain it through the world-wide-web, please send a note to |
  14. // | license@php.net so we can mail you a copy immediately. |
  15. // +----------------------------------------------------------------------+
  16. // | Author: Stig Bakken <ssb@fast.no> |
  17. // | Tomas V.V.Cox <cox@idecnet.com> |
  18. // | Stephan Schmidt <schst@php-tools.net> |
  19. // +----------------------------------------------------------------------+
  20. //
  21. // $Id$
  22. /**
  23. * XML Parser class.
  24. *
  25. * This is an XML parser based on PHP's "xml" extension,
  26. * based on the bundled expat library.
  27. *
  28. * @category XML
  29. * @package XML_Parser
  30. * @author Stig Bakken <ssb@fast.no>
  31. * @author Tomas V.V.Cox <cox@idecnet.com>
  32. * @author Stephan Schmidt <schst@php-tools.net>
  33. */
  34. /**
  35. * uses PEAR's error handling
  36. */
  37. require_once 'PEAR.php';
/**
 * Error code: the XML parser resource could not be created
 * (raised by XML_Parser::_create()).
 */
define('XML_PARSER_ERROR_NO_RESOURCE', 200);
/**
 * Error code: a parsing mode other than "func" or "event" was requested
 * (raised by XML_Parser::setMode() and XML_Parser::_initHandlers()).
 */
define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);
/**
 * Error code: the target encoding was rejected by the parser
 * (raised by XML_Parser::_create()).
 */
define('XML_PARSER_ERROR_INVALID_ENCODING', 202);
/**
 * Error code: the specified file could not be opened for reading
 * (raised by XML_Parser::setInputFile()).
 */
define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);
/**
 * Error code: the input passed to the parser has an illegal format
 * (referenced by XML_Parser::setInput()).
 */
define('XML_PARSER_ERROR_INVALID_INPUT', 204);
/**
 * Error code: a remote (http/ftp) file was requested while the
 * "allow_url_fopen" ini setting is disabled
 * (raised by XML_Parser::setInputFile()).
 */
define('XML_PARSER_ERROR_REMOTE', 205);
  62. /**
  63. * XML Parser class.
  64. *
  65. * This is an XML parser based on PHP's "xml" extension,
  66. * based on the bundled expat library.
  67. *
  68. * Notes:
  69. * - It requires PHP 4.0.4pl1 or greater
  70. * - From revision 1.17, the function names used by the 'func' mode
  71. * are in the format "xmltag_$elem", for example: use "xmltag_name"
  72. * to handle the <name></name> tags of your xml file.
  73. *
  74. * @category XML
  75. * @package XML_Parser
  76. * @author Stig Bakken <ssb@fast.no>
  77. * @author Tomas V.V.Cox <cox@idecnet.com>
  78. * @author Stephan Schmidt <schst@php-tools.net>
  79. * @todo create XML_Parser_Namespace to parse documents with namespaces
  80. * @todo create XML_Parser_Pull
  81. * @todo Tests that need to be made:
  82. * - mixing character encodings
  83. * - a test using all expat handlers
  84. * - options (folding, output charset)
  85. * - different parsing modes
  86. */
  87. class XML_Parser extends PEAR
  88. {
  89. // {{{ properties
  90. /**
  91. * XML parser handle
  92. *
  93. * @var resource
  94. * @see xml_parser_create()
  95. */
  96. var $parser;
  97. /**
  98. * File handle if parsing from a file
  99. *
  100. * @var resource
  101. */
  102. var $fp;
  103. /**
  104. * Whether to do case folding
  105. *
  106. * If set to true, all tag and attribute names will
  107. * be converted to UPPER CASE.
  108. *
  109. * @var boolean
  110. */
  111. var $folding = true;
  112. /**
  113. * Mode of operation, one of "event" or "func"
  114. *
  115. * @var string
  116. */
  117. var $mode;
  118. /**
  119. * Mapping from expat handler function to class method.
  120. *
  121. * @var array
  122. */
  123. var $handler = array(
  124. 'character_data_handler' => 'cdataHandler',
  125. 'default_handler' => 'defaultHandler',
  126. 'processing_instruction_handler' => 'piHandler',
  127. 'unparsed_entity_decl_handler' => 'unparsedHandler',
  128. 'notation_decl_handler' => 'notationHandler',
  129. 'external_entity_ref_handler' => 'entityrefHandler'
  130. );
  131. /**
  132. * source encoding
  133. *
  134. * @var string
  135. */
  136. var $srcenc;
  137. /**
  138. * target encoding
  139. *
  140. * @var string
  141. */
  142. var $tgtenc;
  143. /**
  144. * handler object
  145. *
  146. * @var object
  147. */
  148. var $_handlerObj;
  149. // }}}
  150. // {{{ constructor
  151. /**
  152. * Creates an XML parser.
  153. *
  154. * This is needed for PHP4 compatibility, it will
  155. * call the constructor, when a new instance is created.
  156. *
  157. * @param string $srcenc source charset encoding, use NULL (default) to use
  158. * whatever the document specifies
  159. * @param string $mode how this parser object should work, "event" for
  160. * startelement/endelement-type events, "func"
  161. * to have it call functions named after elements
  162. * @param string $tgenc a valid target encoding
  163. */
  164. function XML_Parser($srcenc = null, $mode = 'event', $tgtenc = null)
  165. {
  166. XML_Parser::__construct($srcenc, $mode, $tgtenc);
  167. }
  168. // }}}
  169. /**
  170. * PHP5 constructor
  171. *
  172. * @param string $srcenc source charset encoding, use NULL (default) to use
  173. * whatever the document specifies
  174. * @param string $mode how this parser object should work, "event" for
  175. * startelement/endelement-type events, "func"
  176. * to have it call functions named after elements
  177. * @param string $tgenc a valid target encoding
  178. */
  179. function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
  180. {
  181. $this->PEAR('XML_Parser_Error');
  182. $this->mode = $mode;
  183. $this->srcenc = $srcenc;
  184. $this->tgtenc = $tgtenc;
  185. }
  186. // }}}
  187. /**
  188. * Sets the mode of the parser.
  189. *
  190. * Possible modes are:
  191. * - func
  192. * - event
  193. *
  194. * You can set the mode using the second parameter
  195. * in the constructor.
  196. *
  197. * This method is only needed, when switching to a new
  198. * mode at a later point.
  199. *
  200. * @access public
  201. * @param string mode, either 'func' or 'event'
  202. * @return boolean|object true on success, PEAR_Error otherwise
  203. */
  204. function setMode($mode)
  205. {
  206. if ($mode != 'func' && $mode != 'event') {
  207. $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
  208. }
  209. $this->mode = $mode;
  210. return true;
  211. }
  212. /**
  213. * Sets the object, that will handle the XML events
  214. *
  215. * This allows you to create a handler object independent of the
  216. * parser object that you are using and easily switch the underlying
  217. * parser.
  218. *
  219. * If no object will be set, XML_Parser assumes that you
  220. * extend this class and handle the events in $this.
  221. *
  222. * @access public
  223. * @param object object to handle the events
  224. * @return boolean will always return true
  225. * @since v1.2.0beta3
  226. */
  227. function setHandlerObj(&$obj)
  228. {
  229. $this->_handlerObj = &$obj;
  230. return true;
  231. }
  232. /**
  233. * Init the element handlers
  234. *
  235. * @access private
  236. */
  237. function _initHandlers()
  238. {
  239. if (!is_resource($this->parser)) {
  240. return false;
  241. }
  242. if (!is_object($this->_handlerObj)) {
  243. $this->_handlerObj = &$this;
  244. }
  245. switch ($this->mode) {
  246. case 'func':
  247. xml_set_object($this->parser, $this->_handlerObj);
  248. xml_set_element_handler($this->parser, array(&$this, 'funcStartHandler'), array(&$this, 'funcEndHandler'));
  249. break;
  250. case 'event':
  251. xml_set_object($this->parser, $this->_handlerObj);
  252. xml_set_element_handler($this->parser, 'startHandler', 'endHandler');
  253. break;
  254. default:
  255. return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
  256. break;
  257. }
  258. /**
  259. * set additional handlers for character data, entities, etc.
  260. */
  261. foreach ($this->handler as $xml_func => $method) {
  262. if (method_exists($this->_handlerObj, $method)) {
  263. $xml_func = 'xml_set_' . $xml_func;
  264. $xml_func($this->parser, $method);
  265. }
  266. }
  267. }
  268. // {{{ _create()
  269. /**
  270. * create the XML parser resource
  271. *
  272. * Has been moved from the constructor to avoid
  273. * problems with object references.
  274. *
  275. * Furthermore it allows us returning an error
  276. * if something fails.
  277. *
  278. * @access private
  279. * @return boolean|object true on success, PEAR_Error otherwise
  280. *
  281. * @see xml_parser_create
  282. */
  283. function _create()
  284. {
  285. if ($this->srcenc === null) {
  286. $xp = @xml_parser_create();
  287. } else {
  288. $xp = @xml_parser_create($this->srcenc);
  289. }
  290. if (is_resource($xp)) {
  291. if ($this->tgtenc !== null) {
  292. if (!@xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING,
  293. $this->tgtenc)) {
  294. return $this->raiseError('invalid target encoding', XML_PARSER_ERROR_INVALID_ENCODING);
  295. }
  296. }
  297. $this->parser = $xp;
  298. $result = $this->_initHandlers($this->mode);
  299. if ($this->isError($result)) {
  300. return $result;
  301. }
  302. xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);
  303. return true;
  304. }
  305. return $this->raiseError('Unable to create XML parser resource.', XML_PARSER_ERROR_NO_RESOURCE);
  306. }
  307. // }}}
  308. // {{{ reset()
  309. /**
  310. * Reset the parser.
  311. *
  312. * This allows you to use one parser instance
  313. * to parse multiple XML documents.
  314. *
  315. * @access public
  316. * @return boolean|object true on success, PEAR_Error otherwise
  317. */
  318. function reset()
  319. {
  320. $result = $this->_create();
  321. if ($this->isError( $result )) {
  322. return $result;
  323. }
  324. return true;
  325. }
  326. // }}}
  327. // {{{ setInputFile()
  328. /**
  329. * Sets the input xml file to be parsed
  330. *
  331. * @param string Filename (full path)
  332. * @return resource fopen handle of the given file
  333. * @throws XML_Parser_Error
  334. * @see setInput(), setInputString(), parse()
  335. * @access public
  336. */
  337. function setInputFile($file)
  338. {
  339. /**
  340. * check, if file is a remote file
  341. */
  342. if (eregi('^(http|ftp)://', substr($file, 0, 10))) {
  343. if (!ini_get('allow_url_fopen')) {
  344. return $this->raiseError('Remote files cannot be parsed, as safe mode is enabled.', XML_PARSER_ERROR_REMOTE);
  345. }
  346. }
  347. $fp = @fopen($file, 'rb');
  348. if (is_resource($fp)) {
  349. $this->fp = $fp;
  350. return $fp;
  351. }
  352. return $this->raiseError('File could not be opened.', XML_PARSER_ERROR_FILE_NOT_READABLE);
  353. }
  354. // }}}
  355. // {{{ setInputString()
  356. /**
  357. * XML_Parser::setInputString()
  358. *
  359. * Sets the xml input from a string
  360. *
  361. * @param string $data a string containing the XML document
  362. * @return null
  363. **/
  364. function setInputString($data)
  365. {
  366. $this->fp = $data;
  367. return null;
  368. }
  369. // }}}
  370. // {{{ setInput()
  371. /**
  372. * Sets the file handle to use with parse().
  373. *
  374. * You should use setInputFile() or setInputString() if you
  375. * pass a string
  376. *
  377. * @param mixed $fp Can be either a resource returned from fopen(),
  378. * a URL, a local filename or a string.
  379. * @access public
  380. * @see parse()
  381. * @uses setInputString(), setInputFile()
  382. */
  383. function setInput($fp)
  384. {
  385. if (is_resource($fp)) {
  386. $this->fp = $fp;
  387. return true;
  388. }
  389. // see if it's an absolute URL (has a scheme at the beginning)
  390. elseif (eregi('^[a-z]+://', substr($fp, 0, 10))) {
  391. return $this->setInputFile($fp);
  392. }
  393. // see if it's a local file
  394. elseif (file_exists($fp)) {
  395. return $this->setInputFile($fp);
  396. }
  397. // it must be a string
  398. else {
  399. $this->fp = $fp;
  400. return true;
  401. }
  402. return $this->raiseError('Illegal input format', XML_PARSER_ERROR_INVALID_INPUT);
  403. }
  404. // }}}
  405. // {{{ parse()
  406. /**
  407. * Central parsing function.
  408. *
  409. * @return true|object PEAR error returns true on success, or a PEAR_Error otherwise
  410. * @access public
  411. */
  412. function parse()
  413. {
  414. /**
  415. * reset the parser
  416. */
  417. $result = $this->reset();
  418. if ($this->isError($result)) {
  419. return $result;
  420. }
  421. // if $this->fp was fopened previously
  422. if (is_resource($this->fp)) {
  423. while ($data = fread($this->fp, 4096)) {
  424. if (!$this->_parseString($data, feof($this->fp))) {
  425. $error = &$this->raiseError();
  426. $this->free();
  427. return $error;
  428. }
  429. }
  430. // otherwise, $this->fp must be a string
  431. } else {
  432. if (!$this->_parseString($this->fp, true)) {
  433. $error = &$this->raiseError();
  434. $this->free();
  435. return $error;
  436. }
  437. }
  438. $this->free();
  439. return true;
  440. }
  441. /**
  442. * XML_Parser::_parseString()
  443. *
  444. * @param string $data
  445. * @param boolean $eof
  446. * @return bool
  447. * @access private
  448. * @see parseString()
  449. **/
  450. function _parseString($data, $eof = false)
  451. {
  452. return xml_parse($this->parser, $data, $eof);
  453. }
  454. // }}}
  455. // {{{ parseString()
  456. /**
  457. * XML_Parser::parseString()
  458. *
  459. * Parses a string.
  460. *
  461. * @param string $data XML data
  462. * @param boolean $eof If set and TRUE, data is the last piece of data sent in this parser
  463. * @throws XML_Parser_Error
  464. * @return Pear Error|true true on success or a PEAR Error
  465. * @see _parseString()
  466. */
  467. function parseString($data, $eof = false)
  468. {
  469. if (!isset($this->parser) || !is_resource($this->parser)) {
  470. $this->reset();
  471. }
  472. if (!$this->_parseString($data, $eof)) {
  473. $error = &$this->raiseError();
  474. $this->free();
  475. return $error;
  476. }
  477. if ($eof === true) {
  478. $this->free();
  479. }
  480. return true;
  481. }
  482. /**
  483. * XML_Parser::free()
  484. *
  485. * Free the internal resources associated with the parser
  486. *
  487. * @return null
  488. **/
  489. function free()
  490. {
  491. if (isset($this->parser) && is_resource($this->parser)) {
  492. xml_parser_free($this->parser);
  493. unset( $this->parser );
  494. }
  495. if (isset($this->fp) && is_resource($this->fp)) {
  496. fclose($this->fp);
  497. }
  498. unset($this->fp);
  499. return null;
  500. }
  501. /**
  502. * XML_Parser::raiseError()
  503. *
  504. * Throws a XML_Parser_Error
  505. *
  506. * @param string $msg the error message
  507. * @param integer $ecode the error message code
  508. * @return XML_Parser_Error
  509. **/
  510. function raiseError($msg = null, $ecode = 0)
  511. {
  512. $msg = !is_null($msg) ? $msg : $this->parser;
  513. $err = &new XML_Parser_Error($msg, $ecode);
  514. return parent::raiseError($err);
  515. }
  516. // }}}
  517. // {{{ funcStartHandler()
  518. function funcStartHandler($xp, $elem, $attribs)
  519. {
  520. $func = 'xmltag_' . $elem;
  521. if (strchr($func, '.')) {
  522. $func = str_replace('.', '_', $func);
  523. }
  524. if (method_exists($this->_handlerObj, $func)) {
  525. call_user_func(array(&$this->_handlerObj, $func), $xp, $elem, $attribs);
  526. } elseif (method_exists($this->_handlerObj, 'xmltag')) {
  527. call_user_func(array(&$this->_handlerObj, 'xmltag'), $xp, $elem, $attribs);
  528. }
  529. }
  530. // }}}
  531. // {{{ funcEndHandler()
  532. function funcEndHandler($xp, $elem)
  533. {
  534. $func = 'xmltag_' . $elem . '_';
  535. if (strchr($func, '.')) {
  536. $func = str_replace('.', '_', $func);
  537. }
  538. if (method_exists($this->_handlerObj, $func)) {
  539. call_user_func(array(&$this->_handlerObj, $func), $xp, $elem);
  540. } elseif (method_exists($this->_handlerObj, 'xmltag_')) {
  541. call_user_func(array(&$this->_handlerObj, 'xmltag_'), $xp, $elem);
  542. }
  543. }
  544. // }}}
  545. // {{{ startHandler()
  546. /**
  547. *
  548. * @abstract
  549. */
  550. function startHandler($xp, $elem, &$attribs)
  551. {
  552. return NULL;
  553. }
  554. // }}}
  555. // {{{ endHandler()
  556. /**
  557. *
  558. * @abstract
  559. */
  560. function endHandler($xp, $elem)
  561. {
  562. return NULL;
  563. }
  564. // }}}me
  565. }
  566. /**
  567. * error class, replaces PEAR_Error
  568. *
  569. * An instance of this class will be returned
  570. * if an error occurs inside XML_Parser.
  571. *
  572. * There are three advantages over using the standard PEAR_Error:
  573. * - All messages will be prefixed
  574. * - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
  575. * - messages can be generated from the xml_parser resource
  576. *
  577. * @package XML_Parser
  578. * @access public
  579. * @see PEAR_Error
  580. */
  581. class XML_Parser_Error extends PEAR_Error
  582. {
  583. // {{{ properties
  584. /**
  585. * prefix for all messages
  586. *
  587. * @var string
  588. */
  589. var $error_message_prefix = 'XML_Parser: ';
  590. // }}}
  591. // {{{ constructor()
  592. /**
  593. * construct a new error instance
  594. *
  595. * You may either pass a message or an xml_parser resource as first
  596. * parameter. If a resource has been passed, the last error that
  597. * happened will be retrieved and returned.
  598. *
  599. * @access public
  600. * @param string|resource message or parser resource
  601. * @param integer error code
  602. * @param integer error handling
  603. * @param integer error level
  604. */
  605. function XML_Parser_Error($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
  606. {
  607. if (is_resource($msgorparser)) {
  608. $code = xml_get_error_code($msgorparser);
  609. $msgorparser = sprintf('%s at XML input line %d:%d',
  610. xml_error_string($code),
  611. xml_get_current_line_number($msgorparser),
  612. xml_get_current_column_number($msgorparser));
  613. }
  614. $this->PEAR_Error($msgorparser, $code, $mode, $level);
  615. }
  616. // }}}
  617. }
  618. ?>