PageRenderTime 46ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/easyrest/pear/XML/Parser.php

http://easyrest.googlecode.com/
PHP | 768 lines | 265 code | 67 blank | 436 comment | 38 complexity | 144dfed44aba21574924acdb76cfa236 MD5 | raw file
  1. <?php
  2. /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
  3. /**
  4. * XML_Parser
  5. *
  6. * XML Parser package
  7. *
  8. * PHP versions 4 and 5
  9. *
  10. * LICENSE:
  11. *
  12. * Copyright (c) 2002-2008 The PHP Group
  13. * All rights reserved.
  14. *
  15. * Redistribution and use in source and binary forms, with or without
  16. * modification, are permitted provided that the following conditions
  17. * are met:
  18. *
  19. * * Redistributions of source code must retain the above copyright
  20. * notice, this list of conditions and the following disclaimer.
  21. * * Redistributions in binary form must reproduce the above copyright
  22. * notice, this list of conditions and the following disclaimer in the
  23. * documentation and/or other materials provided with the distribution.
  24. * * The name of the author may not be used to endorse or promote products
  25. * derived from this software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  28. * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  29. * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  30. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  31. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  32. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  33. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  34. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  35. * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  36. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  37. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  38. *
  39. * @category XML
  40. * @package XML_Parser
  41. * @author Stig Bakken <ssb@fast.no>
  42. * @author Tomas V.V.Cox <cox@idecnet.com>
  43. * @author Stephan Schmidt <schst@php.net>
  44. * @copyright 2002-2008 The PHP Group
  45. * @license http://opensource.org/licenses/bsd-license New BSD License
  46. * @version CVS: $Id: Parser.php,v 1.29 2008/08/24 21:48:21 ashnazg Exp $
  47. * @link http://pear.php.net/package/XML_Parser
  48. */
  49. /**
  50. * uses PEAR's error handling
  51. */
  52. require_once (PEAR_LIB.'PEAR.php');
  /**
   * Error code: the XML parser resource could not be created.
   */
  define('XML_PARSER_ERROR_NO_RESOURCE', 200);

  /**
   * Error code: a parsing mode other than the supported ones was requested.
   */
  define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);

  /**
   * Error code: an invalid source or target encoding was given.
   */
  define('XML_PARSER_ERROR_INVALID_ENCODING', 202);

  /**
   * Error code: the specified file could not be opened for reading.
   */
  define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);

  /**
   * Error code: the supplied input is not usable.
   */
  define('XML_PARSER_ERROR_INVALID_INPUT', 204);

  /**
   * Error code: a remote file cannot be retrieved
   * (raised when the allow_url_fopen ini setting is off).
   */
  define('XML_PARSER_ERROR_REMOTE', 205);
  77. /**
  78. * XML Parser class.
  79. *
  80. * This is an XML parser based on PHP's "xml" extension,
  81. * based on the bundled expat library.
  82. *
  83. * Notes:
  84. * - It requires PHP 4.0.4pl1 or greater
  85. * - From revision 1.17, the function names used by the 'func' mode
  86. * are in the format "xmltag_$elem", for example: use "xmltag_name"
  87. * to handle the <name></name> tags of your xml file.
  88. * - different parsing modes
  89. *
  90. * @category XML
  91. * @package XML_Parser
  92. * @author Stig Bakken <ssb@fast.no>
  93. * @author Tomas V.V.Cox <cox@idecnet.com>
  94. * @author Stephan Schmidt <schst@php.net>
  95. * @copyright 2002-2008 The PHP Group
  96. * @license http://opensource.org/licenses/bsd-license New BSD License
  97. * @version Release: @package_version@
  98. * @link http://pear.php.net/package/XML_Parser
  99. * @todo create XML_Parser_Namespace to parse documents with namespaces
  100. * @todo create XML_Parser_Pull
  101. * @todo Tests that need to be made:
  102. * - mixing character encodings
  103. * - a test using all expat handlers
  104. * - options (folding, output charset)
  105. */
  106. class XML_Parser extends PEAR
  107. {
  108. // {{{ properties
  /**
   * Handle of the underlying XML parser.
   *
   * @var resource
   * @see xml_parser_create()
   */
  var $parser;

  /**
   * Input to parse: a file handle when parsing from a file,
   * or the XML document itself when parsing from a string.
   *
   * @var resource|string
   */
  var $fp;

  /**
   * Case folding switch.
   *
   * When true, all tag and attribute names are
   * converted to UPPER CASE by the parser.
   *
   * @var boolean
   */
  var $folding = true;

  /**
   * Operating mode, either "event" or "func".
   *
   * @var string
   */
  var $mode;

  /**
   * Optional handlers to register.
   *
   * Each key is the name of an xml_set_* function without the
   * "xml_set_" prefix; each value is the method that is registered
   * with it, provided the handler object implements that method.
   *
   * @var array
   */
  var $handler = array(
      'character_data_handler'         => 'cdataHandler',
      'default_handler'                => 'defaultHandler',
      'processing_instruction_handler' => 'piHandler',
      'unparsed_entity_decl_handler'   => 'unparsedHandler',
      'notation_decl_handler'          => 'notationHandler',
      'external_entity_ref_handler'    => 'entityrefHandler'
  );

  /**
   * Source encoding (null lets the document decide).
   *
   * @var string
   */
  var $srcenc;

  /**
   * Target encoding.
   *
   * @var string
   */
  var $tgtenc;

  /**
   * Object that receives the parser callbacks.
   *
   * @var object
   */
  var $_handlerObj;

  /**
   * Encodings accepted as source encoding.
   *
   * @var array
   */
  var $_validEncodings = array('ISO-8859-1', 'UTF-8', 'US-ASCII');
  174. // }}}
  175. // {{{ php4 constructor
  /**
   * PHP4-style constructor.
   *
   * Exists for PHP4 compatibility only; it simply forwards all of its
   * arguments to XML_Parser::__construct().
   *
   * @param string $srcenc source charset encoding, NULL (default) means
   *                       "use whatever the document specifies"
   * @param string $mode   "event" for startelement/endelement-type events,
   *                       "func" to call functions named after elements
   * @param string $tgtenc a valid target encoding
   */
  function XML_Parser($srcenc = null, $mode = 'event', $tgtenc = null)
  {
      // explicit class name: always run this class' own constructor
      XML_Parser::__construct($srcenc, $mode, $tgtenc);
  }
  193. // }}}
  194. // {{{ php5 constructor
  /**
   * PHP5 constructor.
   *
   * Initialises the PEAR base class with XML_Parser_Error as error class
   * and stores the settings. The parser itself is not created here.
   *
   * @param string $srcenc source charset encoding, NULL (default) means
   *                       "use whatever the document specifies"
   * @param string $mode   "event" for startelement/endelement-type events,
   *                       "func" to call functions named after elements
   * @param string $tgtenc a valid target encoding
   */
  function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
  {
      $this->PEAR('XML_Parser_Error');

      // plain storage only, nothing is validated at this point
      $this->srcenc = $srcenc;
      $this->tgtenc = $tgtenc;
      $this->mode   = $mode;
  }
  212. // }}}
  /**
   * Sets the mode of the parser.
   *
   * Possible modes are:
   * - func
   * - event
   *
   * The mode can also be passed as second parameter of the constructor;
   * this method is only needed when switching modes later on.
   *
   * An unsupported mode is rejected: the current mode is left untouched
   * and the error object is returned to the caller.
   *
   * @param string $mode mode, either 'func' or 'event'
   *
   * @return boolean|object true on success, PEAR_Error otherwise
   * @access public
   */
  function setMode($mode)
  {
      // strict comparison: with "!=" the integer 0 equals any
      // non-numeric string on PHP < 8 and would pass as a valid mode
      if ($mode !== 'func' && $mode !== 'event') {
          return $this->raiseError('Unsupported mode given',
              XML_PARSER_ERROR_UNSUPPORTED_MODE);
      }
      $this->mode = $mode;
      return true;
  }
  /**
   * Registers the object that will handle the XML events.
   *
   * This makes it possible to keep the handler independent of the
   * parser object and to swap the underlying parser easily.
   *
   * When no object is registered, XML_Parser assumes that this class
   * has been extended and that the events are handled in $this.
   *
   * @param object &$obj object to handle the events
   *
   * @return boolean will always return true
   * @access public
   * @since v1.2.0beta3
   */
  function setHandlerObj(&$obj)
  {
      // stored by reference, not as a copy of the object
      $this->_handlerObj = &$obj;

      return true;
  }
  /**
   * Attaches the element handlers and the optional handlers to the parser.
   *
   * In 'func' mode the element events are routed through
   * funcStartHandler()/funcEndHandler() of this object; in 'event' mode
   * the startHandler/endHandler methods of the handler object are used.
   * Afterwards every entry of $this->handler whose method exists on the
   * handler object is registered through the matching xml_set_* function.
   *
   * @return mixed false if no parser has been created yet, a PEAR_Error
   *               for an unsupported mode, true on success
   * @access private
   */
  function _initHandlers()
  {
      // xml_parser_create() returns a resource before PHP 8 and an
      // XMLParser object from PHP 8 on - accept both kinds of handle
      $parser = isset($this->parser) ? $this->parser : null;
      if (!is_resource($parser)
          && !(is_object($parser) && is_a($parser, 'XMLParser'))
      ) {
          return false;
      }

      // without an explicit handler object the events go to $this
      if (!is_object($this->_handlerObj)) {
          $this->_handlerObj = &$this;
      }

      switch ($this->mode) {
      case 'func':
          $startHandler = array(&$this, 'funcStartHandler');
          $endHandler   = array(&$this, 'funcEndHandler');
          break;
      case 'event':
          $startHandler = 'startHandler';
          $endHandler   = 'endHandler';
          break;
      default:
          return $this->raiseError('Unsupported mode given',
              XML_PARSER_ERROR_UNSUPPORTED_MODE);
      }

      // handlers given as plain method names are called on the object
      // that is registered here
      xml_set_object($this->parser, $this->_handlerObj);
      xml_set_element_handler($this->parser, $startHandler, $endHandler);

      // additional handlers for character data, entities, etc.
      foreach ($this->handler as $xml_func => $method) {
          if (method_exists($this->_handlerObj, $method)) {
              $xml_func = 'xml_set_' . $xml_func;
              $xml_func($this->parser, $method);
          }
      }
      return true;
  }
  300. // {{{ _create()
  /**
   * Creates the XML parser and prepares it for parsing.
   *
   * This is not done in the constructor in order to avoid problems
   * with object references and to be able to return an error.
   *
   * Both encodings are checked against $this->_validEncodings before
   * the xml extension is called. An empty source encoding is passed
   * through unchecked. A parser that cannot be configured is freed
   * again instead of being leaked.
   *
   * NOTE: uses '@' error suppression in this method
   *
   * @return bool|PEAR_Error true on success, PEAR_Error otherwise
   * @access private
   * @see xml_parser_create
   */
  function _create()
  {
      $srcenc = $this->srcenc;
      if ($srcenc !== null && $srcenc !== ''
          && !in_array(strtoupper($srcenc), $this->_validEncodings, true)
      ) {
          return $this->raiseError('invalid source encoding',
              XML_PARSER_ERROR_INVALID_ENCODING);
      }
      if ($this->tgtenc !== null
          && !in_array(strtoupper($this->tgtenc), $this->_validEncodings, true)
      ) {
          return $this->raiseError('invalid target encoding',
              XML_PARSER_ERROR_INVALID_ENCODING);
      }

      if ($srcenc === null) {
          $xp = @xml_parser_create();
      } else {
          $xp = @xml_parser_create($srcenc);
      }

      // a resource before PHP 8, an XMLParser object from PHP 8 on
      if (!is_resource($xp) && !(is_object($xp) && is_a($xp, 'XMLParser'))) {
          return $this->raiseError('Unable to create XML parser resource.',
              XML_PARSER_ERROR_NO_RESOURCE);
      }

      if ($this->tgtenc !== null
          && !@xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING,
              $this->tgtenc)
      ) {
          // do not leak the parser that was just created
          if (is_resource($xp)) {
              xml_parser_free($xp);
          }
          return $this->raiseError('invalid target encoding',
              XML_PARSER_ERROR_INVALID_ENCODING);
      }

      $this->parser = $xp;

      $result = $this->_initHandlers();
      if ($result === false) {
          // the handlers could not be attached to this parser handle
          return $this->raiseError('Unable to create XML parser resource.',
              XML_PARSER_ERROR_NO_RESOURCE);
      }
      if ($this->isError($result)) {
          return $result;
      }

      xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);
      return true;
  }
  347. // }}}
  348. // {{{ reset()
  /**
   * Resets the parser by creating a fresh parser via _create().
   *
   * This allows one parser instance to be used
   * for parsing several XML documents.
   *
   * @access public
   * @return boolean|object true on success, PEAR_Error otherwise
   */
  function reset()
  {
      $created = $this->_create();

      // hand errors through unchanged, report everything else as success
      return $this->isError($created) ? $created : true;
  }
  366. // }}}
  367. // {{{ setInputFile()
  /**
   * Sets the input xml file to be parsed.
   *
   * The file is opened in binary read mode and the handle is stored for
   * parse(). http(s) and ftp(s) URLs are refused up front when the
   * allow_url_fopen ini setting is disabled.
   *
   * @param string $file Filename (full path) or URL
   *
   * @return resource|XML_Parser_Error fopen handle of the given file,
   *                                   or an error object on failure
   * @access public
   * @see setInput(), setInputString(), parse()
   */
  function setInputFile($file)
  {
      // preg_match() replaces eregi(), which was deprecated in PHP 5.3
      // and removed in PHP 7.0; the secure variants of both schemes are
      // matched as well, as they depend on allow_url_fopen just the same
      if (preg_match('#^(?:https?|ftps?)://#i', substr($file, 0, 10))
          && !ini_get('allow_url_fopen')
      ) {
          return $this->
              raiseError('Remote files cannot be parsed, as safe mode is enabled.',
              XML_PARSER_ERROR_REMOTE);
      }

      $fp = @fopen($file, 'rb');
      if (is_resource($fp)) {
          $this->fp = $fp;
          return $fp;
      }
      return $this->raiseError('File could not be opened.',
          XML_PARSER_ERROR_FILE_NOT_READABLE);
  }
  398. // }}}
  399. // {{{ setInputString()
  /**
   * XML_Parser::setInputString()
   *
   * Uses the given string as XML input for the next parse() call.
   *
   * @param string $data a string containing the XML document
   *
   * @return null
   */
  function setInputString($data)
  {
      // parse() treats any non-resource value of $fp as the document itself
      $this->fp = $data;

      return null;
  }
  414. // }}}
  415. // {{{ setInput()
  /**
   * Sets the input to use with parse().
   *
   * The kind of input is detected in this order:
   * - an open resource is used as file handle
   * - a value starting with a URL scheme ("xyz://") or naming an
   *   existing local file is opened via setInputFile()
   * - anything else is taken as the XML document itself
   *
   * Prefer setInputFile() or setInputString() when the kind of
   * input is known in advance.
   *
   * @param mixed $fp Can be either a resource returned from fopen(),
   *                  a URL, a local filename or a string.
   *
   * @return mixed true (or the file handle returned by setInputFile())
   *               on success, an XML_Parser_Error otherwise
   * @access public
   * @see parse()
   * @uses setInputString(), setInputFile()
   */
  function setInput($fp)
  {
      if (is_resource($fp)) {
          $this->fp = $fp;
          return true;
      }

      // arrays, null and objects without a string representation can
      // be neither a location nor a document
      $isStringable = is_scalar($fp)
          || (is_object($fp) && method_exists($fp, '__toString'));
      if (!$isStringable) {
          return $this->raiseError('Illegal input format',
              XML_PARSER_ERROR_INVALID_INPUT);
      }
      $input = (string) $fp;

      // preg_match() replaces eregi(), which was deprecated in PHP 5.3
      // and removed in PHP 7.0
      if (preg_match('#^[a-z]+://#i', substr($input, 0, 10))) {
          // an absolute URL (has a scheme at the beginning)
          return $this->setInputFile($input);
      }
      if (file_exists($input)) {
          // a local file
          return $this->setInputFile($input);
      }

      // it must be the document itself
      $this->fp = $input;
      return true;
  }
  449. // }}}
  450. // {{{ parse()
  /**
   * Central parsing function.
   *
   * Creates a fresh parser, feeds it the input that was set before and
   * releases parser and input again, whether parsing succeeded or not.
   *
   * A file handle is read in chunks of 4096 bytes. The parser is always
   * told explicitly when the input has ended, so that truncated or empty
   * documents are reported as errors instead of passing silently.
   *
   * @return bool|PEAR_Error returns true on success, or a PEAR_Error otherwise
   * @access public
   */
  function parse()
  {
      // reset the parser
      $result = $this->reset();
      if ($this->isError($result)) {
          return $result;
      }

      if (isset($this->fp) && is_resource($this->fp)) {
          // $this->fp was fopened previously
          do {
              $data = fread($this->fp, 4096);
              if ($data === false || $data === '') {
                  // nothing left to read: finish the document
                  $data    = '';
                  $isFinal = true;
              } else {
                  // a chunk such as "0" is data, not the end of the input
                  $isFinal = feof($this->fp);
              }
              if (!$this->_parseString($data, $isFinal)) {
                  // build the error first, it reads the parser that
                  // free() is about to release
                  $error = $this->raiseError();
                  $this->free();
                  return $error;
              }
          } while (!$isFinal);
      } else {
          // otherwise, $this->fp must be a string
          $data = isset($this->fp) ? $this->fp : '';
          if (!$this->_parseString($data, true)) {
              $error = $this->raiseError();
              $this->free();
              return $error;
          }
      }

      $this->free();
      return true;
  }
  /**
   * XML_Parser::_parseString()
   *
   * Passes one piece of data to xml_parse() for the current parser
   * and returns its result unchanged.
   *
   * @param string $data data
   * @param bool   $eof  end-of-file flag
   *
   * @return bool
   * @access private
   * @see parseString()
   **/
  function _parseString($data, $eof = false)
  {
      $status = xml_parse($this->parser, $data, $eof);

      return $status;
  }
  500. // }}}
  501. // {{{ parseString()
  /**
   * XML_Parser::parseString()
   *
   * Parses a string. The document may be passed in several pieces; a
   * parser is created on the first call and released again once the
   * last piece ($eof === true) has been parsed or an error occurred.
   *
   * @param string  $data XML data
   * @param boolean $eof  If set and TRUE, data is the last piece
   *                      of data sent in this parser
   *
   * @return bool|PEAR_Error true on success or a PEAR Error
   * @see _parseString()
   */
  function parseString($data, $eof = false)
  {
      // a resource before PHP 8, an XMLParser object from PHP 8 on
      $hasParser = isset($this->parser)
          && (is_resource($this->parser)
              || (is_object($this->parser) && is_a($this->parser, 'XMLParser')));

      if (!$hasParser) {
          // without a usable parser there is nothing to parse with,
          // so a failed reset has to be reported to the caller
          $result = $this->reset();
          if ($this->isError($result)) {
              return $result;
          }
      }

      if (!$this->_parseString($data, $eof)) {
          // build the error first, it reads the parser that
          // free() is about to release
          $error = $this->raiseError();
          $this->free();
          return $error;
      }

      if ($eof === true) {
          $this->free();
      }
      return true;
  }
  /**
   * XML_Parser::free()
   *
   * Free the internal resources associated with the parser: the parser
   * handle is released and an open input file handle is closed.
   *
   * Both properties are reset to null rather than unset, so that they
   * stay declared and later reads do not raise undefined-property notices.
   *
   * @return null
   **/
  function free()
  {
      // only resource handles (PHP < 8) are freed explicitly; an
      // XMLParser object (PHP >= 8) only needs its reference dropped
      if (isset($this->parser) && is_resource($this->parser)) {
          xml_parser_free($this->parser);
      }
      $this->parser = null;

      if (isset($this->fp) && is_resource($this->fp)) {
          fclose($this->fp);
      }
      $this->fp = null;

      return null;
  }
  /**
   * XML_Parser::raiseError()
   *
   * Wraps the message in an XML_Parser_Error and passes it on to the
   * raiseError() implementation of the parent class.
   *
   * Without a message the current parser is handed to XML_Parser_Error
   * instead, which allows it to read the last parser error. If there
   * is no parser either, the message 'unknown error' is used.
   *
   * @param string  $msg   the error message
   * @param integer $ecode the error message code
   *
   * @return XML_Parser_Error
   **/
  function raiseError($msg = null, $ecode = 0)
  {
      if (is_null($msg)) {
          // the parser may be gone already, e.g. after free()
          $msg = isset($this->parser) ? $this->parser : 'unknown error';
      }

      // plain "new": the "&new" form is a parse error on PHP 7 and later
      $err = new XML_Parser_Error($msg, $ecode);
      return parent::raiseError($err);
  }
  565. // }}}
  566. // {{{ funcStartHandler()
  /**
   * Start-element callback used in 'func' mode.
   *
   * Calls the method "xmltag_<element>" of the handler object, with every
   * '.', '-' and ':' of that name replaced by '_'. If no such method
   * exists, the generic method "xmltag" is called instead; if that is
   * missing as well, the element is ignored.
   *
   * @param mixed $xp      parser handle, passed on to the handler
   * @param mixed $elem    element name, passed on to the handler
   * @param mixed $attribs element attributes, passed on to the handler
   *
   * @return void
   */
  function funcStartHandler($xp, $elem, $attribs)
  {
      $specific = str_replace(array('.', '-', ':'), '_', 'xmltag_' . $elem);

      if (method_exists($this->_handlerObj, $specific)) {
          $target = $specific;
      } elseif (method_exists($this->_handlerObj, 'xmltag')) {
          $target = 'xmltag';
      } else {
          // nobody is interested in this element
          return;
      }

      call_user_func(array(&$this->_handlerObj, $target), $xp, $elem, $attribs);
  }
  587. // }}}
  588. // {{{ funcEndHandler()
  /**
   * End-element callback used in 'func' mode.
   *
   * Calls the method "xmltag_<element>_" of the handler object, with
   * every '.', '-' and ':' of that name replaced by '_'. If no such method
   * exists, the generic method "xmltag_" is called instead; if that is
   * missing as well, the element is ignored.
   *
   * @param mixed $xp   parser handle, passed on to the handler
   * @param mixed $elem element name, passed on to the handler
   *
   * @return void
   */
  function funcEndHandler($xp, $elem)
  {
      $specific = str_replace(array('.', '-', ':'), '_', 'xmltag_' . $elem . '_');

      if (method_exists($this->_handlerObj, $specific)) {
          $target = $specific;
      } elseif (method_exists($this->_handlerObj, 'xmltag_')) {
          $target = 'xmltag_';
      } else {
          // nobody is interested in this element
          return;
      }

      call_user_func(array(&$this->_handlerObj, $target), $xp, $elem);
  }
  607. // }}}
  608. // {{{ startHandler()
  /**
   * Placeholder for the start-element handler used in 'event' mode.
   *
   * Does nothing by itself; meant to be overridden.
   *
   * @param mixed $xp       parser handle
   * @param mixed $elem     element name
   * @param mixed &$attribs element attributes
   *
   * @return null
   * @abstract
   */
  function startHandler($xp, $elem, &$attribs)
  {
      // intentionally empty
      return null;
  }
  623. // }}}
  624. // {{{ endHandler()
  /**
   * Placeholder for the end-element handler used in 'event' mode.
   *
   * Does nothing by itself; meant to be overridden.
   *
   * @param mixed $xp   parser handle
   * @param mixed $elem element name
   *
   * @return null
   * @abstract
   */
  function endHandler($xp, $elem)
  {
      // intentionally empty
      return null;
  }
  // }}}
  639. }
  /**
   * error class, replaces PEAR_Error
   *
   * An instance of this class will be returned
   * if an error occurs inside XML_Parser.
   *
   * There are three advantages over using the standard PEAR_Error:
   * - All messages will be prefixed
   * - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
   * - messages can be generated from the xml_parser resource
   *
   * @category  XML
   * @package   XML_Parser
   * @author    Stig Bakken <ssb@fast.no>
   * @author    Tomas V.V.Cox <cox@idecnet.com>
   * @author    Stephan Schmidt <schst@php.net>
   * @copyright 2002-2008 The PHP Group
   * @license   http://opensource.org/licenses/bsd-license New BSD License
   * @version   Release: @package_version@
   * @link      http://pear.php.net/package/XML_Parser
   * @see       PEAR_Error
   */
  class XML_Parser_Error extends PEAR_Error
  {
      /**
       * prefix for all messages
       *
       * @var string
       */
      var $error_message_prefix = 'XML_Parser: ';

      /**
       * PHP4-style constructor.
       *
       * Kept for backwards compatibility; forwards all of its
       * arguments to XML_Parser_Error::__construct().
       *
       * @param string|resource $msgorparser message or parser resource
       * @param integer         $code        error code
       * @param integer         $mode        error handling
       * @param integer         $level       error level
       *
       * @access public
       */
      function XML_Parser_Error($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
      {
          XML_Parser_Error::__construct($msgorparser, $code, $mode, $level);
      }

      /**
       * construct a new error instance
       *
       * You may either pass a message or an xml parser handle as first
       * parameter. If a parser has been passed, its last error is looked
       * up and used as code and message, together with the current line
       * and column of the input.
       *
       * @param string|resource $msgorparser message or parser resource
       * @param integer         $code        error code
       * @param integer         $mode        error handling
       * @param integer         $level       error level
       *
       * @access public
       * @todo PEAR CS - can't meet 85char line limit without arg refactoring
       */
      function __construct($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
      {
          // a resource before PHP 8, an XMLParser object from PHP 8 on
          $isParser = is_resource($msgorparser)
              || (is_object($msgorparser) && is_a($msgorparser, 'XMLParser'));

          if ($isParser) {
              $code        = xml_get_error_code($msgorparser);
              $msgorparser = sprintf('%s at XML input line %d:%d',
                  xml_error_string($code),
                  xml_get_current_line_number($msgorparser),
                  xml_get_current_column_number($msgorparser));
          }
          $this->PEAR_Error($msgorparser, $code, $mode, $level);
      }
  }
  701. ?>