PageRenderTime 33ms CodeModel.GetById 2ms app.highlight 21ms RepoModel.GetById 1ms app.codeStats 1ms

/inc/XML/Parser.php

https://github.com/chregu/fluxcms
PHP | 685 lines | 256 code | 67 blank | 362 comment | 39 complexity | cacf261479a5888e82dbd1d4172ad445 MD5 | raw file
  1<?php
  2//
  3// +----------------------------------------------------------------------+
  4// | PHP Version 4                                                        |
  5// +----------------------------------------------------------------------+
  6// | Copyright (c) 1997-2004 The PHP Group                                |
  7// +----------------------------------------------------------------------+
  8// | This source file is subject to version 3.0 of the PHP license,       |
  9// | that is bundled with this package in the file LICENSE, and is        |
 10// | available at through the world-wide-web at                           |
 11// | http://www.php.net/license/3_0.txt.                                  |
 12// | If you did not receive a copy of the PHP license and are unable to   |
 13// | obtain it through the world-wide-web, please send a note to          |
 14// | license@php.net so we can mail you a copy immediately.               |
 15// +----------------------------------------------------------------------+
 16// | Author: Stig Bakken <ssb@fast.no>                                    |
 17// |         Tomas V.V.Cox <cox@idecnet.com>                              |
 18// |         Stephan Schmidt <schst@php-tools.net>                        |
 19// +----------------------------------------------------------------------+
 20//
 21// $Id$
 22
 23/**
 24 * XML Parser class.
 25 *
 26 * This is an XML parser based on PHP's "xml" extension,
 27 * based on the bundled expat library.
 28 *
 29 * @category XML
 30 * @package XML_Parser
 31 * @author  Stig Bakken <ssb@fast.no>
 32 * @author  Tomas V.V.Cox <cox@idecnet.com>
 33 * @author  Stephan Schmidt <schst@php-tools.net>
 34 */
 35
 36/**
 37 * uses PEAR's error handling
 38 */
 39require_once 'PEAR.php';
 40
 41/**
 42 * resource could not be created
 43 */
 44define('XML_PARSER_ERROR_NO_RESOURCE', 200);
 45
 46/**
 47 * unsupported mode
 48 */
 49define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);
 50
 51/**
 52 * invalid encoding was given
 53 */
 54define('XML_PARSER_ERROR_INVALID_ENCODING', 202);
 55
 56/**
 57 * specified file could not be read
 58 */
 59define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);
 60
 61/**
 62 * invalid input
 63 */
 64define('XML_PARSER_ERROR_INVALID_INPUT', 204);
 65
 66/**
 67 * remote file cannot be retrieved in safe mode
 68 */
 69define('XML_PARSER_ERROR_REMOTE', 205);
 70
 71/**
 72 * XML Parser class.
 73 *
 74 * This is an XML parser based on PHP's "xml" extension,
 75 * based on the bundled expat library.
 76 *
 77 * Notes:
 78 * - It requires PHP 4.0.4pl1 or greater
 79 * - From revision 1.17, the function names used by the 'func' mode
 80 *   are in the format "xmltag_$elem", for example: use "xmltag_name"
 81 *   to handle the <name></name> tags of your xml file.
 82 *
 83 * @category XML
 84 * @package XML_Parser
 85 * @author  Stig Bakken <ssb@fast.no>
 86 * @author  Tomas V.V.Cox <cox@idecnet.com>
 87 * @author  Stephan Schmidt <schst@php-tools.net>
 88 * @todo    create XML_Parser_Namespace to parse documents with namespaces
 89 * @todo    create XML_Parser_Pull
 90 * @todo    Tests that need to be made:
 91 *          - mixing character encodings
 92 *          - a test using all expat handlers
 93 *          - options (folding, output charset)
 94 *          - different parsing modes
 95 */
 96class XML_Parser extends PEAR
 97{
 98    // {{{ properties
 99
100   /**
101     * XML parser handle
102     *
103     * @var  resource
104     * @see  xml_parser_create()
105     */
106    var $parser;
107
108    /**
109     * File handle if parsing from a file
110     *
111     * @var  resource
112     */
113    var $fp;
114
115    /**
116     * Whether to do case folding
117     *
118     * If set to true, all tag and attribute names will
119     * be converted to UPPER CASE.
120     *
121     * @var  boolean
122     */
123    var $folding = true;
124
125    /**
126     * Mode of operation, one of "event" or "func"
127     *
128     * @var  string
129     */
130    var $mode;
131
132    /**
133     * Mapping from expat handler function to class method.
134     *
135     * @var  array
136     */
137    var $handler = array(
138        'character_data_handler'            => 'cdataHandler',
139        'default_handler'                   => 'defaultHandler',
140        'processing_instruction_handler'    => 'piHandler',
141        'unparsed_entity_decl_handler'      => 'unparsedHandler',
142        'notation_decl_handler'             => 'notationHandler',
143        'external_entity_ref_handler'       => 'entityrefHandler'
144    );
145
146    /**
147     * source encoding
148     *
149     * @var string
150     */
151    var $srcenc;
152
153    /**
154     * target encoding
155     *
156     * @var string
157     */
158    var $tgtenc;
159
160    /**
161     * handler object
162     *
163     * @var object
164     */
165    var $_handlerObj;
166
167    // }}}
168    // {{{ constructor
169
170    /**
171     * Creates an XML parser.
172     *
173     * This is needed for PHP4 compatibility, it will
174     * call the constructor, when a new instance is created.
175     *
176     * @param string $srcenc source charset encoding, use NULL (default) to use
177     *                       whatever the document specifies
178     * @param string $mode   how this parser object should work, "event" for
179     *                       startelement/endelement-type events, "func"
180     *                       to have it call functions named after elements
181     * @param string $tgenc  a valid target encoding
182     */
183    function XML_Parser($srcenc = null, $mode = 'event', $tgtenc = null)
184    {
185        XML_Parser::__construct($srcenc, $mode, $tgtenc);
186    }
187    // }}}
188
189    /**
190     * PHP5 constructor
191     *
192     * @param string $srcenc source charset encoding, use NULL (default) to use
193     *                       whatever the document specifies
194     * @param string $mode   how this parser object should work, "event" for
195     *                       startelement/endelement-type events, "func"
196     *                       to have it call functions named after elements
197     * @param string $tgenc  a valid target encoding
198     */
199    function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
200    {
201        $this->PEAR('XML_Parser_Error');
202
203        $this->mode   = $mode;
204        $this->srcenc = $srcenc;
205        $this->tgtenc = $tgtenc;
206    }
207    // }}}
208
209    /**
210     * Sets the mode of the parser.
211     *
212     * Possible modes are:
213     * - func
214     * - event
215     *
216     * You can set the mode using the second parameter
217     * in the constructor.
218     *
219     * This method is only needed, when switching to a new
220     * mode at a later point.
221     *
222     * @access  public
223     * @param   string          mode, either 'func' or 'event'
224     * @return  boolean|object  true on success, PEAR_Error otherwise   
225     */
226    function setMode($mode)
227    {
228        if ($mode != 'func' && $mode != 'event') {
229            $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
230        }
231
232        $this->mode = $mode;
233        return true;
234    }
235
236    /**
237     * Sets the object, that will handle the XML events
238     *
239     * This allows you to create a handler object independent of the
240     * parser object that you are using and easily switch the underlying
241     * parser.
242     *
243     * If no object will be set, XML_Parser assumes that you
244     * extend this class and handle the events in $this.
245     *
246     * @access  public
247     * @param   object      object to handle the events
248     * @return  boolean     will always return true
249     * @since   v1.2.0beta3
250     */
251    function setHandlerObj(&$obj)
252    {
253        $this->_handlerObj = &$obj;
254        return true;
255    }
256
257    /**
258     * Init the element handlers
259     *
260     * @access  private
261     */
262    function _initHandlers()
263    {
264        if (!is_resource($this->parser)) {
265            return false;
266        }
267
268        if (!is_object($this->_handlerObj)) {
269            $this->_handlerObj = &$this;
270        }
271        switch ($this->mode) {
272
273            case 'func':
274                xml_set_object($this->parser, $this->_handlerObj);
275                xml_set_element_handler($this->parser, array(&$this, 'funcStartHandler'), array(&$this, 'funcEndHandler'));
276                break;
277
278            case 'event':
279                xml_set_object($this->parser, $this->_handlerObj);
280                xml_set_element_handler($this->parser, 'startHandler', 'endHandler');
281                break;
282            default:
283                return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
284                break;
285        }
286
287
288        /**
289         * set additional handlers for character data, entities, etc.
290         */
291        foreach ($this->handler as $xml_func => $method) {
292            if (method_exists($this->_handlerObj, $method)) {
293                $xml_func = 'xml_set_' . $xml_func;
294                $xml_func($this->parser, $method);
295            }
296		}
297    }
298
299    // {{{ _create()
300
301    /**
302     * create the XML parser resource
303     *
304     * Has been moved from the constructor to avoid
305     * problems with object references.
306     *
307     * Furthermore it allows us returning an error
308     * if something fails.
309     *
310     * @access   private
311     * @return   boolean|object     true on success, PEAR_Error otherwise
312     *
313     * @see xml_parser_create
314     */
315    function _create()
316    {
317        if ($this->srcenc === null) {
318            $xp = @xml_parser_create();
319        } else {
320            $xp = @xml_parser_create($this->srcenc);
321        }
322        if (is_resource($xp)) {
323            if ($this->tgtenc !== null) {
324                if (!@xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING,
325                                            $this->tgtenc)) {
326                    return $this->raiseError('invalid target encoding', XML_PARSER_ERROR_INVALID_ENCODING);
327                }
328            }
329            $this->parser = $xp;
330            $result = $this->_initHandlers($this->mode);
331            if ($this->isError($result)) {
332                return $result;
333            }
334            xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);
335
336            return true;
337        }
338        return $this->raiseError('Unable to create XML parser resource.', XML_PARSER_ERROR_NO_RESOURCE);
339    }
340
341    // }}}
342    // {{{ reset()
343
344    /**
345     * Reset the parser.
346     *
347     * This allows you to use one parser instance
348     * to parse multiple XML documents.
349     *
350     * @access   public
351     * @return   boolean|object     true on success, PEAR_Error otherwise
352     */
353    function reset()
354    {
355        $result = $this->_create();
356        if ($this->isError( $result )) {
357            return $result;
358        }
359        return true;
360    }
361
362    // }}}
363    // {{{ setInputFile()
364
365    /**
366     * Sets the input xml file to be parsed
367     *
368     * @param    string      Filename (full path)
369     * @return   resource    fopen handle of the given file
370     * @throws   XML_Parser_Error
371     * @see      setInput(), setInputString(), parse()
372     * @access   public
373     */
374    function setInputFile($file)
375    {
376        /**
377         * check, if file is a remote file
378         */
379        if (eregi('^(http|ftp)://', substr($file, 0, 10))) {
380            if (!ini_get('allow_url_fopen')) {
381            	return $this->raiseError('Remote files cannot be parsed, as safe mode is enabled.', XML_PARSER_ERROR_REMOTE);
382            }
383        }
384        
385        $fp = @fopen($file, 'rb');
386        if (is_resource($fp)) {
387            $this->fp = $fp;
388            return $fp;
389        }
390        return $this->raiseError('File could not be opened.', XML_PARSER_ERROR_FILE_NOT_READABLE);
391    }
392
393    // }}}
394    // {{{ setInputString()
395    
396    /**
397     * XML_Parser::setInputString()
398     * 
399     * Sets the xml input from a string
400     * 
401     * @param string $data a string containing the XML document
402     * @return null
403     **/
404    function setInputString($data)
405    {
406        $this->fp = $data;
407        return null;
408    }
409    
410    // }}}
411    // {{{ setInput()
412
413    /**
414     * Sets the file handle to use with parse().
415     *
416     * You should use setInputFile() or setInputString() if you
417     * pass a string 
418     *
419     * @param    mixed  $fp  Can be either a resource returned from fopen(),
420     *                       a URL, a local filename or a string.
421     * @access   public
422     * @see      parse()
423     * @uses     setInputString(), setInputFile()
424     */
425    function setInput($fp)
426    {
427        if (is_resource($fp)) {
428            $this->fp = $fp;
429            return true;
430        }
431        // see if it's an absolute URL (has a scheme at the beginning)
432        elseif (eregi('^[a-z]+://', substr($fp, 0, 10))) {
433            return $this->setInputFile($fp);
434        }
435        // see if it's a local file
436        elseif (file_exists($fp)) {
437            return $this->setInputFile($fp);
438        }
439        // it must be a string
440        else {
441            $this->fp = $fp;
442            return true;
443        }
444
445        return $this->raiseError('Illegal input format', XML_PARSER_ERROR_INVALID_INPUT);
446    }
447
448    // }}}
449    // {{{ parse()
450
451    /**
452     * Central parsing function.
453     *
454     * @return   true|object PEAR error     returns true on success, or a PEAR_Error otherwise
455     * @access   public
456     */
457    function parse()
458    {
459        /**
460         * reset the parser
461         */
462        $result = $this->reset();
463        if ($this->isError($result)) {
464            return $result;
465        }
466        // if $this->fp was fopened previously
467        if (is_resource($this->fp)) {
468        
469            while ($data = fread($this->fp, 4096)) {
470                if (!$this->_parseString($data, feof($this->fp))) {
471                    $error = &$this->raiseError();
472                    $this->free();
473                    return $error;
474                }
475            }
476        // otherwise, $this->fp must be a string
477        } else {
478            if (!$this->_parseString($this->fp, true)) {
479                $error = &$this->raiseError();
480                $this->free();
481                return $error;
482            }
483        }
484        $this->free();
485
486        return true;
487    }
488
489    /**
490     * XML_Parser::_parseString()
491     * 
492     * @param string $data
493     * @param boolean $eof
494     * @return bool
495     * @access private
496     * @see parseString()
497     **/
498    function _parseString($data, $eof = false)
499    {
500        return xml_parse($this->parser, $data, $eof);
501    }
502    
503    // }}}
504    // {{{ parseString()
505
506    /**
507     * XML_Parser::parseString()
508     * 
509     * Parses a string.
510     *
511     * @param    string  $data XML data
512     * @param    boolean $eof  If set and TRUE, data is the last piece of data sent in this parser
513     * @throws   XML_Parser_Error
514     * @return   Pear Error|true   true on success or a PEAR Error
515     * @see      _parseString()
516     */
517    function parseString($data, $eof = false)
518    {
519        if (!isset($this->parser) || !is_resource($this->parser)) {
520            $this->reset();
521        }
522        
523        if (!$this->_parseString($data, $eof)) {
524           $error = &$this->raiseError();
525           $this->free();
526           return $error;
527        }
528
529        if ($eof === true) {
530            $this->free();
531        }
532        return true;
533    }
534    
535    /**
536     * XML_Parser::free()
537     * 
538     * Free the internal resources associated with the parser
539     * 
540     * @return null
541     **/
542    function free()
543    {
544        if (isset($this->parser) && is_resource($this->parser)) {
545            xml_parser_free($this->parser);
546            unset( $this->parser );
547        }
548        if (isset($this->fp) && is_resource($this->fp)) {
549            fclose($this->fp);
550        }
551        unset($this->fp);
552        return null;
553    }
554    
555    /**
556     * XML_Parser::raiseError()
557     * 
558     * Throws a XML_Parser_Error
559     * 
560     * @param string  $msg   the error message
561     * @param integer $ecode the error message code
562     * @return XML_Parser_Error 
563     **/
564    function raiseError($msg = null, $ecode = 0)
565    {
566        $msg = !is_null($msg) ? $msg : $this->parser;
567        $err = &new XML_Parser_Error($msg, $ecode);
568        return parent::raiseError($err);
569    }
570    
571    // }}}
572    // {{{ funcStartHandler()
573
574    function funcStartHandler($xp, $elem, $attribs)
575    {
576        $func = 'xmltag_' . $elem;
577        if (strchr($func, '.')) {
578            $func = str_replace('.', '_', $func);
579        }
580        if (method_exists($this->_handlerObj, $func)) {
581            call_user_func(array(&$this->_handlerObj, $func), $xp, $elem, $attribs);
582        } elseif (method_exists($this->_handlerObj, 'xmltag')) {
583            call_user_func(array(&$this->_handlerObj, 'xmltag'), $xp, $elem, $attribs);
584        }
585    }
586
587    // }}}
588    // {{{ funcEndHandler()
589
590    function funcEndHandler($xp, $elem)
591    {
592        $func = 'xmltag_' . $elem . '_';
593        if (strchr($func, '.')) {
594            $func = str_replace('.', '_', $func);
595        }
596        if (method_exists($this->_handlerObj, $func)) {
597            call_user_func(array(&$this->_handlerObj, $func), $xp, $elem);
598        } elseif (method_exists($this->_handlerObj, 'xmltag_')) {
599            call_user_func(array(&$this->_handlerObj, 'xmltag_'), $xp, $elem);
600        }
601    }
602
603    // }}}
604    // {{{ startHandler()
605
606    /**
607     *
608     * @abstract
609     */
610    function startHandler($xp, $elem, &$attribs)
611    {
612        return NULL;
613    }
614
615    // }}}
616    // {{{ endHandler()
617
618    /**
619     *
620     * @abstract
621     */
622    function endHandler($xp, $elem)
623    {
624        return NULL;
625    }
626
627
 628    // }}}
629}
630
/**
 * error class, replaces PEAR_Error
 *
 * An instance of this class will be returned
 * if an error occurs inside XML_Parser.
 *
 * There are three advantages over using the standard PEAR_Error:
 * - All messages will be prefixed
 * - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
 * - messages can be generated from the xml_parser resource
 *
 * @package XML_Parser
 * @access  public
 * @see     PEAR_Error
 */
class XML_Parser_Error extends PEAR_Error
{
    // {{{ properties

   /**
    * prefix for all messages
    *
    * @var      string
    */
    var $error_message_prefix = 'XML_Parser: ';

    // }}}
    // {{{ constructor()

   /**
    * PHP4-style constructor, kept for backward compatibility.
    *
    * Forwards all arguments to __construct(). The explicit class name
    * makes sure this class' own __construct() is the one that runs.
    *
    * @access   public
    * @param    string|resource     message or parser resource
    * @param    integer             error code
    * @param    integer             error handling
    * @param    integer             error level
    */
    function XML_Parser_Error($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
    {
        XML_Parser_Error::__construct($msgorparser, $code, $mode, $level);
    }

   /**
    * construct a new error instance
    *
    * You may either pass a message or an xml_parser resource as first
    * parameter. If a resource has been passed, the last error that
    * happened will be retrieved and returned.
    *
    * Defining __construct() is required because PHP 8 no longer treats a
    * method named after the class as a constructor; without it the
    * resource-to-message conversion below would be skipped.
    *
    * @access   public
    * @param    string|resource     message or parser resource
    * @param    integer             error code
    * @param    integer             error handling
    * @param    integer             error level
    */
    function __construct($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
    {
        if (is_resource($msgorparser)) {
            // the error code passed in is replaced by the parser's own code
            $code = xml_get_error_code($msgorparser);
            $msgorparser = sprintf('%s at XML input line %d:%d',
                                   xml_error_string($code),
                                   xml_get_current_line_number($msgorparser),
                                   xml_get_current_column_number($msgorparser));
        }
        $this->PEAR_Error($msgorparser, $code, $mode, $level);
    }
    // }}}
}
685?>