PageRenderTime 35ms CodeModel.GetById 14ms app.highlight 16ms RepoModel.GetById 2ms app.codeStats 0ms

/framework/vendor/zend/Zend/Pdf/StringParser.php

http://zoop.googlecode.com/
PHP | 724 lines | 446 code | 91 blank | 187 comment | 108 complexity | eef744b1653f0550ce9ee9711d21d452 MD5 | raw file
  1<?php
  2/**
  3 * Zend Framework
  4 *
  5 * LICENSE
  6 *
  7 * This source file is subject to the new BSD license that is bundled
  8 * with this package in the file LICENSE.txt.
  9 * It is also available through the world-wide-web at this URL:
 10 * http://framework.zend.com/license/new-bsd
 11 * If you did not receive a copy of the license and are unable to
 12 * obtain it through the world-wide-web, please send an email
 13 * to license@zend.com so we can send you a copy immediately.
 14 *
 15 * @category   Zend
 16 * @package    Zend_Pdf
 17 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 18 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 19 * @version    $Id: StringParser.php 20096 2010-01-06 02:05:09Z bkarwin $
 20 */
 21
 22
 23/** Internally used classes */
 24require_once 'Zend/Pdf/Element/Array.php';
 25require_once 'Zend/Pdf/Element/String/Binary.php';
 26require_once 'Zend/Pdf/Element/Boolean.php';
 27require_once 'Zend/Pdf/Element/Dictionary.php';
 28require_once 'Zend/Pdf/Element/Name.php';
 29require_once 'Zend/Pdf/Element/Null.php';
 30require_once 'Zend/Pdf/Element/Numeric.php';
 31require_once 'Zend/Pdf/Element/Object.php';
 32require_once 'Zend/Pdf/Element/Object/Stream.php';
 33require_once 'Zend/Pdf/Element/Reference.php';
 34require_once 'Zend/Pdf/Element/String.php';
 35
 36
 37/**
 38 * PDF string parser
 39 *
 40 * @package    Zend_Pdf
 41 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 42 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 43 */
 44class Zend_Pdf_StringParser
 45{
 46    /**
 47     * Source PDF
 48     *
 49     * @var string
 50     */
 51    public $data = '';
 52
 53    /**
 54     * Current position in a data
 55     *
 56     * @var integer
 57     */
 58    public $offset = 0;
 59
 60    /**
 61     * Current reference context
 62     *
 63     * @var Zend_Pdf_Element_Reference_Context
 64     */
 65    private $_context = null;
 66
 67    /**
 68     * Array of elements of the currently parsed object/trailer
 69     *
 70     * @var array
 71     */
 72    private $_elements = array();
 73
 74    /**
 75     * PDF objects factory.
 76     *
 77     * @var Zend_Pdf_ElementFactory_Interface
 78     */
 79    private $_objFactory = null;
 80
 81
 82    /**
 83     * Clean up resources.
 84     *
 85     * Clear current state to remove cyclic object references
 86     */
 87    public function cleanUp()
 88    {
 89        $this->_context = null;
 90        $this->_elements = array();
 91        $this->_objFactory = null;
 92    }
 93
 94    /**
 95     * Character with code $chCode is white space
 96     *
 97     * @param integer $chCode
 98     * @return boolean
 99     */
100    public static function isWhiteSpace($chCode)
101    {
102        if ($chCode == 0x00 || // null character
103            $chCode == 0x09 || // Tab
104            $chCode == 0x0A || // Line feed
105            $chCode == 0x0C || // Form Feed
106            $chCode == 0x0D || // Carriage return
107            $chCode == 0x20    // Space
108           ) {
109            return true;
110        } else {
111            return false;
112        }
113    }
114
115
116    /**
117     * Character with code $chCode is a delimiter character
118     *
119     * @param integer $chCode
120     * @return boolean
121     */
122    public static function isDelimiter($chCode )
123    {
124        if ($chCode == 0x28 || // '('
125            $chCode == 0x29 || // ')'
126            $chCode == 0x3C || // '<'
127            $chCode == 0x3E || // '>'
128            $chCode == 0x5B || // '['
129            $chCode == 0x5D || // ']'
130            $chCode == 0x7B || // '{'
131            $chCode == 0x7D || // '}'
132            $chCode == 0x2F || // '/'
133            $chCode == 0x25    // '%'
134           ) {
135            return true;
136        } else {
137            return false;
138        }
139    }
140
141
142    /**
143     * Skip white space
144     *
145     * @param boolean $skipComment
146     */
147    public function skipWhiteSpace($skipComment = true)
148    {
149        if ($skipComment) {
150            while (true) {
151                $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
152
153                if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
154                    // Skip comment
155                    $this->offset += strcspn($this->data, "\r\n", $this->offset);
156                } else {
157                    // Non white space character not equal to '%' is found
158                    return;
159                }
160            }
161        } else {
162            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
163        }
164
165//        /** Original (non-optimized) implementation. */
166//
167//        while ($this->offset < strlen($this->data)) {
168//            if (strpos("\x00\t\n\f\r ", $this->data[$this->offset]) !== false) {
169//                $this->offset++;
170//            } else if (ord($this->data[$this->offset]) == 0x25 && $skipComment) { // '%'
171//                $this->skipComment();
172//            } else {
173//                return;
174//            }
175//        }
176    }
177
178
179    /**
180     * Skip comment
181     */
182    public function skipComment()
183    {
184        while ($this->offset < strlen($this->data))
185        {
186            if (ord($this->data[$this->offset]) != 0x0A || // Line feed
187                ord($this->data[$this->offset]) != 0x0d    // Carriage return
188               ) {
189                $this->offset++;
190            } else {
191                return;
192            }
193        }
194    }
195
196
197    /**
198     * Read comment line
199     *
200     * @return string
201     */
202    public function readComment()
203    {
204        $this->skipWhiteSpace(false);
205
206        /** Check if it's a comment line */
207        if ($this->data[$this->offset] != '%') {
208            return '';
209        }
210
211        for ($start = $this->offset;
212             $this->offset < strlen($this->data);
213             $this->offset++) {
214            if (ord($this->data[$this->offset]) == 0x0A || // Line feed
215                ord($this->data[$this->offset]) == 0x0d    // Carriage return
216               ) {
217                break;
218            }
219        }
220
221        return substr($this->data, $start, $this->offset-$start);
222    }
223
224
225    /**
226     * Returns next lexeme from a pdf stream
227     *
228     * @return string
229     */
230    public function readLexeme()
231    {
232        // $this->skipWhiteSpace();
233        while (true) {
234            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
235
236            if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
237                $this->offset += strcspn($this->data, "\r\n", $this->offset);
238            } else {
239                break;
240            }
241        }
242
243        if ($this->offset >= strlen($this->data)) {
244            return '';
245        }
246
247        if ( /* self::isDelimiter( ord($this->data[$start]) ) */
248             strpos('()<>[]{}/%', $this->data[$this->offset]) !== false ) {
249
250            switch (substr($this->data, $this->offset, 2)) {
251                case '<<':
252                    $this->offset += 2;
253                    return '<<';
254                    break;
255
256                case '>>':
257                    $this->offset += 2;
258                    return '>>';
259                    break;
260
261                default:
262                    return $this->data[$this->offset++];
263                    break;
264            }
265        } else {
266            $start = $this->offset;
267            $this->offset += strcspn($this->data, "()<>[]{}/%\x00\t\n\f\r ", $this->offset);
268
269            return substr($this->data, $start, $this->offset - $start);
270        }
271    }
272
273
274    /**
275     * Read elemental object from a PDF stream
276     *
277     * @return Zend_Pdf_Element
278     * @throws Zend_Pdf_Exception
279     */
280    public function readElement($nextLexeme = null)
281    {
282        if ($nextLexeme === null) {
283            $nextLexeme = $this->readLexeme();
284        }
285
286        /**
287         * Note: readElement() method is a public method and could be invoked from other classes.
288         * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
289         * about _elements member management.
290         */
291        switch ($nextLexeme) {
292            case '(':
293                return ($this->_elements[] = $this->_readString());
294
295            case '<':
296                return ($this->_elements[] = $this->_readBinaryString());
297
298            case '/':
299                return ($this->_elements[] = new Zend_Pdf_Element_Name(
300                                                    Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
301                                                                      ));
302
303            case '[':
304                return ($this->_elements[] = $this->_readArray());
305
306            case '<<':
307                return ($this->_elements[] = $this->_readDictionary());
308
309            case ')':
310                // fall through to next case
311            case '>':
312                // fall through to next case
313            case ']':
314                // fall through to next case
315            case '>>':
316                // fall through to next case
317            case '{':
318                // fall through to next case
319            case '}':
320                require_once 'Zend/Pdf/Exception.php';
321                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
322                                                $this->offset));
323
324            default:
325                if (strcasecmp($nextLexeme, 'true') == 0) {
326                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
327                } else if (strcasecmp($nextLexeme, 'false') == 0) {
328                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
329                } else if (strcasecmp($nextLexeme, 'null') == 0) {
330                    return ($this->_elements[] = new Zend_Pdf_Element_Null());
331                }
332
333                $ref = $this->_readReference($nextLexeme);
334                if ($ref !== null) {
335                    return ($this->_elements[] = $ref);
336                }
337
338                return ($this->_elements[] = $this->_readNumeric($nextLexeme));
339        }
340    }
341
342
343    /**
344     * Read string PDF object
345     * Also reads trailing ')' from a pdf stream
346     *
347     * @return Zend_Pdf_Element_String
348     * @throws Zend_Pdf_Exception
349     */
350    private function _readString()
351    {
352        $start = $this->offset;
353        $openedBrackets = 1;
354
355        $this->offset += strcspn($this->data, '()\\', $this->offset);
356
357        while ($this->offset < strlen($this->data)) {
358            switch (ord( $this->data[$this->offset] )) {
359                case 0x28: // '(' - opened bracket in the string, needs balanced pair.
360                    $this->offset++;
361                    $openedBrackets++;
362                    break;
363
364                case 0x29: // ')' - pair to the opened bracket
365                    $this->offset++;
366                    $openedBrackets--;
367                    break;
368
369                case 0x5C: // '\\' - escape sequence, skip next char from a check
370                    $this->offset += 2;
371            }
372
373            if ($openedBrackets == 0) {
374                break; // end of string
375            }
376
377            $this->offset += strcspn($this->data, '()\\', $this->offset);
378        }
379        if ($openedBrackets != 0) {
380            require_once 'Zend/Pdf/Exception.php';
381            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
382        }
383
384        return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
385                                                                                     $start,
386                                                                                     $this->offset - $start - 1) ));
387    }
388
389
390    /**
391     * Read binary string PDF object
392     * Also reads trailing '>' from a pdf stream
393     *
394     * @return Zend_Pdf_Element_String_Binary
395     * @throws Zend_Pdf_Exception
396     */
397    private function _readBinaryString()
398    {
399        $start = $this->offset;
400
401        $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);
402
403        if ($this->offset >= strlen($this->data) - 1) {
404            require_once 'Zend/Pdf/Exception.php';
405            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
406        }
407
408        if ($this->data[$this->offset++] != '>') {
409            require_once 'Zend/Pdf/Exception.php';
410            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
411        }
412
413        return new Zend_Pdf_Element_String_Binary(
414                       Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
415                                                                        $start,
416                                                                        $this->offset - $start - 1) ));
417    }
418
419
420    /**
421     * Read array PDF object
422     * Also reads trailing ']' from a pdf stream
423     *
424     * @return Zend_Pdf_Element_Array
425     * @throws Zend_Pdf_Exception
426     */
427    private function _readArray()
428    {
429        $elements = array();
430
431        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
432            if ($nextLexeme != ']') {
433                $elements[] = $this->readElement($nextLexeme);
434            } else {
435                return new Zend_Pdf_Element_Array($elements);
436            }
437        }
438
439        require_once 'Zend/Pdf/Exception.php';
440        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
441    }
442
443
444    /**
445     * Read dictionary PDF object
446     * Also reads trailing '>>' from a pdf stream
447     *
448     * @return Zend_Pdf_Element_Dictionary
449     * @throws Zend_Pdf_Exception
450     */
451    private function _readDictionary()
452    {
453        $dictionary = new Zend_Pdf_Element_Dictionary();
454
455        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
456            if ($nextLexeme != '>>') {
457                $nameStart = $this->offset - strlen($nextLexeme);
458
459                $name  = $this->readElement($nextLexeme);
460                $value = $this->readElement();
461
462                if (!$name instanceof Zend_Pdf_Element_Name) {
463                    require_once 'Zend/Pdf/Exception.php';
464                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
465                }
466
467                $dictionary->add($name, $value);
468            } else {
469                return $dictionary;
470            }
471        }
472
473        require_once 'Zend/Pdf/Exception.php';
474        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
475    }
476
477
478    /**
479     * Read reference PDF object
480     *
481     * @param string $nextLexeme
482     * @return Zend_Pdf_Element_Reference
483     */
484    private function _readReference($nextLexeme = null)
485    {
486        $start = $this->offset;
487
488        if ($nextLexeme === null) {
489            $objNum = $this->readLexeme();
490        } else {
491            $objNum = $nextLexeme;
492        }
493        if (!ctype_digit($objNum)) { // it's not a reference
494            $this->offset = $start;
495            return null;
496        }
497
498        $genNum = $this->readLexeme();
499        if (!ctype_digit($genNum)) { // it's not a reference
500            $this->offset = $start;
501            return null;
502        }
503
504        $rMark  = $this->readLexeme();
505        if ($rMark != 'R') { // it's not a reference
506            $this->offset = $start;
507            return null;
508        }
509
510        $ref = new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
511
512        return $ref;
513    }
514
515
516    /**
517     * Read numeric PDF object
518     *
519     * @param string $nextLexeme
520     * @return Zend_Pdf_Element_Numeric
521     */
522    private function _readNumeric($nextLexeme = null)
523    {
524        if ($nextLexeme === null) {
525            $nextLexeme = $this->readLexeme();
526        }
527
528        return new Zend_Pdf_Element_Numeric($nextLexeme);
529    }
530
531
532    /**
533     * Read inderect object from a PDF stream
534     *
535     * @param integer $offset
536     * @param Zend_Pdf_Element_Reference_Context $context
537     * @return Zend_Pdf_Element_Object
538     */
539    public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
540    {
541        if ($offset === null ) {
542            return new Zend_Pdf_Element_Null();
543        }
544
545        // Save current offset to make getObject() reentrant
546        $offsetSave = $this->offset;
547
548        $this->offset    = $offset;
549        $this->_context  = $context;
550        $this->_elements = array();
551
552        $objNum = $this->readLexeme();
553        if (!ctype_digit($objNum)) {
554            require_once 'Zend/Pdf/Exception.php';
555            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
556        }
557
558        $genNum = $this->readLexeme();
559        if (!ctype_digit($genNum)) {
560            require_once 'Zend/Pdf/Exception.php';
561            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
562        }
563
564        $objKeyword = $this->readLexeme();
565        if ($objKeyword != 'obj') {
566            require_once 'Zend/Pdf/Exception.php';
567            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
568        }
569
570        $objValue = $this->readElement();
571
572        $nextLexeme = $this->readLexeme();
573
574        if( $nextLexeme == 'endobj' ) {
575            /**
576             * Object is not generated by factory (thus it's not marked as modified object).
577             * But factory is assigned to the obect.
578             */
579            $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());
580
581            foreach ($this->_elements as $element) {
582                $element->setParentObject($obj);
583            }
584
585            // Restore offset value
586            $this->offset = $offsetSave;
587
588            return $obj;
589        }
590
591        /**
592         * It's a stream object
593         */
594        if ($nextLexeme != 'stream') {
595            require_once 'Zend/Pdf/Exception.php';
596            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
597        }
598
599        if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
600            require_once 'Zend/Pdf/Exception.php';
601            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
602        }
603
604        /**
605         * References are automatically dereferenced at this moment.
606         */
607        $streamLength = $objValue->Length->value;
608
609        /**
610         * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
611         * This restriction gives the possibility to recognize all cases exactly
612         */
613        if ($this->data[$this->offset] == "\r" &&
614            $this->data[$this->offset + 1] == "\n"    ) {
615            $this->offset += 2;
616        } else if ($this->data[$this->offset] == "\n"    ) {
617            $this->offset++;
618        } else {
619            require_once 'Zend/Pdf/Exception.php';
620            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
621        }
622
623        $dataOffset = $this->offset;
624
625        $this->offset += $streamLength;
626
627        $nextLexeme = $this->readLexeme();
628        if ($nextLexeme != 'endstream') {
629            require_once 'Zend/Pdf/Exception.php';
630            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
631        }
632
633        $nextLexeme = $this->readLexeme();
634        if ($nextLexeme != 'endobj') {
635            require_once 'Zend/Pdf/Exception.php';
636            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
637        }
638
639        $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
640                                                         $dataOffset,
641                                                         $streamLength),
642                                                  (int)$objNum,
643                                                  (int)$genNum,
644                                                  $this->_objFactory->resolve(),
645                                                  $objValue);
646
647        foreach ($this->_elements as $element) {
648            $element->setParentObject($obj);
649        }
650
651        // Restore offset value
652        $this->offset = $offsetSave;
653
654        return $obj;
655    }
656
657
658    /**
659     * Get length of source string
660     *
661     * @return integer
662     */
663    public function getLength()
664    {
665        return strlen($this->data);
666    }
667
668    /**
669     * Get source string
670     *
671     * @return string
672     */
673    public function getString()
674    {
675        return $this->data;
676    }
677
678
679    /**
680     * Parse integer value from a binary stream
681     *
682     * @param string $stream
683     * @param integer $offset
684     * @param integer $size
685     * @return integer
686     */
687    public static function parseIntFromStream($stream, $offset, $size)
688    {
689        $value = 0;
690        for ($count = 0; $count < $size; $count++) {
691            $value *= 256;
692            $value += ord($stream[$offset + $count]);
693        }
694
695        return $value;
696    }
697
698
699
700    /**
701     * Set current context
702     *
703     * @param Zend_Pdf_Element_Reference_Context $context
704     */
705    public function setContext(Zend_Pdf_Element_Reference_Context $context)
706    {
707        $this->_context = $context;
708    }
709
710    /**
711     * Object constructor
712     *
713     * Note: PHP duplicates string, which is sent by value, only of it's updated.
714     * Thus we don't need to care about overhead
715     *
716     * @param string $pdfString
717     * @param Zend_Pdf_ElementFactory_Interface $factory
718     */
719    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
720    {
721        $this->data         = $source;
722        $this->_objFactory  = $factory;
723    }
724}