PageRenderTime 308ms CodeModel.GetById 141ms app.highlight 83ms RepoModel.GetById 78ms app.codeStats 0ms

/library/Zend/Pdf/StringParser.php

https://bitbucket.org/fabiancarlos/feature_seguimentos
PHP | 731 lines | 452 code | 92 blank | 187 comment | 110 complexity | 573e28ca748f90523b37aba6b1e5ba05 MD5 | raw file
  1<?php
  2/**
  3 * Zend Framework
  4 *
  5 * LICENSE
  6 *
  7 * This source file is subject to the new BSD license that is bundled
  8 * with this package in the file LICENSE.txt.
  9 * It is also available through the world-wide-web at this URL:
 10 * http://framework.zend.com/license/new-bsd
 11 * If you did not receive a copy of the license and are unable to
 12 * obtain it through the world-wide-web, please send an email
 13 * to license@zend.com so we can send you a copy immediately.
 14 *
 15 * @category   Zend
 16 * @package    Zend_Pdf
 17 * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
 18 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 19 * @version    $Id: StringParser.php 23775 2011-03-01 17:25:24Z ralph $
 20 */
 21
 22
 23/** Internally used classes */
 24require_once 'Zend/Pdf/Element/Array.php';
 25require_once 'Zend/Pdf/Element/String/Binary.php';
 26require_once 'Zend/Pdf/Element/Boolean.php';
 27require_once 'Zend/Pdf/Element/Dictionary.php';
 28require_once 'Zend/Pdf/Element/Name.php';
 29require_once 'Zend/Pdf/Element/Null.php';
 30require_once 'Zend/Pdf/Element/Numeric.php';
 31require_once 'Zend/Pdf/Element/Object.php';
 32require_once 'Zend/Pdf/Element/Object/Stream.php';
 33require_once 'Zend/Pdf/Element/Reference.php';
 34require_once 'Zend/Pdf/Element/String.php';
 35
 36
 37/**
 38 * PDF string parser
 39 *
 40 * @package    Zend_Pdf
 41 * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
 42 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 43 */
 44class Zend_Pdf_StringParser
 45{
 46    /**
 47     * Source PDF
 48     *
 49     * @var string
 50     */
 51    public $data = '';
 52
 53    /**
 54     * Current position in a data
 55     *
 56     * @var integer
 57     */
 58    public $offset = 0;
 59
 60    /**
 61     * Current reference context
 62     *
 63     * @var Zend_Pdf_Element_Reference_Context
 64     */
 65    private $_context = null;
 66
 67    /**
 68     * Array of elements of the currently parsed object/trailer
 69     *
 70     * @var array
 71     */
 72    private $_elements = array();
 73
 74    /**
 75     * PDF objects factory.
 76     *
 77     * @var Zend_Pdf_ElementFactory_Interface
 78     */
 79    private $_objFactory = null;
 80
 81
 82    /**
 83     * Clean up resources.
 84     *
 85     * Clear current state to remove cyclic object references
 86     */
 87    public function cleanUp()
 88    {
 89        $this->_context = null;
 90        $this->_elements = array();
 91        $this->_objFactory = null;
 92    }
 93
 94    /**
 95     * Character with code $chCode is white space
 96     *
 97     * @param integer $chCode
 98     * @return boolean
 99     */
100    public static function isWhiteSpace($chCode)
101    {
102        if ($chCode == 0x00 || // null character
103            $chCode == 0x09 || // Tab
104            $chCode == 0x0A || // Line feed
105            $chCode == 0x0C || // Form Feed
106            $chCode == 0x0D || // Carriage return
107            $chCode == 0x20    // Space
108           ) {
109            return true;
110        } else {
111            return false;
112        }
113    }
114
115
116    /**
117     * Character with code $chCode is a delimiter character
118     *
119     * @param integer $chCode
120     * @return boolean
121     */
122    public static function isDelimiter($chCode )
123    {
124        if ($chCode == 0x28 || // '('
125            $chCode == 0x29 || // ')'
126            $chCode == 0x3C || // '<'
127            $chCode == 0x3E || // '>'
128            $chCode == 0x5B || // '['
129            $chCode == 0x5D || // ']'
130            $chCode == 0x7B || // '{'
131            $chCode == 0x7D || // '}'
132            $chCode == 0x2F || // '/'
133            $chCode == 0x25    // '%'
134           ) {
135            return true;
136        } else {
137            return false;
138        }
139    }
140
141
142    /**
143     * Skip white space
144     *
145     * @param boolean $skipComment
146     */
147    public function skipWhiteSpace($skipComment = true)
148    {
149        if ($skipComment) {
150            while (true) {
151                $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
152
153                if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
154                    // Skip comment
155                    $this->offset += strcspn($this->data, "\r\n", $this->offset);
156                } else {
157                    // Non white space character not equal to '%' is found
158                    return;
159                }
160            }
161        } else {
162            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
163        }
164
165//        /** Original (non-optimized) implementation. */
166//
167//        while ($this->offset < strlen($this->data)) {
168//            if (strpos("\x00\t\n\f\r ", $this->data[$this->offset]) !== false) {
169//                $this->offset++;
170//            } else if (ord($this->data[$this->offset]) == 0x25 && $skipComment) { // '%'
171//                $this->skipComment();
172//            } else {
173//                return;
174//            }
175//        }
176    }
177
178
179    /**
180     * Skip comment
181     */
182    public function skipComment()
183    {
184        while ($this->offset < strlen($this->data))
185        {
186            if (ord($this->data[$this->offset]) != 0x0A || // Line feed
187                ord($this->data[$this->offset]) != 0x0d    // Carriage return
188               ) {
189                $this->offset++;
190            } else {
191                return;
192            }
193        }
194    }
195
196
197    /**
198     * Read comment line
199     *
200     * @return string
201     */
202    public function readComment()
203    {
204        $this->skipWhiteSpace(false);
205
206        /** Check if it's a comment line */
207        if ($this->data[$this->offset] != '%') {
208            return '';
209        }
210
211        for ($start = $this->offset;
212             $this->offset < strlen($this->data);
213             $this->offset++) {
214            if (ord($this->data[$this->offset]) == 0x0A || // Line feed
215                ord($this->data[$this->offset]) == 0x0d    // Carriage return
216               ) {
217                break;
218            }
219        }
220
221        return substr($this->data, $start, $this->offset-$start);
222    }
223
224
225    /**
226     * Returns next lexeme from a pdf stream
227     *
228     * @return string
229     */
230    public function readLexeme()
231    {
232        // $this->skipWhiteSpace();
233        while (true) {
234            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
235
236            if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
237                $this->offset += strcspn($this->data, "\r\n", $this->offset);
238            } else {
239                break;
240            }
241        }
242
243        if ($this->offset >= strlen($this->data)) {
244            return '';
245        }
246
247        if ( /* self::isDelimiter( ord($this->data[$start]) ) */
248             strpos('()<>[]{}/%', $this->data[$this->offset]) !== false ) {
249
250            switch (substr($this->data, $this->offset, 2)) {
251                case '<<':
252                    $this->offset += 2;
253                    return '<<';
254                    break;
255
256                case '>>':
257                    $this->offset += 2;
258                    return '>>';
259                    break;
260
261                default:
262                    return $this->data[$this->offset++];
263                    break;
264            }
265        } else {
266            $start = $this->offset;
267            $compare = '';
268            if( version_compare( phpversion(), '5.2.5' ) >= 0) {
269                $compare = "()<>[]{}/%\x00\t\n\f\r ";
270            } else {
271                $compare = "()<>[]{}/%\x00\t\n\r ";
272            }
273
274            $this->offset += strcspn($this->data, $compare, $this->offset);
275
276            return substr($this->data, $start, $this->offset - $start);
277        }
278    }
279
280
281    /**
282     * Read elemental object from a PDF stream
283     *
284     * @return Zend_Pdf_Element
285     * @throws Zend_Pdf_Exception
286     */
287    public function readElement($nextLexeme = null)
288    {
289        if ($nextLexeme === null) {
290            $nextLexeme = $this->readLexeme();
291        }
292
293        /**
294         * Note: readElement() method is a public method and could be invoked from other classes.
295         * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
296         * about _elements member management.
297         */
298        switch ($nextLexeme) {
299            case '(':
300                return ($this->_elements[] = $this->_readString());
301
302            case '<':
303                return ($this->_elements[] = $this->_readBinaryString());
304
305            case '/':
306                return ($this->_elements[] = new Zend_Pdf_Element_Name(
307                                                    Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
308                                                                      ));
309
310            case '[':
311                return ($this->_elements[] = $this->_readArray());
312
313            case '<<':
314                return ($this->_elements[] = $this->_readDictionary());
315
316            case ')':
317                // fall through to next case
318            case '>':
319                // fall through to next case
320            case ']':
321                // fall through to next case
322            case '>>':
323                // fall through to next case
324            case '{':
325                // fall through to next case
326            case '}':
327                require_once 'Zend/Pdf/Exception.php';
328                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
329                                                $this->offset));
330
331            default:
332                if (strcasecmp($nextLexeme, 'true') == 0) {
333                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
334                } else if (strcasecmp($nextLexeme, 'false') == 0) {
335                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
336                } else if (strcasecmp($nextLexeme, 'null') == 0) {
337                    return ($this->_elements[] = new Zend_Pdf_Element_Null());
338                }
339
340                $ref = $this->_readReference($nextLexeme);
341                if ($ref !== null) {
342                    return ($this->_elements[] = $ref);
343                }
344
345                return ($this->_elements[] = $this->_readNumeric($nextLexeme));
346        }
347    }
348
349
350    /**
351     * Read string PDF object
352     * Also reads trailing ')' from a pdf stream
353     *
354     * @return Zend_Pdf_Element_String
355     * @throws Zend_Pdf_Exception
356     */
357    private function _readString()
358    {
359        $start = $this->offset;
360        $openedBrackets = 1;
361
362        $this->offset += strcspn($this->data, '()\\', $this->offset);
363
364        while ($this->offset < strlen($this->data)) {
365            switch (ord( $this->data[$this->offset] )) {
366                case 0x28: // '(' - opened bracket in the string, needs balanced pair.
367                    $this->offset++;
368                    $openedBrackets++;
369                    break;
370
371                case 0x29: // ')' - pair to the opened bracket
372                    $this->offset++;
373                    $openedBrackets--;
374                    break;
375
376                case 0x5C: // '\\' - escape sequence, skip next char from a check
377                    $this->offset += 2;
378            }
379
380            if ($openedBrackets == 0) {
381                break; // end of string
382            }
383
384            $this->offset += strcspn($this->data, '()\\', $this->offset);
385        }
386        if ($openedBrackets != 0) {
387            require_once 'Zend/Pdf/Exception.php';
388            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
389        }
390
391        return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
392                                                                                     $start,
393                                                                                     $this->offset - $start - 1) ));
394    }
395
396
397    /**
398     * Read binary string PDF object
399     * Also reads trailing '>' from a pdf stream
400     *
401     * @return Zend_Pdf_Element_String_Binary
402     * @throws Zend_Pdf_Exception
403     */
404    private function _readBinaryString()
405    {
406        $start = $this->offset;
407
408        $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);
409
410        if ($this->offset >= strlen($this->data) - 1) {
411            require_once 'Zend/Pdf/Exception.php';
412            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
413        }
414
415        if ($this->data[$this->offset++] != '>') {
416            require_once 'Zend/Pdf/Exception.php';
417            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
418        }
419
420        return new Zend_Pdf_Element_String_Binary(
421                       Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
422                                                                        $start,
423                                                                        $this->offset - $start - 1) ));
424    }
425
426
427    /**
428     * Read array PDF object
429     * Also reads trailing ']' from a pdf stream
430     *
431     * @return Zend_Pdf_Element_Array
432     * @throws Zend_Pdf_Exception
433     */
434    private function _readArray()
435    {
436        $elements = array();
437
438        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
439            if ($nextLexeme != ']') {
440                $elements[] = $this->readElement($nextLexeme);
441            } else {
442                return new Zend_Pdf_Element_Array($elements);
443            }
444        }
445
446        require_once 'Zend/Pdf/Exception.php';
447        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
448    }
449
450
451    /**
452     * Read dictionary PDF object
453     * Also reads trailing '>>' from a pdf stream
454     *
455     * @return Zend_Pdf_Element_Dictionary
456     * @throws Zend_Pdf_Exception
457     */
458    private function _readDictionary()
459    {
460        $dictionary = new Zend_Pdf_Element_Dictionary();
461
462        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
463            if ($nextLexeme != '>>') {
464                $nameStart = $this->offset - strlen($nextLexeme);
465
466                $name  = $this->readElement($nextLexeme);
467                $value = $this->readElement();
468
469                if (!$name instanceof Zend_Pdf_Element_Name) {
470                    require_once 'Zend/Pdf/Exception.php';
471                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
472                }
473
474                $dictionary->add($name, $value);
475            } else {
476                return $dictionary;
477            }
478        }
479
480        require_once 'Zend/Pdf/Exception.php';
481        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
482    }
483
484
485    /**
486     * Read reference PDF object
487     *
488     * @param string $nextLexeme
489     * @return Zend_Pdf_Element_Reference
490     */
491    private function _readReference($nextLexeme = null)
492    {
493        $start = $this->offset;
494
495        if ($nextLexeme === null) {
496            $objNum = $this->readLexeme();
497        } else {
498            $objNum = $nextLexeme;
499        }
500        if (!ctype_digit($objNum)) { // it's not a reference
501            $this->offset = $start;
502            return null;
503        }
504
505        $genNum = $this->readLexeme();
506        if (!ctype_digit($genNum)) { // it's not a reference
507            $this->offset = $start;
508            return null;
509        }
510
511        $rMark  = $this->readLexeme();
512        if ($rMark != 'R') { // it's not a reference
513            $this->offset = $start;
514            return null;
515        }
516
517        $ref = new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
518
519        return $ref;
520    }
521
522
523    /**
524     * Read numeric PDF object
525     *
526     * @param string $nextLexeme
527     * @return Zend_Pdf_Element_Numeric
528     */
529    private function _readNumeric($nextLexeme = null)
530    {
531        if ($nextLexeme === null) {
532            $nextLexeme = $this->readLexeme();
533        }
534
535        return new Zend_Pdf_Element_Numeric($nextLexeme);
536    }
537
538
539    /**
540     * Read inderect object from a PDF stream
541     *
542     * @param integer $offset
543     * @param Zend_Pdf_Element_Reference_Context $context
544     * @return Zend_Pdf_Element_Object
545     */
546    public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
547    {
548        if ($offset === null ) {
549            return new Zend_Pdf_Element_Null();
550        }
551
552        // Save current offset to make getObject() reentrant
553        $offsetSave = $this->offset;
554
555        $this->offset    = $offset;
556        $this->_context  = $context;
557        $this->_elements = array();
558
559        $objNum = $this->readLexeme();
560        if (!ctype_digit($objNum)) {
561            require_once 'Zend/Pdf/Exception.php';
562            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
563        }
564
565        $genNum = $this->readLexeme();
566        if (!ctype_digit($genNum)) {
567            require_once 'Zend/Pdf/Exception.php';
568            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
569        }
570
571        $objKeyword = $this->readLexeme();
572        if ($objKeyword != 'obj') {
573            require_once 'Zend/Pdf/Exception.php';
574            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
575        }
576
577        $objValue = $this->readElement();
578
579        $nextLexeme = $this->readLexeme();
580
581        if( $nextLexeme == 'endobj' ) {
582            /**
583             * Object is not generated by factory (thus it's not marked as modified object).
584             * But factory is assigned to the obect.
585             */
586            $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());
587
588            foreach ($this->_elements as $element) {
589                $element->setParentObject($obj);
590            }
591
592            // Restore offset value
593            $this->offset = $offsetSave;
594
595            return $obj;
596        }
597
598        /**
599         * It's a stream object
600         */
601        if ($nextLexeme != 'stream') {
602            require_once 'Zend/Pdf/Exception.php';
603            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
604        }
605
606        if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
607            require_once 'Zend/Pdf/Exception.php';
608            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
609        }
610
611        /**
612         * References are automatically dereferenced at this moment.
613         */
614        $streamLength = $objValue->Length->value;
615
616        /**
617         * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
618         * This restriction gives the possibility to recognize all cases exactly
619         */
620        if ($this->data[$this->offset] == "\r" &&
621            $this->data[$this->offset + 1] == "\n"    ) {
622            $this->offset += 2;
623        } else if ($this->data[$this->offset] == "\n"    ) {
624            $this->offset++;
625        } else {
626            require_once 'Zend/Pdf/Exception.php';
627            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
628        }
629
630        $dataOffset = $this->offset;
631
632        $this->offset += $streamLength;
633
634        $nextLexeme = $this->readLexeme();
635        if ($nextLexeme != 'endstream') {
636            require_once 'Zend/Pdf/Exception.php';
637            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
638        }
639
640        $nextLexeme = $this->readLexeme();
641        if ($nextLexeme != 'endobj') {
642            require_once 'Zend/Pdf/Exception.php';
643            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
644        }
645
646        $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
647                                                         $dataOffset,
648                                                         $streamLength),
649                                                  (int)$objNum,
650                                                  (int)$genNum,
651                                                  $this->_objFactory->resolve(),
652                                                  $objValue);
653
654        foreach ($this->_elements as $element) {
655            $element->setParentObject($obj);
656        }
657
658        // Restore offset value
659        $this->offset = $offsetSave;
660
661        return $obj;
662    }
663
664
665    /**
666     * Get length of source string
667     *
668     * @return integer
669     */
670    public function getLength()
671    {
672        return strlen($this->data);
673    }
674
675    /**
676     * Get source string
677     *
678     * @return string
679     */
680    public function getString()
681    {
682        return $this->data;
683    }
684
685
686    /**
687     * Parse integer value from a binary stream
688     *
689     * @param string $stream
690     * @param integer $offset
691     * @param integer $size
692     * @return integer
693     */
694    public static function parseIntFromStream($stream, $offset, $size)
695    {
696        $value = 0;
697        for ($count = 0; $count < $size; $count++) {
698            $value *= 256;
699            $value += ord($stream[$offset + $count]);
700        }
701
702        return $value;
703    }
704
705
706
707    /**
708     * Set current context
709     *
710     * @param Zend_Pdf_Element_Reference_Context $context
711     */
712    public function setContext(Zend_Pdf_Element_Reference_Context $context)
713    {
714        $this->_context = $context;
715    }
716
717    /**
718     * Object constructor
719     *
720     * Note: PHP duplicates string, which is sent by value, only of it's updated.
721     * Thus we don't need to care about overhead
722     *
723     * @param string $pdfString
724     * @param Zend_Pdf_ElementFactory_Interface $factory
725     */
726    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
727    {
728        $this->data         = $source;
729        $this->_objFactory  = $factory;
730    }
731}