PageRenderTime 128ms CodeModel.GetById 60ms app.highlight 25ms RepoModel.GetById 35ms app.codeStats 1ms

/library/Zend/Pdf/StringParser.php

https://bitbucket.org/baruffaldi/website-2008-computer-shopping-3
PHP | 709 lines | 376 code | 120 blank | 213 comment | 133 complexity | cc4751af231ef5e9c388aba27b573beb MD5 | raw file
  1<?php
  2/**
  3 * Zend Framework
  4 *
  5 * LICENSE
  6 *
  7 * This source file is subject to the new BSD license that is bundled
  8 * with this package in the file LICENSE.txt.
  9 * It is also available through the world-wide-web at this URL:
 10 * http://framework.zend.com/license/new-bsd
 11 * If you did not receive a copy of the license and are unable to
 12 * obtain it through the world-wide-web, please send an email
 13 * to license@zend.com so we can send you a copy immediately.
 14 *
 15 * @package    Zend_Pdf
 16 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 17 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 18 */
 19
 20
 21/** Zend_Pdf_Element */
 22require_once 'Zend/Pdf/Element.php';
 23
 24/** Zend_Pdf_Element_Array */
 25require_once 'Zend/Pdf/Element/Array.php';
 26
 27/** Zend_Pdf_Element_String_Binary */
 28require_once 'Zend/Pdf/Element/String/Binary.php';
 29
 30/** Zend_Pdf_Element_Boolean */
 31require_once 'Zend/Pdf/Element/Boolean.php';
 32
 33/** Zend_Pdf_Element_Dictionary */
 34require_once 'Zend/Pdf/Element/Dictionary.php';
 35
 36/** Zend_Pdf_Element_Name */
 37require_once 'Zend/Pdf/Element/Name.php';
 38
 39/** Zend_Pdf_Element_Numeric */
 40require_once 'Zend/Pdf/Element/Numeric.php';
 41
 42/** Zend_Pdf_Element_Object */
 43require_once 'Zend/Pdf/Element/Object.php';
 44
 45/** Zend_Pdf_Element_Reference */
 46require_once 'Zend/Pdf/Element/Reference.php';
 47
 48/** Zend_Pdf_Element_Object_Stream */
 49require_once 'Zend/Pdf/Element/Object/Stream.php';
 50
 51/** Zend_Pdf_Element_String */
 52require_once 'Zend/Pdf/Element/String.php';
 53
 54/** Zend_Pdf_Element_Null */
 55require_once 'Zend/Pdf/Element/Null.php';
 56
 57/** Zend_Pdf_Element_Reference_Context */
 58require_once 'Zend/Pdf/Element/Reference/Context.php';
 59
 60/** Zend_Pdf_Element_Reference_Table */
 61require_once 'Zend/Pdf/Element/Reference/Table.php';
 62
 63/** Zend_Pdf_ElementFactory_Interface */
 64require_once 'Zend/Pdf/ElementFactory/Interface.php';
 65
 66/** Zend_Pdf_PhpArray */
 67require_once 'Zend/Pdf/PhpArray.php';
 68
 69
 70/**
 71 * PDF string parser
 72 *
 73 * @package    Zend_Pdf
 74 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 75 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 76 */
 77class Zend_Pdf_StringParser
 78{
 79    /**
 80     * Source PDF
 81     *
 82     * @var string
 83     */
 84    public $data = '';
 85
 86    /**
 87     * Current position in a data
 88     *
 89     * @var integer
 90     */
 91    public $offset = 0;
 92
 93    /**
 94     * Current reference context
 95     *
 96     * @var Zend_Pdf_Element_Reference_Context
 97     */
 98    private $_context = null;
 99
100    /**
101     * Array of elements of the currently parsed object/trailer
102     *
103     * @var array
104     */
105    private $_elements = array();
106
107    /**
108     * PDF objects factory.
109     *
110     * @var Zend_Pdf_ElementFactory_Interface
111     */
112    private $_objFactory = null;
113
114
115    /**
116     * Clean up resources.
117     *
118     * Clear current state to remove cyclic object references
119     */
120    public function cleanUp()
121    {
122        $this->_context = null;
123        $this->_elements = array();
124        $this->_objFactory = null;
125    }
126
127    /**
128     * Character with code $chCode is white space
129     *
130     * @param integer $chCode
131     * @return boolean
132     */
133    public static function isWhiteSpace($chCode)
134    {
135        if ($chCode == 0x00 || // null character
136            $chCode == 0x09 || // Tab
137            $chCode == 0x0A || // Line feed
138            $chCode == 0x0C || // Form Feed
139            $chCode == 0x0D || // Carriage return
140            $chCode == 0x20    // Space
141           ) {
142            return true;
143        } else {
144            return false;
145        }
146    }
147
148
149    /**
150     * Character with code $chCode is a delimiter character
151     *
152     * @param integer $chCode
153     * @return boolean
154     */
155    public static function isDelimiter($chCode )
156    {
157        if ($chCode == 0x28 || // '('
158            $chCode == 0x29 || // ')'
159            $chCode == 0x3C || // '<'
160            $chCode == 0x3E || // '>'
161            $chCode == 0x5B || // '['
162            $chCode == 0x5D || // ']'
163            $chCode == 0x7B || // '{'
164            $chCode == 0x7D || // '}'
165            $chCode == 0x2F || // '/'
166            $chCode == 0x25    // '%'
167           ) {
168            return true;
169        } else {
170            return false;
171        }
172    }
173
174
175    /**
176     * Skip white space
177     *
178     * @param boolean $skipComment
179     */
180    public function skipWhiteSpace($skipComment = true)
181    {
182        while ($this->offset < strlen($this->data)) {
183            if (self::isWhiteSpace( ord($this->data[$this->offset]) )) {
184                $this->offset++;
185            } else if (ord($this->data[$this->offset]) == 0x25 && $skipComment) { // '%'
186                $this->skipComment();
187            } else {
188                return;
189            }
190        }
191    }
192
193
194    /**
195     * Skip comment
196     */
197    public function skipComment()
198    {
199        while ($this->offset < strlen($this->data))
200        {
201            if (ord($this->data[$this->offset]) != 0x0A || // Line feed
202                ord($this->data[$this->offset]) != 0x0d    // Carriage return
203               ) {
204                $this->offset++;
205            } else {
206                return;
207            }
208        }
209    }
210
211
212    /**
213     * Read comment line
214     *
215     * @return string
216     */
217    public function readComment()
218    {
219        $this->skipWhiteSpace(false);
220
221        /** Check if it's a comment line */
222        if ($this->data[$this->offset] != '%') {
223            return '';
224        }
225
226        for ($start = $this->offset;
227             $this->offset < strlen($this->data);
228             $this->offset++) {
229            if (ord($this->data[$this->offset]) == 0x0A || // Line feed
230                ord($this->data[$this->offset]) == 0x0d    // Carriage return
231               ) {
232                break;
233            }
234        }
235
236        return substr($this->data, $start, $this->offset-$start);
237    }
238
239
240    /**
241     * Returns next lexeme from a pdf stream
242     *
243     * @return string
244     */
245    public function readLexeme()
246    {
247        $this->skipWhiteSpace();
248
249        if ($this->offset >= strlen($this->data)) {
250            return '';
251        }
252
253        $start = $this->offset;
254
255        if (self::isDelimiter( ord($this->data[$start]) )) {
256            if ($this->data[$start] == '<' && $this->offset + 1 < strlen($this->data) && $this->data[$start+1] == '<') {
257                $this->offset += 2;
258                return '<<';
259            } else if ($this->data[$start] == '>' && $this->offset + 1 < strlen($this->data) && $this->data[$start+1] == '>') {
260                $this->offset += 2;
261                return '>>';
262            } else {
263                $this->offset++;
264                return $this->data[$start];
265            }
266        } else {
267            while ( ($this->offset < strlen($this->data)) &&
268                    (!self::isDelimiter(  ord($this->data[$this->offset]) )) &&
269                    (!self::isWhiteSpace( ord($this->data[$this->offset]) ))   ) {
270                $this->offset++;
271            }
272
273            return substr($this->data, $start, $this->offset - $start);
274        }
275    }
276
277
278    /**
279     * Read elemental object from a PDF stream
280     *
281     * @return Zend_Pdf_Element
282     * @throws Zend_Pdf_Exception
283     */
284    public function readElement($nextLexeme = null)
285    {
286        if ($nextLexeme === null) {
287            $nextLexeme = $this->readLexeme();
288        }
289
290        /**
291         * Note: readElement() method is a public method and could be invoked from other classes.
292         * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
293         * about _elements member management.
294         */
295        switch ($nextLexeme) {
296            case '(':
297                return ($this->_elements[] = $this->_readString());
298
299            case '<':
300                return ($this->_elements[] = $this->_readBinaryString());
301
302            case '/':
303                return ($this->_elements[] = new Zend_Pdf_Element_Name(
304                                                Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
305                                                                      ));
306
307            case '[':
308                return ($this->_elements[] = $this->_readArray());
309
310            case '<<':
311                return ($this->_elements[] = $this->_readDictionary());
312
313            case ')':
314                // fall through to next case
315            case '>':
316                // fall through to next case
317            case ']':
318                // fall through to next case
319            case '>>':
320                // fall through to next case
321            case '{':
322                // fall through to next case
323            case '}':
324                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
325                                                $this->offset));
326
327            default:
328                if (strcasecmp($nextLexeme, 'true') == 0) {
329                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
330                } else if (strcasecmp($nextLexeme, 'false') == 0) {
331                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
332                } else if (strcasecmp($nextLexeme, 'null') == 0) {
333                    return ($this->_elements[] = new Zend_Pdf_Element_Null());
334                }
335
336                $ref = $this->_readReference($nextLexeme);
337                if ($ref !== null) {
338                    return ($this->_elements[] = $ref);
339                }
340
341                return ($this->_elements[] = $this->_readNumeric($nextLexeme));
342        }
343    }
344
345
346    /**
347     * Read string PDF object
348     * Also reads trailing ')' from a pdf stream
349     *
350     * @return Zend_Pdf_Element_String
351     * @throws Zend_Pdf_Exception
352     */
353    private function _readString()
354    {
355        $start = $this->offset;
356        $openedBrackets = 1;
357
358        while ($this->offset < strlen($this->data)) {
359            switch (ord( $this->data[$this->offset] )) {
360                case 0x28: // '(' - opened bracket in the string, needs balanced pair.
361                    $openedBrackets++;
362                    break;
363
364                case 0x29: // ')' - pair to the opened bracket
365                    $openedBrackets--;
366                    break;
367
368                case 0x5C: // '\\' - escape sequence, skip next char from a check
369                    $this->offset++;
370            }
371
372            $this->offset++;
373            if ($openedBrackets == 0) {
374                break; // end of string
375            }
376        }
377        if ($openedBrackets != 0) {
378            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
379        }
380
381        return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
382                                                                 $start,
383                                                                 $this->offset - $start - 1) ));
384    }
385
386
387    /**
388     * Read binary string PDF object
389     * Also reads trailing '>' from a pdf stream
390     *
391     * @return Zend_Pdf_Element_String_Binary
392     * @throws Zend_Pdf_Exception
393     */
394    private function _readBinaryString()
395    {
396        $start = $this->offset;
397
398        while ($this->offset < strlen($this->data)) {
399            if (self::isWhiteSpace( ord($this->data[$this->offset]) ) ||
400                ctype_xdigit( $this->data[$this->offset] ) ) {
401                $this->offset++;
402            } else if ($this->data[$this->offset] == '>') {
403                $this->offset++;
404                return new Zend_Pdf_Element_String_Binary(
405                               Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
406                                                                    $start,
407                                                                    $this->offset - $start - 1) ));
408            } else {
409                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
410            }
411        }
412        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while binary string reading. Offset - 0x%X. \'>\' expected.', $start));
413    }
414
415
416    /**
417     * Read array PDF object
418     * Also reads trailing ']' from a pdf stream
419     *
420     * @return Zend_Pdf_Element_Array
421     * @throws Zend_Pdf_Exception
422     */
423    private function _readArray()
424    {
425        $elements = array();
426
427        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
428            if ($nextLexeme != ']') {
429                $elements[] = $this->readElement($nextLexeme);
430            } else {
431                return new Zend_Pdf_Element_Array($elements);
432            }
433        }
434
435        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
436    }
437
438
439    /**
440     * Read dictionary PDF object
441     * Also reads trailing '>>' from a pdf stream
442     *
443     * @return Zend_Pdf_Element_Dictionary
444     * @throws Zend_Pdf_Exception
445     */
446    private function _readDictionary()
447    {
448        $dictionary = new Zend_Pdf_Element_Dictionary();
449
450        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
451            if ($nextLexeme != '>>') {
452                $nameStart = $this->offset - strlen($nextLexeme);
453
454                $name  = $this->readElement($nextLexeme);
455                $value = $this->readElement();
456
457                if (!$name instanceof Zend_Pdf_Element_Name) {
458                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
459                }
460
461                $dictionary->add($name, $value);
462            } else {
463                return $dictionary;
464            }
465        }
466
467        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
468    }
469
470
471    /**
472     * Read reference PDF object
473     *
474     * @param string $nextLexeme
475     * @return Zend_Pdf_Element_Reference
476     */
477    private function _readReference($nextLexeme = null)
478    {
479        $start = $this->offset;
480
481        if ($nextLexeme === null) {
482            $objNum = $this->readLexeme();
483        } else {
484            $objNum = $nextLexeme;
485        }
486        if (!ctype_digit($objNum)) { // it's not a reference
487            $this->offset = $start;
488            return null;
489        }
490
491        $genNum = $this->readLexeme();
492        if (!ctype_digit($genNum)) { // it's not a reference
493            $this->offset = $start;
494            return null;
495        }
496
497        $rMark  = $this->readLexeme();
498        if ($rMark != 'R') { // it's not a reference
499            $this->offset = $start;
500            return null;
501        }
502
503        $ref = new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
504
505        return $ref;
506    }
507
508
509    /**
510     * Read numeric PDF object
511     *
512     * @param string $nextLexeme
513     * @return Zend_Pdf_Element_Numeric
514     */
515    private function _readNumeric($nextLexeme = null)
516    {
517        if ($nextLexeme === null) {
518            $nextLexeme = $this->readLexeme();
519        }
520
521        return new Zend_Pdf_Element_Numeric($nextLexeme);
522    }
523
524
525    /**
526     * Read inderect object from a PDF stream
527     *
528     * @param integer $offset
529     * @param Zend_Pdf_Element_Reference_Context $context
530     * @return Zend_Pdf_Element_Object
531     */
532    public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
533    {
534        if ($offset === null ) {
535            return new Zend_Pdf_Element_Null();
536        }
537
538        // Save current offset to make getObject() reentrant
539        $offsetSave = $this->offset;
540
541        $this->offset    = $offset;
542        $this->_context  = $context;
543        $this->_elements = array();
544
545        $objNum = $this->readLexeme();
546        if (!ctype_digit($objNum)) {
547            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
548        }
549
550        $genNum = $this->readLexeme();
551        if (!ctype_digit($genNum)) {
552            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
553        }
554
555        $objKeyword = $this->readLexeme();
556        if ($objKeyword != 'obj') {
557            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
558        }
559
560        $objValue = $this->readElement();
561
562        $nextLexeme = $this->readLexeme();
563
564        if( $nextLexeme == 'endobj' ) {
565            /**
566             * Object is not generated by factory (thus it's not marked as modified object).
567             * But factory is assigned to the obect.
568             */
569            $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());
570
571            foreach ($this->_elements as $element) {
572                $element->setParentObject($obj);
573            }
574
575            // Restore offset value
576            $this->offset = $offsetSave;
577
578            return $obj;
579        }
580
581        /**
582         * It's a stream object
583         */
584        if ($nextLexeme != 'stream') {
585            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
586        }
587
588        if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
589            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
590        }
591
592        /**
593         * References are automatically dereferenced at this moment.
594         */
595        $streamLength = $objValue->Length->value;
596
597        /**
598         * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
599         * This restriction gives the possibility to recognize all cases exactly
600         */
601        if ($this->data[$this->offset] == "\r" &&
602            $this->data[$this->offset + 1] == "\n"    ) {
603            $this->offset += 2;
604        } else if ($this->data[$this->offset] == "\n"    ) {
605            $this->offset++;
606        } else {
607            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
608        }
609
610        $dataOffset = $this->offset;
611
612        $this->offset += $streamLength;
613
614        $nextLexeme = $this->readLexeme();
615        if ($nextLexeme != 'endstream') {
616            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
617        }
618
619        $nextLexeme = $this->readLexeme();
620        if ($nextLexeme != 'endobj') {
621            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
622        }
623
624        $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
625                                                         $dataOffset,
626                                                         $streamLength),
627                                                  (int)$objNum,
628                                                  (int)$genNum,
629                                                  $this->_objFactory->resolve(),
630                                                  $objValue);
631
632        foreach ($this->_elements as $element) {
633            $element->setParentObject($obj);
634        }
635
636        // Restore offset value
637        $this->offset = $offsetSave;
638
639        return $obj;
640    }
641
642
643    /**
644     * Get length of source string
645     *
646     * @return integer
647     */
648    public function getLength()
649    {
650        return strlen($this->data);
651    }
652
653    /**
654     * Get source string
655     *
656     * @return string
657     */
658    public function getString()
659    {
660        return $this->data;
661    }
662
663
664    /**
665     * Parse integer value from a binary stream
666     *
667     * @param string $stream
668     * @param integer $offset
669     * @param integer $size
670     * @return integer
671     */
672    public static function parseIntFromStream($stream, $offset, $size)
673    {
674        $value = 0;
675        for ($count = 0; $count < $size; $count++) {
676            $value *= 256;
677            $value += ord($stream[$offset + $count]);
678        }
679
680        return $value;
681    }
682
683
684
685    /**
686     * Set current context
687     *
688     * @param Zend_Pdf_Element_Reference_Context $context
689     */
690    public function setContext(Zend_Pdf_Element_Reference_Context $context)
691    {
692        $this->_context = $context;
693    }
694
695    /**
696     * Object constructor
697     *
698     * Note: PHP duplicates string, which is sent by value, only of it's updated.
699     * Thus we don't need to care about overhead
700     *
701     * @param string $pdfString
702     * @param Zend_Pdf_ElementFactory_Interface $factory
703     */
704    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
705    {
706        $this->data         = $source;
707        $this->_objFactory  = $factory;
708    }
709}