PageRenderTime 167ms CodeModel.GetById 80ms app.highlight 40ms RepoModel.GetById 40ms app.codeStats 1ms

/library/Zend/Pdf/Parser.php

https://bitbucket.org/fabiancarlos/feature_seguimentos
PHP | 472 lines | 285 code | 62 blank | 125 comment | 75 complexity | 2b8f8c7259fbc4fbec75be691309b670 MD5 | raw file
  1<?php
  2/**
  3 * Zend Framework
  4 *
  5 * LICENSE
  6 *
  7 * This source file is subject to the new BSD license that is bundled
  8 * with this package in the file LICENSE.txt.
  9 * It is also available through the world-wide-web at this URL:
 10 * http://framework.zend.com/license/new-bsd
 11 * If you did not receive a copy of the license and are unable to
 12 * obtain it through the world-wide-web, please send an email
 13 * to license@zend.com so we can send you a copy immediately.
 14 *
 15 * @category   Zend
 16 * @package    Zend_Pdf
 17 * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
 18 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 19 * @version    $Id: Parser.php 23775 2011-03-01 17:25:24Z ralph $
 20 */
 21
 22/** Internally used classes */
 23require_once 'Zend/Pdf/Element.php';
 24require_once 'Zend/Pdf/Element/Numeric.php';
 25
 26
 27/** Zend_Pdf_StringParser */
 28require_once 'Zend/Pdf/StringParser.php';
 29
 30
 31/**
 32 * PDF file parser
 33 *
 34 * @package    Zend_Pdf
 35 * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
 36 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 37 */
 38class Zend_Pdf_Parser
 39{
 40    /**
 41     * String parser
 42     *
 43     * @var Zend_Pdf_StringParser
 44     */
 45    private $_stringParser;
 46
 47    /**
 48     * Last PDF file trailer
 49     *
 50     * @var Zend_Pdf_Trailer_Keeper
 51     */
 52    private $_trailer;
 53
 54    /**
 55     * PDF version specified in the file header
 56     *
 57     * @var string
 58     */
 59    private $_pdfVersion;
 60
 61
 62    /**
 63     * Get length of source PDF
 64     *
 65     * @return integer
 66     */
 67    public function getPDFLength()
 68    {
 69        return strlen($this->_stringParser->data);
 70    }
 71
 72    /**
 73     * Get PDF String
 74     *
 75     * @return string
 76     */
 77    public function getPDFString()
 78    {
 79        return $this->_stringParser->data;
 80    }
 81
 82    /**
 83     * PDF version specified in the file header
 84     *
 85     * @return string
 86     */
 87    public function getPDFVersion()
 88    {
 89        return $this->_pdfVersion;
 90    }
 91
 92    /**
 93     * Load XReference table and referenced objects
 94     *
 95     * @param integer $offset
 96     * @throws Zend_Pdf_Exception
 97     * @return Zend_Pdf_Trailer_Keeper
 98     */
 99    private function _loadXRefTable($offset)
100    {
101        $this->_stringParser->offset = $offset;
102
103        require_once 'Zend/Pdf/Element/Reference/Table.php';
104        $refTable = new Zend_Pdf_Element_Reference_Table();
105        require_once 'Zend/Pdf/Element/Reference/Context.php';
106        $context  = new Zend_Pdf_Element_Reference_Context($this->_stringParser, $refTable);
107        $this->_stringParser->setContext($context);
108
109        $nextLexeme = $this->_stringParser->readLexeme();
110        if ($nextLexeme == 'xref') {
111            /**
112             * Common cross-reference table
113             */
114            $this->_stringParser->skipWhiteSpace();
115            while ( ($nextLexeme = $this->_stringParser->readLexeme()) != 'trailer' ) {
116                if (!ctype_digit($nextLexeme)) {
117                    require_once 'Zend/Pdf/Exception.php';
118                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($nextLexeme)));
119                }
120                $objNum = (int)$nextLexeme;
121
122                $refCount = $this->_stringParser->readLexeme();
123                if (!ctype_digit($refCount)) {
124                    require_once 'Zend/Pdf/Exception.php';
125                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($refCount)));
126                }
127
128                $this->_stringParser->skipWhiteSpace();
129                while ($refCount > 0) {
130                    $objectOffset = substr($this->_stringParser->data, $this->_stringParser->offset, 10);
131                    if (!ctype_digit($objectOffset)) {
132                        require_once 'Zend/Pdf/Exception.php';
133                        throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Offset must contain only digits.', $this->_stringParser->offset));
134                    }
135                    // Force $objectOffset to be treated as decimal instead of octal number
136                    for ($numStart = 0; $numStart < strlen($objectOffset)-1; $numStart++) {
137                        if ($objectOffset[$numStart] != '0') {
138                            break;
139                        }
140                    }
141                    $objectOffset = substr($objectOffset, $numStart);
142                    $this->_stringParser->offset += 10;
143
144                    if (strpos("\x00\t\n\f\r ", $this->_stringParser->data[$this->_stringParser->offset]) === false) {
145                        require_once 'Zend/Pdf/Exception.php';
146                        throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
147                    }
148                    $this->_stringParser->offset++;
149
150                    $genNumber = substr($this->_stringParser->data, $this->_stringParser->offset, 5);
151                    if (!ctype_digit($objectOffset)) {
152                        require_once 'Zend/Pdf/Exception.php';
153                        throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Offset must contain only digits.', $this->_stringParser->offset));
154                    }
155                    // Force $objectOffset to be treated as decimal instead of octal number
156                    for ($numStart = 0; $numStart < strlen($genNumber)-1; $numStart++) {
157                        if ($genNumber[$numStart] != '0') {
158                            break;
159                        }
160                    }
161                    $genNumber = substr($genNumber, $numStart);
162                    $this->_stringParser->offset += 5;
163
164                    if (strpos("\x00\t\n\f\r ", $this->_stringParser->data[$this->_stringParser->offset]) === false) {
165                        require_once 'Zend/Pdf/Exception.php';
166                        throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
167                    }
168                    $this->_stringParser->offset++;
169
170                    $inUseKey = $this->_stringParser->data[$this->_stringParser->offset];
171                    $this->_stringParser->offset++;
172
173                    switch ($inUseKey) {
174                        case 'f':
175                            // free entry
176                            unset( $this->_refTable[$objNum . ' ' . $genNumber . ' R'] );
177                            $refTable->addReference($objNum . ' ' . $genNumber . ' R',
178                                                    $objectOffset,
179                                                    false);
180                            break;
181
182                        case 'n':
183                            // in-use entry
184
185                            $refTable->addReference($objNum . ' ' . $genNumber . ' R',
186                                                    $objectOffset,
187                                                    true);
188                    }
189
190                    if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_stringParser->data[$this->_stringParser->offset] )) ) {
191                        require_once 'Zend/Pdf/Exception.php';
192                        throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
193                    }
194                    $this->_stringParser->offset++;
195                    if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_stringParser->data[$this->_stringParser->offset] )) ) {
196                        require_once 'Zend/Pdf/Exception.php';
197                        throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
198                    }
199                    $this->_stringParser->offset++;
200
201                    $refCount--;
202                    $objNum++;
203                }
204            }
205
206            $trailerDictOffset = $this->_stringParser->offset;
207            $trailerDict = $this->_stringParser->readElement();
208            if (!$trailerDict instanceof Zend_Pdf_Element_Dictionary) {
209                require_once 'Zend/Pdf/Exception.php';
210                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.  Dictionary expected after \'trailer\' keyword.', $trailerDictOffset));
211            }
212        } else {
213            $xrefStream = $this->_stringParser->getObject($offset, $context);
214
215            if (!$xrefStream instanceof Zend_Pdf_Element_Object_Stream) {
216                require_once 'Zend/Pdf/Exception.php';
217                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.  Cross-reference stream expected.', $offset));
218            }
219
220            $trailerDict = $xrefStream->dictionary;
221            if ($trailerDict->Type->value != 'XRef') {
222                require_once 'Zend/Pdf/Exception.php';
223                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.  Cross-reference stream object must have /Type property assigned to /XRef.', $offset));
224            }
225            if ($trailerDict->W === null  || $trailerDict->W->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
226                require_once 'Zend/Pdf/Exception.php';
227                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary doesn\'t have W entry or it\'s not an array.', $offset));
228            }
229
230            $entryField1Size = $trailerDict->W->items[0]->value;
231            $entryField2Size = $trailerDict->W->items[1]->value;
232            $entryField3Size = $trailerDict->W->items[2]->value;
233
234            if ($entryField2Size == 0 || $entryField3Size == 0) {
235                require_once 'Zend/Pdf/Exception.php';
236                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Wrong W dictionary entry. Only type field of stream entries has default value and could be zero length.', $offset));
237            }
238
239            $xrefStreamData = $xrefStream->value;
240
241            if ($trailerDict->Index !== null) {
242                if ($trailerDict->Index->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
243                    require_once 'Zend/Pdf/Exception.php';
244                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary Index entry must be an array.', $offset));
245                }
246                $sections = count($trailerDict->Index->items)/2;
247            } else {
248                $sections = 1;
249            }
250
251            $streamOffset = 0;
252
253            $size    = $entryField1Size + $entryField2Size + $entryField3Size;
254            $entries = strlen($xrefStreamData)/$size;
255
256            for ($count = 0; $count < $sections; $count++) {
257                if ($trailerDict->Index !== null) {
258                    $objNum  = $trailerDict->Index->items[$count*2    ]->value;
259                    $entries = $trailerDict->Index->items[$count*2 + 1]->value;
260                } else {
261                    $objNum  = 0;
262                    $entries = $trailerDict->Size->value;
263                }
264
265                for ($count2 = 0; $count2 < $entries; $count2++) {
266                    if ($entryField1Size == 0) {
267                        $type = 1;
268                    } else if ($entryField1Size == 1) { // Optimyze one-byte field case
269                        $type = ord($xrefStreamData[$streamOffset++]);
270                    } else {
271                        $type = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField1Size);
272                        $streamOffset += $entryField1Size;
273                    }
274
275                    if ($entryField2Size == 1) { // Optimyze one-byte field case
276                        $field2 = ord($xrefStreamData[$streamOffset++]);
277                    } else {
278                        $field2 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField2Size);
279                        $streamOffset += $entryField2Size;
280                    }
281
282                    if ($entryField3Size == 1) { // Optimyze one-byte field case
283                        $field3 = ord($xrefStreamData[$streamOffset++]);
284                    } else {
285                        $field3 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField3Size);
286                        $streamOffset += $entryField3Size;
287                    }
288
289                    switch ($type) {
290                        case 0:
291                            // Free object
292                            $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, false);
293                            // Debug output:
294                            // echo "Free object - $objNum $field3 R, next free - $field2\n";
295                            break;
296
297                        case 1:
298                            // In use object
299                            $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, true);
300                            // Debug output:
301                            // echo "In-use object - $objNum $field3 R, offset - $field2\n";
302                            break;
303
304                        case 2:
305                            // Object in an object stream
306                            // Debug output:
307                            // echo "Compressed object - $objNum 0 R, object stream - $field2 0 R, offset - $field3\n";
308                            break;
309                    }
310
311                    $objNum++;
312                }
313            }
314
315            // $streamOffset . ' ' . strlen($xrefStreamData) . "\n";
316            // "$entries\n";
317            require_once 'Zend/Pdf/Exception.php';
318            throw new Zend_Pdf_Exception('Cross-reference streams are not supported yet.');
319        }
320
321
322        require_once 'Zend/Pdf/Trailer/Keeper.php';
323        $trailerObj = new Zend_Pdf_Trailer_Keeper($trailerDict, $context);
324        if ($trailerDict->Prev instanceof Zend_Pdf_Element_Numeric ||
325            $trailerDict->Prev instanceof Zend_Pdf_Element_Reference ) {
326            $trailerObj->setPrev($this->_loadXRefTable($trailerDict->Prev->value));
327            $context->getRefTable()->setParent($trailerObj->getPrev()->getRefTable());
328        }
329
330        /**
331         * We set '/Prev' dictionary property to the current cross-reference section offset.
332         * It doesn't correspond to the actual data, but is true when trailer will be used
333         * as a trailer for next generated PDF section.
334         */
335        $trailerObj->Prev = new Zend_Pdf_Element_Numeric($offset);
336
337        return $trailerObj;
338    }
339
340
341    /**
342     * Get Trailer object
343     *
344     * @return Zend_Pdf_Trailer_Keeper
345     */
346    public function getTrailer()
347    {
348        return $this->_trailer;
349    }
350
351    /**
352     * Object constructor
353     *
354     * Note: PHP duplicates string, which is sent by value, only of it's updated.
355     * Thus we don't need to care about overhead
356     *
357     * @param mixed $source
358     * @param Zend_Pdf_ElementFactory_Interface $factory
359     * @param boolean $load
360     * @throws Zend_Exception
361     */
362    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory, $load)
363    {
364        if ($load) {
365            if (($pdfFile = @fopen($source, 'rb')) === false ) {
366                require_once 'Zend/Pdf/Exception.php';
367                throw new Zend_Pdf_Exception( "Can not open '$source' file for reading." );
368            }
369
370            $data = '';
371            $byteCount = filesize($source);
372            while ($byteCount > 0 && !feof($pdfFile)) {
373                $nextBlock = fread($pdfFile, $byteCount);
374                if ($nextBlock === false) {
375                    require_once 'Zend/Pdf/Exception.php';
376                    throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
377                }
378
379                $data .= $nextBlock;
380                $byteCount -= strlen($nextBlock);
381            }
382            if ($byteCount != 0) {
383                require_once 'Zend/Pdf/Exception.php';
384                throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
385            }
386            fclose($pdfFile);
387
388            $this->_stringParser = new Zend_Pdf_StringParser($data, $factory);
389        } else {
390            $this->_stringParser = new Zend_Pdf_StringParser($source, $factory);
391        }
392
393        $pdfVersionComment = $this->_stringParser->readComment();
394        if (substr($pdfVersionComment, 0, 5) != '%PDF-') {
395            require_once 'Zend/Pdf/Exception.php';
396            throw new Zend_Pdf_Exception('File is not a PDF.');
397        }
398
399        $pdfVersion = substr($pdfVersionComment, 5);
400        if (version_compare($pdfVersion, '0.9',  '<')  ||
401            version_compare($pdfVersion, '1.61', '>=')
402           ) {
403            /**
404             * @todo
405             * To support PDF versions 1.5 (Acrobat 6) and PDF version 1.7 (Acrobat 7)
406             * Stream compression filter must be implemented (for compressed object streams).
407             * Cross reference streams must be implemented
408             */
409            require_once 'Zend/Pdf/Exception.php';
410            throw new Zend_Pdf_Exception(sprintf('Unsupported PDF version. Zend_Pdf supports PDF 1.0-1.4. Current version - \'%f\'', $pdfVersion));
411        }
412        $this->_pdfVersion = $pdfVersion;
413
414        $this->_stringParser->offset = strrpos($this->_stringParser->data, '%%EOF');
415        if ($this->_stringParser->offset === false ||
416            strlen($this->_stringParser->data) - $this->_stringParser->offset > 7) {
417            require_once 'Zend/Pdf/Exception.php';
418            throw new Zend_Pdf_Exception('Pdf file syntax error. End-of-fle marker expected at the end of file.');
419        }
420
421        $this->_stringParser->offset--;
422        /**
423         * Go to end of cross-reference table offset
424         */
425        while (Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )&&
426               ($this->_stringParser->offset > 0)) {
427            $this->_stringParser->offset--;
428        }
429        /**
430         * Go to the start of cross-reference table offset
431         */
432        while ( (!Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) ))&&
433               ($this->_stringParser->offset > 0)) {
434            $this->_stringParser->offset--;
435        }
436        /**
437         * Go to the end of 'startxref' keyword
438         */
439        while (Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )&&
440               ($this->_stringParser->offset > 0)) {
441            $this->_stringParser->offset--;
442        }
443        /**
444         * Go to the white space (eol marker) before 'startxref' keyword
445         */
446        $this->_stringParser->offset -= 9;
447
448        $nextLexeme = $this->_stringParser->readLexeme();
449        if ($nextLexeme != 'startxref') {
450            require_once 'Zend/Pdf/Exception.php';
451            throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', $this->_stringParser->offset-strlen($nextLexeme)));
452        }
453
454        $startXref = $this->_stringParser->readLexeme();
455        if (!ctype_digit($startXref)) {
456            require_once 'Zend/Pdf/Exception.php';
457            throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. Cross-reference table offset must contain only digits. Offset - 0x%X.', $this->_stringParser->offset-strlen($nextLexeme)));
458        }
459
460        $this->_trailer = $this->_loadXRefTable($startXref);
461        $factory->setObjectCount($this->_trailer->Size->value);
462    }
463
464
465    /**
466     * Object destructor
467     */
468    public function __destruct()
469    {
470        $this->_stringParser->cleanUp();
471    }
472}