PageRenderTime 105ms CodeModel.GetById 50ms app.highlight 13ms RepoModel.GetById 37ms app.codeStats 1ms

/library/Zend/Pdf/FileParser.php

https://bitbucket.org/baruffaldi/website-2008-computer-shopping-3
PHP | 480 lines | 180 code | 45 blank | 255 comment | 52 complexity | e61c7d0b4fa0d8d26733c932720b23eb MD5 | raw file
  1<?php
  2/**
  3 * Zend Framework
  4 *
  5 * LICENSE
  6 *
  7 * This source file is subject to the new BSD license that is bundled
  8 * with this package in the file LICENSE.txt.
  9 * It is also available through the world-wide-web at this URL:
 10 * http://framework.zend.com/license/new-bsd
 11 * If you did not receive a copy of the license and are unable to
 12 * obtain it through the world-wide-web, please send an email
 13 * to license@zend.com so we can send you a copy immediately.
 14 *
 15 * @package    Zend_Pdf
 16 * @subpackage FileParser
 17 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 18 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 19 */
 20
 21/** Zend_Pdf_Exception */
 22require_once 'Zend/Pdf/Exception.php';
 23
 24
 25/**
 26 * Abstract utility class for parsing binary files.
 27 *
 28 * Provides a library of methods to quickly navigate and extract various data
 29 * types (signed and unsigned integers, floating- and fixed-point numbers,
 30 * strings, etc.) from the file.
 31 *
 32 * File access is managed via a {@link Zend_Pdf_FileParserDataSource} object.
 33 * This allows the same parser code to work with many different data sources:
 34 * in-memory objects, filesystem files, etc.
 35 *
 36 * @package    Zend_Pdf
 37 * @subpackage FileParser
 38 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 39 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 40 */
 41abstract class Zend_Pdf_FileParser
 42{
 43  /**** Class Constants ****/
 44
 45    /**
 46     * Little-endian byte order (0x04 0x03 0x02 0x01).
 47     */
 48    const BYTE_ORDER_LITTLE_ENDIAN = 0;
 49
 50    /**
 51     * Big-endian byte order (0x01 0x02 0x03 0x04).
 52     */
 53    const BYTE_ORDER_BIG_ENDIAN    = 1;
 54
 55
 56
 57  /**** Instance Variables ****/
 58
 59
 60    /**
 61     * Flag indicating that the file has passed a cursory validation check.
 62     * @var boolean
 63     */
 64    protected $_isScreened = false;
 65
 66    /**
 67     * Flag indicating that the file has been sucessfully parsed.
 68     * @var boolean
 69     */
 70    protected $_isParsed = false;
 71
 72    /**
 73     * Object representing the data source to be parsed.
 74     * @var Zend_Pdf_FileParserDataSource
 75     */
 76    protected $_dataSource = null;
 77
 78
 79
 80  /**** Public Interface ****/
 81
 82
 83  /* Abstract Methods */
 84
 85    /**
 86     * Performs a cursory check to verify that the binary file is in the expected
 87     * format. Intended to quickly weed out obviously bogus files.
 88     *
 89     * Must set $this->_isScreened to true if successful.
 90     *
 91     * @throws Zend_Pdf_Exception
 92     */
 93    abstract public function screen();
 94
 95    /**
 96     * Reads and parses the complete binary file.
 97     *
 98     * Must set $this->_isParsed to true if successful.
 99     *
100     * @throws Zend_Pdf_Exception
101     */
102    abstract public function parse();
103
104
105  /* Object Lifecycle */
106
107    /**
108     * Object constructor.
109     *
110     * Verifies that the data source has been properly initialized.
111     *
112     * @param Zend_Pdf_FileParserDataSource $dataSource
113     * @throws Zend_Pdf_Exception
114     */
115    public function __construct(Zend_Pdf_FileParserDataSource $dataSource)
116    {
117        if ($dataSource->getSize() == 0) {
118            throw new Zend_Pdf_Exception('The data source has not been properly initialized',
119                                         Zend_Pdf_Exception::BAD_DATA_SOURCE);
120        }
121        $this->_dataSource = $dataSource;
122    }
123
124    /**
125     * Object destructor.
126     *
127     * Discards the data source object.
128     */
129    public function __destruct()
130    {
131        $this->_dataSource = null;
132    }
133
134
135  /* Accessors */
136
137    /**
138     * Returns true if the file has passed a cursory validation check.
139     *
140     * @return boolean
141     */
142    public function isScreened()
143    {
144        return $this->_isScreened;
145    }
146
147    /**
148     * Returns true if the file has been successfully parsed.
149     *
150     * @return boolean
151     */
152    public function isParsed()
153    {
154        return $this->_isParsed;
155    }
156
157    /**
158     * Returns the data source object representing the file being parsed.
159     *
160     * @return Zend_Pdf_FileParserDataSource
161     */
162    public function getDataSource()
163    {
164        return $this->_dataSource;
165    }
166
167
168  /* Primitive Methods */
169
170    /**
171     * Convenience wrapper for the data source object's moveToOffset() method.
172     *
173     * @param integer $offset Destination byte offset.
174     * @throws Zend_Pdf_Exception
175     */
176    public function moveToOffset($offset)
177    {
178        $this->_dataSource->moveToOffset($offset);
179    }
180
181    public function getOffset() {
182       return $this->_dataSource->getOffset();
183    }
184
185    public function getSize() {
186       return $this->_dataSource->getSize();
187    }
188
189    /**
190     * Convenience wrapper for the data source object's readBytes() method.
191     *
192     * @param integer $byteCount Number of bytes to read.
193     * @return string
194     * @throws Zend_Pdf_Exception
195     */
196    public function readBytes($byteCount)
197    {
198        return $this->_dataSource->readBytes($byteCount);
199    }
200
201    /**
202     * Convenience wrapper for the data source object's skipBytes() method.
203     *
204     * @param integer $byteCount Number of bytes to skip.
205     * @throws Zend_Pdf_Exception
206     */
207    public function skipBytes($byteCount)
208    {
209        $this->_dataSource->skipBytes($byteCount);
210    }
211
212
213  /* Parser Methods */
214
215    /**
216     * Reads the signed integer value from the binary file at the current byte
217     * offset.
218     *
219     * Advances the offset by the number of bytes read. Throws an exception if
220     * an error occurs.
221     *
222     * @param integer $size Size of integer in bytes: 1-4
223     * @param integer $byteOrder (optional) Big- or little-endian byte order.
224     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
225     *   If omitted, uses big-endian.
226     * @return integer
227     * @throws Zend_Pdf_Exception
228     */
229    public function readInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
230    {
231        if (($size < 1) || ($size > 4)) {
232            throw new Zend_Pdf_Exception("Invalid signed integer size: $size",
233                                         Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
234        }
235        $bytes = $this->_dataSource->readBytes($size);
236        /* unpack() will not work for this method because it always works in
237         * the host byte order for signed integers. It also does not allow for
238         * variable integer sizes.
239         */
240        if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
241            $number = ord($bytes[0]);
242            if (($number & 0x80) == 0x80) {
243                /* This number is negative. Extract the positive equivalent.
244                 */
245                $number = (~ $number) & 0xff;
246                for ($i = 1; $i < $size; $i++) {
247                    $number = ($number << 8) | ((~ ord($bytes[$i])) & 0xff);
248                }
249                /* Now turn this back into a negative number by taking the
250                 * two's complement (we didn't add one above so won't
251                 * subtract it below). This works reliably on both 32- and
252                 * 64-bit systems.
253                 */
254                $number = ~$number;
255            } else {
256                for ($i = 1; $i < $size; $i++) {
257                    $number = ($number << 8) | ord($bytes[$i]);
258                }
259            }
260        } else if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
261            $number = ord($bytes[$size - 1]);
262            if (($number & 0x80) == 0x80) {
263                /* Negative number. See discussion above.
264                 */
265                $number = 0;
266                for ($i = --$size; $i >= 0; $i--) {
267                    $number |= ((~ ord($bytes[$i])) & 0xff) << ($i * 8);
268                }
269                $number = ~$number;
270            } else {
271                $number = 0;
272                for ($i = --$size; $i >= 0; $i--) {
273                    $number |= ord($bytes[$i]) << ($i * 8);
274                }
275            }
276        } else {
277            throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
278                                         Zend_Pdf_Exception::INVALID_BYTE_ORDER);
279        }
280        return $number;
281    }
282
283    /**
284     * Reads the unsigned integer value from the binary file at the current byte
285     * offset.
286     *
287     * Advances the offset by the number of bytes read. Throws an exception if
288     * an error occurs.
289     *
290     * NOTE: If you ask for a 4-byte unsigned integer on a 32-bit machine, the
291     * resulting value WILL BE SIGNED because PHP uses signed integers internally
292     * for everything. To guarantee portability, be sure to use bitwise operators
293     * operators on large unsigned integers!
294     *
295     * @param integer $size Size of integer in bytes: 1-4
296     * @param integer $byteOrder (optional) Big- or little-endian byte order.
297     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
298     *   If omitted, uses big-endian.
299     * @return integer
300     * @throws Zend_Pdf_Exception
301     */
302    public function readUInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
303    {
304        if (($size < 1) || ($size > 4)) {
305            throw new Zend_Pdf_Exception("Invalid unsigned integer size: $size",
306                                         Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
307        }
308        $bytes = $this->_dataSource->readBytes($size);
309        /* unpack() is a bit heavyweight for this simple conversion. Just
310         * work the bytes directly.
311         */
312        if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
313            $number = ord($bytes[0]);
314            for ($i = 1; $i < $size; $i++) {
315                $number = ($number << 8) | ord($bytes[$i]);
316            }
317        } else if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
318            $number = 0;
319            for ($i = --$size; $i >= 0; $i--) {
320                $number |= ord($bytes[$i]) << ($i * 8);
321            }
322        } else {
323            throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
324                                         Zend_Pdf_Exception::INVALID_BYTE_ORDER);
325        }
326        return $number;
327    }
328
329    /**
330     * Returns true if the specified bit is set in the integer bitfield.
331     *
332     * @param integer $bit Bit number to test (i.e. - 0-31)
333     * @param integer $bitField
334     * @return boolean
335     */
336    public function isBitSet($bit, $bitField)
337    {
338        $bitMask = 1 << $bit;
339        $isSet = (($bitField & $bitMask) == $bitMask);
340        return $isSet;
341    }
342
343    /**
344     * Reads the signed fixed-point number from the binary file at the current
345     * byte offset.
346     *
347     * Common fixed-point sizes are 2.14 and 16.16.
348     *
349     * Advances the offset by the number of bytes read. Throws an exception if
350     * an error occurs.
351     *
352     * @param integer $mantissaBits Number of bits in the mantissa
353     * @param integer $fractionBits Number of bits in the fraction
354     * @param integer $byteOrder (optional) Big- or little-endian byte order.
355     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
356     *   If omitted, uses big-endian.
357     * @return float
358     * @throws Zend_Pdf_Exception
359     */
360    public function readFixed($mantissaBits, $fractionBits,
361                              $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
362    {
363        $bitsToRead = $mantissaBits + $fractionBits;
364        if (($bitsToRead % 8) !== 0) {
365            throw new Zend_Pdf_Exception('Fixed-point numbers are whole bytes',
366                                         Zend_Pdf_Exception::BAD_FIXED_POINT_SIZE);
367        }
368        $number = $this->readInt(($bitsToRead >> 3), $byteOrder) / (1 << $fractionBits);
369        return $number;
370    }
371
372    /**
373     * Reads the Unicode UTF-16-encoded string from the binary file at the
374     * current byte offset.
375     *
376     * The byte order of the UTF-16 string must be specified. You must also
377     * supply the desired resulting character set.
378     *
379     * Advances the offset by the number of bytes read. Throws an exception if
380     * an error occurs.
381     *
382     * @todo Consider changing $byteCount to a character count. They are not
383     *   always equivalent (in the case of surrogates).
384     * @todo Make $byteOrder optional if there is a byte-order mark (BOM) in the
385     *   string being extracted.
386     *
387     * @param integer $byteCount Number of bytes (characters * 2) to return.
388     * @param integer $byteOrder (optional) Big- or little-endian byte order.
389     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
390     *   If omitted, uses big-endian.
391     * @param string $characterSet (optional) Desired resulting character set.
392     *   You may use any character set supported by {@link iconv()}. If omitted,
393     *   uses 'current locale'.
394     * @return string
395     * @throws Zend_Pdf_Exception
396     */
397    public function readStringUTF16($byteCount,
398                                    $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN,
399                                    $characterSet = '')
400    {
401        if ($byteCount == 0) {
402            return '';
403        }
404        $bytes = $this->_dataSource->readBytes($byteCount);
405        if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
406            if ($characterSet == 'UTF-16BE') {
407                return $bytes;
408            }
409            return iconv('UTF-16BE', $characterSet, $bytes);
410        } else if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
411            if ($characterSet == 'UTF-16LE') {
412                return $bytes;
413            }
414            return iconv('UTF-16LE', $characterSet, $bytes);
415        } else {
416            throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
417                                         Zend_Pdf_Exception::INVALID_BYTE_ORDER);
418        }
419    }
420
421    /**
422     * Reads the Mac Roman-encoded string from the binary file at the current
423     * byte offset.
424     *
425     * You must supply the desired resulting character set.
426     *
427     * Advances the offset by the number of bytes read. Throws an exception if
428     * an error occurs.
429     *
430     * @param integer $byteCount Number of bytes (characters) to return.
431     * @param string $characterSet (optional) Desired resulting character set.
432     *   You may use any character set supported by {@link iconv()}. If omitted,
433     *   uses 'current locale'.
434     * @return string
435     * @throws Zend_Pdf_Exception
436     */
437    public function readStringMacRoman($byteCount, $characterSet = '')
438    {
439        if ($byteCount == 0) {
440            return '';
441        }
442        $bytes = $this->_dataSource->readBytes($byteCount);
443        if ($characterSet == 'MacRoman') {
444            return $bytes;
445        }
446        return iconv('MacRoman', $characterSet, $bytes);
447    }
448
449    /**
450     * Reads the Pascal string from the binary file at the current byte offset.
451     *
452     * The length of the Pascal string is determined by reading the length bytes
453     * which preceed the character data. You must supply the desired resulting
454     * character set.
455     *
456     * Advances the offset by the number of bytes read. Throws an exception if
457     * an error occurs.
458     *
459     * @param string $characterSet (optional) Desired resulting character set.
460     *   You may use any character set supported by {@link iconv()}. If omitted,
461     *   uses 'current locale'.
462     * @param integer $lengthBytes (optional) Number of bytes that make up the
463     *   length. Default is 1.
464     * @return string
465     * @throws Zend_Pdf_Exception
466     */
467    public function readStringPascal($characterSet = '', $lengthBytes = 1)
468    {
469        $byteCount = $this->readUInt($lengthBytes);
470        if ($byteCount == 0) {
471            return '';
472        }
473        $bytes = $this->_dataSource->readBytes($byteCount);
474        if ($characterSet == 'ASCII') {
475            return $bytes;
476        }
477        return iconv('ASCII', $characterSet, $bytes);
478    }
479
480}