/website/library/Zend/Pdf/StringParser.php
PHP | 731 lines | 452 code | 92 blank | 187 comment | 110 complexity | 7a028202bfd97fd51deaaca3ed4670ef MD5 | raw file
- <?php
- /**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to the new BSD license that is bundled
- * with this package in the file LICENSE.txt.
- * It is also available through the world-wide-web at this URL:
- * http://framework.zend.com/license/new-bsd
- * If you did not receive a copy of the license and are unable to
- * obtain it through the world-wide-web, please send an email
- * to license@zend.com so we can send you a copy immediately.
- *
- * @category Zend
- * @package Zend_Pdf
- * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://framework.zend.com/license/new-bsd New BSD License
- * @version $Id: StringParser.php 22312 2010-05-27 13:11:23Z padraic $
- */
- /** Internally used classes */
- require_once 'Zend/Pdf/Element/Array.php';
- require_once 'Zend/Pdf/Element/String/Binary.php';
- require_once 'Zend/Pdf/Element/Boolean.php';
- require_once 'Zend/Pdf/Element/Dictionary.php';
- require_once 'Zend/Pdf/Element/Name.php';
- require_once 'Zend/Pdf/Element/Null.php';
- require_once 'Zend/Pdf/Element/Numeric.php';
- require_once 'Zend/Pdf/Element/Object.php';
- require_once 'Zend/Pdf/Element/Object/Stream.php';
- require_once 'Zend/Pdf/Element/Reference.php';
- require_once 'Zend/Pdf/Element/String.php';
- /**
- * PDF string parser
- *
- * @package Zend_Pdf
- * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://framework.zend.com/license/new-bsd New BSD License
- */
class Zend_Pdf_StringParser
{
    /**
     * Source PDF
     *
     * @var string
     */
    public $data = '';

    /**
     * Current position (byte offset) within $data
     *
     * @var integer
     */
    public $offset = 0;

    /**
     * Current reference context
     *
     * @var Zend_Pdf_Element_Reference_Context
     */
    private $_context = null;

    /**
     * Array of elements of the currently parsed object/trailer
     *
     * @var array
     */
    private $_elements = array();

    /**
     * PDF objects factory.
     *
     * @var Zend_Pdf_ElementFactory_Interface
     */
    private $_objFactory = null;


    /**
     * Clean up resources.
     *
     * Clear current state to remove cyclic object references
     */
    public function cleanUp()
    {
        $this->_context    = null;
        $this->_elements   = array();
        $this->_objFactory = null;
    }

    /**
     * Character with code $chCode is white space
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isWhiteSpace($chCode)
    {
        return $chCode == 0x00    // null character
            || $chCode == 0x09    // Tab
            || $chCode == 0x0A    // Line feed
            || $chCode == 0x0C    // Form Feed
            || $chCode == 0x0D    // Carriage return
            || $chCode == 0x20;   // Space
    }

    /**
     * Character with code $chCode is a delimiter character
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isDelimiter($chCode )
    {
        return $chCode == 0x28    // '('
            || $chCode == 0x29    // ')'
            || $chCode == 0x3C    // '<'
            || $chCode == 0x3E    // '>'
            || $chCode == 0x5B    // '['
            || $chCode == 0x5D    // ']'
            || $chCode == 0x7B    // '{'
            || $chCode == 0x7D    // '}'
            || $chCode == 0x2F    // '/'
            || $chCode == 0x25;   // '%'
    }

    /**
     * Skip white space
     *
     * Advances $offset past any run of white space characters. If $skipComment
     * is true, '%' comments (up to, but not including, the end-of-line
     * character) are skipped as well, together with the white space after them.
     *
     * @param boolean $skipComment
     */
    public function skipWhiteSpace($skipComment = true)
    {
        if (!$skipComment) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
            return;
        }

        while (true) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
                // Skip comment; the end-of-line character itself is consumed
                // as white space on the next iteration
                $this->offset += strcspn($this->data, "\r\n", $this->offset);
            } else {
                // Non white space character not equal to '%' is found
                return;
            }
        }
    }

    /**
     * Skip comment
     *
     * Advances $offset up to (but not including) the next line feed or
     * carriage return, or to the end of data if there is none.
     */
    public function skipComment()
    {
        if ($this->offset < strlen($this->data)) {
            $this->offset += strcspn($this->data, "\r\n", $this->offset);
        }
    }

    /**
     * Read comment line
     *
     * Skips leading white space. If the next character starts a comment ('%'),
     * returns the comment text up to (not including) the end-of-line character
     * and leaves $offset at that character. Otherwise returns an empty string.
     *
     * @return string
     */
    public function readComment()
    {
        $this->skipWhiteSpace(false);

        /** Check if it's a comment line (and that we are not at the end of data) */
        if ($this->offset >= strlen($this->data)  ||  $this->data[$this->offset] != '%') {
            return '';
        }

        $start = $this->offset;
        $this->offset += strcspn($this->data, "\r\n", $start);

        return substr($this->data, $start, $this->offset - $start);
    }

    /**
     * Returns next lexeme from a pdf stream
     *
     * White space and comments before the lexeme are skipped. A lexeme is
     * either '<<', '>>', a single delimiter character, or a run of regular
     * characters. An empty string is returned at the end of data.
     *
     * @return string
     */
    public function readLexeme()
    {
        // Same loop as skipWhiteSpace(true), kept inline as in the original code
        while (true) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
                $this->offset += strcspn($this->data, "\r\n", $this->offset);
            } else {
                break;
            }
        }

        if ($this->offset >= strlen($this->data)) {
            return '';
        }

        if (strpos('()<>[]{}/%', $this->data[$this->offset]) !== false) {
            // Delimiter: '<<' and '>>' are two-character lexemes,
            // every other delimiter is a lexeme on its own
            switch (substr($this->data, $this->offset, 2)) {
                case '<<':
                    $this->offset += 2;
                    return '<<';

                case '>>':
                    $this->offset += 2;
                    return '>>';

                default:
                    return $this->data[$this->offset++];
            }
        }

        // The terminator set depends only on the PHP version, so compute it once.
        // NOTE(review): the form feed is left out for PHP < 5.2.5, presumably to
        // work around an strcspn() problem in those versions - confirm.
        static $terminators = null;
        if ($terminators === null) {
            if (version_compare(phpversion(), '5.2.5') >= 0) {
                $terminators = "()<>[]{}/%\x00\t\n\f\r ";
            } else {
                $terminators = "()<>[]{}/%\x00\t\n\r ";
            }
        }

        $start = $this->offset;
        $this->offset += strcspn($this->data, $terminators, $this->offset);

        return substr($this->data, $start, $this->offset - $start);
    }

    /**
     * Read elemental object from a PDF stream
     *
     * @param string|null $nextLexeme  Already read lexeme to start from;
     *                                 the next lexeme is read if null
     * @return Zend_Pdf_Element
     * @throws Zend_Pdf_Exception
     */
    public function readElement($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        /**
         * Note: readElement() method is a public method and could be invoked from other classes.
         * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
         * about _elements member management.
         */
        switch ($nextLexeme) {
            case '(':
                return ($this->_elements[] = $this->_readString());

            case '<':
                return ($this->_elements[] = $this->_readBinaryString());

            case '/':
                return ($this->_elements[] = new Zend_Pdf_Element_Name(
                                                Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
                                                                      ));

            case '[':
                return ($this->_elements[] = $this->_readArray());

            case '<<':
                return ($this->_elements[] = $this->_readDictionary());

            case ')':
                // fall through to next case
            case '>':
                // fall through to next case
            case ']':
                // fall through to next case
            case '>>':
                // fall through to next case
            case '{':
                // fall through to next case
            case '}':
                // A closing delimiter (or '{') can't start an element
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
                                                $this->offset));

            default:
                if (strcasecmp($nextLexeme, 'true') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
                } else if (strcasecmp($nextLexeme, 'false') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
                } else if (strcasecmp($nextLexeme, 'null') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Null());
                }

                // Try "<objNum> <genNum> R" first, otherwise treat lexeme as a number
                $ref = $this->_readReference($nextLexeme);
                if ($ref !== null) {
                    return ($this->_elements[] = $ref);
                }

                return ($this->_elements[] = $this->_readNumeric($nextLexeme));
        }
    }

    /**
     * Read string PDF object
     * Also reads trailing ')' from a pdf stream
     *
     * Expects $offset to point just after the opening '('.
     *
     * @return Zend_Pdf_Element_String
     * @throws Zend_Pdf_Exception
     */
    private function _readString()
    {
        $start = $this->offset;
        $openedBrackets = 1;

        $this->offset += strcspn($this->data, '()\\', $this->offset);

        while ($this->offset < strlen($this->data)) {
            switch (ord( $this->data[$this->offset] )) {
                case 0x28: // '(' - opened bracket in the string, needs balanced pair.
                    $this->offset++;
                    $openedBrackets++;
                    break;

                case 0x29: // ')' - pair to the opened bracket
                    $this->offset++;
                    $openedBrackets--;
                    break;

                case 0x5C: // '\\' - escape sequence, skip next char from a check
                    $this->offset += 2;
            }

            if ($openedBrackets == 0) {
                break; // end of string
            }

            $this->offset += strcspn($this->data, '()\\', $this->offset);
        }

        if ($openedBrackets != 0) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
        }

        // "- 1" excludes the closing ')' which has already been consumed
        return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
                                                                                   $start,
                                                                                   $this->offset - $start - 1) ));
    }

    /**
     * Read binary string PDF object
     * Also reads trailing '>' from a pdf stream
     *
     * Expects $offset to point just after the opening '<'.
     *
     * @return Zend_Pdf_Element_String_Binary
     * @throws Zend_Pdf_Exception
     */
    private function _readBinaryString()
    {
        $start = $this->offset;

        // Hex digits and white space are the only characters allowed before '>'
        $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);

        // End of data is reached only if there is no character left to read;
        // a closing '>' located at the very last byte is still valid.
        if ($this->offset >= strlen($this->data)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
        }

        if ($this->data[$this->offset++] != '>') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
        }

        // "- 1" excludes the closing '>' which has already been consumed
        return new Zend_Pdf_Element_String_Binary(
                       Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
                                                                        $start,
                                                                        $this->offset - $start - 1) ));
    }

    /**
     * Read array PDF object
     * Also reads trailing ']' from a pdf stream
     *
     * @return Zend_Pdf_Element_Array
     * @throws Zend_Pdf_Exception
     */
    private function _readArray()
    {
        $elements = array();

        // An empty lexeme means the end of data has been reached
        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme != ']') {
                $elements[] = $this->readElement($nextLexeme);
            } else {
                return new Zend_Pdf_Element_Array($elements);
            }
        }

        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
    }

    /**
     * Read dictionary PDF object
     * Also reads trailing '>>' from a pdf stream
     *
     * @return Zend_Pdf_Element_Dictionary
     * @throws Zend_Pdf_Exception
     */
    private function _readDictionary()
    {
        $dictionary = new Zend_Pdf_Element_Dictionary();

        // An empty lexeme means the end of data has been reached
        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme != '>>') {
                // Remember where the key starts for error reporting
                $nameStart = $this->offset - strlen($nextLexeme);

                $name  = $this->readElement($nextLexeme);
                $value = $this->readElement();

                if (!$name instanceof Zend_Pdf_Element_Name) {
                    require_once 'Zend/Pdf/Exception.php';
                    throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
                }

                $dictionary->add($name, $value);
            } else {
                return $dictionary;
            }
        }

        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
    }

    /**
     * Read reference PDF object
     *
     * Tries to read "<object number> <generation number> R". If the lexemes
     * do not form a reference, $offset is restored to the value it had on
     * entry and null is returned.
     *
     * @param string $nextLexeme  Already read lexeme to use as object number;
     *                            the next lexeme is read if null
     * @return Zend_Pdf_Element_Reference|null
     */
    private function _readReference($nextLexeme = null)
    {
        $start = $this->offset;

        if ($nextLexeme === null) {
            $objNum = $this->readLexeme();
        } else {
            $objNum = $nextLexeme;
        }
        if (!ctype_digit($objNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $rMark  = $this->readLexeme();
        if ($rMark != 'R') { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $ref = new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());

        return $ref;
    }

    /**
     * Read numeric PDF object
     *
     * @param string $nextLexeme  Already read lexeme to convert;
     *                            the next lexeme is read if null
     * @return Zend_Pdf_Element_Numeric
     */
    private function _readNumeric($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        return new Zend_Pdf_Element_Numeric($nextLexeme);
    }

    /**
     * Read indirect object from a PDF stream
     *
     * Parses "<objNum> <genNum> obj <value> endobj" or the stream form
     * "<objNum> <genNum> obj <dictionary> stream ... endstream endobj"
     * starting at $offset. The parser's own $offset is restored before a
     * successful return.
     *
     * @param integer $offset
     * @param Zend_Pdf_Element_Reference_Context $context
     * @return Zend_Pdf_Element_Object
     * @throws Zend_Pdf_Exception
     */
    public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
    {
        if ($offset === null ) {
            return new Zend_Pdf_Element_Null();
        }

        // Save current offset to make getObject() reentrant
        $offsetSave = $this->offset;

        $this->offset    = $offset;
        $this->_context  = $context;
        $this->_elements = array();

        $objNum = $this->readLexeme();
        if (!ctype_digit($objNum)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
        }

        $objKeyword = $this->readLexeme();
        if ($objKeyword != 'obj') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
        }

        $objValue = $this->readElement();

        $nextLexeme = $this->readLexeme();

        if( $nextLexeme == 'endobj' ) {
            /**
             * Object is not generated by factory (thus it's not marked as modified object).
             * But factory is assigned to the object.
             */
            $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());

            foreach ($this->_elements as $element) {
                $element->setParentObject($obj);
            }

            // Restore offset value
            $this->offset = $offsetSave;

            return $obj;
        }

        /**
         * It's a stream object
         */
        if ($nextLexeme != 'stream') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
        }

        if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
        }

        /**
         * References are automatically dereferenced at this moment.
         */
        $streamLength = $objValue->Length->value;

        /**
         * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
         * This restriction gives the possibility to recognize all cases exactly
         *
         * substr() is used instead of direct indexing so that a 'stream'
         * keyword located at the very end of data does not read past the string.
         */
        if (substr($this->data, $this->offset, 2) == "\r\n") {
            $this->offset += 2;
        } else if (substr($this->data, $this->offset, 1) == "\n") {
            $this->offset++;
        } else {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
        }

        // Stream data is skipped using the length taken from the stream dictionary
        $dataOffset = $this->offset;

        $this->offset += $streamLength;

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endstream') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endobj') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
                                                         $dataOffset,
                                                         $streamLength),
                                                  (int)$objNum,
                                                  (int)$genNum,
                                                  $this->_objFactory->resolve(),
                                                  $objValue);

        foreach ($this->_elements as $element) {
            $element->setParentObject($obj);
        }

        // Restore offset value
        $this->offset = $offsetSave;

        return $obj;
    }

    /**
     * Get length of source string
     *
     * @return integer
     */
    public function getLength()
    {
        return strlen($this->data);
    }

    /**
     * Get source string
     *
     * @return string
     */
    public function getString()
    {
        return $this->data;
    }

    /**
     * Parse integer value from a binary stream
     *
     * Bytes are combined most significant byte first (big-endian).
     *
     * @param string  $stream
     * @param integer $offset
     * @param integer $size
     * @return integer
     */
    public static function parseIntFromStream($stream, $offset, $size)
    {
        $value = 0;
        for ($count = 0; $count < $size; $count++) {
            $value *= 256;
            $value += ord($stream[$offset + $count]);
        }

        return $value;
    }

    /**
     * Set current context
     *
     * @param Zend_Pdf_Element_Reference_Context $context
     */
    public function setContext(Zend_Pdf_Element_Reference_Context $context)
    {
        $this->_context = $context;
    }

    /**
     * Object constructor
     *
     * Note: PHP duplicates a string, which is sent by value, only if it's updated.
     * Thus we don't need to care about overhead
     *
     * @param string $source
     * @param Zend_Pdf_ElementFactory_Interface $factory
     */
    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
    {
        $this->data         = $source;
        $this->_objFactory  = $factory;
    }
}