PageRenderTime 53ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/library/Zend/Pdf/StringParser.php

https://bitbucket.org/baruffaldi/website-2008-computer-shopping-3
PHP | 709 lines | 376 code | 120 blank | 213 comment | 133 complexity | cc4751af231ef5e9c388aba27b573beb MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @package Zend_Pdf
  16. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  17. * @license http://framework.zend.com/license/new-bsd New BSD License
  18. */
  19. /** Zend_Pdf_Element */
  20. require_once 'Zend/Pdf/Element.php';
  21. /** Zend_Pdf_Element_Array */
  22. require_once 'Zend/Pdf/Element/Array.php';
  23. /** Zend_Pdf_Element_String_Binary */
  24. require_once 'Zend/Pdf/Element/String/Binary.php';
  25. /** Zend_Pdf_Element_Boolean */
  26. require_once 'Zend/Pdf/Element/Boolean.php';
  27. /** Zend_Pdf_Element_Dictionary */
  28. require_once 'Zend/Pdf/Element/Dictionary.php';
  29. /** Zend_Pdf_Element_Name */
  30. require_once 'Zend/Pdf/Element/Name.php';
  31. /** Zend_Pdf_Element_Numeric */
  32. require_once 'Zend/Pdf/Element/Numeric.php';
  33. /** Zend_Pdf_Element_Object */
  34. require_once 'Zend/Pdf/Element/Object.php';
  35. /** Zend_Pdf_Element_Reference */
  36. require_once 'Zend/Pdf/Element/Reference.php';
  37. /** Zend_Pdf_Element_Object_Stream */
  38. require_once 'Zend/Pdf/Element/Object/Stream.php';
  39. /** Zend_Pdf_Element_String */
  40. require_once 'Zend/Pdf/Element/String.php';
  41. /** Zend_Pdf_Element_Null */
  42. require_once 'Zend/Pdf/Element/Null.php';
  43. /** Zend_Pdf_Element_Reference_Context */
  44. require_once 'Zend/Pdf/Element/Reference/Context.php';
  45. /** Zend_Pdf_Element_Reference_Table */
  46. require_once 'Zend/Pdf/Element/Reference/Table.php';
  47. /** Zend_Pdf_ElementFactory_Interface */
  48. require_once 'Zend/Pdf/ElementFactory/Interface.php';
  49. /** Zend_Pdf_PhpArray */
  50. require_once 'Zend/Pdf/PhpArray.php';
  /**
   * PDF string parser.
   *
   * Tokenizes PDF source held in a PHP string ($data) starting at a movable
   * cursor ($offset) and builds Zend_Pdf_Element objects from the lexemes.
   *
   * @package Zend_Pdf
   * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
   * @license http://framework.zend.com/license/new-bsd New BSD License
   */
  class Zend_Pdf_StringParser
  {
      /**
       * Source PDF
       *
       * @var string
       */
      public $data = '';

      /**
       * Current position (byte offset) in $data
       *
       * @var integer
       */
      public $offset = 0;

      /**
       * Current reference context
       *
       * @var Zend_Pdf_Element_Reference_Context
       */
      private $_context = null;

      /**
       * Array of elements of the currently parsed object/trailer
       *
       * @var array
       */
      private $_elements = array();

      /**
       * PDF objects factory.
       *
       * @var Zend_Pdf_ElementFactory_Interface
       */
      private $_objFactory = null;


      /**
       * Clean up resources.
       *
       * Clears the context, the collected elements and the factory so that
       * cyclic object references held through this parser are released.
       */
      public function cleanUp()
      {
          $this->_context    = null;
          $this->_elements   = array();
          $this->_objFactory = null;
      }

      /**
       * Check whether the character with code $chCode is white space.
       *
       * @param integer $chCode
       * @return boolean
       */
      public static function isWhiteSpace($chCode)
      {
          return $chCode == 0x00    // null character
              || $chCode == 0x09    // Tab
              || $chCode == 0x0A    // Line feed
              || $chCode == 0x0C    // Form Feed
              || $chCode == 0x0D    // Carriage return
              || $chCode == 0x20;   // Space
      }

      /**
       * Check whether the character with code $chCode is a delimiter character.
       *
       * @param integer $chCode
       * @return boolean
       */
      public static function isDelimiter($chCode)
      {
          return $chCode == 0x28    // '('
              || $chCode == 0x29    // ')'
              || $chCode == 0x3C    // '<'
              || $chCode == 0x3E    // '>'
              || $chCode == 0x5B    // '['
              || $chCode == 0x5D    // ']'
              || $chCode == 0x7B    // '{'
              || $chCode == 0x7D    // '}'
              || $chCode == 0x2F    // '/'
              || $chCode == 0x25;   // '%'
      }

      /**
       * Skip white space.
       *
       * Advances $offset past white space characters and, when $skipComment
       * is true, past '%' comments as well.
       *
       * @param boolean $skipComment
       */
      public function skipWhiteSpace($skipComment = true)
      {
          $length = strlen($this->data);

          while ($this->offset < $length) {
              $chCode = ord($this->data[$this->offset]);

              if (self::isWhiteSpace($chCode)) {
                  $this->offset++;
              } else if ($chCode == 0x25 && $skipComment) { // '%'
                  // Stops on the end-of-line character, which is then
                  // consumed as white space by the next iteration.
                  $this->skipComment();
              } else {
                  return;
              }
          }
      }

      /**
       * Skip comment.
       *
       * Advances $offset up to (but not past) the first line feed or carriage
       * return, or to the end of data if there is none.
       */
      public function skipComment()
      {
          $length = strlen($this->data);

          while ($this->offset < $length) {
              $chCode = ord($this->data[$this->offset]);

              // A comment ends at EITHER end-of-line character. (Testing
              // "not LF OR not CR" is always true and would swallow the
              // whole rest of the data.)
              if ($chCode == 0x0A || $chCode == 0x0D) {
                  return;
              }

              $this->offset++;
          }
      }

      /**
       * Read comment line.
       *
       * Skips leading white space (but not comments). If the cursor then
       * points at '%', returns the text from '%' up to the end of line
       * (exclusive) and leaves $offset on the end-of-line character.
       *
       * @return string Comment text including the leading '%', or an empty
       *                string if there is no comment at the current position
       */
      public function readComment()
      {
          $this->skipWhiteSpace(false);

          $length = strlen($this->data);

          /** Check if it's a comment line (and that any data is left at all) */
          if ($this->offset >= $length || $this->data[$this->offset] != '%') {
              return '';
          }

          for ($start = $this->offset; $this->offset < $length; $this->offset++) {
              $chCode = ord($this->data[$this->offset]);

              if ($chCode == 0x0A || $chCode == 0x0D) { // Line feed or carriage return
                  break;
              }
          }

          return substr($this->data, $start, $this->offset - $start);
      }

      /**
       * Returns next lexeme from a pdf stream.
       *
       * A lexeme is either a delimiter ('<<' and '>>' are returned as single
       * two-character lexemes) or a run of characters up to the next
       * delimiter or white space.
       *
       * @return string Lexeme, or an empty string at the end of data
       */
      public function readLexeme()
      {
          $this->skipWhiteSpace();

          $length = strlen($this->data);
          if ($this->offset >= $length) {
              return '';
          }

          $start = $this->offset;

          if (self::isDelimiter(ord($this->data[$start]))) {
              $hasNext = ($start + 1 < $length);

              if ($this->data[$start] == '<' && $hasNext && $this->data[$start + 1] == '<') {
                  $this->offset += 2;
                  return '<<';
              } else if ($this->data[$start] == '>' && $hasNext && $this->data[$start + 1] == '>') {
                  $this->offset += 2;
                  return '>>';
              } else {
                  $this->offset++;
                  return $this->data[$start];
              }
          }

          while ($this->offset < $length
                 && !self::isDelimiter(ord($this->data[$this->offset]))
                 && !self::isWhiteSpace(ord($this->data[$this->offset]))) {
              $this->offset++;
          }

          return substr($this->data, $start, $this->offset - $start);
      }

      /**
       * Read elemental object from a PDF stream.
       *
       * Every element created here is also appended to $_elements, which
       * getObject() uses to assign the parent object afterwards.
       *
       * @param string|null $nextLexeme Lexeme already read by the caller, or
       *                                null to read the next one from the data
       * @return Zend_Pdf_Element
       * @throws Zend_Pdf_Exception
       */
      public function readElement($nextLexeme = null)
      {
          if ($nextLexeme === null) {
              $nextLexeme = $this->readLexeme();
          }

          /**
           * Note: readElement() method is a public method and could be invoked from other classes.
           * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
           * about _elements member management.
           */
          switch ($nextLexeme) {
              case '(':
                  return ($this->_elements[] = $this->_readString());

              case '<':
                  return ($this->_elements[] = $this->_readBinaryString());

              case '/':
                  return ($this->_elements[] = new Zend_Pdf_Element_Name(
                      Zend_Pdf_Element_Name::unescape($this->readLexeme())
                  ));

              case '[':
                  return ($this->_elements[] = $this->_readArray());

              case '<<':
                  return ($this->_elements[] = $this->_readDictionary());

              // A closing delimiter, '{' or '}' cannot start an element.
              case ')':
                  // fall through to next case
              case '>':
                  // fall through to next case
              case ']':
                  // fall through to next case
              case '>>':
                  // fall through to next case
              case '{':
                  // fall through to next case
              case '}':
                  throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
                                                       $this->offset));

              default:
                  if (strcasecmp($nextLexeme, 'true') == 0) {
                      return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
                  } else if (strcasecmp($nextLexeme, 'false') == 0) {
                      return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
                  } else if (strcasecmp($nextLexeme, 'null') == 0) {
                      return ($this->_elements[] = new Zend_Pdf_Element_Null());
                  }

                  // "<num> <num> R" is a reference; anything else is handed
                  // to the numeric element.
                  $ref = $this->_readReference($nextLexeme);
                  if ($ref !== null) {
                      return ($this->_elements[] = $ref);
                  }

                  return ($this->_elements[] = $this->_readNumeric($nextLexeme));
          }
      }

      /**
       * Read string PDF object.
       * Also reads trailing ')' from a pdf stream.
       *
       * The opening '(' must already be consumed; $offset points at the
       * first character of the string body.
       *
       * @return Zend_Pdf_Element_String
       * @throws Zend_Pdf_Exception
       */
      private function _readString()
      {
          $start          = $this->offset;
          $length         = strlen($this->data);
          $openedBrackets = 1;

          while ($this->offset < $length) {
              switch (ord($this->data[$this->offset])) {
                  case 0x28: // '(' - opened bracket in the string, needs balanced pair.
                      $openedBrackets++;
                      break;

                  case 0x29: // ')' - pair to the opened bracket
                      $openedBrackets--;
                      break;

                  case 0x5C: // '\\' - escape sequence, skip next char from a check
                      $this->offset++;
                      break;
              }

              $this->offset++;

              if ($openedBrackets == 0) {
                  break; // end of string
              }
          }

          if ($openedBrackets != 0) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
          }

          // "- 1" drops the closing ')' from the returned body.
          return new Zend_Pdf_Element_String(
              Zend_Pdf_Element_String::unescape(substr($this->data, $start, $this->offset - $start - 1))
          );
      }

      /**
       * Read binary string PDF object.
       * Also reads trailing '>' from a pdf stream.
       *
       * The opening '<' must already be consumed. Only hexadecimal digits and
       * white space are accepted before the closing '>'.
       *
       * @return Zend_Pdf_Element_String_Binary
       * @throws Zend_Pdf_Exception
       */
      private function _readBinaryString()
      {
          $start  = $this->offset;
          $length = strlen($this->data);

          while ($this->offset < $length) {
              $char = $this->data[$this->offset];

              if (self::isWhiteSpace(ord($char)) || ctype_xdigit($char)) {
                  $this->offset++;
              } else if ($char == '>') {
                  $this->offset++;

                  // "- 1" drops the closing '>' from the returned body.
                  return new Zend_Pdf_Element_String_Binary(
                      Zend_Pdf_Element_String_Binary::unescape(substr($this->data, $start, $this->offset - $start - 1))
                  );
              } else {
                  throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
              }
          }

          throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while binary string reading. Offset - 0x%X. \'>\' expected.', $start));
      }

      /**
       * Read array PDF object.
       * Also reads trailing ']' from a pdf stream.
       *
       * @return Zend_Pdf_Element_Array
       * @throws Zend_Pdf_Exception
       */
      private function _readArray()
      {
          $elements = array();

          while (strlen($nextLexeme = $this->readLexeme()) != 0) {
              if ($nextLexeme == ']') {
                  return new Zend_Pdf_Element_Array($elements);
              }

              $elements[] = $this->readElement($nextLexeme);
          }

          throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
      }

      /**
       * Read dictionary PDF object.
       * Also reads trailing '>>' from a pdf stream.
       *
       * @return Zend_Pdf_Element_Dictionary
       * @throws Zend_Pdf_Exception
       */
      private function _readDictionary()
      {
          $dictionary = new Zend_Pdf_Element_Dictionary();

          while (strlen($nextLexeme = $this->readLexeme()) != 0) {
              if ($nextLexeme == '>>') {
                  return $dictionary;
              }

              $nameStart = $this->offset - strlen($nextLexeme);
              $name      = $this->readElement($nextLexeme);

              // Validate the key before touching the value, so that a bad key
              // is reported as such instead of being masked by whatever error
              // parsing the following tokens as a value might raise.
              if (!$name instanceof Zend_Pdf_Element_Name) {
                  throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
              }

              $value = $this->readElement();

              $dictionary->add($name, $value);
          }

          throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
      }

      /**
       * Read reference PDF object.
       *
       * Tries to match "<object number> <generation number> R". If the
       * lexemes do not form a reference, $offset is rewound to where it was
       * on entry and null is returned.
       *
       * @param string|null $nextLexeme Object number lexeme already read by
       *                                the caller, or null to read it here
       * @return Zend_Pdf_Element_Reference|null
       */
      private function _readReference($nextLexeme = null)
      {
          $start = $this->offset;

          if ($nextLexeme === null) {
              $objNum = $this->readLexeme();
          } else {
              $objNum = $nextLexeme;
          }
          if (!ctype_digit($objNum)) { // it's not a reference
              $this->offset = $start;
              return null;
          }

          $genNum = $this->readLexeme();
          if (!ctype_digit($genNum)) { // it's not a reference
              $this->offset = $start;
              return null;
          }

          $rMark = $this->readLexeme();
          if ($rMark != 'R') { // it's not a reference
              $this->offset = $start;
              return null;
          }

          return new Zend_Pdf_Element_Reference(
              (int)$objNum,
              (int)$genNum,
              $this->_context,
              $this->_objFactory->resolve()
          );
      }

      /**
       * Read numeric PDF object.
       *
       * @param string|null $nextLexeme Lexeme already read by the caller, or
       *                                null to read the next one from the data
       * @return Zend_Pdf_Element_Numeric
       */
      private function _readNumeric($nextLexeme = null)
      {
          if ($nextLexeme === null) {
              $nextLexeme = $this->readLexeme();
          }

          return new Zend_Pdf_Element_Numeric($nextLexeme);
      }

      /**
       * Read indirect object from a PDF stream.
       *
       * Parses "<num> <gen> obj <value> endobj" or
       * "<num> <gen> obj <dictionary> stream ... endstream endobj" starting
       * at $offset. On success the parser's own $offset is restored to the
       * value it had before the call.
       *
       * @param integer|null $offset Position of the object in the data; null
       *                             yields a Zend_Pdf_Element_Null
       * @param Zend_Pdf_Element_Reference_Context $context
       * @return Zend_Pdf_Element_Object
       * @throws Zend_Pdf_Exception
       */
      public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
      {
          if ($offset === null) {
              return new Zend_Pdf_Element_Null();
          }

          // Save current offset to make getObject() reentrant
          $offsetSave = $this->offset;

          $this->offset    = $offset;
          $this->_context  = $context;
          $this->_elements = array();

          $objNum = $this->readLexeme();
          if (!ctype_digit($objNum)) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
          }

          $genNum = $this->readLexeme();
          if (!ctype_digit($genNum)) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
          }

          $objKeyword = $this->readLexeme();
          if ($objKeyword != 'obj') {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
          }

          $objValue = $this->readElement();

          // Snapshot the elements that belong to THIS object right away.
          // Resolving the stream Length below may dereference an indirect
          // reference, which presumably re-enters getObject() on this same
          // parser (that is what the saved offset above is for); a nested
          // call resets $this->_elements and would otherwise make us attach
          // the wrong elements to the object built here.
          $elements = $this->_elements;

          $nextLexeme = $this->readLexeme();

          if ($nextLexeme == 'endobj') {
              /**
               * Object is not generated by factory (thus it's not marked as modified object).
               * But factory is assigned to the object.
               */
              $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());

              foreach ($elements as $element) {
                  $element->setParentObject($obj);
              }

              // Restore offset value
              $this->offset = $offsetSave;

              return $obj;
          }

          /**
           * It's a stream object
           */
          if ($nextLexeme != 'stream') {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
          }

          if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
          }

          /**
           * References are automatically dereferenced at this moment.
           */
          $streamLength = $objValue->Length->value;

          /**
           * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
           * This restriction gives the possibility to recognize all cases exactly
           */
          $length = strlen($this->data);
          if ($this->offset + 1 < $length
              && $this->data[$this->offset] == "\r"
              && $this->data[$this->offset + 1] == "\n") {
              $this->offset += 2;
          } else if ($this->offset < $length && $this->data[$this->offset] == "\n") {
              $this->offset++;
          } else {
              // Also reached when the data ends right after 'stream'.
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
          }

          // The stream body is taken verbatim by length, not by scanning.
          $dataOffset    = $this->offset;
          $this->offset += $streamLength;

          $nextLexeme = $this->readLexeme();
          if ($nextLexeme != 'endstream') {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
          }

          $nextLexeme = $this->readLexeme();
          if ($nextLexeme != 'endobj') {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
          }

          $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
                                                           $dataOffset,
                                                           $streamLength),
                                                    (int)$objNum,
                                                    (int)$genNum,
                                                    $this->_objFactory->resolve(),
                                                    $objValue);

          foreach ($elements as $element) {
              $element->setParentObject($obj);
          }

          // Restore offset value
          $this->offset = $offsetSave;

          return $obj;
      }

      /**
       * Get length of source string
       *
       * @return integer Length in bytes
       */
      public function getLength()
      {
          return strlen($this->data);
      }

      /**
       * Get source string
       *
       * @return string
       */
      public function getString()
      {
          return $this->data;
      }

      /**
       * Parse integer value from a binary stream.
       *
       * Reads $size bytes starting at $offset and combines them with the
       * first byte as the most significant one (big-endian).
       *
       * @param string  $stream
       * @param integer $offset
       * @param integer $size
       * @return integer
       */
      public static function parseIntFromStream($stream, $offset, $size)
      {
          $value = 0;
          for ($count = 0; $count < $size; $count++) {
              $value *= 256;
              $value += ord($stream[$offset + $count]);
          }

          return $value;
      }

      /**
       * Set current context
       *
       * @param Zend_Pdf_Element_Reference_Context $context
       */
      public function setContext(Zend_Pdf_Element_Reference_Context $context)
      {
          $this->_context = $context;
      }

      /**
       * Object constructor
       *
       * Note: PHP duplicates a string which is sent by value only if it's updated.
       * Thus we don't need to care about overhead
       *
       * @param string $source PDF source to parse
       * @param Zend_Pdf_ElementFactory_Interface $factory
       */
      public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
      {
          $this->data        = $source;
          $this->_objFactory = $factory;
      }
  }