/framework/vendor/zend/Zend/Pdf/StringParser.php

http://zoop.googlecode.com/ · PHP · 724 lines · 395 code · 115 blank · 214 comment · 123 complexity · eef744b1653f0550ce9ee9711d21d452 MD5 · raw file

  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Pdf
  17. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id: StringParser.php 20096 2010-01-06 02:05:09Z bkarwin $
  20. */
  21. /** Internally used classes */
  22. require_once 'Zend/Pdf/Element/Array.php';
  23. require_once 'Zend/Pdf/Element/String/Binary.php';
  24. require_once 'Zend/Pdf/Element/Boolean.php';
  25. require_once 'Zend/Pdf/Element/Dictionary.php';
  26. require_once 'Zend/Pdf/Element/Name.php';
  27. require_once 'Zend/Pdf/Element/Null.php';
  28. require_once 'Zend/Pdf/Element/Numeric.php';
  29. require_once 'Zend/Pdf/Element/Object.php';
  30. require_once 'Zend/Pdf/Element/Object/Stream.php';
  31. require_once 'Zend/Pdf/Element/Reference.php';
  32. require_once 'Zend/Pdf/Element/String.php';
  33. /**
  34. * PDF string parser
  35. *
  36. * @package Zend_Pdf
  37. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  38. * @license http://framework.zend.com/license/new-bsd New BSD License
  39. */
  40. class Zend_Pdf_StringParser
  41. {
  42. /**
  43. * Source PDF
  44. *
  45. * @var string
  46. */
  47. public $data = '';
  48. /**
  49. * Current byte position within the source data
  50. *
  51. * @var integer
  52. */
  53. public $offset = 0;
  54. /**
  55. * Current reference context
  56. *
  57. * @var Zend_Pdf_Element_Reference_Context
  58. */
  59. private $_context = null;
  60. /**
  61. * Array of elements of the currently parsed object/trailer
  62. *
  63. * @var array
  64. */
  65. private $_elements = array();
  66. /**
  67. * PDF objects factory.
  68. *
  69. * @var Zend_Pdf_ElementFactory_Interface
  70. */
  71. private $_objFactory = null;
  72. /**
  73. * Clean up resources.
  74. *
  75. * Clear current state to remove cyclic object references
  76. */
  77. public function cleanUp()
  78. {
  79. $this->_context = null;
  80. $this->_elements = array();
  81. $this->_objFactory = null;
  82. }
  83. /**
  84. * Character with code $chCode is white space
  85. *
  86. * @param integer $chCode
  87. * @return boolean
  88. */
  89. public static function isWhiteSpace($chCode)
  90. {
  91. if ($chCode == 0x00 || // null character
  92. $chCode == 0x09 || // Tab
  93. $chCode == 0x0A || // Line feed
  94. $chCode == 0x0C || // Form Feed
  95. $chCode == 0x0D || // Carriage return
  96. $chCode == 0x20 // Space
  97. ) {
  98. return true;
  99. } else {
  100. return false;
  101. }
  102. }
  103. /**
  104. * Character with code $chCode is a delimiter character
  105. *
  106. * @param integer $chCode
  107. * @return boolean
  108. */
  109. public static function isDelimiter($chCode )
  110. {
  111. if ($chCode == 0x28 || // '('
  112. $chCode == 0x29 || // ')'
  113. $chCode == 0x3C || // '<'
  114. $chCode == 0x3E || // '>'
  115. $chCode == 0x5B || // '['
  116. $chCode == 0x5D || // ']'
  117. $chCode == 0x7B || // '{'
  118. $chCode == 0x7D || // '}'
  119. $chCode == 0x2F || // '/'
  120. $chCode == 0x25 // '%'
  121. ) {
  122. return true;
  123. } else {
  124. return false;
  125. }
  126. }
  127. /**
  128. * Skip white space
  129. *
  130. * @param boolean $skipComment
  131. */
  132. public function skipWhiteSpace($skipComment = true)
  133. {
  134. if ($skipComment) {
  135. while (true) {
  136. $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
  137. if ($this->offset < strlen($this->data) && $this->data[$this->offset] == '%') {
  138. // Skip comment
  139. $this->offset += strcspn($this->data, "\r\n", $this->offset);
  140. } else {
  141. // Non white space character not equal to '%' is found
  142. return;
  143. }
  144. }
  145. } else {
  146. $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
  147. }
  148. // /** Original (non-optimized) implementation. */
  149. //
  150. // while ($this->offset < strlen($this->data)) {
  151. // if (strpos("\x00\t\n\f\r ", $this->data[$this->offset]) !== false) {
  152. // $this->offset++;
  153. // } else if (ord($this->data[$this->offset]) == 0x25 && $skipComment) { // '%'
  154. // $this->skipComment();
  155. // } else {
  156. // return;
  157. // }
  158. // }
  159. }
  160. /**
  161. * Skip comment
  162. */
  163. public function skipComment()
  164. {
  165. while ($this->offset < strlen($this->data))
  166. {
  167. if (ord($this->data[$this->offset]) != 0x0A || // Line feed
  168. ord($this->data[$this->offset]) != 0x0d // Carriage return
  169. ) {
  170. $this->offset++;
  171. } else {
  172. return;
  173. }
  174. }
  175. }
  176. /**
  177. * Read comment line
  178. *
  179. * @return string
  180. */
  181. public function readComment()
  182. {
  183. $this->skipWhiteSpace(false);
  184. /** Check if it's a comment line */
  185. if ($this->data[$this->offset] != '%') {
  186. return '';
  187. }
  188. for ($start = $this->offset;
  189. $this->offset < strlen($this->data);
  190. $this->offset++) {
  191. if (ord($this->data[$this->offset]) == 0x0A || // Line feed
  192. ord($this->data[$this->offset]) == 0x0d // Carriage return
  193. ) {
  194. break;
  195. }
  196. }
  197. return substr($this->data, $start, $this->offset-$start);
  198. }
  199. /**
  200. * Returns next lexeme from a pdf stream
  201. *
  202. * @return string
  203. */
  204. public function readLexeme()
  205. {
  206. // $this->skipWhiteSpace();
  207. while (true) {
  208. $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
  209. if ($this->offset < strlen($this->data) && $this->data[$this->offset] == '%') {
  210. $this->offset += strcspn($this->data, "\r\n", $this->offset);
  211. } else {
  212. break;
  213. }
  214. }
  215. if ($this->offset >= strlen($this->data)) {
  216. return '';
  217. }
  218. if ( /* self::isDelimiter( ord($this->data[$start]) ) */
  219. strpos('()<>[]{}/%', $this->data[$this->offset]) !== false ) {
  220. switch (substr($this->data, $this->offset, 2)) {
  221. case '<<':
  222. $this->offset += 2;
  223. return '<<';
  224. break;
  225. case '>>':
  226. $this->offset += 2;
  227. return '>>';
  228. break;
  229. default:
  230. return $this->data[$this->offset++];
  231. break;
  232. }
  233. } else {
  234. $start = $this->offset;
  235. $this->offset += strcspn($this->data, "()<>[]{}/%\x00\t\n\f\r ", $this->offset);
  236. return substr($this->data, $start, $this->offset - $start);
  237. }
  238. }
  239. /**
  240. * Read elemental object from a PDF stream
  241. *
  242. * @return Zend_Pdf_Element
  243. * @throws Zend_Pdf_Exception
  244. */
  245. public function readElement($nextLexeme = null)
  246. {
  247. if ($nextLexeme === null) {
  248. $nextLexeme = $this->readLexeme();
  249. }
  250. /**
  251. * Note: readElement() method is a public method and could be invoked from other classes.
  252. * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
  253. * about _elements member management.
  254. */
  255. switch ($nextLexeme) {
  256. case '(':
  257. return ($this->_elements[] = $this->_readString());
  258. case '<':
  259. return ($this->_elements[] = $this->_readBinaryString());
  260. case '/':
  261. return ($this->_elements[] = new Zend_Pdf_Element_Name(
  262. Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
  263. ));
  264. case '[':
  265. return ($this->_elements[] = $this->_readArray());
  266. case '<<':
  267. return ($this->_elements[] = $this->_readDictionary());
  268. case ')':
  269. // fall through to next case
  270. case '>':
  271. // fall through to next case
  272. case ']':
  273. // fall through to next case
  274. case '>>':
  275. // fall through to next case
  276. case '{':
  277. // fall through to next case
  278. case '}':
  279. require_once 'Zend/Pdf/Exception.php';
  280. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
  281. $this->offset));
  282. default:
  283. if (strcasecmp($nextLexeme, 'true') == 0) {
  284. return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
  285. } else if (strcasecmp($nextLexeme, 'false') == 0) {
  286. return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
  287. } else if (strcasecmp($nextLexeme, 'null') == 0) {
  288. return ($this->_elements[] = new Zend_Pdf_Element_Null());
  289. }
  290. $ref = $this->_readReference($nextLexeme);
  291. if ($ref !== null) {
  292. return ($this->_elements[] = $ref);
  293. }
  294. return ($this->_elements[] = $this->_readNumeric($nextLexeme));
  295. }
  296. }
  297. /**
  298. * Read string PDF object
  299. * Also reads trailing ')' from a pdf stream
  300. *
  301. * @return Zend_Pdf_Element_String
  302. * @throws Zend_Pdf_Exception
  303. */
  304. private function _readString()
  305. {
  306. $start = $this->offset;
  307. $openedBrackets = 1;
  308. $this->offset += strcspn($this->data, '()\\', $this->offset);
  309. while ($this->offset < strlen($this->data)) {
  310. switch (ord( $this->data[$this->offset] )) {
  311. case 0x28: // '(' - opened bracket in the string, needs balanced pair.
  312. $this->offset++;
  313. $openedBrackets++;
  314. break;
  315. case 0x29: // ')' - pair to the opened bracket
  316. $this->offset++;
  317. $openedBrackets--;
  318. break;
  319. case 0x5C: // '\\' - escape sequence, skip next char from a check
  320. $this->offset += 2;
  321. }
  322. if ($openedBrackets == 0) {
  323. break; // end of string
  324. }
  325. $this->offset += strcspn($this->data, '()\\', $this->offset);
  326. }
  327. if ($openedBrackets != 0) {
  328. require_once 'Zend/Pdf/Exception.php';
  329. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
  330. }
  331. return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
  332. $start,
  333. $this->offset - $start - 1) ));
  334. }
  335. /**
  336. * Read binary string PDF object
  337. * Also reads trailing '>' from a pdf stream
  338. *
  339. * @return Zend_Pdf_Element_String_Binary
  340. * @throws Zend_Pdf_Exception
  341. */
  342. private function _readBinaryString()
  343. {
  344. $start = $this->offset;
  345. $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);
  346. if ($this->offset >= strlen($this->data) - 1) {
  347. require_once 'Zend/Pdf/Exception.php';
  348. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
  349. }
  350. if ($this->data[$this->offset++] != '>') {
  351. require_once 'Zend/Pdf/Exception.php';
  352. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
  353. }
  354. return new Zend_Pdf_Element_String_Binary(
  355. Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
  356. $start,
  357. $this->offset - $start - 1) ));
  358. }
  359. /**
  360. * Read array PDF object
  361. * Also reads trailing ']' from a pdf stream
  362. *
  363. * @return Zend_Pdf_Element_Array
  364. * @throws Zend_Pdf_Exception
  365. */
  366. private function _readArray()
  367. {
  368. $elements = array();
  369. while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
  370. if ($nextLexeme != ']') {
  371. $elements[] = $this->readElement($nextLexeme);
  372. } else {
  373. return new Zend_Pdf_Element_Array($elements);
  374. }
  375. }
  376. require_once 'Zend/Pdf/Exception.php';
  377. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
  378. }
  379. /**
  380. * Read dictionary PDF object
  381. * Also reads trailing '>>' from a pdf stream
  382. *
  383. * @return Zend_Pdf_Element_Dictionary
  384. * @throws Zend_Pdf_Exception
  385. */
  386. private function _readDictionary()
  387. {
  388. $dictionary = new Zend_Pdf_Element_Dictionary();
  389. while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
  390. if ($nextLexeme != '>>') {
  391. $nameStart = $this->offset - strlen($nextLexeme);
  392. $name = $this->readElement($nextLexeme);
  393. $value = $this->readElement();
  394. if (!$name instanceof Zend_Pdf_Element_Name) {
  395. require_once 'Zend/Pdf/Exception.php';
  396. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
  397. }
  398. $dictionary->add($name, $value);
  399. } else {
  400. return $dictionary;
  401. }
  402. }
  403. require_once 'Zend/Pdf/Exception.php';
  404. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
  405. }
  406. /**
  407. * Read reference PDF object
  408. *
  409. * @param string $nextLexeme
  410. * @return Zend_Pdf_Element_Reference
  411. */
  412. private function _readReference($nextLexeme = null)
  413. {
  414. $start = $this->offset;
  415. if ($nextLexeme === null) {
  416. $objNum = $this->readLexeme();
  417. } else {
  418. $objNum = $nextLexeme;
  419. }
  420. if (!ctype_digit($objNum)) { // it's not a reference
  421. $this->offset = $start;
  422. return null;
  423. }
  424. $genNum = $this->readLexeme();
  425. if (!ctype_digit($genNum)) { // it's not a reference
  426. $this->offset = $start;
  427. return null;
  428. }
  429. $rMark = $this->readLexeme();
  430. if ($rMark != 'R') { // it's not a reference
  431. $this->offset = $start;
  432. return null;
  433. }
  434. $ref = new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
  435. return $ref;
  436. }
  437. /**
  438. * Read numeric PDF object
  439. *
  440. * @param string $nextLexeme
  441. * @return Zend_Pdf_Element_Numeric
  442. */
  443. private function _readNumeric($nextLexeme = null)
  444. {
  445. if ($nextLexeme === null) {
  446. $nextLexeme = $this->readLexeme();
  447. }
  448. return new Zend_Pdf_Element_Numeric($nextLexeme);
  449. }
  450. /**
  451. * Read inderect object from a PDF stream
  452. *
  453. * @param integer $offset
  454. * @param Zend_Pdf_Element_Reference_Context $context
  455. * @return Zend_Pdf_Element_Object
  456. */
  457. public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
  458. {
  459. if ($offset === null ) {
  460. return new Zend_Pdf_Element_Null();
  461. }
  462. // Save current offset to make getObject() reentrant
  463. $offsetSave = $this->offset;
  464. $this->offset = $offset;
  465. $this->_context = $context;
  466. $this->_elements = array();
  467. $objNum = $this->readLexeme();
  468. if (!ctype_digit($objNum)) {
  469. require_once 'Zend/Pdf/Exception.php';
  470. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
  471. }
  472. $genNum = $this->readLexeme();
  473. if (!ctype_digit($genNum)) {
  474. require_once 'Zend/Pdf/Exception.php';
  475. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
  476. }
  477. $objKeyword = $this->readLexeme();
  478. if ($objKeyword != 'obj') {
  479. require_once 'Zend/Pdf/Exception.php';
  480. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
  481. }
  482. $objValue = $this->readElement();
  483. $nextLexeme = $this->readLexeme();
  484. if( $nextLexeme == 'endobj' ) {
  485. /**
  486. * Object is not generated by factory (thus it's not marked as modified object).
  487. * But factory is assigned to the obect.
  488. */
  489. $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());
  490. foreach ($this->_elements as $element) {
  491. $element->setParentObject($obj);
  492. }
  493. // Restore offset value
  494. $this->offset = $offsetSave;
  495. return $obj;
  496. }
  497. /**
  498. * It's a stream object
  499. */
  500. if ($nextLexeme != 'stream') {
  501. require_once 'Zend/Pdf/Exception.php';
  502. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
  503. }
  504. if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
  505. require_once 'Zend/Pdf/Exception.php';
  506. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
  507. }
  508. /**
  509. * References are automatically dereferenced at this moment.
  510. */
  511. $streamLength = $objValue->Length->value;
  512. /**
  513. * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
  514. * This restriction gives the possibility to recognize all cases exactly
  515. */
  516. if ($this->data[$this->offset] == "\r" &&
  517. $this->data[$this->offset + 1] == "\n" ) {
  518. $this->offset += 2;
  519. } else if ($this->data[$this->offset] == "\n" ) {
  520. $this->offset++;
  521. } else {
  522. require_once 'Zend/Pdf/Exception.php';
  523. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
  524. }
  525. $dataOffset = $this->offset;
  526. $this->offset += $streamLength;
  527. $nextLexeme = $this->readLexeme();
  528. if ($nextLexeme != 'endstream') {
  529. require_once 'Zend/Pdf/Exception.php';
  530. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
  531. }
  532. $nextLexeme = $this->readLexeme();
  533. if ($nextLexeme != 'endobj') {
  534. require_once 'Zend/Pdf/Exception.php';
  535. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
  536. }
  537. $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
  538. $dataOffset,
  539. $streamLength),
  540. (int)$objNum,
  541. (int)$genNum,
  542. $this->_objFactory->resolve(),
  543. $objValue);
  544. foreach ($this->_elements as $element) {
  545. $element->setParentObject($obj);
  546. }
  547. // Restore offset value
  548. $this->offset = $offsetSave;
  549. return $obj;
  550. }
  551. /**
  552. * Get length of source string
  553. *
  554. * @return integer
  555. */
  556. public function getLength()
  557. {
  558. return strlen($this->data);
  559. }
  560. /**
  561. * Get source string
  562. *
  563. * @return string
  564. */
  565. public function getString()
  566. {
  567. return $this->data;
  568. }
  569. /**
  570. * Parse integer value from a binary stream
  571. *
  572. * @param string $stream
  573. * @param integer $offset
  574. * @param integer $size
  575. * @return integer
  576. */
  577. public static function parseIntFromStream($stream, $offset, $size)
  578. {
  579. $value = 0;
  580. for ($count = 0; $count < $size; $count++) {
  581. $value *= 256;
  582. $value += ord($stream[$offset + $count]);
  583. }
  584. return $value;
  585. }
  586. /**
  587. * Set current context
  588. *
  589. * @param Zend_Pdf_Element_Reference_Context $context
  590. */
  591. public function setContext(Zend_Pdf_Element_Reference_Context $context)
  592. {
  593. $this->_context = $context;
  594. }
  595. /**
  596. * Object constructor
  597. *
  598. * Note: PHP duplicates string, which is sent by value, only of it's updated.
  599. * Thus we don't need to care about overhead
  600. *
  601. * @param string $pdfString
  602. * @param Zend_Pdf_ElementFactory_Interface $factory
  603. */
  604. public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
  605. {
  606. $this->data = $source;
  607. $this->_objFactory = $factory;
  608. }
  609. }