PageRenderTime 51ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/Zend/Pdf/StringParser.php

https://bitbucket.org/simukti/zf1
PHP | 731 lines | 452 code | 92 blank | 187 comment | 110 complexity | fdc557da0f6536d2fc472d39fcf76fbd MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Pdf
  17. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id: StringParser.php 24593 2012-01-05 20:35:02Z matthew $
  20. */
  21. /** Internally used classes */
  22. require_once 'Zend/Pdf/Element/Array.php';
  23. require_once 'Zend/Pdf/Element/String/Binary.php';
  24. require_once 'Zend/Pdf/Element/Boolean.php';
  25. require_once 'Zend/Pdf/Element/Dictionary.php';
  26. require_once 'Zend/Pdf/Element/Name.php';
  27. require_once 'Zend/Pdf/Element/Null.php';
  28. require_once 'Zend/Pdf/Element/Numeric.php';
  29. require_once 'Zend/Pdf/Element/Object.php';
  30. require_once 'Zend/Pdf/Element/Object/Stream.php';
  31. require_once 'Zend/Pdf/Element/Reference.php';
  32. require_once 'Zend/Pdf/Element/String.php';
  33. /**
  34. * PDF string parser
  35. *
  36. * @package Zend_Pdf
  37. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  38. * @license http://framework.zend.com/license/new-bsd New BSD License
  39. */
  40. class Zend_Pdf_StringParser
  41. {
  42. /**
  43. * Source PDF
  44. *
  45. * @var string
  46. */
  47. public $data = '';
  48. /**
  49. * Current position in a data
  50. *
  51. * @var integer
  52. */
  53. public $offset = 0;
  54. /**
  55. * Current reference context
  56. *
  57. * @var Zend_Pdf_Element_Reference_Context
  58. */
  59. private $_context = null;
  60. /**
  61. * Array of elements of the currently parsed object/trailer
  62. *
  63. * @var array
  64. */
  65. private $_elements = array();
  66. /**
  67. * PDF objects factory.
  68. *
  69. * @var Zend_Pdf_ElementFactory_Interface
  70. */
  71. private $_objFactory = null;
  72. /**
  73. * Clean up resources.
  74. *
  75. * Clear current state to remove cyclic object references
  76. */
  77. public function cleanUp()
  78. {
  79. $this->_context = null;
  80. $this->_elements = array();
  81. $this->_objFactory = null;
  82. }
  83. /**
  84. * Character with code $chCode is white space
  85. *
  86. * @param integer $chCode
  87. * @return boolean
  88. */
  89. public static function isWhiteSpace($chCode)
  90. {
  91. if ($chCode == 0x00 || // null character
  92. $chCode == 0x09 || // Tab
  93. $chCode == 0x0A || // Line feed
  94. $chCode == 0x0C || // Form Feed
  95. $chCode == 0x0D || // Carriage return
  96. $chCode == 0x20 // Space
  97. ) {
  98. return true;
  99. } else {
  100. return false;
  101. }
  102. }
  103. /**
  104. * Character with code $chCode is a delimiter character
  105. *
  106. * @param integer $chCode
  107. * @return boolean
  108. */
  109. public static function isDelimiter($chCode )
  110. {
  111. if ($chCode == 0x28 || // '('
  112. $chCode == 0x29 || // ')'
  113. $chCode == 0x3C || // '<'
  114. $chCode == 0x3E || // '>'
  115. $chCode == 0x5B || // '['
  116. $chCode == 0x5D || // ']'
  117. $chCode == 0x7B || // '{'
  118. $chCode == 0x7D || // '}'
  119. $chCode == 0x2F || // '/'
  120. $chCode == 0x25 // '%'
  121. ) {
  122. return true;
  123. } else {
  124. return false;
  125. }
  126. }
  127. /**
  128. * Skip white space
  129. *
  130. * @param boolean $skipComment
  131. */
  132. public function skipWhiteSpace($skipComment = true)
  133. {
  134. if ($skipComment) {
  135. while (true) {
  136. $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
  137. if ($this->offset < strlen($this->data) && $this->data[$this->offset] == '%') {
  138. // Skip comment
  139. $this->offset += strcspn($this->data, "\r\n", $this->offset);
  140. } else {
  141. // Non white space character not equal to '%' is found
  142. return;
  143. }
  144. }
  145. } else {
  146. $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
  147. }
  148. // /** Original (non-optimized) implementation. */
  149. //
  150. // while ($this->offset < strlen($this->data)) {
  151. // if (strpos("\x00\t\n\f\r ", $this->data[$this->offset]) !== false) {
  152. // $this->offset++;
  153. // } else if (ord($this->data[$this->offset]) == 0x25 && $skipComment) { // '%'
  154. // $this->skipComment();
  155. // } else {
  156. // return;
  157. // }
  158. // }
  159. }
  160. /**
  161. * Skip comment
  162. */
  163. public function skipComment()
  164. {
  165. while ($this->offset < strlen($this->data))
  166. {
  167. if (ord($this->data[$this->offset]) != 0x0A || // Line feed
  168. ord($this->data[$this->offset]) != 0x0d // Carriage return
  169. ) {
  170. $this->offset++;
  171. } else {
  172. return;
  173. }
  174. }
  175. }
  176. /**
  177. * Read comment line
  178. *
  179. * @return string
  180. */
  181. public function readComment()
  182. {
  183. $this->skipWhiteSpace(false);
  184. /** Check if it's a comment line */
  185. if ($this->data[$this->offset] != '%') {
  186. return '';
  187. }
  188. for ($start = $this->offset;
  189. $this->offset < strlen($this->data);
  190. $this->offset++) {
  191. if (ord($this->data[$this->offset]) == 0x0A || // Line feed
  192. ord($this->data[$this->offset]) == 0x0d // Carriage return
  193. ) {
  194. break;
  195. }
  196. }
  197. return substr($this->data, $start, $this->offset-$start);
  198. }
  199. /**
  200. * Returns next lexeme from a pdf stream
  201. *
  202. * @return string
  203. */
  204. public function readLexeme()
  205. {
  206. // $this->skipWhiteSpace();
  207. while (true) {
  208. $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
  209. if ($this->offset < strlen($this->data) && $this->data[$this->offset] == '%') {
  210. $this->offset += strcspn($this->data, "\r\n", $this->offset);
  211. } else {
  212. break;
  213. }
  214. }
  215. if ($this->offset >= strlen($this->data)) {
  216. return '';
  217. }
  218. if ( /* self::isDelimiter( ord($this->data[$start]) ) */
  219. strpos('()<>[]{}/%', $this->data[$this->offset]) !== false ) {
  220. switch (substr($this->data, $this->offset, 2)) {
  221. case '<<':
  222. $this->offset += 2;
  223. return '<<';
  224. break;
  225. case '>>':
  226. $this->offset += 2;
  227. return '>>';
  228. break;
  229. default:
  230. return $this->data[$this->offset++];
  231. break;
  232. }
  233. } else {
  234. $start = $this->offset;
  235. $compare = '';
  236. if( version_compare( phpversion(), '5.2.5' ) >= 0) {
  237. $compare = "()<>[]{}/%\x00\t\n\f\r ";
  238. } else {
  239. $compare = "()<>[]{}/%\x00\t\n\r ";
  240. }
  241. $this->offset += strcspn($this->data, $compare, $this->offset);
  242. return substr($this->data, $start, $this->offset - $start);
  243. }
  244. }
  245. /**
  246. * Read elemental object from a PDF stream
  247. *
  248. * @return Zend_Pdf_Element
  249. * @throws Zend_Pdf_Exception
  250. */
  251. public function readElement($nextLexeme = null)
  252. {
  253. if ($nextLexeme === null) {
  254. $nextLexeme = $this->readLexeme();
  255. }
  256. /**
  257. * Note: readElement() method is a public method and could be invoked from other classes.
  258. * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
  259. * about _elements member management.
  260. */
  261. switch ($nextLexeme) {
  262. case '(':
  263. return ($this->_elements[] = $this->_readString());
  264. case '<':
  265. return ($this->_elements[] = $this->_readBinaryString());
  266. case '/':
  267. return ($this->_elements[] = new Zend_Pdf_Element_Name(
  268. Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
  269. ));
  270. case '[':
  271. return ($this->_elements[] = $this->_readArray());
  272. case '<<':
  273. return ($this->_elements[] = $this->_readDictionary());
  274. case ')':
  275. // fall through to next case
  276. case '>':
  277. // fall through to next case
  278. case ']':
  279. // fall through to next case
  280. case '>>':
  281. // fall through to next case
  282. case '{':
  283. // fall through to next case
  284. case '}':
  285. require_once 'Zend/Pdf/Exception.php';
  286. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
  287. $this->offset));
  288. default:
  289. if (strcasecmp($nextLexeme, 'true') == 0) {
  290. return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
  291. } else if (strcasecmp($nextLexeme, 'false') == 0) {
  292. return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
  293. } else if (strcasecmp($nextLexeme, 'null') == 0) {
  294. return ($this->_elements[] = new Zend_Pdf_Element_Null());
  295. }
  296. $ref = $this->_readReference($nextLexeme);
  297. if ($ref !== null) {
  298. return ($this->_elements[] = $ref);
  299. }
  300. return ($this->_elements[] = $this->_readNumeric($nextLexeme));
  301. }
  302. }
  303. /**
  304. * Read string PDF object
  305. * Also reads trailing ')' from a pdf stream
  306. *
  307. * @return Zend_Pdf_Element_String
  308. * @throws Zend_Pdf_Exception
  309. */
  310. private function _readString()
  311. {
  312. $start = $this->offset;
  313. $openedBrackets = 1;
  314. $this->offset += strcspn($this->data, '()\\', $this->offset);
  315. while ($this->offset < strlen($this->data)) {
  316. switch (ord( $this->data[$this->offset] )) {
  317. case 0x28: // '(' - opened bracket in the string, needs balanced pair.
  318. $this->offset++;
  319. $openedBrackets++;
  320. break;
  321. case 0x29: // ')' - pair to the opened bracket
  322. $this->offset++;
  323. $openedBrackets--;
  324. break;
  325. case 0x5C: // '\\' - escape sequence, skip next char from a check
  326. $this->offset += 2;
  327. }
  328. if ($openedBrackets == 0) {
  329. break; // end of string
  330. }
  331. $this->offset += strcspn($this->data, '()\\', $this->offset);
  332. }
  333. if ($openedBrackets != 0) {
  334. require_once 'Zend/Pdf/Exception.php';
  335. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
  336. }
  337. return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
  338. $start,
  339. $this->offset - $start - 1) ));
  340. }
  341. /**
  342. * Read binary string PDF object
  343. * Also reads trailing '>' from a pdf stream
  344. *
  345. * @return Zend_Pdf_Element_String_Binary
  346. * @throws Zend_Pdf_Exception
  347. */
  348. private function _readBinaryString()
  349. {
  350. $start = $this->offset;
  351. $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);
  352. if ($this->offset >= strlen($this->data) - 1) {
  353. require_once 'Zend/Pdf/Exception.php';
  354. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
  355. }
  356. if ($this->data[$this->offset++] != '>') {
  357. require_once 'Zend/Pdf/Exception.php';
  358. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
  359. }
  360. return new Zend_Pdf_Element_String_Binary(
  361. Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
  362. $start,
  363. $this->offset - $start - 1) ));
  364. }
  365. /**
  366. * Read array PDF object
  367. * Also reads trailing ']' from a pdf stream
  368. *
  369. * @return Zend_Pdf_Element_Array
  370. * @throws Zend_Pdf_Exception
  371. */
  372. private function _readArray()
  373. {
  374. $elements = array();
  375. while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
  376. if ($nextLexeme != ']') {
  377. $elements[] = $this->readElement($nextLexeme);
  378. } else {
  379. return new Zend_Pdf_Element_Array($elements);
  380. }
  381. }
  382. require_once 'Zend/Pdf/Exception.php';
  383. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
  384. }
  385. /**
  386. * Read dictionary PDF object
  387. * Also reads trailing '>>' from a pdf stream
  388. *
  389. * @return Zend_Pdf_Element_Dictionary
  390. * @throws Zend_Pdf_Exception
  391. */
  392. private function _readDictionary()
  393. {
  394. $dictionary = new Zend_Pdf_Element_Dictionary();
  395. while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
  396. if ($nextLexeme != '>>') {
  397. $nameStart = $this->offset - strlen($nextLexeme);
  398. $name = $this->readElement($nextLexeme);
  399. $value = $this->readElement();
  400. if (!$name instanceof Zend_Pdf_Element_Name) {
  401. require_once 'Zend/Pdf/Exception.php';
  402. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
  403. }
  404. $dictionary->add($name, $value);
  405. } else {
  406. return $dictionary;
  407. }
  408. }
  409. require_once 'Zend/Pdf/Exception.php';
  410. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
  411. }
  412. /**
  413. * Read reference PDF object
  414. *
  415. * @param string $nextLexeme
  416. * @return Zend_Pdf_Element_Reference
  417. */
  418. private function _readReference($nextLexeme = null)
  419. {
  420. $start = $this->offset;
  421. if ($nextLexeme === null) {
  422. $objNum = $this->readLexeme();
  423. } else {
  424. $objNum = $nextLexeme;
  425. }
  426. if (!ctype_digit($objNum)) { // it's not a reference
  427. $this->offset = $start;
  428. return null;
  429. }
  430. $genNum = $this->readLexeme();
  431. if (!ctype_digit($genNum)) { // it's not a reference
  432. $this->offset = $start;
  433. return null;
  434. }
  435. $rMark = $this->readLexeme();
  436. if ($rMark != 'R') { // it's not a reference
  437. $this->offset = $start;
  438. return null;
  439. }
  440. $ref = new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
  441. return $ref;
  442. }
  443. /**
  444. * Read numeric PDF object
  445. *
  446. * @param string $nextLexeme
  447. * @return Zend_Pdf_Element_Numeric
  448. */
  449. private function _readNumeric($nextLexeme = null)
  450. {
  451. if ($nextLexeme === null) {
  452. $nextLexeme = $this->readLexeme();
  453. }
  454. return new Zend_Pdf_Element_Numeric($nextLexeme);
  455. }
  456. /**
  457. * Read inderect object from a PDF stream
  458. *
  459. * @param integer $offset
  460. * @param Zend_Pdf_Element_Reference_Context $context
  461. * @return Zend_Pdf_Element_Object
  462. */
  463. public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
  464. {
  465. if ($offset === null ) {
  466. return new Zend_Pdf_Element_Null();
  467. }
  468. // Save current offset to make getObject() reentrant
  469. $offsetSave = $this->offset;
  470. $this->offset = $offset;
  471. $this->_context = $context;
  472. $this->_elements = array();
  473. $objNum = $this->readLexeme();
  474. if (!ctype_digit($objNum)) {
  475. require_once 'Zend/Pdf/Exception.php';
  476. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
  477. }
  478. $genNum = $this->readLexeme();
  479. if (!ctype_digit($genNum)) {
  480. require_once 'Zend/Pdf/Exception.php';
  481. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
  482. }
  483. $objKeyword = $this->readLexeme();
  484. if ($objKeyword != 'obj') {
  485. require_once 'Zend/Pdf/Exception.php';
  486. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
  487. }
  488. $objValue = $this->readElement();
  489. $nextLexeme = $this->readLexeme();
  490. if( $nextLexeme == 'endobj' ) {
  491. /**
  492. * Object is not generated by factory (thus it's not marked as modified object).
  493. * But factory is assigned to the obect.
  494. */
  495. $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());
  496. foreach ($this->_elements as $element) {
  497. $element->setParentObject($obj);
  498. }
  499. // Restore offset value
  500. $this->offset = $offsetSave;
  501. return $obj;
  502. }
  503. /**
  504. * It's a stream object
  505. */
  506. if ($nextLexeme != 'stream') {
  507. require_once 'Zend/Pdf/Exception.php';
  508. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
  509. }
  510. if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
  511. require_once 'Zend/Pdf/Exception.php';
  512. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
  513. }
  514. /**
  515. * References are automatically dereferenced at this moment.
  516. */
  517. $streamLength = $objValue->Length->value;
  518. /**
  519. * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
  520. * This restriction gives the possibility to recognize all cases exactly
  521. */
  522. if ($this->data[$this->offset] == "\r" &&
  523. $this->data[$this->offset + 1] == "\n" ) {
  524. $this->offset += 2;
  525. } else if ($this->data[$this->offset] == "\n" ) {
  526. $this->offset++;
  527. } else {
  528. require_once 'Zend/Pdf/Exception.php';
  529. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
  530. }
  531. $dataOffset = $this->offset;
  532. $this->offset += $streamLength;
  533. $nextLexeme = $this->readLexeme();
  534. if ($nextLexeme != 'endstream') {
  535. require_once 'Zend/Pdf/Exception.php';
  536. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
  537. }
  538. $nextLexeme = $this->readLexeme();
  539. if ($nextLexeme != 'endobj') {
  540. require_once 'Zend/Pdf/Exception.php';
  541. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
  542. }
  543. $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
  544. $dataOffset,
  545. $streamLength),
  546. (int)$objNum,
  547. (int)$genNum,
  548. $this->_objFactory->resolve(),
  549. $objValue);
  550. foreach ($this->_elements as $element) {
  551. $element->setParentObject($obj);
  552. }
  553. // Restore offset value
  554. $this->offset = $offsetSave;
  555. return $obj;
  556. }
  557. /**
  558. * Get length of source string
  559. *
  560. * @return integer
  561. */
  562. public function getLength()
  563. {
  564. return strlen($this->data);
  565. }
  566. /**
  567. * Get source string
  568. *
  569. * @return string
  570. */
  571. public function getString()
  572. {
  573. return $this->data;
  574. }
  575. /**
  576. * Parse integer value from a binary stream
  577. *
  578. * @param string $stream
  579. * @param integer $offset
  580. * @param integer $size
  581. * @return integer
  582. */
  583. public static function parseIntFromStream($stream, $offset, $size)
  584. {
  585. $value = 0;
  586. for ($count = 0; $count < $size; $count++) {
  587. $value *= 256;
  588. $value += ord($stream[$offset + $count]);
  589. }
  590. return $value;
  591. }
  592. /**
  593. * Set current context
  594. *
  595. * @param Zend_Pdf_Element_Reference_Context $context
  596. */
  597. public function setContext(Zend_Pdf_Element_Reference_Context $context)
  598. {
  599. $this->_context = $context;
  600. }
  601. /**
  602. * Object constructor
  603. *
  604. * Note: PHP duplicates string, which is sent by value, only of it's updated.
  605. * Thus we don't need to care about overhead
  606. *
  607. * @param string $pdfString
  608. * @param Zend_Pdf_ElementFactory_Interface $factory
  609. */
  610. public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
  611. {
  612. $this->data = $source;
  613. $this->_objFactory = $factory;
  614. }
  615. }