PageRenderTime 51ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/Pdf/Parser.php

https://bitbucket.org/gkawka/zend-framework
PHP | 472 lines | 285 code | 62 blank | 125 comment | 75 complexity | 729e9db0a243ed1b6b7102591526ae7d MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Pdf
  17. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id: Parser.php 24593 2012-01-05 20:35:02Z matthew $
  20. */
  21. /** Internally used classes */
  22. require_once 'Zend/Pdf/Element.php';
  23. require_once 'Zend/Pdf/Element/Numeric.php';
  24. /** Zend_Pdf_StringParser */
  25. require_once 'Zend/Pdf/StringParser.php';
  26. /**
  27. * PDF file parser
  28. *
  29. * @package Zend_Pdf
  30. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  31. * @license http://framework.zend.com/license/new-bsd New BSD License
  32. */
  33. class Zend_Pdf_Parser
  34. {
  /**
   * Low-level parser holding the raw PDF data and the current read offset.
   *
   * @var Zend_Pdf_StringParser
   */
  private $_stringParser = null;

  /**
   * Trailer of the most recent cross-reference section of the file.
   *
   * @var Zend_Pdf_Trailer_Keeper
   */
  private $_trailer = null;

  /**
   * PDF version taken from the '%PDF-x.y' header comment.
   *
   * @var string
   */
  private $_pdfVersion = null;
  /**
   * Return the size, in bytes, of the PDF data held by the string parser.
   *
   * @return integer
   */
  public function getPDFLength()
  {
      $pdfData = $this->_stringParser->data;

      return strlen($pdfData);
  }
  /**
   * Return the raw PDF data held by the string parser.
   *
   * @return string
   */
  public function getPDFString()
  {
      $pdfData = $this->_stringParser->data;

      return $pdfData;
  }
  /**
   * Return the PDF version that was read from the file header comment.
   *
   * @return string
   */
  public function getPDFVersion()
  {
      $version = $this->_pdfVersion;

      return $version;
  }
  /**
   * Load the cross-reference section located at $offset together with its
   * trailer, then recursively load every older section linked via '/Prev'.
   *
   * A fresh reference table and reference context are created for the
   * section and installed on the string parser before anything is read.
   * Classic 'xref' tables are fully parsed; cross-reference streams are
   * validated and walked but finally rejected with an exception because
   * they are not supported.
   *
   * NOTE(review): a '/Prev' chain that loops back on itself would recurse
   * without bound; no cycle detection is performed here.
   *
   * @param integer $offset  Byte offset of the cross-reference section
   * @throws Zend_Pdf_Exception
   * @return Zend_Pdf_Trailer_Keeper
   */
  private function _loadXRefTable($offset)
  {
      $parser = $this->_stringParser;
      $parser->offset = $offset;

      require_once 'Zend/Pdf/Element/Reference/Table.php';
      $refTable = new Zend_Pdf_Element_Reference_Table();

      require_once 'Zend/Pdf/Element/Reference/Context.php';
      $context = new Zend_Pdf_Element_Reference_Context($parser, $refTable);
      $parser->setContext($context);

      if ($parser->readLexeme() == 'xref') {
          // Common cross-reference table
          $trailerDict = $this->_readXRefTableSection($refTable);
      } else {
          // Cross-reference stream (currently always ends in an exception)
          $trailerDict = $this->_readXRefStream($offset, $context, $refTable);
      }

      require_once 'Zend/Pdf/Trailer/Keeper.php';
      $trailerObj = new Zend_Pdf_Trailer_Keeper($trailerDict, $context);

      if ($trailerDict->Prev instanceof Zend_Pdf_Element_Numeric ||
          $trailerDict->Prev instanceof Zend_Pdf_Element_Reference) {
          // Older section: load it and chain its reference table as parent
          $trailerObj->setPrev($this->_loadXRefTable($trailerDict->Prev->value));
          $context->getRefTable()->setParent($trailerObj->getPrev()->getRefTable());
      }

      /**
       * We set '/Prev' dictionary property to the current cross-reference section offset.
       * It doesn't correspond to the actual data, but is true when trailer will be used
       * as a trailer for next generated PDF section.
       */
      $trailerObj->Prev = new Zend_Pdf_Element_Numeric($offset);

      return $trailerObj;
  }

  /**
   * Parse the body of a classic cross-reference table (the parser must be
   * positioned right after the 'xref' keyword) and the trailer dictionary
   * that follows the 'trailer' keyword.
   *
   * Each subsection starts with "<first object number> <entry count>" and
   * is followed by that many fixed-layout entries: a 10-digit offset, a
   * white space, a 5-digit generation number, a white space, the 'n'/'f'
   * key and two trailing white-space characters.
   *
   * @param Zend_Pdf_Element_Reference_Table $refTable  Table to fill
   * @throws Zend_Pdf_Exception
   * @return Zend_Pdf_Element_Dictionary  The trailer dictionary
   */
  private function _readXRefTableSection(Zend_Pdf_Element_Reference_Table $refTable)
  {
      $parser = $this->_stringParser;
      $parser->skipWhiteSpace();

      while (($nextLexeme = $parser->readLexeme()) != 'trailer') {
          if (!ctype_digit($nextLexeme)) {
              require_once 'Zend/Pdf/Exception.php';
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $parser->offset-strlen($nextLexeme)));
          }
          $objNum = (int)$nextLexeme;

          $refCount = $parser->readLexeme();
          if (!ctype_digit($refCount)) {
              require_once 'Zend/Pdf/Exception.php';
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $parser->offset-strlen($refCount)));
          }
          $refCount = (int)$refCount;

          $parser->skipWhiteSpace();

          for (; $refCount > 0; $refCount--, $objNum++) {
              $objectOffset = $this->_readXRefEntryNumber(10, 'Offset');
              $this->_skipXRefSeparator();

              // The generation number itself is validated here (the field
              // used to go unchecked because the offset was tested twice).
              $genNumber = $this->_readXRefEntryNumber(5, 'Generation number');
              $this->_skipXRefSeparator();

              if ($parser->offset >= strlen($parser->data)) {
                  require_once 'Zend/Pdf/Exception.php';
                  throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Unexpected end of file.', $parser->offset));
              }
              $inUseKey = $parser->data[$parser->offset];
              $parser->offset++;

              switch ($inUseKey) {
                  case 'f':
                      // free entry
                      $refTable->addReference($objNum . ' ' . $genNumber . ' R',
                                              $objectOffset,
                                              false);
                      break;

                  case 'n':
                      // in-use entry
                      $refTable->addReference($objNum . ' ' . $genNumber . ' R',
                                              $objectOffset,
                                              true);
                      break;

                  default:
                      // Any other key is tolerated and the entry is skipped,
                      // exactly as before.
                      break;
              }

              // Two-character end-of-line marker closing the entry
              $this->_skipXRefSeparator();
              $this->_skipXRefSeparator();
          }
      }

      $trailerDictOffset = $parser->offset;
      $trailerDict = $parser->readElement();
      if (!$trailerDict instanceof Zend_Pdf_Element_Dictionary) {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Dictionary expected after \'trailer\' keyword.', $trailerDictOffset));
      }

      return $trailerDict;
  }

  /**
   * Read a fixed-width, all-digit field of a cross-reference table entry at
   * the current parser offset and advance the offset past it.
   *
   * The value is returned as a string with leading zeros removed (at least
   * one digit is always kept) so that it is treated as a decimal and never
   * as an octal number.
   *
   * @param integer $length     Exact width of the field in bytes
   * @param string  $fieldName  Field name used in the error message
   * @throws Zend_Pdf_Exception  If the field is truncated or not numeric
   * @return string
   */
  private function _readXRefEntryNumber($length, $fieldName)
  {
      $parser = $this->_stringParser;

      $digits = substr($parser->data, $parser->offset, $length);
      if (strlen($digits) != $length || !ctype_digit($digits)) {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. %s must contain only digits.', $parser->offset, $fieldName));
      }
      $parser->offset += $length;

      $value = ltrim($digits, '0');

      return ($value === '') ? '0' : $value;
  }

  /**
   * Require a single white-space character at the current parser offset
   * and step over it.
   *
   * Reaching the end of the data is reported as an error instead of being
   * read as an out-of-range string offset.
   *
   * @throws Zend_Pdf_Exception
   */
  private function _skipXRefSeparator()
  {
      $parser = $this->_stringParser;

      if ($parser->offset >= strlen($parser->data) ||
          !Zend_Pdf_StringParser::isWhiteSpace(ord($parser->data[$parser->offset]))) {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $parser->offset));
      }
      $parser->offset++;
  }

  /**
   * Validate and walk a cross-reference stream located at $offset.
   *
   * Free and in-use entries are registered in $refTable while the stream
   * is decoded, but the method always finishes by throwing because
   * cross-reference streams are not supported yet; it never returns.
   *
   * @param integer                            $offset    Offset of the stream object
   * @param Zend_Pdf_Element_Reference_Context $context   Context used to load the object
   * @param Zend_Pdf_Element_Reference_Table   $refTable  Table to fill
   * @throws Zend_Pdf_Exception  Always
   * @return Zend_Pdf_Element_Dictionary  Declared for symmetry only
   */
  private function _readXRefStream($offset,
                                   Zend_Pdf_Element_Reference_Context $context,
                                   Zend_Pdf_Element_Reference_Table $refTable)
  {
      $xrefStream = $this->_stringParser->getObject($offset, $context);

      if (!$xrefStream instanceof Zend_Pdf_Element_Object_Stream) {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream expected.', $offset));
      }

      $trailerDict = $xrefStream->dictionary;
      // A missing /Type is rejected the same way as a wrong one
      if ($trailerDict->Type === null || $trailerDict->Type->value != 'XRef') {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream object must have /Type property assigned to /XRef.', $offset));
      }
      if ($trailerDict->W === null || $trailerDict->W->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary doesn\'t have W entry or it\'s not an array.', $offset));
      }

      $entryField1Size = $trailerDict->W->items[0]->value;
      $entryField2Size = $trailerDict->W->items[1]->value;
      $entryField3Size = $trailerDict->W->items[2]->value;

      if ($entryField2Size == 0 || $entryField3Size == 0) {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Wrong W dictionary entry. Only type field of stream entries has default value and could be zero length.', $offset));
      }

      $xrefStreamData = $xrefStream->value;

      if ($trailerDict->Index !== null) {
          if ($trailerDict->Index->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
              require_once 'Zend/Pdf/Exception.php';
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary Index entry must be an array.', $offset));
          }
          // Index holds (first object number, entry count) pairs
          $sections = count($trailerDict->Index->items)/2;
      } else {
          $sections = 1;
      }

      $streamOffset = 0;

      for ($count = 0; $count < $sections; $count++) {
          if ($trailerDict->Index !== null) {
              $objNum  = $trailerDict->Index->items[$count*2    ]->value;
              $entries = $trailerDict->Index->items[$count*2 + 1]->value;
          } else {
              $objNum  = 0;
              $entries = $trailerDict->Size->value;
          }

          for ($count2 = 0; $count2 < $entries; $count2++) {
              if ($entryField1Size == 0) {
                  // Zero-width type field: the type is taken to be 1
                  $type = 1;
              } else if ($entryField1Size == 1) { // one-byte field shortcut
                  $type = ord($xrefStreamData[$streamOffset++]);
              } else {
                  $type = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField1Size);
                  $streamOffset += $entryField1Size;
              }

              if ($entryField2Size == 1) { // one-byte field shortcut
                  $field2 = ord($xrefStreamData[$streamOffset++]);
              } else {
                  $field2 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField2Size);
                  $streamOffset += $entryField2Size;
              }

              if ($entryField3Size == 1) { // one-byte field shortcut
                  $field3 = ord($xrefStreamData[$streamOffset++]);
              } else {
                  $field3 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField3Size);
                  $streamOffset += $entryField3Size;
              }

              switch ($type) {
                  case 0:
                      // Free object
                      $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, false);
                      break;

                  case 1:
                      // In use object
                      $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, true);
                      break;

                  case 2:
                      // Object in an object stream: nothing is registered
                      break;
              }

              $objNum++;
          }
      }

      require_once 'Zend/Pdf/Exception.php';
      throw new Zend_Pdf_Exception('Cross-reference streams are not supported yet.');
  }
  /**
   * Return the trailer loaded for the last cross-reference section.
   *
   * @return Zend_Pdf_Trailer_Keeper
   */
  public function getTrailer()
  {
      $trailer = $this->_trailer;

      return $trailer;
  }
  /**
   * Object constructor
   *
   * Reads the PDF (from a file when $load is true, otherwise $source is the
   * PDF data itself), checks the '%PDF-' header and version, locates the
   * 'startxref' keyword in front of the trailing '%%EOF' marker and loads
   * the cross-reference sections starting at the offset found there.
   * Finally the factory is told the object count from the trailer's Size.
   *
   * Note: PHP duplicates a string that is passed by value only if it is
   * modified, so passing the PDF data around does not copy it.
   *
   * @param mixed $source  File name ($load is true) or PDF data ($load is false)
   * @param Zend_Pdf_ElementFactory_Interface $factory
   * @param boolean $load
   * @throws Zend_Pdf_Exception
   */
  public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory, $load)
  {
      if ($load) {
          $this->_stringParser = new Zend_Pdf_StringParser($this->_readSourceFile($source), $factory);
      } else {
          $this->_stringParser = new Zend_Pdf_StringParser($source, $factory);
      }
      $parser = $this->_stringParser;

      $pdfVersionComment = $parser->readComment();
      if (substr($pdfVersionComment, 0, 5) != '%PDF-') {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception('File is not a PDF.');
      }

      $pdfVersion = substr($pdfVersionComment, 5);
      if (version_compare($pdfVersion, '0.9',  '<')  ||
          version_compare($pdfVersion, '1.61', '>=')
         ) {
          /**
           * @todo
           * To support PDF versions 1.5 (Acrobat 6) and PDF version 1.7 (Acrobat 7)
           * Stream compression filter must be implemented (for compressed object streams).
           * Cross reference streams must be implemented
           */
          require_once 'Zend/Pdf/Exception.php';
          // The version is a string; %s reports it verbatim (%f printed
          // e.g. '1.700000', or '0.000000' for non-numeric text).
          throw new Zend_Pdf_Exception(sprintf('Unsupported PDF version. Zend_Pdf supports PDF 1.0-1.4. Current version - \'%s\'', $pdfVersion));
      }
      $this->_pdfVersion = $pdfVersion;

      $data      = $parser->data;
      $eofOffset = strrpos($data, '%%EOF');
      if ($eofOffset === false ||
          strlen($data) - $eofOffset > 7) {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception('Pdf file syntax error. End-of-fle marker expected at the end of file.');
      }

      // Walk backwards from the character in front of '%%EOF'. The position
      // is tested before the data is indexed so that a negative index is
      // never used.
      $position = $eofOffset - 1;

      /**
       * Go to end of cross-reference table offset
       */
      while ($position > 0 && Zend_Pdf_StringParser::isWhiteSpace(ord($data[$position]))) {
          $position--;
      }

      /**
       * Go to the start of cross-reference table offset
       */
      while ($position > 0 && !Zend_Pdf_StringParser::isWhiteSpace(ord($data[$position]))) {
          $position--;
      }

      /**
       * Go to the end of 'startxref' keyword
       */
      while ($position > 0 && Zend_Pdf_StringParser::isWhiteSpace(ord($data[$position]))) {
          $position--;
      }

      /**
       * Go to the white space (eol marker) before 'startxref' keyword
       */
      $position -= 9;
      if ($position < 0) {
          // Not enough room in front of the offset for the keyword at all
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', 0));
      }
      $parser->offset = $position;

      $nextLexeme = $parser->readLexeme();
      if ($nextLexeme != 'startxref') {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', $parser->offset-strlen($nextLexeme)));
      }

      $startXref = $parser->readLexeme();
      if (!ctype_digit($startXref)) {
          require_once 'Zend/Pdf/Exception.php';
          // Report where the offending lexeme itself starts
          throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. Cross-reference table offset must contain only digits. Offset - 0x%X.', $parser->offset-strlen($startXref)));
      }

      $this->_trailer = $this->_loadXRefTable($startXref);
      $factory->setObjectCount($this->_trailer->Size->value);
  }

  /**
   * Read the whole file $source into a string.
   *
   * Exactly filesize($source) bytes are expected; reading fewer is an
   * error. The file handle is closed on every path, including failures.
   *
   * @param string $source  File name
   * @throws Zend_Pdf_Exception  If the file cannot be opened or fully read
   * @return string
   */
  private function _readSourceFile($source)
  {
      if (($pdfFile = @fopen($source, 'rb')) === false ) {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception( "Can not open '$source' file for reading." );
      }

      $data      = '';
      $byteCount = filesize($source);
      $failed    = ($byteCount === false);

      while (!$failed && $byteCount > 0 && !feof($pdfFile)) {
          $nextBlock = fread($pdfFile, $byteCount);
          if ($nextBlock === false) {
              $failed = true;
              break;
          }
          $data      .= $nextBlock;
          $byteCount -= strlen($nextBlock);
      }

      fclose($pdfFile);

      if ($failed || $byteCount != 0) {
          require_once 'Zend/Pdf/Exception.php';
          throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
      }

      return $data;
  }
  /**
   * Object destructor
   *
   * Asks the string parser to clean up after itself.
   */
  public function __destruct()
  {
      $parser = $this->_stringParser;
      $parser->cleanUp();
  }
  410. }