PageRenderTime 52ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/library/Zend/Pdf/Parser.php

https://bitbucket.org/baruffaldi/webapp-urltube
PHP | 467 lines | 261 code | 76 blank | 130 comment | 72 complexity | fbfa4bd2f206f268bbe14d9a25aa90b3 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.0, MIT
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @package Zend_Pdf
  16. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  17. * @license http://framework.zend.com/license/new-bsd New BSD License
  18. */
  19. /** Zend_Pdf_Element */
  20. require_once 'Zend/Pdf/Element.php';
  21. /** Zend_Pdf_Element_Array */
  22. require_once 'Zend/Pdf/Element/Array.php';
  23. /** Zend_Pdf_Element_String_Binary */
  24. require_once 'Zend/Pdf/Element/String/Binary.php';
  25. /** Zend_Pdf_Element_Boolean */
  26. require_once 'Zend/Pdf/Element/Boolean.php';
  27. /** Zend_Pdf_Element_Dictionary */
  28. require_once 'Zend/Pdf/Element/Dictionary.php';
  29. /** Zend_Pdf_Element_Name */
  30. require_once 'Zend/Pdf/Element/Name.php';
  31. /** Zend_Pdf_Element_Numeric */
  32. require_once 'Zend/Pdf/Element/Numeric.php';
  33. /** Zend_Pdf_Element_Object */
  34. require_once 'Zend/Pdf/Element/Object.php';
  35. /** Zend_Pdf_Element_Reference */
  36. require_once 'Zend/Pdf/Element/Reference.php';
  37. /** Zend_Pdf_Element_Object_Stream */
  38. require_once 'Zend/Pdf/Element/Object/Stream.php';
  39. /** Zend_Pdf_Element_String */
  40. require_once 'Zend/Pdf/Element/String.php';
  41. /** Zend_Pdf_Element_Null */
  42. require_once 'Zend/Pdf/Element/Null.php';
  43. /** Zend_Pdf_Element_Reference_Context */
  44. require_once 'Zend/Pdf/Element/Reference/Context.php';
  45. /** Zend_Pdf_Element_Reference_Table */
  46. require_once 'Zend/Pdf/Element/Reference/Table.php';
  47. /** Zend_Pdf_Trailer_Keeper */
  48. require_once 'Zend/Pdf/Trailer/Keeper.php';
  49. /** Zend_Pdf_ElementFactory_Interface */
  50. require_once 'Zend/Pdf/ElementFactory/Interface.php';
  51. /** Zend_Pdf_PhpArray */
  52. require_once 'Zend/Pdf/PhpArray.php';
  53. /** Zend_Pdf_StringParser */
  54. require_once 'Zend/Pdf/StringParser.php';
  55. /** Zend_Pdf_Parser_Stream */
  56. require_once 'Zend/Pdf/Parser/Stream.php';
  57. /**
  58. * PDF file parser
  59. *
  60. * @package Zend_Pdf
  61. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  62. * @license http://framework.zend.com/license/new-bsd New BSD License
  63. */
  64. class Zend_Pdf_Parser
  65. {
  66. /**
  67. * String parser
  68. *
  69. * @var Zend_Pdf_StringParser
  70. */
  71. private $_stringParser;
  72. /**
  73. * Last PDF file trailer
  74. *
  75. * @var Zend_Pdf_Trailer_Keeper
  76. */
  77. private $_trailer;
  78. /**
  79. * Get length of source PDF
  80. *
  81. * @return integer
  82. */
  83. public function getPDFLength()
  84. {
  85. return strlen($this->_stringParser->data);
  86. }
  87. /**
  88. * Get PDF String
  89. *
  90. * @return string
  91. */
  92. public function getPDFString()
  93. {
  94. return $this->_stringParser->data;
  95. }
  96. /**
  97. * Load XReference table and referenced objects
  98. *
  99. * @param integer $offset
  100. * @throws Zend_Pdf_Exception
  101. * @return Zend_Pdf_Trailer_Keeper
  102. */
  103. private function _loadXRefTable($offset)
  104. {
  105. $this->_stringParser->offset = $offset;
  106. $refTable = new Zend_Pdf_Element_Reference_Table();
  107. $context = new Zend_Pdf_Element_Reference_Context($this->_stringParser, $refTable);
  108. $this->_stringParser->setContext($context);
  109. $nextLexeme = $this->_stringParser->readLexeme();
  110. if ($nextLexeme == 'xref') {
  111. /**
  112. * Common cross-reference table
  113. */
  114. $this->_stringParser->skipWhiteSpace();
  115. while ( ($nextLexeme = $this->_stringParser->readLexeme()) != 'trailer' ) {
  116. if (!ctype_digit($nextLexeme)) {
  117. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($nextLexeme)));
  118. }
  119. $objNum = (int)$nextLexeme;
  120. $refCount = $this->_stringParser->readLexeme();
  121. if (!ctype_digit($refCount)) {
  122. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($refCount)));
  123. }
  124. $this->_stringParser->skipWhiteSpace();
  125. while ($refCount > 0) {
  126. $objectOffset = substr($this->_stringParser->data, $this->_stringParser->offset, 10);
  127. if (!ctype_digit($objectOffset)) {
  128. throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Offset must contain only digits.', $this->_stringParser->offset));
  129. }
  130. // Force $objectOffset to be treated as decimal instead of octal number
  131. for ($numStart = 0; $numStart < strlen($objectOffset)-1; $numStart++) {
  132. if ($objectOffset[$numStart] != '0') {
  133. break;
  134. }
  135. }
  136. $objectOffset = substr($objectOffset, $numStart);
  137. $this->_stringParser->offset += 10;
  138. if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_stringParser->data[$this->_stringParser->offset] )) ) {
  139. throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
  140. }
  141. $this->_stringParser->offset++;
  142. $genNumber = substr($this->_stringParser->data, $this->_stringParser->offset, 5);
  143. if (!ctype_digit($objectOffset)) {
  144. throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Offset must contain only digits.', $this->_stringParser->offset));
  145. }
  146. // Force $objectOffset to be treated as decimal instead of octal number
  147. for ($numStart = 0; $numStart < strlen($genNumber)-1; $numStart++) {
  148. if ($genNumber[$numStart] != '0') {
  149. break;
  150. }
  151. }
  152. $genNumber = substr($genNumber, $numStart);
  153. $this->_stringParser->offset += 5;
  154. if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_stringParser->data[$this->_stringParser->offset] )) ) {
  155. throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
  156. }
  157. $this->_stringParser->offset++;
  158. $inUseKey = $this->_stringParser->data[$this->_stringParser->offset];
  159. $this->_stringParser->offset++;
  160. switch ($inUseKey) {
  161. case 'f':
  162. // free entry
  163. unset( $this->_refTable[$objNum . ' ' . $genNumber . ' R'] );
  164. $refTable->addReference($objNum . ' ' . $genNumber . ' R',
  165. $objectOffset,
  166. false);
  167. break;
  168. case 'n':
  169. // in-use entry
  170. $refTable->addReference($objNum . ' ' . $genNumber . ' R',
  171. $objectOffset,
  172. true);
  173. }
  174. if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_stringParser->data[$this->_stringParser->offset] )) ) {
  175. throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
  176. }
  177. $this->_stringParser->offset++;
  178. if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_stringParser->data[$this->_stringParser->offset] )) ) {
  179. throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
  180. }
  181. $this->_stringParser->offset++;
  182. $refCount--;
  183. $objNum++;
  184. }
  185. }
  186. $trailerDictOffset = $this->_stringParser->offset;
  187. $trailerDict = $this->_stringParser->readElement();
  188. if (!$trailerDict instanceof Zend_Pdf_Element_Dictionary) {
  189. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Dictionary expected after \'trailer\' keyword.', $trailerDictOffset));
  190. }
  191. } else {
  192. $xrefStream = $this->_stringParser->getObject($offset, $context);
  193. if (!$xrefStream instanceof Zend_Pdf_Element_Object_Stream) {
  194. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream expected.', $offset));
  195. }
  196. $trailerDict = $xrefStream->dictionary;
  197. if ($trailerDict->Type->value != 'XRef') {
  198. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream object must have /Type property assigned to /XRef.', $offset));
  199. }
  200. if ($trailerDict->W === null || $trailerDict->W->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
  201. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary doesn\'t have W entry or it\'s not an array.', $offset));
  202. }
  203. $entryField1Size = $trailerDict->W->items[0]->value;
  204. $entryField2Size = $trailerDict->W->items[1]->value;
  205. $entryField3Size = $trailerDict->W->items[2]->value;
  206. if ($entryField2Size == 0 || $entryField3Size == 0) {
  207. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Wrong W dictionary entry. Only type field of stream entries has default value and could be zero length.', $offset));
  208. }
  209. $xrefStreamData = &$xrefStream->value;
  210. if ($trailerDict->Index !== null) {
  211. if ($trailerDict->Index->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
  212. throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary Index entry must be an array.', $offset));
  213. }
  214. $sections = count($trailerDict->Index->items)/2;
  215. } else {
  216. $sections = 1;
  217. }
  218. $streamOffset = 0;
  219. $size = $entryField1Size + $entryField2Size + $entryField3Size;
  220. $entries = strlen($xrefStreamData)/$size;
  221. for ($count = 0; $count < $sections; $count++) {
  222. if ($trailerDict->Index !== null) {
  223. $objNum = $trailerDict->Index->items[$count*2 ]->value;
  224. $entries = $trailerDict->Index->items[$count*2 + 1]->value;
  225. } else {
  226. $objNum = 0;
  227. $entries = $trailerDict->Size->value;
  228. }
  229. for ($count2 = 0; $count2 < $entries; $count2++) {
  230. if ($entryField1Size == 0) {
  231. $type = 1;
  232. } else if ($entryField1Size == 1) { // Optimyze one-byte field case
  233. $type = ord($xrefStreamData[$streamOffset++]);
  234. } else {
  235. $type = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField1Size);
  236. $streamOffset += $entryField1Size;
  237. }
  238. if ($entryField2Size == 1) { // Optimyze one-byte field case
  239. $field2 = ord($xrefStreamData[$streamOffset++]);
  240. } else {
  241. $field2 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField2Size);
  242. $streamOffset += $entryField2Size;
  243. }
  244. if ($entryField3Size == 1) { // Optimyze one-byte field case
  245. $field3 = ord($xrefStreamData[$streamOffset++]);
  246. } else {
  247. $field3 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField3Size);
  248. $streamOffset += $entryField3Size;
  249. }
  250. switch ($type) {
  251. case 0:
  252. // Free object
  253. $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, false);
  254. // Debug output:
  255. // echo "Free object - $objNum $field3 R, next free - $field2\n";
  256. break;
  257. case 1:
  258. // In use object
  259. $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, true);
  260. // Debug output:
  261. // echo "In-use object - $objNum $field3 R, offset - $field2\n";
  262. break;
  263. case 2:
  264. // Object in an object stream
  265. // Debug output:
  266. // echo "Compressed object - $objNum 0 R, object stream - $field2 0 R, offset - $field3\n";
  267. break;
  268. }
  269. $objNum++;
  270. }
  271. }
  272. // $streamOffset . ' ' . strlen($xrefStreamData) . "\n";
  273. // "$entries\n";
  274. throw new Zend_Pdf_Exception('Cross-reference streams are not supported yet.');
  275. }
  276. $trailerObj = new Zend_Pdf_Trailer_Keeper($trailerDict, $context);
  277. if ($trailerDict->Prev instanceof Zend_Pdf_Element_Numeric ||
  278. $trailerDict->Prev instanceof Zend_Pdf_Element_Reference ) {
  279. $trailerObj->setPrev($this->_loadXRefTable($trailerDict->Prev->value));
  280. $context->getRefTable()->setParent($trailerObj->getPrev()->getRefTable());
  281. }
  282. /**
  283. * We set '/Prev' dictionary property to the current cross-reference section offset.
  284. * It doesn't correspond to the actual data, but is true when trailer will be used
  285. * as a trailer for next generated PDF section.
  286. */
  287. $trailerObj->Prev = new Zend_Pdf_Element_Numeric($offset);
  288. return $trailerObj;
  289. }
  290. /**
  291. * Get Trailer object
  292. *
  293. * @return Zend_Pdf_Trailer_Keeper
  294. */
  295. public function getTrailer()
  296. {
  297. return $this->_trailer;
  298. }
  299. /**
  300. * Object constructor
  301. *
  302. * Note: PHP duplicates string, which is sent by value, only of it's updated.
  303. * Thus we don't need to care about overhead
  304. *
  305. * @param mixed $source
  306. * @param Zend_Pdf_ElementFactory_Interface $factory
  307. * @param boolean $load
  308. * @throws Zend_Exception
  309. */
  310. public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory, $load)
  311. {
  312. if ($load) {
  313. if (($pdfFile = @fopen($source, 'rb')) === false ) {
  314. throw new Zend_Pdf_Exception( "Can not open '$source' file for reading." );
  315. }
  316. $byteCount = filesize($source);
  317. $data = fread($pdfFile, $byteCount);
  318. $byteCount -= strlen($data);
  319. while ( $byteCount > 0 && ($nextBlock = fread($pdfFile, $byteCount)) != false ) {
  320. $data .= $nextBlock;
  321. $byteCount -= strlen($nextBlock);
  322. }
  323. fclose($pdfFile);
  324. $this->_stringParser = new Zend_Pdf_StringParser($data, $factory);
  325. } else {
  326. $this->_stringParser = new Zend_Pdf_StringParser($source, $factory);
  327. }
  328. $pdfVersionComment = $this->_stringParser->readComment();
  329. if (substr($pdfVersionComment, 0, 5) != '%PDF-') {
  330. throw new Zend_Pdf_Exception('File is not a PDF.');
  331. }
  332. $pdfVersion = (float)substr($pdfVersionComment, 5);
  333. if ($pdfVersion < 0.9 || $pdfVersion >= 1.61) {
  334. /**
  335. * @todo
  336. * To support PDF versions 1.5 (Acrobat 6) and PDF version 1.7 (Acrobat 7)
  337. * Stream compression filter must be implemented (for compressed object streams).
  338. * Cross reference streams must be implemented
  339. */
  340. throw new Zend_Pdf_Exception(sprintf('Unsupported PDF version. Zend_Pdf supports PDF 1.0-1.4. Current version - \'%f\'', $pdfVersion));
  341. }
  342. $this->_stringParser->offset = strrpos($this->_stringParser->data, '%%EOF');
  343. if ($this->_stringParser->offset === false ||
  344. strlen($this->_stringParser->data) - $this->_stringParser->offset > 7) {
  345. throw new Zend_Pdf_Exception('Pdf file syntax error. End-of-fle marker expected at the end of file.');
  346. }
  347. $this->_stringParser->offset--;
  348. /**
  349. * Go to end of cross-reference table offset
  350. */
  351. while (Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )&&
  352. ($this->_stringParser->offset > 0)) {
  353. $this->_stringParser->offset--;
  354. }
  355. /**
  356. * Go to the start of cross-reference table offset
  357. */
  358. while ( (!Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) ))&&
  359. ($this->_stringParser->offset > 0)) {
  360. $this->_stringParser->offset--;
  361. }
  362. /**
  363. * Go to the end of 'startxref' keyword
  364. */
  365. while (Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )&&
  366. ($this->_stringParser->offset > 0)) {
  367. $this->_stringParser->offset--;
  368. }
  369. /**
  370. * Go to the white space (eol marker) before 'startxref' keyword
  371. */
  372. $this->_stringParser->offset -= 9;
  373. $nextLexeme = $this->_stringParser->readLexeme();
  374. if ($nextLexeme != 'startxref') {
  375. throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', $this->_stringParser->offset-strlen($nextLexeme)));
  376. }
  377. $startXref = $this->_stringParser->readLexeme();
  378. if (!ctype_digit($startXref)) {
  379. throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. Cross-reference table offset must contain only digits. Offset - 0x%X.', $this->_stringParser->offset-strlen($nextLexeme)));
  380. }
  381. $this->_trailer = $this->_loadXRefTable($startXref);
  382. $factory->setObjectCount($this->_trailer->Size->value);
  383. }
  384. /**
  385. * Object destructor
  386. */
  387. public function __destruct()
  388. {
  389. $this->_stringParser->cleanUp();
  390. }
  391. }