PageRenderTime 28ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 0ms

/vivoManagement/app/Plugin/CakePdf/Vendor/tcpdf/tcpdf_parser.php

https://bitbucket.org/vsposato/vivo-tools
PHP | 501 lines | 332 code | 17 blank | 152 comment | 52 complexity | 6aafaee284e918b7661c58e9c247dae3 MD5 | raw file
  1. <?php
  2. //============================================================+
  3. // File name : tcpdf_parser.php
  4. // Version : 1.0.000
  5. // Begin : 2011-05-23
  6. // Last Update : 2012-01-28
  7. // Author : Nicola Asuni - Tecnick.com LTD - Manor Coach House, Church Hill, Aldershot, Hants, GU12 4RQ, UK - www.tecnick.com - info@tecnick.com
  8. // License : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3
  9. // -------------------------------------------------------------------
  10. // Copyright (C) 2011-2012 Nicola Asuni - Tecnick.com LTD
  11. //
  12. // This file is part of TCPDF software library.
  13. //
  14. // TCPDF is free software: you can redistribute it and/or modify it
  15. // under the terms of the GNU Lesser General Public License as
  16. // published by the Free Software Foundation, either version 3 of the
  17. // License, or (at your option) any later version.
  18. //
  19. // TCPDF is distributed in the hope that it will be useful, but
  20. // WITHOUT ANY WARRANTY; without even the implied warranty of
  21. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  22. // See the GNU Lesser General Public License for more details.
  23. //
  24. // You should have received a copy of the License
  25. // along with TCPDF. If not, see
  26. // <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
  27. //
  28. // See LICENSE.TXT file for more information.
  29. // -------------------------------------------------------------------
  30. //
  31. // Description : This is a PHP class for parsing PDF documents.
  32. //
  33. //============================================================+
  34. /**
  35. * @file
  36. * This is a PHP class for parsing PDF documents.<br>
  37. * @package com.tecnick.tcpdf
  38. * @author Nicola Asuni
  39. * @version 1.0.000
  40. */
  41. // include class for decoding filters
  42. require_once(dirname(__FILE__).'/tcpdf_filters.php');
  43. /**
  44. * @class TCPDF_PARSER
  45. * This is a PHP class for parsing PDF documents.<br>
  46. * @package com.tecnick.tcpdf
  47. * @brief This is a PHP class for parsing PDF documents..
  48. * @version 1.0.000
  49. * @author Nicola Asuni - info@tecnick.com
  50. */
  51. class TCPDF_PARSER {
  52. /**
  53. * Raw content of the PDF document.
  54. * @private
  55. */
  56. private $pdfdata = '';
  57. /**
  58. * XREF data.
  59. * @protected
  60. */
  61. protected $xref = array();
  62. /**
  63. * Array of PDF objects.
  64. * @protected
  65. */
  66. protected $objects = array();
  67. /**
  68. * Class object for decoding filters.
  69. * @private
  70. */
  71. private $FilterDecoders;
  72. // -----------------------------------------------------------------------------
  /**
   * Parse a PDF document and build the array of its objects.
   *
   * The cross-reference data is read first, then every object it lists is parsed
   * (streams are decoded). The raw document is only needed while parsing and is
   * released before the constructor returns.
   * @param $data (string) PDF data to parse.
   * @public
   * @since 1.0.000 (2011-05-24)
   */
  public function __construct($data) {
      if (empty($data)) {
          $this->Error('Empty PDF data.');
      }
      $this->pdfdata = $data;
      // initialize class for decoding filters
      $this->FilterDecoders = new TCPDF_FILTERS();
      // get xref and trailer data
      $this->xref = $this->getXrefData();
      if (!isset($this->xref['xref'])) {
          // the key is only created when at least one in-use entry is found
          $this->xref['xref'] = array();
      }
      // parse all document objects
      $this->objects = array();
      foreach ($this->xref['xref'] as $obj => $offset) {
          // an object may already have been parsed while decoding another one
          if (!isset($this->objects[$obj])) {
              $this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
          }
      }
      // release some memory
      $this->pdfdata = '';
  }
  /**
   * Give access to the result of the parsing.
   * @return (array) Two-element array: the xref data at index 0 and the parsed objects at index 1.
   * @public
   * @since 1.0.000 (2011-06-26)
   */
  public function getParsedData() {
      $parsed = array();
      $parsed[] = $this->xref;
      $parsed[] = $this->objects;
      return $parsed;
  }
  /**
   * Get xref (cross-reference table) and trailer data from PDF document data.
   *
   * With the default offset the last "startxref" keyword of the document is used
   * to locate the most recent table. A non-zero offset is taken as the byte
   * position of a table; this is how a section referenced by a trailer /Prev entry
   * is read. Entries and trailer data already present in $xref are never
   * overwritten, so the values read first (the most recent ones) are kept.
   * @param $offset (int) xref offset (if known).
   * @param $xref (array) previous xref array (if any).
   * @return Array containing xref and trailer data.
   * @protected
   * @since 1.0.000 (2011-05-24)
   */
  protected function getXrefData($offset=0, $xref=array()) {
      $offset = intval($offset);
      if ($offset == 0) {
          // find last startxref
          if (preg_match_all('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_SET_ORDER) == 0) {
              $this->Error('Unable to find startxref');
          }
          $matches = array_pop($matches);
          $startxref = intval($matches[1]);
      } else {
          // the position of the table is already known
          $startxref = $offset;
      }
      // check xref position (strict test: strpos() returns false when nothing is found)
      if (($startxref < 0) OR ($startxref >= strlen($this->pdfdata)) OR (strpos($this->pdfdata, 'xref', $startxref) !== $startxref)) {
          $this->Error('Unable to find xref');
      }
      // extract xref data (object indexes and offsets)
      $offset = $startxref + 5;
      // initialize object number
      $obj_num = 0;
      while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
          if (strlen(trim(substr($this->pdfdata, $offset, ($matches[0][1] - $offset)))) > 0) {
              // something other than white space precedes the match: the table is over
              break;
          }
          $offset = (strlen($matches[0][0]) + $matches[0][1]);
          if ($matches[3][0] == 'n') {
              // create unique object index: [object number]_[generation number]
              $index = $obj_num.'_'.intval($matches[2][0]);
              // check if object already exist
              if (!isset($xref['xref'][$index])) {
                  // store object offset position
                  $xref['xref'][$index] = intval($matches[1][0]);
              }
              ++$obj_num;
              // skip the two-character end of the entry
              $offset += 2;
          } elseif ($matches[3][0] == 'f') {
              ++$obj_num;
              $offset += 2;
          } else {
              // subsection header: object number (index) of the first entry
              $obj_num = intval($matches[1][0]);
          }
      }
      // get trailer data
      if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
          $trailer_data = $matches[1][0];
          if (!isset($xref['trailer'])) {
              // get only the last updated version
              $xref['trailer'] = array();
              // parse trailer_data
              if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
                  $xref['trailer']['size'] = intval($matches[1]);
              }
              // entries holding an indirect reference, stored as [object number]_[generation number]
              $references = array('root' => 'Root', 'encrypt' => 'Encrypt', 'info' => 'Info');
              foreach ($references as $key => $name) {
                  if (preg_match('/'.$name.'[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
                      $xref['trailer'][$key] = intval($matches[1]).'_'.intval($matches[2]);
                  }
              }
              if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
                  $xref['trailer']['id'] = array();
                  $xref['trailer']['id'][0] = $matches[1];
                  $xref['trailer']['id'][1] = $matches[2];
              }
          }
          if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
              $prev = intval($matches[1]);
              // a zero or self-referencing /Prev would make the recursion endless
              if (($prev > 0) AND ($prev != $startxref)) {
                  // get previous xref
                  $xref = $this->getXrefData($prev, $xref);
              }
          }
      } else {
          $this->Error('Unable to find trailer');
      }
      return $xref;
  }
  /**
   * Get object type, raw value and offset to next object.
   *
   * Leading white space is skipped and one token is read. Arrays and dictionaries
   * are read recursively: their value is the list of the elements they contain.
   * When nothing can be recognised (unknown token or end of data) the returned
   * type is an empty string and the returned offset is the position that was
   * examined, so callers can detect that no progress was made.
   * NOTE: the type tags 'ojbref' and 'ojb' are misspelled; they are kept unchanged
   * because they are part of the data returned to the users of this class.
   * @param $offset (int) Object offset.
   * @return array containing object type, raw value and offset to next object
   * @protected
   * @since 1.0.000 (2011-06-20)
   */
  protected function getRawObject($offset=0) {
      $objtype = ''; // object type to be returned
      $objval = ''; // object value to be returned
      // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
      $offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
      if (!isset($this->pdfdata[$offset])) {
          // end of data
          return array($objtype, $objval, $offset);
      }
      // get first char
      $char = $this->pdfdata[$offset];
      // get object type
      switch ($char) {
          case '%': { // \x25 PERCENT SIGN
              // skip comment and search for next token
              $next = strcspn($this->pdfdata, "\r\n", $offset);
              if ($next > 0) {
                  $offset += $next;
                  return $this->getRawObject($offset);
              }
              break;
          }
          case '/': { // \x2F SOLIDUS
              // name object
              $objtype = $char;
              ++$offset;
              if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
                  $objval = $matches[1]; // unescaped value
                  $offset += strlen($objval);
              }
              break;
          }
          case '(': // \x28 LEFT PARENTHESIS
          case ')': { // \x29 RIGHT PARENTHESIS
              // literal string object
              $objtype = $char;
              ++$offset;
              $strpos = $offset;
              if ($char == '(') {
                  // nested (balanced) parentheses are part of the string
                  $open_bracket = 1;
                  while ($open_bracket > 0) {
                      if (!isset($this->pdfdata[$strpos])) {
                          break;
                      }
                      $ch = $this->pdfdata[$strpos];
                      switch ($ch) {
                          case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
                              // skip next character
                              ++$strpos;
                              break;
                          }
                          case '(': { // LEFT PARENTHESIS (28h)
                              ++$open_bracket;
                              break;
                          }
                          case ')': { // RIGHT PARENTHESIS (29h)
                              --$open_bracket;
                              break;
                          }
                      }
                      ++$strpos;
                  }
                  $objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
                  $offset = $strpos;
              }
              break;
          }
          case '[': // \x5B LEFT SQUARE BRACKET
          case ']': { // \x5D RIGHT SQUARE BRACKET
              // array object
              $objtype = $char;
              ++$offset;
              if ($char == '[') {
                  // get array content
                  $objval = array();
                  do {
                      $oldoffset = $offset;
                      // get element
                      $element = $this->getRawObject($offset);
                      $offset = $element[2];
                      $objval[] = $element;
                      // stop as well when nothing was consumed (truncated or unknown data)
                  } while (($element[0] != ']') AND ($offset != $oldoffset));
                  // remove closing delimiter
                  array_pop($objval);
              }
              break;
          }
          case '<': // \x3C LESS-THAN SIGN
          case '>': { // \x3E GREATER-THAN SIGN
              if (isset($this->pdfdata[($offset + 1)]) AND ($this->pdfdata[($offset + 1)] == $char)) {
                  // dictionary object
                  $objtype = $char.$char;
                  $offset += 2;
                  if ($char == '<') {
                      // get dictionary content (keys and values in sequence)
                      $objval = array();
                      do {
                          $oldoffset = $offset;
                          // get element
                          $element = $this->getRawObject($offset);
                          $offset = $element[2];
                          $objval[] = $element;
                          // stop as well when nothing was consumed (truncated or unknown data)
                      } while (($element[0] != '>>') AND ($offset != $oldoffset));
                      // remove closing delimiter
                      array_pop($objval);
                  }
              } else {
                  // hexadecimal string object
                  $objtype = $char;
                  ++$offset;
                  // \G anchors the match at $offset without copying the remaining data;
                  // white space is allowed between the digits and is not part of the value
                  if (($char == '<') AND (preg_match('/\G([0-9A-Fa-f\x00\x09\x0a\x0c\x0d\x20]*)>/', $this->pdfdata, $matches, 0, $offset) == 1)) {
                      $objval = str_replace(array("\x00", "\x09", "\x0a", "\x0c", "\x0d", "\x20"), '', $matches[1]);
                      $offset += strlen($matches[0]);
                  }
              }
              break;
          }
          default: {
              if (substr($this->pdfdata, $offset, 6) == 'endobj') {
                  // indirect object
                  $objtype = 'endobj';
                  $offset += 6;
              } elseif (substr($this->pdfdata, $offset, 4) == 'null') {
                  // null object
                  $objtype = 'null';
                  $offset += 4;
                  $objval = 'null';
              } elseif (substr($this->pdfdata, $offset, 4) == 'true') {
                  // boolean true object
                  $objtype = 'boolean';
                  $offset += 4;
                  $objval = 'true';
              } elseif (substr($this->pdfdata, $offset, 5) == 'false') {
                  // boolean false object
                  $objtype = 'boolean';
                  $offset += 5;
                  $objval = 'false';
              } elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
                  // start stream object
                  $objtype = 'stream';
                  $offset += 6;
                  // the keyword is followed by one end-of-line marker that is not part of the data
                  $eol = 0;
                  if (substr($this->pdfdata, $offset, 2) === "\r\n") {
                      $eol = 2;
                  } elseif (isset($this->pdfdata[$offset]) AND (($this->pdfdata[$offset] == "\n") OR ($this->pdfdata[$offset] == "\r"))) {
                      $eol = 1;
                  }
                  if ($eol > 0) {
                      // plain search: a regular expression may fail on large streams
                      $endpos = strpos($this->pdfdata, 'endstream', ($offset + $eol));
                      if ($endpos !== false) {
                          $objval = substr($this->pdfdata, ($offset + $eol), ($endpos - $offset - $eol));
                          $offset = $endpos + 9;
                      }
                  }
              } elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
                  // end stream object
                  $objtype = 'endstream';
                  $offset += 9;
              } elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
                  // indirect object reference
                  $objtype = 'ojbref';
                  $offset += strlen($matches[0]);
                  $objval = intval($matches[1]).'_'.intval($matches[2]);
              } elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
                  // object start
                  $objtype = 'ojb';
                  $objval = intval($matches[1]).'_'.intval($matches[2]);
                  $offset += strlen($matches[0]);
              } elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
                  // numeric object
                  $objtype = 'numeric';
                  $objval = substr($this->pdfdata, $offset, $numlen);
                  $offset += $numlen;
              }
              break;
          }
      }
      return array($objtype, $objval, $offset);
  }
  /**
   * Get content of indirect object.
   *
   * The object must start exactly at the given offset with "[number] [generation] obj";
   * otherwise it is considered undefined and the null object is returned.
   * Elements are read until "endobj" (which is not part of the result) or until
   * no further element can be read.
   * @param $obj_ref (string) Object number and generation number separated by underscore character.
   * @param $offset (int) Object offset.
   * @param $decoding (boolean) If true decode streams.
   * @return array containing object data.
   * @protected
   * @since 1.0.000 (2011-05-24)
   */
  protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
      $obj = explode('_', $obj_ref);
      if (count($obj) != 2) {
          $this->Error('Invalid object reference: '.$obj_ref);
          return;
      }
      $offset = intval($offset);
      $objref = $obj[0].' '.$obj[1].' obj';
      // strict test: strpos() returns false when nothing is found
      if (($offset < 0) OR ($offset > strlen($this->pdfdata)) OR (strpos($this->pdfdata, $objref, $offset) !== $offset)) {
          // an indirect reference to an undefined object shall be considered a reference to the null object
          return array('null', 'null', $offset);
      }
      // starting position of object content
      $offset += strlen($objref);
      // get array of object content
      $objdata = array();
      $i = 0; // object main index
      do {
          $oldoffset = $offset;
          // get element
          $element = $this->getRawObject($offset);
          $offset = $element[2];
          // decode stream using stream's dictionary information (the element read just before)
          if ($decoding AND ($element[0] == 'stream') AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == '<<')) {
              $element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
          }
          $objdata[$i] = $element;
          ++$i;
          // stop as well when nothing was consumed (missing endobj or unknown data)
      } while (($element[0] != 'endobj') AND ($offset != $oldoffset));
      // remove closing delimiter
      array_pop($objdata);
      // return raw object content
      return $objdata;
  }
  /**
   * Get the content of object, resolving indirect object reference if necessary.
   *
   * A reference is resolved from the objects already parsed or, failing that, by
   * parsing the object at the offset recorded in the cross-reference table
   * (streams are not decoded in that case). Anything that is not a reference, or
   * a reference to an object missing from the table, is returned unchanged.
   * @param $obj (array) Object element: type at index 0, value at index 1.
   * @return array containing object data.
   * @protected
   * @since 1.0.000 (2011-06-26)
   */
  protected function getObjectVal($obj) {
      // 'ojbref' is the (misspelled) tag actually produced by getRawObject()
      if (($obj[0] === 'objref') OR ($obj[0] === 'ojbref')) {
          $ref = $obj[1];
          if (isset($this->objects[$ref])) {
              // this object has been already parsed
              return $this->objects[$ref];
          }
          // object offsets are stored under the 'xref' key of the xref data
          if (isset($this->xref['xref'][$ref])) {
              // parse new object
              $this->objects[$ref] = $this->getIndirectObject($ref, $this->xref['xref'][$ref], false);
              return $this->objects[$ref];
          }
      }
      return $obj;
  }
  /**
   * Decode the specified stream.
   *
   * The stream is cut to the length declared in its dictionary when that length
   * is a direct number smaller than the data. Filters are applied in the order
   * they are listed; decoding stops at the first filter for which no decoder is
   * available, and that filter and all the following ones are returned as
   * remaining.
   * @param $sdic (array) Stream's dictionary array.
   * @param $stream (string) Stream to decode.
   * @return array containing decoded stream data and remaining filters.
   * @protected
   * @since 1.0.000 (2011-06-22)
   */
  protected function decodeStream($sdic, $stream) {
      // get stream length and filters
      $slength = strlen($stream);
      $filters = array();
      foreach ($sdic as $k => $v) {
          if ($v[0] == '/') {
              if (($v[1] == 'Length') AND (isset($sdic[($k + 1)])) AND ($sdic[($k + 1)][0] == 'numeric')) {
                  // get declared stream length
                  $declength = intval($sdic[($k + 1)][1]);
                  if ($declength < $slength) {
                      $stream = substr($stream, 0, $declength);
                      $slength = $declength;
                  }
              } elseif (($v[1] == 'Filter') AND (isset($sdic[($k + 1)]))) {
                  // resolve indirect object
                  $objval = $this->getObjectVal($sdic[($k + 1)]);
                  if (isset($objval[0]) AND is_array($objval[0])) {
                      // a resolved indirect object is a list of elements: use the first one
                      $objval = $objval[0];
                  }
                  $ftype = (isset($objval[0]) ? $objval[0] : '');
                  if ($ftype == '/') {
                      // single filter
                      $filters[] = $objval[1];
                  } elseif ($ftype == '[') {
                      // array of filters
                      foreach ($objval[1] as $flt) {
                          if ($flt[0] == '/') {
                              $filters[] = $flt[1];
                          }
                      }
                  }
              }
          }
      }
      // decode the stream
      $remaining_filters = array();
      foreach ($filters as $filter) {
          // once a filter had to be skipped, the following ones cannot be applied either
          if (empty($remaining_filters) AND in_array($filter, $this->FilterDecoders->getAvailableFilters())) {
              $stream = $this->FilterDecoders->decodeFilter($filter, $stream);
          } else {
              // add missing filter to array
              $remaining_filters[] = $filter;
          }
      }
      return array($stream, $remaining_filters);
  }
  /**
   * Fatal error handler: print the message with the parser's prefix and stop the script.
   * @param $msg (string) The error message
   * @public
   * @since 1.0.000 (2011-05-23)
   */
  public function Error($msg) {
      $output = '<strong>TCPDF_PARSER ERROR: </strong>'.$msg;
      // print the message and terminate the execution
      exit($output);
  }
  480. } // END OF TCPDF_PARSER CLASS
  481. //============================================================+
  482. // END OF FILE
  483. //============================================================+