PageRenderTime 28ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/test/tcpdf/tcpdf_parser.php

https://bitbucket.org/cwtaylor/airtight-fig
PHP | 811 lines | 564 code | 21 blank | 226 comment | 126 complexity | 5c546b7f37191a754b6743a640c6a0cd MD5 | raw file
  1. <?php
  2. //============================================================+
  3. // File name : tcpdf_parser.php
  4. // Version : 1.0.014
  5. // Begin : 2011-05-23
  6. // Last Update : 2014-02-18
  7. // Author : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - info@tecnick.com
  8. // License : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3
  9. // -------------------------------------------------------------------
  10. // Copyright (C) 2011-2014 Nicola Asuni - Tecnick.com LTD
  11. //
  12. // This file is part of TCPDF software library.
  13. //
  14. // TCPDF is free software: you can redistribute it and/or modify it
  15. // under the terms of the GNU Lesser General Public License as
  16. // published by the Free Software Foundation, either version 3 of the
  17. // License, or (at your option) any later version.
  18. //
  19. // TCPDF is distributed in the hope that it will be useful, but
  20. // WITHOUT ANY WARRANTY; without even the implied warranty of
  21. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  22. // See the GNU Lesser General Public License for more details.
  23. //
  24. // You should have received a copy of the License
  25. // along with TCPDF. If not, see
  26. // <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
  27. //
  28. // See LICENSE.TXT file for more information.
  29. // -------------------------------------------------------------------
  30. //
  31. // Description : This is a PHP class for parsing PDF documents.
  32. //
  33. //============================================================+
  34. /**
  35. * @file
  36. * This is a PHP class for parsing PDF documents.<br>
  37. * @package com.tecnick.tcpdf
  38. * @author Nicola Asuni
  39. * @version 1.0.014
  40. */
  41. // include class for decoding filters
  42. require_once(dirname(__FILE__).'/include/tcpdf_filters.php');
  /**
   * @class TCPDF_PARSER
   * This is a PHP class for parsing PDF documents.<br>
   * The constructor locates the cross-reference data (classic table or
   * cross-reference stream), follows the /Prev chain and tokenizes every
   * indirect object that has a positive file offset.
   * @package com.tecnick.tcpdf
   * @brief This is a PHP class for parsing PDF documents.
   * @version 1.0.010
   * @author Nicola Asuni - info@tecnick.com
   */
  class TCPDF_PARSER {

      /**
       * Raw content of the PDF document (emptied once parsing is complete).
       * @private
       */
      private $pdfdata = '';

      /**
       * XREF data: 'xref' maps "[object]_[generation]" to a byte offset, 'trailer' holds trailer entries.
       * @protected
       */
      protected $xref = array();

      /**
       * Array of PDF objects, indexed by "[object]_[generation]".
       * @protected
       */
      protected $objects = array();

      /**
       * Class object for decoding filters.
       * @private
       */
      private $FilterDecoders;

      /**
       * Array of configuration parameters.
       * @private
       */
      private $cfg = array(
          'die_for_errors' => false,
          'ignore_filter_decoding_errors' => true,
          'ignore_missing_filter_decoders' => true,
      );

      /**
       * Offsets of the cross-reference sections already decoded (used to stop circular /Prev chains).
       * @private
       */
      private $xrefVisited = array();

      // -----------------------------------------------------------------------------

      /**
       * Parse a PDF document and build the array of objects.
       * @param $data (string) PDF data to parse.
       * @param $cfg (array) Array of configuration parameters:
       * 'die_for_errors' : if true terminate the program execution in case of error, otherwise throw an exception;
       * 'ignore_filter_decoding_errors' : if true ignore filter decoding errors;
       * 'ignore_missing_filter_decoders' : if true ignore missing filter decoding errors.
       * @public
       * @since 1.0.000 (2011-05-24)
       */
      public function __construct($data, $cfg=array()) {
          // apply the configuration first, so that even the earliest errors honour 'die_for_errors'
          $this->setConfig($cfg);
          if (empty($data)) {
              $this->Error('Empty PDF data.');
          }
          // find the pdf header starting position
          if (($trimpos = strpos($data, '%PDF-')) === false) {
              $this->Error('Invalid PDF data: missing %PDF header.');
          }
          // get PDF content string (anything before the header is discarded)
          $this->pdfdata = substr($data, $trimpos);
          // get xref and trailer data
          $this->xref = $this->getXrefData();
          // parse all document objects
          $this->objects = array();
          if (isset($this->xref['xref']) && is_array($this->xref['xref'])) {
              foreach ($this->xref['xref'] as $obj => $offset) {
                  // only objects with a positive offset live directly in the file body
                  if (!isset($this->objects[$obj]) && ($offset > 0)) {
                      $this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
                  }
              }
          }
          // release some memory
          $this->pdfdata = '';
      }

      /**
       * Set the configuration parameters.
       * @param $cfg (array) Array of configuration parameters:
       * 'die_for_errors' : if true terminate the program execution in case of error, otherwise throw an exception;
       * 'ignore_filter_decoding_errors' : if true ignore filter decoding errors;
       * 'ignore_missing_filter_decoders' : if true ignore missing filter decoding errors.
       * @protected
       */
      protected function setConfig($cfg) {
          // only the known keys are copied; every value is normalized to a boolean
          foreach (array_keys($this->cfg) as $name) {
              if (isset($cfg[$name])) {
                  $this->cfg[$name] = (bool) $cfg[$name];
              }
          }
      }

      /**
       * Return an array of parsed PDF document objects.
       * @return (array) Two-element array: the xref data and the parsed objects.
       * @public
       * @since 1.0.000 (2011-06-26)
       */
      public function getParsedData() {
          return array($this->xref, $this->objects);
      }

      /**
       * Get Cross-Reference (xref) table and trailer data from PDF document data.
       * @param $offset (int) xref offset (if known); 0 means "use the last startxref of the document".
       * @param $xref (array) previous xref array (if any).
       * @return Array containing xref and trailer data.
       * @protected
       * @since 1.0.000 (2011-05-24)
       */
      protected function getXrefData($offset=0, $xref=array()) {
          $offset = intval($offset);
          $pdflen = strlen($this->pdfdata);
          if (($offset == 0) && empty($xref)) {
              // fresh top-level lookup: forget the sections visited by a previous run
              $this->xrefVisited = array();
          }
          if (($offset < 0) || ($offset > $pdflen)) {
              // an offset outside the document cannot point to any cross-reference data
              $this->Error('Unable to find startxref');
          }
          $startxref_pattern = '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i';
          if ($offset == 0) {
              // find last startxref
              if (preg_match_all($startxref_pattern, $this->pdfdata, $matches, PREG_SET_ORDER, $offset) == 0) {
                  $this->Error('Unable to find startxref');
              }
              $matches = array_pop($matches);
              $startxref = $matches[1];
          } elseif (strpos($this->pdfdata, 'xref', $offset) === $offset) {
              // Already pointing at the xref table
              $startxref = $offset;
          } elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
              // Cross-Reference Stream object
              $startxref = $offset;
          } elseif (preg_match($startxref_pattern, $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
              // startxref found
              $startxref = $matches[1][0];
          } else {
              $this->Error('Unable to find startxref');
          }
          // the value may come from a regular expression capture: normalize it before using it as an offset
          $startxref = intval($startxref);
          if (($startxref < 0) || ($startxref >= $pdflen)) {
              $this->Error('Unable to find xref');
          }
          if (isset($this->xrefVisited[$startxref])) {
              // circular /Prev chain: this section has already been merged
              return $xref;
          }
          $this->xrefVisited[$startxref] = true;
          // check xref position (strict comparison: strpos() returns false when nothing is found)
          if (strpos($this->pdfdata, 'xref', $startxref) === $startxref) {
              // Cross-Reference
              $xref = $this->decodeXref($startxref, $xref);
          } else {
              // Cross-Reference Stream
              $xref = $this->decodeXrefStream($startxref, $xref);
          }
          if (empty($xref)) {
              $this->Error('Unable to find xref');
          }
          return $xref;
      }

      /**
       * Decode the Cross-Reference section
       * @param $startxref (int) Offset at which the xref section starts (position of the 'xref' keyword).
       * @param $xref (array) Previous xref array (if any).
       * @return Array containing xref and trailer data.
       * @protected
       * @since 1.0.000 (2011-06-20)
       */
      protected function decodeXref($startxref, $xref=array()) {
          if (!isset($xref['xref'])) {
              // make sure the key exists even when the table lists no in-use object
              $xref['xref'] = array();
          }
          $startxref += 4; // 4 is the length of the word 'xref'
          // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
          $offset = $startxref + strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $startxref);
          // initialize object number
          $obj_num = 0;
          // read cross-reference entries or subsection headers; \G anchors the match at $offset,
          // so the loop stops as soon as the next bytes are not an entry (i.e. we are on another section)
          while (preg_match('/\G([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
              $offset += strlen($matches[0][0]);
              if ($matches[3][0] == 'n') {
                  // create unique object index: [object number]_[generation number]
                  $index = $obj_num.'_'.intval($matches[2][0]);
                  // the most recent section is decoded first, so an existing entry must not be replaced
                  if (!isset($xref['xref'][$index])) {
                      // store object offset position
                      $xref['xref'][$index] = intval($matches[1][0]);
                  }
                  ++$obj_num;
              } elseif ($matches[3][0] == 'f') {
                  // free entry: it only consumes an object number
                  ++$obj_num;
              } else {
                  // subsection header: first object number of the subsection
                  $obj_num = intval($matches[1][0]);
              }
          }
          // get trailer data
          if (preg_match('/trailer[\s]*<<(.*)>>/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
              $trailer_data = $matches[1][0];
              if (empty($xref['trailer'])) {
                  // get only the last updated version
                  $xref['trailer'] = array();
                  // parse trailer_data
                  if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
                      $xref['trailer']['size'] = intval($matches[1]);
                  }
                  if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
                      $xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
                  }
                  if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
                      $xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
                  }
                  if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
                      $xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
                  }
                  if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
                      $xref['trailer']['id'] = array();
                      $xref['trailer']['id'][0] = $matches[1];
                      $xref['trailer']['id'][1] = $matches[2];
                  }
              }
              if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
                  // get previous xref (getXrefData() refuses to decode the same section twice)
                  $xref = $this->getXrefData(intval($matches[1]), $xref);
              }
          } else {
              $this->Error('Unable to find trailer');
          }
          return $xref;
      }

      /**
       * Decode the Cross-Reference Stream section
       * @param $startxref (int) Offset at which the xref section starts.
       * @param $xref (array) Previous xref array (if any).
       * @return Array containing xref and trailer data.
       * @protected
       * @since 1.0.003 (2013-03-16)
       */
      protected function decodeXrefStream($startxref, $xref=array()) {
          // try to read Cross-Reference Stream
          $xrefobj = $this->getRawObject($startxref);
          $xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
          // get only the last updated version of the trailer
          $filltrailer = empty($xref['trailer']);
          if ($filltrailer) {
              $xref['trailer'] = array();
          }
          if (!isset($xref['xref'])) {
              $xref['xref'] = array();
          }
          $valid_crs = false;
          $columns = 0;
          $index_sections = array();
          // the stream dictionary is a flat list of tokens: key, value, key, value, ...
          $sarr = (isset($xrefcrs[0][1]) && is_array($xrefcrs[0][1])) ? $xrefcrs[0][1] : array();
          foreach ($sarr as $k => $v) {
              if ($v[0] != '/') {
                  // only name tokens can be dictionary keys
                  continue;
              }
              $next = isset($sarr[($k + 1)]) ? $sarr[($k + 1)] : null;
              if ($v[1] == 'Type') {
                  if (($next !== null) && ($next[0] == '/') && ($next[1] == 'XRef')) {
                      $valid_crs = true;
                  }
              } elseif ($v[1] == 'Index') {
                  // pairs of integers: first object number and number of entries of each subsection
                  if (($next !== null) && is_array($next[1])) {
                      for ($i = 0; isset($next[1][$i][1]) && isset($next[1][($i + 1)][1]); $i += 2) {
                          $index_sections[] = array(intval($next[1][$i][1]), intval($next[1][($i + 1)][1]));
                      }
                  }
              } elseif ($v[1] == 'Prev') {
                  if (($next !== null) && ($next[0] == 'numeric')) {
                      // get previous xref offset
                      $prevxref = intval($next[1]);
                  }
              } elseif ($v[1] == 'W') {
                  // number of bytes (in the decoded stream) of the corresponding field
                  if (($next !== null) && is_array($next[1]) && isset($next[1][0][1]) && isset($next[1][1][1]) && isset($next[1][2][1])) {
                      $wb = array(
                          max(0, intval($next[1][0][1])),
                          max(0, intval($next[1][1][1])),
                          max(0, intval($next[1][2][1])),
                      );
                  }
              } elseif ($v[1] == 'DecodeParms') {
                  if (($next !== null) && isset($next[1]) && is_array($next[1])) {
                      $decpar = $next[1];
                      foreach ($decpar as $kdc => $vdc) {
                          if (($vdc[0] != '/') || !isset($decpar[($kdc + 1)]) || ($decpar[($kdc + 1)][0] != 'numeric')) {
                              continue;
                          }
                          if ($vdc[1] == 'Columns') {
                              $columns = intval($decpar[($kdc + 1)][1]);
                          } elseif ($vdc[1] == 'Predictor') {
                              $declared_predictor = intval($decpar[($kdc + 1)][1]);
                          }
                      }
                  }
              } elseif ($filltrailer && ($next !== null)) {
                  if (($v[1] == 'Size') && ($next[0] == 'numeric')) {
                      $xref['trailer']['size'] = $next[1];
                  } elseif (($v[1] == 'Root') && ($next[0] == 'objref')) {
                      $xref['trailer']['root'] = $next[1];
                  } elseif (($v[1] == 'Info') && ($next[0] == 'objref')) {
                      $xref['trailer']['info'] = $next[1];
                  } elseif (($v[1] == 'Encrypt') && ($next[0] == 'objref')) {
                      $xref['trailer']['encrypt'] = $next[1];
                  } elseif (($v[1] == 'ID') && isset($next[1][0][1]) && isset($next[1][1][1])) {
                      $xref['trailer']['id'] = array();
                      $xref['trailer']['id'][0] = $next[1][0][1];
                      $xref['trailer']['id'][1] = $next[1][1][1];
                  }
              }
          }
          // decode data (the field widths are mandatory: without /W the rows cannot be split)
          if ($valid_crs && isset($wb) && isset($xrefcrs[1][3][0]) && is_string($xrefcrs[1][3][0])) {
              // convert the stream into a zero-based array of integers
              $bytes = ($xrefcrs[1][3][0] === '') ? array() : array_values(unpack('C*', $xrefcrs[1][3][0]));
              // A declared /Columns has always been treated as "rows carry a PNG filter tag";
              // an explicit /Predictor overrides that guess.
              $use_png = ($columns > 0);
              if (isset($declared_predictor)) {
                  if ($declared_predictor >= 10) {
                      $use_png = true;
                      if ($columns < 1) {
                          $columns = 1; // default value of /Columns
                      }
                  } elseif ($declared_predictor == 1) {
                      $use_png = false; // no prediction
                  }
              }
              $ddata = array();
              if ($use_png) {
                  // every row is one filter-type byte followed by $columns data bytes
                  $prev_row = array();
                  foreach (array_chunk($bytes, ($columns + 1)) as $k => $row) {
                      $ddata[$k] = array();
                      // get PNG predictor value
                      $row_predictor = (10 + $row[0]);
                      for ($i = 1; $i <= $columns; ++$i) {
                          if (!isset($row[$i])) {
                              break; // truncated last row
                          }
                          $j = ($i - 1); // index in the decoded row
                          $row_up = isset($prev_row[$j]) ? $prev_row[$j] : 0;
                          if ($j == 0) {
                              $row_left = 0;
                              $row_upleft = 0;
                          } else {
                              // PNG reconstruction works on the already decoded left byte, not on the raw one
                              $row_left = $ddata[$k][($j - 1)];
                              $row_upleft = isset($prev_row[($j - 1)]) ? $prev_row[($j - 1)] : 0;
                          }
                          $val = $row[$i];
                          switch ($row_predictor) {
                              case 10: { // PNG None
                                  break;
                              }
                              case 11: { // PNG Sub
                                  $val += $row_left;
                                  break;
                              }
                              case 12: { // PNG Up
                                  $val += $row_up;
                                  break;
                              }
                              case 13: { // PNG Average (integer division, rounding down)
                                  $val += (($row_left + $row_up) >> 1);
                                  break;
                              }
                              case 14: { // PNG Paeth
                                  // initial estimate
                                  $p = ($row_left + $row_up - $row_upleft);
                                  // distances
                                  $pa = abs($p - $row_left);
                                  $pb = abs($p - $row_up);
                                  $pc = abs($p - $row_upleft);
                                  // nearest neighbour; ties are resolved in the order left, up, upper-left
                                  if (($pa <= $pb) && ($pa <= $pc)) {
                                      $val += $row_left;
                                  } elseif ($pb <= $pc) {
                                      $val += $row_up;
                                  } else {
                                      $val += $row_upleft;
                                  }
                                  break;
                              }
                              default: {
                                  $this->Error('Unknown PNG predictor');
                                  break;
                              }
                          }
                          $ddata[$k][$j] = ($val & 0xff);
                      }
                      $prev_row = $ddata[$k];
                  }
              } else {
                  // no prediction: the stream is a plain sequence of fixed-width entries
                  $rowlen = ($wb[0] + $wb[1] + $wb[2]);
                  if ($rowlen > 0) {
                      $ddata = array_chunk($bytes, $rowlen);
                  }
              }
              // complete decoding: assemble the three big-endian fields of every entry
              $sdata = array();
              foreach ($ddata as $k => $row) {
                  $sdata[$k] = array(0, 0, 0);
                  if ($wb[0] == 0) {
                      // default type field
                      $sdata[$k][0] = 1;
                  }
                  $i = 0; // count bytes in the row
                  for ($c = 0; $c < 3; ++$c) {
                      for ($b = 0; $b < $wb[$c]; ++$b) {
                          if (isset($row[$i])) {
                              $sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
                          }
                          ++$i;
                      }
                  }
              }
              $ddata = array();
              // fill xref: object numbers follow the /Index subsections (default: one subsection starting at 0)
              $sections = empty($index_sections) ? array(array(0, count($sdata))) : $index_sections;
              $section = 0;
              $obj_num = $sections[0][0];
              $remaining = $sections[0][1];
              foreach ($sdata as $row) {
                  // move on to the next declared subsection once the current one is exhausted
                  while (($remaining <= 0) && isset($sections[($section + 1)])) {
                      ++$section;
                      $obj_num = $sections[$section][0];
                      $remaining = $sections[$section][1];
                  }
                  if ($row[0] == 1) {
                      // (n) objects that are in use but are not compressed
                      // create unique object index: [object number]_[generation number]
                      $index = $obj_num.'_'.$row[2];
                      // check if object already exist
                      if (!isset($xref['xref'][$index])) {
                          // store object offset position
                          $xref['xref'][$index] = $row[1];
                      }
                  } elseif ($row[0] == 2) {
                      // compressed objects
                      // $row[1] = object number of the object stream in which this object is stored
                      // $row[2] = index of this object within the object stream
                      $index = $row[1].'_0_'.$row[2];
                      $xref['xref'][$index] = -1;
                  }
                  // type 0 (free objects) and unknown types only consume an object number
                  ++$obj_num;
                  --$remaining;
              }
          } // end decoding data
          if (isset($prevxref)) {
              // get previous xref (getXrefData() refuses to decode the same section twice)
              $xref = $this->getXrefData($prevxref, $xref);
          }
          return $xref;
      }

      /**
       * Get object type, raw value and offset to next object
       * @param $offset (int) Object offset.
       * @return array containing object type, raw value and offset to next object;
       * the type is an empty string when nothing could be read at the given offset.
       * @protected
       * @since 1.0.000 (2011-06-20)
       */
      protected function getRawObject($offset=0) {
          $objtype = ''; // object type to be returned
          $objval = ''; // object value to be returned
          $offset = intval($offset);
          $pdflen = strlen($this->pdfdata);
          if (($offset < 0) || ($offset >= $pdflen)) {
              // nothing to read outside the document
              return array($objtype, $objval, $offset);
          }
          // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
          $offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
          if ($offset >= $pdflen) {
              // only white space was left
              return array($objtype, $objval, $offset);
          }
          // get first char
          $char = $this->pdfdata[$offset];
          // get object type
          switch ($char) {
              case '%': { // \x25 PERCENT SIGN
                  // skip comment and search for next token
                  $next = strcspn($this->pdfdata, "\r\n", $offset);
                  if ($next > 0) {
                      $offset += $next;
                      return $this->getRawObject($offset);
                  }
                  break;
              }
              case '/': { // \x2F SOLIDUS
                  // name object
                  $objtype = $char;
                  ++$offset;
                  if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
                      $objval = $matches[1]; // unescaped value
                      $offset += strlen($objval);
                  }
                  break;
              }
              case '(': // \x28 LEFT PARENTHESIS
              case ')': { // \x29 RIGHT PARENTHESIS
                  // literal string object
                  $objtype = $char;
                  ++$offset;
                  $strpos = $offset;
                  if ($char == '(') {
                      // scan up to the matching closing parenthesis
                      $open_bracket = 1;
                      while ($open_bracket > 0) {
                          if (!isset($this->pdfdata[$strpos])) {
                              break; // unterminated string
                          }
                          $ch = $this->pdfdata[$strpos];
                          if ($ch == '\\') {
                              // REVERSE SOLIDUS (5Ch) (Backslash): skip next character
                              ++$strpos;
                          } elseif ($ch == '(') {
                              ++$open_bracket;
                          } elseif ($ch == ')') {
                              --$open_bracket;
                          }
                          ++$strpos;
                      }
                      $objval = substr($this->pdfdata, $offset, max(0, ($strpos - $offset - 1)));
                      $offset = min($strpos, $pdflen);
                  }
                  break;
              }
              case '[': // \x5B LEFT SQUARE BRACKET
              case ']': { // \x5D RIGHT SQUARE BRACKET
                  // array object
                  $objtype = $char;
                  ++$offset;
                  if ($char == '[') {
                      // get array content
                      $objval = array();
                      do {
                          $oldoffset = $offset;
                          // get element
                          $element = $this->getRawObject($offset);
                          $offset = $element[2];
                          $objval[] = $element;
                          // stop on the closing delimiter, or when no progress is made (truncated data)
                      } while (($element[0] != ']') && ($offset != $oldoffset));
                      // remove closing delimiter
                      array_pop($objval);
                  }
                  break;
              }
              case '<': // \x3C LESS-THAN SIGN
              case '>': { // \x3E GREATER-THAN SIGN
                  if (isset($this->pdfdata[($offset + 1)]) && ($this->pdfdata[($offset + 1)] == $char)) {
                      // dictionary object
                      $objtype = $char.$char;
                      $offset += 2;
                      if ($char == '<') {
                          // get dictionary content as a flat list of tokens
                          $objval = array();
                          do {
                              $oldoffset = $offset;
                              // get element
                              $element = $this->getRawObject($offset);
                              $offset = $element[2];
                              $objval[] = $element;
                              // stop on the closing delimiter, or when no progress is made (truncated data)
                          } while (($element[0] != '>>') && ($offset != $oldoffset));
                          // remove closing delimiter
                          array_pop($objval);
                      }
                  } else {
                      // hexadecimal string object
                      $objtype = $char;
                      ++$offset;
                      // \G anchors the match at $offset without copying the rest of the document
                      if (($char == '<') && (preg_match('/\G([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]+)>/iU', $this->pdfdata, $matches, 0, $offset) == 1)) {
                          // remove white space characters
                          $objval = str_replace(array("\x09", "\x0a", "\x0c", "\x0d", "\x20"), '', $matches[1]);
                          $offset += strlen($matches[0]);
                      } elseif (($endpos = strpos($this->pdfdata, '>', $offset)) !== false) {
                          $offset = $endpos + 1;
                      }
                  }
                  break;
              }
              default: {
                  if (substr($this->pdfdata, $offset, 6) === 'endobj') {
                      // indirect object
                      $objtype = 'endobj';
                      $offset += 6;
                  } elseif (substr($this->pdfdata, $offset, 4) === 'null') {
                      // null object
                      $objtype = 'null';
                      $offset += 4;
                      $objval = 'null';
                  } elseif (substr($this->pdfdata, $offset, 4) === 'true') {
                      // boolean true object
                      $objtype = 'boolean';
                      $offset += 4;
                      $objval = 'true';
                  } elseif (substr($this->pdfdata, $offset, 5) === 'false') {
                      // boolean false object
                      $objtype = 'boolean';
                      $offset += 5;
                      $objval = 'false';
                  } elseif (substr($this->pdfdata, $offset, 6) === 'stream') {
                      // start stream object
                      $objtype = 'stream';
                      $offset += 6;
                      if (preg_match('/^([\r]?[\n])/isU', substr($this->pdfdata, $offset, 4), $matches) == 1) {
                          $offset += strlen($matches[0]);
                          // search from $offset in place: positions returned by the match are absolute
                          if (preg_match('/(endstream)[\x09\x0a\x0c\x0d\x20]/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) == 1) {
                              $objval = substr($this->pdfdata, $offset, ($matches[0][1] - $offset));
                              $offset = $matches[1][1];
                          }
                      }
                  } elseif (substr($this->pdfdata, $offset, 9) === 'endstream') {
                      // end stream object
                      $objtype = 'endstream';
                      $offset += 9;
                  } elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
                      // indirect object reference
                      $objtype = 'objref';
                      $offset += strlen($matches[0]);
                      $objval = intval($matches[1]).'_'.intval($matches[2]);
                  } elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
                      // object start
                      $objtype = 'obj';
                      $objval = intval($matches[1]).'_'.intval($matches[2]);
                      $offset += strlen($matches[0]);
                  } elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
                      // numeric object
                      $objtype = 'numeric';
                      $objval = substr($this->pdfdata, $offset, $numlen);
                      $offset += $numlen;
                  }
                  break;
              }
          }
          return array($objtype, $objval, $offset);
      }

      /**
       * Get content of indirect object.
       * @param $obj_ref (string) Object number and generation number separated by underscore character.
       * @param $offset (int) Object offset.
       * @param $decoding (boolean) If true decode streams.
       * @return array containing object data; array('null', 'null', offset) when the object is not found at the given offset.
       * @protected
       * @since 1.0.000 (2011-05-24)
       */
      protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
          $obj = is_string($obj_ref) ? explode('_', $obj_ref) : array();
          if (count($obj) != 2) {
              $this->Error('Invalid object reference: '.(is_scalar($obj_ref) ? $obj_ref : gettype($obj_ref)));
              return;
          }
          $objref = $obj[0].' '.$obj[1].' obj';
          $offset = intval($offset);
          if (($offset < 0) || ($offset >= strlen($this->pdfdata))) {
              // an offset outside the document cannot hold any object
              return array('null', 'null', $offset);
          }
          // ignore leading zeros
          $offset += strspn($this->pdfdata, '0', $offset);
          if (strpos($this->pdfdata, $objref, $offset) !== $offset) {
              // an indirect reference to an undefined object shall be considered a reference to the null object
              return array('null', 'null', $offset);
          }
          // starting position of object content
          $offset += strlen($objref);
          // get array of object content
          $objdata = array();
          $i = 0; // object main index
          do {
              $oldoffset = $offset;
              // get element
              $element = $this->getRawObject($offset);
              $offset = $element[2];
              // decode stream using stream's dictionary information
              if ($decoding && ($element[0] == 'stream') && (isset($objdata[($i - 1)][0])) && ($objdata[($i - 1)][0] == '<<')) {
                  $element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
              }
              $objdata[$i] = $element;
              ++$i;
              // stop on 'endobj', or when no progress is made (truncated data)
          } while (($element[0] != 'endobj') && ($offset != $oldoffset));
          // remove closing delimiter
          array_pop($objdata);
          // return raw object content
          return $objdata;
      }

      /**
       * Get the content of object, resolving indirect object reference if necessary.
       * @param $obj (string) Object value.
       * @return array containing object data: the list of tokens of the referenced object
       * when the reference can be resolved, otherwise the given token unchanged.
       * @protected
       * @since 1.0.000 (2011-06-26)
       */
      protected function getObjectVal($obj) {
          if (is_array($obj) && isset($obj[0]) && isset($obj[1]) && ($obj[0] === 'objref')) {
              // reference to indirect object
              $ref = $obj[1];
              if (isset($this->objects[$ref])) {
                  // this object has been already parsed
                  return $this->objects[$ref];
              }
              // object offsets are stored under the 'xref' key of the xref data
              if (isset($this->xref['xref'][$ref]) && ($this->xref['xref'][$ref] > 0)) {
                  // parse new object
                  $this->objects[$ref] = $this->getIndirectObject($ref, $this->xref['xref'][$ref], false);
                  return $this->objects[$ref];
              }
          }
          return $obj;
      }

      /**
       * Decode the specified stream.
       * @param $sdic (array) Stream's dictionary array.
       * @param $stream (string) Stream to decode.
       * @return array containing decoded stream data and remaining filters.
       * @protected
       * @since 1.0.000 (2011-06-22)
       */
      protected function decodeStream($sdic, $stream) {
          // get stream length and filters
          $slength = strlen($stream);
          if ($slength <= 0) {
              return array('', array());
          }
          $filters = array();
          foreach ($sdic as $k => $v) {
              if (($v[0] != '/') || !isset($sdic[($k + 1)])) {
                  continue;
              }
              if ($v[1] == 'Length') {
                  if ($sdic[($k + 1)][0] == 'numeric') {
                      // get declared stream length
                      $declength = intval($sdic[($k + 1)][1]);
                      if ($declength < $slength) {
                          // drop the bytes (usually an end-of-line) found before 'endstream'
                          $stream = substr($stream, 0, $declength);
                          $slength = $declength;
                      }
                  }
              } elseif ($v[1] == 'Filter') {
                  // resolve indirect object
                  $objval = $this->getObjectVal($sdic[($k + 1)]);
                  if (isset($objval[0]) && is_array($objval[0])) {
                      // a resolved reference is the token list of the object: the filter is its first token
                      $objval = $objval[0];
                  }
                  if ($objval[0] == '/') {
                      // single filter
                      $filters[] = $objval[1];
                  } elseif (($objval[0] == '[') && is_array($objval[1])) {
                      // array of filters
                      foreach ($objval[1] as $flt) {
                          if ($flt[0] == '/') {
                              $filters[] = $flt[1];
                          }
                      }
                  }
              }
          }
          // decode the stream
          $remaining_filters = array();
          $available = null; // fetched on first use only
          foreach ($filters as $filter) {
              if ($available === null) {
                  $available = TCPDF_FILTERS::getAvailableFilters();
              }
              if (in_array($filter, $available)) {
                  try {
                      $stream = TCPDF_FILTERS::decodeFilter($filter, $stream);
                  } catch (Exception $e) {
                      $emsg = $e->getMessage();
                      // messages starting with '~' are handled as "missing decoder" errors
                      $missing_decoder = (isset($emsg[0]) && ($emsg[0] == '~'));
                      if (($missing_decoder && !$this->cfg['ignore_missing_filter_decoders'])
                          || (!$missing_decoder && !$this->cfg['ignore_filter_decoding_errors'])) {
                          $this->Error($emsg);
                      }
                  }
              } else {
                  // add missing filter to array
                  $remaining_filters[] = $filter;
              }
          }
          return array($stream, $remaining_filters);
      }

      /**
       * Print an error message and die if the 'die_for_errors' configuration parameter is true, otherwise throw an exception.
       * @param $msg (string) The error message
       * @public
       * @since 1.0.000 (2011-05-23)
       */
      public function Error($msg) {
          if ($this->cfg['die_for_errors']) {
              die('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
          } else {
              throw new Exception('TCPDF_PARSER ERROR: '.$msg);
          }
      }

  } // END OF TCPDF_PARSER CLASS
  787. //============================================================+
  788. // END OF FILE
  789. //============================================================+