PageRenderTime 26ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/sites/all/libraries/fpdi/fpdi_pdf_parser.php

https://bitbucket.org/afeijo/quaizer
PHP | 408 lines | 205 code | 56 blank | 147 comment | 56 complexity | 5a7aa15081bbff4a59e2fd5d1823f4f6 MD5 | raw file
  1. <?php
  2. //
  3. // FPDI - Version 1.4.2
  4. //
  5. // Copyright 2004-2011 Setasign - Jan Slabon
  6. //
  7. // Licensed under the Apache License, Version 2.0 (the "License");
  8. // you may not use this file except in compliance with the License.
  9. // You may obtain a copy of the License at
  10. //
  11. // http://www.apache.org/licenses/LICENSE-2.0
  12. //
  13. // Unless required by applicable law or agreed to in writing, software
  14. // distributed under the License is distributed on an "AS IS" BASIS,
  15. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. // See the License for the specific language governing permissions and
  17. // limitations under the License.
  18. //
  19. require_once('pdf_parser.php');
  20. class fpdi_pdf_parser extends pdf_parser {
  21. /**
  22. * Pages
  23. * Index begins at 0
  24. *
  25. * @var array
  26. */
  27. var $pages;
  28. /**
  29. * Page count
  30. * @var integer
  31. */
  32. var $page_count;
  33. /**
  34. * actual page number
  35. * @var integer
  36. */
  37. var $pageno;
  38. /**
  39. * PDF Version of imported Document
  40. * @var string
  41. */
  42. var $pdfVersion;
  43. /**
  44. * FPDI Reference
  45. * @var object
  46. */
  47. var $fpdi;
  48. /**
  49. * Available BoxTypes
  50. *
  51. * @var array
  52. */
  53. var $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox');
  /**
   * Constructor
   *
   * Stores the owning FPDI instance, runs the parent parser constructor on
   * the given file and collects every /Page object of the document into
   * $this->pages.
   *
   * @param string $filename Source-Filename
   * @param object $fpdi Object of type fpdi
   */
  function __construct($filename, &$fpdi) {
    $this->fpdi =& $fpdi;

    // Start with an empty list so that count() below always receives an
    // array, even when read_pages() does not append a single page.
    $this->pages = array();

    parent::pdf_parser($filename);

    // resolve Pages-Dictionary
    $pages = $this->pdf_resolve_object($this->c, $this->root[1][1]['/Pages']);

    // Read pages
    $this->read_pages($this->c, $pages, $this->pages);

    // count pages
    $this->page_count = count($this->pages);
  }

  /**
   * Legacy (PHP 4 style) constructor
   *
   * Kept so that code calling the class-named method explicitly keeps
   * working. Since PHP 8.0 a method named like its class is no longer used
   * as constructor, which is why the real work lives in __construct().
   *
   * @param string $filename Source-Filename
   * @param object $fpdi Object of type fpdi
   */
  function fpdi_pdf_parser($filename, &$fpdi) {
    $this->__construct($filename, $fpdi);
  }
  /**
   * Overwrite parent::error()
   *
   * Hands the message over to the error() method of the FPDI instance that
   * was passed to the constructor.
   *
   * @param string $msg Error-Message
   */
  function error($msg) {
    $handler =& $this->fpdi;
    $handler->error($msg);
  }
  /**
   * Number of pages found in the source file
   *
   * @return int value of $this->page_count as set by the constructor
   */
  function getPageCount() {
    $total = $this->page_count;

    return $total;
  }
  /**
   * Set pageno
   *
   * The public page number is 1-based, the internal index ($this->pageno)
   * is 0-based. An out-of-range number is reported through error() and the
   * current page index is left untouched, so a non-fatal error handler can
   * not leave the parser pointing at a page that does not exist.
   *
   * @param int $pageno Pagenumber to use
   */
  function setPageno($pageno) {
    $pageno = ((int) $pageno) - 1;

    if ($pageno < 0 || $pageno >= $this->getPageCount()) {
      // Use the class' own error() wrapper like the rest of the parser.
      $this->error('Pagenumber is wrong!');
      return;
    }

    $this->pageno = $pageno;
  }
  /**
   * Get page-resources from current page
   *
   * The current page is the one selected by $this->pageno.
   *
   * @return array
   */
  function getPageResources() {
    $currentPage = $this->pages[$this->pageno];

    return $this->_getPageResources($currentPage);
  }
  /**
   * Get page-resources from /Page
   *
   * If the object has a /Resources entry it is resolved and returned.
   * Otherwise the lookup continues with the object's /Parent. When neither
   * entry exists, false is returned.
   *
   * @param array $obj Array of pdf-data
   * @return mixed the resolved resources (unwrapped if the resolved value is
   *               a PDF_TYPE_OBJECT), or false if nothing was found
   */
  function _getPageResources ($obj) { // $obj = /Page
    $obj = $this->pdf_resolve_object($this->c, $obj);

    if (isset($obj[1][1]['/Resources'])) {
      $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Resources']);
    } else if (isset($obj[1][1]['/Parent'])) {
      // No resources on this level: ask the parent node.
      $res = $this->_getPageResources($obj[1][1]['/Parent']);
    } else {
      return false;
    }

    // The recursive call may hand back false; only index into real arrays.
    if (is_array($res) && isset($res[0]) && $res[0] == PDF_TYPE_OBJECT) {
      return $res[1];
    }

    return $res;
  }
  /**
   * Get content of current page
   *
   * If /Contents refers to more than one stream, the decoded streams are
   * concatenated; each one is followed by a single space.
   *
   * @return string empty string if the page has no /Contents entry
   */
  function getContent() {
    $pageIndex = $this->pageno;

    if (!isset($this->pages[$pageIndex][1][1]['/Contents'])) {
      return '';
    }

    $joined = '';
    $streams = $this->_getPageContent($this->pages[$pageIndex][1][1]['/Contents']);
    foreach ($streams as $streamObject) {
      $joined .= $this->_rebuildContentStream($streamObject) . ' ';
    }

    return $joined;
  }
  /**
   * Resolve all content-objects
   *
   * An object reference is resolved; if the resolved value is an array the
   * lookup recurses into it, otherwise the resolved object itself is the
   * single result. An array is walked entry by entry and the results are
   * merged. Any other type yields an empty array.
   *
   * @param array $content_ref
   * @return array
   */
  function _getPageContent($content_ref) {
    $refType = $content_ref[0];

    if ($refType == PDF_TYPE_OBJREF) {
      $resolved = $this->pdf_resolve_object($this->c, $content_ref);

      if ($resolved[1][0] == PDF_TYPE_ARRAY) {
        return $this->_getPageContent($resolved[1]);
      }

      return array($resolved);
    }

    $collected = array();

    if ($refType == PDF_TYPE_ARRAY) {
      foreach ($content_ref[1] as $entry) {
        $collected = array_merge($collected, $this->_getPageContent($entry));
      }
    }

    return $collected;
  }
  /**
   * Rebuild content-streams
   *
   * Reads the /Filter entry of the stream dictionary (a single token, an
   * array of tokens, or a reference to either) and applies the listed
   * filters to the raw stream data in the order they are given.
   *
   * @param array $obj
   * @return string
   */
  function _rebuildContentStream($obj) {
    // Build the list of filters that have to be applied.
    $chain = array();

    if (isset($obj[1][1]['/Filter'])) {
      $declared = $obj[1][1]['/Filter'];

      if ($declared[0] == PDF_TYPE_OBJREF) {
        $resolved = $this->pdf_resolve_object($this->c, $declared);
        $declared = $resolved[1];
      }

      if ($declared[0] == PDF_TYPE_TOKEN) {
        $chain[] = $declared;
      } else if ($declared[0] == PDF_TYPE_ARRAY) {
        $chain = $declared[1];
      }
    }

    $data = $obj[2][1];

    foreach ($chain as $filter) {
      $filterName = $filter[1];

      switch ($filterName) {
        case '/FlateDecode':
        case '/Fl':
          if (function_exists('gzuncompress')) {
            // An empty stream is passed through as an empty string.
            $data = (strlen($data) > 0) ? @gzuncompress($data) : '';
          } else {
            $this->error(sprintf('To handle %s filter, please compile php with zlib support.',$filterName));
          }

          if ($data === false) {
            $this->error('Error while decompressing stream.');
          }
          break;

        case '/LZWDecode':
          include_once('filters/FilterLZW_FPDI.php');
          $lzwDecoder = new FilterLZW_FPDI($this->fpdi);
          $data = $lzwDecoder->decode($data);
          break;

        case '/ASCII85Decode':
          include_once('filters/FilterASCII85_FPDI.php');
          $ascii85Decoder = new FilterASCII85_FPDI($this->fpdi);
          $data = $ascii85Decoder->decode($data);
          break;

        case null:
          // No filter name: the data is used as it is.
          break;

        default:
          $this->error(sprintf('Unsupported Filter: %s',$filterName));
      }
    }

    return $data;
  }
  /**
   * Get a Box from a page
   * Arrayformat is same as used by fpdf_tpl
   *
   * If the page itself does not carry the requested box, the lookup is
   * repeated on its /Parent; false is returned when there is no parent left.
   *
   * @param array $page a /Page
   * @param string $box_index Type of Box @see $availableBoxes
   * @param float $k Scale factor from user space units to points
   * @return array keys x, y, w, h, llx, lly, urx, ury (all divided by $k),
   *               or false if no such box was found
   */
  function getPageBox($page, $box_index, $k) {
    $page = $this->pdf_resolve_object($this->c, $page);

    $entry = null;
    if (isset($page[1][1][$box_index])) {
      $entry = $page[1][1][$box_index];
    }

    // The box may be stored indirectly.
    if (!is_null($entry) && $entry[0] == PDF_TYPE_OBJREF) {
      $resolved = $this->pdf_resolve_object($this->c, $entry);
      $entry = $resolved[1];
    }

    if (!is_null($entry) && $entry[0] == PDF_TYPE_ARRAY) {
      $coords = $entry[1];
      $x0 = $coords[0][1];
      $y0 = $coords[1][1];
      $x1 = $coords[2][1];
      $y1 = $coords[3][1];

      return array(
        'x' => $x0 / $k,
        'y' => $y0 / $k,
        'w' => abs($x0 - $x1) / $k,
        'h' => abs($y0 - $y1) / $k,
        'llx' => min($x0, $x1) / $k,
        'lly' => min($y0, $y1) / $k,
        'urx' => max($x0, $x1) / $k,
        'ury' => max($y0, $y1) / $k,
      );
    }

    if (!isset($page[1][1]['/Parent'])) {
      return false;
    }

    $parent = $this->pdf_resolve_object($this->c, $page[1][1]['/Parent']);

    return $this->getPageBox($parent, $box_index, $k);
  }
  /**
   * Get all page boxes by page no
   *
   * @param int $pageno The page number (1-based, converted to the 0-based
   *                    index of $this->pages)
   * @param float $k Scale factor from user space units to points
   * @return array
   */
  function getPageBoxes($pageno, $k) {
    $index = $pageno - 1;

    return $this->_getPageBoxes($this->pages[$index], $k);
  }
  /**
   * Get all boxes from /Page
   *
   * Asks getPageBox() for every name in $this->availableBoxes and keeps the
   * ones that returned a non-empty result, keyed by box name.
   *
   * @param array $page a /Page
   * @param float $k Scale factor passed through to getPageBox()
   * @return array
   */
  function _getPageBoxes($page, $k) {
    $found = array();

    foreach ($this->availableBoxes as $boxName) {
      $dimensions = $this->getPageBox($page, $boxName, $k);
      if (!$dimensions) {
        continue;
      }
      $found[$boxName] = $dimensions;
    }

    return $found;
  }
  /**
   * Get the page rotation by pageno
   *
   * @param integer $pageno page number (1-based, converted to the 0-based
   *                        index of $this->pages)
   * @return array
   */
  function getPageRotation($pageno) {
    $index = $pageno - 1;

    return $this->_getPageRotation($this->pages[$index]);
  }
  /**
   * Get the rotation from /Page
   *
   * If the object has a /Rotate entry it is resolved and returned.
   * Otherwise the lookup continues with the object's /Parent. When neither
   * entry exists, false is returned.
   *
   * @param array $obj Array of pdf-data
   * @return mixed the resolved /Rotate value (unwrapped if the resolved
   *               value is a PDF_TYPE_OBJECT), or false if nothing was found
   */
  function _getPageRotation($obj) { // $obj = /Page
    $obj = $this->pdf_resolve_object($this->c, $obj);

    if (isset($obj[1][1]['/Rotate'])) {
      $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Rotate']);
    } else if (isset($obj[1][1]['/Parent'])) {
      // No rotation on this level: ask the parent node.
      $res = $this->_getPageRotation($obj[1][1]['/Parent']);
    } else {
      return false;
    }

    // The recursive call may hand back false; only index into real arrays.
    if (is_array($res) && isset($res[0]) && $res[0] == PDF_TYPE_OBJECT) {
      return $res[1];
    }

    return $res;
  }
  /**
   * Read all /Page(es)
   *
   * Walks the /Kids of the given /Pages node. Kids whose /Type is /Pages are
   * descended into recursively, every other kid is appended to $result.
   * A node without a usable /Kids entry is reported through error() and
   * skipped, so nothing is iterated if the error handler returns.
   *
   * @param object $c pdf_context
   * @param array $pages /Pages
   * @param array $result the result-array
   */
  function read_pages(&$c, &$pages, &$result) {
    if (!isset($pages[1][1]['/Kids'])) {
      $this->error('Cannot find /Kids in current /Page-Dictionary');
      return;
    }

    // Get the kids dictionary
    $_kids = $this->pdf_resolve_object($c, $pages[1][1]['/Kids']);
    if (!is_array($_kids)) {
      $this->error('Cannot find /Kids in current /Page-Dictionary');
      return;
    }

    // $_kids[1] is either the array value itself (type marker + entries)
    // or already the list of entries; isset() keeps an empty list from
    // triggering an undefined offset.
    if (isset($_kids[1][0]) && $_kids[1][0] == PDF_TYPE_ARRAY) {
      $kids = $_kids[1][1];
    } else {
      $kids = isset($_kids[1]) ? $_kids[1] : null;
    }

    if (!is_array($kids)) {
      $this->error('Cannot find /Kids in current /Page-Dictionary');
      return;
    }

    foreach ($kids as $v) {
      $pg = $this->pdf_resolve_object($c, $v);

      if (isset($pg[1][1]['/Type'][1]) && $pg[1][1]['/Type'][1] === '/Pages') {
        // If one of the kids is an embedded
        // /Pages array, resolve it as well.
        $this->read_pages($c, $pg, $result);
      } else {
        $result[] = $pg;
      }
    }
  }
  /**
   * Get PDF-Version
   *
   * Runs parent::getPDFVersion() first (presumably this fills
   * $this->pdfVersion - confirm in pdf_parser) and then sets the PDF version
   * of the FPDI instance to the larger of its current version and
   * $this->pdfVersion.
   */
  function getPDFVersion() {
    parent::getPDFVersion();

    $current = $this->fpdi->getPDFVersion();
    $highest = max($current, $this->pdfVersion);

    $this->fpdi->setPDFVersion($highest);
  }
  352. }