PageRenderTime 51ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/library/Zend/Pdf/FileParser.php

https://bitbucket.org/baruffaldi/website-2008-computer-shopping-3
PHP | 480 lines | 180 code | 45 blank | 255 comment | 52 complexity | e61c7d0b4fa0d8d26733c932720b23eb MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @package Zend_Pdf
  16. * @subpackage FileParser
  17. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. */
  20. /** Zend_Pdf_Exception */
  21. require_once 'Zend/Pdf/Exception.php';
  22. /**
  23. * Abstract utility class for parsing binary files.
  24. *
  25. * Provides a library of methods to quickly navigate and extract various data
  26. * types (signed and unsigned integers, floating- and fixed-point numbers,
  27. * strings, etc.) from the file.
  28. *
  29. * File access is managed via a {@link Zend_Pdf_FileParserDataSource} object.
  30. * This allows the same parser code to work with many different data sources:
  31. * in-memory objects, filesystem files, etc.
  32. *
  33. * @package Zend_Pdf
  34. * @subpackage FileParser
  35. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  36. * @license http://framework.zend.com/license/new-bsd New BSD License
  37. */
  abstract class Zend_Pdf_FileParser
  {
    /**** Class Constants ****/

      /**
       * Little-endian byte order (0x04 0x03 0x02 0x01).
       */
      const BYTE_ORDER_LITTLE_ENDIAN = 0;

      /**
       * Big-endian byte order (0x01 0x02 0x03 0x04).
       */
      const BYTE_ORDER_BIG_ENDIAN = 1;


    /**** Instance Variables ****/

      /**
       * Flag indicating that the file has passed a cursory validation check.
       * @var boolean
       */
      protected $_isScreened = false;

      /**
       * Flag indicating that the file has been successfully parsed.
       * @var boolean
       */
      protected $_isParsed = false;

      /**
       * Object representing the data source to be parsed.
       * @var Zend_Pdf_FileParserDataSource
       */
      protected $_dataSource = null;


    /**** Public Interface ****/

    /* Abstract Methods */

      /**
       * Performs a cursory check to verify that the binary file is in the expected
       * format. Intended to quickly weed out obviously bogus files.
       *
       * Must set $this->_isScreened to true if successful.
       *
       * @throws Zend_Pdf_Exception
       */
      abstract public function screen();

      /**
       * Reads and parses the complete binary file.
       *
       * Must set $this->_isParsed to true if successful.
       *
       * @throws Zend_Pdf_Exception
       */
      abstract public function parse();


    /* Object Lifecycle */

      /**
       * Object constructor.
       *
       * Verifies that the data source has been properly initialized: a data
       * source reporting a size of zero is rejected.
       *
       * @param Zend_Pdf_FileParserDataSource $dataSource
       * @throws Zend_Pdf_Exception
       */
      public function __construct(Zend_Pdf_FileParserDataSource $dataSource)
      {
          if ($dataSource->getSize() == 0) {
              throw new Zend_Pdf_Exception('The data source has not been properly initialized',
                                           Zend_Pdf_Exception::BAD_DATA_SOURCE);
          }
          $this->_dataSource = $dataSource;
      }

      /**
       * Object destructor.
       *
       * Discards the data source object.
       */
      public function __destruct()
      {
          $this->_dataSource = null;
      }


    /* Accessors */

      /**
       * Returns true if the file has passed a cursory validation check.
       *
       * @return boolean
       */
      public function isScreened()
      {
          return $this->_isScreened;
      }

      /**
       * Returns true if the file has been successfully parsed.
       *
       * @return boolean
       */
      public function isParsed()
      {
          return $this->_isParsed;
      }

      /**
       * Returns the data source object representing the file being parsed.
       *
       * @return Zend_Pdf_FileParserDataSource
       */
      public function getDataSource()
      {
          return $this->_dataSource;
      }


    /* Primitive Methods */

      /**
       * Convenience wrapper for the data source object's moveToOffset() method.
       *
       * @param integer $offset Destination byte offset.
       * @throws Zend_Pdf_Exception
       */
      public function moveToOffset($offset)
      {
          $this->_dataSource->moveToOffset($offset);
      }

      /**
       * Convenience wrapper for the data source object's getOffset() method.
       *
       * @return integer Whatever the data source reports as its current offset.
       */
      public function getOffset()
      {
          return $this->_dataSource->getOffset();
      }

      /**
       * Convenience wrapper for the data source object's getSize() method.
       *
       * @return integer Whatever the data source reports as its size.
       */
      public function getSize()
      {
          return $this->_dataSource->getSize();
      }

      /**
       * Convenience wrapper for the data source object's readBytes() method.
       *
       * @param integer $byteCount Number of bytes to read.
       * @return string
       * @throws Zend_Pdf_Exception
       */
      public function readBytes($byteCount)
      {
          return $this->_dataSource->readBytes($byteCount);
      }

      /**
       * Convenience wrapper for the data source object's skipBytes() method.
       *
       * @param integer $byteCount Number of bytes to skip.
       * @throws Zend_Pdf_Exception
       */
      public function skipBytes($byteCount)
      {
          $this->_dataSource->skipBytes($byteCount);
      }


    /* Parser Methods */

      /**
       * Reads the signed integer value from the binary file at the current byte
       * offset.
       *
       * Advances the offset by the number of bytes read. Throws an exception if
       * an error occurs. Both arguments are validated before any bytes are
       * consumed, so an invalid size or byte order leaves the offset untouched.
       *
       * @param integer $size Size of integer in bytes: 1-4
       * @param integer $byteOrder (optional) Big- or little-endian byte order.
       *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
       *   If omitted, uses big-endian.
       * @return integer
       * @throws Zend_Pdf_Exception
       */
      public function readInt($size, $byteOrder = self::BYTE_ORDER_BIG_ENDIAN)
      {
          if (($size < 1) || ($size > 4)) {
              throw new Zend_Pdf_Exception("Invalid signed integer size: $size",
                                           Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
          }
          $isBigEndian = $this->_isBigEndian($byteOrder);

          $bytes = $this->_dataSource->readBytes($size);

          /* unpack() will not work for this method because it always works in
           * the host byte order for signed integers. It also does not allow for
           * variable integer sizes.
           *
           * The sign bit lives in the most significant byte, whose position
           * depends on the byte order.
           */
          $signByte   = ord($bytes[$isBigEndian ? 0 : ($size - 1)]);
          $isNegative = (($signByte & 0x80) == 0x80);

          /* Walk the bytes from most to least significant. For negative values
           * every byte is complemented first, which builds the non-negative
           * one's complement; the final bitwise NOT then yields the negative
           * number. The intermediate value never has its top bit set, so this
           * works reliably on both 32- and 64-bit systems.
           */
          $number = 0;
          for ($i = 0; $i < $size; $i++) {
              $byte = ord($bytes[$isBigEndian ? $i : ($size - 1 - $i)]);
              if ($isNegative) {
                  $byte = (~ $byte) & 0xff;
              }
              $number = ($number << 8) | $byte;
          }

          return $isNegative ? ~$number : $number;
      }

      /**
       * Reads the unsigned integer value from the binary file at the current byte
       * offset.
       *
       * Advances the offset by the number of bytes read. Throws an exception if
       * an error occurs. Both arguments are validated before any bytes are
       * consumed, so an invalid size or byte order leaves the offset untouched.
       *
       * NOTE: If you ask for a 4-byte unsigned integer on a 32-bit machine, the
       * resulting value WILL BE SIGNED because PHP uses signed integers internally
       * for everything. To guarantee portability, be sure to use bitwise
       * operators on large unsigned integers!
       *
       * @param integer $size Size of integer in bytes: 1-4
       * @param integer $byteOrder (optional) Big- or little-endian byte order.
       *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
       *   If omitted, uses big-endian.
       * @return integer
       * @throws Zend_Pdf_Exception
       */
      public function readUInt($size, $byteOrder = self::BYTE_ORDER_BIG_ENDIAN)
      {
          if (($size < 1) || ($size > 4)) {
              throw new Zend_Pdf_Exception("Invalid unsigned integer size: $size",
                                           Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
          }
          $isBigEndian = $this->_isBigEndian($byteOrder);

          $bytes = $this->_dataSource->readBytes($size);

          /* unpack() is a bit heavyweight for this simple conversion. Just
           * work the bytes directly, most significant byte first.
           */
          $number = 0;
          for ($i = 0; $i < $size; $i++) {
              $number = ($number << 8) | ord($bytes[$isBigEndian ? $i : ($size - 1 - $i)]);
          }
          return $number;
      }

      /**
       * Returns true if the specified bit is set in the integer bitfield.
       *
       * @param integer $bit Bit number to test (i.e. - 0-31)
       * @param integer $bitField
       * @return boolean
       */
      public function isBitSet($bit, $bitField)
      {
          $bitMask = 1 << $bit;
          return (($bitField & $bitMask) == $bitMask);
      }

      /**
       * Reads the signed fixed-point number from the binary file at the current
       * byte offset.
       *
       * Common fixed-point sizes are 2.14 and 16.16.
       *
       * The value is read as a signed integer of ($mantissaBits + $fractionBits)
       * bits via {@link readInt()} and then divided by 2^$fractionBits, so the
       * total width is subject to readInt()'s 1-4 byte limit.
       *
       * Advances the offset by the number of bytes read. Throws an exception if
       * an error occurs.
       *
       * @param integer $mantissaBits Number of bits in the mantissa
       * @param integer $fractionBits Number of bits in the fraction
       * @param integer $byteOrder (optional) Big- or little-endian byte order.
       *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
       *   If omitted, uses big-endian.
       * @return float
       * @throws Zend_Pdf_Exception
       */
      public function readFixed($mantissaBits, $fractionBits,
                                $byteOrder = self::BYTE_ORDER_BIG_ENDIAN)
      {
          $bitsToRead = $mantissaBits + $fractionBits;
          if (($bitsToRead % 8) !== 0) {
              throw new Zend_Pdf_Exception('Fixed-point numbers are whole bytes',
                                           Zend_Pdf_Exception::BAD_FIXED_POINT_SIZE);
          }
          return $this->readInt(($bitsToRead >> 3), $byteOrder) / (1 << $fractionBits);
      }

      /**
       * Reads the Unicode UTF-16-encoded string from the binary file at the
       * current byte offset.
       *
       * The byte order of the UTF-16 string must be specified. You must also
       * supply the desired resulting character set.
       *
       * Advances the offset by the number of bytes read. A zero $byteCount
       * returns an empty string without touching the data source or checking
       * the byte order. Otherwise the byte order is validated before any bytes
       * are consumed, so an invalid byte order leaves the offset untouched.
       *
       * The conversion result of {@link iconv()} is returned unchecked; iconv()
       * returns false when the conversion fails.
       *
       * @todo Consider changing $byteCount to a character count. They are not
       *   always equivalent (in the case of surrogates).
       * @todo Make $byteOrder optional if there is a byte-order mark (BOM) in the
       *   string being extracted.
       *
       * @param integer $byteCount Number of bytes (characters * 2) to return.
       * @param integer $byteOrder (optional) Big- or little-endian byte order.
       *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
       *   If omitted, uses big-endian.
       * @param string $characterSet (optional) Desired resulting character set.
       *   You may use any character set supported by {@link iconv()}. If omitted,
       *   uses 'current locale'.
       * @return string
       * @throws Zend_Pdf_Exception
       */
      public function readStringUTF16($byteCount,
                                      $byteOrder = self::BYTE_ORDER_BIG_ENDIAN,
                                      $characterSet = '')
      {
          if ($byteCount == 0) {
              return '';
          }
          $sourceCharset = $this->_isBigEndian($byteOrder) ? 'UTF-16BE' : 'UTF-16LE';

          $bytes = $this->_dataSource->readBytes($byteCount);

          /* No conversion needed when the caller wants the native encoding. */
          if ($characterSet == $sourceCharset) {
              return $bytes;
          }
          return iconv($sourceCharset, $characterSet, $bytes);
      }

      /**
       * Reads the Mac Roman-encoded string from the binary file at the current
       * byte offset.
       *
       * You must supply the desired resulting character set.
       *
       * Advances the offset by the number of bytes read. Throws an exception if
       * an error occurs while reading. The conversion result of {@link iconv()}
       * is returned unchecked; iconv() returns false when the conversion fails.
       *
       * @param integer $byteCount Number of bytes (characters) to return.
       * @param string $characterSet (optional) Desired resulting character set.
       *   You may use any character set supported by {@link iconv()}. If omitted,
       *   uses 'current locale'.
       * @return string
       * @throws Zend_Pdf_Exception
       */
      public function readStringMacRoman($byteCount, $characterSet = '')
      {
          if ($byteCount == 0) {
              return '';
          }
          $bytes = $this->_dataSource->readBytes($byteCount);
          if ($characterSet == 'MacRoman') {
              return $bytes;
          }
          return iconv('MacRoman', $characterSet, $bytes);
      }

      /**
       * Reads the Pascal string from the binary file at the current byte offset.
       *
       * The length of the Pascal string is determined by reading the length bytes
       * which precede the character data (as a big-endian unsigned integer via
       * {@link readUInt()}). The character data is treated as ASCII. You must
       * supply the desired resulting character set.
       *
       * Advances the offset by the number of bytes read. Throws an exception if
       * an error occurs while reading. The conversion result of {@link iconv()}
       * is returned unchecked; iconv() returns false when the conversion fails.
       *
       * @param string $characterSet (optional) Desired resulting character set.
       *   You may use any character set supported by {@link iconv()}. If omitted,
       *   uses 'current locale'.
       * @param integer $lengthBytes (optional) Number of bytes that make up the
       *   length. Default is 1.
       * @return string
       * @throws Zend_Pdf_Exception
       */
      public function readStringPascal($characterSet = '', $lengthBytes = 1)
      {
          $byteCount = $this->readUInt($lengthBytes);
          if ($byteCount == 0) {
              return '';
          }
          $bytes = $this->_dataSource->readBytes($byteCount);
          if ($characterSet == 'ASCII') {
              return $bytes;
          }
          return iconv('ASCII', $characterSet, $bytes);
      }


    /**** Internal Methods ****/

      /**
       * Validates a byte-order argument and reports whether it is big-endian.
       *
       * Called before any bytes are read so that an invalid byte order never
       * moves the data source offset. The comparisons are deliberately loose
       * (==), big-endian first, to accept exactly the same argument values the
       * readers have always accepted.
       *
       * @param integer $byteOrder One of the BYTE_ORDER_ constants.
       * @return boolean True for big-endian, false for little-endian.
       * @throws Zend_Pdf_Exception If the byte order matches neither constant.
       */
      private function _isBigEndian($byteOrder)
      {
          if ($byteOrder == self::BYTE_ORDER_BIG_ENDIAN) {
              return true;
          }
          if ($byteOrder == self::BYTE_ORDER_LITTLE_ENDIAN) {
              return false;
          }
          throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
                                       Zend_Pdf_Exception::INVALID_BYTE_ORDER);
      }
  }