PageRenderTime 42ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/library/Zend/Pdf/FileParser.php

https://bitbucket.org/hjain/loudmusic
PHP | 485 lines | 186 code | 43 blank | 256 comment | 52 complexity | 167772665fc109bbd977818121513a3f MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Pdf
  17. * @subpackage FileParser
  18. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id: FileParser.php 24593 2012-01-05 20:35:02Z matthew $
  21. */
  22. /**
  23. * Abstract utility class for parsing binary files.
  24. *
  25. * Provides a library of methods to quickly navigate and extract various data
  26. * types (signed and unsigned integers, floating- and fixed-point numbers,
  27. * strings, etc.) from the file.
  28. *
  29. * File access is managed via a {@link Zend_Pdf_FileParserDataSource} object.
  30. * This allows the same parser code to work with many different data sources:
  31. * in-memory objects, filesystem files, etc.
  32. *
  33. * @package Zend_Pdf
  34. * @subpackage FileParser
  35. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  36. * @license http://framework.zend.com/license/new-bsd New BSD License
  37. */
  38. abstract class Zend_Pdf_FileParser
  39. {
  40. /**** Class Constants ****/
  41. /**
  42. * Little-endian byte order (0x04 0x03 0x02 0x01).
  43. */
  44. const BYTE_ORDER_LITTLE_ENDIAN = 0;
  45. /**
  46. * Big-endian byte order (0x01 0x02 0x03 0x04).
  47. */
  48. const BYTE_ORDER_BIG_ENDIAN = 1;
  49. /**** Instance Variables ****/
  50. /**
  51. * Flag indicating that the file has passed a cursory validation check.
  52. * @var boolean
  53. */
  54. protected $_isScreened = false;
  55. /**
  56. * Flag indicating that the file has been sucessfully parsed.
  57. * @var boolean
  58. */
  59. protected $_isParsed = false;
  60. /**
  61. * Object representing the data source to be parsed.
  62. * @var Zend_Pdf_FileParserDataSource
  63. */
  64. protected $_dataSource = null;
  65. /**** Public Interface ****/
  66. /* Abstract Methods */
  67. /**
  68. * Performs a cursory check to verify that the binary file is in the expected
  69. * format. Intended to quickly weed out obviously bogus files.
  70. *
  71. * Must set $this->_isScreened to true if successful.
  72. *
  73. * @throws Zend_Pdf_Exception
  74. */
  75. abstract public function screen();
  76. /**
  77. * Reads and parses the complete binary file.
  78. *
  79. * Must set $this->_isParsed to true if successful.
  80. *
  81. * @throws Zend_Pdf_Exception
  82. */
  83. abstract public function parse();
  84. /* Object Lifecycle */
  85. /**
  86. * Object constructor.
  87. *
  88. * Verifies that the data source has been properly initialized.
  89. *
  90. * @param Zend_Pdf_FileParserDataSource $dataSource
  91. * @throws Zend_Pdf_Exception
  92. */
  93. public function __construct(Zend_Pdf_FileParserDataSource $dataSource)
  94. {
  95. if ($dataSource->getSize() == 0) {
  96. require_once 'Zend/Pdf/Exception.php';
  97. throw new Zend_Pdf_Exception('The data source has not been properly initialized',
  98. Zend_Pdf_Exception::BAD_DATA_SOURCE);
  99. }
  100. $this->_dataSource = $dataSource;
  101. }
  102. /**
  103. * Object destructor.
  104. *
  105. * Discards the data source object.
  106. */
  107. public function __destruct()
  108. {
  109. $this->_dataSource = null;
  110. }
  111. /* Accessors */
  112. /**
  113. * Returns true if the file has passed a cursory validation check.
  114. *
  115. * @return boolean
  116. */
  117. public function isScreened()
  118. {
  119. return $this->_isScreened;
  120. }
  121. /**
  122. * Returns true if the file has been successfully parsed.
  123. *
  124. * @return boolean
  125. */
  126. public function isParsed()
  127. {
  128. return $this->_isParsed;
  129. }
  130. /**
  131. * Returns the data source object representing the file being parsed.
  132. *
  133. * @return Zend_Pdf_FileParserDataSource
  134. */
  135. public function getDataSource()
  136. {
  137. return $this->_dataSource;
  138. }
  139. /* Primitive Methods */
  140. /**
  141. * Convenience wrapper for the data source object's moveToOffset() method.
  142. *
  143. * @param integer $offset Destination byte offset.
  144. * @throws Zend_Pdf_Exception
  145. */
  146. public function moveToOffset($offset)
  147. {
  148. $this->_dataSource->moveToOffset($offset);
  149. }
  150. public function getOffset() {
  151. return $this->_dataSource->getOffset();
  152. }
  153. public function getSize() {
  154. return $this->_dataSource->getSize();
  155. }
  156. /**
  157. * Convenience wrapper for the data source object's readBytes() method.
  158. *
  159. * @param integer $byteCount Number of bytes to read.
  160. * @return string
  161. * @throws Zend_Pdf_Exception
  162. */
  163. public function readBytes($byteCount)
  164. {
  165. return $this->_dataSource->readBytes($byteCount);
  166. }
  167. /**
  168. * Convenience wrapper for the data source object's skipBytes() method.
  169. *
  170. * @param integer $byteCount Number of bytes to skip.
  171. * @throws Zend_Pdf_Exception
  172. */
  173. public function skipBytes($byteCount)
  174. {
  175. $this->_dataSource->skipBytes($byteCount);
  176. }
  177. /* Parser Methods */
  178. /**
  179. * Reads the signed integer value from the binary file at the current byte
  180. * offset.
  181. *
  182. * Advances the offset by the number of bytes read. Throws an exception if
  183. * an error occurs.
  184. *
  185. * @param integer $size Size of integer in bytes: 1-4
  186. * @param integer $byteOrder (optional) Big- or little-endian byte order.
  187. * Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
  188. * If omitted, uses big-endian.
  189. * @return integer
  190. * @throws Zend_Pdf_Exception
  191. */
  192. public function readInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
  193. {
  194. if (($size < 1) || ($size > 4)) {
  195. require_once 'Zend/Pdf/Exception.php';
  196. throw new Zend_Pdf_Exception("Invalid signed integer size: $size",
  197. Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
  198. }
  199. $bytes = $this->_dataSource->readBytes($size);
  200. /* unpack() will not work for this method because it always works in
  201. * the host byte order for signed integers. It also does not allow for
  202. * variable integer sizes.
  203. */
  204. if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
  205. $number = ord($bytes[0]);
  206. if (($number & 0x80) == 0x80) {
  207. /* This number is negative. Extract the positive equivalent.
  208. */
  209. $number = (~ $number) & 0xff;
  210. for ($i = 1; $i < $size; $i++) {
  211. $number = ($number << 8) | ((~ ord($bytes[$i])) & 0xff);
  212. }
  213. /* Now turn this back into a negative number by taking the
  214. * two's complement (we didn't add one above so won't
  215. * subtract it below). This works reliably on both 32- and
  216. * 64-bit systems.
  217. */
  218. $number = ~$number;
  219. } else {
  220. for ($i = 1; $i < $size; $i++) {
  221. $number = ($number << 8) | ord($bytes[$i]);
  222. }
  223. }
  224. } else if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
  225. $number = ord($bytes[$size - 1]);
  226. if (($number & 0x80) == 0x80) {
  227. /* Negative number. See discussion above.
  228. */
  229. $number = 0;
  230. for ($i = --$size; $i >= 0; $i--) {
  231. $number |= ((~ ord($bytes[$i])) & 0xff) << ($i * 8);
  232. }
  233. $number = ~$number;
  234. } else {
  235. $number = 0;
  236. for ($i = --$size; $i >= 0; $i--) {
  237. $number |= ord($bytes[$i]) << ($i * 8);
  238. }
  239. }
  240. } else {
  241. require_once 'Zend/Pdf/Exception.php';
  242. throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
  243. Zend_Pdf_Exception::INVALID_BYTE_ORDER);
  244. }
  245. return $number;
  246. }
  247. /**
  248. * Reads the unsigned integer value from the binary file at the current byte
  249. * offset.
  250. *
  251. * Advances the offset by the number of bytes read. Throws an exception if
  252. * an error occurs.
  253. *
  254. * NOTE: If you ask for a 4-byte unsigned integer on a 32-bit machine, the
  255. * resulting value WILL BE SIGNED because PHP uses signed integers internally
  256. * for everything. To guarantee portability, be sure to use bitwise operators
  257. * operators on large unsigned integers!
  258. *
  259. * @param integer $size Size of integer in bytes: 1-4
  260. * @param integer $byteOrder (optional) Big- or little-endian byte order.
  261. * Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
  262. * If omitted, uses big-endian.
  263. * @return integer
  264. * @throws Zend_Pdf_Exception
  265. */
  266. public function readUInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
  267. {
  268. if (($size < 1) || ($size > 4)) {
  269. require_once 'Zend/Pdf/Exception.php';
  270. throw new Zend_Pdf_Exception("Invalid unsigned integer size: $size",
  271. Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
  272. }
  273. $bytes = $this->_dataSource->readBytes($size);
  274. /* unpack() is a bit heavyweight for this simple conversion. Just
  275. * work the bytes directly.
  276. */
  277. if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
  278. $number = ord($bytes[0]);
  279. for ($i = 1; $i < $size; $i++) {
  280. $number = ($number << 8) | ord($bytes[$i]);
  281. }
  282. } else if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
  283. $number = 0;
  284. for ($i = --$size; $i >= 0; $i--) {
  285. $number |= ord($bytes[$i]) << ($i * 8);
  286. }
  287. } else {
  288. require_once 'Zend/Pdf/Exception.php';
  289. throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
  290. Zend_Pdf_Exception::INVALID_BYTE_ORDER);
  291. }
  292. return $number;
  293. }
  294. /**
  295. * Returns true if the specified bit is set in the integer bitfield.
  296. *
  297. * @param integer $bit Bit number to test (i.e. - 0-31)
  298. * @param integer $bitField
  299. * @return boolean
  300. */
  301. public function isBitSet($bit, $bitField)
  302. {
  303. $bitMask = 1 << $bit;
  304. $isSet = (($bitField & $bitMask) == $bitMask);
  305. return $isSet;
  306. }
  307. /**
  308. * Reads the signed fixed-point number from the binary file at the current
  309. * byte offset.
  310. *
  311. * Common fixed-point sizes are 2.14 and 16.16.
  312. *
  313. * Advances the offset by the number of bytes read. Throws an exception if
  314. * an error occurs.
  315. *
  316. * @param integer $mantissaBits Number of bits in the mantissa
  317. * @param integer $fractionBits Number of bits in the fraction
  318. * @param integer $byteOrder (optional) Big- or little-endian byte order.
  319. * Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
  320. * If omitted, uses big-endian.
  321. * @return float
  322. * @throws Zend_Pdf_Exception
  323. */
  324. public function readFixed($mantissaBits, $fractionBits,
  325. $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
  326. {
  327. $bitsToRead = $mantissaBits + $fractionBits;
  328. if (($bitsToRead % 8) !== 0) {
  329. require_once 'Zend/Pdf/Exception.php';
  330. throw new Zend_Pdf_Exception('Fixed-point numbers are whole bytes',
  331. Zend_Pdf_Exception::BAD_FIXED_POINT_SIZE);
  332. }
  333. $number = $this->readInt(($bitsToRead >> 3), $byteOrder) / (1 << $fractionBits);
  334. return $number;
  335. }
  336. /**
  337. * Reads the Unicode UTF-16-encoded string from the binary file at the
  338. * current byte offset.
  339. *
  340. * The byte order of the UTF-16 string must be specified. You must also
  341. * supply the desired resulting character set.
  342. *
  343. * Advances the offset by the number of bytes read. Throws an exception if
  344. * an error occurs.
  345. *
  346. * @todo Consider changing $byteCount to a character count. They are not
  347. * always equivalent (in the case of surrogates).
  348. * @todo Make $byteOrder optional if there is a byte-order mark (BOM) in the
  349. * string being extracted.
  350. *
  351. * @param integer $byteCount Number of bytes (characters * 2) to return.
  352. * @param integer $byteOrder (optional) Big- or little-endian byte order.
  353. * Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
  354. * If omitted, uses big-endian.
  355. * @param string $characterSet (optional) Desired resulting character set.
  356. * You may use any character set supported by {@link iconv()}. If omitted,
  357. * uses 'current locale'.
  358. * @return string
  359. * @throws Zend_Pdf_Exception
  360. */
  361. public function readStringUTF16($byteCount,
  362. $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN,
  363. $characterSet = '')
  364. {
  365. if ($byteCount == 0) {
  366. return '';
  367. }
  368. $bytes = $this->_dataSource->readBytes($byteCount);
  369. if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
  370. if ($characterSet == 'UTF-16BE') {
  371. return $bytes;
  372. }
  373. return iconv('UTF-16BE', $characterSet, $bytes);
  374. } else if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
  375. if ($characterSet == 'UTF-16LE') {
  376. return $bytes;
  377. }
  378. return iconv('UTF-16LE', $characterSet, $bytes);
  379. } else {
  380. require_once 'Zend/Pdf/Exception.php';
  381. throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
  382. Zend_Pdf_Exception::INVALID_BYTE_ORDER);
  383. }
  384. }
  385. /**
  386. * Reads the Mac Roman-encoded string from the binary file at the current
  387. * byte offset.
  388. *
  389. * You must supply the desired resulting character set.
  390. *
  391. * Advances the offset by the number of bytes read. Throws an exception if
  392. * an error occurs.
  393. *
  394. * @param integer $byteCount Number of bytes (characters) to return.
  395. * @param string $characterSet (optional) Desired resulting character set.
  396. * You may use any character set supported by {@link iconv()}. If omitted,
  397. * uses 'current locale'.
  398. * @return string
  399. * @throws Zend_Pdf_Exception
  400. */
  401. public function readStringMacRoman($byteCount, $characterSet = '')
  402. {
  403. if ($byteCount == 0) {
  404. return '';
  405. }
  406. $bytes = $this->_dataSource->readBytes($byteCount);
  407. if ($characterSet == 'MacRoman') {
  408. return $bytes;
  409. }
  410. return iconv('MacRoman', $characterSet, $bytes);
  411. }
  412. /**
  413. * Reads the Pascal string from the binary file at the current byte offset.
  414. *
  415. * The length of the Pascal string is determined by reading the length bytes
  416. * which preceed the character data. You must supply the desired resulting
  417. * character set.
  418. *
  419. * Advances the offset by the number of bytes read. Throws an exception if
  420. * an error occurs.
  421. *
  422. * @param string $characterSet (optional) Desired resulting character set.
  423. * You may use any character set supported by {@link iconv()}. If omitted,
  424. * uses 'current locale'.
  425. * @param integer $lengthBytes (optional) Number of bytes that make up the
  426. * length. Default is 1.
  427. * @return string
  428. * @throws Zend_Pdf_Exception
  429. */
  430. public function readStringPascal($characterSet = '', $lengthBytes = 1)
  431. {
  432. $byteCount = $this->readUInt($lengthBytes);
  433. if ($byteCount == 0) {
  434. return '';
  435. }
  436. $bytes = $this->_dataSource->readBytes($byteCount);
  437. if ($characterSet == 'ASCII') {
  438. return $bytes;
  439. }
  440. return iconv('ASCII', $characterSet, $bytes);
  441. }
  442. }