PageRenderTime 64ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/www/lib/rarinfo.php

https://github.com/vikjon0/newznab
PHP | 583 lines | 282 code | 71 blank | 230 comment | 37 complexity | 9259b386850b70f2e9833518a1997b45 MD5 | raw file
Possible License(s): GPL-3.0
  1. <?php
  2. /**
  3. * RarInfo class.
  4. *
  5. * A simple class for inspecting RAR file data and listing information about
  6. * the archive contents in pure PHP (no external dependencies). Data can be
  7. * loaded directly from a file or from a variable passed by reference.
  8. *
  9. * Example usage:
  10. *
  11. * <code>
  12. *
  13. * // Load the RAR file or data
  14. * $rar = new RarInfo;
  15. * $rar->open('./foo.rar'); // or $rar->setData($data);
  16. * if ($rar->error) {
  17. * echo "Error: {$rar->error}\n";
  18. * exit;
  19. * }
  20. *
  21. * // Check encryption
  22. * if ($rar->isEncrypted) {
  23. * echo "Archive is password encrypted\n";
  24. * exit;
  25. * }
  26. *
  27. * // Process the file list
  28. * $files = $rar->getFileList();
  29. * foreach ($files as $file) {
  30. * if ($file['pass'] == true) {
  31. * echo "File is passworded: {$file['name']}\n";
  32. * }
  33. * }
  34. *
  35. * </code>
  36. *
  37. * @todo Plenty of parsing still possible, most format values have been added ;)
  38. * @link http://www.win-rar.com/index.php?id=24&kb_article_id=162
  39. *
  40. * @author Hecks
  41. * @copyright (c) 2010 Hecks
  42. * @license Modified BSD
  43. * @version 1.6
  44. *
  45. * CHANGELOG:
  46. * ----------
  47. * 1.6 Added extra error checking to read method
  48. * 1.5 Improved getSummary method output
  49. * 1.4 Added filename sanity checks & maxFilenameLength variable
  50. * 1.3 Fixed issues with some file headers lacking LONG_BLOCK flag
  51. * 1.2 Tweaked seeking method
  52. * 1.1 Fixed issues with PHP not handling unsigned longs properly (pfft)
  53. * 1.0 Initial release
  54. *
  55. */
  class RarInfo
  {
      // ------ Class constants -----------------------------------------------------

      // Block types (value of the head_type byte in every block header)
      const BLOCK_MARK = 0x72;
      const BLOCK_MAIN = 0x73;
      const BLOCK_FILE = 0x74;
      const BLOCK_OLD_COMMENT = 0x75;
      const BLOCK_OLD_EXTRA = 0x76;
      const BLOCK_OLD_SUB = 0x77;
      const BLOCK_OLD_RECOVERY = 0x78;
      const BLOCK_OLD_AUTH = 0x79;
      const BLOCK_SUB = 0x7a;
      const BLOCK_ENDARC = 0x7b;

      // Flags for BLOCK_MAIN
      const MAIN_VOLUME = 0x0001;
      const MAIN_COMMENT = 0x0002;
      const MAIN_LOCK = 0x0004;
      const MAIN_SOLID = 0x0008;
      const MAIN_NEWNUMBERING = 0x0010;
      const MAIN_AUTH = 0x0020;
      const MAIN_RECOVERY = 0x0040;
      const MAIN_PASSWORD = 0x0080;
      const MAIN_FIRSTVOLUME = 0x0100;
      const MAIN_ENCRYPTVER = 0x0200;

      // Flags for BLOCK_FILE
      const FILE_SPLIT_BEFORE = 0x0001;
      const FILE_SPLIT_AFTER = 0x0002;
      const FILE_PASSWORD = 0x0004;
      const FILE_COMMENT = 0x0008;
      const FILE_SOLID = 0x0010;
      const FILE_DICTMASK = 0x00e0;
      const FILE_DICT64 = 0x0000;
      const FILE_DICT128 = 0x0020;
      const FILE_DICT256 = 0x0040;
      const FILE_DICT512 = 0x0060;
      const FILE_DICT1024 = 0x0080;
      const FILE_DICT2048 = 0x00a0;
      const FILE_DICT4096 = 0x00c0;
      const FILE_DIRECTORY = 0x00e0;
      const FILE_LARGE = 0x0100;
      const FILE_UNICODE = 0x0200;
      const FILE_SALT = 0x0400;
      const FILE_VERSION = 0x0800;
      const FILE_EXTTIME = 0x1000;
      const FILE_EXTFLAGS = 0x2000;

      // Flags for BLOCK_ENDARC
      const ENDARC_NEXT_VOLUME = 0x0001;
      const ENDARC_DATACRC = 0x0002;
      const ENDARC_REVSPACE = 0x0004;
      const ENDARC_VOLNR = 0x0008;

      // Flags for all blocks
      const SKIP_IF_UNKNOWN = 0x4000;
      const LONG_BLOCK = 0x8000;

      // OS types
      const OS_MSDOS = 0;
      const OS_OS2 = 1;
      const OS_WIN32 = 2;
      const OS_UNIX = 3;
      const OS_MACOS = 4;
      const OS_BEOS = 5;

      /**
       * Format for unpacking the main part of each block header (7 bytes).
       */
      const FORMAT_BLOCK_HEADER = 'vhead_crc/Chead_type/vhead_flags/vhead_size';

      /**
       * Format for unpacking the remainder of a File block header (21 bytes).
       */
      const FORMAT_FILE_HEADER = 'Vunp_size/Chost_os/Vfile_crc/Vftime/Cunp_ver/Cmethod/vname_size/Vattr';

      /**
       * Signature for the Marker block, as a hex string (7 bytes once packed).
       */
      const MARKER_BLOCK = '526172211a0700';

      // ------ Class variables and methods -----------------------------------------

      /**
       * List of block names corresponding to block types.
       * @var array
       */
      public static $blockNames = array(
          0x72 => 'Marker',
          0x73 => 'Archive',
          0x74 => 'File',
          0x75 => 'Old Style Comment',
          0x76 => 'Old Style Extra Info',
          0x77 => 'Old Style Subblock',
          0x78 => 'Old Style Recovery Record',
          0x79 => 'Old Style Archive Authenticity',
          0x7a => 'Subblock',
          0x7b => 'Archive End',
      );

      // ------ Instance variables and methods ---------------------------------------

      /**
       * Is the volume attribute set for the archive?
       * @var bool
       */
      public $isVolume;

      /**
       * Is authenticity information present?
       * @var bool
       */
      public $hasAuth;

      /**
       * Is a recovery record present?
       * @var bool
       */
      public $hasRecovery;

      /**
       * Is the archive encrypted with a password?
       * @var bool
       */
      public $isEncrypted;

      /**
       * The last error message (null when the last load raised no error).
       * @var string
       */
      public $error;

      /**
       * Loads data from the specified file (up to maxReadBytes) and analyses
       * the archive contents.
       *
       * @param string path to the file
       * @return bool false if the file cannot be read or archive analysis fails
       */
      public function open($file)
      {
          // Always start from a clean slate: a previous load that FAILED leaves
          // isAnalyzed false, yet may have stored blocks, flags and an error
          // message that would otherwise leak into this analysis.
          $this->reset();

          if (!($rarFile = realpath($file))) {
              trigger_error("File does not exist ($file)", E_USER_WARNING);
              $this->error = 'File does not exist';
              return false;
          }

          // use_include_path must be a real boolean (passing NULL is deprecated)
          $contents = file_get_contents($rarFile, false, null, 0, $this->maxReadBytes);
          if ($contents === false) {
              trigger_error("Could not read file ($file)", E_USER_WARNING);
              $this->error = 'Could not read file';
              return false;
          }

          $this->data = $contents;
          $this->dataSize = strlen($this->data);
          $this->rarFile = $rarFile;

          return $this->analyze();
      }

      /**
       * Loads data passed by reference (up to maxReadBytes) and analyses the
       * archive contents.
       *
       * @param string archive data stored in a variable
       * @return bool false if archive analysis fails
       */
      public function setData(&$data)
      {
          // See open(): reset unconditionally so a failed load cannot leak state
          $this->reset();

          // substr() may return false for an empty source on old PHP versions
          $this->data = (string) substr($data, 0, $this->maxReadBytes);

          // The size must describe the bytes actually STORED, not the caller's
          // buffer, otherwise the parser runs past the end of the truncated data
          $this->dataSize = strlen($this->data);

          return $this->analyze();
      }

      /**
       * Sets the maximum number of data bytes to be stored.
       *
       * Values that are not positive integers are ignored.
       *
       * @param integer maximum bytes
       * @return void
       */
      public function setMaxBytes($bytes)
      {
          if (is_int($bytes) && $bytes > 0) {
              $this->maxReadBytes = $bytes;
          }
      }

      /**
       * Convenience method that outputs a summary list of the archive information,
       * useful for pretty-printing.
       *
       * @param bool add file list to output?
       * @return array archive summary
       */
      public function getSummary($full=false)
      {
          $summary = array(
              'rar_file' => $this->rarFile,
              'data_size' => $this->dataSize,
              'is_volume' => (int) $this->isVolume,
              'has_auth' => (int) $this->hasAuth,
              'has_recovery' => (int) $this->hasRecovery,
              'is_encrypted' => (int) $this->isEncrypted,
          );

          // getFileList() returns false when no blocks are stored; count() must
          // only ever be given an array
          $fileList = $this->getFileList();
          $summary['file_count'] = is_array($fileList) ? count($fileList) : 0;
          if ($full) {
              $summary['file_list'] = $fileList;
          }

          return $summary;
      }

      /**
       * Returns a list of the blocks found in the archive in human-readable format
       * (for debugging purposes only).
       *
       * @param bool should numeric values be displayed as hexadecimal?
       * @return mixed false if no blocks are stored, or array list of blocks
       */
      public function getBlocks($asHex=false)
      {
          // Check that blocks are stored
          if (!$this->blocks) {
              return false;
          }

          // Build the block list
          $ret = array();
          foreach ($this->blocks as $block) {
              $b = array();
              $b['type'] = isset(self::$blockNames[$block['head_type']])
                  ? self::$blockNames[$block['head_type']]
                  : 'Unknown';

              if ($asHex) {
                  foreach ($block as $key => $val) {
                      // A file name that happens to be numeric (e.g. "2010") is
                      // still a name and must not be rewritten as hexadecimal
                      $b[$key] = ($key !== 'file_name' && is_numeric($val)) ? dechex($val) : $val;
                  }
              } else {
                  // Array union keeps the 'type' entry added above
                  $b += $block;
              }

              // Sanity check filename length
              if (isset($b['file_name'])) {
                  $b['file_name'] = substr($b['file_name'], 0, $this->maxFilenameLength);
              }
              $ret[] = $b;
          }

          return $ret;
      }

      /**
       * Parses the stored blocks and returns a list of records for each of the
       * files in the archive.
       *
       * Each record has the keys 'name', 'size', 'date' (UNIX timestamp) and
       * 'pass' (1 if the file is password protected, otherwise 0).
       *
       * @return mixed false if no blocks are stored, or array of file records
       */
      public function getFileList()
      {
          // Check that blocks are stored
          if (!$this->blocks) {
              return false;
          }

          // Build the file list
          $ret = array();
          foreach ($this->blocks as $block) {
              if ($block['head_type'] != self::BLOCK_FILE) {
                  continue;
              }
              $ret[] = array(
                  'name' => !empty($block['file_name']) ? substr($block['file_name'], 0, $this->maxFilenameLength) : 'Unknown',
                  'size' => isset($block['unp_size']) ? $block['unp_size'] : 0,
                  'date' => !empty($block['ftime']) ? $this->dos2unixtime($block['ftime']) : 0,
                  'pass' => (int) !empty($block['has_password']),
              );
          }

          return $ret;
      }

      /**
       * Path to the RAR file (if any).
       * @var string
       */
      protected $rarFile;

      /**
       * The maximum number of bytes to analyze.
       * @var integer
       */
      protected $maxReadBytes = 1048576;

      /**
       * The maximum length of filenames (for sanity checking).
       * @var integer
       */
      protected $maxFilenameLength = 500;

      /**
       * Have the archive contents been analyzed?
       * @var bool
       */
      protected $isAnalyzed = false;

      /**
       * The stored RAR file data.
       * @var string
       */
      protected $data;

      /**
       * The size in bytes of the currently stored data.
       * @var integer
       */
      protected $dataSize;

      /**
       * A pointer to the current position in the data.
       * @var integer
       */
      protected $offset = 0;

      /**
       * List of blocks found in the archive.
       * @var array
       */
      protected $blocks;

      /**
       * Parses the RAR data and stores a list of found blocks.
       *
       * Parsing stops quietly when the stored data runs out (the data may have
       * been truncated to maxReadBytes); it fails only if no Marker block is
       * found, a read comes up short, or the parser stops making progress.
       *
       * @return bool false if parsing fails
       */
      protected function analyze()
      {
          // Find the MARKER block
          $startPos = strpos((string) $this->data, pack('H*', self::MARKER_BLOCK));
          if ($startPos === false) {
              trigger_error('Not a valid RAR file', E_USER_WARNING);
              $this->error = 'Could not find Marker Block, not a valid RAR file';
              return false;
          }

          // The marker itself is stored as the first block. strpos() matched all
          // 7 signature bytes, so this read cannot run out of data.
          $this->offset = $startPos;
          $block = array('offset' => $startPos);
          $block += unpack(self::FORMAT_BLOCK_HEADER, $this->read(7));
          $this->blocks[] = $block;

          // Analyze all remaining blocks
          while ($this->offset < $this->dataSize) {
              try {
                  // Get the current block header
                  $block = array('offset' => $this->offset);
                  $block += unpack(self::FORMAT_BLOCK_HEADER, $this->read(7));

                  // File blocks always carry the packed-size field, even when
                  // the LONG_BLOCK flag is missing from their header
                  if (($block['head_flags'] & self::LONG_BLOCK)
                      || ($block['head_type'] == self::BLOCK_FILE)
                  ) {
                      $addsize = unpack('V', $this->read(4));
                      // %u works around PHP treating 32-bit values as signed
                      $block['add_size'] = sprintf('%u', $addsize[1]);
                  } else {
                      $block['add_size'] = 0;
                  }

                  if ($block['head_type'] == self::BLOCK_MAIN) {
                      // Block type: ARCHIVE
                      // Unpack the remainder of the Archive block header
                      $block += unpack('vreserved1/Vreserved2', $this->read(6));

                      // Parse Archive flags
                      if ($block['head_flags'] & self::MAIN_VOLUME) {
                          $this->isVolume = true;
                      }
                      if ($block['head_flags'] & self::MAIN_AUTH) {
                          $this->hasAuth = true;
                      }
                      if ($block['head_flags'] & self::MAIN_RECOVERY) {
                          $this->hasRecovery = true;
                      }
                      if ($block['head_flags'] & self::MAIN_PASSWORD) {
                          $this->isEncrypted = true;
                      }
                  } elseif ($block['head_type'] == self::BLOCK_FILE) {
                      // Block type: FILE
                      // Unpack the remainder of the File block header
                      $block += unpack(self::FORMAT_FILE_HEADER, $this->read(21));

                      // Fix PHP issue with unsigned longs
                      $block['unp_size'] = sprintf('%u', $block['unp_size']);
                      $block['file_crc'] = sprintf('%u', $block['file_crc']);
                      $block['ftime'] = sprintf('%u', $block['ftime']);
                      $block['attr'] = sprintf('%u', $block['attr']);

                      // Large file sizes: the high 32 bits follow the header
                      if ($block['head_flags'] & self::FILE_LARGE) {
                          $block += unpack('Vhigh_pack_size/Vhigh_unp_size', $this->read(8));
                          $block['high_pack_size'] = sprintf('%u', $block['high_pack_size']);
                          $block['high_unp_size'] = sprintf('%u', $block['high_unp_size']);
                          $block['add_size'] += ($block['high_pack_size'] * 0x100000000);
                          $block['unp_size'] += ($block['high_unp_size'] * 0x100000000);
                      }

                      // Filename
                      $block['file_name'] = $this->read($block['name_size']);

                      // Salt (optional)
                      if ($block['head_flags'] & self::FILE_SALT) {
                          $block += unpack('C8salt', $this->read(8));
                      }

                      // Extended time fields (optional) - presence is recorded only
                      if ($block['head_flags'] & self::FILE_EXTTIME) {
                          $block['ext_time'] = true;
                      }

                      // Encrypted with password?
                      $block['has_password'] = (bool) ($block['head_flags'] & self::FILE_PASSWORD);
                  }

                  // Add block to the list
                  $this->blocks[] = $block;

                  // Skip to the next block (header plus any packed data)
                  $this->seek($block['offset'] + $block['head_size'] + $block['add_size']);

                  // Sanity check: a block reporting zero length would loop forever
                  if ($block['offset'] == $this->offset) {
                      trigger_error('Parsing failed', E_USER_WARNING);
                      $this->error = 'Parsing seems to be stuck';
                      return false;
                  }
              } catch (Exception $e) {
                  // A read error sets $this->error and is fatal; simply running
                  // out of (possibly truncated) data just ends the parsing
                  if ($this->error) {
                      return false;
                  }
                  break;
              }
          }

          // End
          $this->isAnalyzed = true;
          return true;
      }

      /**
       * Reads the given number of bytes from the stored data and moves the
       * pointer forward.
       *
       * @param integer number of bytes to read
       * @return string byte string
       * @throws Exception if the request is invalid or runs past the stored data
       */
      protected function read($num)
      {
          if ($num == 0) {
              return '';
          }
          if ($num < 0) {
              throw new Exception('Read error');
          }

          // Check that enough data is available. The read may end exactly at
          // the end of the data, i.e. the new position may equal dataSize.
          $newPos = $this->offset + $num;
          if ($newPos > $this->dataSize) {
              throw new Exception('End of readable data');
          }

          // Read the requested bytes
          $read = substr($this->data, $this->offset, $num);

          // Confirm read length
          $rlen = strlen((string) $read);
          if ($rlen < $num) {
              $this->error = "Not enough data ({$num} requested, {$rlen} available)";
              throw new Exception('Read error');
          }

          // Move the data pointer
          $this->offset = $newPos;
          return $read;
      }

      /**
       * Moves the stored data pointer to the given position.
       *
       * Positions outside the stored data are clamped to the end of the data,
       * which marks "nothing left to read".
       *
       * @param integer new pointer position
       * @return void
       */
      protected function seek($pos)
      {
          if ($pos > $this->dataSize || $pos < 0) {
              $this->offset = $this->dataSize;
              return;
          }
          $this->offset = $pos;
      }

      /**
       * Converts DOS standard timestamps to UNIX timestamps.
       *
       * The fields are unpacked from the 32-bit value as: seconds/2 (bits 0-4),
       * minutes (5-10), hours (11-15), day (16-20), month (21-24) and
       * years since 1980 (25-31). The result is built with mktime().
       *
       * @param integer DOS timestamp
       * @return integer UNIX timestamp
       */
      protected function dos2unixtime($dostime)
      {
          $sec = 2 * ($dostime & 0x1f);
          $min = ($dostime >> 5) & 0x3f;
          $hrs = ($dostime >> 11) & 0x1f;
          $day = ($dostime >> 16) & 0x1f;
          $mon = ($dostime >> 21) & 0x0f;
          $year = (($dostime >> 25) & 0x7f) + 1980;

          return mktime($hrs, $min, $sec, $mon, $day, $year);
      }

      /**
       * Resets the instance variables before parsing new data.
       *
       * Configuration (maxReadBytes, maxFilenameLength) is left untouched.
       *
       * @return void
       */
      protected function reset()
      {
          $this->rarFile = null;
          $this->data = null;
          $this->dataSize = null;
          $this->offset = 0;
          $this->isAnalyzed = false;
          $this->error = null;
          $this->isVolume = null;
          $this->hasAuth = null;
          $this->hasRecovery = null;
          $this->isEncrypted = null;
          $this->blocks = null;
      }
  } // End RarInfo class