PageRenderTime 53ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/include/git/Pack.class.php

http://github.com/tpruvot/GitPHP
PHP | 632 lines | 328 code | 80 blank | 224 comment | 86 complexity | facaad68023a04519c526240c0937dcc MD5 | raw file
Possible License(s): Apache-2.0, LGPL-3.0, LGPL-2.1, GPL-2.0
  1. <?php
  2. /**
  3. * Extracts data from a pack
  4. * Based on code from Glip by Patrik Fimml
  5. *
  6. * @author Christopher Han <xiphux@gmail.com>
  7. * @copyright Copyright (c) 2011 Christopher Han
  8. * @package GitPHP
  9. * @subpackage Git
  10. */
  11. class GitPHP_Pack
  12. {
  13. /**
  14. * Object types constants
  15. */
  16. const OBJ_COMMIT = 1;
  17. const OBJ_TREE = 2;
  18. const OBJ_BLOB = 3;
  19. const OBJ_TAG = 4;
  20. const OBJ_OFS_DELTA = 6;
  21. const OBJ_REF_DELTA = 7;
  22. /**
  23. * Stores the project internally
  24. * @var GitPHP_Project
  25. */
  26. protected $project;
  27. /**
  28. * Stores the hash of the pack
  29. */
  30. protected $hash;
  31. /**
  32. * Caches object offsets
  33. */
  34. protected $offsetCache = array();
  35. /**
  36. * Instantiates object
  37. *
  38. * @param GitPHP_Project $project the project
  39. * @param string $hash pack hash
  40. * @throws Exception exception on invalid hash
  41. */
  42. public function __construct($project, $hash)
  43. {
  44. if (!(preg_match('/[0-9a-f]{40}/', $hash))) {
  45. throw new GitPHP_InvalidHashException($hash);
  46. }
  47. if (is_string($project))
  48. $this->project = GitPHP_ProjectList::GetInstance()->GetProject($project);
  49. else
  50. $this->project = $project;
  51. $pack = $this->project->GetPath() . '/objects/pack/pack-' . $hash;
  52. if (is_readable($pack . '.pack')) {
  53. if (!file_exists($pack . '.idx'))
  54. throw new GitPHP_MessageException('Pack index does not exist', false);
  55. $this->hash = $hash;
  56. }
  57. }
  58. /**
  59. * Gets the project
  60. *
  61. * @return GitPHP_Project project
  62. */
  63. public function GetProject()
  64. {
  65. return $this->project;
  66. }
  67. /**
  68. * Gets the hash
  69. *
  70. * @return string object hash
  71. */
  72. public function GetHash()
  73. {
  74. return $this->hash;
  75. }
  76. /**
  77. * Check if the Pack Data was loaded
  78. */
  79. public function Valid()
  80. {
  81. return isset($this->hash);
  82. }
  83. /**
  84. * Checks if an object exists in the pack
  85. *
  86. * @param string $hash object hash
  87. * @return boolean true if object is in pack
  88. */
  89. public function ContainsObject($hash)
  90. {
  91. if (!$this->Valid()) {
  92. return false;
  93. }
  94. if (!preg_match('/[0-9a-fA-F]{40}/', $hash)) {
  95. return false;
  96. }
  97. return $this->FindPackedObject($hash) !== false;
  98. }
  99. /**
  100. * Searches for an object's offset in the index
  101. *
  102. * @param string $hash hash
  103. * @return int offset
  104. */
  105. private function FindPackedObject($hash)
  106. {
  107. if (!preg_match('/[0-9a-fA-F]{40}/', $hash)) {
  108. return false;
  109. }
  110. $indexFile = $this->project->GetPath() . '/objects/pack/pack-' . $this->hash . '.idx';
  111. if (isset($this->offsetCache[$hash])) {
  112. return $this->offsetCache[$hash];
  113. }
  114. $offset = false;
  115. $index = fopen($indexFile, 'rb');
  116. flock($index, LOCK_SH);
  117. $magic = fread($index, 4);
  118. if ($magic == "\xFFtOc") {
  119. $version = GitPHP_Pack::fuint32($index);
  120. if ($version == 2) {
  121. $offset = $this->SearchIndexV2($index, $hash);
  122. }
  123. } else {
  124. $offset = $this->SearchIndexV1($index, $hash);
  125. }
  126. flock($index, LOCK_UN);
  127. fclose($index);
  128. $this->offsetCache[$hash] = $offset;
  129. return $offset;
  130. }
  131. /**
  132. * Seraches a version 1 index for a hash
  133. *
  134. * @param resource $index file pointer to index
  135. * @param string $hash hash to find
  136. * @return int pack offset if found
  137. */
  138. private function SearchIndexV1($index, $hash)
  139. {
  140. /*
  141. * index v1 struture:
  142. * fanout table - 256*4 bytes
  143. * offset/sha table - 24*count bytes (4 byte offset + 20 byte sha for each index)
  144. */
  145. $binaryHash = pack('H40', $hash);
  146. /*
  147. * get the start/end indices to search
  148. * from the fanout table
  149. */
  150. list($low, $high) = $this->ReadFanout($index, $binaryHash, 0);
  151. if ($low == $high) {
  152. return false;
  153. }
  154. /*
  155. * binary serach for the index of the hash in the sha/offset listing
  156. * between cur and after from the fanout
  157. */
  158. while ($low <= $high) {
  159. $mid = ($low + $high) >> 1;
  160. fseek($index, 4*256 + 24*$mid);
  161. $off = GitPHP_Pack::fuint32($index);
  162. $binName = fread($index, 20);
  163. $name = bin2hex($binName);
  164. $this->offsetCache[$name] = $off;
  165. $cmp = strcmp($hash, $name);
  166. if ($cmp < 0) {
  167. $high = $mid - 1;
  168. } else if ($cmp > 0) {
  169. $low = $mid + 1;
  170. } else {
  171. return $off;
  172. }
  173. }
  174. return false;
  175. }
  176. /**
  177. * Seraches a version 2 index for a hash
  178. *
  179. * @param resource $index file pointer to index
  180. * @param string $hash hash to find
  181. * @return int pack offset if found
  182. */
  183. private function SearchIndexV2($index, $hash)
  184. {
  185. /*
  186. * index v2 structure:
  187. * magic and version - 2*4 bytes
  188. * fanout table - 256*4 bytes
  189. * sha listing - 20*count bytes
  190. * crc checksums - 4*count bytes
  191. * offsets - 4*count bytes
  192. */
  193. $binaryHash = pack('H40', $hash);
  194. /*
  195. * get the start/end indices to search
  196. * from the fanout table
  197. */
  198. list($low, $high) = $this->ReadFanout($index, $binaryHash, 8);
  199. if ($low == $high) {
  200. return false;
  201. }
  202. /*
  203. * get the object count from fanout[255]
  204. */
  205. fseek($index, 8 + 4*255);
  206. $objectCount = GitPHP_Pack::fuint32($index);
  207. /*
  208. * binary search for the index of the hash in the sha listing
  209. * between cur and after from the fanout
  210. */
  211. $objIndex = false;
  212. while ($low <= $high) {
  213. $mid = ($low + $high) >> 1;
  214. fseek($index, 8 + 4*256 + 20*$mid);
  215. $binName = fread($index, 20);
  216. $name = bin2hex($binName);
  217. $cmp = strcmp($hash, $name);
  218. if ($cmp < 0) {
  219. $high = $mid - 1;
  220. } else if ($cmp > 0) {
  221. $low = $mid + 1;
  222. } else {
  223. $objIndex = $mid;
  224. break;
  225. }
  226. }
  227. if ($objIndex === false) {
  228. return false;
  229. }
  230. /*
  231. * get the offset from the same index in the offset table
  232. */
  233. fseek($index, 8 + 4*256 + 24*$objectCount + 4*$objIndex);
  234. $offset = GitPHP_Pack::fuint32($index);
  235. if ($offset & 0x80000000) {
  236. throw new Exception('64-bit offsets not implemented');
  237. }
  238. return $offset;
  239. }
  240. /**
  241. * Finds the start/end index a hash will be located between,
  242. * acconding to the fanout table
  243. *
  244. * @param resource $index index file pointer
  245. * @param string $binaryHash binary encoded hash to find
  246. * @param int $offset offset in the index file where the fanout table is located
  247. * @return array Range where object can be located
  248. */
  249. private function ReadFanout($index, $binaryHash, $offset)
  250. {
  251. /*
  252. * fanout table has 255 4-byte integers
  253. * indexed by the first byte of the object name.
  254. * the value at that index is the index at which objects
  255. * starting with that byte can be found
  256. * (first level fan-out)
  257. */
  258. if ($binaryHash{0} == "\x00") {
  259. $low = 0;
  260. fseek($index, $offset);
  261. $high = GitPHP_Pack::fuint32($index);
  262. } else {
  263. fseek($index, $offset + (ord($binaryHash{0}) - 1) * 4);
  264. $low = GitPHP_Pack::fuint32($index);
  265. $high = GitPHP_Pack::fuint32($index);
  266. }
  267. return array($low, $high);
  268. }
  269. /**
  270. * Extracts an object from the pack
  271. *
  272. * @param string $hash hash of object to extract
  273. * @param int $type output parameter, returns the type of the object
  274. * @return string object content, or false if not found
  275. */
  276. public function GetObject($hash, &$type = 0)
  277. {
  278. if (!$this->Valid()) {
  279. return false;
  280. }
  281. $offset = $this->FindPackedObject($hash);
  282. if ($offset === false) {
  283. return false;
  284. }
  285. $pack = fopen($this->project->GetPath() . '/objects/pack/pack-' . $this->hash . '.pack', 'rb');
  286. flock($pack, LOCK_SH);
  287. $magic = fread($pack, 4);
  288. $version = GitPHP_Pack::fuint32($pack);
  289. if ($magic != 'PACK' || $version != 2) {
  290. flock($pack, LOCK_UN);
  291. fclose($pack);
  292. throw new Exception('Unsupported pack format');
  293. }
  294. list($type, $data) = $this->UnpackObject($pack, $offset);
  295. flock($pack, LOCK_UN);
  296. fclose($pack);
  297. return $data;
  298. }
  299. /**
  300. * Extracts an object at an offset
  301. *
  302. * @param resource $pack pack file pointer
  303. * @param int $offset object offset
  304. * @return array object type and data
  305. */
  306. private function UnpackObject($pack, $offset)
  307. {
  308. fseek($pack, $offset);
  309. /*
  310. * object header:
  311. * first byte is the type (high 3 bits) and low byte of size (lower 4 bits)
  312. * subsequent bytes each have 7 next higher bits of the size (little endian)
  313. * most significant bit is either 1 or 0 to indicate whether the next byte
  314. * should be read as part of the size. 1 means continue reading the size,
  315. * 0 means the data is starting
  316. */
  317. $c = ord(fgetc($pack));
  318. $type = ($c >> 4) & 0x07;
  319. $size = $c & 0x0F;
  320. for ($i = 4; $c & 0x80; $i += 7) {
  321. $c = ord(fgetc($pack));
  322. $size |= (($c & 0x7f) << $i);
  323. }
  324. if ($type == GitPHP_Pack::OBJ_COMMIT || $type == GitPHP_Pack::OBJ_TREE || $type == GitPHP_Pack::OBJ_BLOB || $type == GitPHP_Pack::OBJ_TAG) {
  325. /*
  326. * regular gzipped object data
  327. */
  328. return array($type, gzuncompress(fread($pack, $size+512), $size));
  329. } else if ($type == GitPHP_Pack::OBJ_OFS_DELTA) {
  330. /*
  331. * delta of an object at offset
  332. */
  333. $buf = fread($pack, $size+512+20);
  334. /*
  335. * read the base object offset
  336. * each subsequent byte's 7 least significant bits
  337. * are part of the offset in decreasing significance per byte
  338. * (opposite of other places)
  339. * most significant bit is a flag indicating whether to read the
  340. * next byte as part of the offset
  341. */
  342. $pos = 0;
  343. $off = -1;
  344. do {
  345. $off++;
  346. $c = ord($buf{$pos++});
  347. $off = ($off << 7) + ($c & 0x7f);
  348. } while ($c & 0x80);
  349. /*
  350. * next read the compressed delta data
  351. */
  352. $delta = gzuncompress(substr($buf, $pos), $size);
  353. unset($buf);
  354. $baseOffset = $offset - $off;
  355. if ($baseOffset > 0) {
  356. /*
  357. * read base object at offset and apply delta to it
  358. */
  359. list($type, $base) = $this->UnpackObject($pack, $baseOffset);
  360. $data = GitPHP_Pack::ApplyDelta($delta, $base);
  361. return array($type, $data);
  362. }
  363. } else if ($type == GitPHP_Pack::OBJ_REF_DELTA) {
  364. /*
  365. * delta of object with hash
  366. */
  367. /*
  368. * first the base object's hash
  369. * load that object
  370. */
  371. $hash = fread($pack, 20);
  372. $hash = bin2hex($hash);
  373. $objectLoader = $this->project->GetObjectLoader();
  374. if (!is_object($objectLoader))
  375. throw new GitPHP_MessageException(sprintf('Unable to get object loader on project %1$s', $project->GetProject()), true);
  376. $base = $objectLoader->GetObject($hash, $type);
  377. /*
  378. * then the gzipped delta data
  379. */
  380. $delta = gzuncompress(fread($pack, $size + 512), $size);
  381. $data = GitPHP_Pack::ApplyDelta($delta, $base);
  382. return array($type, $data);
  383. }
  384. return false;
  385. }
  386. /**
  387. * Applies a binary delta to a base object
  388. *
  389. * @param string $delta delta string
  390. * @param string $base base object data
  391. * @return string patched content
  392. */
  393. private static function ApplyDelta($delta, $base)
  394. {
  395. /*
  396. * algorithm from patch-delta.c
  397. */
  398. $pos = 0;
  399. $baseSize = GitPHP_Pack::ParseVarInt($delta, $pos);
  400. $resultSize = GitPHP_Pack::ParseVarInt($delta, $pos);
  401. $data = '';
  402. $deltalen = strlen($delta);
  403. while ($pos < $deltalen) {
  404. $opcode = ord($delta{$pos++});
  405. if ($opcode & 0x80) {
  406. $off = 0;
  407. if ($opcode & 0x01) $off = ord($delta{$pos++});
  408. if ($opcode & 0x02) $off |= ord($delta{$pos++}) << 8;
  409. if ($opcode & 0x04) $off |= ord($delta{$pos++}) << 16;
  410. if ($opcode & 0x08) $off |= ord($delta{$pos++}) << 24;
  411. $len = 0;
  412. if ($opcode & 0x10) $len = ord($delta{$pos++});
  413. if ($opcode & 0x20) $len |= ord($delta{$pos++}) << 8;
  414. if ($opcode & 0x40) $len |= ord($delta{$pos++}) << 16;
  415. if ($len == 0) $len = 0x10000;
  416. $data .= substr($base, $off, $len);
  417. } else if ($opcode > 0) {
  418. $data .= substr($delta, $pos, $opcode);
  419. $pos += $opcode;
  420. }
  421. }
  422. return $data;
  423. }
  424. /**
  425. * Find hashes in packfile matching a prefix
  426. *
  427. * @param string $prefix hash prefix
  428. * @return array matching hashes
  429. */
  430. public function FindHashes($prefix)
  431. {
  432. if (empty($prefix) || !$this->Valid()) {
  433. return array();
  434. }
  435. if (strlen($prefix) >= 40) {
  436. return array($prefix);
  437. }
  438. $indexFile = $this->project->GetPath() . '/objects/pack/pack-' . $this->hash . '.idx';
  439. $matches = array();
  440. $index = fopen($indexFile, 'rb');
  441. flock($index, LOCK_SH);
  442. $magic = fread($index, 4);
  443. if ($magic == "\xFFtOc") {
  444. $version = GitPHP_Pack::fuint32($index);
  445. if ($version == 2) {
  446. $matches = $this->FindHashesV2($index, $prefix);
  447. }
  448. } else {
  449. $matches = $this->FindHashesV1($index, $prefix);
  450. }
  451. flock($index, LOCK_UN);
  452. fclose($index);
  453. return $matches;
  454. }
  455. /**
  456. * Find hashes in v1 index matching a prefix
  457. *
  458. * @param resource $index file pointer to index
  459. * @param string $prefix hash prefix
  460. * @return array matching hashes
  461. */
  462. private function FindHashesV1($index, $prefix)
  463. {
  464. $matches = array();
  465. $binaryPrefix = pack('H' . strlen($prefix), $prefix);
  466. list($low, $high) = $this->ReadFanout($index, $binaryPrefix, 0);
  467. $range = $high - $low;
  468. $prefixlen = strlen($prefix);
  469. fseek($index, 4*256 + 24*$low);
  470. for ($i = 0; $i < $range; $i++) {
  471. $off = GitPHP_Pack::fuint32($index);
  472. $binName = fread($index, 20);
  473. $name = bin2hex($binName);
  474. $this->offsetCache[$name] = $off;
  475. $cmp = substr_compare($name, $prefix, 0, $prefixlen);
  476. if ($cmp === 0) {
  477. $matches[] = $name;
  478. } else if ($cmp > 0) {
  479. break;
  480. }
  481. }
  482. return $matches;
  483. }
  484. /**
  485. * Find hashes in v2 index matching a prefix
  486. *
  487. * @param resource $index file pointer to index
  488. * @param string $prefix hash prefix
  489. * @return array matching hashes
  490. */
  491. private function FindHashesV2($index, $prefix)
  492. {
  493. $matches = array();
  494. $binaryPrefix = pack('H' . strlen($prefix), $prefix);
  495. list($low, $high) = $this->ReadFanout($index, $binaryPrefix, 8);
  496. $prefixlen = strlen($prefix);
  497. fseek($index, 8 + 4*256 + 20*$low);
  498. for ($i = $low; $i < $high; $i++) {
  499. $binName = fread($index, 20);
  500. $name = bin2hex($binName);
  501. $cmp = substr_compare($name, $prefix, 0, $prefixlen);
  502. if ($cmp === 0) {
  503. $matches[] = $name;
  504. } else if ($cmp > 0) {
  505. break;
  506. }
  507. }
  508. return $matches;
  509. }
  510. /**
  511. * Reads a git-style packed variable length integer
  512. * sequence of bytes, where each byte's 7 less significant bits
  513. * are pieces of the int in increasing significance for each byte (little endian)
  514. * the most significant bit of each byte is a flag whether to continue
  515. * reading bytes or not
  516. *
  517. * @param string $str packed data string
  518. * @param int $pos position in string to read from
  519. * @return int parsed integer
  520. */
  521. private static function ParseVarInt($str, &$pos=0)
  522. {
  523. $ret = 0;
  524. $byte = 0x80;
  525. for ($shift = 0; $byte & 0x80; $shift += 7) {
  526. $byte = ord($str{$pos++});
  527. $ret |= (($byte & 0x7F) << $shift);
  528. }
  529. return $ret;
  530. }
  531. /**
  532. * Unpacks a packed 32 bit integer
  533. *
  534. * @return int integer
  535. * @param string $str binary data
  536. */
  537. private static function uint32($str)
  538. {
  539. $a = unpack('Nx', substr($str, 0, 4));
  540. return $a['x'];
  541. }
  542. /**
  543. * Reads and unpacks the next 32 bit integer
  544. *
  545. * @return int integer
  546. * @param resource $handle file handle
  547. */
  548. private static function fuint32($handle)
  549. {
  550. return GitPHP_Pack::uint32(fread($handle, 4));
  551. }
  552. }