/includes/Cdb_PHP.php

https://github.com/daevid/MWFork · PHP · 445 lines · 272 code · 45 blank · 128 comment · 51 complexity · bc5db77f0c971e446631e68a6676f313 MD5 · raw file

  1. <?php
  2. /**
  3. * This is a port of D.J. Bernstein's CDB to PHP. It's based on the copy that
  4. * appears in PHP 5.3. Changes are:
  5. * * Error returns replaced with exceptions
  6. * * Exception thrown if sizes or offsets are between 2GB and 4GB
  7. * * Some variables renamed
  8. *
  9. * @file
  10. */
  /**
   * Integer helpers shared by the CDB reader and the CDB writer.
   *
   * The helpers treat their operands as unsigned 32-bit quantities even when
   * PHP represents them as signed integers.
   */
  class CdbFunctions {
      /**
       * Compute $a modulo $b, interpreting $a as an unsigned integer.
       * $b must be greater than 0 and less than 0x40000000.
       *
       * @param $a int Dividend, possibly with bit 31 set
       * @param $b int Divisor
       *
       * @return int
       */
      public static function unsignedMod( $a, $b ) {
          if ( !( $a & 0x80000000 ) ) {
              // Bit 31 is clear, so the plain operator already gives the answer
              return $a % $b;
          }

          // Split $a into its low 31 bits plus 2^31, where 2^31 is expressed
          // as 2 * 2^30, and reduce the two parts separately.
          $lowPart = ( $a & 0x7fffffff ) % $b;
          $highPart = 2 * ( 0x40000000 % $b );

          return ( $lowPart + $highPart ) % $b;
      }

      /**
       * Shift $a right by $b bits, interpreting $a as an unsigned integer.
       *
       * @param $a int Value to shift
       * @param $b int Number of bit positions
       * @return int
       */
      public static function unsignedShiftRight( $a, $b ) {
          if ( $b == 0 ) {
              return $a;
          }

          if ( !( $a & 0x80000000 ) ) {
              return $a >> $b;
          }

          // Shift the low 31 bits, then put bit 31 back at its shifted position
          $shiftedLow = ( $a & 0x7fffffff ) >> $b;
          $shiftedTop = 0x40000000 >> ( $b - 1 );

          return $shiftedLow | $shiftedTop;
      }

      /**
       * The CDB hash function.
       *
       * Starts from 5381 and, for every byte of the input, replaces the hash
       * with (hash + (hash << 5)) truncated to 32 bits, XORed with the byte.
       *
       * @param $s string Bytes to hash
       *
       * @return int
       */
      public static function hash( $s ) {
          $h = 5381;
          $length = strlen( $s );

          for ( $i = 0; $i < $length; $i++ ) {
              $h5 = ( $h << 5 ) & 0xffffffff;

              // 32-bit addition of $h and $h5, written out inline for speed:
              // add the low 30 bits directly, then add the top two bits of
              // each operand together with the carry out of bit 29.
              $sum = ( $h & 0x3fffffff ) + ( $h5 & 0x3fffffff );
              $carry = ( $sum & 0x40000000 ) ? 1 : 0;
              $topOfH = ( ( $h & 0x80000000 ) ? 2 : 0 ) + ( ( $h & 0x40000000 ) ? 1 : 0 );
              $topOfH5 = ( ( $h5 & 0x80000000 ) ? 2 : 0 ) + ( ( $h5 & 0x40000000 ) ? 1 : 0 );

              $h = ( ( $carry + $topOfH + $topOfH5 ) << 30 ) | ( $sum & 0x3fffffff );
              $h ^= ord( $s[$i] );
              $h &= 0xffffffff;
          }

          return $h;
      }
  }
  /**
   * CDB reader class
   *
   * A lookup hashes the key, uses the low 8 bits of the hash to pick one of
   * the 256 (table position, slot count) pairs stored in the first 2048 bytes
   * of the file, and then probes that hash table slot by slot until the key
   * is found, an empty slot is reached, or every slot has been visited.
   */
  class CdbReader_PHP extends CdbReader {
      /** The file handle */
      public $handle;

      /** Number of hash slots searched under the current key */
      public $loop;

      /** Hash of the current key; initialized if $loop is nonzero */
      public $khash;

      /** File offset of the next slot to probe; initialized if $loop is nonzero */
      public $kpos;

      /** File offset of the selected hash table; initialized if $loop is nonzero */
      public $hpos;

      /** Slot count of the selected hash table; initialized if $loop is nonzero */
      public $hslots;

      /** File offset of the matched value; initialized if findNext() returns true */
      public $dpos;

      /** Byte length of the matched value; initialized if findNext() returns true */
      public $dlen;

      /**
       * Open a CDB file for reading.
       *
       * @throws MWException if the file cannot be opened
       * @param $fileName string Path of the file to open
       */
      public function __construct( $fileName ) {
          $this->handle = fopen( $fileName, 'rb' );
          if ( !$this->handle ) {
              throw new MWException( 'Unable to open CDB file "' . $fileName . '"' );
          }
          $this->findStart();
      }

      /**
       * Close the file handle, if there is one, and forget it.
       */
      public function close() {
          if ( isset( $this->handle ) ) {
              fclose( $this->handle );
          }
          unset( $this->handle );
      }

      /**
       * Look up a key.
       *
       * @param $key
       * @return bool|string The stored value, or false if the key is not present
       */
      public function get( $key ) {
          // strval is required: the hash and the comparison work on string bytes
          if ( $this->find( strval( $key ) ) ) {
              return $this->read( $this->dlen, $this->dpos );
          }
          return false;
      }

      /**
       * Check whether the bytes stored at $pos are exactly $key.
       *
       * @param $key string
       * @param $pos int File offset of the stored key
       * @return bool
       */
      protected function match( $key, $pos ) {
          $buf = $this->read( strlen( $key ), $pos );
          return $buf === $key;
      }

      /**
       * Reset the probe counter so that the next findNext() starts a new search.
       */
      protected function findStart() {
          $this->loop = 0;
      }

      /**
       * Read exactly $length bytes starting at file offset $pos.
       *
       * @throws MWException if the seek fails or fewer bytes are available
       * @param $length int
       * @param $pos int
       * @return string
       */
      protected function read( $length, $pos ) {
          if ( fseek( $this->handle, $pos ) == -1 ) {
              // This can easily happen if the internal pointers are incorrect
              throw new MWException( __METHOD__.': seek failed, file may be corrupted.' );
          }

          if ( $length == 0 ) {
              return '';
          }

          $buf = fread( $this->handle, $length );
          if ( $buf === false || strlen( $buf ) !== $length ) {
              throw new MWException( __METHOD__.': read from CDB file failed, file may be corrupted' );
          }
          return $buf;
      }

      /**
       * Unpack an unsigned integer and throw an exception if it needs more than 31 bits
       *
       * @throws MWException if the value does not fit in 31 bits
       * @param $s string At least four bytes, little-endian
       * @return int
       */
      protected function unpack31( $s ) {
          $data = unpack( 'V', $s );
          $value = $data[1];
          // Where PHP integers are 32 bits wide, unpack() hands back values
          // with the top bit set as negative numbers, so the upper-bound test
          // alone would let them through.
          if ( $value < 0 || $value > 0x7fffffff ) {
              throw new MWException( __METHOD__.': error in CDB file, integer too big' );
          }
          return $value;
      }

      /**
       * Unpack a 32-bit little-endian integer as two 16-bit halves, so that
       * the result has the same representation as CdbFunctions::hash() output.
       *
       * @param $s string At least four bytes
       * @return int
       */
      protected function unpackSigned( $s ) {
          $data = unpack( 'va/vb', $s );
          return $data['a'] | ( $data['b'] << 16 );
      }

      /**
       * Continue (or, if $loop is zero, begin) the search for $key.
       *
       * On success $dpos and $dlen describe the value that was found.
       *
       * @param $key string
       * @return bool True if a record with this key was found
       */
      protected function findNext( $key ) {
          if ( !$this->loop ) {
              // New search: the low 8 bits of the hash select an 8-byte
              // (table position, slot count) entry in the file header.
              $u = CdbFunctions::hash( $key );
              $buf = $this->read( 8, ( $u << 3 ) & 2047 );
              $this->hslots = $this->unpack31( substr( $buf, 4 ) );
              if ( !$this->hslots ) {
                  return false;
              }
              $this->hpos = $this->unpack31( substr( $buf, 0, 4 ) );
              $this->khash = $u;

              // The remaining hash bits choose the first slot to probe
              $u = CdbFunctions::unsignedShiftRight( $u, 8 );
              $u = CdbFunctions::unsignedMod( $u, $this->hslots );
              $u <<= 3;
              $this->kpos = $this->hpos + $u;
          }

          while ( $this->loop < $this->hslots ) {
              $buf = $this->read( 8, $this->kpos );
              $pos = $this->unpack31( substr( $buf, 4 ) );
              if ( !$pos ) {
                  // Empty slot: the key is not in the table
                  return false;
              }

              // Advance to the next slot, wrapping at the end of the table
              $this->loop += 1;
              $this->kpos += 8;
              if ( $this->kpos == $this->hpos + ( $this->hslots << 3 ) ) {
                  $this->kpos = $this->hpos;
              }

              $u = $this->unpackSigned( substr( $buf, 0, 4 ) );
              if ( $u === $this->khash ) {
                  // Same hash; compare the stored key itself
                  $buf = $this->read( 8, $pos );
                  $keyLen = $this->unpack31( substr( $buf, 0, 4 ) );
                  if ( $keyLen == strlen( $key ) && $this->match( $key, $pos + 8 ) ) {
                      // Found
                      $this->dlen = $this->unpack31( substr( $buf, 4 ) );
                      $this->dpos = $pos + 8 + $keyLen;
                      return true;
                  }
              }
          }
          return false;
      }

      /**
       * Search for $key from the beginning.
       *
       * @param $key string
       * @return bool
       */
      protected function find( $key ) {
          $this->findStart();
          return $this->findNext( $key );
      }
  }
  /**
   * CDB writer class
   *
   * Records are appended to a temporary file after a 2048-byte gap. When the
   * writer is closed, the hash tables are appended, the gap is filled with the
   * 256-entry pointer array, and the temporary file is renamed over the real
   * file name.
   */
  class CdbWriter_PHP extends CdbWriter {
      /** Handle of the temporary file being written */
      public $handle;

      /** Final file name, and the name of the temporary file written first */
      public $realFileName, $tmpFileName;

      /** List of array( 'h' => hash, 'p' => record position ), one per record */
      public $hplist;

      /** Number of records added so far, and the current write position */
      public $numEntries, $pos;

      /**
       * Create the temporary file and position it after the pointer array.
       *
       * @throws MWException if the temporary file cannot be opened or seeked
       * @param $fileName string Name the finished file will be given
       */
      public function __construct( $fileName ) {
          $this->realFileName = $fileName;
          $this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
          $this->handle = fopen( $this->tmpFileName, 'wb' );
          if ( !$this->handle ) {
              throw new MWException( 'Unable to open CDB file for write "' . $fileName . '"' );
          }
          $this->hplist = array();
          $this->numEntries = 0;
          $this->pos = 2048; // leaving space for the pointer array, 256 * 8
          if ( fseek( $this->handle, $this->pos ) == -1 ) {
              throw new MWException( __METHOD__.': fseek failed' );
          }
      }

      /**
       * Finish the file if the caller did not close the writer explicitly.
       */
      public function __destruct() {
          if ( isset( $this->handle ) ) {
              $this->close();
          }
      }

      /**
       * Append a record. Records with an empty key are silently skipped.
       *
       * @param $key
       * @param $value
       * @return
       */
      public function set( $key, $value ) {
          // Work on the string form of the key throughout, so that the bytes
          // that get hashed are the bytes that get written. CdbReader_PHP::get()
          // looks keys up by strval( $key ) as well.
          $key = strval( $key );
          if ( $key === '' ) {
              // DBA cross-check hack
              return;
          }

          $keyLen = strlen( $key );
          $valueLen = strlen( $value );

          $this->addbegin( $keyLen, $valueLen );
          $this->write( $key );
          $this->write( $value );
          $this->addend( $keyLen, $valueLen, CdbFunctions::hash( $key ) );
      }

      /**
       * Write the hash tables and pointer array, then move the file into place.
       *
       * @throws MWException
       */
      public function close() {
          $this->finish();
          if ( isset( $this->handle ) ) {
              fclose( $this->handle );
          }
          // Forget the handle before anything below can throw. Otherwise
          // __destruct() would call close() again and finish() would try to
          // write to a stream that is already closed.
          unset( $this->handle );

          if ( wfIsWindows() && file_exists( $this->realFileName ) ) {
              unlink( $this->realFileName );
          }
          if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
              throw new MWException( 'Unable to move the new CDB file into place.' );
          }
      }

      /**
       * Write a string to the file, insisting that all of it is written.
       *
       * @throws MWException
       * @param $buf string
       */
      protected function write( $buf ) {
          $len = fwrite( $this->handle, $buf );
          if ( $len !== strlen( $buf ) ) {
              throw new MWException( 'Error writing to CDB file.' );
          }
      }

      /**
       * Advance the recorded write position, refusing to go past 0x7fffffff.
       *
       * @throws MWException
       * @param $len int Number of bytes just written
       */
      protected function posplus( $len ) {
          $newpos = $this->pos + $len;
          if ( $newpos > 0x7fffffff ) {
              throw new MWException( 'A value in the CDB file is too large' );
          }
          $this->pos = $newpos;
      }

      /**
       * Register a record that has just been written at the current position
       * and move the position past its 8-byte header, key and data.
       *
       * @throws MWException
       * @param $keylen int
       * @param $datalen int
       * @param $h int Hash of the key
       */
      protected function addend( $keylen, $datalen, $h ) {
          $this->hplist[] = array(
              'h' => $h,
              'p' => $this->pos
          );

          $this->numEntries++;
          $this->posplus( 8 );
          $this->posplus( $keylen );
          $this->posplus( $datalen );
      }

      /**
       * Write the 8-byte record header holding the key and data lengths.
       *
       * @throws MWException
       * @param $keylen int
       * @param $datalen int
       */
      protected function addbegin( $keylen, $datalen ) {
          if ( $keylen > 0x7fffffff ) {
              throw new MWException( __METHOD__.': key length too long' );
          }
          if ( $datalen > 0x7fffffff ) {
              throw new MWException( __METHOD__.': data length too long' );
          }
          $buf = pack( 'VV', $keylen, $datalen );
          $this->write( $buf );
      }

      /**
       * Append the 256 hash tables and write the pointer array at offset 0.
       *
       * @throws MWException
       */
      protected function finish() {
          // Hack for DBA cross-check
          $this->hplist = array_reverse( $this->hplist );

          // Calculate the number of items that will be in each hashtable
          $counts = array_fill( 0, 256, 0 );
          foreach ( $this->hplist as $item ) {
              ++$counts[255 & $item['h']];
          }

          // Fill in $starts with the *end* indexes
          $starts = array();
          $pos = 0;
          for ( $i = 0; $i < 256; ++$i ) {
              $pos += $counts[$i];
              $starts[$i] = $pos;
          }

          // Excessively clever and indulgent code to simultaneously fill $packedTables
          // with the packed hashtables, and adjust the elements of $starts
          // to actually point to the starts instead of the ends.
          // array_fill() rejects a count of zero before PHP 5.6, so an empty
          // database gets an empty array directly.
          $packedTables = $this->numEntries > 0
              ? array_fill( 0, $this->numEntries, false )
              : array();
          foreach ( $this->hplist as $item ) {
              $packedTables[--$starts[255 & $item['h']]] = $item;
          }

          $final = '';
          for ( $i = 0; $i < 256; ++$i ) {
              $count = $counts[$i];

              // The size of the hashtable will be double the item count.
              // The rest of the slots will be empty.
              $len = $count + $count;
              $final .= pack( 'VV', $this->pos, $len );

              $hashtable = array();
              for ( $u = 0; $u < $len; ++$u ) {
                  $hashtable[$u] = array( 'h' => 0, 'p' => 0 );
              }

              // Fill the hashtable, using the next empty slot if the hashed slot
              // is taken.
              for ( $u = 0; $u < $count; ++$u ) {
                  $hp = $packedTables[$starts[$i] + $u];
                  $where = CdbFunctions::unsignedMod(
                      CdbFunctions::unsignedShiftRight( $hp['h'], 8 ), $len );
                  while ( $hashtable[$where]['p'] ) {
                      if ( ++$where == $len ) {
                          $where = 0;
                      }
                  }
                  $hashtable[$where] = $hp;
              }

              // Write the hashtable
              for ( $u = 0; $u < $len; ++$u ) {
                  // The hash is packed as two 16-bit halves, low half first
                  $buf = pack( 'vvV',
                      $hashtable[$u]['h'] & 0xffff,
                      CdbFunctions::unsignedShiftRight( $hashtable[$u]['h'], 16 ),
                      $hashtable[$u]['p'] );
                  $this->write( $buf );
                  $this->posplus( 8 );
              }
          }

          // Write the pointer array at the start of the file
          rewind( $this->handle );
          if ( ftell( $this->handle ) != 0 ) {
              throw new MWException( __METHOD__.': Error rewinding to start of file' );
          }
          $this->write( $final );
      }
  }