PageRenderTime 68ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/mediawiki-1.21.2/includes/UIDGenerator.php

https://gitlab.com/mcepl/dumpathome
PHP | 350 lines | 187 code | 21 blank | 142 comment | 30 complexity | 78f8b74284d39de9d7955c6b4e4da1d8 MD5 | raw file
Possible License(s): GPL-2.0, Apache-2.0, LGPL-3.0
  1. <?php
  2. /**
  3. * This file deals with UID generation.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @author Aaron Schulz
  22. */
  /**
   * Class for getting statistically unique IDs.
   *
   * Three kinds of identifiers are offered:
   *  - 88-bit timestamp-prefixed IDs (46 bit time, 10 bit counter, 32 bit node ID)
   *  - 128-bit timestamp-prefixed IDs (46 bit time, 20 bit counter,
   *    14 bit clock sequence, 48 bit node ID)
   *  - random RFC 4122 version 4 UUIDs
   *
   * The timestamp-prefixed generators serialize themselves through a lock file in
   * the temporary directory, which also stores the last timestamp/counter handed out.
   *
   * @since 1.21
   */
  class UIDGenerator {
      /** @var UIDGenerator|null Lazily created shared instance */
      protected static $instance = null;

      protected $nodeId32; // string; node ID in binary (32 bits)
      protected $nodeId48; // string; node ID in binary (48 bits)

      protected $lockFile88; // string; local file path
      protected $lockFile128; // string; local file path

      /** @var Array Open lock file handles, keyed by lock file property name */
      protected $fileHandles = array();

      const QUICK_RAND = 1; // get randomness from fast and insecure sources

      /**
       * Work out the node ID of this machine and the lock file locations.
       *
       * The node ID is a 12 digit hex string. It is read from a cache file in the
       * temporary directory when present; otherwise a MAC address is looked up via
       * "getmac" (Windows) or "/sbin/ifconfig", falling back to random hex digits.
       * The chosen value is then written back to the cache file.
       */
      protected function __construct() {
          $tmpDir = wfTempDir();
          $idFile = $tmpDir . '/mw-' . __CLASS__ . '-UID-nodeid';
          $nodeId = is_file( $idFile ) ? file_get_contents( $idFile ) : '';
          // Try to get some ID that uniquely identifies this machine (RFC 4122)...
          if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
              wfSuppressWarnings();
              if ( wfIsWindows() ) {
                  // http://technet.microsoft.com/en-us/library/bb490913.aspx
                  $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
                  $line = substr( $csv, 0, strcspn( $csv, "\n" ) ); // first adapter only
                  $info = str_getcsv( $line );
                  $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
              } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
                  // See http://linux.die.net/man/8/ifconfig
                  $m = array();
                  preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
                      wfShellExec( '/sbin/ifconfig -a' ), $m );
                  $nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
              }
              wfRestoreWarnings();
              if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
                  // No usable MAC address; make up a node ID instead
                  $nodeId = MWCryptRand::generateHex( 12, true );
                  $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
              }
              file_put_contents( $idFile, $nodeId ); // cache
          }
          // The 32 bit form is derived from a hash since the full ID has 48 bits
          $this->nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
          $this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 );
          // If different processes run as different users, they may have different temp dirs.
          // This is dealt with by initializing the clock sequence number and counters randomly.
          $this->lockFile88 = $tmpDir . '/mw-' . __CLASS__ . '-UID-88';
          $this->lockFile128 = $tmpDir . '/mw-' . __CLASS__ . '-UID-128';
      }

      /**
       * Get the shared generator, creating it on first use.
       *
       * @return UIDGenerator
       */
      protected static function singleton() {
          if ( self::$instance === null ) {
              self::$instance = new self();
          }
          return self::$instance;
      }

      /**
       * Make sure a numeric base is something the public generators accept.
       *
       * @param mixed $base Candidate base
       * @throws MWException If $base is not an integer in the range 2-36
       */
      protected static function assertValidBase( $base ) {
          if ( !is_int( $base ) || $base > 36 || $base < 2 ) {
              throw new MWException( "Base must be an integer between 2 and 36" );
          }
      }

      /**
       * Get a statistically unique 88-bit unsigned integer ID string.
       * The bits of the UID are prefixed with the time (down to the millisecond).
       *
       * These IDs are suitable as values for the shard key of distributed data.
       * If a column uses these as values, it should be declared UNIQUE to handle collisions.
       * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
       * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL.
       *
       * UID generation is serialized on each server (as the node ID is for the whole machine).
       *
       * @param int $base Specifies a base other than 10
       * @return string Number
       * @throws MWException
       */
      public static function newTimestampedUID88( $base = 10 ) {
          self::assertValidBase( $base );
          $gen = self::singleton();
          // One possible clock sequence value (it is not part of the ID), 2^10 counter values
          $time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 );
          return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base );
      }

      /**
       * Assemble the bit string of an 88-bit ID.
       *
       * @param array $info (UIDGenerator::millitime(), counter); further elements are ignored
       * @return string 88 bits
       * @throws MWException If the assembled string is not exactly 88 bits long
       */
      protected function getTimestampedID88( array $info ) {
          list( $time, $counter ) = $info;
          // Take the 46 MSBs of "milliseconds since epoch"
          $id_bin = $this->millisecondsSinceEpochBinary( $time );
          // Add a 10 bit counter resulting in 56 bits total
          $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
          // Add the 32 bit node ID resulting in 88 bits total
          $id_bin .= $this->nodeId32;
          // Sanity check: any other length means one of the components overflowed
          if ( strlen( $id_bin ) !== 88 ) {
              throw new MWException( "Detected overflow for millisecond timestamp." );
          }
          return $id_bin;
      }

      /**
       * Get a statistically unique 128-bit unsigned integer ID string.
       * The bits of the UID are prefixed with the time (down to the millisecond).
       *
       * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
       * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
       * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL.
       *
       * UID generation is serialized on each server (as the node ID is for the whole machine).
       *
       * @param int $base Specifies a base other than 10
       * @return string Number
       * @throws MWException
       */
      public static function newTimestampedUID128( $base = 10 ) {
          self::assertValidBase( $base );
          $gen = self::singleton();
          // 2^14 clock sequence values, 2^20 counter values
          $time = $gen->getTimestampAndDelay( 'lockFile128', 16384, 1048576 );
          return wfBaseConvert( $gen->getTimestampedID128( $time ), 2, $base );
      }

      /**
       * Assemble the bit string of a 128-bit ID.
       *
       * @param array $info (UIDGenerator::millitime(), counter, clock sequence)
       * @return string 128 bits
       * @throws MWException If the assembled string is not exactly 128 bits long
       */
      protected function getTimestampedID128( array $info ) {
          list( $time, $counter, $clkSeq ) = $info;
          // Take the 46 MSBs of "milliseconds since epoch"
          $id_bin = $this->millisecondsSinceEpochBinary( $time );
          // Add a 20 bit counter resulting in 66 bits total
          $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
          // Add a 14 bit clock sequence number resulting in 80 bits total
          $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
          // Add the 48 bit node ID resulting in 128 bits total
          $id_bin .= $this->nodeId48;
          // Sanity check: any other length means one of the components overflowed
          if ( strlen( $id_bin ) !== 128 ) {
              throw new MWException( "Detected overflow for millisecond timestamp." );
          }
          return $id_bin;
      }

      /**
       * Return an RFC4122 compliant v4 UUID
       *
       * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
       * @return string Five hyphen-separated groups of 8-4-4-4-12 hex characters
       * @throws MWException
       */
      public static function newUUIDv4( $flags = 0 ) {
          // 31 random hex digits; the 32nd digit of the UUID is the fixed version "4"
          $hex = ( $flags & self::QUICK_RAND )
              ? wfRandomString( 31 )
              : MWCryptRand::generateHex( 31 );

          return sprintf( '%s-%s-%s-%s-%s',
              // "time_low" (32 bits)
              substr( $hex, 0, 8 ),
              // "time_mid" (16 bits)
              substr( $hex, 8, 4 ),
              // "time_hi_and_version" (16 bits)
              '4' . substr( $hex, 12, 3 ),
              // "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
              dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ),
              // "node" (48 bits)
              substr( $hex, 19, 12 )
          );
      }

      /**
       * Return an RFC4122 compliant v4 UUID
       *
       * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
       * @return string 32 hex characters with no hyphens
       * @throws MWException
       */
      public static function newRawUUIDv4( $flags = 0 ) {
          return str_replace( '-', '', self::newUUIDv4( $flags ) );
      }

      /**
       * Get a (time,counter,clock sequence) where (time,counter) is higher
       * than any previous (time,counter) value for the given clock sequence.
       * This is useful for making UIDs sequential on a per-node basis.
       *
       * The lock file holds a single line of the form
       * "<clk seq> <sec> <msec> <counter> <offset>" describing the last ID given out.
       *
       * @param string $lockFile Name of the property holding the local lock file path
       * @param int $clockSeqSize The number of possible clock sequence values
       * @param int $counterSize The number of possible counter values
       * @return Array (result of UIDGenerator::millitime(), counter, clock sequence)
       * @throws MWException
       */
      protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) {
          // Get the UID lock file handle, opening it on first use
          if ( isset( $this->fileHandles[$lockFile] ) ) {
              $handle = $this->fileHandles[$lockFile];
          } else {
              $handle = fopen( $this->$lockFile, 'cb+' );
              if ( $handle === false ) {
                  // Do not cache anything, so that a later call simply retries
                  throw new MWException( "Could not open '{$this->$lockFile}'." );
              }
              $this->fileHandles[$lockFile] = $handle; // cache
          }
          // Acquire the UID lock file
          if ( !flock( $handle, LOCK_EX ) ) {
              throw new MWException( "Could not acquire '{$this->$lockFile}'." );
          }
          // Get the current timestamp, clock sequence number, last time, and counter
          rewind( $handle );
          // fgets() gives false for an empty file; treat that the same as an empty line
          $data = explode( ' ', (string) fgets( $handle ) );
          $clockChanged = false; // clock set back significantly?
          if ( count( $data ) == 5 ) { // last UID info already initialized
              $clkSeq = (int) $data[0] % $clockSeqSize;
              $prevTime = array( (int) $data[1], (int) $data[2] );
              $offset = (int) $data[4] % $counterSize; // random counter offset
              $counter = 0; // counter for UIDs with the same timestamp
              // Delay until the clock reaches the time of the last ID.
              // This detects any microtime() drift among processes.
              $time = $this->timeWaitUntil( $prevTime );
              if ( !$time ) { // too long to delay?
                  $clockChanged = true; // bump clock sequence number
                  $time = self::millitime();
              } elseif ( $time == $prevTime ) {
                  // Bump the counter if there are timestamp collisions
                  $counter = (int) $data[3] % $counterSize;
                  if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
                      flock( $handle, LOCK_UN ); // abort
                      throw new MWException( "Counter overflow for timestamp value." );
                  }
              }
          } else { // last UID info not initialized
              $clkSeq = mt_rand( 0, $clockSeqSize - 1 );
              $counter = 0;
              $offset = mt_rand( 0, $counterSize - 1 );
              $time = self::millitime();
          }
          // microtime() and gettimeofday() can drift from time() at least on Windows.
          // The drift is immediate for processes running while the system clock changes.
          // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
          if ( abs( time() - $time[0] ) >= 2 ) {
              // We don't want processes using too high or low timestamps to avoid duplicate
              // UIDs and clock sequence number churn. This process should just be restarted.
              flock( $handle, LOCK_UN ); // abort
              throw new MWException( "Process clock is outdated or drifted." );
          }
          // If microtime() is synced and a clock change was detected, then the clock went back
          if ( $clockChanged ) {
              // Bump the clock sequence number and also randomize the counter offset,
              // which is useful for UIDs that do not include the clock sequence number.
              $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
              $offset = mt_rand( 0, $counterSize - 1 );
              trigger_error( "Clock was set back; sequence number incremented." );
          }
          // Update the (clock sequence number, timestamp, counter)
          ftruncate( $handle, 0 );
          rewind( $handle );
          fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
          fflush( $handle );
          // Release the UID lock file
          flock( $handle, LOCK_UN );

          return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq );
      }

      /**
       * Wait till the current timestamp reaches $time and return the current
       * timestamp. This returns false if it would have to wait more than 10ms.
       *
       * @param array $time Result of UIDGenerator::millitime()
       * @return Array|bool UIDGenerator::millitime() result or false
       */
      protected function timeWaitUntil( array $time ) {
          do {
              $ct = self::millitime();
              // Arrays of equal size are compared element by element, so this
              // orders by seconds first and milliseconds second.
              // http://php.net/manual/en/language.operators.comparison.php
              if ( $ct >= $time ) {
                  return $ct; // current timestamp has reached $time
              }
              // Keep polling only while the remaining gap is at most 10ms
          } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );

          return false;
      }

      /**
       * Turn a timestamp into a fixed-width binary string.
       *
       * Native integers are used when they are 64 bits wide; otherwise the gmp or
       * bcmath extension does the arithmetic.
       *
       * @param array $time Result of UIDGenerator::millitime()
       * @return string 46 MSBs of "milliseconds since epoch" in binary (rolls over in 4201)
       * @throws MWException If integers are 32 bit and neither gmp nor bcmath is loaded
       */
      protected function millisecondsSinceEpochBinary( array $time ) {
          list( $sec, $msec ) = $time;
          if ( PHP_INT_SIZE >= 8 ) { // 64 bit integers
              $ts = ( 1000 * $sec + $msec );
              $id_bin = str_pad( decbin( $ts % pow( 2, 46 ) ), 46, '0', STR_PAD_LEFT );
          } elseif ( extension_loaded( 'gmp' ) ) {
              $ts = gmp_mod( // wrap around
                  gmp_add( gmp_mul( (string) $sec, (string) 1000 ), (string) $msec ),
                  gmp_pow( '2', '46' )
              );
              $id_bin = str_pad( gmp_strval( $ts, 2 ), 46, '0', STR_PAD_LEFT );
          } elseif ( extension_loaded( 'bcmath' ) ) {
              $ts = bcmod( // wrap around
                  bcadd( bcmul( (string) $sec, '1000' ), (string) $msec ),
                  bcpow( '2', '46' )
              );
              $id_bin = wfBaseConvert( $ts, 10, 2, 46 );
          } else {
              throw new MWException( 'bcmath or gmp extension required for 32 bit machines.' );
          }
          return $id_bin;
      }

      /**
       * Read the clock with millisecond resolution.
       *
       * @return Array (current time in seconds, milliseconds since then)
       */
      protected static function millitime() {
          // microtime() gives "<fraction of a second> <whole seconds>"
          list( $msec, $sec ) = explode( ' ', microtime() );
          return array( (int) $sec, (int) ( $msec * 1000 ) );
      }

      /**
       * Close any lock file handles that are still open.
       */
      public function __destruct() {
          foreach ( $this->fileHandles as $handle ) {
              // Entries that are not (or no longer) live resources are skipped
              if ( is_resource( $handle ) ) {
                  fclose( $handle );
              }
          }
          $this->fileHandles = array();
      }
  }