PageRenderTime 65ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/wwwroot/mediawiki/maintenance/storage/fixBug20757.php

https://github.com/spring/spring-website
PHP | 346 lines | 254 code | 39 blank | 53 comment | 35 complexity | 58b3504c0a745df97ce7cfa3df0543b2 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, Apache-2.0, LGPL-3.0, BSD-3-Clause
  1. <?php
  2. /**
  3. * Script to fix bug 20757.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup Maintenance ExternalStorage
  22. */
  23. require_once __DIR__ . '/../Maintenance.php';
  24. /**
  25. * Maintenance script to fix bug 20757.
  26. *
  27. * @ingroup Maintenance ExternalStorage
  28. */
  29. class FixBug20757 extends Maintenance {
  30. public $batchSize = 10000;
  31. public $mapCache = array();
  32. public $mapCacheSize = 0;
  33. public $maxMapCacheSize = 1000000;
  34. function __construct() {
  35. parent::__construct();
  36. $this->mDescription = 'Script to fix bug 20757 assuming that blob_tracking is intact';
  37. $this->addOption( 'dry-run', 'Report only' );
  38. $this->addOption( 'start', 'old_id to start at', false, true );
  39. }
  40. function execute() {
  41. $dbr = wfGetDB( DB_SLAVE );
  42. $dbw = wfGetDB( DB_MASTER );
  43. $dryRun = $this->getOption( 'dry-run' );
  44. if ( $dryRun ) {
  45. print "Dry run only.\n";
  46. }
  47. $startId = $this->getOption( 'start', 0 );
  48. $numGood = 0;
  49. $numFixed = 0;
  50. $numBad = 0;
  51. $totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );
  52. if ( $dbr->getType() == 'mysql' ) {
  53. // In MySQL 4.1+, the binary field old_text has a non-working LOWER() function
  54. $lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';
  55. }
  56. while ( true ) {
  57. print "ID: $startId / $totalRevs\r";
  58. $res = $dbr->select(
  59. 'text',
  60. array( 'old_id', 'old_flags', 'old_text' ),
  61. array(
  62. 'old_id > ' . intval( $startId ),
  63. 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
  64. "$lowerLeft = 'o:15:\"historyblobstub\"'",
  65. ),
  66. __METHOD__,
  67. array(
  68. 'ORDER BY' => 'old_id',
  69. 'LIMIT' => $this->batchSize,
  70. )
  71. );
  72. if ( !$res->numRows() ) {
  73. break;
  74. }
  75. $secondaryIds = array();
  76. $stubs = array();
  77. foreach ( $res as $row ) {
  78. $startId = $row->old_id;
  79. // Basic sanity checks
  80. $obj = unserialize( $row->old_text );
  81. if ( $obj === false ) {
  82. print "{$row->old_id}: unrecoverable: cannot unserialize\n";
  83. ++$numBad;
  84. continue;
  85. }
  86. if ( !is_object( $obj ) ) {
  87. print "{$row->old_id}: unrecoverable: unserialized to type " .
  88. gettype( $obj ) . ", possible double-serialization\n";
  89. ++$numBad;
  90. continue;
  91. }
  92. if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
  93. print "{$row->old_id}: unrecoverable: unexpected object class " .
  94. get_class( $obj ) . "\n";
  95. ++$numBad;
  96. continue;
  97. }
  98. // Process flags
  99. $flags = explode( ',', $row->old_flags );
  100. if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) {
  101. $legacyEncoding = false;
  102. } else {
  103. $legacyEncoding = true;
  104. }
  105. // Queue the stub for future batch processing
  106. $id = intval( $obj->mOldId );
  107. $secondaryIds[] = $id;
  108. $stubs[$row->old_id] = array(
  109. 'legacyEncoding' => $legacyEncoding,
  110. 'secondaryId' => $id,
  111. 'hash' => $obj->mHash,
  112. );
  113. }
  114. $secondaryIds = array_unique( $secondaryIds );
  115. if ( !count( $secondaryIds ) ) {
  116. continue;
  117. }
  118. // Run the batch query on blob_tracking
  119. $res = $dbr->select(
  120. 'blob_tracking',
  121. '*',
  122. array(
  123. 'bt_text_id' => $secondaryIds,
  124. ),
  125. __METHOD__
  126. );
  127. $trackedBlobs = array();
  128. foreach ( $res as $row ) {
  129. $trackedBlobs[$row->bt_text_id] = $row;
  130. }
  131. // Process the stubs
  132. foreach ( $stubs as $primaryId => $stub ) {
  133. $secondaryId = $stub['secondaryId'];
  134. if ( !isset( $trackedBlobs[$secondaryId] ) ) {
  135. // No tracked blob. Work out what went wrong
  136. $secondaryRow = $dbr->selectRow(
  137. 'text',
  138. array( 'old_flags', 'old_text' ),
  139. array( 'old_id' => $secondaryId ),
  140. __METHOD__
  141. );
  142. if ( !$secondaryRow ) {
  143. print "$primaryId: unrecoverable: secondary row is missing\n";
  144. ++$numBad;
  145. } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
  146. // Not broken yet, and not in the tracked clusters so it won't get
  147. // broken by the current RCT run.
  148. ++$numGood;
  149. } elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
  150. print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
  151. ++$numBad;
  152. } else {
  153. print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
  154. ++$numBad;
  155. }
  156. unset( $stubs[$primaryId] );
  157. continue;
  158. }
  159. $trackRow = $trackedBlobs[$secondaryId];
  160. // Check that the specified text really is available in the tracked source row
  161. $url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
  162. $text = ExternalStore::fetchFromURL( $url );
  163. if ( $text === false ) {
  164. print "$primaryId: unrecoverable: source text missing\n";
  165. ++$numBad;
  166. unset( $stubs[$primaryId] );
  167. continue;
  168. }
  169. if ( md5( $text ) !== $stub['hash'] ) {
  170. print "$primaryId: unrecoverable: content hashes do not match\n";
  171. ++$numBad;
  172. unset( $stubs[$primaryId] );
  173. continue;
  174. }
  175. // Find the page_id and rev_id
  176. // The page is probably the same as the page of the secondary row
  177. $pageId = intval( $trackRow->bt_page );
  178. if ( !$pageId ) {
  179. $revId = $pageId = 0;
  180. } else {
  181. $revId = $this->findTextIdInPage( $pageId, $primaryId );
  182. if ( !$revId ) {
  183. // Actually an orphan
  184. $pageId = $revId = 0;
  185. }
  186. }
  187. $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';
  188. if ( !$dryRun ) {
  189. // Reset the text row to point to the original copy
  190. $dbw->begin( __METHOD__ );
  191. $dbw->update(
  192. 'text',
  193. // SET
  194. array(
  195. 'old_flags' => $newFlags,
  196. 'old_text' => $url
  197. ),
  198. // WHERE
  199. array( 'old_id' => $primaryId ),
  200. __METHOD__
  201. );
  202. // Add a blob_tracking row so that the new reference can be recompressed
  203. // without needing to run trackBlobs.php again
  204. $dbw->insert( 'blob_tracking',
  205. array(
  206. 'bt_page' => $pageId,
  207. 'bt_rev_id' => $revId,
  208. 'bt_text_id' => $primaryId,
  209. 'bt_cluster' => $trackRow->bt_cluster,
  210. 'bt_blob_id' => $trackRow->bt_blob_id,
  211. 'bt_cgz_hash' => $stub['hash'],
  212. 'bt_new_url' => null,
  213. 'bt_moved' => 0,
  214. ),
  215. __METHOD__
  216. );
  217. $dbw->commit( __METHOD__ );
  218. $this->waitForSlaves();
  219. }
  220. print "$primaryId: resolved to $url\n";
  221. ++$numFixed;
  222. }
  223. }
  224. print "\n";
  225. print "Fixed: $numFixed\n";
  226. print "Unrecoverable: $numBad\n";
  227. print "Good stubs: $numGood\n";
  228. }
  229. function waitForSlaves() {
  230. static $iteration = 0;
  231. ++$iteration;
  232. if ( ++$iteration > 50 == 0 ) {
  233. wfWaitForSlaves();
  234. $iteration = 0;
  235. }
  236. }
  237. function findTextIdInPage( $pageId, $textId ) {
  238. $ids = $this->getRevTextMap( $pageId );
  239. if ( !isset( $ids[$textId] ) ) {
  240. return null;
  241. } else {
  242. return $ids[$textId];
  243. }
  244. }
  245. function getRevTextMap( $pageId ) {
  246. if ( !isset( $this->mapCache[$pageId] ) ) {
  247. // Limit cache size
  248. while ( $this->mapCacheSize > $this->maxMapCacheSize ) {
  249. $key = key( $this->mapCache );
  250. $this->mapCacheSize -= count( $this->mapCache[$key] );
  251. unset( $this->mapCache[$key] );
  252. }
  253. $dbr = wfGetDB( DB_SLAVE );
  254. $map = array();
  255. $res = $dbr->select( 'revision',
  256. array( 'rev_id', 'rev_text_id' ),
  257. array( 'rev_page' => $pageId ),
  258. __METHOD__
  259. );
  260. foreach ( $res as $row ) {
  261. $map[$row->rev_text_id] = $row->rev_id;
  262. }
  263. $this->mapCache[$pageId] = $map;
  264. $this->mapCacheSize += count( $map );
  265. }
  266. return $this->mapCache[$pageId];
  267. }
  268. /**
  269. * This is based on part of HistoryBlobStub::getText().
  270. * Determine if the text can be retrieved from the row in the normal way.
  271. * @param $stub
  272. * @param $secondaryRow
  273. * @return bool
  274. */
  275. function isUnbrokenStub( $stub, $secondaryRow ) {
  276. $flags = explode( ',', $secondaryRow->old_flags );
  277. $text = $secondaryRow->old_text;
  278. if ( in_array( 'external', $flags ) ) {
  279. $url = $text;
  280. @list( /* $proto */ , $path ) = explode( '://', $url, 2 );
  281. if ( $path == "" ) {
  282. return false;
  283. }
  284. $text = ExternalStore::fetchFromUrl( $url );
  285. }
  286. if ( !in_array( 'object', $flags ) ) {
  287. return false;
  288. }
  289. if ( in_array( 'gzip', $flags ) ) {
  290. $obj = unserialize( gzinflate( $text ) );
  291. } else {
  292. $obj = unserialize( $text );
  293. }
  294. if ( !is_object( $obj ) ) {
  295. // Correct for old double-serialization bug.
  296. $obj = unserialize( $obj );
  297. }
  298. if ( !is_object( $obj ) ) {
  299. return false;
  300. }
  301. $obj->uncompress();
  302. $text = $obj->getItem( $stub['hash'] );
  303. return $text !== false;
  304. }
  305. }
// Name the maintenance class defined in this file, then include the file named by
// RUN_MAINTENANCE_IF_MAIN. NOTE(review): presumably that file reads $maintClass and
// runs the script when this file is the entry point — confirm against Maintenance.php.
$maintClass = 'FixBug20757';
require_once RUN_MAINTENANCE_IF_MAIN;