PageRenderTime 94ms CodeModel.GetById 31ms RepoModel.GetById 0ms app.codeStats 0ms

/wwwroot/mediawiki/maintenance/importDump.php

https://github.com/spring/spring-website
PHP | 291 lines | 244 code | 17 blank | 30 comment | 12 complexity | db7679efecf16bf3d3e1357207cc69cf MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, Apache-2.0, LGPL-3.0, BSD-3-Clause
  1. <?php
  2. /**
  3. * Import XML dump files into the current wiki.
  4. *
  5. * Copyright © 2005 Brion Vibber <brion@pobox.com>
  6. * https://www.mediawiki.org/
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License along
  19. * with this program; if not, write to the Free Software Foundation, Inc.,
  20. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  21. * http://www.gnu.org/copyleft/gpl.html
  22. *
  23. * @file
  24. * @ingroup Maintenance
  25. */
  26. require_once __DIR__ . '/Maintenance.php';
  27. /**
  28. * Maintenance script that imports XML dump files into the current wiki.
  29. *
  30. * @ingroup Maintenance
  31. */
  class BackupReader extends Maintenance {
    /** @var int A progress report is due whenever $pageCount is a multiple of this */
    public $reportingInterval = 100;

    /** @var int Pages seen so far; incremented by reportPage() */
    public $pageCount = 0;

    /** @var int Revisions and log items accepted so far */
    public $revCount = 0;

    /** @var int Upload records accepted so far; incremented by handleUpload() */
    public $uploadCount = 0;

    /** @var bool When true, items are counted but the import callbacks are not invoked */
    public $dryRun = false;

    /** @var bool Whether upload records are processed (experimental) */
    public $uploads = false;

    /** @var string|bool Value of --image-base-path, or false when the option is absent */
    public $imageBasePath = false;

    /** @var array|bool Namespace indexes to import, or false to import every namespace */
    public $nsFilter = false;

    /** @var resource|bool Handle opened on php://stderr; progress() writes to it */
    public $stderr;

    /** @var float|null microtime( true ) captured when importFromHandle() starts */
    public $startTime;

    /**
     * @var callable|null Value returned by the importer's setRevisionCallback();
     *  handleRevision() invokes it to perform the real import.
     *  NOTE(review): presumably the importer's previous/default handler - confirm.
     */
    public $importCallback;

    /** @var callable|null Value returned by the importer's setUploadCallback() */
    public $uploadCallback;

    /**
     * @var callable|null Value returned by the importer's setLogItemCallback();
     *  handleLogItem() invokes it to perform the real import.
     */
    public $logItemCallback;

    /**
     * Builds the help text, opens the stderr handle used for progress output
     * and registers the command line options and the optional file argument.
     */
    public function __construct() {
      parent::__construct();

      // Fetch the wrapper list once; both availability checks read it.
      $wrappers = stream_get_wrappers();
      $gz = in_array( 'compress.zlib', $wrappers )
        ? 'ok'
        : '(disabled; requires PHP zlib module)';
      $bz2 = in_array( 'compress.bzip2', $wrappers )
        ? 'ok'
        : '(disabled; requires PHP bzip2 module)';

      // The heredoc body and its terminator stay at column 0 so the text is
      // emitted verbatim and the syntax is valid on every PHP version.
      $this->mDescription = <<<TEXT
This script reads pages from an XML file as produced from Special:Export or
dumpBackup.php, and saves them into the current wiki.
Compressed XML files may be read directly:
.gz $gz
.bz2 $bz2
.7z (if 7za executable is in PATH)
Note that for very large data sets, importDump.php may be slow; there are
alternate methods which can be much faster for full site restoration:
<https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps>
TEXT;
      $this->stderr = fopen( "php://stderr", "wt" );

      $this->addOption( 'report',
        'Report position and speed after every n pages processed', false, true );
      $this->addOption( 'namespaces',
        'Import only the pages from namespaces belonging to the list of ' .
        'pipe-separated namespace names or namespace indexes', false, true );
      $this->addOption( 'dry-run', 'Parse dump without actually importing pages' );
      $this->addOption( 'debug', 'Output extra verbose debug information' );
      $this->addOption( 'uploads', 'Process file upload data if included (experimental)' );
      $this->addOption( 'no-updates', 'Disable link table updates. Is faster but leaves the wiki in an inconsistent state' );
      $this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
      $this->addArg( 'file', 'Dump file to import [else use stdin]', false );
    }

    /**
     * Script entry point: copies the command line options into the instance,
     * then imports from the file argument if one was given, otherwise from stdin.
     */
    public function execute() {
      if ( wfReadOnly() ) {
        $this->error( "Wiki is in read-only mode; you'll need to disable it for import to work.", true );
      }

      $this->reportingInterval = intval( $this->getOption( 'report', 100 ) );
      if ( !$this->reportingInterval ) {
        // report() uses this value as a modulus, so it must never be zero.
        $this->reportingInterval = 100;
      }

      $this->dryRun = $this->hasOption( 'dry-run' );
      $this->uploads = $this->hasOption( 'uploads' ); // experimental!
      if ( $this->hasOption( 'image-base-path' ) ) {
        $this->imageBasePath = $this->getOption( 'image-base-path' );
      }
      if ( $this->hasOption( 'namespaces' ) ) {
        $this->setNsfilter( explode( '|', $this->getOption( 'namespaces' ) ) );
      }

      if ( $this->hasArg() ) {
        $this->importFromFile( $this->getArg() );
      } else {
        $this->importFromStdin();
      }

      $this->output( "Done!\n" );
      $this->output( "You might want to run rebuildrecentchanges.php to regenerate RecentChanges\n" );
    }

    /**
     * Sets the namespace filter from a list of namespace names and/or indexes.
     *
     * @param array $namespaces Names or numeric indexes; an empty list disables filtering
     */
    public function setNsfilter( array $namespaces ) {
      if ( count( $namespaces ) == 0 ) {
        $this->nsFilter = false;
        return;
      }
      $this->nsFilter = array_unique( array_map( array( $this, 'getNsIndex' ), $namespaces ) );
    }

    /**
     * Resolves one namespace name or numeric index to a namespace index.
     * Aborts the script through error() when the value matches neither form.
     *
     * @param string $namespace Namespace name, or an index written as a string
     * @return int Namespace index
     */
    private function getNsIndex( $namespace ) {
      global $wgContLang;

      // First try the value as a namespace name.
      $result = $wgContLang->getNsIndex( $namespace );
      if ( $result !== false ) {
        return $result;
      }

      // Otherwise accept it only if it is a pure integer string that names a
      // namespace the content language knows about.
      $ns = intval( $namespace );
      if ( strval( $ns ) === $namespace && $wgContLang->getNsText( $ns ) !== false ) {
        return $ns;
      }

      $this->error( "Unknown namespace text / index specified: $namespace", true );
    }

    /**
     * Tells whether an object lies outside the active namespace filter.
     * Aborts the script when the namespace cannot be determined from $obj.
     *
     * @param Title|Revision|WikiRevision $obj
     * @return bool True when a filter is set and the object's namespace is not in it
     */
    private function skippedNamespace( $obj ) {
      if ( $obj instanceof Title ) {
        $ns = $obj->getNamespace();
      } elseif ( $obj instanceof Revision ) {
        $ns = $obj->getTitle()->getNamespace();
      } elseif ( $obj instanceof WikiRevision ) {
        $ns = $obj->title->getNamespace();
      } else {
        echo wfBacktrace();
        $this->error( "Cannot get namespace of object in " . __METHOD__, true );
      }

      return is_array( $this->nsFilter ) && !in_array( $ns, $this->nsFilter );
    }

    /**
     * Page callback registered with the importer: counts one more page.
     *
     * @param mixed $page Unused
     */
    public function reportPage( $page ) {
      $this->pageCount++;
    }

    /**
     * Revision callback: counts the revision, reports progress and, unless this
     * is a dry run, passes the revision on to the stored import callback.
     * Revisions without a title or outside the namespace filter are ignored.
     *
     * @param Revision $rev
     */
    public function handleRevision( $rev ) {
      $title = $rev->getTitle();
      if ( !$title ) {
        $this->progress( "Got bogus revision with null title!" );
        return;
      }
      if ( $this->skippedNamespace( $title ) ) {
        return;
      }

      $this->revCount++;
      $this->report();

      if ( !$this->dryRun ) {
        call_user_func( $this->importCallback, $rev );
      }
    }

    /**
     * Upload callback: when upload processing is enabled, counts the upload,
     * logs its file name and, unless this is a dry run, imports it.
     *
     * @param Revision $revision
     * @return mixed Result of the deadlockLoop() call around importUpload();
     *  nothing when uploads are disabled, filtered out, or this is a dry run
     */
    public function handleUpload( $revision ) {
      if ( $this->uploads ) {
        if ( $this->skippedNamespace( $revision ) ) {
          return;
        }

        $this->uploadCount++;
        // $this->report();
        $this->progress( "upload: " . $revision->getFilename() );

        if ( !$this->dryRun ) {
          // bluuuh hack
          // call_user_func( $this->uploadCallback, $revision );
          $dbw = wfGetDB( DB_MASTER );
          return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
        }
      }
    }

    /**
     * Log item callback: counts the item, reports progress and, unless this is
     * a dry run, passes the item on to the stored log item callback.
     *
     * @param Revision|WikiRevision $rev
     */
    public function handleLogItem( $rev ) {
      if ( $this->skippedNamespace( $rev ) ) {
        return;
      }

      $this->revCount++;
      $this->report();

      if ( !$this->dryRun ) {
        call_user_func( $this->logItemCallback, $rev );
      }
    }

    /**
     * Emits a progress report when exactly one of these holds: $final is true,
     * or the page count is a multiple of the reporting interval.
     *
     * @param bool $final
     */
    public function report( $final = false ) {
      if ( $final xor ( $this->pageCount % $this->reportingInterval == 0 ) ) {
        $this->showReport();
      }
    }

    /**
     * Prints the running totals and rates (unless quiet), then waits for the
     * slave databases and flushes deferred updates.
     */
    public function showReport() {
      if ( !$this->mQuiet ) {
        $delta = microtime( true ) - $this->startTime;
        if ( $delta ) {
          $rate = sprintf( "%.2f", $this->pageCount / $delta );
          $revrate = sprintf( "%.2f", $this->revCount / $delta );
        } else {
          // No measurable time elapsed: avoid dividing by zero.
          $rate = '-';
          $revrate = '-';
        }

        # Logs dumps don't have page tallies
        if ( $this->pageCount ) {
          $this->progress( "$this->pageCount ($rate pages/sec $revrate revs/sec)" );
        } else {
          $this->progress( "$this->revCount ($revrate revs/sec)" );
        }
      }

      wfWaitForSlaves();
      // XXX: Don't let deferred jobs array get absurdly large (bug 24375)
      DeferredUpdates::doUpdates( 'commit' );
    }

    /**
     * Writes one line of progress text to stderr.
     *
     * @param string $string Message without trailing newline
     */
    public function progress( $string ) {
      fwrite( $this->stderr, $string . "\n" );
    }

    /**
     * Imports a dump file, choosing a decompression stream wrapper from the
     * file extension (.gz, .bz2, .7z). Aborts the script when the file cannot
     * be opened instead of handing an invalid handle to the importer.
     *
     * @param string $filename Path of the dump file
     * @return mixed Result of importFromHandle()
     */
    public function importFromFile( $filename ) {
      $source = $filename;
      if ( preg_match( '/\.gz$/', $filename ) ) {
        $source = 'compress.zlib://' . $filename;
      } elseif ( preg_match( '/\.bz2$/', $filename ) ) {
        $source = 'compress.bzip2://' . $filename;
      } elseif ( preg_match( '/\.7z$/', $filename ) ) {
        $source = 'mediawiki.compress.7z://' . $filename;
      }

      $file = fopen( $source, 'rt' );
      if ( $file === false ) {
        // fopen() returns false on failure; stop here with a clear message.
        $this->error( "Unable to open dump file: $filename", true );
      }

      return $this->importFromHandle( $file );
    }

    /**
     * Imports a dump read from standard input. When stdin is a terminal the
     * help text is shown through maybeHelp( true ) first.
     *
     * @return mixed Result of importFromHandle()
     */
    public function importFromStdin() {
      $file = fopen( 'php://stdin', 'rt' );
      if ( self::posix_isatty( $file ) ) {
        $this->maybeHelp( true );
      }

      return $this->importFromHandle( $file );
    }

    /**
     * Runs the import on an open stream: starts the timer, configures a
     * WikiImporter from the command line options, installs this object's
     * handlers as callbacks and keeps whatever the setters return so the
     * handlers can delegate to it.
     *
     * @param resource $handle Open stream positioned at the start of the XML dump
     * @return mixed Result of WikiImporter::doImport()
     */
    public function importFromHandle( $handle ) {
      $this->startTime = microtime( true );

      $source = new ImportStreamSource( $handle );
      $importer = new WikiImporter( $source );

      if ( $this->hasOption( 'debug' ) ) {
        $importer->setDebug( true );
      }
      if ( $this->hasOption( 'no-updates' ) ) {
        $importer->setNoUpdates( true );
      }

      // Objects are passed by handle, so the callbacks take $this without
      // the legacy reference operator.
      $importer->setPageCallback( array( $this, 'reportPage' ) );
      $this->importCallback = $importer->setRevisionCallback(
        array( $this, 'handleRevision' ) );
      $this->uploadCallback = $importer->setUploadCallback(
        array( $this, 'handleUpload' ) );
      $this->logItemCallback = $importer->setLogItemCallback(
        array( $this, 'handleLogItem' ) );

      if ( $this->uploads ) {
        $importer->setImportUploads( true );
      }
      if ( $this->imageBasePath ) {
        $importer->setImageBasePath( $this->imageBasePath );
      }
      if ( $this->dryRun ) {
        // Clear the page-out callback for dry runs.
        $importer->setPageOutCallback( null );
      }

      return $importer->doImport();
    }
  }
  // Name the maintenance class defined in this file, then load the runner.
  // NOTE(review): presumably the runner instantiates $maintClass and calls
  // execute() when this file is the entry script - confirm in Maintenance.php.
  $maintClass = 'BackupReader';
  require_once RUN_MAINTENANCE_IF_MAIN;