PageRenderTime 48ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/phase3/maintenance/importDump.php

https://github.com/ChuguluGames/mediawiki-svn
PHP | 277 lines | 234 code | 17 blank | 26 comment | 12 complexity | 1867f335dd1b1e749e9c11603e16d149 MD5 | raw file
  1. <?php
  /**
   * Copyright (C) 2005 Brion Vibber <brion@pobox.com>
   * http://www.mediawiki.org/
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2 of the License, or
   * (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License along
   * with this program; if not, write to the Free Software Foundation, Inc.,
   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
   * http://www.gnu.org/copyleft/gpl.html
   *
   * @file
   * @ingroup Maintenance
   */
  // Load the Maintenance base class that BackupReader extends.
  require_once dirname( __FILE__ ) . '/Maintenance.php';
  /**
   * Maintenance script that reads pages from an XML dump (as produced by
   * Special:Export or dumpBackup.php) and feeds them to a WikiImporter.
   *
   * The class registers itself as the page / revision / upload / log-item
   * callback of the importer so that it can count items, filter by namespace,
   * honour --dry-run and print progress before delegating to the importer's
   * own callbacks.
   *
   * @ingroup Maintenance
   */
  class BackupReader extends Maintenance {
      /** @var int Number of pages between two progress reports */
      public $reportingInterval = 100;
      /** @var int Pages seen so far (incremented by reportPage()) */
      public $pageCount = 0;
      /** @var int Revisions and log items accepted so far */
      public $revCount = 0;
      /** @var int Uploads accepted so far (was previously created on the fly) */
      public $uploadCount = 0;
      /** @var bool When true, items are counted but never passed on to the import callbacks */
      public $dryRun = false;
      /** @var bool Whether upload data contained in the dump is processed (experimental) */
      public $uploads = false;
      /** @var string|bool Value of --image-base-path, or false when not given */
      public $imageBasePath = false;
      /** @var array|bool Namespace indexes to import, or false for no filtering */
      public $nsFilter = false;
      /** @var float wfTime() value taken when importFromHandle() started */
      public $startTime = 0;
      /** @var resource|null Handle on php://stderr used by progress() */
      public $stderr = null;
      /** @var callback|null Value returned by WikiImporter::setRevisionCallback(); called for each accepted revision */
      public $importCallback = null;
      /** @var callback|null Value returned by WikiImporter::setUploadCallback(); currently only stored */
      public $uploadCallback = null;
      /** @var callback|null Value returned by WikiImporter::setLogItemCallback(); called for each accepted log item */
      public $logItemCallback = null;

      /**
       * Declare the script description, its options and its optional
       * 'file' argument, and open the stderr handle used for progress output.
       */
      function __construct() {
          parent::__construct();

          // Query the registered stream wrappers once instead of once per format.
          $wrappers = stream_get_wrappers();
          $gz = in_array( 'compress.zlib', $wrappers ) ? 'ok' : '(disabled; requires PHP zlib module)';
          $bz2 = in_array( 'compress.bzip2', $wrappers ) ? 'ok' : '(disabled; requires PHP bzip2 module)';

          $this->mDescription = <<<TEXT
This script reads pages from an XML file as produced from Special:Export or
dumpBackup.php, and saves them into the current wiki.
Compressed XML files may be read directly:
.gz $gz
.bz2 $bz2
.7z (if 7za executable is in PATH)
Note that for very large data sets, importDump.php may be slow; there are
alternate methods which can be much faster for full site restoration:
<http://www.mediawiki.org/wiki/Manual:Importing_XML_dumps>
TEXT;
          $this->stderr = fopen( "php://stderr", "wt" );

          $this->addOption( 'report',
              'Report position and speed after every n pages processed', false, true );
          $this->addOption( 'namespaces',
              'Import only the pages from namespaces belonging to the list of ' .
              'pipe-separated namespace names or namespace indexes', false, true );
          $this->addOption( 'dry-run', 'Parse dump without actually importing pages' );
          $this->addOption( 'debug', 'Output extra verbose debug information' );
          $this->addOption( 'uploads', 'Process file upload data if included (experimental)' );
          $this->addOption( 'no-updates', 'Disable link table updates. Is faster but leaves the wiki in an inconsistent state' );
          $this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
          $this->addArg( 'file', 'Dump file to import [else use stdin]', false );
      }

      /**
       * Entry point: read the command-line options into the object's fields,
       * then import either from the file argument or from stdin.
       */
      public function execute() {
          if ( wfReadOnly() ) {
              $this->error( "Wiki is in read-only mode; you'll need to disable it for import to work.", true );
          }

          $this->reportingInterval = intval( $this->getOption( 'report', 100 ) );
          if ( !$this->reportingInterval ) {
              $this->reportingInterval = 100; // avoid division by zero
          }

          $this->dryRun = $this->hasOption( 'dry-run' );
          $this->uploads = $this->hasOption( 'uploads' ); // experimental!
          if ( $this->hasOption( 'image-base-path' ) ) {
              $this->imageBasePath = $this->getOption( 'image-base-path' );
          }
          if ( $this->hasOption( 'namespaces' ) ) {
              $this->setNsfilter( explode( '|', $this->getOption( 'namespaces' ) ) );
          }

          if ( $this->hasArg() ) {
              $this->importFromFile( $this->getArg() );
          } else {
              $this->importFromStdin();
          }

          $this->output( "Done!\n" );
          $this->output( "You might want to run rebuildrecentchanges.php to regenerate RecentChanges\n" );
      }

      /**
       * Restrict the import to the given namespaces.
       *
       * @param $namespaces Array of namespace names and/or numeric indexes;
       *   an empty array disables filtering.
       */
      function setNsfilter( array $namespaces ) {
          if ( count( $namespaces ) == 0 ) {
              $this->nsFilter = false;
              return;
          }
          $this->nsFilter = array_unique( array_map( array( $this, 'getNsIndex' ), $namespaces ) );
      }

      /**
       * Resolve a namespace given by name or by numeric index.
       *
       * The content language is asked for the name first; otherwise the value
       * is accepted when it is the exact string form of an integer for which
       * the content language knows a namespace text.
       *
       * @param $namespace String as typed on the command line
       * @return Namespace index; when nothing matches, error() is called with
       *   its second argument set to true (presumably aborting the script).
       */
      private function getNsIndex( $namespace ) {
          global $wgContLang;

          $result = $wgContLang->getNsIndex( $namespace );
          if ( $result !== false ) {
              return $result;
          }

          $ns = intval( $namespace );
          // strval() round trip rejects inputs such as "12abc" or " 4"
          if ( strval( $ns ) === $namespace && $wgContLang->getNsText( $ns ) !== false ) {
              return $ns;
          }

          $this->error( "Unknown namespace text / index specified: $namespace", true );
      }

      /**
       * Tell whether an item must be ignored because of the namespace filter.
       *
       * @param $obj Title, Revision or WikiRevision
       * @return Boolean: true when a filter is set and the item's namespace
       *   is not part of it
       */
      private function skippedNamespace( $obj ) {
          if ( $obj instanceof Title ) {
              $ns = $obj->getNamespace();
          } elseif ( $obj instanceof Revision ) {
              $ns = $obj->getTitle()->getNamespace();
          } elseif ( $obj instanceof WikiRevision ) {
              $ns = $obj->title->getNamespace();
          } else {
              echo wfBacktrace();
              $this->error( "Cannot get namespace of object in " . __METHOD__, true );
          }
          return is_array( $this->nsFilter ) && !in_array( $ns, $this->nsFilter );
      }

      /**
       * Page callback for the importer: only counts the page.
       *
       * @param $page Unused
       */
      function reportPage( $page ) {
          $this->pageCount++;
      }

      /**
       * Revision callback for the importer: count the revision, maybe report
       * progress, and pass it on to the stored import callback unless this is
       * a dry run.
       *
       * @param $rev Revision object handed over by the importer
       */
      function handleRevision( $rev ) {
          $title = $rev->getTitle();
          if ( !$title ) {
              $this->progress( "Got bogus revision with null title!" );
              return;
          }
          if ( $this->skippedNamespace( $title ) ) {
              return;
          }

          $this->revCount++;
          $this->report();

          if ( !$this->dryRun ) {
              call_user_func( $this->importCallback, $rev );
          }
      }

      /**
       * Upload callback for the importer. Does nothing unless --uploads was
       * given; otherwise counts the upload, prints its file name and, outside
       * of dry runs, imports it through the database's deadlockLoop().
       *
       * @param $revision Upload revision handed over by the importer
       * @return Result of deadlockLoop() when the upload is imported, nothing otherwise
       */
      function handleUpload( $revision ) {
          if ( !$this->uploads ) {
              return;
          }
          if ( $this->skippedNamespace( $revision ) ) {
              return;
          }

          $this->uploadCount++;
          // $this->report();
          $this->progress( "upload: " . $revision->getFilename() );

          if ( !$this->dryRun ) {
              // bluuuh hack
              // call_user_func( $this->uploadCallback, $revision );
              $dbw = wfGetDB( DB_MASTER );
              return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
          }
      }

      /**
       * Log-item callback for the importer: count the item, maybe report
       * progress, and pass it on to the stored log-item callback unless this
       * is a dry run.
       *
       * @param $rev Log item handed over by the importer
       */
      function handleLogItem( $rev ) {
          if ( $this->skippedNamespace( $rev ) ) {
              return;
          }

          $this->revCount++;
          $this->report();

          if ( !$this->dryRun ) {
              call_user_func( $this->logItemCallback, $rev );
          }
      }

      /**
       * Show a report when the page count is a multiple of the reporting
       * interval, or (with $final) when it is not, so that a final call does
       * not repeat a report that was just printed.
       *
       * @param $final Boolean
       */
      function report( $final = false ) {
          if ( $final xor ( $this->pageCount % $this->reportingInterval == 0 ) ) {
              $this->showReport();
          }
      }

      /**
       * Print the current counters and rates, then wait for the slaves and
       * flush the deferred updates.
       */
      function showReport() {
          // The counters used to be printed only when mQuiet was set, i.e.
          // the check was inverted: progress belongs to the non-quiet mode.
          if ( !$this->mQuiet ) {
              $delta = wfTime() - $this->startTime;
              if ( $delta ) {
                  $rate = sprintf( "%.2f", $this->pageCount / $delta );
                  $revrate = sprintf( "%.2f", $this->revCount / $delta );
              } else {
                  $rate = '-';
                  $revrate = '-';
              }
              # Logs dumps don't have page tallies
              if ( $this->pageCount ) {
                  $this->progress( "$this->pageCount ($rate pages/sec $revrate revs/sec)" );
              } else {
                  $this->progress( "$this->revCount ($revrate revs/sec)" );
              }
          }
          wfWaitForSlaves();
          // XXX: Don't let deferred jobs array get absurdly large (bug 24375)
          DeferredUpdates::doUpdates( 'commit' );
      }

      /**
       * Write one line of progress information to stderr.
       *
       * @param $string String without trailing newline
       */
      function progress( $string ) {
          fwrite( $this->stderr, $string . "\n" );
      }

      /**
       * Import a dump file, choosing a decompression stream wrapper from the
       * file extension (.gz, .bz2, .7z).
       *
       * @param $filename String: path of the dump file
       * @return Result of importFromHandle(), or false if the file could not
       *   be opened
       */
      function importFromFile( $filename ) {
          $path = $filename;
          if ( preg_match( '/\.gz$/', $filename ) ) {
              $path = 'compress.zlib://' . $filename;
          } elseif ( preg_match( '/\.bz2$/', $filename ) ) {
              $path = 'compress.bzip2://' . $filename;
          } elseif ( preg_match( '/\.7z$/', $filename ) ) {
              $path = 'mediawiki.compress.7z://' . $filename;
          }

          $file = fopen( $path, 'rt' );
          if ( $file === false ) {
              // Do not hand an invalid handle to the importer.
              $this->error( "Unable to open file \"$filename\" for reading.", true );
              return false; // only reached if error() does not abort
          }
          return $this->importFromHandle( $file );
      }

      /**
       * Import a dump from standard input. When stdin is a terminal the help
       * text is requested through maybeHelp( true ) first.
       *
       * @return Result of importFromHandle(), or false if stdin could not be
       *   opened
       */
      function importFromStdin() {
          $file = fopen( 'php://stdin', 'rt' );
          if ( $file === false ) {
              $this->error( "Unable to open standard input for reading.", true );
              return false; // only reached if error() does not abort
          }
          if ( self::posix_isatty( $file ) ) {
              $this->maybeHelp( true );
          }
          return $this->importFromHandle( $file );
      }

      /**
       * Run the import on an open stream.
       *
       * Installs this object's handlers on a new WikiImporter, keeping the
       * values returned by the setters so the handlers can delegate to them,
       * applies the command-line switches, and starts the import.
       *
       * @param $handle Resource: open, readable stream containing the XML dump
       * @return Result of WikiImporter::doImport()
       */
      function importFromHandle( $handle ) {
          $this->startTime = wfTime();

          $source = new ImportStreamSource( $handle );
          $importer = new WikiImporter( $source );

          if ( $this->hasOption( 'debug' ) ) {
              $importer->setDebug( true );
          }
          if ( $this->hasOption( 'no-updates' ) ) {
              $importer->setNoUpdates( true );
          }

          // Objects are passed by handle, so no reference to $this is needed
          // to build the callbacks.
          $importer->setPageCallback( array( $this, 'reportPage' ) );
          $this->importCallback = $importer->setRevisionCallback(
              array( $this, 'handleRevision' ) );
          $this->uploadCallback = $importer->setUploadCallback(
              array( $this, 'handleUpload' ) );
          $this->logItemCallback = $importer->setLogItemCallback(
              array( $this, 'handleLogItem' ) );

          if ( $this->uploads ) {
              $importer->setImportUploads( true );
          }
          if ( $this->imageBasePath ) {
              $importer->setImageBasePath( $this->imageBasePath );
          }
          if ( $this->dryRun ) {
              $importer->setPageOutCallback( null );
          }

          return $importer->doImport();
      }
  }
  // Name the class defined in this file, then include the file designated by
  // RUN_MAINTENANCE_IF_MAIN (presumably the runner that instantiates it when
  // this script is the entry point).
  $maintClass = 'BackupReader';
  require_once RUN_MAINTENANCE_IF_MAIN;