PageRenderTime 27ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/phase3/includes/Import.php

https://github.com/brion/mediawiki-svn
PHP | 485 lines | 339 code | 68 blank | 78 comment | 37 complexity | 450d469c3ed66341f58210046c5408bb MD5 | raw file
  1. <?php
  2. /**
  3. * MediaWiki page data importer
  4. *
  5. * Copyright © 2003,2005 Brion Vibber <brion@pobox.com>
  6. * http://www.mediawiki.org/
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License along
  19. * with this program; if not, write to the Free Software Foundation, Inc.,
  20. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  21. * http://www.gnu.org/copyleft/gpl.html
  22. *
  23. * @file
  24. * @ingroup SpecialPage
  25. */
  26. /**
  27. * @todo document (e.g. one-sentence class description).
  28. * @ingroup SpecialPage
  29. */
  30. class WikiRevision {
  31. var $title = null;
  32. var $id = 0;
  33. var $timestamp = "20010115000000";
  34. var $user = 0;
  35. var $user_text = "";
  36. var $text = "";
  37. var $comment = "";
  38. var $minor = false;
  39. var $type = "";
  40. var $action = "";
  41. var $params = "";
  42. function setTitle( $title ) {
  43. if( is_object( $title ) ) {
  44. $this->title = $title;
  45. } elseif( is_null( $title ) ) {
  46. throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
  47. } else {
  48. throw new MWException( "WikiRevision given non-object title in import." );
  49. }
  50. }
  51. function setID( $id ) {
  52. $this->id = $id;
  53. }
  54. function setTimestamp( $ts ) {
  55. # 2003-08-05T18:30:02Z
  56. $this->timestamp = wfTimestamp( TS_MW, $ts );
  57. }
  58. function setUsername( $user ) {
  59. $this->user_text = $user;
  60. }
  61. function setUserIP( $ip ) {
  62. $this->user_text = $ip;
  63. }
  64. function setText( $text ) {
  65. $this->text = $text;
  66. }
  67. function setComment( $text ) {
  68. $this->comment = $text;
  69. }
  70. function setMinor( $minor ) {
  71. $this->minor = (bool)$minor;
  72. }
  73. function setSrc( $src ) {
  74. $this->src = $src;
  75. }
  76. function setFilename( $filename ) {
  77. $this->filename = $filename;
  78. }
  79. function setSize( $size ) {
  80. $this->size = intval( $size );
  81. }
  82. function setType( $type ) {
  83. $this->type = $type;
  84. }
  85. function setAction( $action ) {
  86. $this->action = $action;
  87. }
  88. function setParams( $params ) {
  89. $this->params = $params;
  90. }
  91. function getTitle() {
  92. return $this->title;
  93. }
  94. function getID() {
  95. return $this->id;
  96. }
  97. function getTimestamp() {
  98. return $this->timestamp;
  99. }
  100. function getUser() {
  101. return $this->user_text;
  102. }
  103. function getText() {
  104. return $this->text;
  105. }
  106. function getComment() {
  107. return $this->comment;
  108. }
  109. function getMinor() {
  110. return $this->minor;
  111. }
  112. function getSrc() {
  113. return $this->src;
  114. }
  115. function getFilename() {
  116. return $this->filename;
  117. }
  118. function getSize() {
  119. return $this->size;
  120. }
  121. function getType() {
  122. return $this->type;
  123. }
  124. function getAction() {
  125. return $this->action;
  126. }
  127. function getParams() {
  128. return $this->params;
  129. }
  130. function importOldRevision() {
  131. $dbw = wfGetDB( DB_MASTER );
  132. # Sneak a single revision into place
  133. $user = User::newFromName( $this->getUser() );
  134. if( $user ) {
  135. $userId = intval( $user->getId() );
  136. $userText = $user->getName();
  137. } else {
  138. $userId = 0;
  139. $userText = $this->getUser();
  140. }
  141. // avoid memory leak...?
  142. $linkCache = LinkCache::singleton();
  143. $linkCache->clear();
  144. $article = new Article( $this->title );
  145. $pageId = $article->getId();
  146. if( $pageId == 0 ) {
  147. # must create the page...
  148. $pageId = $article->insertOn( $dbw );
  149. $created = true;
  150. } else {
  151. $created = false;
  152. $prior = $dbw->selectField( 'revision', '1',
  153. array( 'rev_page' => $pageId,
  154. 'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
  155. 'rev_user_text' => $userText,
  156. 'rev_comment' => $this->getComment() ),
  157. __METHOD__
  158. );
  159. if( $prior ) {
  160. // FIXME: this could fail slightly for multiple matches :P
  161. wfDebug( __METHOD__ . ": skipping existing revision for [[" .
  162. $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
  163. return false;
  164. }
  165. }
  166. # FIXME: Use original rev_id optionally (better for backups)
  167. # Insert the row
  168. $revision = new Revision( array(
  169. 'page' => $pageId,
  170. 'text' => $this->getText(),
  171. 'comment' => $this->getComment(),
  172. 'user' => $userId,
  173. 'user_text' => $userText,
  174. 'timestamp' => $this->timestamp,
  175. 'minor_edit' => $this->minor,
  176. ) );
  177. $revId = $revision->insertOn( $dbw );
  178. $changed = $article->updateIfNewerOn( $dbw, $revision );
  179. # To be on the safe side...
  180. $tempTitle = $GLOBALS['wgTitle'];
  181. $GLOBALS['wgTitle'] = $this->title;
  182. if( $created ) {
  183. wfDebug( __METHOD__ . ": running onArticleCreate\n" );
  184. Article::onArticleCreate( $this->title );
  185. wfDebug( __METHOD__ . ": running create updates\n" );
  186. $article->createUpdates( $revision );
  187. } elseif( $changed ) {
  188. wfDebug( __METHOD__ . ": running onArticleEdit\n" );
  189. Article::onArticleEdit( $this->title );
  190. wfDebug( __METHOD__ . ": running edit updates\n" );
  191. $article->editUpdates(
  192. $this->getText(),
  193. $this->getComment(),
  194. $this->minor,
  195. $this->timestamp,
  196. $revId );
  197. }
  198. $GLOBALS['wgTitle'] = $tempTitle;
  199. return true;
  200. }
  201. function importLogItem() {
  202. $dbw = wfGetDB( DB_MASTER );
  203. # FIXME: this will not record autoblocks
  204. if( !$this->getTitle() ) {
  205. wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
  206. $this->timestamp . "\n" );
  207. return;
  208. }
  209. # Check if it exists already
  210. // FIXME: use original log ID (better for backups)
  211. $prior = $dbw->selectField( 'logging', '1',
  212. array( 'log_type' => $this->getType(),
  213. 'log_action' => $this->getAction(),
  214. 'log_timestamp' => $dbw->timestamp( $this->timestamp ),
  215. 'log_namespace' => $this->getTitle()->getNamespace(),
  216. 'log_title' => $this->getTitle()->getDBkey(),
  217. 'log_comment' => $this->getComment(),
  218. #'log_user_text' => $this->user_text,
  219. 'log_params' => $this->params ),
  220. __METHOD__
  221. );
  222. // FIXME: this could fail slightly for multiple matches :P
  223. if( $prior ) {
  224. wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
  225. $this->timestamp . "\n" );
  226. return false;
  227. }
  228. $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
  229. $data = array(
  230. 'log_id' => $log_id,
  231. 'log_type' => $this->type,
  232. 'log_action' => $this->action,
  233. 'log_timestamp' => $dbw->timestamp( $this->timestamp ),
  234. 'log_user' => User::idFromName( $this->user_text ),
  235. #'log_user_text' => $this->user_text,
  236. 'log_namespace' => $this->getTitle()->getNamespace(),
  237. 'log_title' => $this->getTitle()->getDBkey(),
  238. 'log_comment' => $this->getComment(),
  239. 'log_params' => $this->params
  240. );
  241. $dbw->insert( 'logging', $data, __METHOD__ );
  242. }
  243. function importUpload() {
  244. wfDebug( __METHOD__ . ": STUB\n" );
  245. /**
  246. // from file revert...
  247. $source = $this->file->getArchiveVirtualUrl( $this->oldimage );
  248. $comment = $wgRequest->getText( 'wpComment' );
  249. // TODO: Preserve file properties from database instead of reloading from file
  250. $status = $this->file->upload( $source, $comment, $comment );
  251. if( $status->isGood() ) {
  252. */
  253. /**
  254. // from file upload...
  255. $this->mLocalFile = wfLocalFile( $nt );
  256. $this->mDestName = $this->mLocalFile->getName();
  257. //....
  258. $status = $this->mLocalFile->upload( $this->mTempPath, $this->mComment, $pageText,
  259. File::DELETE_SOURCE, $this->mFileProps );
  260. if ( !$status->isGood() ) {
  261. $resultDetails = array( 'internal' => $status->getWikiText() );
  262. */
  263. // @todo Fixme: upload() uses $wgUser, which is wrong here
  264. // it may also create a page without our desire, also wrong potentially.
  265. // and, it will record a *current* upload, but we might want an archive version here
  266. $file = wfLocalFile( $this->getTitle() );
  267. if( !$file ) {
  268. wfDebug( "IMPORT: Bad file. :(\n" );
  269. return false;
  270. }
  271. $source = $this->downloadSource();
  272. if( !$source ) {
  273. wfDebug( "IMPORT: Could not fetch remote file. :(\n" );
  274. return false;
  275. }
  276. $status = $file->upload( $source,
  277. $this->getComment(),
  278. $this->getComment(), // Initial page, if none present...
  279. File::DELETE_SOURCE,
  280. false, // props...
  281. $this->getTimestamp() );
  282. if( $status->isGood() ) {
  283. // yay?
  284. wfDebug( "IMPORT: is ok?\n" );
  285. return true;
  286. }
  287. wfDebug( "IMPORT: is bad? " . $status->getXml() . "\n" );
  288. return false;
  289. }
  290. function downloadSource() {
  291. global $wgEnableUploads;
  292. if( !$wgEnableUploads ) {
  293. return false;
  294. }
  295. $tempo = tempnam( wfTempDir(), 'download' );
  296. $f = fopen( $tempo, 'wb' );
  297. if( !$f ) {
  298. wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
  299. return false;
  300. }
  301. // @todo Fixme!
  302. $src = $this->getSrc();
  303. $data = Http::get( $src );
  304. if( !$data ) {
  305. wfDebug( "IMPORT: couldn't fetch source $src\n" );
  306. fclose( $f );
  307. unlink( $tempo );
  308. return false;
  309. }
  310. fwrite( $f, $data );
  311. fclose( $f );
  312. return $tempo;
  313. }
  314. }
  315. /**
  316. * @todo document (e.g. one-sentence class description).
  317. * @ingroup SpecialPage
  318. */
  319. class ImportStringSource {
  320. function __construct( $string ) {
  321. $this->mString = $string;
  322. $this->mRead = false;
  323. }
  324. function atEnd() {
  325. return $this->mRead;
  326. }
  327. function readChunk() {
  328. if( $this->atEnd() ) {
  329. return false;
  330. } else {
  331. $this->mRead = true;
  332. return $this->mString;
  333. }
  334. }
  335. }
  336. /**
  337. * @todo document (e.g. one-sentence class description).
  338. * @ingroup SpecialPage
  339. */
  340. class ImportStreamSource {
  341. function __construct( $handle ) {
  342. $this->mHandle = $handle;
  343. }
  344. function atEnd() {
  345. return feof( $this->mHandle );
  346. }
  347. function readChunk() {
  348. return fread( $this->mHandle, 32768 );
  349. }
  350. static function newFromFile( $filename ) {
  351. $file = @fopen( $filename, 'rt' );
  352. if( !$file ) {
  353. return new WikiErrorMsg( "importcantopen" );
  354. }
  355. return new ImportStreamSource( $file );
  356. }
  357. static function newFromUpload( $fieldname = "xmlimport" ) {
  358. $upload =& $_FILES[$fieldname];
  359. if( !isset( $upload ) || !$upload['name'] ) {
  360. return new WikiErrorMsg( 'importnofile' );
  361. }
  362. if( !empty( $upload['error'] ) ) {
  363. switch($upload['error']){
  364. case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
  365. return new WikiErrorMsg( 'importuploaderrorsize' );
  366. case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
  367. return new WikiErrorMsg( 'importuploaderrorsize' );
  368. case 3: # The uploaded file was only partially uploaded
  369. return new WikiErrorMsg( 'importuploaderrorpartial' );
  370. case 6: #Missing a temporary folder.
  371. return new WikiErrorMsg( 'importuploaderrortemp' );
  372. # case else: # Currently impossible
  373. }
  374. }
  375. $fname = $upload['tmp_name'];
  376. if( is_uploaded_file( $fname ) ) {
  377. return ImportStreamSource::newFromFile( $fname );
  378. } else {
  379. return new WikiErrorMsg( 'importnofile' );
  380. }
  381. }
  382. static function newFromURL( $url, $method = 'GET' ) {
  383. wfDebug( __METHOD__ . ": opening $url\n" );
  384. # Use the standard HTTP fetch function; it times out
  385. # quicker and sorts out user-agent problems which might
  386. # otherwise prevent importing from large sites, such
  387. # as the Wikimedia cluster, etc.
  388. $data = Http::request( $method, $url );
  389. if( $data !== false ) {
  390. $file = tmpfile();
  391. fwrite( $file, $data );
  392. fflush( $file );
  393. fseek( $file, 0 );
  394. return new ImportStreamSource( $file );
  395. } else {
  396. return new WikiErrorMsg( 'importcantopen' );
  397. }
  398. }
  399. public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
  400. if( $page == '' ) {
  401. return new WikiErrorMsg( 'import-noarticle' );
  402. }
  403. $link = Title::newFromText( "$interwiki:Special:Export/$page" );
  404. if( is_null( $link ) || $link->getInterwiki() == '' ) {
  405. return new WikiErrorMsg( 'importbadinterwiki' );
  406. } else {
  407. $params = array();
  408. if ( $history ) $params['history'] = 1;
  409. if ( $templates ) $params['templates'] = 1;
  410. if ( $pageLinkDepth ) $params['pagelink-depth'] = $pageLinkDepth;
  411. $url = $link->getFullUrl( $params );
  412. # For interwikis, use POST to avoid redirects.
  413. return ImportStreamSource::newFromURL( $url, "POST" );
  414. }
  415. }
  416. }