/maintenance/namespaceDupes.php

https://github.com/tav/confluence · PHP · 296 lines · 222 code · 40 blank · 34 comment · 34 complexity · b333c28798e037f57ef02a7bd7502a18 MD5 · raw file

  1. <?php
  2. # Copyright (C) 2005-2007 Brion Vibber <brion@pobox.com>
  3. # http://www.mediawiki.org/
  4. #
  5. # This program is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation; either version 2 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License along
  16. # with this program; if not, write to the Free Software Foundation, Inc.,
  17. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. # http://www.gnu.org/copyleft/gpl.html
  19. /**
  20. * @file
  21. * @ingroup Maintenance
  22. */
  // Option names declared before the command-line bootstrap is loaded.
  // NOTE(review): presumably consumed (or replaced) by commandLine.inc, which
  // is not visible here -- confirm before extending this list.
  $options = array( 'fix', 'suffix', 'help' );

  // Bootstraps the maintenance environment; the code below reads the parsed
  // command-line switches from $options afterwards.
  require_once( 'commandLine.inc' );

  if ( isset( $options['help'] ) ) {
      // The usage line lists every switch the script actually reads
      // (including --prefix, --key and --verbose), and the text ends with a
      // newline so the shell prompt is not glued to the last help line.
      $helpLines = array(
          'usage: namespaceDupes.php [--fix] [--suffix=<text>] [--prefix=<text>] [--key=<ns>] [--verbose] [--help]',
          '--help : this help message',
          '--fix : attempt to automatically fix errors',
          '--suffix=<text> : dupes will be renamed with correct namespace with <text>',
          'appended after the article name.',
          '--prefix=<text> : Do an explicit check for the given title prefix',
          'in place of the standard namespace list.',
          '--key=<ns> : Namespace number to check the --prefix against (default 0)',
          '--verbose : Display output for checked namespaces without conflicts',
      );
      echo implode( "\n", $helpLines ), "\n";
      exit( 0 );
  }
  /**
   * Finds, reports and optionally repairs pages stored in namespace 0 whose
   * titles start with a namespace name, a namespace alias or an interwiki
   * prefix followed by ":".
   */
  class NamespaceConflictChecker {
      /** Database handle used for every query and update issued by this class. */
      public $db;

      /** When true, prefixes without any conflict are reported as well. */
      public $verbose;

      /**
       * @param $db      Database handle; must provide select(), query(),
       *                 fetchObject(), freeResult(), tableName(), strencode()
       *                 and update().
       * @param $verbose Whether to print output for conflict-free prefixes.
       */
      public function __construct( $db, $verbose = false ) {
          // A method named after the class is no longer a constructor on
          // PHP 8, so the real constructor name is used.
          $this->db = $db;
          $this->verbose = $verbose;
      }

      /**
       * Check every known interwiki prefix, namespace name and namespace alias
       * (plus their case variants) for conflicting main-namespace titles.
       *
       * @param $fix    Whether conflicts should be repaired, not just reported.
       * @param $suffix Text appended to a title when the plain target is taken.
       * @return bool True when every check came back clean or resolvable.
       */
      public function checkAll( $fix, $suffix = '' ) {
          global $wgContLang, $wgNamespaceAliases, $wgCanonicalNamespaceNames;
          global $wgCapitalLinks;

          $spaces = array();

          // List interwikis first, so they'll be overridden
          // by any conflicting local namespaces.
          foreach ( $this->getInterwikiList() as $prefix ) {
              $name = $wgContLang->ucfirst( $prefix );
              $spaces[$name] = 0;
          }

          // Now pull in all canonical and alias namespaces...
          foreach ( $wgCanonicalNamespaceNames as $ns => $name ) {
              // This includes $wgExtraNamespaces
              if ( $name !== '' ) {
                  $spaces[$name] = $ns;
              }
          }
          foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
              if ( $name !== '' ) {
                  $spaces[$name] = $ns;
              }
          }
          foreach ( $wgNamespaceAliases as $name => $ns ) {
              $spaces[$name] = $ns;
          }
          foreach ( $wgContLang->namespaceAliases as $name => $ns ) {
              $spaces[$name] = $ns;
          }

          // We'll need to check for lowercase keys as well,
          // since we're doing case-sensitive searches in the db.
          foreach ( $spaces as $name => $ns ) {
              $moreNames = array();
              $moreNames[] = $wgContLang->uc( $name );
              $moreNames[] = $wgContLang->ucfirst( $wgContLang->lc( $name ) );
              $moreNames[] = $wgContLang->ucwords( $name );
              $moreNames[] = $wgContLang->ucwords( $wgContLang->lc( $name ) );
              $moreNames[] = $wgContLang->ucwordbreaks( $name );
              $moreNames[] = $wgContLang->ucwordbreaks( $wgContLang->lc( $name ) );
              if ( !$wgCapitalLinks ) {
                  // foreach works on a copy, so appending inside the loop
                  // does not extend the iteration.
                  foreach ( $moreNames as $altName ) {
                      $moreNames[] = $wgContLang->lcfirst( $altName );
                  }
                  $moreNames[] = $wgContLang->lcfirst( $name );
              }
              foreach ( array_unique( $moreNames ) as $altName ) {
                  if ( $altName !== $name ) {
                      $spaces[$altName] = $ns;
                  }
              }
          }

          ksort( $spaces );
          asort( $spaces );

          $ok = true;
          foreach ( $spaces as $name => $ns ) {
              // checkNamespace() goes first so it runs even after a failure.
              $ok = $this->checkNamespace( $ns, $name, $fix, $suffix ) && $ok;
          }
          return $ok;
      }

      /**
       * Read all iw_prefix values from the interwiki table.
       *
       * @return array List of prefixes; empty when the table has no rows.
       */
      private function getInterwikiList() {
          // Initialised up front so an empty table yields an empty array
          // instead of an undefined variable.
          $prefixes = array();
          $result = $this->db->select( 'interwiki', array( 'iw_prefix' ) );
          while ( $row = $this->db->fetchObject( $result ) ) {
              $prefixes[] = $row->iw_prefix;
          }
          $this->db->freeResult( $result );
          return $prefixes;
      }

      /**
       * Report (and, if requested, repair) all conflicts for one prefix.
       *
       * @param $ns     Target namespace number; 0 means $name is treated as an
       *                interwiki prefix.
       * @param $name   Prefix text to look for in front of ":".
       * @param $fix    Whether to attempt a repair.
       * @param $suffix Text appended to the title when the target is taken.
       * @return bool True when no conflict was found, or all were resolvable
       *              and every attempted repair succeeded.
       */
      public function checkNamespace( $ns, $name, $fix, $suffix = '' ) {
          if ( $ns == 0 ) {
              $header = "Checking interwiki prefix: \"$name\"\n";
          } else {
              $header = "Checking namespace $ns: \"$name\"\n";
          }

          $conflicts = $this->getConflicts( $ns, $name );
          $count = count( $conflicts );
          if ( $count == 0 ) {
              if ( $this->verbose ) {
                  echo $header;
                  echo "... no conflicts detected!\n";
              }
              return true;
          }

          echo $header;
          echo "... $count conflicts detected:\n";
          $ok = true;
          foreach ( $conflicts as $row ) {
              $resolvable = $this->reportConflict( $row, $suffix );
              $ok = $ok && $resolvable;
              // Unresolvable rows are only touched when a suffix was supplied.
              if ( $fix && ( $resolvable || $suffix != '' ) ) {
                  $ok = $this->resolveConflict( $row, $resolvable, $suffix ) && $ok;
              }
          }
          return $ok;
      }

      /**
       * Check a single explicitly given prefix against namespace $key.
       *
       * @todo: do this for reals
       */
      public function checkPrefix( $key, $prefix, $fix, $suffix = '' ) {
          echo "Checking prefix \"$prefix\" vs namespace $key\n";
          return $this->checkNamespace( $key, $prefix, $fix, $suffix );
      }

      /**
       * Find pages in namespace 0 whose title starts with "$name:".
       *
       * @param $ns   Namespace number the pages should move to (0 = interwiki).
       * @param $name Prefix text, without the trailing colon.
       * @return array Row objects with the fields id, oldtitle, namespace and
       *               title. title is oldtitle without its first "$name:"; for
       *               $ns == 0 the colon is replaced by "-" instead.
       */
      public function getConflicts( $ns, $name ) {
          // Array keys that look numeric arrive here as integers.
          $name = (string)$name;
          $needle = $name . ':';

          $page = 'page';
          $table = $this->db->tableName( $page );

          // Escape both LIKE wildcards so the prefix is matched literally.
          $likePrefix = str_replace(
              array( '%', '_' ),
              array( '\\%', '\\_' ),
              $this->db->strencode( $needle )
          );

          $sql = "SELECT {$page}_id AS id,
              {$page}_title AS oldtitle
              FROM {$table}
              WHERE {$page}_namespace=0
              AND {$page}_title LIKE '{$likePrefix}%'";
          $result = $this->db->query( $sql, 'NamespaceConflictChecker::getConflicts' );

          // The new title is derived here rather than with SQL
          // TRIM(LEADING ...), which removes every repeated leading
          // occurrence: "Talk:Talk:Foo" must become "Talk:Foo", not "Foo".
          $skip = strlen( $needle );
          $set = array();
          while ( $row = $this->db->fetchObject( $result ) ) {
              // substr() may return false on old PHP when nothing is left.
              $rest = (string)substr( $row->oldtitle, $skip );
              $row->namespace = $ns;
              if ( $ns == 0 ) {
                  // An interwiki; try an alternate encoding with '-' for ':'
                  $row->title = $name . '-' . $rest;
              } else {
                  $row->title = $rest;
              }
              $set[] = $row;
          }
          $this->db->freeResult( $result );
          return $set;
      }

      /**
       * Print one conflict and decide whether it can be fixed as-is.
       *
       * @param $row    Row object produced by getConflicts().
       * @param $suffix Unused here; kept so existing callers keep working.
       * @return bool True when the target title is valid and not yet taken.
       */
      public function reportConflict( $row, $suffix ) {
          $newTitle = Title::makeTitleSafe( $row->namespace, $row->title );
          if ( is_null( $newTitle ) || !$newTitle->canExist() ) {
              // Title is also an illegal title...
              // For the moment we'll let these slide to cleanupTitles or whoever.
              printf( "... %d (0,\"%s\")\n",
                  $row->id,
                  $row->oldtitle );
              echo "... *** cannot resolve automatically; illegal title ***\n";
              return false;
          }

          printf( "... %d (0,\"%s\") -> (%d,\"%s\") [[%s]]\n",
              $row->id,
              $row->oldtitle,
              $newTitle->getNamespace(),
              $newTitle->getDBkey(),
              $newTitle->getPrefixedText() );

          $id = $newTitle->getArticleId();
          if ( $id ) {
              echo "... *** cannot resolve automatically; page exists with ID $id ***\n";
              return false;
          }
          return true;
      }

      /**
       * Move one conflicting page to its proper namespace.
       *
       * When the row is not directly resolvable, $suffix is appended to
       * $row->title repeatedly until a title without an existing page is found.
       *
       * @param $row        Row object produced by getConflicts(); its title
       *                    field is modified when a suffix is applied.
       * @param $resolvable Result of reportConflict() for this row.
       * @param $suffix     Text to append; callers pass a non-empty value
       *                    whenever $resolvable is false.
       * @return bool False when no valid title could be built or the update
       *              could not be attempted, true otherwise.
       */
      public function resolveConflict( $row, $resolvable, $suffix ) {
          if ( !$resolvable ) {
              echo "... *** old title {$row->title}\n";
              while ( true ) {
                  $row->title .= $suffix;
                  echo "... *** new title {$row->title}\n";
                  $title = Title::makeTitleSafe( $row->namespace, $row->title );
                  if ( !$title ) {
                      echo "... !!! invalid title\n";
                      return false;
                  }
                  $id = $title->getArticleId();
                  if ( !$id ) {
                      break;
                  }
                  echo "... *** page exists with ID $id ***\n";
              }
              echo "... *** using suffixed form [[" . $title->getPrefixedText() . "]] ***\n";
          }

          $ok = true;
          foreach ( array( 'page' ) as $table ) {
              $ok = $this->resolveConflictOn( $row, $table ) && $ok;
          }
          return $ok;
      }

      /**
       * Rewrite namespace and title of the conflicting row in one table.
       *
       * @param $row   Row object with namespace, title and oldtitle fields.
       * @param $table Table name; its columns are "<table>_namespace" and
       *               "<table>_title".
       * @return bool False when the target title is invalid, true after the
       *              update was issued.
       */
      public function resolveConflictOn( $row, $table ) {
          echo "... resolving on $table... ";
          $newTitle = Title::makeTitleSafe( $row->namespace, $row->title );
          if ( is_null( $newTitle ) ) {
              // Avoid calling methods on null if the title is not valid.
              echo "failed: invalid title.\n";
              return false;
          }
          $this->db->update( $table,
              array(
                  "{$table}_namespace" => $newTitle->getNamespace(),
                  "{$table}_title" => $newTitle->getDBkey(),
              ),
              array(
                  "{$table}_namespace" => 0,
                  "{$table}_title" => $row->oldtitle,
              ),
              __METHOD__ );
          echo "ok.\n";
          return true;
      }
  }
  // Script entry point: read the switches, run the requested check and
  // report the outcome through the exit status.
  $wgTitle = Title::newFromText( 'Namespace title conflict cleanup script' );

  $verbose = isset( $options['verbose'] );
  $fix = isset( $options['fix'] );
  $suffix = isset( $options['suffix'] ) ? $options['suffix'] : '';
  $prefix = isset( $options['prefix'] ) ? (string)$options['prefix'] : '';
  $key = isset( $options['key'] ) ? intval( $options['key'] ) : 0;

  $dbw = wfGetDB( DB_MASTER );
  $duper = new NamespaceConflictChecker( $dbw, $verbose );

  // Compare against '' explicitly: a plain truthiness test treats
  // --prefix=0 as "no prefix" and would run the full check (and, with
  // --fix, the full repair) instead of the single requested prefix.
  if ( $prefix !== '' ) {
      $retval = $duper->checkPrefix( $key, $prefix, $fix, $suffix );
  } else {
      $retval = $duper->checkAll( $fix, $suffix );
  }

  if ( $retval ) {
      echo "\nLooks good!\n";
      exit( 0 );
  } else {
      echo "\nOh noeees\n";
      exit( -1 );
  }