/extensions/Translate/scripts/plural-comparison.php

https://github.com/ChuguluGames/mediawiki-svn · PHP · 204 lines · 161 code · 31 blank · 12 comment · 30 complexity · 0a30c7bc163c8888d6679776812a0e97 MD5 · raw file

  1. <?php
  2. /**
  3. * Script for comparing different plural implementations.
  4. *
  5. * @author Niklas Laxstrom
  6. *
  7. * @copyright Copyright © 2010, Niklas Laxström
  8. * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
  9. * @file
  10. */
  11. // Standard boilerplate to define $IP
  12. if ( getenv( 'MW_INSTALL_PATH' ) !== false ) {
  13. $IP = getenv( 'MW_INSTALL_PATH' );
  14. } else {
  15. $dir = dirname( __FILE__ ); $IP = "$dir/../../..";
  16. }
  17. require_once( "$IP/maintenance/Maintenance.php" );
  18. /// Script for comparing different plural implementations.
  19. class PluralCompare extends Maintenance {
  20. public function __construct() {
  21. parent::__construct();
  22. $this->mDescription = 'Script for comparing different plural implementations.';
  23. }
  24. public function execute() {
  25. $mwLanguages = $this->loadMediaWiki();
  26. $gtLanguages = $this->loadGettext();
  27. $clLanguages = $this->loadCLDR();
  28. $allkeys = array_keys( $mwLanguages + $gtLanguages + $clLanguages );
  29. sort( $allkeys );
  30. $this->output( sprintf( "%12s %3s %3s %4s\n", 'Code', 'MW', 'Get', 'CLDR' ) );
  31. foreach ( $allkeys as $index => $code ) {
  32. $mw = isset( $mwLanguages[$code] ) ? ( $mwLanguages[$code] === false ? '.' : '+' ) : '';
  33. $gt = isset( $gtLanguages[$code] ) ? ( $gtLanguages[$code] === '(n != 1);' ? '.' : '+' ) : '';
  34. $cl = isset( $clLanguages[$code] ) ? ( $clLanguages[$code][0] === 'Default' ? '.' : '+' ) : '';
  35. $this->output( sprintf( "%12s %-3s %-3s %-4s\n", $code, $mw, $gt, $cl ) );
  36. if ( substr_count( sprintf( '%s%s%s', $mw, $gt, $cl ), '+' ) < 2 ) {
  37. unset( $allkeys[$index] );
  38. }
  39. }
  40. $this->output( "\n" );
  41. $c = count( $allkeys );
  42. $this->output( "Proceeding to test differences in $c languages\n" );
  43. foreach ( $allkeys as $code ) {
  44. $output = sprintf( "%3s %3s %3s %4s for [$code]\n", 'I', 'MW', 'Get', 'CLDR' );
  45. if ( isset( $mwLanguages[$code] ) && $mwLanguages[$code] !== false ) {
  46. $obj = Language::factory( $code );
  47. } else {
  48. $obj = false;
  49. }
  50. if ( isset( $gtLanguages[$code] ) ) {
  51. $gtExp = 'return (int) ' . str_replace( 'n', '$i', $gtLanguages[$code] ) . ';';
  52. } else {
  53. $gtExp = false;
  54. }
  55. if ( isset( $clLanguages[$code] ) ) {
  56. $cldrExp = $clLanguages[$code][1];
  57. } else {
  58. $cldrExp = false;
  59. }
  60. $cldrmap = array();
  61. $error = false;
  62. for ( $i = 0; $i <= 200; $i++ ) {
  63. $mw = $obj ? $obj->convertPlural( $i, array( 0, 1, 2, 3, 4, 5 ) ) : '?';
  64. $gt = $gtExp ? eval( $gtExp ) : '?';
  65. $cldr = $cldrExp !== false ? $this->evalCLDRRule( $i, $cldrExp ) : '?';
  66. if ( self::comp( $mw, $gt ) ) {
  67. $value = $gt !== '?' ? $gt : $mw;
  68. if ( !isset( $cldrmap[$cldr] ) ) {
  69. $cldrmap[$cldr] = $value;
  70. if ( $cldr !== '?' ) {
  71. $output .= sprintf( "%3s %-3s %-3s %-6s # Established that %-6s == $mw\n", $i, $mw, $gt, $cldr, $cldr );
  72. }
  73. continue;
  74. } elseif ( self::comp( $cldrmap[$cldr], $value ) ) {
  75. continue;
  76. } elseif ( $i > 4 && $value === 1 && self::comp( $cldr, 'other' ) ) {
  77. if ( $i === 5 ) {
  78. $output .= "Supressing further output for this language.\n";
  79. }
  80. continue;
  81. }
  82. }
  83. $error = true;
  84. $output .= sprintf( "%3s %-3s %-3s %-6s\n", $i, $mw, $gt, $cldr );
  85. }
  86. if ( $error ) {
  87. $this->output( "$output\n" );
  88. }
  89. }
  90. }
  91. public static function comp( $a, $b ) {
  92. return $a === '?' || $b === '?' || $a === $b;
  93. }
  94. public function loadCLDR() {
  95. $filename = dirname( __FILE__ ) . '/../data/plural-cldr.yaml';
  96. $data = TranslateYaml::load( $filename );
  97. $languages = array();
  98. $ruleExps = array();
  99. foreach ( $data['rulesets'] as $name => $rules ) {
  100. $ruleExps[$name] = array();
  101. foreach ( $rules as $rulename => $rule ) {
  102. $ruleExps[$name][$rulename] = $this->parseCLDRRule( $rule );
  103. }
  104. }
  105. foreach ( $data['locales'] as $code => $rulename ) {
  106. $languages[$code] = array( $rulename, $ruleExps[$rulename] );
  107. }
  108. return $languages;
  109. }
  110. public function loadMediaWiki() {
  111. $mwLanguages = Language::getLanguageNames( true );
  112. foreach ( $mwLanguages as $code => $name ) {
  113. $obj = Language::factory( $code );
  114. $method = new ReflectionMethod( $obj, 'convertPlural' );
  115. if ( $method->getDeclaringClass()->name === 'Language' ) {
  116. $mwLanguages[$code] = false;
  117. }
  118. }
  119. return $mwLanguages;
  120. }
  121. public function loadGettext() {
  122. $gtData = file_get_contents( dirname( __FILE__ ) . '/../data/plural-gettext.txt' );
  123. $gtLanguages = array();
  124. foreach ( preg_split( '/\n|\r/', $gtData, -1, PREG_SPLIT_NO_EMPTY ) as $line ) {
  125. list( $code, $rule ) = explode( "\t", $line );
  126. $rule = preg_replace( '/^.*?plural=/', '', $rule );
  127. $gtLanguages[$code] = $rule;
  128. }
  129. return $gtLanguages;
  130. }
  131. public function evalCLDRRule( $i, $rules ) {
  132. foreach ( $rules as $name => $rule ) {
  133. if ( eval( "return $rule;" ) ) {
  134. return $name;
  135. }
  136. }
  137. return "other";
  138. }
  139. public function parseCLDRRule( $rule ) {
  140. $rule = preg_replace( '/\bn\b/', '$i', $rule );
  141. $rule = preg_replace( '/([^ ]+) mod (\d+)/', 'self::mod(\1,\2)', $rule );
  142. $rule = preg_replace( '/([^ ]+) is not (\d+)/' , '\1!==\2', $rule );
  143. $rule = preg_replace( '/([^ ]+) is (\d+)/', '\1===\2', $rule );
  144. $rule = preg_replace( '/([^ ]+) not in (\d+)\.\.(\d+)/', '!self::in(\1,\2,\3)', $rule );
  145. $rule = preg_replace( '/([^ ]+) not within (\d+)\.\.(\d+)/', '!self::within(\1,\2,\3)', $rule );
  146. $rule = preg_replace( '/([^ ]+) in (\d+)\.\.(\d+)/', 'self::in(\1,\2,\3)', $rule );
  147. $rule = preg_replace( '/([^ ]+) within (\d+)\.\.(\d+)/', 'self::within(\1,\2,\3)', $rule );
  148. // AND takes precedence over OR
  149. $andrule = '/([^ ]+) and ([^ ]+)/i';
  150. while ( preg_match( $andrule, $rule ) ) {
  151. $rule = preg_replace( $andrule, '(\1&&\2)', $rule );
  152. }
  153. $orrule = '/([^ ]+) or ([^ ]+)/i';
  154. while ( preg_match( $orrule, $rule ) ) {
  155. $rule = preg_replace( $orrule, '(\1||\2)', $rule );
  156. }
  157. return $rule;
  158. }
  159. public static function in( $num, $low, $high ) {
  160. return is_int( $num ) && $num >= $low && $num <= $high;
  161. }
  162. public static function within( $num, $low, $high ) {
  163. return $num >= $low && $num <= $high;
  164. }
  165. public static function mod( $num, $mod ) {
  166. if ( is_int( $num ) ) {
  167. return (int) fmod( $num, $mod );
  168. }
  169. return fmod( $num, $mod );
  170. }
  171. }
  172. $maintClass = 'PluralCompare';
  173. require_once( DO_MAINTENANCE );