PageRenderTime 46ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/language/mw-classes/LanguageEo.php

https://github.com/Hedonil/intuition
PHP | 123 lines | 63 code | 7 blank | 53 comment | 10 complexity | 25d4a4fda56092276492bc38947ec040 MD5 | raw file
Possible License(s): GPL-2.0
  1. <?php
  /**
   * Esperanto (Esperanto)
   *
   * Extends the base Language class with the Esperanto "X-system"
   * pseudo-charset ('x'), which spells the accented Esperanto letters
   * with plain ASCII so they can be edited without Unicode support.
   *
   * @ingroup Language
   * @author Brion Vibber <brion@pobox.com>
   */
  class LanguageEo extends Language {
      /**
       * Wrapper for charset conversions.
       *
       * Any charset pair other than x <-> utf-8 is delegated unchanged to
       * the parent implementation. For Esperanto a special pseudo-charset
       * named "x" is added, which converts accented characters to/from the
       * ASCII-friendly "X" surrogate coding:
       *
       *   cx = ĉ    cxx = cx
       *   gx = ĝ    gxx = gx
       *   hx = ĥ    hxx = hx
       *   jx = ĵ    jxx = jx
       *   sx = ŝ    sxx = sx
       *   ux = ŭ    uxx = ux
       *             xx  = x
       *
       * http://en.wikipedia.org/wiki/Esperanto_orthography#X-system
       * http://eo.wikipedia.org/wiki/X-sistemo
       *
       * X-conversion is applied, in either direction, between "utf-8" and
       * "x" charsets; this comes into effect when input is run through
       * $wgRequest->getText() and $wgEditEncoding is set to 'x'.
       *
       * In the long run, this should be moved out of here and into the
       * client-side editor behavior; the original server-side translation
       * system dates to 2002-2003 when many browsers with really bad
       * Unicode support were still in use.
       *
       * @param string $in input character set (compared case-insensitively)
       * @param string $out output character set (compared case-insensitively)
       * @param string $string text to be converted
       * @return string
       */
      public function iconv( $in, $out, $string ) {
          if ( strcasecmp( $in, 'x' ) === 0 && strcasecmp( $out, 'utf-8' ) === 0 ) {
              // Group 1: a base letter plus at most one x (the accent marker).
              // Group 2: the remaining x's, taken in escaped "xx" pairs.
              // The lookahead forces the whole run of x's to be consumed, so
              // an even-length run leaves the base letter unaccented.
              return preg_replace_callback(
                  '/([cghjsu]x?)((?:xx)*)(?!x)/i',
                  array( $this, 'strrtxuCallback' ),
                  $string
              );
          }

          if ( strcasecmp( $in, 'UTF-8' ) === 0 && strcasecmp( $out, 'x' ) === 0 ) {
              // Only x's that directly follow c/g/h/j/s/u (plain or accented)
              // are doubled; every other x in the text is left untouched.
              return preg_replace_callback(
                  '/((?:[cghjsu]|\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]|\xc5[\x9c\x9d\xac\xad])x*)/i',
                  array( $this, 'strrtuxCallback' ),
                  $string
              );
          }

          return parent::iconv( $in, $out, $string );
      }

      /**
       * preg_replace_callback() handler for the UTF-8 -> X-system direction.
       *
       * Rewrites one match (a base or accented letter followed by a run of
       * x's): each accented letter becomes its ASCII letter plus "x", and
       * each literal x/X gets an extra "x" appended as an escape.
       *
       * @param $matches array regex match; only $matches[1] is used
       * @return string
       */
      public function strrtuxCallback( $matches ) {
          static $utf8ToX = array(
              'x' => 'xx', 'X' => 'Xx',
              "\xc4\x88" => "Cx", "\xc4\x89" => "cx",
              "\xc4\x9c" => "Gx", "\xc4\x9d" => "gx",
              "\xc4\xa4" => "Hx", "\xc4\xa5" => "hx",
              "\xc4\xb4" => "Jx", "\xc4\xb5" => "jx",
              "\xc5\x9c" => "Sx", "\xc5\x9d" => "sx",
              "\xc5\xac" => "Ux", "\xc5\xad" => "ux"
          );

          return strtr( $matches[1], $utf8ToX );
      }

      /**
       * preg_replace_callback() handler for the X-system -> UTF-8 direction.
       *
       * $matches[1] is a base letter with an optional accent-marking x and
       * is turned into the accented UTF-8 letter when the x is present;
       * $matches[2] is a run of escaped "xx" pairs, each collapsed to a
       * single x. The case of the marker x is ignored; the case of the
       * result follows the first character of each pair.
       *
       * @param $matches array regex match; $matches[1] and $matches[2] are used
       * @return string
       */
      public function strrtxuCallback( $matches ) {
          static $xToUtf8 = array(
              'xx' => 'x', 'xX' => 'x',
              'Xx' => 'X', 'XX' => 'X',
              "Cx" => "\xc4\x88", "CX" => "\xc4\x88",
              "cx" => "\xc4\x89", "cX" => "\xc4\x89",
              "Gx" => "\xc4\x9c", "GX" => "\xc4\x9c",
              "gx" => "\xc4\x9d", "gX" => "\xc4\x9d",
              "Hx" => "\xc4\xa4", "HX" => "\xc4\xa4",
              "hx" => "\xc4\xa5", "hX" => "\xc4\xa5",
              "Jx" => "\xc4\xb4", "JX" => "\xc4\xb4",
              "jx" => "\xc4\xb5", "jX" => "\xc4\xb5",
              "Sx" => "\xc5\x9c", "SX" => "\xc5\x9c",
              "sx" => "\xc5\x9d", "sX" => "\xc5\x9d",
              "Ux" => "\xc5\xac", "UX" => "\xc5\xac",
              "ux" => "\xc5\xad", "uX" => "\xc5\xad"
          );

          $letter = strtr( $matches[1], $xToUtf8 );
          $escapedXs = strtr( $matches[2], $xToUtf8 );

          return $letter . $escapedXs;
      }

      /**
       * Normalise the encoding of an incoming title.
       *
       * A string that contains bytes >= 0x80 but is not well-formed as
       * UTF-8 (per the byte-pattern check below) is assumed to be Latin-1
       * and is converted to UTF-8. Everything else is returned unchanged.
       *
       * An earlier revision also X-decoded titles for backwards-compatible
       * URLs; that conversion was already disabled (commented out) and is
       * intentionally not performed here.
       *
       * @param $s string
       * @return string
       */
      public function checkTitleEncoding( $s ) {
          $hasHighBytes = preg_match( '/[\x80-\xff]/', $s );
          $looksLikeUtf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
              '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );

          if ( $hasHighBytes && !$looksLikeUtf8 ) {
              # Assume Latin1
              return $this->latin1ToUtf8( $s );
          }

          return $s;
      }

      /**
       * Convert an ISO-8859-1 (Latin-1) byte string to UTF-8.
       *
       * utf8_encode() is deprecated as of PHP 8.2, so mb_convert_encoding()
       * is preferred when the mbstring extension is loaded; utf8_encode()
       * remains as the fallback so installs without mbstring behave as before.
       *
       * @param $s string Latin-1 encoded text
       * @return string UTF-8 encoded text
       */
      private function latin1ToUtf8( $s ) {
          if ( function_exists( 'mb_convert_encoding' ) ) {
              return mb_convert_encoding( $s, 'UTF-8', 'ISO-8859-1' );
          }

          return utf8_encode( $s );
      }

      /**
       * Set the global edit encoding to the X-system pseudo-charset.
       *
       * Side effect: overwrites the global $wgEditEncoding with 'x'.
       */
      public function initEncoding() {
          global $wgEditEncoding;
          $wgEditEncoding = 'x';
      }
  }