PageRenderTime 48ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/source/class/class_chinese.php

https://github.com/kuaileshike/upload
PHP | 247 lines | 223 code | 18 blank | 6 comment | 63 complexity | aee754dfe638bcbc982fc3ba74c24c55 MD5 | raw file
  1. <?php
  2. /**
  3. * [Discuz!] (C)2001-2099 Comsenz Inc.
  4. * This is NOT a freeware, use is subject to license terms
  5. *
  6. * $Id: class_chinese.php 6757 2010-03-25 09:01:29Z cnteacher $
  7. */
  8. if(!defined('IN_DISCUZ')) {
  9. exit('Access Denied');
  10. }
  11. define('CODETABLE_DIR', DISCUZ_ROOT.'./source/include/table/');
  12. class Chinese {
  13. var $table = '';
  14. var $iconv_enabled = false;
  15. var $convertbig5 = false;
  16. var $unicode_table = array();
  17. var $config = array (
  18. 'SourceLang' => '',
  19. 'TargetLang' => '',
  20. 'GBtoUnicode_table' => 'gb-unicode.table',
  21. 'BIG5toUnicode_table' => 'big5-unicode.table',
  22. 'GBtoBIG5_table' => 'gb-big5.table',
  23. );
  24. function Chinese($SourceLang, $TargetLang, $ForceTable = FALSE) {
  25. $this->config['SourceLang'] = $this->_lang($SourceLang);
  26. $this->config['TargetLang'] = $this->_lang($TargetLang);
  27. if(ICONV_ENABLE && $this->config['TargetLang'] != 'BIG5' && !$ForceTable) {
  28. $this->iconv_enabled = true;
  29. } else {
  30. $this->iconv_enabled = false;
  31. $this->OpenTable();
  32. }
  33. }
  34. function _lang($LangCode) {
  35. $LangCode = strtoupper($LangCode);
  36. if(substr($LangCode, 0, 2) == 'GB') {
  37. return 'GBK';
  38. } elseif(substr($LangCode, 0, 3) == 'BIG') {
  39. return 'BIG5';
  40. } elseif(substr($LangCode, 0, 3) == 'UTF') {
  41. return 'UTF-8';
  42. } elseif(substr($LangCode, 0, 3) == 'UNI') {
  43. return 'UNICODE';
  44. }
  45. }
  46. function _hex2bin($hexdata) {
  47. for($i=0; $i < strlen($hexdata); $i += 2) {
  48. $bindata .= chr(hexdec(substr($hexdata, $i, 2)));
  49. }
  50. return $bindata;
  51. }
  52. function OpenTable() {
  53. $this->unicode_table = array();
  54. if(!$this->iconv_enabled && $this->config['TargetLang'] == 'BIG5') {
  55. $this->config['TargetLang'] = 'GBK';
  56. $this->convertbig5 = TRUE;
  57. }
  58. if($this->config['SourceLang'] == 'GBK' || $this->config['TargetLang'] == 'GBK') {
  59. $this->table = CODETABLE_DIR.$this->config['GBtoUnicode_table'];
  60. } elseif($this->config['SourceLang'] == 'BIG5' || $this->config['TargetLang'] == 'BIG5') {
  61. $this->table = CODETABLE_DIR.$this->config['BIG5toUnicode_table'];
  62. }
  63. $fp = fopen($this->table, 'rb');
  64. $tabletmp = fread($fp, filesize($this->table));
  65. for($i = 0; $i < strlen($tabletmp); $i += 4) {
  66. $tmp = unpack('nkey/nvalue', substr($tabletmp, $i, 4));
  67. if($this->config['TargetLang'] == 'UTF-8') {
  68. $this->unicode_table[$tmp['key']] = '0x'.dechex($tmp['value']);
  69. } elseif($this->config['SourceLang'] == 'UTF-8') {
  70. $this->unicode_table[$tmp['value']] = '0x'.dechex($tmp['key']);
  71. } elseif($this->config['TargetLang'] == 'UNICODE') {
  72. $this->unicode_table[$tmp['key']] = dechex($tmp['value']);
  73. }
  74. }
  75. }
  76. function CHSUtoUTF8($c) {
  77. $str = '';
  78. if($c < 0x80) {
  79. $str .= $c;
  80. } elseif($c < 0x800) {
  81. $str .= (0xC0 | $c >> 6);
  82. $str .= (0x80 | $c & 0x3F);
  83. } elseif($c < 0x10000) {
  84. $str .= (0xE0 | $c >> 12);
  85. $str .= (0x80 | $c >> 6 & 0x3F);
  86. $str .=( 0x80 | $c & 0x3F);
  87. } elseif($c < 0x200000) {
  88. $str .= (0xF0 | $c >> 18);
  89. $str .= (0x80 | $c >> 12 & 0x3F);
  90. $str .= (0x80 | $c >> 6 & 0x3F);
  91. $str .= (0x80 | $c & 0x3F);
  92. }
  93. return $str;
  94. }
  95. function GB2312toBIG5($c) {
  96. $f = fopen(CODETABLE_DIR.$this->config['GBtoBIG5_table'], 'r');
  97. $max=strlen($c)-1;
  98. for($i = 0;$i < $max;$i++){
  99. $h=ord($c[$i]);
  100. if($h>=160) {
  101. $l=ord($c[$i+1]);
  102. if($h==161 && $l==64){
  103. $gb=" ";
  104. } else{
  105. fseek($f,($h-160)*510+($l-1)*2);
  106. $gb=fread($f,2);
  107. }
  108. $c[$i]=$gb[0];
  109. $c[$i+1]=$gb[1];
  110. $i++;
  111. }
  112. }
  113. $result = $c;
  114. return $result;
  115. }
  116. function Convert($SourceText) {
  117. if($this->config['SourceLang'] == $this->config['TargetLang']) {
  118. return $SourceText;
  119. } elseif($this->iconv_enabled) {
  120. if($this->config['TargetLang'] <> 'UNICODE') {
  121. return iconv($this->config['SourceLang'], $this->config['TargetLang'], $SourceText);
  122. } else {
  123. $return = '';
  124. while($SourceText != '') {
  125. if(ord(substr($SourceText, 0, 1)) > 127) {
  126. $return .= "&#x".dechex($this->Utf8_Unicode(iconv($this->config['SourceLang'],"UTF-8", substr($SourceText, 0, 2)))).";";
  127. $SourceText = substr($SourceText, 2, strlen($SourceText));
  128. } else {
  129. $return .= substr($SourceText, 0, 1);
  130. $SourceText = substr($SourceText, 1, strlen($SourceText));
  131. }
  132. }
  133. return $return;
  134. }
  135. } elseif($this->config['TargetLang'] == 'UNICODE') {
  136. $utf = '';
  137. while($SourceText != '') {
  138. if(ord(substr($SourceText, 0, 1)) > 127) {
  139. if($this->config['SourceLang'] == 'GBK') {
  140. $utf .= '&#x'.$this->unicode_table[hexdec(bin2hex(substr($SourceText, 0, 2))) - 0x8080].';';
  141. } elseif($this->config['SourceLang'] == 'BIG5') {
  142. $utf .= '&#x'.$this->unicode_table[hexdec(bin2hex(substr($SourceText, 0, 2)))].';';
  143. }
  144. $SourceText = substr($SourceText, 2, strlen($SourceText));
  145. } else {
  146. $utf .= substr($SourceText, 0, 1);
  147. $SourceText = substr($SourceText, 1, strlen($SourceText));
  148. }
  149. }
  150. return $utf;
  151. } else {
  152. $ret = '';
  153. if($this->config['SourceLang'] == 'UTF-8') {
  154. $out = '';
  155. $len = strlen($SourceText);
  156. $i = 0;
  157. while($i < $len) {
  158. $c = ord(substr($SourceText, $i++, 1));
  159. switch($c >> 4) {
  160. case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
  161. $out .= substr($SourceText, $i - 1, 1);
  162. break;
  163. case 12: case 13:
  164. $char2 = ord(substr($SourceText, $i++, 1));
  165. $char3 = $this->unicode_table[(($c & 0x1F) << 6) | ($char2 & 0x3F)];
  166. if($this->config['TargetLang'] == 'GBK') {
  167. $out .= $this->_hex2bin(dechex($char3 + 0x8080));
  168. } elseif($this->config['TargetLang'] == 'BIG5') {
  169. $out .= $this->_hex2bin($char3);
  170. }
  171. break;
  172. case 14:
  173. $char2 = ord(substr($SourceText, $i++, 1));
  174. $char3 = ord(substr($SourceText, $i++, 1));
  175. $char4 = $this->unicode_table[(($c & 0x0F) << 12) | (($char2 & 0x3F) << 6) | (($char3 & 0x3F) << 0)];
  176. if($this->config['TargetLang'] == 'GBK') {
  177. $out .= $this->_hex2bin(dechex($char4 + 0x8080));
  178. } elseif($this->config['TargetLang'] == 'BIG5') {
  179. $out .= $this->_hex2bin($char4);
  180. }
  181. break;
  182. }
  183. }
  184. return !$this->convertbig5 ? $out : $this->GB2312toBIG5($out);
  185. } else {
  186. while($SourceText != '') {
  187. if(ord(substr($SourceText, 0, 1)) > 127) {
  188. if($this->config['SourceLang'] == 'BIG5') {
  189. $utf8 = $this->CHSUtoUTF8(hexdec($this->unicode_table[hexdec(bin2hex(substr($SourceText, 0, 2)))]));
  190. } elseif($this->config['SourceLang'] == 'GBK') {
  191. $utf8=$this->CHSUtoUTF8(hexdec($this->unicode_table[hexdec(bin2hex(substr($SourceText, 0, 2))) - 0x8080]));
  192. }
  193. for($i = 0; $i < strlen($utf8); $i += 3) {
  194. $ret .= chr(substr($utf8, $i, 3));
  195. }
  196. $SourceText = substr($SourceText, 2, strlen($SourceText));
  197. } else {
  198. $ret .= substr($SourceText, 0, 1);
  199. $SourceText = substr($SourceText, 1, strlen($SourceText));
  200. }
  201. }
  202. $SourceText = '';
  203. return $ret;
  204. }
  205. }
  206. }
  207. function Utf8_Unicode($char) {
  208. switch(strlen($char)) {
  209. case 1:
  210. return ord($char);
  211. case 2:
  212. $n = (ord($char[0]) & 0x3f) << 6;
  213. $n += ord($char[1]) & 0x3f;
  214. return $n;
  215. case 3:
  216. $n = (ord($char[0]) & 0x1f) << 12;
  217. $n += (ord($char[1]) & 0x3f) << 6;
  218. $n += ord($char[2]) & 0x3f;
  219. return $n;
  220. case 4:
  221. $n = (ord($char[0]) & 0x0f) << 18;
  222. $n += (ord($char[1]) & 0x3f) << 12;
  223. $n += (ord($char[2]) & 0x3f) << 6;
  224. $n += ord($char[3]) & 0x3f;
  225. return $n;
  226. }
  227. }
  228. }
  229. ?>