/firstrend/src/core/class/chinese.class.php

http://ownerpress.googlecode.com/ · PHP · 255 lines · 221 code · 14 blank · 20 comment · 62 complexity · 89049bc1f697832e34117fe2576a96cc MD5 · raw file

  1. <?php
  2. define('CODETABLE_DIR', FANWE_ROOT.'./public/table/');
  3. /**
  4. * chinese.class.php
  5. *
  6. * ???????
  7. *
  8. * @package class
  9. * @author awfigq <awfigq@qq.com>
  10. */
  11. class Chinese
  12. {
  13. var $table = '';
  14. var $iconv_enabled = false;
  15. var $convertbig5 = false;
  16. var $unicode_table = array();
  17. var $config = array (
  18. 'SourceLang' => '',
  19. 'TargetLang' => '',
  20. 'GBtoUnicode_table' => 'gb-unicode.table',
  21. 'BIG5toUnicode_table' => 'big5-unicode.table',
  22. 'GBtoBIG5_table' => 'gb-big5.table',
  23. );
  24. /**
  25. * ?????
  26. * @param string $SourceLang ???
  27. * @param string $TargetLang ????
  28. * @param bool $ForceTable ??????????? ????
  29. * @return void
  30. */
  31. function Chinese($SourceLang, $TargetLang, $ForceTable = FALSE) {
  32. $this->config['SourceLang'] = $this->_lang($SourceLang);
  33. $this->config['TargetLang'] = $this->_lang($TargetLang);
  34. if(ICONV_ENABLE && $this->config['TargetLang'] != 'BIG5' && !$ForceTable) {
  35. $this->iconv_enabled = true;
  36. } else {
  37. $this->iconv_enabled = false;
  38. $this->OpenTable();
  39. }
  40. }
  41. /**
  42. * ??????????
  43. * @param string $SourceText ???????
  44. * @return string
  45. */
  46. function convert($SourceText) {
  47. if($this->config['SourceLang'] == $this->config['TargetLang']) {
  48. return $SourceText;
  49. } elseif($this->iconv_enabled) {
  50. if($this->config['TargetLang'] <> 'UNICODE') {
  51. return iconv($this->config['SourceLang'], $this->config['TargetLang'], $SourceText);
  52. } else {
  53. $return = '';
  54. while($SourceText != '') {
  55. if(ord(substr($SourceText, 0, 1)) > 127) {
  56. $return .= "&#x".dechex($this->Utf8_Unicode(iconv($this->config['SourceLang'],"UTF-8", substr($SourceText, 0, 2)))).";";
  57. $SourceText = substr($SourceText, 2, strlen($SourceText));
  58. } else {
  59. $return .= substr($SourceText, 0, 1);
  60. $SourceText = substr($SourceText, 1, strlen($SourceText));
  61. }
  62. }
  63. return $return;
  64. }
  65. } elseif($this->config['TargetLang'] == 'UNICODE') {
  66. $utf = '';
  67. while($SourceText != '') {
  68. if(ord(substr($SourceText, 0, 1)) > 127) {
  69. if($this->config['SourceLang'] == 'GBK') {
  70. $utf .= '&#x'.$this->unicode_table[hexdec(bin2hex(substr($SourceText, 0, 2))) - 0x8080].';';
  71. } elseif($this->config['SourceLang'] == 'BIG5') {
  72. $utf .= '&#x'.$this->unicode_table[hexdec(bin2hex(substr($SourceText, 0, 2)))].';';
  73. }
  74. $SourceText = substr($SourceText, 2, strlen($SourceText));
  75. } else {
  76. $utf .= substr($SourceText, 0, 1);
  77. $SourceText = substr($SourceText, 1, strlen($SourceText));
  78. }
  79. }
  80. return $utf;
  81. } else {
  82. $ret = '';
  83. if($this->config['SourceLang'] == 'UTF-8') {
  84. $out = '';
  85. $len = strlen($SourceText);
  86. $i = 0;
  87. while($i < $len) {
  88. $c = ord(substr($SourceText, $i++, 1));
  89. switch($c >> 4) {
  90. case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
  91. $out .= substr($SourceText, $i - 1, 1);
  92. break;
  93. case 12: case 13:
  94. $char2 = ord(substr($SourceText, $i++, 1));
  95. $char3 = $this->unicode_table[(($c & 0x1F) << 6) | ($char2 & 0x3F)];
  96. if($this->config['TargetLang'] == 'GBK') {
  97. $out .= $this->_hex2bin(dechex($char3 + 0x8080));
  98. } elseif($this->config['TargetLang'] == 'BIG5') {
  99. $out .= $this->_hex2bin($char3);
  100. }
  101. break;
  102. case 14:
  103. $char2 = ord(substr($SourceText, $i++, 1));
  104. $char3 = ord(substr($SourceText, $i++, 1));
  105. $char4 = $this->unicode_table[(($c & 0x0F) << 12) | (($char2 & 0x3F) << 6) | (($char3 & 0x3F) << 0)];
  106. if($this->config['TargetLang'] == 'GBK') {
  107. $out .= $this->_hex2bin(dechex($char4 + 0x8080));
  108. } elseif($this->config['TargetLang'] == 'BIG5') {
  109. $out .= $this->_hex2bin($char4);
  110. }
  111. break;
  112. }
  113. }
  114. return !$this->convertbig5 ? $out : $this->GB2312toBIG5($out);
  115. } else {
  116. while($SourceText != '') {
  117. if(ord(substr($SourceText, 0, 1)) > 127) {
  118. if($this->config['SourceLang'] == 'BIG5') {
  119. $utf8 = $this->CHSUtoUTF8(hexdec($this->unicode_table[hexdec(bin2hex(substr($SourceText, 0, 2)))]));
  120. } elseif($this->config['SourceLang'] == 'GBK') {
  121. $utf8=$this->CHSUtoUTF8(hexdec($this->unicode_table[hexdec(bin2hex(substr($SourceText, 0, 2))) - 0x8080]));
  122. }
  123. for($i = 0; $i < strlen($utf8); $i += 3) {
  124. $ret .= chr(substr($utf8, $i, 3));
  125. }
  126. $SourceText = substr($SourceText, 2, strlen($SourceText));
  127. } else {
  128. $ret .= substr($SourceText, 0, 1);
  129. $SourceText = substr($SourceText, 1, strlen($SourceText));
  130. }
  131. }
  132. $SourceText = '';
  133. return $ret;
  134. }
  135. }
  136. }
  137. function _lang($LangCode) {
  138. $LangCode = strtoupper($LangCode);
  139. if(substr($LangCode, 0, 2) == 'GB') {
  140. return 'GBK';
  141. } elseif(substr($LangCode, 0, 3) == 'BIG') {
  142. return 'BIG5';
  143. } elseif(substr($LangCode, 0, 3) == 'UTF') {
  144. return 'UTF-8';
  145. } elseif(substr($LangCode, 0, 3) == 'UNI') {
  146. return 'UNICODE';
  147. }
  148. }
  149. function _hex2bin($hexdata) {
  150. for($i=0; $i < strlen($hexdata); $i += 2) {
  151. $bindata .= chr(hexdec(substr($hexdata, $i, 2)));
  152. }
  153. return $bindata;
  154. }
  155. function OpenTable() {
  156. $this->unicode_table = array();
  157. if(!$this->iconv_enabled && $this->config['TargetLang'] == 'BIG5') {
  158. $this->config['TargetLang'] = 'GBK';
  159. $this->convertbig5 = TRUE;
  160. }
  161. if($this->config['SourceLang'] == 'GBK' || $this->config['TargetLang'] == 'GBK') {
  162. $this->table = CODETABLE_DIR.$this->config['GBtoUnicode_table'];
  163. } elseif($this->config['SourceLang'] == 'BIG5' || $this->config['TargetLang'] == 'BIG5') {
  164. $this->table = CODETABLE_DIR.$this->config['BIG5toUnicode_table'];
  165. }
  166. $fp = fopen($this->table, 'rb');
  167. $tabletmp = fread($fp, filesize($this->table));
  168. for($i = 0; $i < strlen($tabletmp); $i += 4) {
  169. $tmp = unpack('nkey/nvalue', substr($tabletmp, $i, 4));
  170. if($this->config['TargetLang'] == 'UTF-8') {
  171. $this->unicode_table[$tmp['key']] = '0x'.dechex($tmp['value']);
  172. } elseif($this->config['SourceLang'] == 'UTF-8') {
  173. $this->unicode_table[$tmp['value']] = '0x'.dechex($tmp['key']);
  174. } elseif($this->config['TargetLang'] == 'UNICODE') {
  175. $this->unicode_table[$tmp['key']] = dechex($tmp['value']);
  176. }
  177. }
  178. }
  179. function CHSUtoUTF8($c) {
  180. $str = '';
  181. if($c < 0x80) {
  182. $str .= $c;
  183. } elseif($c < 0x800) {
  184. $str .= (0xC0 | $c >> 6);
  185. $str .= (0x80 | $c & 0x3F);
  186. } elseif($c < 0x10000) {
  187. $str .= (0xE0 | $c >> 12);
  188. $str .= (0x80 | $c >> 6 & 0x3F);
  189. $str .=( 0x80 | $c & 0x3F);
  190. } elseif($c < 0x200000) {
  191. $str .= (0xF0 | $c >> 18);
  192. $str .= (0x80 | $c >> 12 & 0x3F);
  193. $str .= (0x80 | $c >> 6 & 0x3F);
  194. $str .= (0x80 | $c & 0x3F);
  195. }
  196. return $str;
  197. }
  198. function GB2312toBIG5($c) {
  199. $f = fopen(CODETABLE_DIR.$this->config['GBtoBIG5_table'], 'r');
  200. $max=strlen($c)-1;
  201. for($i = 0;$i < $max;$i++){
  202. $h=ord($c[$i]);
  203. if($h>=160) {
  204. $l=ord($c[$i+1]);
  205. if($h==161 && $l==64){
  206. $gb=" ";
  207. } else{
  208. fseek($f,($h-160)*510+($l-1)*2);
  209. $gb=fread($f,2);
  210. }
  211. $c[$i]=$gb[0];
  212. $c[$i+1]=$gb[1];
  213. $i++;
  214. }
  215. }
  216. $result = $c;
  217. return $result;
  218. }
  219. function Utf8_Unicode($char) {
  220. switch(strlen($char)) {
  221. case 1:
  222. return ord($char);
  223. case 2:
  224. $n = (ord($char[0]) & 0x3f) << 6;
  225. $n += ord($char[1]) & 0x3f;
  226. return $n;
  227. case 3:
  228. $n = (ord($char[0]) & 0x1f) << 12;
  229. $n += (ord($char[1]) & 0x3f) << 6;
  230. $n += ord($char[2]) & 0x3f;
  231. return $n;
  232. case 4:
  233. $n = (ord($char[0]) & 0x0f) << 18;
  234. $n += (ord($char[1]) & 0x3f) << 12;
  235. $n += (ord($char[2]) & 0x3f) << 6;
  236. $n += ord($char[3]) & 0x3f;
  237. return $n;
  238. }
  239. }
  240. }
  241. ?>