/firstrend/src/core/class/chinese.class.php
http://ownerpress.googlecode.com/ · PHP · 255 lines · 221 code · 14 blank · 20 comment · 62 complexity · 89049bc1f697832e34117fe2576a96cc MD5 · raw file
- <?php
// Directory (built from FANWE_ROOT) that holds the code-table files opened by
// Chinese::OpenTable() and Chinese::GB2312toBIG5().
- define('CODETABLE_DIR', FANWE_ROOT.'./public/table/');
-
- /**
- * chinese.class.php
- *
- * Chinese character-encoding conversion class (GBK / BIG5 / UTF-8 / Unicode entities).
- *
- * @package class
- * @author awfigq <awfigq@qq.com>
- */
- class Chinese
- {
// Path of the code-table file selected by OpenTable() ('' until one is chosen).
- var $table = '';
// Whether convert() uses iconv() instead of the lookup table; set by the constructor.
- var $iconv_enabled = false;
// Set by OpenTable() when a BIG5 target is rewritten to GBK; convert() then passes
// the output of its UTF-8 source path through GB2312toBIG5().
- var $convertbig5 = false;
// Lookup table filled by OpenTable() from the selected code-table file.
- var $unicode_table = array();
// Normalised source/target language codes plus the code-table file names,
// which are resolved relative to CODETABLE_DIR.
- var $config = array (
- 'SourceLang' => '',
- 'TargetLang' => '',
- 'GBtoUnicode_table' => 'gb-unicode.table',
- 'BIG5toUnicode_table' => 'big5-unicode.table',
- 'GBtoBIG5_table' => 'gb-big5.table',
- );
-
/**
 * Constructor: normalises both language codes and picks the conversion backend.
 *
 * iconv is used when the ICONV_ENABLE constant is truthy, the target is not
 * BIG5 and $ForceTable is false. In every other case the lookup table is
 * loaded through OpenTable().
 *
 * @param string $SourceLang Source encoding name (normalised by _lang()).
 * @param string $TargetLang Target encoding name (normalised by _lang()).
 * @param bool   $ForceTable When true, use the lookup table even if iconv is enabled.
 * @return void
 */
function __construct($SourceLang, $TargetLang, $ForceTable = FALSE) {
    $this->config['SourceLang'] = $this->_lang($SourceLang);
    $this->config['TargetLang'] = $this->_lang($TargetLang);

    $useIconv = ICONV_ENABLE && $this->config['TargetLang'] != 'BIG5' && !$ForceTable;
    $this->iconv_enabled = $useIconv ? true : false;

    if(!$this->iconv_enabled) {
        $this->OpenTable();
    }
}

/**
 * PHP 4 style constructor name, kept so existing explicit calls keep working.
 *
 * Methods named after their class are no longer treated as constructors in
 * PHP 8, so the real initialisation lives in __construct() and this method
 * only forwards to it.
 *
 * @param string $SourceLang Source encoding name.
 * @param string $TargetLang Target encoding name.
 * @param bool   $ForceTable When true, use the lookup table even if iconv is enabled.
 * @return void
 */
function Chinese($SourceLang, $TargetLang, $ForceTable = FALSE) {
    $this->__construct($SourceLang, $TargetLang, $ForceTable);
}
-
/**
 * Converts $SourceText from the configured source encoding to the target encoding.
 *
 * The constructor decides whether iconv or the lookup table is used. A
 * 'UNICODE' target returns the text with every non-ASCII character replaced
 * by a hexadecimal entity ("&#x....;").
 *
 * @param string $SourceText Text in the source encoding.
 * @return string Converted text; the plain iconv path returns whatever iconv() returns.
 */
function convert($SourceText) {
    $source = $this->config['SourceLang'];
    $target = $this->config['TargetLang'];

    if($source == $target) {
        // NOTE(review): OpenTable() rewrites a BIG5 target to GBK and sets
        // $convertbig5, so a GBK -> BIG5 request lands here and is returned
        // unconverted. Behaviour kept as-is; confirm whether that is intended.
        return $SourceText;
    }

    if($this->iconv_enabled) {
        if($target <> 'UNICODE') {
            return iconv($source, $target, $SourceText);
        }
        return $this->_iconvToEntities($SourceText);
    }

    if($target == 'UNICODE') {
        return $this->_tableToEntities($SourceText);
    }

    if($source == 'UTF-8') {
        $out = $this->_utf8ToTableEncoding($SourceText);
        return !$this->convertbig5 ? $out : $this->GB2312toBIG5($out);
    }

    return $this->_tableEncodingToUtf8($SourceText);
}

/**
 * iconv backend for a 'UNICODE' target: emits "&#x....;" for each non-ASCII character.
 *
 * @param string $SourceText Text in the source encoding.
 * @return string
 */
function _iconvToEntities($SourceText) {
    $text = (string) $SourceText;
    $len = strlen($text);
    $isUtf8 = ($this->config['SourceLang'] == 'UTF-8');
    $return = '';
    $i = 0;

    while($i < $len) {
        $lead = ord($text[$i]);
        if($lead <= 127) {
            $return .= $text[$i];
            $i++;
            continue;
        }

        // GBK and BIG5 characters are two bytes wide; a UTF-8 character is as
        // wide as its lead byte says. Slicing two bytes from a three-byte
        // UTF-8 sequence would hand iconv a truncated character.
        $width = 2;
        if($isUtf8) {
            if($lead >= 0xF0) {
                $width = 4;
            } elseif($lead >= 0xE0) {
                $width = 3;
            }
        }

        $utf8Char = iconv($this->config['SourceLang'], "UTF-8", substr($text, $i, $width));
        $return .= "&#x".dechex($this->Utf8_Unicode($utf8Char)).";";
        $i += $width;
    }

    return $return;
}

/**
 * Table backend for a 'UNICODE' target: looks each two-byte character up in
 * $unicode_table (hex strings without prefix) and emits it as "&#x....;".
 *
 * @param string $SourceText GBK or BIG5 text.
 * @return string
 */
function _tableToEntities($SourceText) {
    $text = (string) $SourceText;
    $len = strlen($text);
    $source = $this->config['SourceLang'];
    $utf = '';
    $i = 0;

    while($i < $len) {
        if(ord($text[$i]) <= 127) {
            $utf .= $text[$i];
            $i++;
            continue;
        }

        $code = hexdec(bin2hex(substr($text, $i, 2)));
        $i += 2;

        if($source == 'GBK') {
            // GBK codes are looked up with 0x8080 subtracted.
            $key = $code - 0x8080;
        } elseif($source == 'BIG5') {
            $key = $code;
        } else {
            // No table for any other source: the character is dropped, as before.
            continue;
        }

        // Unknown characters still yield "&#x;", but without an undefined-index notice.
        $hex = isset($this->unicode_table[$key]) ? $this->unicode_table[$key] : '';
        $utf .= '&#x'.$hex.';';
    }

    return $utf;
}

/**
 * Table backend for a UTF-8 source: decodes each UTF-8 sequence to a code
 * point and re-encodes it through $unicode_table.
 *
 * Bytes that are neither ASCII nor a 2-/3-byte lead byte are skipped.
 *
 * @param string $SourceText UTF-8 text.
 * @return string Text in the table's encoding.
 */
function _utf8ToTableEncoding($SourceText) {
    $text = (string) $SourceText;
    $len = strlen($text);
    $out = '';
    $i = 0;

    while($i < $len) {
        $c = ord($text[$i++]);
        switch($c >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                $out .= chr($c);
                break;
            case 12: case 13:
                // Two-byte sequence: 110xxxxx 10xxxxxx.
                $char2 = ord(substr($text, $i++, 1));
                $out .= $this->_encodeCodePoint((($c & 0x1F) << 6) | ($char2 & 0x3F));
                break;
            case 14:
                // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
                $char2 = ord(substr($text, $i++, 1));
                $char3 = ord(substr($text, $i++, 1));
                $out .= $this->_encodeCodePoint((($c & 0x0F) << 12) | (($char2 & 0x3F) << 6) | ($char3 & 0x3F));
                break;
        }
    }

    return $out;
}

/**
 * Maps one Unicode code point to target-encoding bytes via $unicode_table.
 *
 * Table entries are stored as '0x....' strings. They are parsed with
 * hexdec() because hexadecimal strings are not numeric in PHP 7 and later,
 * so adding 0x8080 to the raw string would always yield 0x8080.
 *
 * @param int $codePoint Unicode code point.
 * @return string Encoded bytes, or '' when the target is neither GBK nor BIG5.
 */
function _encodeCodePoint($codePoint) {
    $entry = isset($this->unicode_table[$codePoint]) ? $this->unicode_table[$codePoint] : '';
    $code = hexdec($entry);

    if($this->config['TargetLang'] == 'GBK') {
        return $this->_hex2bin(dechex($code + 0x8080));
    } elseif($this->config['TargetLang'] == 'BIG5') {
        // Pass clean hex digits: the '0x' prefix would otherwise become a NUL byte.
        return $this->_hex2bin(dechex($code));
    }

    return '';
}

/**
 * Table backend for a GBK or BIG5 source with a UTF-8 target.
 *
 * @param string $SourceText GBK or BIG5 text.
 * @return string UTF-8 text.
 */
function _tableEncodingToUtf8($SourceText) {
    $text = (string) $SourceText;
    $len = strlen($text);
    $source = $this->config['SourceLang'];
    $ret = '';
    $i = 0;

    while($i < $len) {
        if(ord($text[$i]) <= 127) {
            $ret .= $text[$i];
            $i++;
            continue;
        }

        $code = hexdec(bin2hex(substr($text, $i, 2)));
        $i += 2;

        if($source == 'BIG5') {
            $key = $code;
        } elseif($source == 'GBK') {
            $key = $code - 0x8080;
        } else {
            // No table for any other source: the character is dropped, as before.
            continue;
        }

        // A missing entry maps to code point 0, matching the previous output.
        $unicode = isset($this->unicode_table[$key]) ? hexdec($this->unicode_table[$key]) : 0;

        // CHSUtoUTF8() returns the byte values as concatenated decimal numbers;
        // they are read back in groups of up to three digits.
        $decimalBytes = $this->CHSUtoUTF8($unicode);
        $digits = strlen($decimalBytes);
        for($j = 0; $j < $digits; $j += 3) {
            $ret .= chr((int) substr($decimalBytes, $j, 3));
        }
    }

    return $ret;
}
-
/**
 * Normalises an encoding name to one of the codes used by this class.
 *
 * Matching is case-insensitive and by prefix: "GB*" gives 'GBK', "BIG*"
 * gives 'BIG5', "UTF*" gives 'UTF-8' and "UNI*" gives 'UNICODE'.
 *
 * @param string $LangCode Encoding name supplied by the caller.
 * @return string|null Canonical code, or null when no prefix matches.
 */
function _lang($LangCode) {
    $upper = strtoupper($LangCode);

    $prefixes = array(
        'GB'  => 'GBK',
        'BIG' => 'BIG5',
        'UTF' => 'UTF-8',
        'UNI' => 'UNICODE',
    );

    foreach($prefixes as $prefix => $canonical) {
        if(strncmp($upper, $prefix, strlen($prefix)) === 0) {
            return $canonical;
        }
    }

    return null;
}
-
/**
 * Converts a string of hexadecimal digits to the bytes it represents.
 *
 * The input is read two digits at a time; a trailing single digit is
 * converted on its own.
 *
 * @param string $hexdata Hexadecimal digits, e.g. "b0a1".
 * @return string Binary string; '' for empty input.
 */
function _hex2bin($hexdata) {
    $hexdata = (string) $hexdata;
    // Start from an empty string so empty input returns '' instead of an
    // undefined variable.
    $bindata = '';
    $length = strlen($hexdata);
    for($i = 0; $i < $length; $i += 2) {
        $bindata .= chr(hexdec(substr($hexdata, $i, 2)));
    }
    return $bindata;
}
-
/**
 * Selects the code-table file for the configured language pair and loads it
 * into $unicode_table.
 *
 * The file is read as consecutive records of two big-endian 16-bit words
 * (key, value). How a record is stored depends on the conversion direction:
 *  - target UTF-8:   table[key]   = '0x' . hex(value)
 *  - source UTF-8:   table[value] = '0x' . hex(key)
 *  - target UNICODE: table[key]   = hex(value)
 * Any other pairing leaves the table empty.
 *
 * When iconv is not in use and the target is BIG5, the target is rewritten to
 * GBK and $convertbig5 is set.
 *
 * @return void
 */
function OpenTable() {
    $this->unicode_table = array();

    if(!$this->iconv_enabled && $this->config['TargetLang'] == 'BIG5') {
        $this->config['TargetLang'] = 'GBK';
        $this->convertbig5 = TRUE;
    }

    $source = $this->config['SourceLang'];
    $target = $this->config['TargetLang'];

    if($source == 'GBK' || $target == 'GBK') {
        $this->table = CODETABLE_DIR.$this->config['GBtoUnicode_table'];
    } elseif($source == 'BIG5' || $target == 'BIG5') {
        $this->table = CODETABLE_DIR.$this->config['BIG5toUnicode_table'];
    }

    // Neither language uses a table: there is nothing to open.
    if($this->table == '') {
        return;
    }

    // Read the file in one call so no handle is left open. A failed read
    // still raises PHP's own warning and leaves the table empty.
    $tabletmp = file_get_contents($this->table);
    if($tabletmp === false) {
        return;
    }

    // The storage direction is the same for every record, so decide it once.
    if($target == 'UTF-8') {
        $mode = 'to_utf8';
    } elseif($source == 'UTF-8') {
        $mode = 'from_utf8';
    } elseif($target == 'UNICODE') {
        $mode = 'to_unicode';
    } else {
        return;
    }

    // Ignore a trailing partial record instead of passing it to unpack().
    $usable = strlen($tabletmp) - (strlen($tabletmp) % 4);
    if($usable == 0) {
        return;
    }

    // unpack('n*') returns a 1-based list of big-endian 16-bit words.
    $words = unpack('n*', substr($tabletmp, 0, $usable));
    $count = count($words);
    for($i = 1; $i < $count; $i += 2) {
        $key = $words[$i];
        $value = $words[$i + 1];
        if($mode == 'to_utf8') {
            $this->unicode_table[$key] = '0x'.dechex($value);
        } elseif($mode == 'from_utf8') {
            $this->unicode_table[$value] = '0x'.dechex($key);
        } else {
            $this->unicode_table[$key] = dechex($value);
        }
    }
}
-
/**
 * Encodes a Unicode code point as UTF-8 and returns the byte values as
 * concatenated decimal numbers (not as raw bytes).
 *
 * For example 0x4E2D yields "228184173" (bytes 228, 184, 173).
 *
 * @param int $c Unicode code point.
 * @return string Decimal byte values joined together; '' for $c >= 0x200000.
 */
function CHSUtoUTF8($c) {
    if($c < 0x80) {
        $bytes = array($c);
    } elseif($c < 0x800) {
        $bytes = array(
            0xC0 | ($c >> 6),
            0x80 | ($c & 0x3F),
        );
    } elseif($c < 0x10000) {
        $bytes = array(
            0xE0 | ($c >> 12),
            0x80 | (($c >> 6) & 0x3F),
            0x80 | ($c & 0x3F),
        );
    } elseif($c < 0x200000) {
        $bytes = array(
            0xF0 | ($c >> 18),
            0x80 | (($c >> 12) & 0x3F),
            0x80 | (($c >> 6) & 0x3F),
            0x80 | ($c & 0x3F),
        );
    } else {
        $bytes = array();
    }

    return implode('', $bytes);
}
-
/**
 * Rewrites the two-byte characters of $c using the GB-to-BIG5 table file.
 *
 * A character starts at any byte >= 160. Its replacement is read from the
 * table at offset (lead - 160) * 510 + (trail - 1) * 2.
 *
 * @param string $c Text to convert.
 * @return string Converted text; $c unchanged when the table cannot be opened.
 */
function GB2312toBIG5($c) {
    $f = fopen(CODETABLE_DIR.$this->config['GBtoBIG5_table'], 'rb');
    if($f === false) {
        // fopen() has already raised a warning; return the input untouched.
        return $c;
    }

    $max = strlen($c) - 1;
    for($i = 0; $i < $max; $i++) {
        $h = ord($c[$i]);
        if($h < 160) {
            continue;
        }

        $l = ord($c[$i + 1]);
        if($h == 161 && $l == 64) {
            // Must be exactly two bytes: both byte positions are overwritten below.
            $gb = "  ";
        } elseif(fseek($f, ($h - 160) * 510 + ($l - 1) * 2) === 0) {
            $gb = fread($f, 2);
        } else {
            $gb = false;
        }

        // A failed seek or short read leaves the original bytes in place,
        // since an empty string cannot be assigned to a string offset.
        if(is_string($gb) && strlen($gb) == 2) {
            $c[$i] = $gb[0];
            $c[$i + 1] = $gb[1];
        }
        $i++;
    }

    fclose($f);
    return $c;
}
-
/**
 * Decodes a single UTF-8 encoded character to its Unicode code point.
 *
 * @param string $char One UTF-8 character of 1 to 4 bytes.
 * @return int|null Code point, or null when the length is not 1 to 4 bytes.
 */
function Utf8_Unicode($char) {
    $length = strlen($char);

    if($length == 1) {
        return ord($char);
    }

    // Mask applied to the lead byte, keyed by sequence length.
    $leadMasks = array(2 => 0x3f, 3 => 0x1f, 4 => 0x0f);
    if(!isset($leadMasks[$length])) {
        return null;
    }

    $n = ord($char[0]) & $leadMasks[$length];
    for($k = 1; $k < $length; $k++) {
        // Each following byte contributes its low six bits.
        $n = ($n << 6) + (ord($char[$k]) & 0x3f);
    }

    return $n;
}
-
- }
-
- ?>