/e/class/doiconv.php
PHP | 525 lines | 387 code | 98 blank | 40 comment | 104 complexity | d4ee46961d22cf1ea1f2eeddf085cf85 MD5 | raw file
- <?php
// Marker constant set when this file is loaded.
// NOTE(review): presumably checked elsewhere to detect that the converter is available - confirm against callers.
define('InEmpireCMSIconv', true);
-
/**
 * Table-driven converter between GB2312 (Simplified Chinese), BIG5 (Traditional
 * Chinese), UTF-8, HTML numeric entities ("UNICODE") and pinyin.
 *
 * Typical use is Convert($from, $to, $text). Alternatively configure the object
 * with setvar(), load the tables with OpenTable(), fill $SourceText (directly or
 * via OpenFile()/SiteOpen()) and call ConvertIT().
 *
 * Failure to read a table or source file prints a message and terminates the
 * script (echo + exit), as the class has always done.
 *
 * The code deliberately sticks to PHP 5-compatible syntax while also running
 * on PHP 7 and 8.
 */
class Chinese
{
    // Pinyin lookup table: one array(pinyin, code) pair per line of the pinyin table file.
    var $pinyin_table = array();

    // Contents of the GB/BIG5 <-> Unicode table; direction depends on what OpenTable() loaded.
    var $unicode_table = array();

    // File handle of the binary Simplified <-> Traditional lookup table.
    var $ctf;

    // Text waiting to be converted.
    var $SourceText = "";

    // Configuration
    var $config = array(
        'codetable_dir' => '', // directory holding the conversion tables
        'source_lang' => '', // encoding of the input
        'target_lang' => '', // encoding of the output
        'GBtoBIG5_table' => 'gb-big5.table', // Simplified -> Traditional table
        'BIG5toGB_table' => 'big5-gb.table', // Traditional -> Simplified table
        'GBtoPinYin_table' => 'gb-pinyin.table', // Simplified -> pinyin table
        'GBtoUnicode_table' => 'gb-unicode.table', // Simplified -> Unicode table
        'BIG5toUnicode_table' => 'big5-unicode.table' // Traditional -> Unicode table
    );

    /**
     * @param string $dir Base directory; tables are read from "<dir>../data/codetable/".
     */
    function __construct($dir = './')
    {
        $this->config['codetable_dir'] = $dir."../data/codetable/";
    }

    /**
     * PHP 4-style constructor, kept so existing explicit calls keep working.
     * PHP 8 no longer treats it as a constructor, hence __construct() above.
     *
     * @param string $dir See __construct().
     */
    function Chinese($dir = './')
    {
        $this->__construct($dir);
    }

    /**
     * Convert $source_string from one encoding to another.
     *
     * @param string $source_lang   "GB2312", "BIG5" or "UTF8"; '' keeps the configured value.
     * @param string $target_lang   "GB2312", "BIG5", "UTF8", "UNICODE" or "PinYin"; '' keeps the configured value.
     * @param string $source_string Text to convert.
     * @return string|null Converted text; the input unchanged when it is empty or both
     *                     encodings are equal; null for an unsupported combination.
     */
    function Convert($source_lang, $target_lang, $source_string = '')
    {
        // Nothing to do when the encodings match or there is no text.
        if ($source_lang == $target_lang || $source_string == '') {
            return $source_string;
        }

        if ($source_lang != '') {
            $this->config['source_lang'] = $source_lang;
        }

        if ($target_lang != '') {
            $this->config['target_lang'] = $target_lang;
        }

        $this->SourceText = $source_string;

        $this->OpenTable();

        return $this->ConvertIT();
    }

    /**
     * Turn a string of hexadecimal digits into the bytes it describes.
     *
     * @param string $hexdata Hex digits, two per byte (a trailing single digit forms its own byte).
     * @return string Binary string.
     */
    function _hex2bin($hexdata)
    {
        $bindata = '';
        $length = strlen($hexdata);
        for ($i = 0; $i < $length; $i += 2) {
            $bindata .= chr(hexdec(substr($hexdata, $i, 2)));
        }

        return $bindata;
    }

    /**
     * Internal: parse a hexadecimal field such as "0x4E2D" into an integer.
     *
     * Non-hex characters (the "x" of the prefix, whitespace) are dropped first.
     * hexdec() used to ignore them silently but reports them since PHP 7.4, and
     * PHP 7+ no longer treats "0x..." strings as numbers in arithmetic.
     *
     * @param mixed $field Raw table field.
     * @return int|float Parsed value, 0 when the field holds no hex digits.
     */
    function _hexToInt($field)
    {
        return hexdec(preg_replace('/[^0-9A-Fa-f]/', '', (string) $field));
    }

    /**
     * Internal: print an error message and stop the script.
     *
     * @param string $message Text to print.
     */
    function _fail($message)
    {
        echo $message;
        exit;
    }

    /**
     * Internal: open one of the binary lookup tables for reading.
     *
     * @param string $tableKey Key in $config naming the table file.
     * @return resource Open handle (the script terminates on failure).
     */
    function _openBinaryTable($tableKey)
    {
        // fopen() signals failure with false, never null.
        $handle = @fopen($this->config['codetable_dir'].$this->config[$tableKey], "rb");
        if ($handle === false) {
            $this->_fail("打开打开转换表文件失败!");
        }

        return $handle;
    }

    /**
     * Internal: load the pinyin table into $pinyin_table.
     * Each line is split on a single space into array(field 0, field 1).
     */
    function _loadPinyinTable()
    {
        $lines = @file($this->config['codetable_dir'].$this->config['GBtoPinYin_table']);
        if (!$lines) {
            $this->_fail("打开打开转换表文件失败!");
        }

        $this->pinyin_table = array();
        foreach (array_values($lines) as $index => $line) {
            $fields = explode(" ", $line);
            $this->pinyin_table[$index] = array($fields[0], isset($fields[1]) ? $fields[1] : null);
        }
    }

    /**
     * Internal: load a text Unicode table into $unicode_table.
     *
     * Every line contributes one entry: the 6 characters at $keyOffset are parsed
     * as hex and used as the key, the $valueLength characters at $valueOffset are
     * stored verbatim as the value.
     *
     * @param string $tableKey    Key in $config naming the table file.
     * @param int    $keyOffset   Column where the key field starts.
     * @param int    $valueOffset Column where the value field starts.
     * @param int    $valueLength Length of the value field.
     * @param string $message     Message printed when the file cannot be read.
     */
    function _loadUnicodeTable($tableKey, $keyOffset, $valueOffset, $valueLength, $message)
    {
        $lines = @file($this->config['codetable_dir'].$this->config[$tableKey]);
        if (!$lines) {
            $this->_fail($message);
        }

        $this->unicode_table = array();
        // foreach replaces the each() loop, which no longer exists in PHP 8.
        foreach ($lines as $line) {
            $key = $this->_hexToInt(substr($line, $keyOffset, 6));
            $this->unicode_table[$key] = substr($line, $valueOffset, $valueLength);
        }
    }

    /**
     * Load whichever table the configured source/target pair needs.
     * Combinations not listed below load nothing.
     */
    function OpenTable()
    {
        $source = $this->config['source_lang'];
        $target = $this->config['target_lang'];
        $openFailed = "打开打开转换表文件失败!";

        // Source is Simplified Chinese
        if ($source == "GB2312") {
            if ($target == "BIG5") {
                $this->ctf = $this->_openBinaryTable('GBtoBIG5_table');
            }
            if ($target == "PinYin") {
                $this->_loadPinyinTable();
            }
            if ($target == "UTF8") {
                $this->_loadUnicodeTable('GBtoUnicode_table', 0, 7, 6, "编码转换失败!");
            }
            if ($target == "UNICODE") {
                $this->_loadUnicodeTable('GBtoUnicode_table', 0, 9, 4, $openFailed);
            }
        }

        // Source is Traditional Chinese
        if ($source == "BIG5") {
            if ($target == "GB2312") {
                $this->ctf = $this->_openBinaryTable('BIG5toGB_table');
            }
            if ($target == "UTF8") {
                $this->_loadUnicodeTable('BIG5toUnicode_table', 0, 7, 6, $openFailed);
            }
            if ($target == "UNICODE") {
                $this->_loadUnicodeTable('BIG5toUnicode_table', 0, 9, 4, $openFailed);
            }
            if ($target == "PinYin") {
                $this->_loadPinyinTable();
            }
        }

        // Source is UTF-8: same files, but keyed by the Unicode column.
        if ($source == "UTF8") {
            if ($target == "GB2312") {
                $this->_loadUnicodeTable('GBtoUnicode_table', 7, 0, 6, $openFailed);
            }
            if ($target == "BIG5") {
                $this->_loadUnicodeTable('BIG5toUnicode_table', 7, 0, 6, $openFailed);
            }
        }
    }

    /**
     * Internal: read a file (or URL) completely; terminates the script when it is
     * unreadable or empty.
     *
     * @param string $position Path or URL.
     * @return string File contents.
     */
    function _readSource($position)
    {
        $content = @file_get_contents($position);
        if ($content === false || $content === '') {
            $this->_fail("打开文件失败!");
        }

        return $content;
    }

    /**
     * Internal: replace "charset=<source_lang>" with "charset=<target_lang>" in $text,
     * case-insensitively. The encoding name is escaped so it is matched literally.
     *
     * @param string $text Document text.
     * @return string Text with the charset declaration rewritten.
     */
    function _rewriteCharset($text)
    {
        $pattern = "/charset=".preg_quote($this->config['source_lang'], '/')."/i";

        return preg_replace($pattern, "charset=".$this->config['target_lang'], $text);
    }

    /**
     * Load a file into $SourceText.
     *
     * @param string $position Path or URL of the file.
     * @param bool   $isHTML   When true, the charset declaration is rewritten to the
     *                         target encoding and all CR/LF characters are removed.
     */
    function OpenFile($position, $isHTML = false)
    {
        $this->SourceText = $this->_readSource($position);

        if ($isHTML) {
            $this->SourceText = $this->_rewriteCharset($this->SourceText);
            $this->SourceText = str_replace(array("\n", "\r"), "", $this->SourceText);
        }
    }

    /**
     * Load a page into $SourceText and rewrite its charset declaration to the
     * target encoding. Line breaks are kept.
     *
     * @param string $position Path or URL of the page.
     */
    function SiteOpen($position)
    {
        $this->SourceText = $this->_rewriteCharset($this->_readSource($position));
    }

    /**
     * Set one configuration entry.
     *
     * @param string $parameter Key in $config.
     * @param mixed  $value     New value.
     * @return mixed $parameter itself when it is blank (nothing is stored), otherwise null.
     */
    function setvar($parameter, $value)
    {
        if (!trim($parameter)) {
            return $parameter;
        }

        $this->config[$parameter] = $value;
    }

    /**
     * Encode a Unicode code point as UTF-8 and return the byte values as
     * concatenated DECIMAL numbers (e.g. 0x4E2D gives "228184173").
     *
     * Every byte of a multi-byte sequence is >= 128 and therefore exactly three
     * digits long, which is what lets CHStoUTF8() split the string again.
     * Values below 0x80 are returned as their plain decimal representation;
     * values of 0x200000 and above yield "".
     *
     * @param int $c Code point.
     * @return string Decimal byte values, concatenated.
     */
    function CHSUtoUTF8($c)
    {
        $bytes = array();

        if ($c < 0x80) {
            $bytes[] = $c;
        } elseif ($c < 0x800) {
            $bytes[] = 0xC0 | ($c >> 6);
            $bytes[] = 0x80 | ($c & 0x3F);
        } elseif ($c < 0x10000) {
            $bytes[] = 0xE0 | ($c >> 12);
            $bytes[] = 0x80 | (($c >> 6) & 0x3F);
            $bytes[] = 0x80 | ($c & 0x3F);
        } elseif ($c < 0x200000) {
            $bytes[] = 0xF0 | ($c >> 18);
            $bytes[] = 0x80 | (($c >> 12) & 0x3F);
            $bytes[] = 0x80 | (($c >> 6) & 0x3F);
            $bytes[] = 0x80 | ($c & 0x3F);
        }

        return implode('', $bytes);
    }

    /**
     * Convert $SourceText between GB2312/BIG5 and UTF-8, in whichever direction
     * the configured source encoding dictates, using $unicode_table.
     *
     * GB2312/BIG5 source: returns UTF-8 and afterwards empties $unicode_table and
     * $SourceText. A double-byte character without a table entry becomes a NUL byte.
     *
     * UTF8 source: returns GB2312 or BIG5 bytes according to target_lang; 2- and
     * 3-byte sequences are looked up, other non-ASCII lead bytes are skipped.
     *
     * @return string|null Converted text, or null when source_lang is none of the three.
     */
    function CHStoUTF8()
    {
        $source = $this->config["source_lang"];

        if ($source == "BIG5" || $source == "GB2312") {
            // The GB table is keyed by the code with both high bits cleared.
            $keyShift = ($source == "GB2312") ? 0x8080 : 0;
            $text = (string) $this->SourceText;
            $length = strlen($text);
            $ret = "";

            // Walk by index: testing the remaining string for truthiness would stop
            // early on a trailing "0", and re-slicing it each pass is quadratic.
            $i = 0;
            while ($i < $length) {
                if (ord($text[$i]) > 127) {
                    $key = hexdec(bin2hex(substr($text, $i, 2))) - $keyShift;
                    $mapped = isset($this->unicode_table[$key]) ? $this->unicode_table[$key] : '';
                    $digits = $this->CHSUtoUTF8($this->_hexToInt($mapped));

                    // Split the decimal string back into one byte per three digits.
                    $digitCount = strlen($digits);
                    for ($j = 0; $j < $digitCount; $j += 3) {
                        $ret .= chr((int) substr($digits, $j, 3));
                    }
                    $i += 2;
                } else {
                    $ret .= $text[$i];
                    $i++;
                }
            }

            $this->unicode_table = array();
            $this->SourceText = "";
            return $ret;
        }

        if ($source == "UTF8") {
            $target = $this->config["target_lang"];
            $text = (string) $this->SourceText;
            $length = strlen($text);
            $out = '';

            $i = 0;
            while ($i < $length) {
                $lead = ord($text[$i++]);
                $codepoint = -1;

                switch ($lead >> 4) {
                    case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                        // 0xxxxxxx
                        $out .= chr($lead);
                        break;
                    case 12: case 13:
                        // 110x xxxx 10xx xxxx
                        $b2 = ($i < $length) ? ord($text[$i]) : 0;
                        $i++;
                        $codepoint = (($lead & 0x1F) << 6) | ($b2 & 0x3F);
                        break;
                    case 14:
                        // 1110 xxxx 10xx xxxx 10xx xxxx
                        $b2 = ($i < $length) ? ord($text[$i]) : 0;
                        $i++;
                        $b3 = ($i < $length) ? ord($text[$i]) : 0;
                        $i++;
                        $codepoint = (($lead & 0x0F) << 12) | (($b2 & 0x3F) << 6) | ($b3 & 0x3F);
                        break;
                }

                if ($codepoint >= 0) {
                    // Table values are hex text ("0x...."); parse them explicitly
                    // because PHP 7+ does not treat such strings as numbers.
                    $code = isset($this->unicode_table[$codepoint])
                        ? $this->_hexToInt($this->unicode_table[$codepoint])
                        : 0;

                    if ($target == "GB2312") {
                        $out .= $this->_hex2bin(dechex($code + 0x8080));
                    } elseif ($target == "BIG5") {
                        $out .= $this->_hex2bin(dechex($code));
                    }
                }
            }

            return $out;
        }

        return null;
    }

    /**
     * Convert GB2312/BIG5 $SourceText to ASCII with "&#xHHHH;" entities, using
     * $unicode_table. A character without a table entry becomes "&#x;".
     * $SourceText is emptied afterwards.
     *
     * @return string Converted text.
     */
    function CHStoUNICODE()
    {
        $source = $this->config["source_lang"];
        $text = (string) $this->SourceText;
        $length = strlen($text);
        $utf = "";

        // Index-based walk so that a trailing "0" is not mistaken for end of input.
        $i = 0;
        while ($i < $length) {
            if (ord($text[$i]) > 127) {
                $code = hexdec(bin2hex(substr($text, $i, 2)));

                if ($source == "GB2312" || $source == "BIG5") {
                    $key = ($source == "GB2312") ? $code - 0x8080 : $code;
                    $mapped = isset($this->unicode_table[$key]) ? $this->unicode_table[$key] : '';
                    $utf .= "&#x".$mapped.";";
                }
                $i += 2;
            } else {
                $utf .= $text[$i];
                $i++;
            }
        }

        $this->SourceText = "";
        return $utf;
    }

    /**
     * Convert $SourceText between GB2312 and BIG5 (either direction) by replacing
     * every double-byte character in place with the two bytes stored for it in
     * the binary table behind $ctf. The handle is closed and $SourceText emptied.
     *
     * A pair is left untouched when no table is open or the table holds no full
     * entry for it.
     *
     * @return string Converted text (same byte length as the input).
     */
    function GB2312toBIG5()
    {
        $text = (string) $this->SourceText;
        $tableOpen = is_resource($this->ctf);

        // Stop one byte early: a lead byte always needs a following byte.
        $max = strlen($text) - 1;

        for ($i = 0; $i < $max; $i++) {
            $h = ord($text[$i]);

            if ($h >= 160) {
                $l = ord($text[$i + 1]);

                if ($h == 161 && $l == 64) {
                    // Two characters, so both bytes of the pair get a replacement.
                    $pair = "  ";
                } elseif ($tableOpen && fseek($this->ctf, ($h - 160) * 510 + ($l - 1) * 2) === 0) {
                    $pair = fread($this->ctf, 2);
                } else {
                    $pair = '';
                }

                // Assigning an empty string to a string offset is a fatal error on
                // PHP >= 7.1, so only write when a complete entry was read.
                if (is_string($pair) && strlen($pair) == 2) {
                    $text[$i] = $pair[0];
                    $text[$i + 1] = $pair[1];
                }
                $i++;
            }
        }

        if ($tableOpen) {
            fclose($this->ctf);
        }
        $this->ctf = null;

        $this->SourceText = "";

        return $text;
    }

    /**
     * Look up the pinyin for one code as computed by CHStoPinYin().
     *
     * @param int $num A byte value (1..159) or a negative double-byte code.
     * @return string The character itself for 1..159; "" outside -20319..-10247 or
     *                when no table row applies; otherwise field 0 of the last row
     *                whose field 1 is <= $num.
     */
    function PinYinSearch($num)
    {
        if ($num > 0 && $num < 160) {
            return chr($num);
        }

        if ($num < -20319 || $num > -10247) {
            return "";
        }

        for ($i = count($this->pinyin_table) - 1; $i >= 0; $i--) {
            if ($this->pinyin_table[$i][1] <= $num) {
                return $this->pinyin_table[$i][0];
            }
        }

        // No row matched (e.g. empty table).
        return "";
    }

    /**
     * Convert $SourceText to pinyin: one PinYinSearch() result per character,
     * joined with single spaces. BIG5 input is first converted to GB2312.
     * $SourceText and $pinyin_table are emptied afterwards.
     *
     * @return string Space-separated result.
     */
    function CHStoPinYin()
    {
        if ($this->config['source_lang'] == "BIG5") {
            $this->ctf = $this->_openBinaryTable('BIG5toGB_table');

            $this->SourceText = $this->GB2312toBIG5();
            $this->config['target_lang'] = "PinYin";
        }

        $text = (string) $this->SourceText;
        $length = strlen($text);
        $ret = array();

        for ($i = 0; $i < $length; $i++) {
            $p = ord($text[$i]);

            if ($p > 160) {
                // Combine lead and trail byte into the negative code used by the table.
                $i++;
                $q = ($i < $length) ? ord($text[$i]) : 0;
                $p = $p * 256 + $q - 65536;
            }

            $ret[] = $this->PinYinSearch($p);
        }

        $this->SourceText = "";
        $this->pinyin_table = array();

        return implode(" ", $ret);
    }

    /**
     * Convert $SourceText according to the configured source/target encodings.
     * The required table must already be loaded (see OpenTable()).
     *
     * @return string|null Converted text, or null for an unsupported combination.
     */
    function ConvertIT()
    {
        $source = $this->config['source_lang'];
        $target = $this->config['target_lang'];

        $fromChinese = ($source == "GB2312" || $source == "BIG5");
        $toChinese = ($target == "GB2312" || $target == "BIG5");

        // Simplified <-> Traditional
        if ($fromChinese && $toChinese) {
            return $this->GB2312toBIG5();
        }

        // Chinese -> pinyin
        if ($fromChinese && $target == "PinYin") {
            return $this->CHStoPinYin();
        }

        // Chinese <-> UTF-8
        if (($fromChinese || $source == "UTF8") && ($toChinese || $target == "UTF8")) {
            return $this->CHStoUTF8();
        }

        // Chinese -> numeric entities
        if ($fromChinese && $target == "UNICODE") {
            return $this->CHStoUNICODE();
        }

        return null;
    }
}
- ?>