/ php-ppcms/includes/classes/core.charset.class.php
PHP | 310 lines | 262 code | 34 blank | 14 comment | 64 complexity | 92194b8022b891e00c714ba9a5935636 MD5 | raw file
Possible License(s): LGPL-2.1, GPL-2.0
<?php
/***************************************************************
* Copyright notice
* (c) 2009, jianyuzhu@gmail.com
* All rights reserved
* This script is part of the PPEMI project.
***************************************************************/
-
/**
 * Converts text between GBK, BIG5, UTF-8 and "UNICODE" (HTML hexadecimal
 * numeric character references such as &#x4e00;).
 *
 * Two back ends are available:
 *  - iconv, used when the extension is loaded, the target is not 'BIG5' and
 *    table mode has not been forced;
 *  - lookup tables / map files read from the charset directory otherwise.
 *
 * Charset names are compared literally, so callers must pass the canonical
 * spellings 'GBK', 'BIG5', 'UTF-8' and 'UNICODE' (see _lang() for a helper
 * that normalises common aliases).
 *
 * Characters that cannot be mapped are replaced by '?' in Convert().
 */
class CoreCharsetConverter {
    /** File name of the BIG5 -> GB map (read by Big5_GB()). */
    var $gd_map;
    /** File name of the GB -> BIG5 map (read by GB_Big5()). */
    var $big5_map;
    /** Highest byte value treated as a single-byte character by the map converters. */
    var $dep_char = 127;

    /** True when Convert() delegates to iconv instead of the lookup tables. */
    var $iconv_enabled = false;
    /** Lookup table filled by _open_table(); values are hexadecimal strings. */
    var $unicode_table = array();

    /** Directory that holds the map and table files. */
    var $charset_dir = '';
    /** File name of the GB <-> Unicode table. */
    var $gb_unicode_table;
    /** File name of the BIG5 <-> Unicode table. */
    var $big5_unicode_table;
    /** Source charset name. */
    var $fromLang = '';
    /** Target charset name. */
    var $toLang = '';
    /** Path of the table file selected by _open_table(). */
    var $table = '';

    /**
     * @param string $fromLang   Source charset ('GBK', 'BIG5', 'UTF-8').
     * @param string $toLang     Target charset ('GBK', 'BIG5', 'UTF-8', 'UNICODE').
     * @param bool   $forceTable Use the lookup tables even when iconv is available.
     */
    function __construct($fromLang = '', $toLang = '', $forceTable = false) {
        $this->charset_dir = CONFIG_PATH . CONFIG_DIR_CHARSET;

        $this->gd_map = 'gb.map';
        $this->big5_map = 'big5.map';

        $this->gb_unicode_table = 'gb_unicode.table';
        $this->big5_unicode_table = 'big5_unicode.table';

        $this->fromLang = $fromLang;
        $this->toLang = $toLang;

        if( function_exists('iconv') && $this->toLang != 'BIG5' && $forceTable == false ) {
            $this->iconv_enabled = true;
        } else {
            $this->iconv_enabled = false;
            $this->_open_table();
        }
    }

    /**
     * Legacy PHP 4 style constructor name, kept so that existing explicit
     * calls (e.g. from subclasses) continue to work. PHP 8 no longer treats
     * it as a constructor, which is why __construct() exists.
     */
    function CoreCharsetConverter($fromLang = '', $toLang = '', $forceTable = false) {
        $this->__construct($fromLang, $toLang, $forceTable);
    }

    /**
     * Converts a BIG5 string to GB using the map file named by $gd_map.
     *
     * @param string $string BIG5 encoded text.
     * @return string Converted text; the input unchanged if the map file cannot be opened.
     */
    function Big5_GB($string) {
        return $this->_convert_with_map($string, $this->gd_map, '_convert_big5_gb');
    }

    /**
     * Converts a GB string to BIG5 using the map file named by $big5_map.
     *
     * @param string $string GB encoded text.
     * @return string Converted text; the input unchanged if the map file cannot be opened.
     */
    function GB_Big5($string) {
        return $this->_convert_with_map($string, $this->big5_map, '_convert_gb_big5');
    }

    /**
     * Decodes one UTF-8 encoded character into its code point.
     *
     * The lead-byte masks are wider than strictly necessary; for well-formed
     * sequences the extra bits are always zero, so the result is the same.
     *
     * @param string $char A single UTF-8 sequence of 1 to 4 bytes.
     * @return int|null Code point, or null when the length is not 1..4.
     */
    function Utf8_Unicode($char) {
        switch( strlen($char) ) {
            case 1:
                return ord($char);
            case 2:
                $n = (ord($char[0]) & 0x3f) << 6;
                $n += ord($char[1]) & 0x3f;
                return $n;
            case 3:
                $n = (ord($char[0]) & 0x1f) << 12;
                $n += (ord($char[1]) & 0x3f) << 6;
                $n += ord($char[2]) & 0x3f;
                return $n;
            case 4:
                $n = (ord($char[0]) & 0x0f) << 18;
                $n += (ord($char[1]) & 0x3f) << 12;
                $n += (ord($char[2]) & 0x3f) << 6;
                $n += ord($char[3]) & 0x3f;
                return $n;
        }

        return null;
    }

    /**
     * Encodes a code point as UTF-8 and returns the byte VALUES as a string
     * of concatenated decimal numbers (e.g. 0x4E00 gives "228184128").
     *
     * This is not a byte string. Every multi-byte value is 128..255 and thus
     * exactly three digits long, so consumers split the result into chunks
     * of three digits and pass each chunk to chr(). The format is kept for
     * backward compatibility with existing callers.
     *
     * @param int $c Code point.
     * @return string Decimal digit string; '' for code points >= 0x200000.
     */
    function CHSUtoUTF8($c) {
        $str = '';
        if( $c < 0x80 ) {
            $str .= $c;
        } elseif( $c < 0x800 ) {
            $str .= (0xC0 | ($c >> 6));
            $str .= (0x80 | ($c & 0x3F));
        } elseif( $c < 0x10000 ) {
            $str .= (0xE0 | ($c >> 12));
            $str .= (0x80 | (($c >> 6) & 0x3F));
            $str .= (0x80 | ($c & 0x3F));
        } elseif( $c < 0x200000 ) {
            $str .= (0xF0 | ($c >> 18));
            $str .= (0x80 | (($c >> 12) & 0x3F));
            $str .= (0x80 | (($c >> 6) & 0x3F));
            $str .= (0x80 | ($c & 0x3F));
        }

        return $str;
    }

    /**
     * Converts $string from $fromLang to $toLang.
     *
     * @param string $string Text encoded in $fromLang.
     * @return string|false Converted text. In iconv mode the return value of
     *                      iconv() is passed through, which is false on failure.
     */
    function Convert($string) {
        if( $this->fromLang == $this->toLang ) {
            return $string;
        }

        $string = (string) $string;

        if( $this->toLang == 'UNICODE' ) {
            return $this->_to_entities($string);
        }

        if( $this->iconv_enabled ) {
            return iconv($this->fromLang, $this->toLang, $string);
        }

        // The Unicode tables cannot translate between the two double-byte
        // charsets directly; the dedicated map files can.
        if( $this->fromLang == 'GBK' && $this->toLang == 'BIG5' ) {
            return $this->GB_Big5($string);
        }
        if( $this->fromLang == 'BIG5' && $this->toLang == 'GBK' ) {
            return $this->Big5_GB($string);
        }

        if( $this->fromLang == 'UTF-8' ) {
            return $this->_utf8_to_multibyte($string);
        }

        return $this->_multibyte_to_utf8($string);
    }

    //private

    /**
     * Looks up one BIG5 character in the open BIG5 -> GB map.
     *
     * @param string   $char Two-byte character.
     * @param resource $fp   Open map file.
     * @return string Mapped two bytes, or $char unchanged when no entry can be read.
     */
    function _convert_big5_gb($char, $fp) {
        return $this->_map_lookup($char, $fp);
    }

    /**
     * Looks up one GB character in the open GB -> BIG5 map.
     *
     * @param string   $char Two-byte character.
     * @param resource $fp   Open map file.
     * @return string Mapped two bytes, or $char unchanged when no entry can be read.
     */
    function _convert_gb_big5($char, $fp) {
        return $this->_map_lookup($char, $fp);
    }

    /**
     * Normalises a charset alias to one of the names this class understands.
     *
     * @param string $langcode Charset name in any letter case.
     * @return string|null 'GBK', 'BIG5', 'UTF-8', 'UNICODE', or null when unknown.
     */
    function _lang($langcode) {
        $langcode = strtoupper($langcode);

        if( substr($langcode, 0, 2) == 'GB' ) {
            return 'GBK';
        } elseif( substr($langcode, 0, 3) == 'BIG' ) {
            return 'BIG5';
        } elseif( substr($langcode, 0, 3) == 'UTF' ) {
            return 'UTF-8';
        } elseif( substr($langcode, 0, 3) == 'UNI' ) {
            return 'UNICODE';
        }

        return null;
    }

    /**
     * Turns a hexadecimal string into the bytes it describes.
     *
     * An optional "0x" prefix is ignored and odd-length input is left-padded
     * with a zero so that e.g. "abc" yields the two bytes 0x0A 0xBC.
     *
     * @param string $hexdata Hexadecimal digits.
     * @return string Binary string ('' for empty input).
     */
    function _hex2bin($hexdata) {
        $hexdata = (string) $hexdata;
        if( strncasecmp($hexdata, '0x', 2) == 0 ) {
            $hexdata = (string) substr($hexdata, 2);
        }
        if( $hexdata === '' ) {
            return '';
        }
        if( strlen($hexdata) % 2 != 0 ) {
            $hexdata = '0' . $hexdata;
        }

        return pack('H*', $hexdata);
    }

    /**
     * Loads the charset <-> Unicode table needed for the configured
     * conversion into $unicode_table.
     *
     * The table file is a sequence of 4-byte records, each holding two
     * big-endian 16-bit integers: charset code ("key") and Unicode code
     * point ("value"). Stored values keep the historical string format:
     * "0x"-prefixed hex for conversions from/to UTF-8, plain hex for
     * conversions to UNICODE.
     */
    function _open_table() {
        $this->unicode_table = array();

        if( $this->fromLang == 'GBK' || $this->toLang == 'GBK' ) {
            $file = $this->gb_unicode_table;
        } elseif( $this->fromLang == 'BIG5' || $this->toLang == 'BIG5' ) {
            $file = $this->big5_unicode_table;
        } else {
            // Neither double-byte charset is involved: no table is needed.
            return;
        }

        $this->table = $this->_resolve_path($file);

        // Only these directions store anything; skip the file read otherwise.
        if( $this->toLang != 'UTF-8' && $this->fromLang != 'UTF-8' && $this->toLang != 'UNICODE' ) {
            return;
        }

        if( !is_file($this->table) ) {
            trigger_error('CoreCharsetConverter: charset table not found: ' . $this->table, E_USER_WARNING);
            return;
        }

        $tabletmp = file_get_contents($this->table);
        if( $tabletmp === false ) {
            trigger_error('CoreCharsetConverter: unable to read charset table: ' . $this->table, E_USER_WARNING);
            return;
        }

        // "$i + 4 <= $n" ignores a truncated trailing record.
        for( $i = 0, $n = strlen($tabletmp); $i + 4 <= $n; $i += 4 ) {
            $tmp = unpack('nkey/nvalue', substr($tabletmp, $i, 4));
            if( $this->toLang == 'UTF-8' ) {
                $this->unicode_table[$tmp['key']] = '0x' . dechex($tmp['value']);
            } elseif( $this->fromLang == 'UTF-8' ) {
                $this->unicode_table[$tmp['value']] = '0x' . dechex($tmp['key']);
            } elseif( $this->toLang == 'UNICODE' ) {
                $this->unicode_table[$tmp['key']] = dechex($tmp['value']);
            }
        }
    }

    /**
     * Backward-compatible alias of Big5_GB().
     *
     * Historically this method took no argument and always returned ''.
     *
     * @param string $string BIG5 encoded text (optional).
     * @return string Converted text.
     */
    function _open_map($string = '') {
        return $this->Big5_GB($string);
    }

    /**
     * Shared implementation of Big5_GB() and GB_Big5(): walks the string and
     * translates every two-byte character through the given map file.
     *
     * @param string $string Input text.
     * @param string $map    Map file name.
     * @param string $method Name of the per-character lookup method.
     * @return string Converted text, or the input unchanged if the map cannot be opened.
     */
    function _convert_with_map($string, $map, $method) {
        $string = (string) $string;
        if( $string === '' ) {
            return '';
        }

        $path = $this->_resolve_path($map);
        $fp = is_file($path) ? fopen($path, 'rb') : false;
        if( $fp === false ) {
            trigger_error('CoreCharsetConverter: unable to open charset map: ' . $path, E_USER_WARNING);
            return $string;
        }

        $outstring = '';
        $n = strlen($string);
        for( $i = 0; $i < $n; $i++ ) {
            $byte = $string[$i];
            // A high byte only starts a pair when a second byte follows;
            // a dangling lead byte at the end is copied through unchanged.
            if( ord($byte) > $this->dep_char && $i + 1 < $n ) {
                $outstring .= $this->$method(substr($string, $i, 2), $fp);
                $i++;
            } else {
                $outstring .= $byte;
            }
        }

        fclose($fp);

        return $outstring;
    }

    /**
     * Reads the two-byte replacement for $char from an open map file.
     *
     * @param string   $char Two-byte character.
     * @param resource $fp   Open map file.
     * @return string Replacement bytes, or $char when the entry is out of range.
     */
    function _map_lookup($char, $fp) {
        if( strlen($char) < 2 || !is_resource($fp) ) {
            return $char;
        }

        $lead = ord($char[0]);
        $trail = ord($char[1]);
        $address = ($lead - 160) * 510 + ($trail - 1) * 2;

        // Lead bytes below 0xA0 give a negative offset, which the map does not cover.
        if( $address < 0 || fseek($fp, $address) !== 0 ) {
            return $char;
        }

        $pair = fread($fp, 2);
        if( $pair === false || strlen($pair) < 2 ) {
            return $char;
        }

        return $pair;
    }

    /**
     * Finds a charset data file. A file that exists under the name as given
     * wins (the historical behaviour for map files); otherwise the file is
     * looked for inside $charset_dir.
     *
     * @param string $file File name.
     * @return string Path to use.
     */
    function _resolve_path($file) {
        $file = (string) $file;
        if( $file === '' || is_file($file) || $this->charset_dir == '' ) {
            return $file;
        }

        $dir = $this->charset_dir;
        $last = substr($dir, -1);
        if( $last != '/' && $last != '\\' ) {
            $dir .= '/';
        }

        return $dir . $file;
    }

    /**
     * Returns the integer stored in $unicode_table under $key.
     *
     * @param int $key Table key.
     * @return int|null Decoded value, or null when there is no entry.
     */
    function _table_lookup($key) {
        if( !isset($this->unicode_table[$key]) ) {
            return null;
        }

        $value = $this->unicode_table[$key];
        if( is_int($value) ) {
            return $value;
        }

        $value = (string) $value;
        if( strncasecmp($value, '0x', 2) == 0 ) {
            $value = (string) substr($value, 2);
        }
        if( $value === '' ) {
            return null;
        }

        return hexdec($value);
    }

    /**
     * Reads one UTF-8 sequence starting at $offset.
     *
     * @param string $string UTF-8 text.
     * @param int    $offset Byte offset of the lead byte (must be < strlen).
     * @return array array(code point or null if malformed, bytes consumed).
     */
    function _utf8_read($string, $offset) {
        $lead = ord($string[$offset]);
        if( $lead < 0x80 ) {
            return array($lead, 1);
        }

        if( ($lead & 0xE0) == 0xC0 ) {
            $width = 2;
        } elseif( ($lead & 0xF0) == 0xE0 ) {
            $width = 3;
        } elseif( ($lead & 0xF8) == 0xF0 ) {
            $width = 4;
        } else {
            // Stray continuation byte or invalid lead byte.
            return array(null, 1);
        }

        $chunk = substr($string, $offset, $width);
        if( strlen($chunk) < $width ) {
            // Sequence truncated by the end of the string.
            return array(null, strlen($chunk));
        }
        for( $k = 1; $k < $width; $k++ ) {
            if( (ord($chunk[$k]) & 0xC0) != 0x80 ) {
                return array(null, 1);
            }
        }

        return array($this->Utf8_Unicode($chunk), $width);
    }

    /**
     * Maps one two-byte character of the source charset to a code point,
     * through iconv when enabled, otherwise through $unicode_table.
     *
     * @param string $pair Two bytes in $fromLang.
     * @return int|null Code point, or null when the character cannot be mapped.
     */
    function _double_byte_to_unicode($pair) {
        if( strlen($pair) != 2 ) {
            return null;
        }

        if( $this->iconv_enabled ) {
            $utf8 = iconv($this->fromLang, 'UTF-8', $pair);
            if( $utf8 === false || $utf8 === '' ) {
                return null;
            }
            return $this->Utf8_Unicode($utf8);
        }

        $key = hexdec(bin2hex($pair));
        if( $this->fromLang == 'GBK' ) {
            // The GB table is keyed by the code with both high bits cleared.
            $key -= 0x8080;
        } elseif( $this->fromLang != 'BIG5' ) {
            return null;
        }

        return $this->_table_lookup($key);
    }

    /**
     * Converts the source text to ASCII with &#x...; references for every
     * non-ASCII character.
     *
     * @param string $string Text in $fromLang.
     * @return string
     */
    function _to_entities($string) {
        $fromUtf8 = ($this->fromLang == 'UTF-8');
        $outstring = '';
        $n = strlen($string);
        $i = 0;

        while( $i < $n ) {
            $byte = $string[$i];
            if( ord($byte) <= 127 ) {
                $outstring .= $byte;
                $i++;
                continue;
            }

            if( $fromUtf8 ) {
                // UTF-8 sequences are 2 to 4 bytes long, not fixed pairs.
                list($code, $width) = $this->_utf8_read($string, $i);
            } else {
                $width = 2;
                $code = $this->_double_byte_to_unicode(substr($string, $i, 2));
            }
            $i += $width;

            $outstring .= ($code === null) ? '?' : '&#x' . dechex($code) . ';';
        }

        return $outstring;
    }

    /**
     * Table-based conversion from UTF-8 to GBK or BIG5.
     *
     * @param string $string UTF-8 text.
     * @return string
     */
    function _utf8_to_multibyte($string) {
        $out = '';
        $n = strlen($string);
        $i = 0;

        while( $i < $n ) {
            list($code, $width) = $this->_utf8_read($string, $i);
            $i += $width;

            if( $code !== null && $code < 0x80 ) {
                $out .= chr($code);
                continue;
            }

            $target = ($code === null) ? null : $this->_table_lookup($code);
            if( $target === null ) {
                $out .= '?';
                continue;
            }

            if( $this->toLang == 'GBK' ) {
                // Restore the high bits that the GB table leaves out.
                $target += 0x8080;
            }
            $out .= $this->_hex2bin(dechex($target));
        }

        return $out;
    }

    /**
     * Table-based conversion from GBK or BIG5 to UTF-8.
     *
     * @param string $string Text in $fromLang.
     * @return string UTF-8 text.
     */
    function _multibyte_to_utf8($string) {
        $outstring = '';
        $n = strlen($string);
        $i = 0;

        while( $i < $n ) {
            $byte = $string[$i];
            if( ord($byte) <= 127 ) {
                $outstring .= $byte;
                $i++;
                continue;
            }

            $code = $this->_double_byte_to_unicode(substr($string, $i, 2));
            $i += 2;

            if( $code === null ) {
                $outstring .= '?';
                continue;
            }

            // CHSUtoUTF8() yields decimal byte values in 3-digit groups.
            $digits = $this->CHSUtoUTF8($code);
            for( $k = 0, $len = strlen($digits); $k < $len; $k += 3 ) {
                $outstring .= chr((int) substr($digits, $k, 3));
            }
        }

        return $outstring;
    }
}
//
?>