PageRenderTime 44ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/ php-ppcms/includes/classes/core.charset.class.php

http://php-ppcms.googlecode.com/
PHP | 310 lines | 262 code | 34 blank | 14 comment | 64 complexity | 92194b8022b891e00c714ba9a5935636 MD5 | raw file
Possible License(s): LGPL-2.1, GPL-2.0
  1. <?php
  2. /***************************************************************
  3. * Copyright notice
  4. * (c) 2009, jianyuzhu@gmail.com
  5. * All rights reserved
  6. * This script is part of the PPEMI project.
  7. ***************************************************************/
  8. class CoreCharsetConverter {
  9. var $gd_map;
  10. var $big5_map;
  11. var $dep_char = 127;
  12. var $iconv_enabled = false;
  13. var $unicode_table = array();
  14. //constructor
  15. function CoreCharsetConverter($fromLang = '', $toLang = '', $forceTable = false) {
  16. $this->charset_dir = CONFIG_PATH . CONFIG_DIR_CHARSET;
  17. //
  18. $this->gd_map = 'gb.map';
  19. $this->big5_map = 'big5.map';
  20. //
  21. $this->gb_unicode_table = 'gb_unicode.table';
  22. $this->big5_unicode_table = 'big5_unicode.table';
  23. //
  24. $this->fromLang = $fromLang;
  25. $this->toLang = $toLang;
  26. //
  27. if( function_exists('iconv') && $this->toLang != 'BIG5' && $forceTable == false) {
  28. $this->iconv_enabled = true;
  29. } else {
  30. $this->iconv_enabled = false;
  31. $this->_open_table();
  32. }
  33. }
  34. function Big5_GB($string) {
  35. $fp = fopen($this->gd_map, "r");
  36. $outstring = "";
  37. for($i=0, $n=strlen($string); $i<$n; $i++) {
  38. $ch = ord(substr($string, $i, 1));
  39. if( $ch > $this->dep_char ) {
  40. $outstring .= $this->_convert_big5_gb(substr($string, $i, 2), $fp);
  41. $i++;
  42. } else {
  43. $outstring .= substr($string, $i, 1);
  44. }
  45. }
  46. fclose($fp);
  47. return $outstring;
  48. }
  49. function GB_Big5($string) {
  50. $fp = fopen($this->big5_map, "r");
  51. $outstring = "";
  52. for($i=0, $n=strlen($string); $i<$n; $i++) {
  53. $ch = ord(substr($string, $i, 1));
  54. if( $ch > $this->dep_char ) {
  55. $outstring .= $this->_convert_gb_big5(substr($string, $i, 2), $fp);
  56. $i++;
  57. } else {
  58. $outstring .= substr($string, $i, 1);
  59. }
  60. }
  61. fclose($fp);
  62. return $outstring;
  63. }
  64. function Utf8_Unicode($char) {
  65. switch(strlen($char)) {
  66. case 1:
  67. return ord($char);
  68. case 2:
  69. $n = (ord($char[0]) & 0x3f) << 6;
  70. $n += ord($char[1]) & 0x3f;
  71. return $n;
  72. case 3:
  73. $n = (ord($char[0]) & 0x1f) << 12;
  74. $n += (ord($char[1]) & 0x3f) << 6;
  75. $n += ord($char[2]) & 0x3f;
  76. return $n;
  77. case 4:
  78. $n = (ord($char[0]) & 0x0f) << 18;
  79. $n += (ord($char[1]) & 0x3f) << 12;
  80. $n += (ord($char[2]) & 0x3f) << 6;
  81. $n += ord($char[3]) & 0x3f;
  82. return $n;
  83. }
  84. }
  85. function CHSUtoUTF8($c) {
  86. $str = '';
  87. if( $c < 0x80 ) {
  88. $str .= $c;
  89. } elseif( $c < 0x800 ) {
  90. $str .= (0xC0 | $c >> 6);
  91. $str .= (0x80 | $c & 0x3F);
  92. } elseif( $c < 0x10000 ) {
  93. $str .= (0xE0 | $c >> 12);
  94. $str .= (0x80 | $c >> 6 & 0x3F);
  95. $str .=( 0x80 | $c & 0x3F);
  96. } elseif( $c < 0x200000 ) {
  97. $str .= (0xF0 | $c >> 18);
  98. $str .= (0x80 | $c >> 12 & 0x3F);
  99. $str .= (0x80 | $c >> 6 & 0x3F);
  100. $str .= (0x80 | $c & 0x3F);
  101. }
  102. return $str;
  103. }
  104. function Convert($string) {
  105. if( $this->fromLang == $this->toLang ) {
  106. return $string;
  107. } elseif( $this->iconv_enabled ) {
  108. if( $this->toLang <> 'UNICODE' ) {
  109. return iconv($this->fromLang, $this->toLang, $string);
  110. } else {
  111. $outstring = '';
  112. while($string != '') {
  113. if( ord(substr($string, 0, 1)) > 127 ) {
  114. $outstring .= "&#x" . dechex($this->Utf8_Unicode(iconv($this->fromLang, "UTF-8", substr($string, 0, 2)))) . ";";
  115. $string = substr($string, 2, strlen($string));
  116. } else {
  117. $outstring .= substr($string, 0, 1);
  118. $string = substr($string, 1, strlen($string));
  119. }
  120. }
  121. return $outstring;
  122. }
  123. } elseif( $this->toLang == 'UNICODE' ) {
  124. $outstring = '';
  125. while($string != '') {
  126. if( ord(substr($string, 0, 1)) > 127 ) {
  127. if( $this->fromLang == 'GBK' ) {
  128. $outstring .= '&#x'.$this->unicode_table[hexdec(bin2hex(substr($string, 0, 2))) - 0x8080] . ';';
  129. } elseif( $this->fromLang == 'BIG5' ) {
  130. $outstring .= '&#x'.$this->unicode_table[hexdec(bin2hex(substr($string, 0, 2)))] . ';';
  131. }
  132. $string = substr($string, 2, strlen($string));
  133. } else {
  134. $outstring .= substr($string, 0, 1);
  135. $string = substr($string, 1, strlen($string));
  136. }
  137. }
  138. return $outstring;
  139. } else {
  140. $outstring = '';
  141. if( $this->fromLang == 'UTF-8' ) {
  142. $out = '';
  143. $len = strlen($string);
  144. $i = 0;
  145. while($i < $len) {
  146. $c = ord(substr($string, $i++, 1));
  147. switch($c >> 4) {
  148. case 0:
  149. case 1:
  150. case 2:
  151. case 3:
  152. case 4:
  153. case 5:
  154. case 6:
  155. case 7:
  156. $out .= substr($string, $i - 1, 1);
  157. break;
  158. case 12:
  159. case 13:
  160. $char2 = ord(substr($string, $i++, 1));
  161. $char3 = $this->unicode_table[(($c & 0x1F) << 6) | ($char2 & 0x3F)];
  162. if( $this->toLang == 'GBK' ) {
  163. $out .= $this->_hex2bin(dechex($char3 + 0x8080));
  164. } elseif( $this->toLang == 'BIG5' ) {
  165. $out .= $this->_hex2bin($char3);
  166. }
  167. break;
  168. case 14:
  169. $char2 = ord(substr($string, $i++, 1));
  170. $char3 = ord(substr($string, $i++, 1));
  171. $char4 = $this->unicode_table[(($c & 0x0F) << 12) | (($char2 & 0x3F) << 6) | (($char3 & 0x3F) << 0)];
  172. if( $this->toLang == 'GBK' ) {
  173. $out .= $this->_hex2bin(dechex($char4 + 0x8080));
  174. } elseif( $this->toLang == 'BIG5' ) {
  175. $out .= $this->_hex2bin($char4);
  176. }
  177. break;
  178. }
  179. }
  180. return $out;
  181. } else {
  182. while($string != '') {
  183. if( ord(substr($string, 0, 1)) > 127 ) {
  184. if( $this->fromLang == 'BIG5' ) {
  185. $utf8 = $this->CHSUtoUTF8(hexdec($this->unicode_table[hexdec(bin2hex(substr($string, 0, 2)))]));
  186. } elseif( $this->fromLang == 'GBK' ) {
  187. $utf8 = $this->CHSUtoUTF8(hexdec($this->unicode_table[hexdec(bin2hex(substr($string, 0, 2))) - 0x8080]));
  188. }
  189. for($i = 0; $i < strlen($utf8); $i += 3) {
  190. $outstring .= chr(substr($utf8, $i, 3));
  191. }
  192. $string = substr($string, 2, strlen($string));
  193. } else {
  194. $outstring .= substr($string, 0, 1);
  195. $string = substr($string, 1, strlen($string));
  196. }
  197. }
  198. $string = '';
  199. return $outstring;
  200. }
  201. }
  202. }
  203. //private
  204. function _convert_big5_gb($char, $fp) {
  205. $c = ord(substr($char, 0, 1));
  206. $x = ord(substr($char, 1, 1));
  207. $address = (($c - 160) * 510) + ($x - 1) * 2;
  208. fseek($fp, $address);
  209. $hi = fgetc($fp);
  210. $lo = fgetc($fp);
  211. return "$hi$lo";
  212. }
  213. function _convert_gb_big5($char, $fp) {
  214. $c = ord(substr($char, 0, 1));
  215. $x = ord(substr($char, 1, 1));
  216. $address = ($c - 160) * 510 + ($x - 1) * 2;
  217. fseek($fp, $address);
  218. $hi = fgetc($fp);
  219. $lo = fgetc($fp);
  220. return "$hi$lo";
  221. }
  222. function _lang($langcode) {
  223. $langcode = strtoupper($langcode);
  224. if( substr($langcode, 0, 2) == 'GB' ) {
  225. return 'GBK';
  226. } elseif( substr($langcode, 0, 3) == 'BIG' ) {
  227. return 'BIG5';
  228. } elseif( substr($langcode, 0, 3) == 'UTF' ) {
  229. return 'UTF-8';
  230. } elseif( substr($langcode, 0, 3) == 'UNI' ) {
  231. return 'UNICODE';
  232. }
  233. }
  234. function _hex2bin($hexdata) {
  235. for($i=0, $n=strlen($hexdata); $i<$n; $i += 2) {
  236. $bindata .= chr(hexdec(substr($hexdata, $i, 2)));
  237. }
  238. return $bindata;
  239. }
  240. function _open_table() {
  241. $this->unicode_table = array();
  242. if( $this->fromLang == 'GBK' || $this->toLang == 'GBK' ) {
  243. } elseif( $this->fromLang = 'BIG5' || $this->toLang == 'BIG5' ) {
  244. }
  245. //
  246. $fp = fopen($this->table, 'rb');
  247. $tabletmp = fread($fp, filesize($this->table));
  248. for($i=0, $n=strlen($tabletmp); $i<$n; $i += 4) {
  249. $tmp = unpack('nkey/nvalue', substr($tabletmp, $i, 4));
  250. if( $this->toLang == 'UTF-8' ) {
  251. $this->unicode_table[$tmp['key']] = '0x' . dechex($tmp['value']);
  252. } elseif( $this->fromLang == 'UTF-8' ) {
  253. $this->unicode_table[$tmp['value']] = '0x' . dechex($tmp['key']);
  254. } elseif( $this->toLang == 'UNICODE' ) {
  255. $this->unicode_table[$tmp['key']] = dechex($tmp['value']);
  256. }
  257. }
  258. }
  259. function _open_map() {
  260. $fp = fopen($this->gd_map, "r");
  261. $outstring = "";
  262. for($i=0, $n=strlen($string); $i<$n; $i++) {
  263. $ch = ord(substr($string, $i, 1));
  264. if( $ch > $this->dep_char ) {
  265. $outstring .= $this->_convert_big5_gb(substr($string, $i, 2), $fp);
  266. $i++;
  267. } else {
  268. $outstring .= substr($string, $i, 1);
  269. }
  270. }
  271. fclose($fp);
  272. return $outstring;
  273. }
  274. }
  275. //
  276. ?>