PageRenderTime 65ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/e/class/doiconv.php

https://github.com/westeast/xwdede
PHP | 525 lines | 387 code | 98 blank | 40 comment | 104 complexity | d4ee46961d22cf1ea1f2eeddf085cf85 MD5 | raw file
  1. <?php
  2. define('InEmpireCMSIconv',TRUE); // Flag constant set when this conversion class file is loaded; presumably checked by including scripts - TODO confirm against callers.
  3. class Chinese
  4. {
  5. //存放简体中文与拼音对照表
  6. var $pinyin_table = array();
  7. //存放 GB <-> UNICODE 对照表的内容
  8. var $unicode_table = array();
  9. //访问中文繁简互换表的文件指针
  10. var $ctf;
  11. var $SourceText = "";
  12. //配置
  13. var $config = array(
  14. 'codetable_dir' => '', // 存放各种语言互换表的目录
  15. 'source_lang' => '', // 字符的原编码
  16. 'target_lang' => '', // 转换后的编码
  17. 'GBtoBIG5_table' => 'gb-big5.table', // 简体中文转换为繁体中文的对照表
  18. 'BIG5toGB_table' => 'big5-gb.table', // 繁体中文转换为简体中文的对照表
  19. 'GBtoPinYin_table' => 'gb-pinyin.table', // 简体中文转换为拼音的对照表
  20. 'GBtoUnicode_table' => 'gb-unicode.table', // 简体中文转换为UNICODE的对照表
  21. 'BIG5toUnicode_table' => 'big5-unicode.table' // 繁体中文转换为UNICODE的对照表
  22. );
  23. function Chinese($dir='./')
  24. {
  25. $this->config['codetable_dir'] = $dir."../data/codetable/";
  26. }
  27. function Convert( $source_lang , $target_lang , $source_string='' )
  28. {
  29. /* 如果编码相同,直接返回 */
  30. if ($source_lang == $target_lang || $source_string == '')
  31. {
  32. return $source_string;
  33. }
  34. if ($source_lang != '') {
  35. $this->config['source_lang'] = $source_lang;
  36. }
  37. if ($target_lang != '') {
  38. $this->config['target_lang'] = $target_lang;
  39. }
  40. $this->SourceText = $source_string;
  41. $this->OpenTable();
  42. // 判断是否为中文繁、简转换
  43. if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5") && ($this->config['target_lang']=="GB2312" || $this->config['target_lang']=="BIG5") ) {
  44. return $this->GB2312toBIG5();
  45. }
  46. // 判断是否为简体中文与拼音转换
  47. if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5") && $this->config['target_lang']=="PinYin" ) {
  48. return $this->CHStoPinYin();
  49. }
  50. // 判断是否为简体、繁体中文与UTF8转换
  51. if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5" || $this->config['source_lang']=="UTF8") && ($this->config['target_lang']=="UTF8" || $this->config['target_lang']=="GB2312" || $this->config['target_lang']=="BIG5") ) {
  52. return $this->CHStoUTF8();
  53. }
  54. // 判断是否为简体、繁体中文与UNICODE转换
  55. if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5") && $this->config['target_lang']=="UNICODE" ) {
  56. return $this->CHStoUNICODE();
  57. }
  58. }
  59. //将 16 进制转换为 2 进制字符
  60. function _hex2bin( $hexdata )
  61. {
  62. $bindata = '';
  63. for ($i = 0; $i < strlen($hexdata); $i += 2 )
  64. {
  65. $bindata .= chr(hexdec(substr($hexdata, $i, 2)));
  66. }
  67. return $bindata;
  68. }
  69. function OpenTable()
  70. {
  71. // 假如原编码为简体中文的话
  72. if ($this->config['source_lang']=="GB2312") {
  73. // 假如转换目标编码为繁体中文的话
  74. if ($this->config['target_lang'] == "BIG5") {
  75. $this->ctf = fopen($this->config['codetable_dir'].$this->config['GBtoBIG5_table'], "rb");
  76. if (is_null($this->ctf)) {
  77. echo "打开打开转换表文件失败!";
  78. exit;
  79. }
  80. }
  81. // 假如转换目标编码为拼音的话
  82. if ($this->config['target_lang'] == "PinYin") {
  83. $tmp = @file($this->config['codetable_dir'].$this->config['GBtoPinYin_table']);
  84. if (!$tmp) {
  85. echo "打开打开转换表文件失败!";
  86. exit;
  87. }
  88. //
  89. $i = 0;
  90. for ($i=0; $i<count($tmp); $i++) {
  91. $tmp1 = explode(" ", $tmp[$i]);
  92. $this->pinyin_table[$i]=array($tmp1[0],$tmp1[1]);
  93. }
  94. }
  95. // 假如转换目标编码为 UTF8 的话
  96. if ($this->config['target_lang'] == "UTF8") {
  97. $tmp = @file($this->config['codetable_dir'].$this->config['GBtoUnicode_table']);
  98. if (!$tmp) {
  99. echo "编码转换失败!";
  100. exit;
  101. }
  102. $this->unicode_table = array();
  103. while(list($key,$value)=each($tmp))
  104. $this->unicode_table[hexdec(substr($value,0,6))]=substr($value,7,6);
  105. }
  106. // 假如转换目标编码为 UNICODE 的话
  107. if ($this->config['target_lang'] == "UNICODE") {
  108. $tmp = @file($this->config['codetable_dir'].$this->config['GBtoUnicode_table']);
  109. if (!$tmp) {
  110. echo "打开打开转换表文件失败!";
  111. exit;
  112. }
  113. $this->unicode_table = array();
  114. while(list($key,$value)=each($tmp))
  115. $this->unicode_table[hexdec(substr($value,0,6))]=substr($value,9,4);
  116. }
  117. }
  118. // 假如原编码为繁体中文的话
  119. if ($this->config['source_lang']=="BIG5") {
  120. // 假如转换目标编码为简体中文的话
  121. if ($this->config['target_lang'] == "GB2312") {
  122. $this->ctf = fopen($this->config['codetable_dir'].$this->config['BIG5toGB_table'], "r");
  123. if (is_null($this->ctf)) {
  124. echo "打开打开转换表文件失败!";
  125. exit;
  126. }
  127. }
  128. // 假如转换目标编码为 UTF8 的话
  129. if ($this->config['target_lang'] == "UTF8") {
  130. $tmp = @file($this->config['codetable_dir'].$this->config['BIG5toUnicode_table']);
  131. if (!$tmp) {
  132. echo "打开打开转换表文件失败!";
  133. exit;
  134. }
  135. $this->unicode_table = array();
  136. while(list($key,$value)=each($tmp))
  137. $this->unicode_table[hexdec(substr($value,0,6))]=substr($value,7,6);
  138. }
  139. // 假如转换目标编码为 UNICODE 的话
  140. if ($this->config['target_lang'] == "UNICODE") {
  141. $tmp = @file($this->config['codetable_dir'].$this->config['BIG5toUnicode_table']);
  142. if (!$tmp) {
  143. echo "打开打开转换表文件失败!";
  144. exit;
  145. }
  146. $this->unicode_table = array();
  147. while(list($key,$value)=each($tmp))
  148. $this->unicode_table[hexdec(substr($value,0,6))]=substr($value,9,4);
  149. }
  150. // 假如转换目标编码为拼音的话
  151. if ($this->config['target_lang'] == "PinYin") {
  152. $tmp = @file($this->config['codetable_dir'].$this->config['GBtoPinYin_table']);
  153. if (!$tmp) {
  154. echo "打开打开转换表文件失败!";
  155. exit;
  156. }
  157. //
  158. $i = 0;
  159. for ($i=0; $i<count($tmp); $i++) {
  160. $tmp1 = explode(" ", $tmp[$i]);
  161. $this->pinyin_table[$i]=array($tmp1[0],$tmp1[1]);
  162. }
  163. }
  164. }
  165. // 假如原编码为 UTF8 的话
  166. if ($this->config['source_lang']=="UTF8") {
  167. // 假如转换目标编码为 GB2312 的话
  168. if ($this->config['target_lang'] == "GB2312") {
  169. $tmp = @file($this->config['codetable_dir'].$this->config['GBtoUnicode_table']);
  170. if (!$tmp) {
  171. echo "打开打开转换表文件失败!";
  172. exit;
  173. }
  174. $this->unicode_table = array();
  175. while(list($key,$value)=each($tmp))
  176. {
  177. $this->unicode_table[hexdec(substr($value,7,6))]=substr($value,0,6);
  178. }
  179. }
  180. // 假如转换目标编码为 BIG5 的话
  181. if ($this->config['target_lang'] == "BIG5") {
  182. $tmp = @file($this->config['codetable_dir'].$this->config['BIG5toUnicode_table']);
  183. if (!$tmp) {
  184. echo "打开打开转换表文件失败!";
  185. exit;
  186. }
  187. $this->unicode_table = array();
  188. while(list($key,$value)=each($tmp))
  189. {
  190. $this->unicode_table[hexdec(substr($value,7,6))]=substr($value,0,6);
  191. }
  192. }
  193. }
  194. }
  195. function OpenFile( $position , $isHTML=false )
  196. {
  197. $tempcontent = @file($position);
  198. if (!$tempcontent) {
  199. echo "打开文件失败!";
  200. exit;
  201. }
  202. $this->SourceText = implode("",$tempcontent);
  203. if ($isHTML) {
  204. $this->SourceText = preg_replace( "/charset=".$this->config['source_lang']."/i" , "charset=".$this->config['target_lang'] , $this->SourceText);
  205. $this->SourceText = str_replace("\n", "", $this->SourceText);
  206. $this->SourceText = str_replace("\r", "", $this->SourceText);
  207. }
  208. }
  209. function SiteOpen( $position )
  210. {
  211. $tempcontent = @file($position);
  212. if (!$tempcontent) {
  213. echo "打开文件失败!";
  214. exit;
  215. }
  216. // 将数组的所有内容转换为字符串
  217. $this->SourceText = implode("",$tempcontent);
  218. $this->SourceText = preg_replace( "/charset=".$this->config['source_lang']."/i" , "charset=".$this->config['target_lang'] , $this->SourceText);
  219. }
  220. function setvar( $parameter , $value )
  221. {
  222. if(!trim($parameter))
  223. return $parameter;
  224. $this->config[$parameter] = $value;
  225. }
  226. function CHSUtoUTF8($c)
  227. {
  228. $str="";
  229. if ($c < 0x80) {
  230. $str.=$c;
  231. }
  232. elseif ($c < 0x800) {
  233. $str.=(0xC0 | $c>>6);
  234. $str.=(0x80 | $c & 0x3F);
  235. }
  236. elseif ($c < 0x10000) {
  237. $str.=(0xE0 | $c>>12);
  238. $str.=(0x80 | $c>>6 & 0x3F);
  239. $str.=(0x80 | $c & 0x3F);
  240. }
  241. elseif ($c < 0x200000) {
  242. $str.=(0xF0 | $c>>18);
  243. $str.=(0x80 | $c>>12 & 0x3F);
  244. $str.=(0x80 | $c>>6 & 0x3F);
  245. $str.=(0x80 | $c & 0x3F);
  246. }
  247. return $str;
  248. }
  249. function CHStoUTF8(){
  250. if ($this->config["source_lang"]=="BIG5" || $this->config["source_lang"]=="GB2312") {
  251. $ret="";
  252. while($this->SourceText){
  253. if(ord(substr($this->SourceText,0,1))>127){
  254. if ($this->config["source_lang"]=="BIG5") {
  255. $utf8=$this->CHSUtoUTF8(hexdec($this->unicode_table[hexdec(bin2hex(substr($this->SourceText,0,2)))]));
  256. }
  257. if ($this->config["source_lang"]=="GB2312") {
  258. $utf8=$this->CHSUtoUTF8(hexdec($this->unicode_table[hexdec(bin2hex(substr($this->SourceText,0,2)))-0x8080]));
  259. }
  260. for($i=0;$i<strlen($utf8);$i+=3)
  261. $ret.=chr(substr($utf8,$i,3));
  262. $this->SourceText=substr($this->SourceText,2,strlen($this->SourceText));
  263. }
  264. else{
  265. $ret.=substr($this->SourceText,0,1);
  266. $this->SourceText=substr($this->SourceText,1,strlen($this->SourceText));
  267. }
  268. }
  269. $this->unicode_table = array();
  270. $this->SourceText = "";
  271. return $ret;
  272. }
  273. if ($this->config["source_lang"]=="UTF8") {
  274. $out = '';
  275. $len = strlen($this->SourceText);
  276. $i = 0;
  277. while($i < $len) {
  278. $c = ord( substr( $this->SourceText, $i++, 1 ) );
  279. switch($c >> 4)
  280. {
  281. case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
  282. // 0xxxxxxx
  283. $out .= substr( $this->SourceText, $i - 1, 1 );
  284. break;
  285. case 12: case 13:
  286. // 110x xxxx 10xx xxxx
  287. $char2 = ord( substr( $this->SourceText, $i++, 1 ) );
  288. $char3 = $this->unicode_table[(($c & 0x1F) << 6) | ($char2 & 0x3F)];
  289. if ($this->config["target_lang"]=="GB2312")
  290. {
  291. $out .= $this->_hex2bin( dechex( $char3 + 0x8080 ) );
  292. } elseif ($this->config["target_lang"]=="BIG5")
  293. {
  294. $out .= $this->_hex2bin( dechex ( $char3 + 0x0000 ) );
  295. }
  296. break;
  297. case 14:
  298. // 1110 xxxx 10xx xxxx 10xx xxxx
  299. $char2 = ord( substr( $this->SourceText, $i++, 1 ) );
  300. $char3 = ord( substr( $this->SourceText, $i++, 1 ) );
  301. $char4 = $this->unicode_table[(($c & 0x0F) << 12) | (($char2 & 0x3F) << 6) | (($char3 & 0x3F) << 0)];
  302. if ($this->config["target_lang"]=="GB2312")
  303. {
  304. $out .= $this->_hex2bin( dechex ( $char4 + 0x8080 ) );
  305. } elseif ($this->config["target_lang"]=="BIG5")
  306. {
  307. $out .= $this->_hex2bin( dechex ( $char4 + 0x0000 ) );
  308. }
  309. break;
  310. }
  311. }
  312. // 返回结果
  313. return $out;
  314. }
  315. }
  316. function CHStoUNICODE()
  317. {
  318. $utf="";
  319. while($this->SourceText)
  320. {
  321. if (ord(substr($this->SourceText,0,1))>127)
  322. {
  323. if ($this->config["source_lang"]=="GB2312")
  324. $utf.="&#x".$this->unicode_table[hexdec(bin2hex(substr($this->SourceText,0,2)))-0x8080].";";
  325. if ($this->config["source_lang"]=="BIG5")
  326. $utf.="&#x".$this->unicode_table[hexdec(bin2hex(substr($this->SourceText,0,2)))].";";
  327. $this->SourceText=substr($this->SourceText,2,strlen($this->SourceText));
  328. }
  329. else
  330. {
  331. $utf.=substr($this->SourceText,0,1);
  332. $this->SourceText=substr($this->SourceText,1,strlen($this->SourceText));
  333. }
  334. }
  335. return $utf;
  336. }
  337. function GB2312toBIG5()
  338. {
  339. // 获取等待转换的字符串的总长度
  340. $max=strlen($this->SourceText)-1;
  341. for($i=0;$i<$max;$i++){
  342. $h=ord($this->SourceText[$i]);
  343. if($h>=160){
  344. $l=ord($this->SourceText[$i+1]);
  345. if($h==161 && $l==64){
  346. $gb=" ";
  347. }
  348. else{
  349. fseek($this->ctf,($h-160)*510+($l-1)*2);
  350. $gb=fread($this->ctf,2);
  351. }
  352. $this->SourceText[$i]=$gb[0];
  353. $this->SourceText[$i+1]=$gb[1];
  354. $i++;
  355. }
  356. }
  357. fclose($this->ctf);
  358. // 将转换后的结果赋予 $result;
  359. $result = $this->SourceText;
  360. // 清空 $thisSourceText
  361. $this->SourceText = "";
  362. // 返回转换结果
  363. return $result;
  364. }
  365. function PinYinSearch($num){
  366. if($num>0&&$num<160){
  367. return chr($num);
  368. }
  369. elseif($num<-20319||$num>-10247){
  370. return "";
  371. }
  372. else{
  373. for($i=count($this->pinyin_table)-1;$i>=0;$i--){
  374. if($this->pinyin_table[$i][1]<=$num)
  375. break;
  376. }
  377. return $this->pinyin_table[$i][0];
  378. }
  379. }
  380. function CHStoPinYin(){
  381. if ( $this->config['source_lang']=="BIG5" ) {
  382. $this->ctf = fopen($this->config['codetable_dir'].$this->config['BIG5toGB_table'], "r");
  383. if (is_null($this->ctf)) {
  384. echo "打开打开转换表文件失败!";
  385. exit;
  386. }
  387. $this->SourceText = $this->GB2312toBIG5();
  388. $this->config['target_lang'] = "PinYin";
  389. }
  390. $ret = array();
  391. $ri = 0;
  392. for($i=0;$i<strlen($this->SourceText);$i++){
  393. $p=ord(substr($this->SourceText,$i,1));
  394. if($p>160){
  395. $q=ord(substr($this->SourceText,++$i,1));
  396. $p=$p*256+$q-65536;
  397. }
  398. $ret[$ri]=$this->PinYinSearch($p);
  399. $ri = $ri + 1;
  400. }
  401. // 清空 $this->SourceText
  402. $this->SourceText = "";
  403. $this->pinyin_table = array();
  404. // 返回转换后的结果
  405. return implode(" ", $ret);
  406. }
  407. function ConvertIT()
  408. {
  409. // 判断是否为中文繁、简转换
  410. if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5") && ($this->config['target_lang']=="GB2312" || $this->config['target_lang']=="BIG5") ) {
  411. return $this->GB2312toBIG5();
  412. }
  413. // 判断是否为简体中文与拼音转换
  414. if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5") && $this->config['target_lang']=="PinYin" ) {
  415. return $this->CHStoPinYin();
  416. }
  417. // 判断是否为简体、繁体中文与UTF8转换
  418. if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5" || $this->config['source_lang']=="UTF8") && ($this->config['target_lang']=="UTF8" || $this->config['target_lang']=="GB2312" || $this->config['target_lang']=="BIG5") ) {
  419. return $this->CHStoUTF8();
  420. }
  421. // 判断是否为简体、繁体中文与UNICODE转换
  422. if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5") && $this->config['target_lang']=="UNICODE" ) {
  423. return $this->CHStoUNICODE();
  424. }
  425. }
  426. }
  427. ?>