PageRenderTime 37ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/upload/wind/utility/WindConvert.php

https://gitlab.com/wuhang2003/phpwind
PHP | 206 lines | 124 code | 10 blank | 72 comment | 26 complexity | 8fae85ed0a40c8dbbe77468d4c008336 MD5 | raw file
  1. <?php
  /**
   * Character-encoding conversion utilities.
   *
   * Supported conversions:
   * <code>
   * 1. UTF-16BE to UTF-8
   * 2. UTF-8 to UTF-16BE
   * 3. UTF-8 to Unicode code points
   * 4. Unicode code points to UTF-8
   * </code>
   * plus a general-purpose convert() for arbitrary encodings and a GBK to UTF-8
   * shortcut.
   *
   * @author Qiong Wu <papa0924@gmail.com> 2011-10-19
   * @copyright ©2003-2103 phpwind.com
   * @license http://www.windframework.com
   * @version $Id: WindConvert.php 3829 2012-11-19 11:13:22Z yishuo $
   * @package utility
   */
  class WindConvert {

      /**
       * Converts a value from one character encoding to another.
       *
       * Strings are converted directly. Arrays are converted recursively,
       * element by element (array keys are left untouched, and any object
       * element is first replaced by the array of its accessible properties).
       * Every other type is returned unchanged.
       *
       * @param mixed $str
       *        the string, or array of strings, to convert
       * @param string $toEncoding
       *        target encoding
       * @param string $fromEncoding
       *        source encoding
       * @param bool $ifMb
       *        whether mbstring may be used when it is available
       * @return mixed the converted value
       */
      public static function convert($str, $toEncoding, $fromEncoding, $ifMb = true) {
          // Same encoding on both sides (case-insensitive): nothing to do.
          if (!strcasecmp($toEncoding, $fromEncoding)) return $str;

          if (is_string($str)) {
              if ($ifMb && function_exists('mb_convert_encoding')) {
                  return mb_convert_encoding($str, $toEncoding, $fromEncoding);
              }
              // No mbstring (or the caller opted out): use the framework's own converter.
              !$toEncoding && $toEncoding = 'GBK';
              !$fromEncoding && $fromEncoding = 'GBK';
              Wind::registeComponent(array('path' => 'WIND:convert.WindGeneralConverter', 'scope' => 'singleton'),
                  'windConverter');
              /* @var $converter WindGeneralConverter */
              $converter = Wind::getComponent('windConverter');
              return $converter->convert($str, $fromEncoding, $toEncoding);
          }

          if (is_array($str)) {
              foreach ($str as $key => $value) {
                  is_object($value) && $value = get_object_vars($value);
                  $str[$key] = self::convert($value, $toEncoding, $fromEncoding, $ifMb);
              }
          }
          return $str;
      }

      /**
       * Converts GBK encoded text to UTF-8.
       *
       * Uses iconv when it is available; otherwise the work is delegated to
       * {@see WindConvert::convert()}.
       *
       * @param mixed $srcText GBK encoded text
       * @return string|false the UTF-8 text, or false when iconv rejects the input
       */
      public static function gbkToUtf8($srcText) {
          if (self::_isIconv('UTF-8')) {
              return iconv('GBK', 'UTF-8', $srcText);
          }
          return self::convert($srcText, 'UTF-8', 'GBK');
      }

      /**
       * Converts a UTF-16BE encoded string to UTF-8.
       *
       * Surrogate pairs are combined, so characters outside the Basic
       * Multilingual Plane come out as proper 4-byte UTF-8 sequences. A
       * trailing odd byte is ignored.
       *
       * @param string $str UTF-16BE encoded bytes
       * @return string UTF-8 encoded string
       */
      public static function utf16beToUTF8($str) {
          // Fewer than two bytes cannot hold a single 16-bit code unit.
          if (strlen($str) < 2) return '';
          return self::unicodeToUTF8(unpack('n*', $str));
      }

      /**
       * Converts a UTF-8 encoded string to UTF-16BE.
       *
       * @param string $string UTF-8 encoded string
       * @param boolean $bom
       *        whether to prepend the big-endian byte order mark (0xFE 0xFF)
       * @return string UTF-16BE encoded bytes
       */
      public static function utf8ToUTF16BE($string, $bom = false) {
          $out = $bom ? "\xFE\xFF" : '';
          if (function_exists('mb_convert_encoding')) {
              return $out . mb_convert_encoding($string, 'UTF-16BE', 'UTF-8');
          }
          foreach (self::utf8ToUnicode($string) as $cp) {
              if ($cp > 0xFFFF) {
                  // Beyond the BMP: a single 16-bit unit cannot hold it, emit a surrogate pair.
                  $cp -= 0x10000;
                  $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
              } else {
                  $out .= pack('n', $cp);
              }
          }
          return $out;
      }

      /**
       * Encodes a list of Unicode values as a UTF-8 string.
       *
       * The list may contain code points (0 - 0x10FFFF) and/or UTF-16 code
       * units: a high surrogate immediately followed by a low surrogate is
       * combined into one supplementary code point. An unpaired surrogate is
       * encoded on its own as three bytes. Values outside the Unicode range
       * are replaced by U+FFFD.
       *
       * @param array $str list of integer code points / UTF-16 code units
       * @return string UTF-8 encoded string
       */
      public static function unicodeToUTF8($str) {
          $utf8 = '';
          $pendingHigh = null;
          foreach ($str as $unit) {
              $unit = (int) $unit;
              if ($pendingHigh !== null) {
                  $high = $pendingHigh;
                  $pendingHigh = null;
                  if ($unit >= 0xDC00 && $unit <= 0xDFFF) {
                      $utf8 .= self::_encodeCodePoint(
                          0x10000 + (($high - 0xD800) << 10) + ($unit - 0xDC00));
                      continue;
                  }
                  // The high surrogate had no partner: flush it as it is.
                  $utf8 .= self::_encodeCodePoint($high);
              }
              if ($unit >= 0xD800 && $unit <= 0xDBFF) {
                  // Hold it back until we know whether a low surrogate follows.
                  $pendingHigh = $unit;
                  continue;
              }
              $utf8 .= self::_encodeCodePoint($unit);
          }
          if ($pendingHigh !== null) {
              $utf8 .= self::_encodeCodePoint($pendingHigh);
          }
          return $utf8;
      }

      /**
       * Decodes a UTF-8 string into a list of Unicode code points.
       *
       * One- to four-byte sequences are supported. A malformed sequence (stray
       * continuation byte, invalid lead byte, truncated sequence, or a value
       * above U+10FFFF) yields U+FFFD and decoding resumes at the next byte
       * that was not consumed.
       *
       * @param string $string UTF-8 encoded string
       * @return array list of integer code points
       */
      public static function utf8ToUnicode($string) {
          $codePoints = array();
          $length = strlen($string);
          $i = 0;
          while ($i < $length) {
              $lead = ord($string[$i]);
              if ($lead < 0x80) {
                  $codePoints[] = $lead;
                  $i++;
                  continue;
              }
              if ($lead >= 0xC0 && $lead < 0xE0) {
                  $extra = 1;
                  $cp = $lead & 0x1F;
              } elseif ($lead >= 0xE0 && $lead < 0xF0) {
                  $extra = 2;
                  $cp = $lead & 0x0F;
              } elseif ($lead >= 0xF0 && $lead < 0xF8) {
                  $extra = 3;
                  $cp = $lead & 0x07;
              } else {
                  // Stray continuation byte (0x80-0xBF) or invalid lead byte (0xF8-0xFF).
                  $codePoints[] = 0xFFFD;
                  $i++;
                  continue;
              }
              $consumed = 0;
              while ($consumed < $extra && $i + 1 + $consumed < $length) {
                  $next = ord($string[$i + 1 + $consumed]);
                  if (($next & 0xC0) !== 0x80) break;
                  $cp = ($cp << 6) | ($next & 0x3F);
                  $consumed++;
              }
              $codePoints[] = ($consumed === $extra && $cp <= 0x10FFFF) ? $cp : 0xFFFD;
              $i += 1 + $consumed;
          }
          return $codePoints;
      }

      /**
       * Encodes one Unicode code point as UTF-8.
       *
       * @param int $cp code point; anything outside 0 - 0x10FFFF becomes U+FFFD
       * @return string the one- to four-byte UTF-8 sequence
       */
      private static function _encodeCodePoint($cp) {
          if ($cp < 0 || $cp > 0x10FFFF) $cp = 0xFFFD;
          if ($cp < 0x80) {
              return chr($cp);
          }
          if ($cp < 0x800) {
              return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
          }
          if ($cp < 0x10000) {
              return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
          }
          return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F))
              . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
      }

      /**
       * Normalizes a charset name to one of the supported identifiers.
       *
       * Only the first two characters are inspected, case-insensitively.
       *
       * @param string $lang charset name as supplied by the caller
       * @return string 'GBK', 'UTF8', 'UNICODE', 'BIG5', or '' when not recognized
       */
      private static function _getCharset($lang) {
          switch (strtoupper(substr($lang, 0, 2))) {
              case 'GB':
                  return 'GBK';
              case 'UT':
                  return 'UTF8';
              case 'UN':
                  return 'UNICODE';
              case 'BI':
                  return 'BIG5';
              default:
                  return '';
          }
      }

      /**
       * Tells whether iconv may be used for the given target charset.
       *
       * @param string $targeLang target charset
       * @return boolean true when iconv() exists and the target is not 'BIG5'
       */
      private static function _isIconv($targeLang) {
          return function_exists('iconv') && $targeLang != 'BIG5';
      }
  }
  196. ?>