PageRenderTime 58ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/convertcharset/ConvertCharset.class.php

https://github.com/umbecr/camilaframework
PHP | 307 lines | 262 code | 45 blank | 0 comment | 71 complexity | 49beb64ca1bd381dbf77f62fc8683592 MD5 | raw file
  1. <?php
  2. $PATH_TO_CLASS = dirname(ereg_replace("\\\\", "/", __FILE__)) . "/" . "ConvertTables" . "/";
  3. define ("CONVERT_TABLES_DIR", $PATH_TO_CLASS);
  4. define ("DEBUG_MODE", 1);
  5. class ConvertCharset{
  6. var $RecognizedEncoding; // (boolean) This value keeps information if string contains multibyte chars.
  7. var $Entities; // (boolean) This value keeps information if output should be with numeric entities.
  8. var $FromCharset; // (string) This value keeps information about source (from) encoding
  9. var $ToCharset; // (string) This value keeps information about destination (to) encoding
  10. var $CharsetTable; // (array) This property keeps convert Table inside
  11. function ConvertCharset ($FromCharset, $ToCharset, $TurnOnEntities = false)
  12. {
  13. $this -> FromCharset = strtolower($FromCharset);
  14. $this -> ToCharset = strtolower($ToCharset);
  15. $this -> Entities = $TurnOnEntities;
  16. if ($this -> FromCharset == $this -> ToCharset)
  17. {
  18. print $this -> DebugOutput(1, 0, $this -> FromCharset);
  19. }
  20. if (($this -> FromCharset == $this -> ToCharset) AND ($this -> FromCharset == "utf-8"))
  21. {
  22. print $this -> DebugOutput(0, 4, $this -> FromCharset);
  23. exit;
  24. }
  25. if ($this -> FromCharset == "utf-8")
  26. {
  27. $this -> CharsetTable = $this -> MakeConvertTable ($this -> ToCharset);
  28. }
  29. else if ($this -> ToCharset == "utf-8")
  30. {
  31. $this -> CharsetTable = $this -> MakeConvertTable ($this -> FromCharset);
  32. }
  33. else
  34. {
  35. $this -> CharsetTable = $this -> MakeConvertTable ($this -> FromCharset, $this -> ToCharset);
  36. }
  37. }
  38. function UnicodeEntity ($UnicodeString)
  39. {
  40. $OutString = "";
  41. $StringLenght = strlen ($UnicodeString);
  42. for ($CharPosition = 0; $CharPosition < $StringLenght; $CharPosition++)
  43. {
  44. $Char = $UnicodeString [$CharPosition];
  45. $AsciiChar = ord ($Char);
  46. if ($AsciiChar < 128){
  47. $OutString .= $Char;
  48. }
  49. else if ($AsciiChar >> 5 == 6){
  50. $FirstByte = ($AsciiChar & 31);
  51. $CharPosition++;
  52. $Char = $UnicodeString [$CharPosition];
  53. $AsciiChar = ord ($Char);
  54. $SecondByte = ($AsciiChar & 63);
  55. $AsciiChar = ($FirstByte * 64) + $SecondByte;
  56. $Entity = sprintf ("&#%d;", $AsciiChar);
  57. $OutString .= $Entity;
  58. }
  59. else if ($AsciiChar >> 4 == 14){
  60. $FirstByte = ($AsciiChar & 31);
  61. $CharPosition++;
  62. $Char = $UnicodeString [$CharPosition];
  63. $AsciiChar = ord ($Char);
  64. $SecondByte = ($AsciiChar & 63);
  65. $CharPosition++;
  66. $Char = $UnicodeString [$CharPosition];
  67. $AsciiChar = ord ($Char);
  68. $ThidrByte = ($AsciiChar & 63);
  69. $AsciiChar = ((($FirstByte * 64) + $SecondByte) * 64) + $ThidrByte;
  70. $Entity = sprintf ("&#%d;", $AsciiChar);
  71. $OutString .= $Entity;
  72. }
  73. else if ($AsciiChar >> 3 == 30){
  74. $FirstByte = ($AsciiChar & 31);
  75. $CharPosition++;
  76. $Char = $UnicodeString [$CharPosition];
  77. $AsciiChar = ord ($Char);
  78. $SecondByte = ($AsciiChar & 63);
  79. $CharPosition++;
  80. $Char = $UnicodeString [$CharPosition];
  81. $AsciiChar = ord ($Char);
  82. $ThidrByte = ($AsciiChar & 63);
  83. $CharPosition++;
  84. $Char = $UnicodeString [$CharPosition];
  85. $AsciiChar = ord ($Char);
  86. $FourthByte = ($AsciiChar & 63);
  87. $AsciiChar = ((((($FirstByte * 64) + $SecondByte) * 64) + $ThidrByte) * 64) + $FourthByte;
  88. $Entity = sprintf ("&#%d;", $AsciiChar);
  89. $OutString .= $Entity;
  90. }
  91. }
  92. return $OutString;
  93. }
  94. function HexToUtf ($UtfCharInHex)
  95. {
  96. $OutputChar = "";
  97. $UtfCharInDec = hexdec($UtfCharInHex);
  98. if($UtfCharInDec < 128) $OutputChar .= chr($UtfCharInDec);
  99. else if($UtfCharInDec < 2048)$OutputChar .= chr(($UtfCharInDec >> 6) + 192) . chr(($UtfCharInDec & 63) + 128);
  100. else if($UtfCharInDec < 65536)$OutputChar .= chr(($UtfCharInDec >> 12) + 224) . chr((($UtfCharInDec >> 6) & 63) + 128) . chr(($UtfCharInDec & 63) + 128);
  101. else if($UtfCharInDec < 2097152)$OutputChar .= chr($UtfCharInDec >> 18 + 240) . chr((($UtfCharInDec >> 12) & 63) + 128) . chr(($UtfCharInDec >> 6) & 63 + 128) . chr($UtfCharInDec & 63 + 128);
  102. return $OutputChar;
  103. }
  104. function MakeConvertTable ($FromCharset, $ToCharset = '')
  105. {
  106. $ConvertTable = array();
  107. for($i = 0; $i < func_num_args(); $i++)
  108. {
  109. $FileName = func_get_arg($i);
  110. if (!is_file(CONVERT_TABLES_DIR . $FileName))
  111. {
  112. print $this -> DebugOutput(0, 0, CONVERT_TABLES_DIR . $FileName); //Print an error message
  113. exit;
  114. }
  115. $FileWithEncTabe = fopen(CONVERT_TABLES_DIR . $FileName, "r") or die(); //This die(); is just to make sure...
  116. while(!feof($FileWithEncTabe))
  117. {
  118. if($OneLine = trim(fgets($FileWithEncTabe, 1024)))
  119. {
  120. if (substr($OneLine, 0, 1) != "#")
  121. {
  122. $HexValue = preg_split ("/[\s,]+/", $OneLine, 3); //We need only first 2 values
  123. if (substr($HexValue[1], 0, 1) != "#")
  124. {
  125. $ArrayKey = strtoupper(str_replace(strtolower("0x"), "", $HexValue[1]));
  126. $ArrayValue = strtoupper(str_replace(strtolower("0x"), "", $HexValue[0]));
  127. $ConvertTable[func_get_arg($i)][$ArrayKey] = $ArrayValue;
  128. }
  129. } //if (substr($OneLine,...
  130. } //if($OneLine=trim(f...
  131. } //while(!feof($FirstFileWi...
  132. } //for($i = 0; $i < func_...
  133. if(!is_array($ConvertTable[$FromCharset])) $ConvertTable[$FromCharset] = array();
  134. if ((func_num_args() > 1) && (count($ConvertTable[$FromCharset]) == count($ConvertTable[$ToCharset])) && (count(array_diff_assoc($ConvertTable[$FromCharset], $ConvertTable[$ToCharset])) == 0))
  135. {
  136. print $this -> DebugOutput(1, 1, "$FromCharset, $ToCharset");
  137. }
  138. return $ConvertTable;
  139. }
  140. function Convert ($StringToChange)
  141. {
  142. if(!strlen($StringToChange)) return '';
  143. $StringToChange = (string)($StringToChange);
  144. if($this -> FromCharset == $this -> ToCharset) return $StringToChange;
  145. $NewString = "";
  146. if ($this -> FromCharset != "utf-8")
  147. {
  148. for ($i = 0; $i < strlen($StringToChange); $i++)
  149. {
  150. $HexChar = "";
  151. $UnicodeHexChar = "";
  152. $HexChar = strtoupper(dechex(ord($StringToChange[$i])));
  153. if (strlen($HexChar) == 1) $HexChar = "0" . $HexChar;
  154. if (($this -> FromCharset == "gsm0338") && ($HexChar == '1B')){
  155. $i++;
  156. $HexChar .= strtoupper(dechex(ord($StringToChange[$i])));
  157. }
  158. if ($this -> ToCharset != "utf-8")
  159. {
  160. if (in_array($HexChar, $this -> CharsetTable[$this -> FromCharset]))
  161. {
  162. $UnicodeHexChar = array_search($HexChar, $this -> CharsetTable[$this -> FromCharset]);
  163. $UnicodeHexChars = explode("+", $UnicodeHexChar);
  164. for($UnicodeHexCharElement = 0; $UnicodeHexCharElement < count($UnicodeHexChars); $UnicodeHexCharElement++)
  165. {
  166. if (array_key_exists($UnicodeHexChars[$UnicodeHexCharElement], $this -> CharsetTable[$this -> ToCharset]))
  167. {
  168. if ($this -> Entities == true)
  169. {
  170. $NewString .= $this -> UnicodeEntity($this -> HexToUtf($UnicodeHexChars[$UnicodeHexCharElement]));
  171. }
  172. else
  173. {
  174. $NewString .= chr(hexdec($this -> CharsetTable[$this -> ToCharset][$UnicodeHexChars[$UnicodeHexCharElement]]));
  175. }
  176. }
  177. else
  178. {
  179. print $this -> DebugOutput(0, 1, $StringToChange[$i]);
  180. }
  181. } //for($UnicodeH...
  182. }
  183. else
  184. {
  185. print $this -> DebugOutput(0, 2, $StringToChange[$i]);
  186. }
  187. }
  188. else
  189. {
  190. if (in_array("$HexChar", $this -> CharsetTable[$this -> FromCharset]))
  191. {
  192. $UnicodeHexChar = array_search($HexChar, $this -> CharsetTable[$this -> FromCharset]);
  193. $UnicodeHexChars = explode("+", $UnicodeHexChar);
  194. for($UnicodeHexCharElement = 0; $UnicodeHexCharElement < count($UnicodeHexChars); $UnicodeHexCharElement++)
  195. {
  196. if ($this -> Entities == true)
  197. {
  198. $NewString .= $this -> UnicodeEntity($this -> HexToUtf($UnicodeHexChars[$UnicodeHexCharElement]));
  199. }
  200. else
  201. {
  202. $NewString .= $this -> HexToUtf($UnicodeHexChars[$UnicodeHexCharElement]);
  203. }
  204. } // for
  205. }
  206. else
  207. {
  208. print $this -> DebugOutput(0, 2, $StringToChange[$i]);
  209. }
  210. }
  211. }
  212. }
  213. else if($this -> FromCharset == "utf-8")
  214. {
  215. $HexChar = "";
  216. $UnicodeHexChar = "";
  217. $this -> CharsetTable = $this -> MakeConvertTable ($this -> ToCharset);
  218. foreach ($this -> CharsetTable[$this -> ToCharset] as $UnicodeHexChar => $HexChar)
  219. {
  220. if ($this -> Entities == true){
  221. $EntitieOrChar = $this -> UnicodeEntity($this -> HexToUtf($UnicodeHexChar));
  222. }
  223. else
  224. {
  225. $EntitieOrChar = chr(hexdec($HexChar));
  226. }
  227. $StringToChange = str_replace($this -> HexToUtf($UnicodeHexChar), $EntitieOrChar, $StringToChange);
  228. }
  229. $NewString = $StringToChange;
  230. }
  231. return $NewString;
  232. }
  233. function ConvertArray(& $array)
  234. {
  235. if (!is_array($array))
  236. {
  237. $array = $this -> Convert($array);
  238. return;
  239. }
  240. while(list($k, $v) = each($array))
  241. {
  242. $this -> ConvertArray($v);
  243. $array[$k] = $v;
  244. }
  245. }
  246. function DebugOutput ($Group, $Number, $Value = false)
  247. {
  248. $Debug[0][0] = "Error, can NOT read file: " . $Value . "<br>";
  249. $Debug[0][1] = "Error, can't find maching char \"" . $Value . "\" in destination encoding table!" . "<br>";
  250. $Debug[0][2] = "Error, can't find maching char \"" . $Value . "\" in source encoding table!" . "<br>";
  251. $Debug[0][3] = "Error, you did NOT set variable " . $Value . " in Convert() function." . "<br>";
  252. $Debug[0][4] = "You can NOT convert string from " . $Value . " to " . $Value . "!" . "<BR>";
  253. $Debug[1][0] = "Notice, you are trying to convert string from " . $Value . " to " . $Value . ", don't you feel it's strange? ;-)" . "<br>";
  254. $Debug[1][1] = "Notice, both charsets " . $Value . " are identical! Check encoding tables files." . "<br>";
  255. $Debug[1][2] = "Notice, there is no unicode char in the string you are trying to convert." . "<br>";
  256. if (DEBUG_MODE >= $Group)
  257. {
  258. return $Debug[$Group][$Number];
  259. }
  260. } // function DebugOutput
  261. } //class ends here
  262. ?>