PageRenderTime 62ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 0ms

/includes/utf/utf_tools.php

http://github.com/MightyGorgon/icy_phoenix
PHP | 1992 lines | 1484 code | 137 blank | 371 comment | 129 complexity | 1ffe2fb20e765e90e54e132f5e536f1a MD5 | raw file
Possible License(s): AGPL-1.0

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. *
  4. * @package Icy Phoenix
  5. * @version $Id$
  6. * @copyright (c) 2008 Icy Phoenix
  7. * @license http://opensource.org/licenses/gpl-license.php GNU Public License
  8. *
  9. */
  10. /**
  11. *
  12. * @Icy Phoenix is based on phpBB
  13. * @copyright (c) 2008 phpBB Group
  14. *
  15. */
  16. if (!defined('IN_ICYPHOENIX'))
  17. {
  18. die('Hacking attempt');
  19. }
  20. // Enforce ASCII only string handling
  21. setlocale(LC_CTYPE, 'C');
  22. /**
  23. * UTF-8 tools
  24. *
  25. * Whenever possible, these functions will try to use PHP's built-in functions or
  26. * extensions, otherwise they will default to custom routines.
  27. *
  28. * @package utf
  29. */
  // utf8_encode() and utf8_decode() used to be provided by the XML extension,
  // but they are part of the PHP core since PHP 7.2. Test for the functions
  // themselves rather than for the extension: declaring a function that
  // already exists is a fatal "Cannot redeclare" error.
  if (!function_exists('utf8_encode'))
  {
      /**
      * Fallback for PHP's native utf8_encode
      *
      * ISO-8859-1 code points are identical to the first 256 Unicode code
      * points, so every byte above 0x7F becomes a two byte sequence:
      * 0x80-0xBF get the lead byte 0xC2 and keep their value, 0xC0-0xFF get
      * the lead byte 0xC3 and are shifted down by 0x40.
      *
      * @param string $str ISO-8859-1 encoded data
      * @return string UTF-8 encoded data
      */
      function utf8_encode($str)
      {
          $out = '';

          for ($i = 0, $len = strlen($str); $i < $len; $i++)
          {
              $letter = $str[$i];
              $num = ord($letter);

              if ($num < 0x80)
              {
                  // ASCII is copied verbatim
                  $out .= $letter;
              }
              else if ($num < 0xC0)
              {
                  $out .= "\xC2" . $letter;
              }
              else
              {
                  $out .= "\xC3" . chr($num - 64);
              }
          }

          return $out;
      }
  }

  if (!function_exists('utf8_decode'))
  {
      /**
      * Fallback for PHP's native utf8_decode
      *
      * Characters that do not fit into ISO-8859-1 (code point above 0xFF,
      * i.e. all three and four byte sequences) are replaced with '?'.
      *
      * @param string $str UTF-8 encoded data
      * @return string ISO-8859-1 encoded data
      */
      function utf8_decode($str)
      {
          $pos = 0;
          $len = strlen($str);
          $ret = '';

          while ($pos < $len)
          {
              $byte = ord($str[$pos]);
              // The high nibble of the lead byte tells the sequence length
              $ord = $byte & 0xF0;

              if ($ord === 0xC0 || $ord === 0xD0)
              {
                  if ($pos + 1 >= $len)
                  {
                      // Truncated two byte sequence at the very end of the
                      // string: emit a placeholder instead of reading past
                      // the end of the string.
                      $ret .= '?';
                      break;
                  }

                  $charval = (($byte & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
                  $pos += 2;
                  $ret .= (($charval < 256) ? chr($charval) : '?');
              }
              else if ($ord === 0xE0)
              {
                  // Three byte sequence, never representable in ISO-8859-1
                  $ret .= '?';
                  $pos += 3;
              }
              else if ($ord === 0xF0)
              {
                  // Four byte sequence, never representable in ISO-8859-1
                  $ret .= '?';
                  $pos += 4;
              }
              else
              {
                  $ret .= $str[$pos];
                  ++$pos;
              }
          }

          return $ret;
      }
  }
  100. // mbstring has been around for a long time and keeps its functions available for older versions of PHP.
  101. // If mbstring is not loaded, we fall back to the native (pure PHP) implementations.
  102. if (extension_loaded('mbstring'))
  103. {
  104. mb_internal_encoding('UTF-8');
  105. /**
  106. * UTF-8 aware alternative to strrpos
  107. * Find position of last occurrence of a char in a string
  108. *
  109. * Notes:
  110. * - offset for mb_strrpos was added in 5.2.0, we emulate if it is lower
  111. */
  112. if (version_compare(PHP_VERSION, '5.2.0', '>='))
  113. {
  /**
  * UTF-8 aware alternative to strrpos (PHP >= 5.2.0, where mb_strrpos
  * accepts an offset)
  *
  * @param string $str haystack
  * @param string $needle needle
  * @param integer $offset (optional) offset in characters, handed on to mb_strrpos
  * @return mixed integer position of the last occurrence or FALSE if not found
  * @ignore
  */
  function utf8_strrpos($str, $needle, $offset = null)
  {
      // Emulate behaviour of strrpos rather than raising a warning.
      // Do not use empty() here: empty('0') is true, which made a search
      // in the haystack "0" fail although the needle is present.
      if ($str === '' || $str === null || $str === false)
      {
          return false;
      }

      // Name the encoding explicitly so the result does not depend on the
      // current mb_internal_encoding() setting. An offset of 0 is
      // mb_strrpos' own default.
      return mb_strrpos($str, $needle, is_null($offset) ? 0 : $offset, 'UTF-8');
  }
  134. }
  135. else
  136. {
  /**
  * UTF-8 aware alternative to strrpos (PHP < 5.2.0, where mb_strrpos has
  * no offset parameter and the offset is emulated by cutting the haystack)
  *
  * @param string $str haystack
  * @param string $needle needle
  * @param integer $offset (optional) offset in characters (from left)
  * @return mixed integer position of the last occurrence or FALSE if not found
  * @ignore
  */
  function utf8_strrpos($str, $needle, $offset = null)
  {
      if (is_null($offset))
      {
          // Emulate behaviour of strrpos rather than raising a warning.
          // Do not use empty() here: empty('0') is true, which made a
          // search in the haystack "0" fail although the needle is present.
          if ($str === '' || $str === null || $str === false)
          {
              return false;
          }

          return mb_strrpos($str, $needle);
      }

      if (!is_int($offset))
      {
          trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
          return false;
      }

      // Search only the part behind the offset, then translate the position
      // found there back into a position within the full string.
      $tail = mb_substr($str, $offset);
      $pos = mb_strrpos($tail, $needle);

      return ($pos === false) ? false : $pos + $offset;
  }
  168. }
  /**
  * UTF-8 aware alternative to strpos
  *
  * @param string $str haystack
  * @param string $needle needle
  * @param integer $offset (optional) offset in characters, handed on to mb_strpos
  * @return mixed integer position of the first occurrence or FALSE if not found
  * @ignore
  */
  function utf8_strpos($str, $needle, $offset = null)
  {
      // Name the encoding explicitly (as utf8_strlen does) so the result
      // does not depend on the current mb_internal_encoding() setting.
      // An offset of 0 is mb_strpos' own default.
      return mb_strpos($str, $needle, is_null($offset) ? 0 : $offset, 'UTF-8');
  }
  /**
  * UTF-8 aware alternative to strtolower
  *
  * @param string $str UTF-8 string
  * @return string the string in lowercase
  * @ignore
  */
  function utf8_strtolower($str)
  {
      // Name the encoding explicitly (as utf8_strlen does) so the result
      // does not depend on the current mb_internal_encoding() setting.
      return mb_strtolower($str, 'UTF-8');
  }
  /**
  * UTF-8 aware alternative to strtoupper
  *
  * @param string $str UTF-8 string
  * @return string the string in uppercase
  * @ignore
  */
  function utf8_strtoupper($str)
  {
      // Name the encoding explicitly (as utf8_strlen does) so the result
      // does not depend on the current mb_internal_encoding() setting.
      return mb_strtoupper($str, 'UTF-8');
  }
  /**
  * UTF-8 aware alternative to substr
  *
  * @param string $str UTF-8 string
  * @param integer $offset offset in characters
  * @param integer $length (optional) length in characters; everything up to
  *   the end of the string is returned when omitted
  * @return string the requested part of the string
  * @ignore
  */
  function utf8_substr($str, $offset, $length = null)
  {
      if (is_null($length))
      {
          // mb_substr needs a length argument before the encoding can be
          // given. The full character count means "up to the end of the
          // string" on every PHP version, a literal null does not.
          $length = mb_strlen($str, 'UTF-8');
      }

      // Name the encoding explicitly (as utf8_strlen does) so the result
      // does not depend on the current mb_internal_encoding() setting.
      return mb_substr($str, $offset, $length, 'UTF-8');
  }
  /**
  * Count the characters (not the bytes) of a UTF-8 string
  *
  * @param string $text UTF-8 string
  * @return integer number of characters
  * @ignore
  */
  function utf8_strlen($text)
  {
      $char_count = mb_strlen($text, 'utf-8');

      return $char_count;
  }
  223. }
  224. else
  225. {
  /**
  * UTF-8 aware alternative to strrpos (pure PHP implementation)
  * Find the character position of the last occurrence of a needle
  *
  * The haystack is split at the needle; the character length of everything
  * in front of the last piece is the position searched for.
  *
  * @author Harry Fuecks
  * @param string $str haystack
  * @param string $needle needle
  * @param integer $offset (optional) offset in characters (from left)
  * @return mixed integer position or FALSE on failure
  */
  function utf8_strrpos($str, $needle, $offset = null)
  {
      if (!is_null($offset))
      {
          if (!is_int($offset))
          {
              trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
              return false;
          }

          // Search behind the offset only and shift the result back
          $tail = utf8_substr($str, $offset);
          $found = utf8_strrpos($tail, $needle);

          return ($found !== false) ? $found + $offset : false;
      }

      $pieces = explode($needle, $str);

      if (count($pieces) <= 1)
      {
          // The needle does not occur at all
          return false;
      }

      // Drop whatever follows the last match, glue the rest together again
      array_pop($pieces);

      return utf8_strlen(implode($needle, $pieces));
  }
  /**
  * UTF-8 aware alternative to strpos (pure PHP implementation)
  * Find the character position of the first occurrence of a needle
  *
  * The haystack is split at the needle; the character length of the first
  * piece is the position searched for.
  *
  * @author Harry Fuecks
  * @param string $str haystack
  * @param string $needle needle
  * @param integer $offset (optional) offset in characters (from left)
  * @return mixed integer position or FALSE on failure
  */
  function utf8_strpos($str, $needle, $offset = null)
  {
      if (!is_null($offset))
      {
          if (!is_int($offset))
          {
              trigger_error('utf8_strpos: Offset must be an integer', E_USER_ERROR);
              return false;
          }

          // Search behind the offset only and shift the result back
          $tail = utf8_substr($str, $offset);
          $found = utf8_strpos($tail, $needle);

          return ($found !== false) ? $found + $offset : false;
      }

      $pieces = explode($needle, $str);

      return (count($pieces) > 1) ? utf8_strlen($pieces[0]) : false;
  }
  /**
  * UTF-8 aware alternative to strtolower (pure PHP implementation)
  *
  * ASCII letters are handled by strtolower(), all other characters are
  * mapped through a fixed lookup table. Characters without an entry in
  * the table are returned unchanged.
  *
  * Note: The concept of "case" only exists in some alphabets such as Latin,
  * Greek, Cyrillic, Armenian and archaic Georgian - it does not exist in
  * the Chinese alphabet, for example. See Unicode Standard Annex #21:
  * Case Mappings
  *
  * @param string $string UTF-8 string
  * @return string the string in lowercase
  */
  function utf8_strtolower($string)
  {
      // upper case character => lower case character
      static $case_map = array(
          "\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
          "\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
          "\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
          "\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
          "\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
          "\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
          "\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
          "\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
          "\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
          "\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
          "\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
          "\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
          "\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
          "\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
          "\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
          "\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
          "\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
          "\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
          "\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
          "\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
          "\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
          "\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
          "\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
          "\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
          "\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
          "\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
          "\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
          "\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
          "\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
          "\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
          "\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
          "\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
          "\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
          "\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
          "\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
          "\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
          "\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
          "\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
          "\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
          "\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
          "\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
          "\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
          "\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
          "\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
          "\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
          "\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
      );

      // ASCII first, then the multi-byte characters via the table
      $ascii_lowered = strtolower($string);

      return strtr($ascii_lowered, $case_map);
  }
  /**
  * UTF-8 aware alternative to strtoupper (pure PHP implementation)
  *
  * ASCII letters are handled by strtoupper(), all other characters are
  * mapped through a fixed lookup table. Characters without an entry in
  * the table are returned unchanged.
  *
  * Note: The concept of "case" only exists in some alphabets such as Latin,
  * Greek, Cyrillic, Armenian and archaic Georgian - it does not exist in
  * the Chinese alphabet, for example. See Unicode Standard Annex #21:
  * Case Mappings
  *
  * @param string $string UTF-8 string
  * @return string the string in uppercase
  */
  function utf8_strtoupper($string)
  {
      // lower case character => upper case character
      static $case_map = array(
          "\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
          "\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
          "\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
          "\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
          "\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
          "\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
          "\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
          "\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
          "\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
          "\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
          "\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
          "\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
          "\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
          "\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
          "\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
          "\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
          "\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
          "\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
          "\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
          "\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
          "\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
          "\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
          "\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
          "\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
          "\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
          "\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
          "\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
          "\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
          "\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
          "\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
          "\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
          "\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
          "\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
          "\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
          "\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
          "\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
          "\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
          "\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
          "\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
          "\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
          "\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
          "\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
          "\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
          "\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
          "\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
          "\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
      );

      // ASCII first, then the multi-byte characters via the table
      $ascii_raised = strtoupper($string);

      return strtr($ascii_raised, $case_map);
  }
  /**
  * Build the PCRE fragment that matches exactly $count characters
  *
  * PCRE only supports repetition counts below 65536, so larger counts are
  * expressed as a repeated group of 65535 characters plus the remainder.
  *
  * @param integer $count number of characters to match (0 or more)
  * @return string pattern fragment, without delimiters or capturing group
  */
  function utf8_substr_span_pattern($count)
  {
      $full_groups = (int) ($count / 65535);
      $remainder = $count % 65535;
      $groups = ($full_groups) ? '(?:.{65535}){' . $full_groups . '}' : '';

      return $groups . '.{' . $remainder . '}';
  }

  /**
  * UTF-8 aware alternative to substr (pure PHP implementation)
  * Return part of a string given a character offset (and optionally length)
  *
  * Arguments: unlike substr, an offset or length that is not an integer
  * is silently cast to one.
  *
  * Return value: like mb_substr (and unlike substr) this never returns
  * false, an empty string is returned instead.
  *
  * Implementation: the part is cut out with a regular expression of the
  * form "skip offset characters, capture length characters". Counting the
  * characters of the string is relatively expensive, so it only happens
  * when a negative offset or a length is given.
  *
  * @author Chris Smith<chris@jalakai.co.uk>
  * @param string $str
  * @param integer $offset number of UTF-8 characters offset (from left)
  * @param integer $length (optional) length in UTF-8 characters from offset
  * @return string the requested part, or an empty string if there is none
  */
  function utf8_substr($str, $offset, $length = NULL)
  {
      $str = (string) $str;
      $offset = (int) $offset;
      $has_length = !is_null($length);

      if ($has_length)
      {
          $length = (int) $length;
      }

      // Requests that can only produce an empty result
      if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
      {
          return '';
      }

      // Character count of the string, determined lazily
      $total = null;

      // Turn a negative offset into one counted from the left (a pattern
      // anchored at the tail would work too, but is horribly slow)
      if ($offset < 0)
      {
          $total = utf8_strlen($str);
          $offset = max(0, $total + $offset);
      }

      // Part of the pattern that skips the first $offset characters
      $skip = ($offset > 0) ? '^(?:' . utf8_substr_span_pattern($offset) . ')' : '^';

      // Part of the pattern that captures the result
      $take = '';

      if (!$has_length)
      {
          // Everything up to the end of the string
          $take = '(.*)$';
      }
      else
      {
          if (is_null($total))
          {
              $total = utf8_strlen($str);
          }

          if ($offset > $total)
          {
              return '';
          }

          if ($length > 0)
          {
              // Never ask for more characters than are left
              $length = min($total - $offset, $length);
              $take = '(' . utf8_substr_span_pattern($length) . ')';
          }
          else if ($length < 0)
          {
              if ($length < ($offset - $total))
              {
                  return '';
              }

              // Capture everything except the last -$length characters
              $take = '(.*)(?:' . utf8_substr_span_pattern(-$length) . ')$';
          }
      }

      if (!preg_match('#' . $skip . $take . '#us', $str, $match))
      {
          return '';
      }

      return $match[1];
  }
  /**
  * Return the length (in characters) of a UTF-8 string
  *
  * Every UTF-8 character consists of exactly one byte that is not a
  * continuation byte (0x80-0xBF), optionally followed by continuation
  * bytes. Removing the continuation bytes therefore leaves one byte per
  * character. This replaces the former strlen(utf8_decode()) approach,
  * because utf8_decode() is deprecated as of PHP 8.2. Results are identical
  * for well-formed UTF-8; in malformed input a stray continuation byte is
  * not counted.
  *
  * @param string $text UTF-8 string
  * @return integer Length (in chars) of given string
  */
  function utf8_strlen($text)
  {
      // No /u modifier: the pattern deliberately works on single bytes
      return strlen(preg_replace('/[\x80-\xBF]+/', '', (string) $text));
  }
  568. }
  /**
  * UTF-8 aware alternative to str_split
  * Convert a string to an array of chunks
  *
  * @author Harry Fuecks
  * @param string $str UTF-8 encoded
  * @param int $split_len number of characters per chunk, at least 1
  * @return mixed array of chunks (the last one may be shorter), or FALSE
  *   if $split_len is invalid or the string cannot be split
  */
  function utf8_str_split($str, $split_len = 1)
  {
      if (!is_int($split_len) || $split_len < 1)
      {
          return false;
      }

      // Nothing to split if the whole string fits into one chunk
      $len = utf8_strlen($str);
      if ($len <= $split_len)
      {
          return array($str);
      }

      // A greedy ".{1,n}" yields full chunks followed by the shorter
      // remainder. The former pattern matched the remainder with
      // "[^\x00]{1,n}$" and thereby silently dropped a trailing chunk that
      // contained a NUL byte.
      if (!preg_match_all('/.{1,' . $split_len . '}/us', $str, $ar))
      {
          // The pattern did not match, e.g. because of malformed UTF-8
          return false;
      }

      return $ar[0];
  }
  /**
  * UTF-8 aware alternative to strspn
  * Find the length of the initial segment consisting only of mask characters
  *
  * @author Harry Fuecks
  * @param string $str UTF-8 string to examine
  * @param string $mask characters that are allowed in the segment; every
  *   character is taken literally
  * @param integer $start (optional) character offset at which to start
  * @param integer $length (optional) number of characters to examine
  * @return integer length of the segment in characters
  */
  function utf8_strspn($str, $mask, $start = null, $length = null)
  {
      if ($start !== null || $length !== null)
      {
          $str = utf8_substr($str, $start, $length);
      }

      // An empty mask cannot match anything (and "[]" is not a valid
      // character class)
      if ((string) $mask === '')
      {
          return 0;
      }

      // The mask ends up inside a character class, so characters such as
      // ^ ] \ - and the delimiter have to be escaped. Unescaped, they
      // inverted the class or broke the pattern altogether.
      $char_class = preg_quote($mask, '/');

      if (preg_match('/^[' . $char_class . ']+/u', $str, $matches))
      {
          return utf8_strlen($matches[0]);
      }

      return 0;
  }
  /**
  * UTF-8 aware alternative to ucfirst
  * Turn the first character of a string into upper case
  *
  * @author Harry Fuecks
  * @param string $str UTF-8 string
  * @return string the string with its first character in upper case (if applicable)
  */
  function utf8_ucfirst($str)
  {
      $char_count = utf8_strlen($str);

      if ($char_count == 0)
      {
          return '';
      }

      if ($char_count == 1)
      {
          return utf8_strtoupper($str);
      }

      // Separate the first character from the rest of the string
      preg_match('/^(.{1})(.*)$/us', $str, $parts);

      return utf8_strtoupper($parts[1]) . $parts[2];
  }
  /**
  * Make sure the transcoder function $converter is available
  *
  * If the function does not exist yet, the data file that defines it is
  * included. A missing data file raises an E_USER_ERROR.
  *
  * @param string $converter name of the transcoder function that is needed
  * @param string $data_set 'cjk' for the CJK transcoders, anything else for the basic ones
  */
  function utf8_recode_load_transcoders($converter, $data_set)
  {
      if (function_exists($converter))
      {
          return;
      }

      if ($data_set == 'cjk')
      {
          $data_file = IP_ROOT_PATH . 'includes/utf/data/recode_cjk.' . PHP_EXT;
          $missing = 'CJK reencoder file is missing';
      }
      else
      {
          $data_file = IP_ROOT_PATH . 'includes/utf/data/recode_basic.' . PHP_EXT;
          $missing = 'Basic reencoder file is missing';
      }

      if (!file_exists($data_file))
      {
          trigger_error($missing, E_USER_ERROR);
      }

      include($data_file);
  }

  /**
  * Recode a string to UTF-8
  *
  * iconv(), mb_convert_encoding() and recode_string() are tried in this
  * order (each only if it is available) and the first non-empty result is
  * returned. If none of them delivers, the transcoders shipped in
  * includes/utf/data/ are used. An encoding that no transcoder handles
  * raises an E_USER_ERROR.
  *
  * @param string $string Original string
  * @param string $encoding Original encoding (compared case-insensitively)
  * @return string The string, encoded in UTF-8. UTF-8 input, non-strings
  *   and empty values are returned unchanged
  */
  function utf8_recode($string, $encoding)
  {
      $encoding = strtolower($encoding);

      if ($encoding == 'utf-8' || !is_string($string) || empty($string))
      {
          return $string;
      }

      if ($encoding == 'iso-8859-1')
      {
          // iso-8859-1 is always treated as cp1252
          $encoding = 'cp1252';
      }
      else if ($encoding == 'iso-8859-8-i')
      {
          // Logical hebrew is converted to visual hebrew (iso-8859-8)
          $encoding = 'iso-8859-8';
          $string = hebrev($string);
      }

      // First choice: iconv()
      if (function_exists('iconv'))
      {
          $converted = @iconv($encoding, 'utf-8', $string);
          if (!empty($converted))
          {
              return $converted;
          }
      }

      // Second choice: mbstring, but only for encodings known to be safe
      // (mbstring is nasty on PHP4, it must be handed a good encoding)
      if (function_exists('mb_convert_encoding'))
      {
          $mb_encodings = array(
              'iso-8859-1', 'iso-8859-2', 'iso-8859-4', 'iso-8859-7', 'iso-8859-9', 'iso-8859-15',
              'windows-1251', 'windows-1252', 'cp1252',
              'shift_jis', 'euc-kr', 'big5', 'gb2312'
          );

          if (in_array($encoding, $mb_encodings))
          {
              $converted = @mb_convert_encoding($string, 'utf-8', $encoding);
              if (!empty($converted))
              {
                  return $converted;
              }
          }
      }

      // Third choice: the recode extension
      if (function_exists('recode_string'))
      {
          $converted = @recode_string($encoding . '..utf-8', $string);
          if (!empty($converted))
          {
              return $converted;
          }
      }

      // From here on the custom transcoders are used. The encoding name
      // has to be alphanumeric, it must not be abused to load arbitrary files.
      if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding))
      {
          trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
      }

      // iso-8859-* character encodings
      if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array))
      {
          $iso_part = $array[1];

          if (in_array($iso_part, array('1', '2', '4', '7', '8', '9', '15')))
          {
              utf8_recode_load_transcoders('iso_8859_' . $iso_part, 'basic');
              return call_user_func('iso_8859_' . $iso_part, $string);
          }

          trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
      }

      // CP/WIN character encodings
      if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array))
      {
          $codepage = $array[1];

          if (in_array($codepage, array('1250', '1251', '1252', '1254', '1255', '1256', '1257', '874')))
          {
              utf8_recode_load_transcoders('cp' . $codepage, 'basic');
              return call_user_func('cp' . $codepage, $string);
          }

          // Code page 932 is not an error, it is picked up by the SJIS
          // check further down
          if ($codepage != '932')
          {
              trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
          }
      }

      // Remaining encodings in the order they are checked:
      // name pattern, transcoder function, data set defining the function
      $transcoders = array(
          // TIS-620
          array('/tis[_ -]?620/', 'tis_620', 'basic'),
          // SJIS
          array('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', 'sjis', 'cjk'),
          // EUC_KR
          array('/euc[_ -]?kr/', 'euc_kr', 'cjk'),
          // BIG-5
          array('/big[_ -]?5/', 'big5', 'cjk'),
          // GB2312
          array('/gb[_ -]?2312/', 'gb2312', 'cjk')
      );

      foreach ($transcoders as $transcoder)
      {
          if (preg_match($transcoder[0], $encoding))
          {
              utf8_recode_load_transcoders($transcoder[1], $transcoder[2]);
              return call_user_func($transcoder[1], $string);
          }
      }

      // No transcoder is available for this encoding
      trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
  }
  /**
  * Replace all UTF-8 chars that are not in ASCII with their NCR
  *
  * Every multi-byte sequence (a lead byte 0xC2-0xF4 followed by one to
  * three continuation bytes) is handed to utf8_encode_ncr_callback().
  *
  * @param string $text UTF-8 string in NFC
  * @return string ASCII string using NCRs for non-ASCII chars
  */
  function utf8_encode_ncr($text)
  {
      $multibyte_char = '#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#';

      return preg_replace_callback($multibyte_char, 'utf8_encode_ncr_callback', $text);
  }
  /**
   * Callback used by utf8_encode_ncr()
   *
   * Receives the match array for one multi-byte UTF-8 sequence and builds the
   * string "&#<value>;" where <value> is whatever utf8_ord() returns for the
   * matched bytes. Attention, $m is an array.
   *
   * @param array $m 0-based numerically indexed array passed by preg_replace_callback()
   * @return string The matched character written as an HTML NCR
   */
  function utf8_encode_ncr_callback($m)
  {
      $matched_char = $m[0];
      $code_point = utf8_ord($matched_char);

      return '&#' . $code_point . ';';
  }
  /**
   * Returns the Unicode code point of a single UTF-8 encoded character
   *
   * The byte length of the input selects the decoding formula: the payload bits
   * of the lead byte and of each continuation byte (low six bits) are shifted
   * into place and combined. No validation of the byte values is performed.
   *
   * @param string $chr UTF-8 char (1 to 4 bytes)
   * @return integer UNICODE code point; input of any other byte length is returned unchanged
   */
  function utf8_ord($chr)
  {
      $byte_count = strlen($chr);

      // Plain ASCII: the byte value is the code point
      if ($byte_count === 1)
      {
          return ord($chr);
      }

      // 110xxxxx 10xxxxxx
      if ($byte_count === 2)
      {
          $lead = ord($chr[0]) & 0x1F;
          $tail = ord($chr[1]) & 0x3F;

          return ($lead << 6) | $tail;
      }

      // 1110xxxx 10xxxxxx 10xxxxxx
      if ($byte_count === 3)
      {
          $lead = ord($chr[0]) & 0x0F;
          $middle = ord($chr[1]) & 0x3F;
          $tail = ord($chr[2]) & 0x3F;

          return ($lead << 12) | ($middle << 6) | $tail;
      }

      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      if ($byte_count === 4)
      {
          $lead = ord($chr[0]) & 0x07;
          $second = ord($chr[1]) & 0x3F;
          $third = ord($chr[2]) & 0x3F;
          $tail = ord($chr[3]) & 0x3F;

          return ($lead << 18) | ($second << 12) | ($third << 6) | $tail;
      }

      // Empty input or more than four bytes: hand the input back untouched
      return $chr;
  }
  /**
   * Encodes a Unicode code point as a UTF-8 character
   *
   * The magnitude of the code point selects a 4-, 3-, 2- or 1-byte encoding.
   * The value is not checked against the valid Unicode range.
   *
   * @param int $cp UNICODE code point
   * @return string UTF-8 char
   */
  function utf8_chr($cp)
  {
      // Above the BMP: four bytes
      if ($cp > 0xFFFF)
      {
          $lead = chr(0xF0 | ($cp >> 18));
          $second = chr(0x80 | (($cp >> 12) & 0x3F));
          $third = chr(0x80 | (($cp >> 6) & 0x3F));
          $last = chr(0x80 | ($cp & 0x3F));

          return $lead . $second . $third . $last;
      }

      // U+0800 - U+FFFF: three bytes
      if ($cp > 0x7FF)
      {
          $lead = chr(0xE0 | ($cp >> 12));
          $middle = chr(0x80 | (($cp >> 6) & 0x3F));
          $last = chr(0x80 | ($cp & 0x3F));

          return $lead . $middle . $last;
      }

      // U+0080 - U+07FF: two bytes
      if ($cp > 0x7F)
      {
          $lead = chr(0xC0 | ($cp >> 6));
          $last = chr(0x80 | ($cp & 0x3F));

          return $lead . $last;
      }

      // ASCII range: the code point is the byte
      return chr($cp);
  }
  /**
   * Replace Numeric Character References with the UTF-8 chars they denote
   *
   * Recognised forms are "&#" + 1 to 6 decimal digits + ";" and
   * "&#x" + 1 to 5 hexadecimal digits + ";" (case-insensitive). Each match is
   * handed to utf8_decode_ncr_callback().
   *
   * Notes:
   * - NCRs are not converted recursively, if you pass &#38;#38; it will return &#38;
   * - the existence of the Unicode character is NOT checked, therefore an entity
   *   may be converted to an inexistent codepoint
   *
   * @param string $text String to convert, encoded in UTF-8 (no normal form required)
   * @return string UTF-8 string where NCRs have been replaced with the actual chars
   */
  function utf8_decode_ncr($text)
  {
      $ncr_pattern = '/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i';
      $decoded = preg_replace_callback($ncr_pattern, 'utf8_decode_ncr_callback', $text);

      return $decoded;
  }
  /**
   * Callback used by utf8_decode_ncr()
   *
   * Takes the captured body of an NCR (decimal digits, or "x" followed by
   * hexadecimal digits), turns it into a code point and passes that to
   * utf8_chr(). Attention, $m is an array.
   *
   * @param array $m 0-based numerically indexed array passed by preg_replace_callback()
   * @return string UTF-8 char
   */
  function utf8_decode_ncr_callback($m)
  {
      $reference = $m[1];

      // strncasecmp() yields 0 when the first character is "x" or "X", i.e. a hexadecimal NCR
      if (strncasecmp($reference, 'x', 1))
      {
          $cp = $reference;
      }
      else
      {
          $cp = hexdec(substr($reference, 1));
      }

      return utf8_chr($cp);
  }
  /**
   * Case folds a unicode string as per Unicode 5.0, section 3.13
   *
   * The mapping tables are obtained by including data files and are cached in a
   * static array, so each table is loaded at most once per request. The common
   * table is always applied first, followed by either the full table (when
   * $option is exactly 'full') or the simple table (any other value).
   *
   * @param string $text text to be case folded
   * @param string $option determines how we will fold the cases
   * @return string case folded text
   */
  function utf8_case_fold($text, $option = 'full')
  {
      static $fold_tables = array();

      // Data file (without extension) backing each table key
      $table_files = array(
          'c' => 'includes/utf/data/case_fold_c.',
          'f' => 'includes/utf/data/case_fold_f.',
          's' => 'includes/utf/data/case_fold_s.',
      );

      // Full folding may replace one character with several; simple is one-to-one
      $variant = ($option === 'full') ? 'f' : 's';

      // Lazily load the common table first, then the requested variant
      foreach (array('c', $variant) as $key)
      {
          if (!isset($fold_tables[$key]))
          {
              $fold_tables[$key] = include(IP_ROOT_PATH . $table_files[$key] . PHP_EXT);
          }
      }

      $folded = strtr($text, $fold_tables['c']);
      $folded = strtr($folded, $fold_tables[$variant]);

      return $folded;
  }
  978. /**
  979. * Takes the input and does a "special" case fold. It does minor normalization
  980. * and returns NFKC compatable text
  981. *
  982. * @param string $text text to be case folded
  983. * @param string $option determines how we will fold the cases
  984. * @return string case folded text
  985. */
  986. function utf8_case_fold_nfkc($text, $option = 'full')
  987. {
  988. static $fc_nfkc_closure = array(
  989. "\xCD\xBA" => "\x20\xCE\xB9",
  990. "\xCF\x92" => "\xCF\x85",
  991. "\xCF\x93" => "\xCF\x8D",
  992. "\xCF\x94" => "\xCF\x8B",
  993. "\xCF\xB2" => "\xCF\x83",
  994. "\xCF\xB9" => "\xCF\x83",
  995. "\xE1\xB4\xAC" => "\x61",
  996. "\xE1\xB4\xAD" => "\xC3\xA6",
  997. "\xE1\xB4\xAE" => "\x62",
  998. "\xE1\xB4\xB0" => "\x64",
  999. "\xE1\xB4\xB1" => "\x65",
  1000. "\xE1\xB4\xB2" => "\xC7\x9D",
  1001. "\xE1\xB4\xB3" => "\x67",
  1002. "\xE1\xB4\xB4" => "\x68",
  1003. "\xE1\xB4\xB5" => "\x69",
  1004. "\xE1\xB4\xB6" => "\x6A",
  1005. "\xE1\xB4\xB7" => "\x6B",
  1006. "\xE1\xB4\xB8" => "\x6C",
  1007. "\xE1\xB4\xB9" => "\x6D",
  1008. "\xE1\xB4\xBA" => "\x6E",
  1009. "\xE1\xB4\xBC" => "\x6F",
  1010. "\xE1\xB4\xBD" => "\xC8\xA3",
  1011. "\xE1\xB4\xBE" => "\x70",
  1012. "\xE1\xB4\xBF" => "\x72",
  1013. "\xE1\xB5\x80" => "\x74",
  1014. "\xE1\xB5\x81" => "\x75",
  1015. "\xE1\xB5\x82" => "\x77",
  1016. "\xE2\x82\xA8" => "\x72\x73",
  1017. "\xE2\x84\x82" => "\x63",
  1018. "\xE2\x84\x83" => "\xC2\xB0\x63",
  1019. "\xE2\x84\x87" => "\xC9\x9B",
  1020. "\xE2\x84\x89" => "\xC2\xB0\x66",
  1021. "\xE2\x84\x8B" => "\x68",
  1022. "\xE2\x84\x8C" => "\x68",
  1023. "\xE2\x84\x8D" => "\x68",
  1024. "\xE2\x84\x90" => "\x69",
  1025. "\xE2\x84\x91" => "\x69",
  1026. "\xE2\x84\x92" => "\x6C",
  1027. "\xE2\x84\x95" => "\x6E",
  1028. "\xE2\x84\x96" => "\x6E\x6F",
  1029. "\xE2\x84\x99" => "\x70",
  1030. "\xE2\x84\x9A" => "\x71",
  1031. "\xE2\x84\x9B" => "\x72",
  1032. "\xE2\x84\x9C" => "\x72",
  1033. "\xE2\x84\x9D" => "\x72",
  1034. "\xE2\x84\xA0" => "\x73\x6D",
  1035. "\xE2\x84\xA1" => "\x74\x65\x6C",
  1036. "\xE2\x84\xA2" => "\x74\x6D",
  1037. "\xE2\x84\xA4" => "\x7A",
  1038. "\xE2\x84\xA8" => "\x7A",
  1039. "\xE2\x84\xAC" => "\x62",
  1040. "\xE2\x84\xAD" => "\x63",
  1041. "\xE2\x84\xB0" => "\x65",
  1042. "\xE2\x84\xB1" => "\x66",
  1043. "\xE2\x84\xB3" => "\x6D",
  1044. "\xE2\x84\xBB" => "\x66\x61\x78",
  1045. "\xE2\x84\xBE" => "\xCE\xB3",
  1046. "\xE2\x84\xBF" => "\xCF\x80",
  1047. "\xE2\x85\x85" => "\x64",
  1048. "\xE3\x89\x90" => "\x70\x74\x65",
  1049. "\xE3\x8B\x8C" => "\x68\x67",
  1050. "\xE3\x8B\x8E" => "\x65\x76",
  1051. "\xE3\x8B\x8F" => "\x6C\x74\x64",
  1052. "\xE3\x8D\xB1" => "\x68\x70\x61",
  1053. "\xE3\x8D\xB3" => "\x61\x75",
  1054. "\xE3\x8D\xB5" => "\x6F\x76",
  1055. "\xE3\x8D\xBA" => "\x69\x75",
  1056. "\xE3\x8E\x80" => "\x70\x61",
  1057. "\xE3\x8E\x81" => "\x6E\x61",
  1058. "\xE3\x8E\x82" => "\xCE\xBC\x61",
  1059. "\xE3\x8E\x83" => "\x6D\x61",
  1060. "\xE3\x8E\x84" => "\x6B\x61",
  1061. "\xE3\x8E\x85" => "\x6B\x62",
  1062. "\xE3\x8E\x86" => "\x6D\x62",
  1063. "\xE3\x8E\x87" => "\x67\x62",
  1064. "\xE3\x8E\x8A" => "\x70\x66",
  1065. "\xE3\x8E\x8B" => "\x6E\x66",
  1066. "\xE3\x8E\x8C" => "\xCE\xBC\x66",
  1067. "\xE3\x8E\x90" => "\x68\x7A",
  1068. "\xE3\x8E\x91" => "\x6B\x68\x7A",
  1069. "\xE3\x8E\x92" => "\x6D\x68\x7A",
  1070. "\xE3\x8E\x93" => "\x67\x68\x7A",
  1071. "\xE3\x8E\x94" => "\x74\x68\x7A",
  1072. "\xE3\x8E\xA9" => "\x70\x61",
  1073. "\xE3\x8E\xAA" => "\x6B\x70\x61",
  1074. "\xE3\x8E\xAB" => "\x6D\x70\x61",
  1075. "\xE3\x8E\xAC" => "\x67\x70\x61",
  1076. "\xE3\x8E\xB4" => "\x70\x76",
  1077. "\xE3\x8E\xB5" => "\x6E\x76",
  1078. "\xE3\x8E\xB6" => "\xCE\xBC\x76",
  1079. "\xE3\x8E\xB7" => "\x6D\x76",
  1080. "\xE3\x8E\xB8" => "\x6B\x76",
  1081. "\xE3\x8E\xB9" => "\x6D\x76",
  1082. "\xE3\x8E\xBA" => "\x70\x77",
  1083. "\xE3\x8E\xBB" => "\x6E\x77",
  1084. "\xE3\x8E\xBC" => "\xCE\xBC\x77",
  1085. "\xE3\x8E\xBD" => "\x6D\x77",
  1086. "\xE3\x8E\xBE" => "\x6B\x77",
  1087. "\xE3\x8E\xBF" => "\x6D\x77",
  1088. "\xE3\x8F\x80" => "\x6B\xCF\x89",
  1089. "\xE3\x8F\x81" => "\x6D\xCF\x89",
  1090. "\xE3\x8F\x83" => "\x62\x71",
  1091. "\xE3\x8F\x86" => "\x63\xE2\x88\x95\x6B\x67",
  1092. "\xE3\x8F\x87" => "\x63\x6F\x2E",
  1093. "\xE3\x8F\x88" => "\x64\x62",
  1094. "\xE3\x8F\x89" => "\x67\x79",
  1095. "\xE3\x8F\x8B" => "\x68\x70",
  1096. "\xE3\x8F\x8D" => "\x6B\x6B",
  1097. "\xE3\x8F\x8E" => "\x6B\x6D",
  1098. "\xE3\x8F\x97" => "\x70\x68",
  1099. "\xE3\x8F\x99" => "\x70\x70\x6D",
  1100. "\xE3\x8F\x9A" => "\x70\x72",
  1101. "\xE3\x8F\x9C" => "\x73\x76",
  1102. "\xE3\x8F\x9D" => "\x77\x62",
  1103. "\xE3\x8F\x9E" => "\x76\xE2\x88\x95\x6D",
  1104. "\xE3\x8F\x9F" => "\x61\xE2\x88\x95\x6D",
  1105. "\xF0\x9D\x90\x80" => "\x61",
  1106. "\xF0\x9D\x90\x81" => "\x62",
  1107. "\xF0\x9D\x90\x82" => "\x63",
  1108. "\xF0\x9D\x90\x83" => "\x64",
  1109. "\xF0\x9D\x90\x84" => "\x65",
  1110. "\xF0\x9D\x90\x85" => "\x66",
  1111. "\xF0\x9D\x90\x86" => "\x67",
  1112. "\xF0\x9D\x90\x87" => "\x68",
  1113. "\xF0\x9D\x90\x88" => "\x69",
  1114. "\xF0\x9D\x90\x89" => "\x6A",
  1115. "\xF0\x9D\x90\x8A" => "\x6B",
  1116. "\xF0\x9D\x90\x8B" => "\x6C",
  1117. "\xF0\x9D\x90\x8C" => "\x6D",
  1118. "\xF0\x9D\x90\x8D" => "\x6E",
  1119. "\xF0\x9D\x90\x8E" => "\x6F",
  1120. "\xF0\x9D\x90\x8F" => "\x70",
  1121. "\xF0\x9D\x90\x90" => "\x71",
  1122. "\xF0\x9D\x90\x91" => "\x72",
  1123. "\xF0\x9D\x90\x92" => "\x73",
  1124. "\xF0\x9D\x90\x93" => "\x74",
  1125. "\xF0\x9D\x90\x94" => "\x75",
  1126. "\xF0\x9D\x90\x95" => "\x76",
  1127. "\xF0\x9D\x90\x96" => "\x77",
  1128. "\xF0\x9D\x90\x97" => "\x78",
  1129. "\xF0\x9D\x90\x98" => "\x79",
  1130. "\xF0\x9D\x90\x99" => "\x7A",
  1131. "\xF0\x9D\x90\xB4" => "\x61",
  1132. "\xF0\x9D\x90\xB5" => "\x62",
  1133. "\xF0\x9D\x90\xB6" => "\x63",
  1134. "\xF0\x9D\x90\xB7" => "\x64",
  1135. "\xF0\x9D\x90\xB8" => "\x65",
  1136. "\xF0\x9D\x90\xB9" => "\x66",
  1137. "\xF0\x9D\x90\xBA" => "\x67",
  1138. "\xF0\x9D\x90\xBB" => "\x68",
  1139. "\xF0\x9D\x90\xBC" => "\x69",
  1140. "\xF0\x9D\x90\xBD" => "\x6A",
  1141. "\xF0\x9D\x90\xBE" => "\x6B",
  1142. "\xF0\x9D\x90\xBF" => "\x6C",
  1143. "\xF0\x9D\x91\x80" => "\x6D",
  1144. "\xF0\x9D\x91\x81" => "\x6E",
  1145. "\xF0\x9D\x91\x82" => "\x6F",
  1146. "\xF0\x9D\x91\x83" => "\x70",
  1147. "\xF0\x9D\x91\x84" => "\x71",
  1148. "\xF0\x9D\x91\x85" => "\x72",
  1149. "\xF0\x9D\x91\x86" => "\x73",
  1150. "\xF0\x9D\x91\x87" => "\x74",
  1151. "\xF0\x9D\x91\x88" => "\x75",
  1152. "\xF0\x9D\x91\x89" => "\x76",
  1153. "\xF0\x9D\x91\x8A" => "\x77",
  1154. "\xF0\x9D\x91\x8B" => "\x78",
  1155. "\xF0\x9D\x91\x8C" => "\x79",
  1156. "\xF0\x9D\x91\x8D" => "\x7A",
  1157. "\xF0\x9D\x91\xA8" => "\x61",
  1158. "\xF0\x9D\x91\xA9" => "\x62",
  1159. "\xF0\x9D\x91\xAA" => "\x63",
  1160. "\xF0\x9D\x91\xAB" => "\x64",
  1161. "\xF0\x9D\x91\xAC" => "\x65",
  1162. "\xF0\x9D\x91\xAD" => "\x66",
  1163. "\xF0\x9D\x91\xAE" => "\x67",
  1164. "\xF0\x9D\x91\xAF" => "\x68",
  1165. "\xF0\x9D\x91\xB0" => "\x69",
  1166. "\xF0\x9D\x91\xB1" => "\x6A",
  1167. "\xF0\x9D\x91\xB2" => "\x6B",
  1168. "\xF0\x9D\x91\xB3" => "\x6C",
  1169. "\xF0\x9D\x91\xB4" => "\x6D",
  1170. "\xF0\x9D\x91\xB5" => "\x6E",
  1171. "\xF0\x9D\x91\xB6" => "\x6F",
  1172. "\xF0\x9D\x91\xB7" => "\x70",
  1173. "\xF0\x9D\x91\xB8" => "\x71",
  1174. "\xF0\x9D\x91\xB9" => "\x72",
  1175. "\xF0\x9D\x91\xBA" => "\x73",
  1176. "\xF0\x9D\x91\xBB" => "\x74",
  1177. "\xF0\x9D\x91\xBC" => "\x75",
  1178. "\xF0\x9D\x91\xBD" => "\x76",
  1179. "\xF0\x9D\x91\xBE" => "\x77",
  1180. "\xF0\x9D\x91\xBF" => "\x78",
  1181. "\xF0\x9D\x92\x80" => "\x79",
  1182. "\xF0\x9D\x92\x81" => "\x7A",
  1183. "\xF0\x9D\x92\x9C" => "\x61",
  1184. "\xF0\x9D\x92\x9E" => "\x63",
  1185. "\xF0\x9D\x92\x9F" => "\x64",
  1186. "\xF0\x9D\x92\xA2" => "\x67",
  1187. "\xF0\x9D\x92\xA5" => "\x6A",
  1188. "\xF0\x9D\x92\xA6" => "\x6B",
  1189. "\xF0\x9D\x92\xA9" => "\x6E",
  1190. "\xF0\x9D\x92\xAA" => "\x6F",
  1191. "\xF0\x9D\x92\xAB" => "\x70",
  1192. "\xF0\x9D\x92\xAC" => "\x71",
  1193. "\xF0\x9D\x92\xAE" => "\x73",
  1194. "\xF0\x9D\x92\xAF" => "\x74",
  1195. "\xF0\x9D\x92\xB0" => "\x75",
  1196. "\xF0\x9D\x92\xB1" => "\x76",
  1197. "\xF0\x9D\x92\xB2" => "\x77",
  1198. "\xF0\x9D\x92\xB3" => "\x78",
  1199. "\xF0\x9D\x92\xB4" => "\x79",
  1200. "\xF0\x9D\x92\xB5" => "\x7A",
  1201. "\xF0\x9D\x93\x90" => "\x61",
  1202. "\xF0\x9D\x93\x91" => "\x62",
  1203. "\xF0\x9D\x93\x92" => "\x63",
  1204. "\xF0\x9D\x93\x93" => "\x64",
  1205. "\xF0\x9D\x93\x94" => "\x65",
  1206. "\xF0\x9D\x93\x95" => "\x66",
  1207. "\xF0\x9D\x93\x96" => "\x67",
  1208. "\xF0\x9D\x93\x97" => "\x68",
  1209. "\xF0\x9D\x93\x98" => "\x69",
  1210. "\xF0\x9D\x93\x99" => "\x6A",
  1211. "\xF0\x9D\x93\x9A" => "\x6B",
  1212. "\xF0\x9D\x93\x9B" => "\x6C",
  1213. "\xF0\x9D\x93\x9C" => "\x6D",
  1214. "\xF0\x9D\x93\x9D" => "\x6E",
  1215. "\xF0\x9D\x93\x9E" => "\x6F",
  1216. "\xF0\x9D\x93\x9F" => "\x70",
  1217. "\xF0\x9D\x93\xA0" => "\x71",
  1218. "\xF0\x9D\x93\xA1" => "\x72",
  1219. "\xF0\x9D\x93\xA2" => "\x73",
  1220. "\xF0\x9D\x93\xA3" => "\x74",
  1221. "\xF0\x9D\x93\xA4" => "\x75",
  1222. "\xF0\x9D\x93\xA5" => "\x76",
  1223. "\xF0\x9D\x93\xA6" => "\x77",
  1224. "\xF0\x9D\x93\xA7" => "\x78",
  1225. "\xF0\x9D\x93\xA8" => "\x79",
  1226. "\xF0\x9D\x93\xA9" => "\x7A",
  1227. "\xF0\x9D\x94\x84" => "\x61",
  1228. "\xF0\x9D\x94\x85" => "\x62",
  1229. "\xF0\x9D\x94\x87" => "\x64",
  1230. "\xF0\x9D\x94\x88" => "\x65",
  1231. "\xF0\x9D\x94\x89" => "\x66",
  1232. "\xF0\x9D\x94\x8A" => "\x67",
  1233. "\xF0\x9D\x94\x8D" => "\x6A",
  1234. "\xF0\x9D\x94\x8E" => "\x6B",
  1235. "\xF0\x9D\x94\x8F" => "\x6C",
  1236. "\xF0\x9D\x94\x90" => "\x6D",
  1237. "\xF0\x9D\x94\x91" => "\x6E",
  1238. "\xF0\x9D\x94\x92" => "\x6F",
  1239. "\xF0\x9D\x94\x93" => "\x70",
  1240. "\xF0\x9D\x94\x94" => "\x71",
  1241. "\xF0\x9D\x94\x96" => "\x73",
  1242. "\xF0\x9D\x94\x97" => "\x74",
  1243. "\xF0\x9D\x94\x98" => "\x75",
  1244. "\xF0\x9D\x94\x99" => "\x76",
  1245. "\xF0\x9D\x94\x9A" => "\x77",
  1246. "\xF0\x9D\x94\x9B" => "\x78",
  1247. "\xF0\x9D\x94\x9C" => "\x79",
  1248. "\xF0\x9D\x94\xB8" => "\x61",
  1249. "\xF0\x9D\x94\xB9" => "\x62",
  1250. "\xF0\x9D\x94\xBB" => "\x64",
  1251. "\xF0\x9D\x94\xBC" => "\x65",
  1252. "\xF0\x9D\x94\xBD" => "\x66",
  1253. "\xF0\x9D\x94\xBE" => "\x67",
  1254. "\xF0\x9D\x95\x80" => "\x69",
  1255. "\xF0\x9D\x95\x81" => "\x6A",
  1256. "\xF0\x9D\x95\x82" => "\x6B",
  1257. "\xF0\x9D\x95\x83" => "\x6C",
  1258. "\xF0\x9D\x95\x84" => "\x6D",
  1259. "\xF0\x9D\x95\x86" => "\x6F",
  1260. "\xF0\x9D\x95\x8A" => "\x73",
  1261. "\xF0\x9D\x95\x8B" => "\x74",
  1262. "\xF0\x9D\x95\x8C" => "\x75",
  1263. "\xF0\x9D\x95\x8D" => "\x76",
  1264. "\xF0\x9D\x95\x8E" => "\x77",
  1265. "\xF0\x9D\x95\x8F" => "\x78",
  1266. "\xF0\x9D\x95\x90" => "\x79",
  1267. "\xF0\x9D\x95\xAC" => "\x61",
  1268. "\xF0\x9D\x95\xAD" => "\x62",
  1269. "\xF0\x9D\x95\xAE" => "\x63",
  1270. "\xF0\x9D\x95\xAF" => "\x64",
  1271. "\xF0\x9D\x95\xB0" => "\x65",
  1272. "\xF0\x9D\x95\xB1" => "\x66",
  1273. "\xF0\x9D\x95\xB2" => "\x67",
  1274. "\xF0\x9D\x95\xB3" => "\x68",
  1275. "\xF0\x9D\x95\xB4" => "\x69",
  1276. "\xF0\x9D\x95\xB5" => "\x6A",
  1277. "\xF0\x9D\x95\xB6" => "\x6B",
  1278. "\xF0\x9D\x95\xB7" => "\x6C",
  1279. "\xF0\x9D\x95\xB8" => "\x6D",
  1280. "\xF0\x9D\x95\xB9" => "\x6E",
  1281. "\xF0\x9D\x95\xBA" => "\x6F",
  1282. "\xF0\x9D\x95\xBB" => "\x70",
  1283. "\xF0\x9D\x95\xBC" => "\x71",
  1284. "\xF0\x9D\x95\xBD" => "\x72",
  1285. "\xF0\x9D\x95\xBE" => "\x73",
  1286. "\xF0\x9D\x95\xBF" => "\x74",
  1287. "\xF0\x9D\x96\x80" => "\x75",
  1288. "\xF0\x9D\x96\x81" => "\x76",
  1289. "\xF0\x9D\x96\x82" => "\x77",
  1290. "\xF0\x9D\x96\x83" => "\x78",
  1291. "\xF0\x9D\x96\x84" => "\x79",
  1292. "\xF0\x9D\x96\x85" => "\x7A",
  1293. "\xF0\x9D\x96\xA0" => "\x61",
  1294. "\xF0\x9D\x96\xA1" => "\x62",
  1295. "\xF0\x9D\x96\xA2" => "\x63",
  1296. "\xF0\x9D\x96\xA3" => "\x64",
  1297. "\xF0\x9D\x96\xA4" => "\x65",
  1298. "\xF0\x9D\x96\xA5" => "\x66",
  1299. "\xF0\x9D\x96\xA6" => "\x67",
  1300. "\xF0\x9D\x96\xA7" => "\x68",
  1301. "\xF0\x9D\x96\xA8" => "\x69",
  1302. "\xF0\x9D\x96\xA9" => "\x6A",
  1303. "\xF0\x9D\x96\xAA" => "\x6B",
  1304. "\xF0\x9D\x96\xAB" => "\x6C",
  1305. "\xF0\x9D\x96\xAC" => "\x6D",
  1306. "\xF0\x9D\x96\xAD" => "\x6E",
  1307. "\xF0\x9D\x96\xAE" => "\x6F",
  1308. "\xF0\x9D\x96\xAF" => "\x70",
  1309. "\xF0\x9D\x96\xB0" => "\x71",
  1310. "\xF0\x9D\x96\xB1" => "\x72",
  1311. "\xF0\x9D\x96\xB2" => "\x73",
  1312. "\xF0\x9D\x96\xB3" => "\x74",
  1313. "\xF0\x9D\x96\xB4" => "\x75",
  1314. "\xF0\x9D\x96\xB5" => "\x76",
  1315. "\xF0\x9D\x96\xB6" => "\x77",
  1316. "\xF0\x9D\x96\xB7" => "\x78",
  1317. "\xF0\x9D\x96\xB8" => "\x79",
  1318. "\xF0\x9D\x96\xB9" => "\x7A",
  1319. "\xF0\x9D\x97\x94" => "\x61",
  1320. "\xF0\x9D\x97\x95" => "\x62",
  1321. "\xF0\x9D\x97\x96" => "\x63",
  1322. "\xF0\x9D\x97\x97" => "\x64",
  1323. "\xF0\x9D\x97\x98" => "\x65",
  1324. "\xF0\x9D\x97\x99" => "\x66",
  1325. "\xF0\x9D\x97\x9A" => "\x67",
  1326. "\xF0\x9D\x97\x9B" => "\x68",
  1327. "\xF0\x9D\x97\x9C" => "\x69",
  1328. "\xF0\x9D\x97\x9D" => "\x6A",
  1329. "\xF0\x9D\x97\x9E" => "\x6B",
  1330. "\xF0\x9D\x97\x9F" => "\x6C",
  1331. "\xF0\x9D\x97\xA0" => "\x6D",
  1332. "\xF0\x9D\x97\xA1" => "\x6E",
  1333. "\xF0\x9D\x97\xA2" => "\x6F",
  1334. "\xF0\x9D\x97\xA3" => "\x70",
  1335. "\xF0\x9D\x97\xA4" => "\x71",
  1336. "\xF0\x9D\x97\xA5" => "\x72",
  1337. "\xF0\x9D\x97\xA6" => "\x73",
  1338. "\xF0\x9D\x97\xA7" => "\x74",
  1339. "\xF0\x9D\x97\xA8" => "\x75",
  1340. "\xF0\x9D\x97\xA9" => "\x76",
  1341. "\xF0\x9D\x97\xAA" => "\x77",
  1342. "\xF0\x9D\x97\xAB" => "\x78",
  1343. "\xF0\x9D\x97\xAC" => "\x79",
  1344. "\xF0\x9D\x97\xAD" => "\x7A",
  1345. "\xF0\x9D\x98\x88" => "\x61",
  1346. "\xF0\x9D\x98\x89" => "\x62",
  1347. "\xF0\x9D\x98\x8A" => "\x63",
  1348. "\xF0\x9D\x98\x8B" => "\x64",
  1349. "\xF0\x9D\x98\x8C" => "\x65",
  1350. "\xF0\x9D\x98\x8D" => "\x66",
  1351. "\xF0\x9D\x98\x8E" => "\x67",
  1352. "\xF0\x9D\x98\x8F" => "\x68",
  1353. "\xF0\x9D\x98\x90" => "\x69",
  1354. "\xF0\x9D\x98\x91" => "\x6A",
  1355. "\xF0\x9D\x98\x92" => "\x6B",
  1356. "\xF0\x9D\x98\x93" => "\x6C",
  1357. "\xF0\x9D\x98\x94" => "\x6D",
  1358. "\xF0\x9D\x98\x95" => "\x6E",
  1359. "\xF0\x9D\x98\x96" => "\x6F",
  1360. "\xF0\x9D\x98\x97" => "\x70",
  1361. "\xF0\x9D\x98\x98" => "\x71",
  1362. "\xF0\x9D\x98\x99" => "\x72",
  1363. "\xF0\x9D\x98\x9A" => "\x73",
  1364. "\xF0\x9D\x98\x9B" => "\x74",
  1365. "\xF0\x9D\x98\x9C" => "\x75",
  1366. "\xF0\x9D\x98\x9D" => "\x76",
  1367. "\xF0\x9D\x98\x9E" => "\x77",
  1368. "\xF0\x9D\x98\x9F" => "\x78",
  1369. "\xF0\x9D\x98\xA0" => "\x79",
  1370. "\xF0\x9D\x98\xA1" => "\x7A",
  1371. "\xF0\x9D\x98\xBC" => "\x61",
  1372. "\xF0\x9D\x98\xBD" => "\x62",
  1373. "\xF0\x9D\x98\xBE" => "\x63",
  1374. "\xF0\x9D\x98\xBF" => "\x64",
  1375. "\xF0\x9D\x99\x80" => "\x65",
  1376. "\xF0\x9D\x99\x81" => "\x66",
  1377. "\xF0\x9D\x99\x82" => "\x67",
  1378. "\xF0\x9D\x99\x83" => "\x68",
  1379. "\xF0\x9D\x99\x84" => "\x69",
  1380. "\xF0\x9D\x99\x85" => "\x6A",
  1381. "\xF0\x9D\x99\x86" => "\x6B",
  1382. "\xF0\x9D\x99\x87" => "\x6C",
  1383. "\xF0\x9D\x99\x88" => "\x6D",
  1384. "\xF0\x9D\x99\x89" => "\x6E",
  1385. "\xF0\x9D\x99\x8A" => "\x6F",
  1386. "\xF0\x9D\x99\x8B" => "\x70",
  1387. "\xF0\x9D\x99\x8C" => "\x71",
  1388. "\xF0\x9D\x99\x8D" => "\x72",
  1389. "\xF0\x9D\x99\x8E" => "\x73",
  1390. "\xF0\x9D\x99\x8F" => "\x74",
  1391. "\xF0\x9D\x99\x90" => "\x75",
  1392. "\xF0\x9D\x99\x91" => "\x76",
  1393. "\xF0\x9D\x99\x92" => "\x77",
  1394. "\xF0\x9D\x99\x93" => "\x78",
  1395. "\xF0\x9D\x99\x94" => "\x79",
  1396. "\xF0\x9D\x99\x95" => "\x7A",
  1397. "\xF0\x9D\x99\xB0" => "\x61",
  1398. "\xF0\x9D\x99\xB1" => "\x62",
  1399. "\xF0\x9D\x99\xB2" => "\x63",
  1400. "\xF0\x9D\x99\xB3" => "\x64",
  1401. "\xF0\x9D\x99\xB4" => "\x65",
  1402. "\xF0\x9D\x99\xB5" => "\x66",
  1403. "\xF0\x9D\x99\xB6" => "\x67",
  1404. "\xF0\x9D\x99\xB7" => "\x68",
  1405. "\xF0\x9D\x99\xB8" => "\x69",
  1406. "\xF0\x9D\x99\xB9" => "\x6A",
  1407. "\xF0\x9D\x99\xBA" => "\x6B",
  1408. "\xF0\x9D\x99\xBB" => "\x6C",
  1409. "\xF0\x9D\x99\xBC" => "\x6D",
  1410. "\xF0\x9D\x99\xBD" => "\x6E",
  1411. "\xF0\x9D\x99\xBE" => "\x6F",
  1412. "\xF0\x9D\x99\xBF" => "\x70",
  1413. "\xF0\x9D\x9A\x80" => "\x71",
  1414. "\xF0\x9D\x9A\x81" => "\x72",
  1415. "\xF0\x9D\x9A\x82" => "\x73",
  1416. "\xF0\x9D\x9A\x83" => "\x74",
  1417. "\xF0\x9D\x9A\x84" => "\x75",
  1418. "\xF0\x9D\x9A\x85" => "\x76",
  1419. "\xF0\x9D\x9A\x86" => "\x77",
  1420. "\xF0\x9D\x9A\x87" => "\x78",
  1421. "\xF0\x9D\x9A\x88" => "\x79",
  1422. "\xF0\x9D\x9A\x89" => "\x7A",
  1423. "\xF0\x9D\x9A\xA8" => "\xCE\xB1",
  1424. "\xF0\x9D\x9A\xA9" => "\xCE\xB2",
  1425. "\xF0\x9D\x9A\xAA" => "\xCE\xB3",
  1426. "\xF0\x9D\x9A\xAB" => "\xCE\xB4",
  1427. "\xF0\x9D\x9A\xAC" => "\xCE\xB5",
  1428. "\xF0\x9D\x9A\xAD" => "\xCE\xB6",
  1429. "\xF0\x9D\x9A\xAE" => "\xCE\xB7",
  1430. "\xF0\x9D\x9A\xAF" => "\xCE\xB8",
  1431. "\xF0\x9D\x9A\xB0" => "\xCE\xB9",
  1432. "\xF0\x9D\x9A\xB1" => "\xCE\xBA",
  1433. "\xF0\x9D\x9A\xB2" => "\xCE\xBB",
  1434. "\xF0\x9D\x9A\xB3" => "\xCE\xBC",
  1435. "\xF0\x9D\x9A\xB4" => "\xCE\xBD",
  1436. "\xF0\x9D\x9A\xB5" => "\xCE\xBE",
  1437. "\xF0\x9D\x9A\xB6" => "\xCE\xBF",
  1438. "\xF0\x9D\x9A\xB7" => "\xCF\x80",
  1439. "\xF0\x9D\x9A\xB8" => "\xCF\x81",
  1440. "\xF0\x9D\x9A\xB9" => "\xCE\xB8",
  1441. "\xF0\x9D\x9A\xBA" => "\xCF\x83",
  1442. "\xF0\x9D\x9A\xBB" => "\xCF\x84",
  1443. "\xF0\x9D\x9A\xBC" => "\xCF\x85",
  1444. "\xF0\x9D\x9A\xBD" => "\xCF\x86",
  1445. "\xF0\x9D\x9A\xBE" => "\xCF\x87",
  1446. "\xF0\x9D\x9A\xBF" => "\xCF\x88",
  1447. "\xF0\x9D\x9B\x80" => "\xCF\x89",
  1448. "\xF0\x9D\x9B\x93" => "\xCF\x83",
  1449. "\xF0\x9D\x9B\xA2" => "\xCE\xB1",
  1450. "\xF0\x9D\x9B\xA3" => "\xCE\xB2",
  1451. "\xF0\x9D\x9B\xA4" => "\xCE\xB3",
  1452. "\xF0\x9D\x9B\xA5" => "\xCE\xB4",
  1453. "\xF0\x9D\x9B\xA6" => "\xCE\xB5",
  1454. "\xF0\x9D\x9B\xA7" => "\xCE\xB6",
  1455. "\xF0\x9D\x9B\xA8" => "\xCE\xB7",
  1456. "\xF0\x9D\x9B\xA9" => "\xCE\xB8",
  1457. "\xF0\x9D\x9B\xAA" => "\xCE\xB9",
  1458. "\xF0\x9D\x9B\xAB" => "\xCE\xBA",
  1459. "\xF0\x9D\x9B\xAC" => "\xCE\xBB",
  1460. "\xF0\x9D\x9B\xAD" => "\xCE\xBC",
  1461. "\xF0\x9D\x9B\xAE" => "\xCE\xBD",
  1462. "\xF0\x9D\x9B\xAF" => "\xCE\xBE",
  1463. "\xF0\x9D\x9B\xB0" => "\xCE\xBF",
  1464. "\xF0\x9D\x9B\xB1" => "\xCF\x80",
  1465. "\xF0\x9D\x9B\xB2" => "\xCF\x81",
  1466. "\xF0\x9D\x9B\xB3" => "\xCE\xB8",
  1467. "\xF0\x9D\x9B\xB4" => "\xCF\x83",
  1468. "\xF0\x9D\x9B\xB5" => "\xCF\x84",
  1469. "\xF0\x9D\x9B\xB6" => "\xCF\x85",
  1470. "\xF0\x9D\x9B\xB7" => "\xCF\x86",
  1471. "\xF0\x9D\x9B\xB8" => "\xCF\x87",
  1472. "\xF0\x9D\x9B\xB9" => "\xCF\x88",
  1473. "\xF0\x9D\x9B\xBA" => "\xCF\x89",
  1474. "\xF0\x9D\x9C\x8D" => "\xCF\x83",
  1475. "\xF0\x9D\x9C\x9C" => "\xCE\xB1",
  1476. "\xF0\x9D\x9C\x9D" => "\xCE\xB2",
  1477. "\xF0\x9D\x9C\x9E" => "\xCE\xB3",
  1478. "\xF0\x9D\x9C\x9F" => "\xCE\xB4",
  1479. "\xF0\x9D\x9C\xA0" => "\xCE\xB5",
  1480. "\xF0\x9D\x9C\xA1" => "\xCE\xB6",
  1481. "\xF0\x9D\x9C\xA2" => "\xCE\xB7",
  1482. "\xF0\x9D\x9C\xA3" => "\xCE\xB8",
  1483. "\xF0\x9D\x9C\xA4" => "\xCE\xB9",
  1484. "\xF0\x9D\x9C\xA5" => "\xCE\xBA",
  1485. "\xF0\x9D\x9C\xA6" => "\xCE\xBB",
  1486. "\xF0\x9D\x9C\xA7" => "\xCE\xBC",
  1487. "\xF0\x9D\x9C\xA8" => "\xCE\xBD",
  1488. "\xF0\x9D\x9C\xA9" => "\xCE\xBE",
  1489. "\xF0\x9D\x9C\xAA" => "\xCE\xBF",
  1490. "\xF0\x9D\x9C\xAB" => "\xCF\x80",
  1491. "\xF0\x9D\x9C\xAC" => "\xCF\x81",
  1492. "\xF0\x9D\x9C\xAD" => "\xCE\xB8",
  1493. "\xF0\x9D\x9C\xAE" => "\xCF\x83",
  1494. "\xF0\x9D\x9C\xAF" => "\xCF\x84",
  1495. "\xF0\x9D\x9C\xB0" => "\xCF\x85",
  1496. "\xF0\x9D\x9C\xB1" => "\xCF\x86",
  1497. "\xF0\x9D\x9C\xB2" => "\xCF\x87",
  1498. "\xF0\x9D\x9C\xB3" => "\xCF\x88",
  1499. "\xF0\x9D\x9C\xB4" => "\xCF\x89",
  1500. "\xF0\x9D\x9D\x87" => "\xCF\x83",
  1501. "\xF0\x9D\x9D\x96" => "\xCE\xB1",
  1502. "\xF0\x9D\x9D\x97" => "\xCE\xB2",
  1503. "\xF0\x9D\x9D\x98" => "\xCE\xB3",
  1504. "\xF0\x9D\x9D\x99" => "\xCE\xB4",
  1505. "\xF0\x9D\x9D\x9A" => "\xCE\xB5",
  1506. "\xF0\x9D\x9D\x9B" => "\xCE\xB6",
  1507. "\xF0\x9D\x9D\x9C" => "\xCE\xB7",
  1508. "\xF0\x9D\x9D\x9D" => "\xCE\xB8",
  1509. "\xF0\x9D\x9D\x9E" => "\xCE\xB9",
  1510. "\xF0\x9D\x9D\x9F" => "\xCE\xBA",
  1511. "\xF0\x9D\x9D\xA0" => "\xCE\xBB",
  1512. "\xF0\x9D\x9D\xA1" => "\xCE\xBC",
  1513. "\xF0\x9D\x9D\xA2" => "\xCE\xBD",
  1514. "\xF0\x9D\x9D\xA3" => "\xCE\xBE",
  1515. "\xF0\x9D\x9D\xA4" => "\xCE\xBF",
  1516. "\xF0\x9D\x9D\xA5" => "\xCF\x80",
  1517. "\xF0\x9D\x9D\xA6" => "\xCF\x81",
  1518. "\xF0\x9D\x9D\xA7" => "\xCE\xB8",
  1519. "\xF0\x9D\x9D\xA8" => "\xCF\x83",
  1520. "\xF0\x9D\x9D\xA9" => "\xCF\x84",
  1521. "\xF0\x9D\x9D\xAA" => "\xCF\x85",
  1522. "\xF0\x9D\x9D\xAB" => "\xCF\x86",
  1523. "\xF0\x9D\x9D\xAC" => "\xCF\x87",
  1524. "\xF0\x9D\x9D\xAD" => "\xCF\x88",
  1525. "\xF0\x9D\x9D\xAE" => "\xCF\x89",
  1526. "\xF0\x9D\x9E\x81" => "\xCF\x83",
  1527. "\xF0\x9D\x9E\x90" => "\xCE\xB1",
  1528. "\xF0\x9D\x9E\x91" => "\xCE\xB2",
  1529. "\xF0\x9D\x9E\x92" => "\xCE\xB3",
  1530. "\xF0\x9D\x9E\x93" => "\xCE\xB4",
  1531. "\xF0\x9D\x9E\x94" => "\xCE\xB5",
  1532. "\xF0\x9D\x9E\x95" => "\xCE\xB6",
  1533. "\xF0\x9D\x9E\x96" => "\xCE\xB7",
  1534. "\xF0\x9D\x9E\x97" => "\xCE\xB8",
  1535. "\xF0\x9D\x9E\x98" => "\xCE\xB9",
  1536. "\xF0\x9D\x9E\x99" => "\xCE\xBA",
  1537. "\xF0\x9D\x9E\x9A" => "\xCE\xBB",
  1538. "\xF0\x9D\x9E\x9B" => "\xCE\xBC",
  1539. "\xF0\x9D\x9E\x9C" => "\xCE\xBD",
  1540. "\xF0\x9D\x9E\x9D" => "\xCE\xBE",
  1541. "\xF0\x9D\x9E\x9E" => "\xCE\xBF",
  1542. "\xF0\x9D\x9E\x9F" => "\xCF\x80",
  1543. "\xF0\x9D\x9E\xA0" => "\xCF\x…

Large files are truncated, but you can click here to view the full file