PageRenderTime 58ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/forum/includes/utf/utf_tools.php

https://code.google.com/p/mwenhanced/
PHP | 1995 lines | 1491 code | 137 blank | 367 comment | 129 complexity | 4d60cecaea89053ec9ca02d16cfa1a87 MD5 | raw file
Possible License(s): LGPL-2.1, AGPL-3.0, AGPL-1.0, GPL-2.0, MPL-2.0-no-copyleft-exception

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. *
  4. * @package utf
  5. * @version $Id$
  6. * @copyright (c) 2006 phpBB Group
  7. * @license http://opensource.org/licenses/gpl-license.php GNU Public License
  8. *
  9. */
  10. /**
  11. */
  // Stop right away when the including script has not defined IN_PHPBB,
  // i.e. when this file is being requested on its own.
  if (defined('IN_PHPBB') === false)
  {
      exit;
  }

  // Enforce ASCII only string handling: pin character classification to the
  // "C" locale for the byte-oriented string functions used in this file.
  setlocale(LC_CTYPE, 'C');
  18. /**
  19. * UTF-8 tools
  20. *
  21. * Whenever possible, these functions will try to use PHP's built-in functions or
  22. * extensions, otherwise they will default to custom routines.
  23. *
  24. * @package utf
  25. */
  26. if (!extension_loaded('xml'))
  27. {
  /**
  * Fallback for PHP's native utf8_encode when the XML extension is missing.
  *
  * Works byte by byte: ASCII bytes are copied unchanged, bytes 0x80-0xBF are
  * prefixed with 0xC2, and bytes 0xC0-0xFF become 0xC3 followed by the byte
  * value minus 64.
  *
  * @param string $str ISO-8859-1 encoded data
  * @return string UTF-8 encoded data
  */
  function utf8_encode($str)
  {
      $encoded = '';
      $total = strlen($str);

      for ($idx = 0; $idx < $total; $idx++)
      {
          $byte = $str[$idx];
          $code = ord($byte);

          if ($code >= 0xC0)
          {
              // Upper quarter of Latin-1: lead byte 0xC3, trail byte shifted down by 64
              $encoded .= "\xC3" . chr($code - 64);
              continue;
          }

          // 0x80-0xBF keep their value as trail byte behind 0xC2; ASCII passes through
          $encoded .= ($code >= 0x80) ? "\xC2" . $byte : $byte;
      }

      return $encoded;
  }
  /**
  * Fallback for PHP's native utf8_decode when the XML extension is missing.
  *
  * Two-byte sequences whose code point fits into one byte are converted;
  * every other multi-byte sequence (code point >= 256, three- and four-byte
  * sequences) is replaced by a single '?'. Bytes that are not a multi-byte
  * lead byte are copied unchanged.
  *
  * A two-byte lead byte that is the very last byte of the input (truncated
  * sequence) is replaced by '?' instead of reading past the end of the string.
  *
  * @param string $str UTF-8 encoded data
  * @return string ISO-8859-1 encoded data
  */
  function utf8_decode($str)
  {
      $pos = 0;
      $len = strlen($str);
      $ret = '';

      while ($pos < $len)
      {
          $lead = ord($str[$pos]);
          // The high nibble of the lead byte tells how long the sequence is
          $ord = $lead & 0xF0;

          if ($ord === 0xC0 || $ord === 0xD0)
          {
              if ($pos + 1 < $len)
              {
                  $charval = (($lead & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
                  $ret .= ($charval < 256) ? chr($charval) : '?';
              }
              else
              {
                  // Truncated sequence: the continuation byte is missing
                  $ret .= '?';
              }
              $pos += 2;
          }
          else if ($ord === 0xE0)
          {
              // Three-byte sequence, never representable in ISO-8859-1
              $ret .= '?';
              $pos += 3;
          }
          else if ($ord === 0xF0)
          {
              // Four-byte sequence, never representable in ISO-8859-1
              $ret .= '?';
              $pos += 4;
          }
          else
          {
              $ret .= $str[$pos];
              ++$pos;
          }
      }

      return $ret;
  }
  95. }
  96. // mbstring has been around for a long time, so its functions are available even on older versions of PHP.
  97. // If mbstring is not loaded, we fall back to the native implementations below.
  98. if (extension_loaded('mbstring'))
  99. {
  100. mb_internal_encoding('UTF-8');
  101. /**
  102. * UTF-8 aware alternative to strrpos
  103. * Find position of last occurrence of a char in a string
  104. *
  105. * Notes:
  106. * - offset for mb_strrpos was added in 5.2.0, we emulate if it is lower
  107. */
  108. if (version_compare(PHP_VERSION, '5.2.0', '>='))
  109. {
  /**
  * UTF-8 aware alternative to strrpos (mbstring, PHP >= 5.2.0)
  *
  * Returns false for an empty haystack without calling mb_strrpos, emulating
  * strrpos rather than raising a warning. Only genuinely empty haystacks
  * ('', null, false, empty array) are short-circuited; the haystack "0" is
  * a valid one-character string and is searched normally.
  *
  * @param string $str haystack
  * @param string $needle needle
  * @param integer $offset (optional) offset passed through to mb_strrpos
  * @return mixed integer position or FALSE on failure
  * @ignore
  */
  function utf8_strrpos($str, $needle, $offset = null)
  {
      // empty() would also reject "0" and 0, which are searchable haystacks
      if (in_array($str, array('', null, false, array()), true))
      {
          return false;
      }

      if (is_null($offset))
      {
          return mb_strrpos($str, $needle);
      }

      return mb_strrpos($str, $needle, $offset);
  }
  130. }
  131. else
  132. {
  /**
  * UTF-8 aware alternative to strrpos (mbstring, PHP < 5.2.0)
  *
  * mb_strrpos only accepts an offset from PHP 5.2.0 on, so the offset is
  * emulated here: the haystack is cut at $offset, searched, and the offset
  * is added back to the position found.
  *
  * Without an offset, a genuinely empty haystack ('', null, false, empty
  * array) yields false without calling mb_strrpos; the haystack "0" is a
  * valid one-character string and is searched normally.
  *
  * @param string $str haystack
  * @param string $needle needle
  * @param integer $offset (optional) offset in characters (from left)
  * @return mixed integer position or FALSE on failure
  * @ignore
  */
  function utf8_strrpos($str, $needle, $offset = null)
  {
      if (is_null($offset))
      {
          // Emulate behaviour of strrpos rather than raising warning.
          // empty() would also reject "0" and 0, which are searchable haystacks.
          if (in_array($str, array('', null, false, array()), true))
          {
              return false;
          }

          return mb_strrpos($str, $needle);
      }

      if (!is_int($offset))
      {
          trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
          return false;
      }

      // Search only the part behind the offset, then translate the position back
      $str = mb_substr($str, $offset);
      $pos = mb_strrpos($str, $needle);

      return ($pos === false) ? false : $pos + $offset;
  }
  164. }
  /**
  * UTF-8 aware alternative to strpos (mbstring variant)
  *
  * Delegates to mb_strpos; the offset is only passed on when the caller
  * supplied one.
  * @ignore
  */
  function utf8_strpos($str, $needle, $offset = null)
  {
      return ($offset === null)
          ? mb_strpos($str, $needle)
          : mb_strpos($str, $needle, $offset);
  }
  /**
  * UTF-8 aware alternative to strtolower (mbstring variant)
  *
  * Thin wrapper around mb_strtolower.
  * @ignore
  */
  function utf8_strtolower($str)
  {
      $lowered = mb_strtolower($str);

      return $lowered;
  }
  /**
  * UTF-8 aware alternative to strtoupper (mbstring variant)
  *
  * Thin wrapper around mb_strtoupper.
  * @ignore
  */
  function utf8_strtoupper($str)
  {
      $uppered = mb_strtoupper($str);

      return $uppered;
  }
  /**
  * UTF-8 aware alternative to substr (mbstring variant)
  *
  * Delegates to mb_substr; the length is only passed on when the caller
  * supplied one.
  * @ignore
  */
  function utf8_substr($str, $offset, $length = null)
  {
      return ($length === null)
          ? mb_substr($str, $offset)
          : mb_substr($str, $offset, $length);
  }
  /**
  * Return the length (in characters) of a UTF-8 string (mbstring variant)
  *
  * Calls mb_strlen with the encoding given explicitly as 'utf-8'.
  * @ignore
  */
  function utf8_strlen($text)
  {
      $characters = mb_strlen($text, 'utf-8');

      return $characters;
  }
  219. }
  220. else
  221. {
  /**
  * UTF-8 aware alternative to strrpos (native variant)
  * Find position of last occurrence of a char in a string
  *
  * Without an offset the haystack is split at every needle; the character
  * length of everything before the last needle is the position. With an
  * offset the haystack is cut at $offset first, searched recursively, and the
  * offset is added back to the result.
  *
  * @author Harry Fuecks
  * @param string $str haystack
  * @param string $needle needle
  * @param integer $offset (optional) offset (from left)
  * @return mixed integer position or FALSE on failure
  */
  function utf8_strrpos($str, $needle, $offset = null)
  {
      if (!is_null($offset))
      {
          if (!is_int($offset))
          {
              trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
              return false;
          }

          $tail = utf8_substr($str, $offset);
          $found = utf8_strrpos($tail, $needle);

          return ($found === false) ? false : $found + $offset;
      }

      $pieces = explode($needle, $str);
      if (count($pieces) <= 1)
      {
          // Needle does not occur at all
          return false;
      }

      // Drop the part behind the last match; what remains ends right before it
      array_pop($pieces);

      return utf8_strlen(implode($needle, $pieces));
  }
  /**
  * UTF-8 aware alternative to strpos (native variant)
  * Find position of first occurrence of a string
  *
  * Without an offset the haystack is split at the needle; the character
  * length of the first piece is the position. With an offset the haystack is
  * cut at $offset first, searched recursively, and the offset is added back.
  *
  * @author Harry Fuecks
  * @param string $str haystack
  * @param string $needle needle
  * @param integer $offset offset in characters (from left)
  * @return mixed integer position or FALSE on failure
  */
  function utf8_strpos($str, $needle, $offset = null)
  {
      if (!is_null($offset))
      {
          if (!is_int($offset))
          {
              trigger_error('utf8_strpos: Offset must be an integer', E_USER_ERROR);
              return false;
          }

          $tail = utf8_substr($str, $offset);
          $found = utf8_strpos($tail, $needle);

          return ($found === false) ? false : $found + $offset;
      }

      $pieces = explode($needle, $str);

      // More than one piece means the needle occurred; the first piece precedes it
      return (count($pieces) > 1) ? utf8_strlen($pieces[0]) : false;
  }
  /**
  * UTF-8 aware alternative to strtolower (native variant)
  * Make a string lowercase
  *
  * The string is first passed through strtolower(); the result is then run
  * through a fixed table mapping upper case UTF-8 sequences to their lower
  * case counterparts. Characters missing from the table are left unchanged.
  *
  * Note: The concept of a character's "case" only exists in some alphabets
  * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
  * not exist in the Chinese alphabet, for example. See Unicode Standard
  * Annex #21: Case Mappings
  *
  * @param string $string
  * @return string string in lowercase
  */
  function utf8_strtolower($string)
  {
      // Upper case sequence => lower case sequence
      static $lower_map = array(
          "\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
          "\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
          "\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
          "\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
          "\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
          "\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
          "\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
          "\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
          "\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
          "\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
          "\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
          "\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
          "\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
          "\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
          "\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
          "\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
          "\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
          "\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
          "\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
          "\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
          "\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
          "\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
          "\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
          "\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
          "\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
          "\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
          "\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
          "\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
          "\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
          "\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
          "\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
          "\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
          "\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
          "\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
          "\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
          "\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
          "\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
          "\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
          "\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
          "\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
          "\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
          "\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
          "\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
          "\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
          "\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
          "\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
      );

      // Single-byte letters first, then the multi-byte table
      $ascii_lowered = strtolower($string);

      return strtr($ascii_lowered, $lower_map);
  }
  /**
  * UTF-8 aware alternative to strtoupper (native variant)
  * Make a string uppercase
  *
  * The string is first passed through strtoupper(); the result is then run
  * through a fixed table mapping lower case UTF-8 sequences to their upper
  * case counterparts. Characters missing from the table are left unchanged.
  *
  * Note: The concept of a character's "case" only exists in some alphabets
  * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
  * not exist in the Chinese alphabet, for example. See Unicode Standard
  * Annex #21: Case Mappings
  *
  * @param string $string
  * @return string string in uppercase
  */
  function utf8_strtoupper($string)
  {
      // Lower case sequence => upper case sequence
      static $upper_map = array(
          "\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
          "\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
          "\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
          "\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
          "\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
          "\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
          "\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
          "\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
          "\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
          "\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
          "\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
          "\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
          "\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
          "\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
          "\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
          "\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
          "\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
          "\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
          "\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
          "\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
          "\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
          "\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
          "\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
          "\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
          "\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
          "\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
          "\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
          "\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
          "\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
          "\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
          "\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
          "\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
          "\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
          "\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
          "\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
          "\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
          "\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
          "\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
          "\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
          "\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
          "\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
          "\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
          "\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
          "\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
          "\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
          "\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
      );

      // Single-byte letters first, then the multi-byte table
      $ascii_uppered = strtoupper($string);

      return strtr($ascii_uppered, $upper_map);
  }
  /**
  * UTF-8 aware alternative to substr (native variant)
  * Return part of a string given character offset (and optionally length)
  *
  * Arguments: unlike substr, offset and length are silently cast to
  * integers instead of raising a complaint when they are of another type.
  *
  * Return value: like mb_substr, an empty string is returned where substr
  * would return false (e.g. offset beyond the end of the string).
  *
  * Implementation: the substring is cut out with one anchored regular
  * expression made of an "offset" part (skipped, not captured) and a
  * "length" part (captured). PCRE only supports repetition counts below
  * 65536, so larger counts are expressed as repeated groups of 65535
  * characters plus a remainder.
  *
  * Counting the characters of the string is comparatively expensive, so it
  * is only done when needed; it is not needed for a non-negative offset
  * without a length.
  *
  * @author Chris Smith<chris@jalakai.co.uk>
  * @param string $str
  * @param integer $offset number of UTF-8 characters offset (from left)
  * @param integer $length (optional) length in UTF-8 characters from offset
  * @return string the requested part, or an empty string if nothing matches
  */
  function utf8_substr($str, $offset, $length = NULL)
  {
      // Casting generates E_NOTICE for PHP4 objects, but not PHP5 objects
      $str = (string) $str;
      $offset = (int) $offset;

      $open_ended = is_null($length);
      if (!$open_ended)
      {
          $length = (int) $length;
      }

      // Trivial cases: zero length, or a negative length reaching back
      // further than a negative offset
      if ($length === 0)
      {
          return '';
      }
      if ($offset < 0 && $length < 0 && $length < $offset)
      {
          return '';
      }

      // Turn a negative offset into a positive one (a tail anchored pattern
      // would work too, but those are horribly slow)
      if ($offset < 0)
      {
          $strlen = utf8_strlen($str);
          $offset = max(0, $strlen + $offset);
      }

      // Offset part: a non-captured group exactly $offset characters long
      if ($offset > 0)
      {
          $skip_blocks = (int) ($offset / 65535);
          $skip_rest = $offset % 65535;
          $skip = ($skip_blocks) ? '(?:.{65535}){' . $skip_blocks . '}' : '';

          $offset_pattern = '^(?:' . $skip . '.{' . $skip_rest . '})';
      }
      else
      {
          // offset == 0; just anchor the pattern
          $offset_pattern = '^';
      }

      // Length part
      if ($open_ended)
      {
          // the rest of the string
          $length_pattern = '(.*)$';
      }
      else
      {
          if (!isset($strlen))
          {
              $strlen = utf8_strlen($str);
          }

          // another trivial case
          if ($offset > $strlen)
          {
              return '';
          }

          $length_pattern = '';

          if ($length > 0)
          {
              // never reach past the end of the string
              $length = min($strlen - $offset, $length);

              $take_blocks = (int) ($length / 65535);
              $take_rest = $length % 65535;
              $take = ($take_blocks) ? '(?:.{65535}){' . $take_blocks . '}' : '';

              // positive length: capture a group of $length characters
              $length_pattern = '(' . $take . '.{' . $take_rest . '})';
          }
          else if ($length < 0)
          {
              if ($length < ($offset - $strlen))
              {
                  return '';
              }

              $drop = -$length;
              $drop_blocks = (int) ($drop / 65535);
              $drop_rest = $drop % 65535;
              $tail = ($drop_blocks) ? '(?:.{65535}){' . $drop_blocks . '}' : '';

              // negative length: capture everything except a group of
              // -length characters anchored at the end of the string
              $length_pattern = '(.*)(?:' . $tail . '.{' . $drop_rest . '})$';
          }
      }

      if (!preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match))
      {
          return '';
      }

      return $match[1];
  }
  /**
  * Return the length (in characters) of a UTF-8 string (native variant)
  *
  * utf8_decode turns every multi-byte character into a single byte (either
  * the ISO-8859-1 byte or '?'), so the byte length of its result is used as
  * the character count.
  *
  * @param string $text UTF-8 string
  * @return integer Length (in chars) of given string
  */
  function utf8_strlen($text)
  {
      $single_bytes = utf8_decode($text);

      return strlen($single_bytes);
  }
  564. }
  /**
  * UTF-8 aware alternative to str_split
  * Convert a string to an array
  *
  * Returns false when $split_len is not an integer or is smaller than 1.
  * A string no longer than $split_len characters comes back as a
  * one-element array without being run through the regular expression.
  *
  * @author Harry Fuecks
  * @param string $str UTF-8 encoded
  * @param int $split_len number of characters to split string by
  * @return mixed array of chunks, or FALSE on an invalid $split_len
  */
  function utf8_str_split($str, $split_len = 1)
  {
      $valid_len = is_int($split_len) && $split_len >= 1;
      if (!$valid_len)
      {
          return false;
      }

      if (utf8_strlen($str) <= $split_len)
      {
          return array($str);
      }

      // Full chunks of $split_len characters, or a shorter rest at the very end
      $pattern = '/.{' . $split_len . '}|[^\x00]{1,' . $split_len . '}$/us';
      preg_match_all($pattern, $str, $chunks);

      return $chunks[0];
  }
  /**
  * UTF-8 aware alternative to strspn
  * Find length of initial segment matching the mask
  *
  * The mask is treated as a plain list of characters: it is escaped with
  * preg_quote before being placed into a regular expression character
  * class, so characters such as "]", "^", "-", "\" or "/" in the mask are
  * matched literally instead of breaking or changing the pattern. An empty
  * mask can never match and yields 0.
  *
  * @author Harry Fuecks
  * @param string $str UTF-8 string to examine
  * @param string $mask characters allowed in the initial segment
  * @param integer $start (optional) character offset at which to start
  * @param integer $length (optional) number of characters to examine
  * @return integer length (in characters) of the matching initial segment
  */
  function utf8_strspn($str, $mask, $start = null, $length = null)
  {
      if ($start !== null || $length !== null)
      {
          $str = utf8_substr($str, $start, $length);
      }

      $mask = (string) $mask;
      if ($mask === '')
      {
          // "[]+" is not a valid pattern; nothing can match an empty mask anyway
          return 0;
      }

      if (preg_match('/^[' . preg_quote($mask, '/') . ']+/u', $str, $matches))
      {
          return utf8_strlen($matches[0]);
      }

      return 0;
  }
  /**
  * UTF-8 aware alternative to ucfirst
  * Make a string's first character uppercase
  *
  * An empty string yields '', a single character is upper-cased as a whole,
  * and a longer string is split into its first character and the rest, of
  * which only the first character is upper-cased.
  *
  * @author Harry Fuecks
  * @param string $str
  * @return string with first character as upper case (if applicable)
  */
  function utf8_ucfirst($str)
  {
      $chars = utf8_strlen($str);

      if ($chars == 0)
      {
          return '';
      }

      if ($chars == 1)
      {
          return utf8_strtoupper($str);
      }

      preg_match('/^(.{1})(.*)$/us', $str, $parts);

      return utf8_strtoupper($parts[1]) . $parts[2];
  }
  /**
  * Recode a string to UTF-8
  *
  * The conversion is attempted with iconv, then mbstring (for a fixed list of
  * encodings), then the recode extension; the first non-empty result is
  * returned. If none of them succeeds, the bundled transcoder functions from
  * includes/utf/data/recode_basic and recode_cjk are loaded on demand.
  * An encoding nobody can handle ends in an E_USER_ERROR.
  *
  * The string is returned unchanged when the encoding already is utf-8, when
  * the value is not a string, or when it is empty.
  *
  * @param string $string Original string
  * @param string $encoding Original encoding (lowered)
  * @return string The string, encoded in UTF-8
  */
  function utf8_recode($string, $encoding)
  {
      $encoding = strtolower($encoding);

      if ($encoding == 'utf-8' || !is_string($string) || empty($string))
      {
          return $string;
      }

      if ($encoding == 'iso-8859-1')
      {
          // we force iso-8859-1 to be cp1252
          $encoding = 'cp1252';
      }
      else if ($encoding == 'iso-8859-8-i')
      {
          // convert iso-8859-8-i to iso-8859-8
          $encoding = 'iso-8859-8';
          $string = hebrev($string);
      }

      // First, try iconv()
      if (function_exists('iconv'))
      {
          $converted = @iconv($encoding, 'utf-8', $string);
          if (!empty($converted))
          {
              return $converted;
          }
      }

      // Try the mb_string extension
      if (function_exists('mb_convert_encoding'))
      {
          // mbstring is nasty on PHP4, we must make *sure* that we send a good encoding
          $mb_safe = array(
              'iso-8859-1', 'iso-8859-2', 'iso-8859-4', 'iso-8859-7', 'iso-8859-9', 'iso-8859-15',
              'windows-1251', 'windows-1252', 'cp1252',
              'shift_jis', 'euc-kr', 'big5', 'gb2312'
          );

          if (in_array($encoding, $mb_safe, true))
          {
              $converted = @mb_convert_encoding($string, 'utf-8', $encoding);
              if (!empty($converted))
              {
                  return $converted;
              }
          }
      }

      // Try the recode extension
      if (function_exists('recode_string'))
      {
          $converted = @recode_string($encoding . '..utf-8', $string);
          if (!empty($converted))
          {
              return $converted;
          }
      }

      // If nothing works, check if we have a custom transcoder available.
      // Make sure the encoding name is alphanumeric first, we don't want it
      // to be abused into loading arbitrary files.
      if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding))
      {
          trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
      }

      global $phpbb_root_path, $phpEx;

      // The two files holding the bundled transcoder functions
      $basic_file = $phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx;
      $cjk_file = $phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx;

      // iso-8859-* character encoding
      if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $hit))
      {
          switch ($hit[1])
          {
              case '1':
              case '2':
              case '4':
              case '7':
              case '8':
              case '9':
              case '15':
                  $decoder = 'iso_8859_' . $hit[1];
                  if (!function_exists($decoder))
                  {
                      if (!file_exists($basic_file))
                      {
                          trigger_error('Basic reencoder file is missing', E_USER_ERROR);
                      }
                      include($basic_file);
                  }
                  return call_user_func($decoder, $string);

              default:
                  trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
          }
      }

      // CP/WIN character encoding
      if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $hit))
      {
          switch ($hit[1])
          {
              case '932':
                  // Not handled here; falls through to the SJIS check below
                  break;

              case '1250':
              case '1251':
              case '1252':
              case '1254':
              case '1255':
              case '1256':
              case '1257':
              case '874':
                  $decoder = 'cp' . $hit[1];
                  if (!function_exists($decoder))
                  {
                      if (!file_exists($basic_file))
                      {
                          trigger_error('Basic reencoder file is missing', E_USER_ERROR);
                      }
                      include($basic_file);
                  }
                  return call_user_func($decoder, $string);

              default:
                  trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
          }
      }

      // TIS-620
      if (preg_match('/tis[_ -]?620/', $encoding))
      {
          if (!function_exists('tis_620'))
          {
              if (!file_exists($basic_file))
              {
                  trigger_error('Basic reencoder file is missing', E_USER_ERROR);
              }
              include($basic_file);
          }
          return tis_620($string);
      }

      // SJIS
      if (preg_match('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', $encoding))
      {
          if (!function_exists('sjis'))
          {
              if (!file_exists($cjk_file))
              {
                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
              }
              include($cjk_file);
          }
          return sjis($string);
      }

      // EUC_KR
      if (preg_match('/euc[_ -]?kr/', $encoding))
      {
          if (!function_exists('euc_kr'))
          {
              if (!file_exists($cjk_file))
              {
                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
              }
              include($cjk_file);
          }
          return euc_kr($string);
      }

      // BIG-5
      if (preg_match('/big[_ -]?5/', $encoding))
      {
          if (!function_exists('big5'))
          {
              if (!file_exists($cjk_file))
              {
                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
              }
              include($cjk_file);
          }
          return big5($string);
      }

      // GB2312
      if (preg_match('/gb[_ -]?2312/', $encoding))
      {
          if (!function_exists('gb2312'))
          {
              if (!file_exists($cjk_file))
              {
                  trigger_error('CJK reencoder file is missing', E_USER_ERROR);
              }
              include($cjk_file);
          }
          return gb2312($string);
      }

      // No transcoder is available for this encoding
      trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
  }
  /**
  * Replace all UTF-8 chars that are not in ASCII with their NCR
  *
  * Every byte sequence consisting of a lead byte 0xC2-0xF4 followed by one to
  * three continuation bytes is handed to utf8_encode_ncr_callback().
  *
  * @param string $text UTF-8 string in NFC
  * @return string ASCII string using NCRs for non-ASCII chars
  */
  function utf8_encode_ncr($text)
  {
      $multibyte_char = '#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#';

      return preg_replace_callback($multibyte_char, 'utf8_encode_ncr_callback', $text);
  }
  /**
   * Callback used by utf8_encode_ncr()
   *
   * Receives the match array for one multi-byte UTF-8 sequence and builds the
   * decimal NCR from whatever utf8_ord() returns for the matched bytes.
   * Attention, $m is an array, not a string.
   *
   * @param array $m 0-based numerically indexed array passed by preg_replace_callback()
   * @return string '&#' followed by the result of utf8_ord() and a closing ';'
   */
  function utf8_encode_ncr_callback($m)
  {
      $matched_char = $m[0];
      $code_point = utf8_ord($matched_char);

      return '&#' . $code_point . ';';
  }
  /**
   * Converts a single UTF-8 encoded char to its Unicode code point
   *
   * The byte length of the input selects the decoding: one byte is returned
   * via ord(), two to four bytes are decoded by masking the lead byte and
   * appending the low six bits of every following byte. No validation of the
   * byte values is performed.
   *
   * @param string $chr UTF-8 char
   * @return integer|string UNICODE code point; an input that is not 1 to 4
   *                        bytes long is returned unchanged
   */
  function utf8_ord($chr)
  {
      $length = strlen($chr);

      // Empty or over-long input is handed back as is
      if ($length < 1 || $length > 4)
      {
          return $chr;
      }

      if ($length === 1)
      {
          return ord($chr);
      }

      // Payload bits carried by the lead byte, indexed by sequence length
      $lead_masks = array(2 => 0x1F, 3 => 0x0F, 4 => 0x07);

      $code_point = ord($chr[0]) & $lead_masks[$length];

      // Every further byte contributes its six low bits
      for ($i = 1; $i < $length; $i++)
      {
          $code_point = ($code_point << 6) | (ord($chr[$i]) & 0x3F);
      }

      return $code_point;
  }
  /**
   * Converts a Unicode code point to its UTF-8 encoded char
   *
   * Code points above 0xFFFF produce four bytes, above 0x7FF three bytes,
   * above 0x7F two bytes and anything else a single byte. The code point is
   * not checked for validity.
   *
   * @param int $cp UNICODE code point
   * @return string UTF-8 char
   */
  function utf8_chr($cp)
  {
      // Four byte sequence
      if ($cp > 0xFFFF)
      {
          $lead   = chr(0xF0 | ($cp >> 18));
          $second = chr(0x80 | (($cp >> 12) & 0x3F));
          $third  = chr(0x80 | (($cp >> 6) & 0x3F));
          $fourth = chr(0x80 | ($cp & 0x3F));

          return $lead . $second . $third . $fourth;
      }

      // Three byte sequence
      if ($cp > 0x7FF)
      {
          $lead   = chr(0xE0 | ($cp >> 12));
          $second = chr(0x80 | (($cp >> 6) & 0x3F));
          $third  = chr(0x80 | ($cp & 0x3F));

          return $lead . $second . $third;
      }

      // Two byte sequence
      if ($cp > 0x7F)
      {
          $lead   = chr(0xC0 | ($cp >> 6));
          $second = chr(0x80 | ($cp & 0x3F));

          return $lead . $second;
      }

      // Single byte
      return chr($cp);
  }
  /**
   * Convert Numeric Character References to UTF-8 chars
   *
   * Recognised forms are "&#" followed by 1 to 6 decimal digits, or by "x" and
   * 1 to 5 hexadecimal digits (matched case-insensitively), and a closing ";".
   * Each match is handed to utf8_decode_ncr_callback().
   *
   * Notes:
   * - NCRs are not converted recursively, if you pass &#38;#38; it will return &#38;
   * - the existence of the Unicode character is NOT checked, therefore an entity
   *   may be converted to an inexistent codepoint
   *
   * @param string $text String to convert, encoded in UTF-8 (no normal form required)
   * @return string UTF-8 string where NCRs have been replaced with the actual chars
   */
  function utf8_decode_ncr($text)
  {
      // Decimal or hexadecimal reference; group 1 holds the numeric part
      $ncr_pattern = '/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i';

      return preg_replace_callback($ncr_pattern, 'utf8_decode_ncr_callback', $text);
  }
  /**
   * Callback used by utf8_decode_ncr()
   *
   * Takes the numeric part of an NCR (decimal, or hexadecimal when prefixed
   * with "x"/"X") and returns the result of utf8_chr() for it. Attention, $m
   * is an array. The value is passed on without any range check here.
   *
   * @param array $m 0-based numerically indexed array passed by preg_replace_callback()
   * @return string UTF-8 char
   */
  function utf8_decode_ncr_callback($m)
  {
      $reference = $m[1];

      // A leading "x" (any case) marks a hexadecimal reference
      if (strncasecmp($reference, 'x', 1) === 0)
      {
          return utf8_chr(hexdec(substr($reference, 1)));
      }

      // Decimal reference, handed over as captured
      return utf8_chr($reference);
  }
  /**
   * Case folds a unicode string as per Unicode 5.0, section 3.13
   *
   * The mapping tables are included from includes/utf/data/ on first use and
   * kept in a static array for later calls. The common table is always
   * applied first; it is followed by the full table when $option is exactly
   * 'full', otherwise by the simple table.
   *
   * @param string $text text to be case folded
   * @param string $option determines how we will fold the cases
   * @return string case folded text
   */
  function utf8_case_fold($text, $option = 'full')
  {
      static $uniarray = array();
      global $phpbb_root_path, $phpEx;

      // Data file (without extension) for every folding table
      $table_files = array(
          'c' => 'includes/utf/data/case_fold_c.',
          'f' => 'includes/utf/data/case_fold_f.',
          's' => 'includes/utf/data/case_fold_s.',
      );

      // Common first, then either full or simple
      $wanted = array('c', ($option === 'full') ? 'f' : 's');

      // Load whatever has not been loaded by an earlier call
      foreach ($wanted as $key)
      {
          if (!isset($uniarray[$key]))
          {
              $uniarray[$key] = include($phpbb_root_path . $table_files[$key] . $phpEx);
          }
      }

      // Apply the tables in order
      foreach ($wanted as $key)
      {
          $text = strtr($text, $uniarray[$key]);
      }

      return $text;
  }
  976. /**
  977. * Takes the input and does a "special" case fold. It does minor normalization
  978. * and returns NFKC compatable text
  979. *
  980. * @param string $text text to be case folded
  981. * @param string $option determines how we will fold the cases
  982. * @return string case folded text
  983. */
  984. function utf8_case_fold_nfkc($text, $option = 'full')
  985. {
  986. static $fc_nfkc_closure = array(
  987. "\xCD\xBA" => "\x20\xCE\xB9",
  988. "\xCF\x92" => "\xCF\x85",
  989. "\xCF\x93" => "\xCF\x8D",
  990. "\xCF\x94" => "\xCF\x8B",
  991. "\xCF\xB2" => "\xCF\x83",
  992. "\xCF\xB9" => "\xCF\x83",
  993. "\xE1\xB4\xAC" => "\x61",
  994. "\xE1\xB4\xAD" => "\xC3\xA6",
  995. "\xE1\xB4\xAE" => "\x62",
  996. "\xE1\xB4\xB0" => "\x64",
  997. "\xE1\xB4\xB1" => "\x65",
  998. "\xE1\xB4\xB2" => "\xC7\x9D",
  999. "\xE1\xB4\xB3" => "\x67",
  1000. "\xE1\xB4\xB4" => "\x68",
  1001. "\xE1\xB4\xB5" => "\x69",
  1002. "\xE1\xB4\xB6" => "\x6A",
  1003. "\xE1\xB4\xB7" => "\x6B",
  1004. "\xE1\xB4\xB8" => "\x6C",
  1005. "\xE1\xB4\xB9" => "\x6D",
  1006. "\xE1\xB4\xBA" => "\x6E",
  1007. "\xE1\xB4\xBC" => "\x6F",
  1008. "\xE1\xB4\xBD" => "\xC8\xA3",
  1009. "\xE1\xB4\xBE" => "\x70",
  1010. "\xE1\xB4\xBF" => "\x72",
  1011. "\xE1\xB5\x80" => "\x74",
  1012. "\xE1\xB5\x81" => "\x75",
  1013. "\xE1\xB5\x82" => "\x77",
  1014. "\xE2\x82\xA8" => "\x72\x73",
  1015. "\xE2\x84\x82" => "\x63",
  1016. "\xE2\x84\x83" => "\xC2\xB0\x63",
  1017. "\xE2\x84\x87" => "\xC9\x9B",
  1018. "\xE2\x84\x89" => "\xC2\xB0\x66",
  1019. "\xE2\x84\x8B" => "\x68",
  1020. "\xE2\x84\x8C" => "\x68",
  1021. "\xE2\x84\x8D" => "\x68",
  1022. "\xE2\x84\x90" => "\x69",
  1023. "\xE2\x84\x91" => "\x69",
  1024. "\xE2\x84\x92" => "\x6C",
  1025. "\xE2\x84\x95" => "\x6E",
  1026. "\xE2\x84\x96" => "\x6E\x6F",
  1027. "\xE2\x84\x99" => "\x70",
  1028. "\xE2\x84\x9A" => "\x71",
  1029. "\xE2\x84\x9B" => "\x72",
  1030. "\xE2\x84\x9C" => "\x72",
  1031. "\xE2\x84\x9D" => "\x72",
  1032. "\xE2\x84\xA0" => "\x73\x6D",
  1033. "\xE2\x84\xA1" => "\x74\x65\x6C",
  1034. "\xE2\x84\xA2" => "\x74\x6D",
  1035. "\xE2\x84\xA4" => "\x7A",
  1036. "\xE2\x84\xA8" => "\x7A",
  1037. "\xE2\x84\xAC" => "\x62",
  1038. "\xE2\x84\xAD" => "\x63",
  1039. "\xE2\x84\xB0" => "\x65",
  1040. "\xE2\x84\xB1" => "\x66",
  1041. "\xE2\x84\xB3" => "\x6D",
  1042. "\xE2\x84\xBB" => "\x66\x61\x78",
  1043. "\xE2\x84\xBE" => "\xCE\xB3",
  1044. "\xE2\x84\xBF" => "\xCF\x80",
  1045. "\xE2\x85\x85" => "\x64",
  1046. "\xE3\x89\x90" => "\x70\x74\x65",
  1047. "\xE3\x8B\x8C" => "\x68\x67",
  1048. "\xE3\x8B\x8E" => "\x65\x76",
  1049. "\xE3\x8B\x8F" => "\x6C\x74\x64",
  1050. "\xE3\x8D\xB1" => "\x68\x70\x61",
  1051. "\xE3\x8D\xB3" => "\x61\x75",
  1052. "\xE3\x8D\xB5" => "\x6F\x76",
  1053. "\xE3\x8D\xBA" => "\x69\x75",
  1054. "\xE3\x8E\x80" => "\x70\x61",
  1055. "\xE3\x8E\x81" => "\x6E\x61",
  1056. "\xE3\x8E\x82" => "\xCE\xBC\x61",
  1057. "\xE3\x8E\x83" => "\x6D\x61",
  1058. "\xE3\x8E\x84" => "\x6B\x61",
  1059. "\xE3\x8E\x85" => "\x6B\x62",
  1060. "\xE3\x8E\x86" => "\x6D\x62",
  1061. "\xE3\x8E\x87" => "\x67\x62",
  1062. "\xE3\x8E\x8A" => "\x70\x66",
  1063. "\xE3\x8E\x8B" => "\x6E\x66",
  1064. "\xE3\x8E\x8C" => "\xCE\xBC\x66",
  1065. "\xE3\x8E\x90" => "\x68\x7A",
  1066. "\xE3\x8E\x91" => "\x6B\x68\x7A",
  1067. "\xE3\x8E\x92" => "\x6D\x68\x7A",
  1068. "\xE3\x8E\x93" => "\x67\x68\x7A",
  1069. "\xE3\x8E\x94" => "\x74\x68\x7A",
  1070. "\xE3\x8E\xA9" => "\x70\x61",
  1071. "\xE3\x8E\xAA" => "\x6B\x70\x61",
  1072. "\xE3\x8E\xAB" => "\x6D\x70\x61",
  1073. "\xE3\x8E\xAC" => "\x67\x70\x61",
  1074. "\xE3\x8E\xB4" => "\x70\x76",
  1075. "\xE3\x8E\xB5" => "\x6E\x76",
  1076. "\xE3\x8E\xB6" => "\xCE\xBC\x76",
  1077. "\xE3\x8E\xB7" => "\x6D\x76",
  1078. "\xE3\x8E\xB8" => "\x6B\x76",
  1079. "\xE3\x8E\xB9" => "\x6D\x76",
  1080. "\xE3\x8E\xBA" => "\x70\x77",
  1081. "\xE3\x8E\xBB" => "\x6E\x77",
  1082. "\xE3\x8E\xBC" => "\xCE\xBC\x77",
  1083. "\xE3\x8E\xBD" => "\x6D\x77",
  1084. "\xE3\x8E\xBE" => "\x6B\x77",
  1085. "\xE3\x8E\xBF" => "\x6D\x77",
  1086. "\xE3\x8F\x80" => "\x6B\xCF\x89",
  1087. "\xE3\x8F\x81" => "\x6D\xCF\x89",
  1088. "\xE3\x8F\x83" => "\x62\x71",
  1089. "\xE3\x8F\x86" => "\x63\xE2\x88\x95\x6B\x67",
  1090. "\xE3\x8F\x87" => "\x63\x6F\x2E",
  1091. "\xE3\x8F\x88" => "\x64\x62",
  1092. "\xE3\x8F\x89" => "\x67\x79",
  1093. "\xE3\x8F\x8B" => "\x68\x70",
  1094. "\xE3\x8F\x8D" => "\x6B\x6B",
  1095. "\xE3\x8F\x8E" => "\x6B\x6D",
  1096. "\xE3\x8F\x97" => "\x70\x68",
  1097. "\xE3\x8F\x99" => "\x70\x70\x6D",
  1098. "\xE3\x8F\x9A" => "\x70\x72",
  1099. "\xE3\x8F\x9C" => "\x73\x76",
  1100. "\xE3\x8F\x9D" => "\x77\x62",
  1101. "\xE3\x8F\x9E" => "\x76\xE2\x88\x95\x6D",
  1102. "\xE3\x8F\x9F" => "\x61\xE2\x88\x95\x6D",
  1103. "\xF0\x9D\x90\x80" => "\x61",
  1104. "\xF0\x9D\x90\x81" => "\x62",
  1105. "\xF0\x9D\x90\x82" => "\x63",
  1106. "\xF0\x9D\x90\x83" => "\x64",
  1107. "\xF0\x9D\x90\x84" => "\x65",
  1108. "\xF0\x9D\x90\x85" => "\x66",
  1109. "\xF0\x9D\x90\x86" => "\x67",
  1110. "\xF0\x9D\x90\x87" => "\x68",
  1111. "\xF0\x9D\x90\x88" => "\x69",
  1112. "\xF0\x9D\x90\x89" => "\x6A",
  1113. "\xF0\x9D\x90\x8A" => "\x6B",
  1114. "\xF0\x9D\x90\x8B" => "\x6C",
  1115. "\xF0\x9D\x90\x8C" => "\x6D",
  1116. "\xF0\x9D\x90\x8D" => "\x6E",
  1117. "\xF0\x9D\x90\x8E" => "\x6F",
  1118. "\xF0\x9D\x90\x8F" => "\x70",
  1119. "\xF0\x9D\x90\x90" => "\x71",
  1120. "\xF0\x9D\x90\x91" => "\x72",
  1121. "\xF0\x9D\x90\x92" => "\x73",
  1122. "\xF0\x9D\x90\x93" => "\x74",
  1123. "\xF0\x9D\x90\x94" => "\x75",
  1124. "\xF0\x9D\x90\x95" => "\x76",
  1125. "\xF0\x9D\x90\x96" => "\x77",
  1126. "\xF0\x9D\x90\x97" => "\x78",
  1127. "\xF0\x9D\x90\x98" => "\x79",
  1128. "\xF0\x9D\x90\x99" => "\x7A",
  1129. "\xF0\x9D\x90\xB4" => "\x61",
  1130. "\xF0\x9D\x90\xB5" => "\x62",
  1131. "\xF0\x9D\x90\xB6" => "\x63",
  1132. "\xF0\x9D\x90\xB7" => "\x64",
  1133. "\xF0\x9D\x90\xB8" => "\x65",
  1134. "\xF0\x9D\x90\xB9" => "\x66",
  1135. "\xF0\x9D\x90\xBA" => "\x67",
  1136. "\xF0\x9D\x90\xBB" => "\x68",
  1137. "\xF0\x9D\x90\xBC" => "\x69",
  1138. "\xF0\x9D\x90\xBD" => "\x6A",
  1139. "\xF0\x9D\x90\xBE" => "\x6B",
  1140. "\xF0\x9D\x90\xBF" => "\x6C",
  1141. "\xF0\x9D\x91\x80" => "\x6D",
  1142. "\xF0\x9D\x91\x81" => "\x6E",
  1143. "\xF0\x9D\x91\x82" => "\x6F",
  1144. "\xF0\x9D\x91\x83" => "\x70",
  1145. "\xF0\x9D\x91\x84" => "\x71",
  1146. "\xF0\x9D\x91\x85" => "\x72",
  1147. "\xF0\x9D\x91\x86" => "\x73",
  1148. "\xF0\x9D\x91\x87" => "\x74",
  1149. "\xF0\x9D\x91\x88" => "\x75",
  1150. "\xF0\x9D\x91\x89" => "\x76",
  1151. "\xF0\x9D\x91\x8A" => "\x77",
  1152. "\xF0\x9D\x91\x8B" => "\x78",
  1153. "\xF0\x9D\x91\x8C" => "\x79",
  1154. "\xF0\x9D\x91\x8D" => "\x7A",
  1155. "\xF0\x9D\x91\xA8" => "\x61",
  1156. "\xF0\x9D\x91\xA9" => "\x62",
  1157. "\xF0\x9D\x91\xAA" => "\x63",
  1158. "\xF0\x9D\x91\xAB" => "\x64",
  1159. "\xF0\x9D\x91\xAC" => "\x65",
  1160. "\xF0\x9D\x91\xAD" => "\x66",
  1161. "\xF0\x9D\x91\xAE" => "\x67",
  1162. "\xF0\x9D\x91\xAF" => "\x68",
  1163. "\xF0\x9D\x91\xB0" => "\x69",
  1164. "\xF0\x9D\x91\xB1" => "\x6A",
  1165. "\xF0\x9D\x91\xB2" => "\x6B",
  1166. "\xF0\x9D\x91\xB3" => "\x6C",
  1167. "\xF0\x9D\x91\xB4" => "\x6D",
  1168. "\xF0\x9D\x91\xB5" => "\x6E",
  1169. "\xF0\x9D\x91\xB6" => "\x6F",
  1170. "\xF0\x9D\x91\xB7" => "\x70",
  1171. "\xF0\x9D\x91\xB8" => "\x71",
  1172. "\xF0\x9D\x91\xB9" => "\x72",
  1173. "\xF0\x9D\x91\xBA" => "\x73",
  1174. "\xF0\x9D\x91\xBB" => "\x74",
  1175. "\xF0\x9D\x91\xBC" => "\x75",
  1176. "\xF0\x9D\x91\xBD" => "\x76",
  1177. "\xF0\x9D\x91\xBE" => "\x77",
  1178. "\xF0\x9D\x91\xBF" => "\x78",
  1179. "\xF0\x9D\x92\x80" => "\x79",
  1180. "\xF0\x9D\x92\x81" => "\x7A",
  1181. "\xF0\x9D\x92\x9C" => "\x61",
  1182. "\xF0\x9D\x92\x9E" => "\x63",
  1183. "\xF0\x9D\x92\x9F" => "\x64",
  1184. "\xF0\x9D\x92\xA2" => "\x67",
  1185. "\xF0\x9D\x92\xA5" => "\x6A",
  1186. "\xF0\x9D\x92\xA6" => "\x6B",
  1187. "\xF0\x9D\x92\xA9" => "\x6E",
  1188. "\xF0\x9D\x92\xAA" => "\x6F",
  1189. "\xF0\x9D\x92\xAB" => "\x70",
  1190. "\xF0\x9D\x92\xAC" => "\x71",
  1191. "\xF0\x9D\x92\xAE" => "\x73",
  1192. "\xF0\x9D\x92\xAF" => "\x74",
  1193. "\xF0\x9D\x92\xB0" => "\x75",
  1194. "\xF0\x9D\x92\xB1" => "\x76",
  1195. "\xF0\x9D\x92\xB2" => "\x77",
  1196. "\xF0\x9D\x92\xB3" => "\x78",
  1197. "\xF0\x9D\x92\xB4" => "\x79",
  1198. "\xF0\x9D\x92\xB5" => "\x7A",
  1199. "\xF0\x9D\x93\x90" => "\x61",
  1200. "\xF0\x9D\x93\x91" => "\x62",
  1201. "\xF0\x9D\x93\x92" => "\x63",
  1202. "\xF0\x9D\x93\x93" => "\x64",
  1203. "\xF0\x9D\x93\x94" => "\x65",
  1204. "\xF0\x9D\x93\x95" => "\x66",
  1205. "\xF0\x9D\x93\x96" => "\x67",
  1206. "\xF0\x9D\x93\x97" => "\x68",
  1207. "\xF0\x9D\x93\x98" => "\x69",
  1208. "\xF0\x9D\x93\x99" => "\x6A",
  1209. "\xF0\x9D\x93\x9A" => "\x6B",
  1210. "\xF0\x9D\x93\x9B" => "\x6C",
  1211. "\xF0\x9D\x93\x9C" => "\x6D",
  1212. "\xF0\x9D\x93\x9D" => "\x6E",
  1213. "\xF0\x9D\x93\x9E" => "\x6F",
  1214. "\xF0\x9D\x93\x9F" => "\x70",
  1215. "\xF0\x9D\x93\xA0" => "\x71",
  1216. "\xF0\x9D\x93\xA1" => "\x72",
  1217. "\xF0\x9D\x93\xA2" => "\x73",
  1218. "\xF0\x9D\x93\xA3" => "\x74",
  1219. "\xF0\x9D\x93\xA4" => "\x75",
  1220. "\xF0\x9D\x93\xA5" => "\x76",
  1221. "\xF0\x9D\x93\xA6" => "\x77",
  1222. "\xF0\x9D\x93\xA7" => "\x78",
  1223. "\xF0\x9D\x93\xA8" => "\x79",
  1224. "\xF0\x9D\x93\xA9" => "\x7A",
  1225. "\xF0\x9D\x94\x84" => "\x61",
  1226. "\xF0\x9D\x94\x85" => "\x62",
  1227. "\xF0\x9D\x94\x87" => "\x64",
  1228. "\xF0\x9D\x94\x88" => "\x65",
  1229. "\xF0\x9D\x94\x89" => "\x66",
  1230. "\xF0\x9D\x94\x8A" => "\x67",
  1231. "\xF0\x9D\x94\x8D" => "\x6A",
  1232. "\xF0\x9D\x94\x8E" => "\x6B",
  1233. "\xF0\x9D\x94\x8F" => "\x6C",
  1234. "\xF0\x9D\x94\x90" => "\x6D",
  1235. "\xF0\x9D\x94\x91" => "\x6E",
  1236. "\xF0\x9D\x94\x92" => "\x6F",
  1237. "\xF0\x9D\x94\x93" => "\x70",
  1238. "\xF0\x9D\x94\x94" => "\x71",
  1239. "\xF0\x9D\x94\x96" => "\x73",
  1240. "\xF0\x9D\x94\x97" => "\x74",
  1241. "\xF0\x9D\x94\x98" => "\x75",
  1242. "\xF0\x9D\x94\x99" => "\x76",
  1243. "\xF0\x9D\x94\x9A" => "\x77",
  1244. "\xF0\x9D\x94\x9B" => "\x78",
  1245. "\xF0\x9D\x94\x9C" => "\x79",
  1246. "\xF0\x9D\x94\xB8" => "\x61",
  1247. "\xF0\x9D\x94\xB9" => "\x62",
  1248. "\xF0\x9D\x94\xBB" => "\x64",
  1249. "\xF0\x9D\x94\xBC" => "\x65",
  1250. "\xF0\x9D\x94\xBD" => "\x66",
  1251. "\xF0\x9D\x94\xBE" => "\x67",
  1252. "\xF0\x9D\x95\x80" => "\x69",
  1253. "\xF0\x9D\x95\x81" => "\x6A",
  1254. "\xF0\x9D\x95\x82" => "\x6B",
  1255. "\xF0\x9D\x95\x83" => "\x6C",
  1256. "\xF0\x9D\x95\x84" => "\x6D",
  1257. "\xF0\x9D\x95\x86" => "\x6F",
  1258. "\xF0\x9D\x95\x8A" => "\x73",
  1259. "\xF0\x9D\x95\x8B" => "\x74",
  1260. "\xF0\x9D\x95\x8C" => "\x75",
  1261. "\xF0\x9D\x95\x8D" => "\x76",
  1262. "\xF0\x9D\x95\x8E" => "\x77",
  1263. "\xF0\x9D\x95\x8F" => "\x78",
  1264. "\xF0\x9D\x95\x90" => "\x79",
  1265. "\xF0\x9D\x95\xAC" => "\x61",
  1266. "\xF0\x9D\x95\xAD" => "\x62",
  1267. "\xF0\x9D\x95\xAE" => "\x63",
  1268. "\xF0\x9D\x95\xAF" => "\x64",
  1269. "\xF0\x9D\x95\xB0" => "\x65",
  1270. "\xF0\x9D\x95\xB1" => "\x66",
  1271. "\xF0\x9D\x95\xB2" => "\x67",
  1272. "\xF0\x9D\x95\xB3" => "\x68",
  1273. "\xF0\x9D\x95\xB4" => "\x69",
  1274. "\xF0\x9D\x95\xB5" => "\x6A",
  1275. "\xF0\x9D\x95\xB6" => "\x6B",
  1276. "\xF0\x9D\x95\xB7" => "\x6C",
  1277. "\xF0\x9D\x95\xB8" => "\x6D",
  1278. "\xF0\x9D\x95\xB9" => "\x6E",
  1279. "\xF0\x9D\x95\xBA" => "\x6F",
  1280. "\xF0\x9D\x95\xBB" => "\x70",
  1281. "\xF0\x9D\x95\xBC" => "\x71",
  1282. "\xF0\x9D\x95\xBD" => "\x72",
  1283. "\xF0\x9D\x95\xBE" => "\x73",
  1284. "\xF0\x9D\x95\xBF" => "\x74",
  1285. "\xF0\x9D\x96\x80" => "\x75",
  1286. "\xF0\x9D\x96\x81" => "\x76",
  1287. "\xF0\x9D\x96\x82" => "\x77",
  1288. "\xF0\x9D\x96\x83" => "\x78",
  1289. "\xF0\x9D\x96\x84" => "\x79",
  1290. "\xF0\x9D\x96\x85" => "\x7A",
  1291. "\xF0\x9D\x96\xA0" => "\x61",
  1292. "\xF0\x9D\x96\xA1" => "\x62",
  1293. "\xF0\x9D\x96\xA2" => "\x63",
  1294. "\xF0\x9D\x96\xA3" => "\x64",
  1295. "\xF0\x9D\x96\xA4" => "\x65",
  1296. "\xF0\x9D\x96\xA5" => "\x66",
  1297. "\xF0\x9D\x96\xA6" => "\x67",
  1298. "\xF0\x9D\x96\xA7" => "\x68",
  1299. "\xF0\x9D\x96\xA8" => "\x69",
  1300. "\xF0\x9D\x96\xA9" => "\x6A",
  1301. "\xF0\x9D\x96\xAA" => "\x6B",
  1302. "\xF0\x9D\x96\xAB" => "\x6C",
  1303. "\xF0\x9D\x96\xAC" => "\x6D",
  1304. "\xF0\x9D\x96\xAD" => "\x6E",
  1305. "\xF0\x9D\x96\xAE" => "\x6F",
  1306. "\xF0\x9D\x96\xAF" => "\x70",
  1307. "\xF0\x9D\x96\xB0" => "\x71",
  1308. "\xF0\x9D\x96\xB1" => "\x72",
  1309. "\xF0\x9D\x96\xB2" => "\x73",
  1310. "\xF0\x9D\x96\xB3" => "\x74",
  1311. "\xF0\x9D\x96\xB4" => "\x75",
  1312. "\xF0\x9D\x96\xB5" => "\x76",
  1313. "\xF0\x9D\x96\xB6" => "\x77",
  1314. "\xF0\x9D\x96\xB7" => "\x78",
  1315. "\xF0\x9D\x96\xB8" => "\x79",
  1316. "\xF0\x9D\x96\xB9" => "\x7A",
  1317. "\xF0\x9D\x97\x94" => "\x61",
  1318. "\xF0\x9D\x97\x95" => "\x62",
  1319. "\xF0\x9D\x97\x96" => "\x63",
  1320. "\xF0\x9D\x97\x97" => "\x64",
  1321. "\xF0\x9D\x97\x98" => "\x65",
  1322. "\xF0\x9D\x97\x99" => "\x66",
  1323. "\xF0\x9D\x97\x9A" => "\x67",
  1324. "\xF0\x9D\x97\x9B" => "\x68",
  1325. "\xF0\x9D\x97\x9C" => "\x69",
  1326. "\xF0\x9D\x97\x9D" => "\x6A",
  1327. "\xF0\x9D\x97\x9E" => "\x6B",
  1328. "\xF0\x9D\x97\x9F" => "\x6C",
  1329. "\xF0\x9D\x97\xA0" => "\x6D",
  1330. "\xF0\x9D\x97\xA1" => "\x6E",
  1331. "\xF0\x9D\x97\xA2" => "\x6F",
  1332. "\xF0\x9D\x97\xA3" => "\x70",
  1333. "\xF0\x9D\x97\xA4" => "\x71",
  1334. "\xF0\x9D\x97\xA5" => "\x72",
  1335. "\xF0\x9D\x97\xA6" => "\x73",
  1336. "\xF0\x9D\x97\xA7" => "\x74",
  1337. "\xF0\x9D\x97\xA8" => "\x75",
  1338. "\xF0\x9D\x97\xA9" => "\x76",
  1339. "\xF0\x9D\x97\xAA" => "\x77",
  1340. "\xF0\x9D\x97\xAB" => "\x78",
  1341. "\xF0\x9D\x97\xAC" => "\x79",
  1342. "\xF0\x9D\x97\xAD" => "\x7A",
  1343. "\xF0\x9D\x98\x88" => "\x61",
  1344. "\xF0\x9D\x98\x89" => "\x62",
  1345. "\xF0\x9D\x98\x8A" => "\x63",
  1346. "\xF0\x9D\x98\x8B" => "\x64",
  1347. "\xF0\x9D\x98\x8C" => "\x65",
  1348. "\xF0\x9D\x98\x8D" => "\x66",
  1349. "\xF0\x9D\x98\x8E" => "\x67",
  1350. "\xF0\x9D\x98\x8F" => "\x68",
  1351. "\xF0\x9D\x98\x90" => "\x69",
  1352. "\xF0\x9D\x98\x91" => "\x6A",
  1353. "\xF0\x9D\x98\x92" => "\x6B",
  1354. "\xF0\x9D\x98\x93" => "\x6C",
  1355. "\xF0\x9D\x98\x94" => "\x6D",
  1356. "\xF0\x9D\x98\x95" => "\x6E",
  1357. "\xF0\x9D\x98\x96" => "\x6F",
  1358. "\xF0\x9D\x98\x97" => "\x70",
  1359. "\xF0\x9D\x98\x98" => "\x71",
  1360. "\xF0\x9D\x98\x99" => "\x72",
  1361. "\xF0\x9D\x98\x9A" => "\x73",
  1362. "\xF0\x9D\x98\x9B" => "\x74",
  1363. "\xF0\x9D\x98\x9C" => "\x75",
  1364. "\xF0\x9D\x98\x9D" => "\x76",
  1365. "\xF0\x9D\x98\x9E" => "\x77",
  1366. "\xF0\x9D\x98\x9F" => "\x78",
  1367. "\xF0\x9D\x98\xA0" => "\x79",
  1368. "\xF0\x9D\x98\xA1" => "\x7A",
  1369. "\xF0\x9D\x98\xBC" => "\x61",
  1370. "\xF0\x9D\x98\xBD" => "\x62",
  1371. "\xF0\x9D\x98\xBE" => "\x63",
  1372. "\xF0\x9D\x98\xBF" => "\x64",
  1373. "\xF0\x9D\x99\x80" => "\x65",
  1374. "\xF0\x9D\x99\x81" => "\x66",
  1375. "\xF0\x9D\x99\x82" => "\x67",
  1376. "\xF0\x9D\x99\x83" => "\x68",
  1377. "\xF0\x9D\x99\x84" => "\x69",
  1378. "\xF0\x9D\x99\x85" => "\x6A",
  1379. "\xF0\x9D\x99\x86" => "\x6B",
  1380. "\xF0\x9D\x99\x87" => "\x6C",
  1381. "\xF0\x9D\x99\x88" => "\x6D",
  1382. "\xF0\x9D\x99\x89" => "\x6E",
  1383. "\xF0\x9D\x99\x8A" => "\x6F",
  1384. "\xF0\x9D\x99\x8B" => "\x70",
  1385. "\xF0\x9D\x99\x8C" => "\x71",
  1386. "\xF0\x9D\x99\x8D" => "\x72",
  1387. "\xF0\x9D\x99\x8E" => "\x73",
  1388. "\xF0\x9D\x99\x8F" => "\x74",
  1389. "\xF0\x9D\x99\x90" => "\x75",
  1390. "\xF0\x9D\x99\x91" => "\x76",
  1391. "\xF0\x9D\x99\x92" => "\x77",
  1392. "\xF0\x9D\x99\x93" => "\x78",
  1393. "\xF0\x9D\x99\x94" => "\x79",
  1394. "\xF0\x9D\x99\x95" => "\x7A",
  1395. "\xF0\x9D\x99\xB0" => "\x61",
  1396. "\xF0\x9D\x99\xB1" => "\x62",
  1397. "\xF0\x9D\x99\xB2" => "\x63",
  1398. "\xF0\x9D\x99\xB3" => "\x64",
  1399. "\xF0\x9D\x99\xB4" => "\x65",
  1400. "\xF0\x9D\x99\xB5" => "\x66",
  1401. "\xF0\x9D\x99\xB6" => "\x67",
  1402. "\xF0\x9D\x99\xB7" => "\x68",
  1403. "\xF0\x9D\x99\xB8" => "\x69",
  1404. "\xF0\x9D\x99\xB9" => "\x6A",
  1405. "\xF0\x9D\x99\xBA" => "\x6B",
  1406. "\xF0\x9D\x99\xBB" => "\x6C",
  1407. "\xF0\x9D\x99\xBC" => "\x6D",
  1408. "\xF0\x9D\x99\xBD" => "\x6E",
  1409. "\xF0\x9D\x99\xBE" => "\x6F",
  1410. "\xF0\x9D\x99\xBF" => "\x70",
  1411. "\xF0\x9D\x9A\x80" => "\x71",
  1412. "\xF0\x9D\x9A\x81" => "\x72",
  1413. "\xF0\x9D\x9A\x82" => "\x73",
  1414. "\xF0\x9D\x9A\x83" => "\x74",
  1415. "\xF0\x9D\x9A\x84" => "\x75",
  1416. "\xF0\x9D\x9A\x85" => "\x76",
  1417. "\xF0\x9D\x9A\x86" => "\x77",
  1418. "\xF0\x9D\x9A\x87" => "\x78",
  1419. "\xF0\x9D\x9A\x88" => "\x79",
  1420. "\xF0\x9D\x9A\x89" => "\x7A",
  1421. "\xF0\x9D\x9A\xA8" => "\xCE\xB1",
  1422. "\xF0\x9D\x9A\xA9" => "\xCE\xB2",
  1423. "\xF0\x9D\x9A\xAA" => "\xCE\xB3",
  1424. "\xF0\x9D\x9A\xAB" => "\xCE\xB4",
  1425. "\xF0\x9D\x9A\xAC" => "\xCE\xB5",
  1426. "\xF0\x9D\x9A\xAD" => "\xCE\xB6",
  1427. "\xF0\x9D\x9A\xAE" => "\xCE\xB7",
  1428. "\xF0\x9D\x9A\xAF" => "\xCE\xB8",
  1429. "\xF0\x9D\x9A\xB0" => "\xCE\xB9",
  1430. "\xF0\x9D\x9A\xB1" => "\xCE\xBA",
  1431. "\xF0\x9D\x9A\xB2" => "\xCE\xBB",
  1432. "\xF0\x9D\x9A\xB3" => "\xCE\xBC",
  1433. "\xF0\x9D\x9A\xB4" => "\xCE\xBD",
  1434. "\xF0\x9D\x9A\xB5" => "\xCE\xBE",
  1435. "\xF0\x9D\x9A\xB6" => "\xCE\xBF",
  1436. "\xF0\x9D\x9A\xB7" => "\xCF\x80",
  1437. "\xF0\x9D\x9A\xB8" => "\xCF\x81",
  1438. "\xF0\x9D\x9A\xB9" => "\xCE\xB8",
  1439. "\xF0\x9D\x9A\xBA" => "\xCF\x83",
  1440. "\xF0\x9D\x9A\xBB" => "\xCF\x84",
  1441. "\xF0\x9D\x9A\xBC" => "\xCF\x85",
  1442. "\xF0\x9D\x9A\xBD" => "\xCF\x86",
  1443. "\xF0\x9D\x9A\xBE" => "\xCF\x87",
  1444. "\xF0\x9D\x9A\xBF" => "\xCF\x88",
  1445. "\xF0\x9D\x9B\x80" => "\xCF\x89",
  1446. "\xF0\x9D\x9B\x93" => "\xCF\x83",
  1447. "\xF0\x9D\x9B\xA2" => "\xCE\xB1",
  1448. "\xF0\x9D\x9B\xA3" => "\xCE\xB2",
  1449. "\xF0\x9D\x9B\xA4" => "\xCE\xB3",
  1450. "\xF0\x9D\x9B\xA5" => "\xCE\xB4",
  1451. "\xF0\x9D\x9B\xA6" => "\xCE\xB5",
  1452. "\xF0\x9D\x9B\xA7" => "\xCE\xB6",
  1453. "\xF0\x9D\x9B\xA8" => "\xCE\xB7",
  1454. "\xF0\x9D\x9B\xA9" => "\xCE\xB8",
  1455. "\xF0\x9D\x9B\xAA" => "\xCE\xB9",
  1456. "\xF0\x9D\x9B\xAB" => "\xCE\xBA",
  1457. "\xF0\x9D\x9B\xAC" => "\xCE\xBB",
  1458. "\xF0\x9D\x9B\xAD" => "\xCE\xBC",
  1459. "\xF0\x9D\x9B\xAE" => "\xCE\xBD",
  1460. "\xF0\x9D\x9B\xAF" => "\xCE\xBE",
  1461. "\xF0\x9D\x9B\xB0" => "\xCE\xBF",
  1462. "\xF0\x9D\x9B\xB1" => "\xCF\x80",
  1463. "\xF0\x9D\x9B\xB2" => "\xCF\x81",
  1464. "\xF0\x9D\x9B\xB3" => "\xCE\xB8",
  1465. "\xF0\x9D\x9B\xB4" => "\xCF\x83",
  1466. "\xF0\x9D\x9B\xB5" => "\xCF\x84",
  1467. "\xF0\x9D\x9B\xB6" => "\xCF\x85",
  1468. "\xF0\x9D\x9B\xB7" => "\xCF\x86",
  1469. "\xF0\x9D\x9B\xB8" => "\xCF\x87",
  1470. "\xF0\x9D\x9B\xB9" => "\xCF\x88",
  1471. "\xF0\x9D\x9B\xBA" => "\xCF\x89",
  1472. "\xF0\x9D\x9C\x8D" => "\xCF\x83",
  1473. "\xF0\x9D\x9C\x9C" => "\xCE\xB1",
  1474. "\xF0\x9D\x9C\x9D" => "\xCE\xB2",
  1475. "\xF0\x9D\x9C\x9E" => "\xCE\xB3",
  1476. "\xF0\x9D\x9C\x9F" => "\xCE\xB4",
  1477. "\xF0\x9D\x9C\xA0" => "\xCE\xB5",
  1478. "\xF0\x9D\x9C\xA1" => "\xCE\xB6",
  1479. "\xF0\x9D\x9C\xA2" => "\xCE\xB7",
  1480. "\xF0\x9D\x9C\xA3" => "\xCE\xB8",
  1481. "\xF0\x9D\x9C\xA4" => "\xCE\xB9",
  1482. "\xF0\x9D\x9C\xA5" => "\xCE\xBA",
  1483. "\xF0\x9D\x9C\xA6" => "\xCE\xBB",
  1484. "\xF0\x9D\x9C\xA7" => "\xCE\xBC",
  1485. "\xF0\x9D\x9C\xA8" => "\xCE\xBD",
  1486. "\xF0\x9D\x9C\xA9" => "\xCE\xBE",
  1487. "\xF0\x9D\x9C\xAA" => "\xCE\xBF",
  1488. "\xF0\x9D\x9C\xAB" => "\xCF\x80",
  1489. "\xF0\x9D\x9C\xAC" => "\xCF\x81",
  1490. "\xF0\x9D\x9C\xAD" => "\xCE\xB8",
  1491. "\xF0\x9D\x9C\xAE" => "\xCF\x83",
  1492. "\xF0\x9D\x9C\xAF" => "\xCF\x84",
  1493. "\xF0\x9D\x9C\xB0" => "\xCF\x85",
  1494. "\xF0\x9D\x9C\xB1" => "\xCF\x86",
  1495. "\xF0\x9D\x9C\xB2" => "\xCF\x87",
  1496. "\xF0\x9D\x9C\xB3" => "\xCF\x88",
  1497. "\xF0\x9D\x9C\xB4" => "\xCF\x89",
  1498. "\xF0\x9D\x9D\x87" => "\xCF\x83",
  1499. "\xF0\x9D\x9D\x96" => "\xCE\xB1",
  1500. "\xF0\x9D\x9D\x97" => "\xCE\xB2",
  1501. "\xF0\x9D\x9D\x98" => "\xCE\xB3",
  1502. "\xF0\x9D\x9D\x99" => "\xCE\xB4",
  1503. "\xF0\x9D\x9D\x9A" => "\xCE\xB5",
  1504. "\xF0\x9D\x9D\x9B" => "\xCE\xB6",
  1505. "\xF0\x9D\x9D\x9C" => "\xCE\xB7",
  1506. "\xF0\x9D\x9D\x9D" => "\xCE\xB8",
  1507. "\xF0\x9D\x9D\x9E" => "\xCE\xB9",
  1508. "\xF0\x9D\x9D\x9F" => "\xCE\xBA",
  1509. "\xF0\x9D\x9D\xA0" => "\xCE\xBB",
  1510. "\xF0\x9D\x9D\xA1" => "\xCE\xBC",
  1511. "\xF0\x9D\x9D\xA2" => "\xCE\xBD",
  1512. "\xF0\x9D\x9D\xA3" => "\xCE\xBE",
  1513. "\xF0\x9D\x9D\xA4" => "\xCE\xBF",
  1514. "\xF0\x9D\x9D\xA5" => "\xCF\x80",
  1515. "\xF0\x9D\x9D\xA6" => "\xCF\x81",
  1516. "\xF0\x9D\x9D\xA7" => "\xCE\xB8",
  1517. "\xF0\x9D\x9D\xA8" => "\xCF\x83",
  1518. "\xF0\x9D\x9D\xA9" => "\xCF\x84",
  1519. "\xF0\x9D\x9D\xAA" => "\xCF\x85",
  1520. "\xF0\x9D\x9D\xAB" => "\xCF\x86",
  1521. "\xF0\x9D\x9D\xAC" => "\xCF\x87",
  1522. "\xF0\x9D\x9D\xAD" => "\xCF\x88",
  1523. "\xF0\x9D\x9D\xAE" => "\xCF\x89",
  1524. "\xF0\x9D\x9E\x81" => "\xCF\x83",
  1525. "\xF0\x9D\x9E\x90" => "\xCE\xB1",
  1526. "\xF0\x9D\x9E\x91" => "\xCE\xB2",
  1527. "\xF0\x9D\x9E\x92" => "\xCE\xB3",
  1528. "\xF0\x9D\x9E\x93" => "\xCE\xB4",
  1529. "\xF0\x9D\x9E\x94" => "\xCE\xB5",
  1530. "\xF0\x9D\x9E\x95" => "\xCE\xB6",
  1531. "\xF0\x9D\x9E\x96" => "\xCE\xB7",
  1532. "\xF0\x9D\x9E\x97" => "\xCE\xB8",
  1533. "\xF0\x9D\x9E\x98" => "\xCE\xB9",
  1534. "\xF0\x9D\x9E\x99" => "\xCE\xBA",
  1535. "\xF0\x9D\x9E\x9A" => "\xCE\xBB",
  1536. "\xF0\x9D\x9E\x9B" => "\xCE\xBC",
  1537. "\xF0\x9D\x9E\x9C" => "\xCE\xBD",
  1538. "\xF0\x9D\x9E\x9D" => "\xCE\xBE",
  1539. "\xF0\x9D\x9E\x9E" => "\xCE\xBF",
  1540. "\xF0\x9D\x9E\x9F" => "\xCF\x80",
  1541. "\xF0\x9D\x9…

Large files are truncated, but you can click here to view the full file