PageRenderTime 56ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/forum/includes/utf/utf_tools.php

https://github.com/GreyTeardrop/socionicasys-forum
PHP | 2018 lines | 1492 code | 134 blank | 392 comment | 129 complexity | ca564dde6899cb748cdf8fb1943ee1c7 MD5 | raw file
Possible License(s): AGPL-1.0, LGPL-3.0, MPL-2.0-no-copyleft-exception

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. *
  4. * @package utf
  5. * @version $Id$
  6. * @copyright (c) 2006 phpBB Group
  7. * @license http://opensource.org/licenses/gpl-license.php GNU Public License
  8. *
  9. */
  10. /**
  11. */
  // Stop right away unless the including script has defined IN_PHPBB,
  // i.e. the file is not meant to be requested on its own.
  defined('IN_PHPBB') || exit;

  // Enforce ASCII only string handling: character classification is pinned
  // to the "C" locale for everything that follows.
  setlocale(LC_CTYPE, 'C');
  18. /**
  19. * UTF-8 tools
  20. *
  21. * Whenever possible, these functions will try to use PHP's built-in functions or
  22. * extensions, otherwise they will default to custom routines.
  23. *
  24. * @package utf
  25. */
  26. if (!extension_loaded('xml'))
  27. {
  /**
  * Stand-in for PHP's native utf8_encode(), declared only when the XML
  * extension is not loaded.
  *
  * Bytes below 0x80 are copied unchanged. Every other byte becomes a two
  * byte sequence: 0x80-0xBF keep their value behind a 0xC2 lead byte,
  * 0xC0-0xFF are shifted down by 0x40 behind a 0xC3 lead byte.
  *
  * @param string $str ISO-8859-1 encoded data
  * @return string UTF-8 encoded data
  */
  function utf8_encode($str)
  {
      $utf8 = '';
      $length = strlen($str);

      for ($pos = 0; $pos < $length; ++$pos)
      {
          $byte = $str[$pos];
          $code = ord($byte);

          if ($code >= 0xC0)
          {
              $utf8 .= "\xC3" . chr($code - 64);
          }
          else if ($code >= 0x80)
          {
              $utf8 .= "\xC2" . $byte;
          }
          else
          {
              // plain ASCII
              $utf8 .= $byte;
          }
      }

      return $utf8;
  }
  /**
  * Stand-in for PHP's native utf8_decode(), declared only when the XML
  * extension is not loaded.
  *
  * Two byte sequences whose code point fits into one byte are converted,
  * every other multibyte character is replaced by a question mark. Bytes
  * that do not start a multibyte sequence are copied unchanged.
  *
  * @param string $str UTF-8 encoded data
  * @return string ISO-8859-1 encoded data
  */
  function utf8_decode($str)
  {
      $pos = 0;
      $len = strlen($str);
      $ret = '';

      while ($pos < $len)
      {
          $byte = ord($str[$pos]);
          $lead = $byte & 0xF0;

          if ($lead === 0xC0 || $lead === 0xD0)
          {
              // A lead byte at the very end has no continuation byte to read.
              // Reading past the string would raise a warning and fabricate
              // a character, so treat the truncated sequence as unmappable.
              if ($pos + 1 >= $len)
              {
                  $ret .= '?';
                  break;
              }

              // 110xxxxx 10yyyyyy => xxxxxyyyyyy
              $charval = (($byte & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
              $pos += 2;
              $ret .= (($charval < 256) ? chr($charval) : '?');
          }
          else if ($lead === 0xE0)
          {
              // three byte character, never representable in ISO-8859-1
              $ret .= '?';
              $pos += 3;
          }
          else if ($lead === 0xF0)
          {
              // four byte character, never representable in ISO-8859-1
              $ret .= '?';
              $pos += 4;
          }
          else
          {
              $ret .= $str[$pos];
              ++$pos;
          }
      }

      return $ret;
  }
  95. }
  96. // mbstring has been around for a long time, so its functions are available even on older versions of PHP.
  97. // if mbstring is not loaded, we go into native mode.
  98. if (extension_loaded('mbstring'))
  99. {
  // Every mbstring call without an explicit encoding should assume UTF-8
  mb_internal_encoding('UTF-8');

  // Fix for http://www.phpbb.com/bugs/phpbb3/52315
  // The ini value is set as well, purely to help mods that call mbstring
  // directly. They are not supposed to, but the fix is costless; a failure
  // to change the setting is deliberately silenced.
  @ini_set('mbstring.internal_encoding', 'UTF-8');
  105. /**
  106. * UTF-8 aware alternative to strrpos
  107. * Find position of last occurrence of a char in a string
  108. *
  109. * Notes:
  110. * - offset for mb_strrpos was added in 5.2.0, we emulate if it is lower
  111. */
  112. if (version_compare(PHP_VERSION, '5.2.0', '>='))
  113. {
  /**
  * UTF-8 aware alternative to strrpos (mbstring variant for PHP >= 5.2.0)
  *
  * Returns the character position of the last occurrence of $needle, or
  * false when it is not found or the haystack is empty.
  * @ignore
  */
  function utf8_strrpos($str, $needle, $offset = null)
  {
      // Emulate behaviour of strrpos rather than raising warning.
      // empty() on its own would also reject "0", which is a perfectly
      // valid one character haystack, hence the is_numeric() exclusion.
      if (empty($str) && !is_numeric($str))
      {
          return false;
      }

      // Fix for http://www.phpbb.com/bugs/phpbb3/52315
      // Explicit encoding; a missing offset means "search the whole string"
      return mb_strrpos($str, $needle, is_null($offset) ? 0 : $offset, 'UTF-8');
  }
  138. }
  139. else
  140. {
  /**
  * UTF-8 aware alternative to strrpos (mbstring variant for PHP < 5.2.0)
  *
  * mb_strrpos() only gained its offset argument in 5.2.0. Here an offset is
  * emulated by searching the part of the haystack that starts at $offset
  * and adding $offset back onto the position found.
  *
  * Without an offset, any haystack considered empty() (this includes "0")
  * yields false.
  * @ignore
  */
  function utf8_strrpos($str, $needle, $offset = null)
  {
      if (!is_null($offset))
      {
          if (!is_int($offset))
          {
              trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
              return false;
          }

          // Fix for http://www.phpbb.com/bugs/phpbb3/52315
          // Explicit encoding on every mbstring call
          $tail = mb_substr($str, $offset, mb_strlen($str, 'UTF-8'), 'UTF-8');
          $found = mb_strrpos($tail, $needle, 'UTF-8');

          return ($found === false) ? false : $found + $offset;
      }

      // Emulate behaviour of strrpos rather than raising warning
      if (empty($str))
      {
          return false;
      }

      // Fix for http://www.phpbb.com/bugs/phpbb3/52315
      // Explicit encoding
      return mb_strrpos($str, $needle, 'UTF-8');
  }
  178. }
  /**
  * UTF-8 aware alternative to strpos (mbstring variant)
  *
  * A missing offset is treated as 0, i.e. the search starts at the first
  * character.
  * @ignore
  */
  function utf8_strpos($str, $needle, $offset = null)
  {
      $start = is_null($offset) ? 0 : $offset;

      // Fix for http://www.phpbb.com/bugs/phpbb3/52315
      // Explicit encoding
      return mb_strpos($str, $needle, $start, 'UTF-8');
  }
  /**
  * UTF-8 aware alternative to strtolower (mbstring variant)
  * @ignore
  */
  function utf8_strtolower($str)
  {
      // Fix for http://www.phpbb.com/bugs/phpbb3/52315
      // The encoding is always named explicitly
      $lowered = mb_strtolower($str, 'UTF-8');

      return $lowered;
  }
  /**
  * UTF-8 aware alternative to strtoupper (mbstring variant)
  * @ignore
  */
  function utf8_strtoupper($str)
  {
      // Fix for http://www.phpbb.com/bugs/phpbb3/52315
      // The encoding is always named explicitly
      $raised = mb_strtoupper($str, 'UTF-8');

      return $raised;
  }
  /**
  * UTF-8 aware alternative to substr (mbstring variant)
  *
  * When no length is given, the character count of the whole string is
  * passed as length, which always reaches the end of the string.
  * @ignore
  */
  function utf8_substr($str, $offset, $length = null)
  {
      // Fix for http://www.phpbb.com/bugs/phpbb3/52315
      // Explicit encoding on every mbstring call
      $span = is_null($length) ? mb_strlen($str, 'UTF-8') : $length;

      return mb_substr($str, $offset, $span, 'UTF-8');
  }
  /**
  * Return the length (in characters) of a UTF-8 string (mbstring variant)
  * @ignore
  */
  function utf8_strlen($text)
  {
      $characters = mb_strlen($text, 'utf-8');

      return $characters;
  }
  245. }
  246. else
  247. {
  /**
  * UTF-8 aware alternative to strrpos (variant without mbstring)
  * Find position of last occurrence of a string
  *
  * The haystack is split on the needle; the text in front of the last
  * occurrence is then measured in characters.
  *
  * @author Harry Fuecks
  * @param string $str haystack
  * @param string $needle needle
  * @param integer $offset (optional) offset (from left)
  * @return mixed integer position or FALSE on failure
  */
  function utf8_strrpos($str, $needle, $offset = null)
  {
      if (!is_null($offset))
      {
          if (!is_int($offset))
          {
              trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
              return false;
          }

          // Search the remainder only, then shift the result back
          $found = utf8_strrpos(utf8_substr($str, $offset), $needle);

          return ($found === false) ? false : $found + $offset;
      }

      $pieces = explode($needle, $str);

      if (sizeof($pieces) <= 1)
      {
          // needle does not occur at all
          return false;
      }

      // Pop off the end of the string where the last match was made;
      // what remains, glued back together, is everything before that match
      array_pop($pieces);

      return utf8_strlen(join($needle, $pieces));
  }
  /**
  * UTF-8 aware alternative to strpos (variant without mbstring)
  * Find position of first occurrence of a string
  *
  * The haystack is split on the needle; the character length of the first
  * piece is the position of the first occurrence.
  *
  * @author Harry Fuecks
  * @param string $str haystack
  * @param string $needle needle
  * @param integer $offset offset in characters (from left)
  * @return mixed integer position or FALSE on failure
  */
  function utf8_strpos($str, $needle, $offset = null)
  {
      if (!is_null($offset))
      {
          if (!is_int($offset))
          {
              trigger_error('utf8_strpos: Offset must be an integer', E_USER_ERROR);
              return false;
          }

          // Search the remainder only, then shift the result back
          $found = utf8_strpos(utf8_substr($str, $offset), $needle);

          return ($found === false) ? false : $found + $offset;
      }

      $pieces = explode($needle, $str);

      if (sizeof($pieces) <= 1)
      {
          // needle does not occur at all
          return false;
      }

      return utf8_strlen($pieces[0]);
  }
  /**
  * UTF-8 aware alternative to strtolower (variant without mbstring)
  * Make a string lowercase
  *
  * The string is first passed through strtolower() and the characters
  * listed in the table below are then replaced by their lowercase form.
  *
  * Note: The concept of a character's "case" only exists in some alphabets
  * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
  * not exist in the Chinese alphabet, for example. See Unicode Standard
  * Annex #21: Case Mappings
  *
  * @param string $string UTF-8 string
  * @return string string in lowercase
  */
  function utf8_strtolower($string)
  {
      // uppercase character => lowercase character, both as UTF-8 bytes
      static $lower_map = array(
          "\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
          "\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
          "\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
          "\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
          "\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
          "\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
          "\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
          "\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
          "\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
          "\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
          "\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
          "\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
          "\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
          "\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
          "\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
          "\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
          "\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
          "\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
          "\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
          "\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
          "\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
          "\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
          "\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
          "\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
          "\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
          "\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
          "\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
          "\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
          "\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
          "\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
          "\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
          "\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
          "\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
          "\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
          "\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
          "\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
          "\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
          "\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
          "\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
          "\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
          "\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
          "\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
          "\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
          "\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
          "\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
          "\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
      );

      $ascii_lowered = strtolower($string);

      return strtr($ascii_lowered, $lower_map);
  }
  /**
  * UTF-8 aware alternative to strtoupper (variant without mbstring)
  * Make a string uppercase
  *
  * The string is first passed through strtoupper() and the characters
  * listed in the table below are then replaced by their uppercase form.
  *
  * Note: The concept of a character's "case" only exists in some alphabets
  * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
  * not exist in the Chinese alphabet, for example. See Unicode Standard
  * Annex #21: Case Mappings
  *
  * @param string $string UTF-8 string
  * @return string string in uppercase
  */
  function utf8_strtoupper($string)
  {
      // lowercase character => uppercase character, both as UTF-8 bytes
      static $upper_map = array(
          "\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
          "\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
          "\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
          "\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
          "\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
          "\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
          "\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
          "\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
          "\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
          "\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
          "\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
          "\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
          "\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
          "\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
          "\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
          "\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
          "\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
          "\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
          "\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
          "\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
          "\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
          "\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
          "\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
          "\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
          "\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
          "\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
          "\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
          "\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
          "\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
          "\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
          "\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
          "\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
          "\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
          "\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
          "\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
          "\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
          "\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
          "\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
          "\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
          "\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
          "\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
          "\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
          "\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
          "\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
          "\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
          "\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
      );

      $ascii_raised = strtoupper($string);

      return strtr($ascii_raised, $upper_map);
  }
  /**
  * UTF-8 aware alternative to substr (variant without mbstring)
  * Return part of a string given character offset (and optionally length)
  *
  * Arguments: unlike substr, offset and length are cast to integers
  * instead of being rejected when they are something else.
  *
  * Return value: where substr may return false (e.g. offset beyond the
  * end of the string), this function follows mb_substr and returns an
  * empty string.
  *
  * Implementation: the substring is captured with a regular expression.
  * PCRE only supports repetitions of less than 65536, so larger offsets
  * and lengths are expressed as a repeated group of 65535 characters plus
  * a remainder. Counting the characters of the string is relatively
  * expensive and therefore only done when needed; it is not needed for a
  * positive offset without a length.
  *
  * @author Chris Smith<chris@jalakai.co.uk>
  * @param string $str
  * @param integer $offset number of UTF-8 characters offset (from left)
  * @param integer $length (optional) length in UTF-8 characters from offset
  * @return string the selected part, or an empty string if nothing matches
  */
  function utf8_substr($str, $offset, $length = NULL)
  {
      // The string cast generates E_NOTICE for PHP4 objects, but not PHP5 objects
      $str = (string) $str;
      $offset = (int) $offset;
      $has_length = !is_null($length);

      if ($has_length)
      {
          $length = (int) $length;
      }

      // Argument combinations that can never select anything
      if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
      {
          return '';
      }

      // Convert a negative offset into one counted from the left, clamped
      // to the start of the string (a tail anchored pattern would work as
      // well, but those are horribly slow)
      if ($offset < 0)
      {
          $strlen = utf8_strlen($str);
          $offset = max(0, $strlen + $offset);
      }

      // Part of the pattern that skips $offset characters without capturing
      $skip = '^';

      if ($offset > 0)
      {
          $full = (int) ($offset / 65535);
          $rest = $offset % 65535;
          $skip = '^(?:' . ($full ? '(?:.{65535}){' . $full . '}' : '') . '.{' . $rest . '})';
      }

      // Part of the pattern that captures the requested characters
      if (!$has_length)
      {
          // the rest of the string
          $take = '(.*)$';
      }
      else
      {
          if (!isset($strlen))
          {
              $strlen = utf8_strlen($str);
          }

          // offset beyond the end of the string
          if ($offset > $strlen)
          {
              return '';
          }

          $take = '';

          if ($length > 0)
          {
              // never reach past the end of the string
              $length = min($strlen - $offset, $length);

              $full = (int) ($length / 65535);
              $rest = $length % 65535;

              // positive length: capture exactly $length characters
              $take = '(' . ($full ? '(?:.{65535}){' . $full . '}' : '') . '.{' . $rest . '})';
          }
          else if ($length < 0)
          {
              if ($length < ($offset - $strlen))
              {
                  return '';
              }

              $full = (int) ((-$length) / 65535);
              $rest = (-$length) % 65535;

              // negative length: capture everything except a group of
              // -length characters anchored at the tail-end of the string
              $take = '(.*)(?:' . ($full ? '(?:.{65535}){' . $full . '}' : '') . '.{' . $rest . '})$';
          }
      }

      return preg_match('#' . $skip . $take . '#us', $str, $match) ? $match[1] : '';
  }
  /**
  * Return the length (in characters) of a UTF-8 string (variant without mbstring)
  *
  * @param string $text UTF-8 string
  * @return integer Length (in chars) of given string
  */
  function utf8_strlen($text)
  {
      // utf8_decode turns every multibyte character into a single byte
      // (a "?" where there is no equivalent), so the byte count of its
      // result is the character count of the input
      $single_bytes = utf8_decode($text);

      return strlen($single_bytes);
  }
  590. }
  /**
  * UTF-8 aware alternative to str_split
  * Convert a string to an array
  *
  * @author Harry Fuecks
  * @param string $str UTF-8 encoded
  * @param int $split_len number of characters per chunk, at least 1
  * @return mixed array of chunks in their original order (the last one may
  *               be shorter than $split_len), or FALSE if $split_len is
  *               not an integer >= 1
  */
  function utf8_str_split($str, $split_len = 1)
  {
      if (!is_int($split_len) || $split_len < 1)
      {
          return false;
      }

      // Short (or empty) strings form a single chunk, no pattern required
      $len = utf8_strlen($str);
      if ($len <= $split_len)
      {
          return array($str);
      }

      // Greedy "up to $split_len characters": full chunks first, then
      // whatever is left. The dot matches every character including
      // newlines and NUL bytes, so no trailing chunk is lost.
      preg_match_all('/.{1,' . $split_len . '}/us', $str, $ar);
      return $ar[0];
  }
  /**
  * UTF-8 aware alternative to strspn
  * Find length of initial segment matching the mask
  *
  * Every character of the mask is taken literally, as with strspn; none
  * of them acts as a range, negation or escape character.
  *
  * @author Harry Fuecks
  * @param string $str string to examine
  * @param string $mask characters allowed in the initial segment
  * @param integer $start (optional) character offset to start at
  * @param integer $length (optional) number of characters to examine
  * @return integer length of the initial segment in characters
  */
  function utf8_strspn($str, $mask, $start = null, $length = null)
  {
      if ($start !== null || $length !== null)
      {
          $str = utf8_substr($str, $start, $length);
      }

      // Nothing can match an empty mask, and "[]" is not a valid character class
      if ((string) $mask === '')
      {
          return 0;
      }

      // Neutralise the characters that are special inside a character
      // class (\ - ] ^ [) as well as the pattern delimiter
      $class = addcslashes($mask, '\\-]^/[');

      if (preg_match('/^[' . $class . ']+/u', $str, $matches))
      {
          return utf8_strlen($matches[0]);
      }

      return 0;
  }
  /**
  * UTF-8 aware alternative to ucfirst
  * Make a string's first character uppercase
  *
  * @author Harry Fuecks
  * @param string $str UTF-8 string
  * @return string with first character as upper case (if applicable)
  */
  function utf8_ucfirst($str)
  {
      $chars = utf8_strlen($str);

      if ($chars == 0)
      {
          return '';
      }

      if ($chars == 1)
      {
          // the whole string is the first character
          return utf8_strtoupper($str);
      }

      // Separate the first character from the remainder and raise only that one
      preg_match('/^(.{1})(.*)$/us', $str, $parts);

      return utf8_strtoupper($parts[1]) . $parts[2];
  }
  657. /**
  658. * Recode a string to UTF-8
  659. *
  660. * If the encoding is not supported, the string is returned as-is
  661. *
  662. * @param string $string Original string
  663. * @param string $encoding Original encoding (lowered)
  664. * @return string The string, encoded in UTF-8
  665. */
  666. function utf8_recode($string, $encoding)
  667. {
  668. $encoding = strtolower($encoding);
  669. if ($encoding == 'utf-8' || !is_string($string) || empty($string))
  670. {
  671. return $string;
  672. }
  673. // we force iso-8859-1 to be cp1252
  674. if ($encoding == 'iso-8859-1')
  675. {
  676. $encoding = 'cp1252';
  677. }
  678. // convert iso-8859-8-i to iso-8859-8
  679. else if ($encoding == 'iso-8859-8-i')
  680. {
  681. $encoding = 'iso-8859-8';
  682. $string = hebrev($string);
  683. }
  684. // First, try iconv()
  685. if (function_exists('iconv'))
  686. {
  687. $ret = @iconv($encoding, 'utf-8', $string);
  688. if (!empty($ret))
  689. {
  690. return $ret;
  691. }
  692. }
  693. // Try the mb_string extension
  694. if (function_exists('mb_convert_encoding'))
  695. {
  696. // mbstring is nasty on PHP4, we must make *sure* that we send a good encoding
  697. switch ($encoding)
  698. {
  699. case 'iso-8859-1':
  700. case 'iso-8859-2':
  701. case 'iso-8859-4':
  702. case 'iso-8859-7':
  703. case 'iso-8859-9':
  704. case 'iso-8859-15':
  705. case 'windows-1251':
  706. case 'windows-1252':
  707. case 'cp1252':
  708. case 'shift_jis':
  709. case 'euc-kr':
  710. case 'big5':
  711. case 'gb2312':
  712. $ret = @mb_convert_encoding($string, 'utf-8', $encoding);
  713. if (!empty($ret))
  714. {
  715. return $ret;
  716. }
  717. }
  718. }
  719. // Try the recode extension
  720. if (function_exists('recode_string'))
  721. {
  722. $ret = @recode_string($encoding . '..utf-8', $string);
  723. if (!empty($ret))
  724. {
  725. return $ret;
  726. }
  727. }
  728. // If nothing works, check if we have a custom transcoder available
  729. if (!preg_match('#^[a-z0-9_ \\-]+$#', $encoding))
  730. {
  731. // Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files
  732. trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
  733. }
  734. global $phpbb_root_path, $phpEx;
  735. // iso-8859-* character encoding
  736. if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array))
  737. {
  738. switch ($array[1])
  739. {
  740. case '1':
  741. case '2':
  742. case '4':
  743. case '7':
  744. case '8':
  745. case '9':
  746. case '15':
  747. if (!function_exists('iso_8859_' . $array[1]))
  748. {
  749. if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
  750. {
  751. trigger_error('Basic reencoder file is missing', E_USER_ERROR);
  752. }
  753. include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
  754. }
  755. return call_user_func('iso_8859_' . $array[1], $string);
  756. break;
  757. default:
  758. trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
  759. break;
  760. }
  761. }
  762. // CP/WIN character encoding
  763. if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array))
  764. {
  765. switch ($array[1])
  766. {
  767. case '932':
  768. break;
  769. case '1250':
  770. case '1251':
  771. case '1252':
  772. case '1254':
  773. case '1255':
  774. case '1256':
  775. case '1257':
  776. case '874':
  777. if (!function_exists('cp' . $array[1]))
  778. {
  779. if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
  780. {
  781. trigger_error('Basic reencoder file is missing', E_USER_ERROR);
  782. }
  783. include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
  784. }
  785. return call_user_func('cp' . $array[1], $string);
  786. break;
  787. default:
  788. trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
  789. break;
  790. }
  791. }
  792. // TIS-620
  793. if (preg_match('/tis[_ -]?620/', $encoding))
  794. {
  795. if (!function_exists('tis_620'))
  796. {
  797. if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))
  798. {
  799. trigger_error('Basic reencoder file is missing', E_USER_ERROR);
  800. }
  801. include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);
  802. }
  803. return tis_620($string);
  804. }
  805. // SJIS
  806. if (preg_match('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', $encoding))
  807. {
  808. if (!function_exists('sjis'))
  809. {
  810. if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
  811. {
  812. trigger_error('CJK reencoder file is missing', E_USER_ERROR);
  813. }
  814. include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
  815. }
  816. return sjis($string);
  817. }
  818. // EUC_KR
  819. if (preg_match('/euc[_ -]?kr/', $encoding))
  820. {
  821. if (!function_exists('euc_kr'))
  822. {
  823. if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
  824. {
  825. trigger_error('CJK reencoder file is missing', E_USER_ERROR);
  826. }
  827. include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
  828. }
  829. return euc_kr($string);
  830. }
  831. // BIG-5
  832. if (preg_match('/big[_ -]?5/', $encoding))
  833. {
  834. if (!function_exists('big5'))
  835. {
  836. if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
  837. {
  838. trigger_error('CJK reencoder file is missing', E_USER_ERROR);
  839. }
  840. include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
  841. }
  842. return big5($string);
  843. }
  844. // GB2312
  845. if (preg_match('/gb[_ -]?2312/', $encoding))
  846. {
  847. if (!function_exists('gb2312'))
  848. {
  849. if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
  850. {
  851. trigger_error('CJK reencoder file is missing', E_USER_ERROR);
  852. }
  853. include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);
  854. }
  855. return gb2312($string);
  856. }
  857. // Trigger an error?! For now just give bad data :-(
  858. trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
  859. //return $string; // use utf_normalizer::cleanup() ?
  860. }
  /**
   * Replace every non-ASCII UTF-8 character with its numeric character reference
   *
   * Each byte sequence consisting of a lead byte in the range 0xC2-0xF4 followed
   * by one to three continuation bytes (0x80-0xBF) is handed to
   * utf8_encode_ncr_callback(); every other byte is copied through untouched.
   *
   * @param string $text UTF-8 string in NFC
   * @return string ASCII string using NCRs for non-ASCII chars
   */
  function utf8_encode_ncr($text)
  {
      // Lead byte of a multi-byte sequence followed by its continuation bytes
      $multibyte_sequence = '#[\\xC2-\\xF4][\\x80-\\xBF]{1,3}#';

      $encoded = preg_replace_callback($multibyte_sequence, 'utf8_encode_ncr_callback', $text);

      return $encoded;
  }
  /**
   * preg_replace_callback() handler used by utf8_encode_ncr()
   *
   * Wraps whatever utf8_ord() returns for the matched text between "&#" and ";".
   * Attention, $m is the match array, not a string; only $m[0] is read.
   *
   * @param array $m 0-based numerically indexed array passed by preg_replace_callback()
   * @return string "&#" . utf8_ord($m[0]) . ";"
   */
  function utf8_encode_ncr_callback($m)
  {
      $matched_char = $m[0];
      $code_point = utf8_ord($matched_char);

      return '&#' . $code_point . ';';
  }
  /**
   * Returns the Unicode code point encoded by a single UTF-8 character
   *
   * The byte length of the argument decides how it is decoded (1 to 4 bytes).
   * The bytes are only masked and shifted; no check is made that they form a
   * well-formed UTF-8 sequence.
   *
   * @param string $chr UTF-8 char
   * @return integer|string UNICODE code point; if $chr is empty or longer than
   *                        4 bytes it is returned unchanged
   */
  function utf8_ord($chr)
  {
      // Payload bits carried by the lead byte, indexed by sequence length
      static $lead_masks = array(2 => 0x1F, 3 => 0x0F, 4 => 0x07);

      $length = strlen($chr);

      if ($length === 1)
      {
          return ord($chr);
      }

      if (!isset($lead_masks[$length]))
      {
          // Not a 1-4 byte sequence: hand the input back as it came in
          return $chr;
      }

      $code_point = ord($chr[0]) & $lead_masks[$length];

      // Every following byte contributes its low six bits
      for ($i = 1; $i < $length; $i++)
      {
          $code_point = ($code_point << 6) | (ord($chr[$i]) & 0x3F);
      }

      return $code_point;
  }
  /**
   * Encodes a Unicode code point as a UTF-8 byte sequence
   *
   * Values above 0xFFFF produce four bytes, above 0x7FF three bytes, above 0x7F
   * two bytes and everything else a single byte. The value is not checked
   * against the set of valid Unicode scalar values.
   *
   * @param int $cp UNICODE code point
   * @return string UTF-8 char
   */
  function utf8_chr($cp)
  {
      if ($cp > 0xFFFF)
      {
          $lead = 0xF0 | ($cp >> 18);
          $second = 0x80 | (($cp >> 12) & 0x3F);
          $third = 0x80 | (($cp >> 6) & 0x3F);
          $fourth = 0x80 | ($cp & 0x3F);

          return chr($lead) . chr($second) . chr($third) . chr($fourth);
      }

      if ($cp > 0x7FF)
      {
          $lead = 0xE0 | ($cp >> 12);
          $second = 0x80 | (($cp >> 6) & 0x3F);
          $third = 0x80 | ($cp & 0x3F);

          return chr($lead) . chr($second) . chr($third);
      }

      if ($cp > 0x7F)
      {
          $lead = 0xC0 | ($cp >> 6);
          $second = 0x80 | ($cp & 0x3F);

          return chr($lead) . chr($second);
      }

      // Single-byte range
      return chr($cp);
  }
  /**
   * Convert Numeric Character References to UTF-8 chars
   *
   * Matches decimal references of one to six digits and hexadecimal references
   * ("x" followed by one to five hex digits, case-insensitive) and passes each
   * match to utf8_decode_ncr_callback().
   *
   * Notes:
   *  - NCRs are not converted recursively, if you pass &#38;#38; it will return &#38;
   *  - the existence of the Unicode characters is NOT checked, therefore an entity
   *    may be converted to an inexistent codepoint
   *
   * @param string $text String to convert, encoded in UTF-8 (no normal form required)
   * @return string UTF-8 string where NCRs have been replaced with the actual chars
   */
  function utf8_decode_ncr($text)
  {
      $ncr_pattern = '/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i';

      $decoded = preg_replace_callback($ncr_pattern, 'utf8_decode_ncr_callback', $text);

      return $decoded;
  }
  /**
   * preg_replace_callback() handler used by utf8_decode_ncr()
   *
   * Takes the captured body of an NCR (decimal digits, or "x" followed by hex
   * digits) and returns the result of utf8_chr() for it. Attention, $m is an array.
   * It will ignore most of invalid NCRs, but not all!
   *
   * @param array $m 0-based numerically indexed array passed by preg_replace_callback()
   * @return string UTF-8 char
   */
  function utf8_decode_ncr_callback($m)
  {
      $reference = $m[1];

      // strncasecmp() yields 0 (false) when the reference starts with "x" or "X"
      if (strncasecmp($reference, 'x', 1))
      {
          // Decimal form: forwarded as the matched digit string
          $cp = $reference;
      }
      else
      {
          // Hexadecimal form: drop the leading "x" and convert
          $cp = hexdec(substr($reference, 1));
      }

      return utf8_chr($cp);
  }
  /**
   * Case folds a unicode string as per Unicode 5.0, section 3.13
   *
   * The replacement tables are read from includes/utf/data/case_fold_{c,f,s}
   * below $phpbb_root_path on first use and kept in a static array. The common
   * table is always applied first; it is followed by the full table when
   * $option is exactly 'full' and by the simple table for any other value.
   *
   * @param string $text text to be case folded
   * @param string $option determines how we will fold the cases
   * @return string case folded text
   */
  function utf8_case_fold($text, $option = 'full')
  {
      static $uniarray = array();
      global $phpbb_root_path, $phpEx;

      // Tables needed for this call, in the order they are applied
      $tables = array('c' => 'includes/utf/data/case_fold_c.');

      if ($option === 'full')
      {
          // full replaces a character with multiple characters
          $tables['f'] = 'includes/utf/data/case_fold_f.';
      }
      else
      {
          // simple replaces a character with another character
          $tables['s'] = 'includes/utf/data/case_fold_s.';
      }

      // Load whatever has not been loaded by an earlier call
      foreach ($tables as $key => $relative_path)
      {
          if (!isset($uniarray[$key]))
          {
              $uniarray[$key] = include($phpbb_root_path . $relative_path . $phpEx);
          }
      }

      // Apply the common table, then the selected one
      foreach ($tables as $key => $relative_path)
      {
          $text = strtr($text, $uniarray[$key]);
      }

      return $text;
  }
  1002. /**
  1003. * Takes the input and does a "special" case fold. It does minor normalization
  1004. * and returns NFKC compatible text
  1005. *
  1006. * @param string $text text to be case folded
  1007. * @param string $option determines how we will fold the cases
  1008. * @return string case folded text
  1009. */
  1010. function utf8_case_fold_nfkc($text, $option = 'full')
  1011. {
  1012. static $fc_nfkc_closure = array(
  1013. "\xCD\xBA" => "\x20\xCE\xB9",
  1014. "\xCF\x92" => "\xCF\x85",
  1015. "\xCF\x93" => "\xCF\x8D",
  1016. "\xCF\x94" => "\xCF\x8B",
  1017. "\xCF\xB2" => "\xCF\x83",
  1018. "\xCF\xB9" => "\xCF\x83",
  1019. "\xE1\xB4\xAC" => "\x61",
  1020. "\xE1\xB4\xAD" => "\xC3\xA6",
  1021. "\xE1\xB4\xAE" => "\x62",
  1022. "\xE1\xB4\xB0" => "\x64",
  1023. "\xE1\xB4\xB1" => "\x65",
  1024. "\xE1\xB4\xB2" => "\xC7\x9D",
  1025. "\xE1\xB4\xB3" => "\x67",
  1026. "\xE1\xB4\xB4" => "\x68",
  1027. "\xE1\xB4\xB5" => "\x69",
  1028. "\xE1\xB4\xB6" => "\x6A",
  1029. "\xE1\xB4\xB7" => "\x6B",
  1030. "\xE1\xB4\xB8" => "\x6C",
  1031. "\xE1\xB4\xB9" => "\x6D",
  1032. "\xE1\xB4\xBA" => "\x6E",
  1033. "\xE1\xB4\xBC" => "\x6F",
  1034. "\xE1\xB4\xBD" => "\xC8\xA3",
  1035. "\xE1\xB4\xBE" => "\x70",
  1036. "\xE1\xB4\xBF" => "\x72",
  1037. "\xE1\xB5\x80" => "\x74",
  1038. "\xE1\xB5\x81" => "\x75",
  1039. "\xE1\xB5\x82" => "\x77",
  1040. "\xE2\x82\xA8" => "\x72\x73",
  1041. "\xE2\x84\x82" => "\x63",
  1042. "\xE2\x84\x83" => "\xC2\xB0\x63",
  1043. "\xE2\x84\x87" => "\xC9\x9B",
  1044. "\xE2\x84\x89" => "\xC2\xB0\x66",
  1045. "\xE2\x84\x8B" => "\x68",
  1046. "\xE2\x84\x8C" => "\x68",
  1047. "\xE2\x84\x8D" => "\x68",
  1048. "\xE2\x84\x90" => "\x69",
  1049. "\xE2\x84\x91" => "\x69",
  1050. "\xE2\x84\x92" => "\x6C",
  1051. "\xE2\x84\x95" => "\x6E",
  1052. "\xE2\x84\x96" => "\x6E\x6F",
  1053. "\xE2\x84\x99" => "\x70",
  1054. "\xE2\x84\x9A" => "\x71",
  1055. "\xE2\x84\x9B" => "\x72",
  1056. "\xE2\x84\x9C" => "\x72",
  1057. "\xE2\x84\x9D" => "\x72",
  1058. "\xE2\x84\xA0" => "\x73\x6D",
  1059. "\xE2\x84\xA1" => "\x74\x65\x6C",
  1060. "\xE2\x84\xA2" => "\x74\x6D",
  1061. "\xE2\x84\xA4" => "\x7A",
  1062. "\xE2\x84\xA8" => "\x7A",
  1063. "\xE2\x84\xAC" => "\x62",
  1064. "\xE2\x84\xAD" => "\x63",
  1065. "\xE2\x84\xB0" => "\x65",
  1066. "\xE2\x84\xB1" => "\x66",
  1067. "\xE2\x84\xB3" => "\x6D",
  1068. "\xE2\x84\xBB" => "\x66\x61\x78",
  1069. "\xE2\x84\xBE" => "\xCE\xB3",
  1070. "\xE2\x84\xBF" => "\xCF\x80",
  1071. "\xE2\x85\x85" => "\x64",
  1072. "\xE3\x89\x90" => "\x70\x74\x65",
  1073. "\xE3\x8B\x8C" => "\x68\x67",
  1074. "\xE3\x8B\x8E" => "\x65\x76",
  1075. "\xE3\x8B\x8F" => "\x6C\x74\x64",
  1076. "\xE3\x8D\xB1" => "\x68\x70\x61",
  1077. "\xE3\x8D\xB3" => "\x61\x75",
  1078. "\xE3\x8D\xB5" => "\x6F\x76",
  1079. "\xE3\x8D\xBA" => "\x69\x75",
  1080. "\xE3\x8E\x80" => "\x70\x61",
  1081. "\xE3\x8E\x81" => "\x6E\x61",
  1082. "\xE3\x8E\x82" => "\xCE\xBC\x61",
  1083. "\xE3\x8E\x83" => "\x6D\x61",
  1084. "\xE3\x8E\x84" => "\x6B\x61",
  1085. "\xE3\x8E\x85" => "\x6B\x62",
  1086. "\xE3\x8E\x86" => "\x6D\x62",
  1087. "\xE3\x8E\x87" => "\x67\x62",
  1088. "\xE3\x8E\x8A" => "\x70\x66",
  1089. "\xE3\x8E\x8B" => "\x6E\x66",
  1090. "\xE3\x8E\x8C" => "\xCE\xBC\x66",
  1091. "\xE3\x8E\x90" => "\x68\x7A",
  1092. "\xE3\x8E\x91" => "\x6B\x68\x7A",
  1093. "\xE3\x8E\x92" => "\x6D\x68\x7A",
  1094. "\xE3\x8E\x93" => "\x67\x68\x7A",
  1095. "\xE3\x8E\x94" => "\x74\x68\x7A",
  1096. "\xE3\x8E\xA9" => "\x70\x61",
  1097. "\xE3\x8E\xAA" => "\x6B\x70\x61",
  1098. "\xE3\x8E\xAB" => "\x6D\x70\x61",
  1099. "\xE3\x8E\xAC" => "\x67\x70\x61",
  1100. "\xE3\x8E\xB4" => "\x70\x76",
  1101. "\xE3\x8E\xB5" => "\x6E\x76",
  1102. "\xE3\x8E\xB6" => "\xCE\xBC\x76",
  1103. "\xE3\x8E\xB7" => "\x6D\x76",
  1104. "\xE3\x8E\xB8" => "\x6B\x76",
  1105. "\xE3\x8E\xB9" => "\x6D\x76",
  1106. "\xE3\x8E\xBA" => "\x70\x77",
  1107. "\xE3\x8E\xBB" => "\x6E\x77",
  1108. "\xE3\x8E\xBC" => "\xCE\xBC\x77",
  1109. "\xE3\x8E\xBD" => "\x6D\x77",
  1110. "\xE3\x8E\xBE" => "\x6B\x77",
  1111. "\xE3\x8E\xBF" => "\x6D\x77",
  1112. "\xE3\x8F\x80" => "\x6B\xCF\x89",
  1113. "\xE3\x8F\x81" => "\x6D\xCF\x89",
  1114. "\xE3\x8F\x83" => "\x62\x71",
  1115. "\xE3\x8F\x86" => "\x63\xE2\x88\x95\x6B\x67",
  1116. "\xE3\x8F\x87" => "\x63\x6F\x2E",
  1117. "\xE3\x8F\x88" => "\x64\x62",
  1118. "\xE3\x8F\x89" => "\x67\x79",
  1119. "\xE3\x8F\x8B" => "\x68\x70",
  1120. "\xE3\x8F\x8D" => "\x6B\x6B",
  1121. "\xE3\x8F\x8E" => "\x6B\x6D",
  1122. "\xE3\x8F\x97" => "\x70\x68",
  1123. "\xE3\x8F\x99" => "\x70\x70\x6D",
  1124. "\xE3\x8F\x9A" => "\x70\x72",
  1125. "\xE3\x8F\x9C" => "\x73\x76",
  1126. "\xE3\x8F\x9D" => "\x77\x62",
  1127. "\xE3\x8F\x9E" => "\x76\xE2\x88\x95\x6D",
  1128. "\xE3\x8F\x9F" => "\x61\xE2\x88\x95\x6D",
  1129. "\xF0\x9D\x90\x80" => "\x61",
  1130. "\xF0\x9D\x90\x81" => "\x62",
  1131. "\xF0\x9D\x90\x82" => "\x63",
  1132. "\xF0\x9D\x90\x83" => "\x64",
  1133. "\xF0\x9D\x90\x84" => "\x65",
  1134. "\xF0\x9D\x90\x85" => "\x66",
  1135. "\xF0\x9D\x90\x86" => "\x67",
  1136. "\xF0\x9D\x90\x87" => "\x68",
  1137. "\xF0\x9D\x90\x88" => "\x69",
  1138. "\xF0\x9D\x90\x89" => "\x6A",
  1139. "\xF0\x9D\x90\x8A" => "\x6B",
  1140. "\xF0\x9D\x90\x8B" => "\x6C",
  1141. "\xF0\x9D\x90\x8C" => "\x6D",
  1142. "\xF0\x9D\x90\x8D" => "\x6E",
  1143. "\xF0\x9D\x90\x8E" => "\x6F",
  1144. "\xF0\x9D\x90\x8F" => "\x70",
  1145. "\xF0\x9D\x90\x90" => "\x71",
  1146. "\xF0\x9D\x90\x91" => "\x72",
  1147. "\xF0\x9D\x90\x92" => "\x73",
  1148. "\xF0\x9D\x90\x93" => "\x74",
  1149. "\xF0\x9D\x90\x94" => "\x75",
  1150. "\xF0\x9D\x90\x95" => "\x76",
  1151. "\xF0\x9D\x90\x96" => "\x77",
  1152. "\xF0\x9D\x90\x97" => "\x78",
  1153. "\xF0\x9D\x90\x98" => "\x79",
  1154. "\xF0\x9D\x90\x99" => "\x7A",
  1155. "\xF0\x9D\x90\xB4" => "\x61",
  1156. "\xF0\x9D\x90\xB5" => "\x62",
  1157. "\xF0\x9D\x90\xB6" => "\x63",
  1158. "\xF0\x9D\x90\xB7" => "\x64",
  1159. "\xF0\x9D\x90\xB8" => "\x65",
  1160. "\xF0\x9D\x90\xB9" => "\x66",
  1161. "\xF0\x9D\x90\xBA" => "\x67",
  1162. "\xF0\x9D\x90\xBB" => "\x68",
  1163. "\xF0\x9D\x90\xBC" => "\x69",
  1164. "\xF0\x9D\x90\xBD" => "\x6A",
  1165. "\xF0\x9D\x90\xBE" => "\x6B",
  1166. "\xF0\x9D\x90\xBF" => "\x6C",
  1167. "\xF0\x9D\x91\x80" => "\x6D",
  1168. "\xF0\x9D\x91\x81" => "\x6E",
  1169. "\xF0\x9D\x91\x82" => "\x6F",
  1170. "\xF0\x9D\x91\x83" => "\x70",
  1171. "\xF0\x9D\x91\x84" => "\x71",
  1172. "\xF0\x9D\x91\x85" => "\x72",
  1173. "\xF0\x9D\x91\x86" => "\x73",
  1174. "\xF0\x9D\x91\x87" => "\x74",
  1175. "\xF0\x9D\x91\x88" => "\x75",
  1176. "\xF0\x9D\x91\x89" => "\x76",
  1177. "\xF0\x9D\x91\x8A" => "\x77",
  1178. "\xF0\x9D\x91\x8B" => "\x78",
  1179. "\xF0\x9D\x91\x8C" => "\x79",
  1180. "\xF0\x9D\x91\x8D" => "\x7A",
  1181. "\xF0\x9D\x91\xA8" => "\x61",
  1182. "\xF0\x9D\x91\xA9" => "\x62",
  1183. "\xF0\x9D\x91\xAA" => "\x63",
  1184. "\xF0\x9D\x91\xAB" => "\x64",
  1185. "\xF0\x9D\x91\xAC" => "\x65",
  1186. "\xF0\x9D\x91\xAD" => "\x66",
  1187. "\xF0\x9D\x91\xAE" => "\x67",
  1188. "\xF0\x9D\x91\xAF" => "\x68",
  1189. "\xF0\x9D\x91\xB0" => "\x69",
  1190. "\xF0\x9D\x91\xB1" => "\x6A",
  1191. "\xF0\x9D\x91\xB2" => "\x6B",
  1192. "\xF0\x9D\x91\xB3" => "\x6C",
  1193. "\xF0\x9D\x91\xB4" => "\x6D",
  1194. "\xF0\x9D\x91\xB5" => "\x6E",
  1195. "\xF0\x9D\x91\xB6" => "\x6F",
  1196. "\xF0\x9D\x91\xB7" => "\x70",
  1197. "\xF0\x9D\x91\xB8" => "\x71",
  1198. "\xF0\x9D\x91\xB9" => "\x72",
  1199. "\xF0\x9D\x91\xBA" => "\x73",
  1200. "\xF0\x9D\x91\xBB" => "\x74",
  1201. "\xF0\x9D\x91\xBC" => "\x75",
  1202. "\xF0\x9D\x91\xBD" => "\x76",
  1203. "\xF0\x9D\x91\xBE" => "\x77",
  1204. "\xF0\x9D\x91\xBF" => "\x78",
  1205. "\xF0\x9D\x92\x80" => "\x79",
  1206. "\xF0\x9D\x92\x81" => "\x7A",
  1207. "\xF0\x9D\x92\x9C" => "\x61",
  1208. "\xF0\x9D\x92\x9E" => "\x63",
  1209. "\xF0\x9D\x92\x9F" => "\x64",
  1210. "\xF0\x9D\x92\xA2" => "\x67",
  1211. "\xF0\x9D\x92\xA5" => "\x6A",
  1212. "\xF0\x9D\x92\xA6" => "\x6B",
  1213. "\xF0\x9D\x92\xA9" => "\x6E",
  1214. "\xF0\x9D\x92\xAA" => "\x6F",
  1215. "\xF0\x9D\x92\xAB" => "\x70",
  1216. "\xF0\x9D\x92\xAC" => "\x71",
  1217. "\xF0\x9D\x92\xAE" => "\x73",
  1218. "\xF0\x9D\x92\xAF" => "\x74",
  1219. "\xF0\x9D\x92\xB0" => "\x75",
  1220. "\xF0\x9D\x92\xB1" => "\x76",
  1221. "\xF0\x9D\x92\xB2" => "\x77",
  1222. "\xF0\x9D\x92\xB3" => "\x78",
  1223. "\xF0\x9D\x92\xB4" => "\x79",
  1224. "\xF0\x9D\x92\xB5" => "\x7A",
  1225. "\xF0\x9D\x93\x90" => "\x61",
  1226. "\xF0\x9D\x93\x91" => "\x62",
  1227. "\xF0\x9D\x93\x92" => "\x63",
  1228. "\xF0\x9D\x93\x93" => "\x64",
  1229. "\xF0\x9D\x93\x94" => "\x65",
  1230. "\xF0\x9D\x93\x95" => "\x66",
  1231. "\xF0\x9D\x93\x96" => "\x67",
  1232. "\xF0\x9D\x93\x97" => "\x68",
  1233. "\xF0\x9D\x93\x98" => "\x69",
  1234. "\xF0\x9D\x93\x99" => "\x6A",
  1235. "\xF0\x9D\x93\x9A" => "\x6B",
  1236. "\xF0\x9D\x93\x9B" => "\x6C",
  1237. "\xF0\x9D\x93\x9C" => "\x6D",
  1238. "\xF0\x9D\x93\x9D" => "\x6E",
  1239. "\xF0\x9D\x93\x9E" => "\x6F",
  1240. "\xF0\x9D\x93\x9F" => "\x70",
  1241. "\xF0\x9D\x93\xA0" => "\x71",
  1242. "\xF0\x9D\x93\xA1" => "\x72",
  1243. "\xF0\x9D\x93\xA2" => "\x73",
  1244. "\xF0\x9D\x93\xA3" => "\x74",
  1245. "\xF0\x9D\x93\xA4" => "\x75",
  1246. "\xF0\x9D\x93\xA5" => "\x76",
  1247. "\xF0\x9D\x93\xA6" => "\x77",
  1248. "\xF0\x9D\x93\xA7" => "\x78",
  1249. "\xF0\x9D\x93\xA8" => "\x79",
  1250. "\xF0\x9D\x93\xA9" => "\x7A",
  1251. "\xF0\x9D\x94\x84" => "\x61",
  1252. "\xF0\x9D\x94\x85" => "\x62",
  1253. "\xF0\x9D\x94\x87" => "\x64",
  1254. "\xF0\x9D\x94\x88" => "\x65",
  1255. "\xF0\x9D\x94\x89" => "\x66",
  1256. "\xF0\x9D\x94\x8A" => "\x67",
  1257. "\xF0\x9D\x94\x8D" => "\x6A",
  1258. "\xF0\x9D\x94\x8E" => "\x6B",
  1259. "\xF0\x9D\x94\x8F" => "\x6C",
  1260. "\xF0\x9D\x94\x90" => "\x6D",
  1261. "\xF0\x9D\x94\x91" => "\x6E",
  1262. "\xF0\x9D\x94\x92" => "\x6F",
  1263. "\xF0\x9D\x94\x93" => "\x70",
  1264. "\xF0\x9D\x94\x94" => "\x71",
  1265. "\xF0\x9D\x94\x96" => "\x73",
  1266. "\xF0\x9D\x94\x97" => "\x74",
  1267. "\xF0\x9D\x94\x98" => "\x75",
  1268. "\xF0\x9D\x94\x99" => "\x76",
  1269. "\xF0\x9D\x94\x9A" => "\x77",
  1270. "\xF0\x9D\x94\x9B" => "\x78",
  1271. "\xF0\x9D\x94\x9C" => "\x79",
  1272. "\xF0\x9D\x94\xB8" => "\x61",
  1273. "\xF0\x9D\x94\xB9" => "\x62",
  1274. "\xF0\x9D\x94\xBB" => "\x64",
  1275. "\xF0\x9D\x94\xBC" => "\x65",
  1276. "\xF0\x9D\x94\xBD" => "\x66",
  1277. "\xF0\x9D\x94\xBE" => "\x67",
  1278. "\xF0\x9D\x95\x80" => "\x69",
  1279. "\xF0\x9D\x95\x81" => "\x6A",
  1280. "\xF0\x9D\x95\x82" => "\x6B",
  1281. "\xF0\x9D\x95\x83" => "\x6C",
  1282. "\xF0\x9D\x95\x84" => "\x6D",
  1283. "\xF0\x9D\x95\x86" => "\x6F",
  1284. "\xF0\x9D\x95\x8A" => "\x73",
  1285. "\xF0\x9D\x95\x8B" => "\x74",
  1286. "\xF0\x9D\x95\x8C" => "\x75",
  1287. "\xF0\x9D\x95\x8D" => "\x76",
  1288. "\xF0\x9D\x95\x8E" => "\x77",
  1289. "\xF0\x9D\x95\x8F" => "\x78",
  1290. "\xF0\x9D\x95\x90" => "\x79",
  1291. "\xF0\x9D\x95\xAC" => "\x61",
  1292. "\xF0\x9D\x95\xAD" => "\x62",
  1293. "\xF0\x9D\x95\xAE" => "\x63",
  1294. "\xF0\x9D\x95\xAF" => "\x64",
  1295. "\xF0\x9D\x95\xB0" => "\x65",
  1296. "\xF0\x9D\x95\xB1" => "\x66",
  1297. "\xF0\x9D\x95\xB2" => "\x67",
  1298. "\xF0\x9D\x95\xB3" => "\x68",
  1299. "\xF0\x9D\x95\xB4" => "\x69",
  1300. "\xF0\x9D\x95\xB5" => "\x6A",
  1301. "\xF0\x9D\x95\xB6" => "\x6B",
  1302. "\xF0\x9D\x95\xB7" => "\x6C",
  1303. "\xF0\x9D\x95\xB8" => "\x6D",
  1304. "\xF0\x9D\x95\xB9" => "\x6E",
  1305. "\xF0\x9D\x95\xBA" => "\x6F",
  1306. "\xF0\x9D\x95\xBB" => "\x70",
  1307. "\xF0\x9D\x95\xBC" => "\x71",
  1308. "\xF0\x9D\x95\xBD" => "\x72",
  1309. "\xF0\x9D\x95\xBE" => "\x73",
  1310. "\xF0\x9D\x95\xBF" => "\x74",
  1311. "\xF0\x9D\x96\x80" => "\x75",
  1312. "\xF0\x9D\x96\x81" => "\x76",
  1313. "\xF0\x9D\x96\x82" => "\x77",
  1314. "\xF0\x9D\x96\x83" => "\x78",
  1315. "\xF0\x9D\x96\x84" => "\x79",
  1316. "\xF0\x9D\x96\x85" => "\x7A",
  1317. "\xF0\x9D\x96\xA0" => "\x61",
  1318. "\xF0\x9D\x96\xA1" => "\x62",
  1319. "\xF0\x9D\x96\xA2" => "\x63",
  1320. "\xF0\x9D\x96\xA3" => "\x64",
  1321. "\xF0\x9D\x96\xA4" => "\x65",
  1322. "\xF0\x9D\x96\xA5" => "\x66",
  1323. "\xF0\x9D\x96\xA6" => "\x67",
  1324. "\xF0\x9D\x96\xA7" => "\x68",
  1325. "\xF0\x9D\x96\xA8" => "\x69",
  1326. "\xF0\x9D\x96\xA9" => "\x6A",
  1327. "\xF0\x9D\x96\xAA" => "\x6B",
  1328. "\xF0\x9D\x96\xAB" => "\x6C",
  1329. "\xF0\x9D\x96\xAC" => "\x6D",
  1330. "\xF0\x9D\x96\xAD" => "\x6E",
  1331. "\xF0\x9D\x96\xAE" => "\x6F",
  1332. "\xF0\x9D\x96\xAF" => "\x70",
  1333. "\xF0\x9D\x96\xB0" => "\x71",
  1334. "\xF0\x9D\x96\xB1" => "\x72",
  1335. "\xF0\x9D\x96\xB2" => "\x73",
  1336. "\xF0\x9D\x96\xB3" => "\x74",
  1337. "\xF0\x9D\x96\xB4" => "\x75",
  1338. "\xF0\x9D\x96\xB5" => "\x76",
  1339. "\xF0\x9D\x96\xB6" => "\x77",
  1340. "\xF0\x9D\x96\xB7" => "\x78",
  1341. "\xF0\x9D\x96\xB8" => "\x79",
  1342. "\xF0\x9D\x96\xB9" => "\x7A",
  1343. "\xF0\x9D\x97\x94" => "\x61",
  1344. "\xF0\x9D\x97\x95" => "\x62",
  1345. "\xF0\x9D\x97\x96" => "\x63",
  1346. "\xF0\x9D\x97\x97" => "\x64",
  1347. "\xF0\x9D\x97\x98" => "\x65",
  1348. "\xF0\x9D\x97\x99" => "\x66",
  1349. "\xF0\x9D\x97\x9A" => "\x67",
  1350. "\xF0\x9D\x97\x9B" => "\x68",
  1351. "\xF0\x9D\x97\x9C" => "\x69",
  1352. "\xF0\x9D\x97\x9D" => "\x6A",
  1353. "\xF0\x9D\x97\x9E" => "\x6B",
  1354. "\xF0\x9D\x97\x9F" => "\x6C",
  1355. "\xF0\x9D\x97\xA0" => "\x6D",
  1356. "\xF0\x9D\x97\xA1" => "\x6E",
  1357. "\xF0\x9D\x97\xA2" => "\x6F",
  1358. "\xF0\x9D\x97\xA3" => "\x70",
  1359. "\xF0\x9D\x97\xA4" => "\x71",
  1360. "\xF0\x9D\x97\xA5" => "\x72",
  1361. "\xF0\x9D\x97\xA6" => "\x73",
  1362. "\xF0\x9D\x97\xA7" => "\x74",
  1363. "\xF0\x9D\x97\xA8" => "\x75",
  1364. "\xF0\x9D\x97\xA9" => "\x76",
  1365. "\xF0\x9D\x97\xAA" => "\x77",
  1366. "\xF0\x9D\x97\xAB" => "\x78",
  1367. "\xF0\x9D\x97\xAC" => "\x79",
  1368. "\xF0\x9D\x97\xAD" => "\x7A",
  1369. "\xF0\x9D\x98\x88" => "\x61",
  1370. "\xF0\x9D\x98\x89" => "\x62",
  1371. "\xF0\x9D\x98\x8A" => "\x63",
  1372. "\xF0\x9D\x98\x8B" => "\x64",
  1373. "\xF0\x9D\x98\x8C" => "\x65",
  1374. "\xF0\x9D\x98\x8D" => "\x66",
  1375. "\xF0\x9D\x98\x8E" => "\x67",
  1376. "\xF0\x9D\x98\x8F" => "\x68",
  1377. "\xF0\x9D\x98\x90" => "\x69",
  1378. "\xF0\x9D\x98\x91" => "\x6A",
  1379. "\xF0\x9D\x98\x92" => "\x6B",
  1380. "\xF0\x9D\x98\x93" => "\x6C",
  1381. "\xF0\x9D\x98\x94" => "\x6D",
  1382. "\xF0\x9D\x98\x95" => "\x6E",
  1383. "\xF0\x9D\x98\x96" => "\x6F",
  1384. "\xF0\x9D\x98\x97" => "\x70",
  1385. "\xF0\x9D\x98\x98" => "\x71",
  1386. "\xF0\x9D\x98\x99" => "\x72",
  1387. "\xF0\x9D\x98\x9A" => "\x73",
  1388. "\xF0\x9D\x98\x9B" => "\x74",
  1389. "\xF0\x9D\x98\x9C" => "\x75",
  1390. "\xF0\x9D\x98\x9D" => "\x76",
  1391. "\xF0\x9D\x98\x9E" => "\x77",
  1392. "\xF0\x9D\x98\x9F" => "\x78",
  1393. "\xF0\x9D\x98\xA0" => "\x79",
  1394. "\xF0\x9D\x98\xA1" => "\x7A",
  1395. "\xF0\x9D\x98\xBC" => "\x61",
  1396. "\xF0\x9D\x98\xBD" => "\x62",
  1397. "\xF0\x9D\x98\xBE" => "\x63",
  1398. "\xF0\x9D\x98\xBF" => "\x64",
  1399. "\xF0\x9D\x99\x80" => "\x65",
  1400. "\xF0\x9D\x99\x81" => "\x66",
  1401. "\xF0\x9D\x99\x82" => "\x67",
  1402. "\xF0\x9D\x99\x83" => "\x68",
  1403. "\xF0\x9D\x99\x84" => "\x69",
  1404. "\xF0\x9D\x99\x85" => "\x6A",
  1405. "\xF0\x9D\x99\x86" => "\x6B",
  1406. "\xF0\x9D\x99\x87" => "\x6C",
  1407. "\xF0\x9D\x99\x88" => "\x6D",
  1408. "\xF0\x9D\x99\x89" => "\x6E",
  1409. "\xF0\x9D\x99\x8A" => "\x6F",
  1410. "\xF0\x9D\x99\x8B" => "\x70",
  1411. "\xF0\x9D\x99\x8C" => "\x71",
  1412. "\xF0\x9D\x99\x8D" => "\x72",
  1413. "\xF0\x9D\x99\x8E" => "\x73",
  1414. "\xF0\x9D\x99\x8F" => "\x74",
  1415. "\xF0\x9D\x99\x90" => "\x75",
  1416. "\xF0\x9D\x99\x91" => "\x76",
  1417. "\xF0\x9D\x99\x92" => "\x77",
  1418. "\xF0\x9D\x99\x93" => "\x78",
  1419. "\xF0\x9D\x99\x94" => "\x79",
  1420. "\xF0\x9D\x99\x95" => "\x7A",
  1421. "\xF0\x9D\x99\xB0" => "\x61",
  1422. "\xF0\x9D\x99\xB1" => "\x62",
  1423. "\xF0\x9D\x99\xB2" => "\x63",
  1424. "\xF0\x9D\x99\xB3" => "\x64",
  1425. "\xF0\x9D\x99\xB4" => "\x65",
  1426. "\xF0\x9D\x99\xB5" => "\x66",
  1427. "\xF0\x9D\x99\xB6" => "\x67",
  1428. "\xF0\x9D\x99\xB7" => "\x68",
  1429. "\xF0\x9D\x99\xB8" => "\x69",
  1430. "\xF0\x9D\x99\xB9" => "\x6A",
  1431. "\xF0\x9D\x99\xBA" => "\x6B",
  1432. "\xF0\x9D\x99\xBB" => "\x6C",
  1433. "\xF0\x9D\x99\xBC" => "\x6D",
  1434. "\xF0\x9D\x99\xBD" => "\x6E",
  1435. "\xF0\x9D\x99\xBE" => "\x6F",
  1436. "\xF0\x9D\x99\xBF" => "\x70",
  1437. "\xF0\x9D\x9A\x80" => "\x71",
  1438. "\xF0\x9D\x9A\x81" => "\x72",
  1439. "\xF0\x9D\x9A\x82" => "\x73",
  1440. "\xF0\x9D\x9A\x83" => "\x74",
  1441. "\xF0\x9D\x9A\x84" => "\x75",
  1442. "\xF0\x9D\x9A\x85" => "\x76",
  1443. "\xF0\x9D\x9A\x86" => "\x77",
  1444. "\xF0\x9D\x9A\x87" => "\x78",
  1445. "\xF0\x9D\x9A\x88" => "\x79",
  1446. "\xF0\x9D\x9A\x89" => "\x7A",
  1447. "\xF0\x9D\x9A\xA8" => "\xCE\xB1",
  1448. "\xF0\x9D\x9A\xA9" => "\xCE\xB2",
  1449. "\xF0\x9D\x9A\xAA" => "\xCE\xB3",
  1450. "\xF0\x9D\x9A\xAB" => "\xCE\xB4",
  1451. "\xF0\x9D\x9A\xAC" => "\xCE\xB5",
  1452. "\xF0\x9D\x9A\xAD" => "\xCE\xB6",
  1453. "\xF0\x9D\x9A\xAE" => "\xCE\xB7",
  1454. "\xF0\x9D\x9A\xAF" => "\xCE\xB8",
  1455. "\xF0\x9D\x9A\xB0" => "\xCE\xB9",
  1456. "\xF0\x9D\x9A\xB1" => "\xCE\xBA",
  1457. "\xF0\x9D\x9A\xB2" => "\xCE\xBB",
  1458. "\xF0\x9D\x9A\xB3" => "\xCE\xBC",
  1459. "\xF0\x9D\x9A\xB4" => "\xCE\xBD",
  1460. "\xF0\x9D\x9A\xB5" => "\xCE\xBE",
  1461. "\xF0\x9D\x9A\xB6" => "\xCE\xBF",
  1462. "\xF0\x9D\x9A\xB7" => "\xCF\x80",
  1463. "\xF0\x9D\x9A\xB8" => "\xCF\x81",
  1464. "\xF0\x9D\x9A\xB9" => "\xCE\xB8",
  1465. "\xF0\x9D\x9A\xBA" => "\xCF\x83",
  1466. "\xF0\x9D\x9A\xBB" => "\xCF\x84",
  1467. "\xF0\x9D\x9A\xBC" => "\xCF\x85",
  1468. "\xF0\x9D\x9A\xBD" => "\xCF\x86",
  1469. "\xF0\x9D\x9A\xBE" => "\xCF\x87",
  1470. "\xF0\x9D\x9A\xBF" => "\xCF\x88",
  1471. "\xF0\x9D\x9B\x80" => "\xCF\x89",
  1472. "\xF0\x9D\x9B\x93" => "\xCF\x83",
  1473. "\xF0\x9D\x9B\xA2" => "\xCE\xB1",
  1474. "\xF0\x9D\x9B\xA3" => "\xCE\xB2",
  1475. "\xF0\x9D\x9B\xA4" => "\xCE\xB3",
  1476. "\xF0\x9D\x9B\xA5" => "\xCE\xB4",
  1477. "\xF0\x9D\x9B\xA6" => "\xCE\xB5",
  1478. "\xF0\x9D\x9B\xA7" => "\xCE\xB6",
  1479. "\xF0\x9D\x9B\xA8" => "\xCE\xB7",
  1480. "\xF0\x9D\x9B\xA9" => "\xCE\xB8",
  1481. "\xF0\x9D\x9B\xAA" => "\xCE\xB9",
  1482. "\xF0\x9D\x9B\xAB" => "\xCE\xBA",
  1483. "\xF0\x9D\x9B\xAC" => "\xCE\xBB",
  1484. "\xF0\x9D\x9B\xAD" => "\xCE\xBC",
  1485. "\xF0\x9D\x9B\xAE" => "\xCE\xBD",
  1486. "\xF0\x9D\x9B\xAF" => "\xCE\xBE",
  1487. "\xF0\x9D\x9B\xB0" => "\xCE\xBF",
  1488. "\xF0\x9D\x9B\xB1" => "\xCF\x80",
  1489. "\xF0\x9D\x9B\xB2" => "\xCF\x81",
  1490. "\xF0\x9D\x9B\xB3" => "\xCE\xB8",
  1491. "\xF0\x9D\x9B\xB4" => "\xCF\x83",
  1492. "\xF0\x9D\x9B\xB5" => "\xCF\x84",
  1493. "\xF0\x9D\x9B\xB6" => "\xCF\x85",
  1494. "\xF0\x9D\x9B\xB7" => "\xCF\x86",
  1495. "\xF0\x9D\x9B\xB8" => "\xCF\x87",
  1496. "\xF0\x9D\x9B\xB9" => "\xCF\x88",
  1497. "\xF0\x9D\x9B\xBA" => "\xCF\x89",
  1498. "\xF0\x9D\x9C\x8D" => "\xCF\x83",
  1499. "\xF0\x9D\x9C\x9C" => "\xCE\xB1",
  1500. "\xF0\x9D\x9C\x9D" => "\xCE\xB2",
  1501. "\xF0\x9D\x9C\x9E" => "\xCE\xB3",
  1502. "\xF0\x9D\x9C\x9F" => "\xCE\xB4",
  1503. "\xF0\x9D\x9C\xA0" => "\xCE\xB5",
  1504. "\xF0\x9D\x9C\xA1" => "\xCE\xB6",
  1505. "\xF0\x9D\x9C\xA2" => "\xCE\xB7",
  1506. "\xF0\x9D\x9C\xA3" => "\xCE\xB8",
  1507. "\xF0\x9D\x9C\xA4" => "\xCE\xB9",
  1508. "\xF0\x9D\x9C\xA5" => "\xCE\xBA",
  1509. "\xF0\x9D\x9C\xA6" => "\xCE\xBB",
  1510. "\xF0\x9D\x9C\xA7" => "\xCE\xBC",
  1511. "\xF0\x9D\x9C\xA8" => "\xCE\xBD",
  1512. "\xF0\x9D\x9C\xA9" => "\xCE\xBE",
  1513. "\xF0\x9D\x9C\xAA" => "\xCE\xBF",
  1514. "\xF0\x9D\x9C\xAB" => "\xCF\x80",
  1515. "\xF0\x9D\x9C\xAC" => "\xCF\x81",
  1516. "\xF0\x9D\x9C\xAD" => "\xCE\xB8",
  1517. "\xF0\x9D\x9C\xAE" => "\xCF\x83",
  1518. "\xF0\x9D\x9C\xAF" => "\xCF\x84",
  1519. "\xF0\x9D\x9C\xB0" => "\xCF\x85",
  1520. "\xF0\x9D\x9C\xB1" => "\xCF\x86",
  1521. "\xF0\x9D\x9C\xB2" => "\xCF\x87",
  1522. "\xF0\x9D\x9C\xB3" => "\xCF\x88",
  1523. "\xF0\x9D\x9C\xB4" => "\xCF\x89",
  1524. "\xF0\x9D\x9D\x87" => "\xCF\x83",
  1525. "\xF0\x9D\x9D\x96" => "\xCE\xB1",
  1526. "\xF0\x9D\x9D\x97" => "\xCE\xB2",
  1527. "\xF0\x9D\x9D\x98" => "\xCE\xB3",
  1528. "\xF0\x9D\x9D\x99" => "\xCE\xB4",
  1529. "\xF0\x9D\x9D\x9A" => "\xCE\xB5",
  1530. "\xF0\x9D\x9D\x9B" => "\xCE\xB6",
  1531. "\xF0\x9D\x9D\x9C" => "…

Large files files are truncated, but you can click here to view the full file