PageRenderTime 48ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/src/Ofire/vendor/php-lang-correct/UTF8.php

https://bitbucket.org/multifinger/ofire-utils
PHP | 4072 lines | 3218 code | 134 blank | 720 comment | 312 complexity | 9b23432c441cb479c84c2c67d66a973d MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. * PHP5 UTF-8 is a UTF-8 aware library of functions mirroring PHP's own string functions.
  4. *
  5. * A powerful solution/contribution for UTF-8 support in your framework/CMS, written in PHP.
  6. * This library is an advancement over http://sourceforge.net/projects/phputf8 (last updated in 2007).
  7. *
  8. * UTF-8 support in PHP 5.
  9. *
  10. * Features and benefits of using this class
  11. * * Compatible with the interface of the standard PHP functions that work with single-byte encodings
  12. * * Can work without the PHP extensions ICONV and MBSTRING; if they are available, they are actively used!
  13. * * Useful functions that are missing from ICONV and MBSTRING
  14. * * Methods that take and return a string can also take and return null (useful for values selected from a database)
  15. * * Several methods can process arrays recursively
  16. * * A single interface and encapsulation (you can inherit and override methods)
  17. * * High performance, reliability and quality code
  18. * * PHP >= 5.3.x
  19. *
  20. * In Russian:
  21. *
  22. * Поддержка UTF-8 в PHP 5.
  23. *
  24. * Возможности и преимущества использования этого класса
  25. * * Совместимость с интерфейсом стандартных PHP функций, работающих с однобайтовыми кодировками
  26. * * Возможность работы без PHP расширений ICONV и MBSTRING, если они есть, то активно используются!
  27. * * Полезные функции, отсутствующие в ICONV и MBSTRING
  28. * * Методы, которые принимают и возвращают строку, умеют принимать и возвращать null (удобно при выборках значений из базы данных)
  29. * * Несколько методов умеют обрабатывать массивы рекурсивно
  30. * * Единый интерфейс и инкапсуляция (можно унаследоваться и переопределить методы)
  31. * * Высокая производительность, надёжность и качественный код
  32. * * PHP >= 5.3.x
  33. *
  34. * Example:
  35. * $s = 'Hello, Привет';
  36. * if (UTF8::is_utf8($s)) echo UTF8::strlen($s);
  37. *
  38. * UTF-8 encoding scheme:
  39. * 2^7 0x00000000 — 0x0000007F 0xxxxxxx
  40. * 2^11 0x00000080 — 0x000007FF 110xxxxx 10xxxxxx
  41. * 2^16 0x00000800 — 0x0000FFFF 1110xxxx 10xxxxxx 10xxxxxx
  42. * 2^21 0x00010000 — 0x001FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  43. * 1-4 bytes length: 2^7 + 2^11 + 2^16 + 2^21 = 2 164 864
  44. *
  45. * If I were the owner of the world, I would leave only 2 encodings: UTF-8 and UTF-32 ;-)
  46. *
  47. * Useful links
  48. * http://ru.wikipedia.org/wiki/UTF8
  49. * http://www.madore.org/~david/misc/unitest/ A Unicode Test Page
  50. * http://www.unicode.org/
  51. * http://www.unicode.org/reports/
  52. * http://www.unicode.org/reports/tr10/ Unicode Collation Algorithm
  53. * http://www.unicode.org/Public/UCA/6.0.0/ Unicode Collation Algorithm
  54. * http://www.unicode.org/reports/tr6/ A Standard Compression Scheme for Unicode
  55. * http://www.fileformat.info/info/unicode/char/search.htm Unicode Character Search
  56. *
  57. * @link http://code.google.com/p/php5-utf8/
  58. * @license http://creativecommons.org/licenses/by-sa/3.0/
  59. * @author Nasibullin Rinat
  60. * @version 2.2.2
  61. */
  62. class UTF8
  63. {
  64. #UTF-8 byte sequence of U+FFFD REPLACEMENT CHARACTER (stands in for a broken char)
  65. const REPLACEMENT_CHAR = "\xEF\xBF\xBD"; #U+FFFD
  66. /**
  67. * Regular expression for a character in UTF-8 without the use of a flag /u, written as an alternation of raw byte ranges. The ASCII branch is strict (only \x09, \x0A, \x0D and \x20-\x7E); the pattern contains whitespace and inline # comments, so it presumably has to be compiled with the /x flag (TODO confirm against callers).
  68. * @deprecated Instead, use a dot (".") and the flag /u, it works faster!
  69. * @var string
  70. */
  71. public static $char_re = ' [\x09\x0A\x0D\x20-\x7E] # ASCII strict
  72. # [\x00-\x7F] # ASCII non-strict (including control chars)
  73. | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
  74. | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
  75. | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
  76. | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
  77. | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
  78. | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
  79. | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
  80. ';
  81. /**
  82. * Combining diacritical marks (Unicode 5.1), expressed as raw UTF-8 byte ranges (alternative to \p{M}, which needs the /u flag).
  83. * U+0300–U+033F encode as \xcc\x80–\xcc\xbf and U+0340–U+036F as \xcd\x80–\xcd\xaf, so the \xcc branch must run to \xbf to cover the whole block.
  84. * For example, Russian letters in composed form: "Ё" (U+0401), "Й" (U+0419),
  85. * decomposed form: (U+0415 U+0308), (U+0418 U+0306)
  86. *
  87. * @link http://www.unicode.org/charts/PDF/U0300.pdf
  88. * @link http://www.unicode.org/charts/PDF/U1DC0.pdf
  89. * @link http://www.unicode.org/charts/PDF/UFE20.pdf
  90. * @var string
  91. */
  92. #public static $diactrical_re = '\p{M}'; #alternative, but only with /u flag
  93. public static $diactrical_re = ' \xcc[\x80-\xbf]|\xcd[\x80-\xaf] #UNICODE range: U+0300 — U+036F (for letters)
  94. | \xe2\x83[\x90-\xbf] #UNICODE range: U+20D0 U+20FF (for symbols)
  95. | \xe1\xb7[\x80-\xbf] #UNICODE range: U+1DC0 U+1DFF (supplement)
  96. | \xef\xb8[\xa0-\xaf] #UNICODE range: U+FE20 U+FE2F (combining half marks)
  97. ';
  98. /**
  99. * @var array Map from the four HTML special-character entities to the ASCII characters they stand for
  100. */
  101. public static $html_special_chars_table = array(
  102. '&quot;' => "\x22", #U+0022 ["] &#34; quotation mark = APL quote
  103. '&amp;' => "\x26", #U+0026 [&] &#38; ampersand
  104. '&lt;' => "\x3c", #U+003C [<] &#60; less-than sign
  105. '&gt;' => "\x3e", #U+003E [>] &#62; greater-than sign
  106. );
  107. /**
  108. * @link http://www.fileformat.info/format/w3c/entitytest.htm?sort=Unicode%20Character HTML Entity Browser Test Page
  109. * @var array Map from named HTML entity to the UTF-8 byte sequence of the character it denotes
  110. */
  111. public static $html_entity_table = array(
  112. #Latin-1 Entities:
  113. '&nbsp;' => "\xc2\xa0", #U+00A0 [ ] no-break space = non-breaking space
  114. '&iexcl;' => "\xc2\xa1", #U+00A1 [¡] inverted exclamation mark
  115. '&cent;' => "\xc2\xa2", #U+00A2 [¢] cent sign
  116. '&pound;' => "\xc2\xa3", #U+00A3 [£] pound sign
  117. '&curren;' => "\xc2\xa4", #U+00A4 [¤] currency sign
  118. '&yen;' => "\xc2\xa5", #U+00A5 [¥] yen sign = yuan sign
  119. '&brvbar;' => "\xc2\xa6", #U+00A6 [¦] broken bar = broken vertical bar
  120. '&sect;' => "\xc2\xa7", #U+00A7 [§] section sign
  121. '&uml;' => "\xc2\xa8", #U+00A8 [¨] diaeresis = spacing diaeresis
  122. '&copy;' => "\xc2\xa9", #U+00A9 [©] copyright sign
  123. '&ordf;' => "\xc2\xaa", #U+00AA [ª] feminine ordinal indicator
  124. '&laquo;' => "\xc2\xab", #U+00AB [«] left-pointing double angle quotation mark = left pointing guillemet
  125. '&not;' => "\xc2\xac", #U+00AC [¬] not sign
  126. '&shy;' => "\xc2\xad", #U+00AD [ ] soft hyphen = discretionary hyphen
  127. '&reg;' => "\xc2\xae", #U+00AE [®] registered sign = registered trade mark sign
  128. '&macr;' => "\xc2\xaf", #U+00AF [¯] macron = spacing macron = overline = APL overbar
  129. '&deg;' => "\xc2\xb0", #U+00B0 [°] degree sign
  130. '&plusmn;' => "\xc2\xb1", #U+00B1 [±] plus-minus sign = plus-or-minus sign
  131. '&sup2;' => "\xc2\xb2", #U+00B2 [²] superscript two = superscript digit two = squared
  132. '&sup3;' => "\xc2\xb3", #U+00B3 [³] superscript three = superscript digit three = cubed
  133. '&acute;' => "\xc2\xb4", #U+00B4 [´] acute accent = spacing acute
  134. '&micro;' => "\xc2\xb5", #U+00B5 [µ] micro sign
  135. '&para;' => "\xc2\xb6", #U+00B6 [¶] pilcrow sign = paragraph sign
  136. '&middot;' => "\xc2\xb7", #U+00B7 [·] middle dot = Georgian comma = Greek middle dot
  137. '&cedil;' => "\xc2\xb8", #U+00B8 [¸] cedilla = spacing cedilla
  138. '&sup1;' => "\xc2\xb9", #U+00B9 [¹] superscript one = superscript digit one
  139. '&ordm;' => "\xc2\xba", #U+00BA [º] masculine ordinal indicator
  140. '&raquo;' => "\xc2\xbb", #U+00BB [»] right-pointing double angle quotation mark = right pointing guillemet
  141. '&frac14;' => "\xc2\xbc", #U+00BC [¼] vulgar fraction one quarter = fraction one quarter
  142. '&frac12;' => "\xc2\xbd", #U+00BD [½] vulgar fraction one half = fraction one half
  143. '&frac34;' => "\xc2\xbe", #U+00BE [¾] vulgar fraction three quarters = fraction three quarters
  144. '&iquest;' => "\xc2\xbf", #U+00BF [¿] inverted question mark = turned question mark
  145. #Latin capital letter
  146. '&Agrave;' => "\xc3\x80", #Latin capital letter A with grave = Latin capital letter A grave
  147. '&Aacute;' => "\xc3\x81", #Latin capital letter A with acute
  148. '&Acirc;' => "\xc3\x82", #Latin capital letter A with circumflex
  149. '&Atilde;' => "\xc3\x83", #Latin capital letter A with tilde
  150. '&Auml;' => "\xc3\x84", #Latin capital letter A with diaeresis
  151. '&Aring;' => "\xc3\x85", #Latin capital letter A with ring above = Latin capital letter A ring
  152. '&AElig;' => "\xc3\x86", #Latin capital letter AE = Latin capital ligature AE
  153. '&Ccedil;' => "\xc3\x87", #Latin capital letter C with cedilla
  154. '&Egrave;' => "\xc3\x88", #Latin capital letter E with grave
  155. '&Eacute;' => "\xc3\x89", #Latin capital letter E with acute
  156. '&Ecirc;' => "\xc3\x8a", #Latin capital letter E with circumflex
  157. '&Euml;' => "\xc3\x8b", #Latin capital letter E with diaeresis
  158. '&Igrave;' => "\xc3\x8c", #Latin capital letter I with grave
  159. '&Iacute;' => "\xc3\x8d", #Latin capital letter I with acute
  160. '&Icirc;' => "\xc3\x8e", #Latin capital letter I with circumflex
  161. '&Iuml;' => "\xc3\x8f", #Latin capital letter I with diaeresis
  162. '&ETH;' => "\xc3\x90", #Latin capital letter ETH
  163. '&Ntilde;' => "\xc3\x91", #Latin capital letter N with tilde
  164. '&Ograve;' => "\xc3\x92", #Latin capital letter O with grave
  165. '&Oacute;' => "\xc3\x93", #Latin capital letter O with acute
  166. '&Ocirc;' => "\xc3\x94", #Latin capital letter O with circumflex
  167. '&Otilde;' => "\xc3\x95", #Latin capital letter O with tilde
  168. '&Ouml;' => "\xc3\x96", #Latin capital letter O with diaeresis
  169. '&times;' => "\xc3\x97", #U+00D7 [×] multiplication sign
  170. '&Oslash;' => "\xc3\x98", #Latin capital letter O with stroke = Latin capital letter O slash
  171. '&Ugrave;' => "\xc3\x99", #Latin capital letter U with grave
  172. '&Uacute;' => "\xc3\x9a", #Latin capital letter U with acute
  173. '&Ucirc;' => "\xc3\x9b", #Latin capital letter U with circumflex
  174. '&Uuml;' => "\xc3\x9c", #Latin capital letter U with diaeresis
  175. '&Yacute;' => "\xc3\x9d", #Latin capital letter Y with acute
  176. '&THORN;' => "\xc3\x9e", #Latin capital letter THORN
  177. #Latin small letter
  178. '&szlig;' => "\xc3\x9f", #Latin small letter sharp s = ess-zed
  179. '&agrave;' => "\xc3\xa0", #Latin small letter a with grave = Latin small letter a grave
  180. '&aacute;' => "\xc3\xa1", #Latin small letter a with acute
  181. '&acirc;' => "\xc3\xa2", #Latin small letter a with circumflex
  182. '&atilde;' => "\xc3\xa3", #Latin small letter a with tilde
  183. '&auml;' => "\xc3\xa4", #Latin small letter a with diaeresis
  184. '&aring;' => "\xc3\xa5", #Latin small letter a with ring above = Latin small letter a ring
  185. '&aelig;' => "\xc3\xa6", #Latin small letter ae = Latin small ligature ae
  186. '&ccedil;' => "\xc3\xa7", #Latin small letter c with cedilla
  187. '&egrave;' => "\xc3\xa8", #Latin small letter e with grave
  188. '&eacute;' => "\xc3\xa9", #Latin small letter e with acute
  189. '&ecirc;' => "\xc3\xaa", #Latin small letter e with circumflex
  190. '&euml;' => "\xc3\xab", #Latin small letter e with diaeresis
  191. '&igrave;' => "\xc3\xac", #Latin small letter i with grave
  192. '&iacute;' => "\xc3\xad", #Latin small letter i with acute
  193. '&icirc;' => "\xc3\xae", #Latin small letter i with circumflex
  194. '&iuml;' => "\xc3\xaf", #Latin small letter i with diaeresis
  195. '&eth;' => "\xc3\xb0", #Latin small letter eth
  196. '&ntilde;' => "\xc3\xb1", #Latin small letter n with tilde
  197. '&ograve;' => "\xc3\xb2", #Latin small letter o with grave
  198. '&oacute;' => "\xc3\xb3", #Latin small letter o with acute
  199. '&ocirc;' => "\xc3\xb4", #Latin small letter o with circumflex
  200. '&otilde;' => "\xc3\xb5", #Latin small letter o with tilde
  201. '&ouml;' => "\xc3\xb6", #Latin small letter o with diaeresis
  202. '&divide;' => "\xc3\xb7", #U+00F7 [÷] division sign
  203. '&oslash;' => "\xc3\xb8", #Latin small letter o with stroke = Latin small letter o slash
  204. '&ugrave;' => "\xc3\xb9", #Latin small letter u with grave
  205. '&uacute;' => "\xc3\xba", #Latin small letter u with acute
  206. '&ucirc;' => "\xc3\xbb", #Latin small letter u with circumflex
  207. '&uuml;' => "\xc3\xbc", #Latin small letter u with diaeresis
  208. '&yacute;' => "\xc3\xbd", #Latin small letter y with acute
  209. '&thorn;' => "\xc3\xbe", #Latin small letter thorn
  210. '&yuml;' => "\xc3\xbf", #Latin small letter y with diaeresis
  211. #Symbols and Greek Letters:
  212. '&fnof;' => "\xc6\x92", #U+0192 [ƒ] Latin small f with hook = function = florin
  213. '&Alpha;' => "\xce\x91", #Greek capital letter alpha
  214. '&Beta;' => "\xce\x92", #Greek capital letter beta
  215. '&Gamma;' => "\xce\x93", #Greek capital letter gamma
  216. '&Delta;' => "\xce\x94", #Greek capital letter delta
  217. '&Epsilon;' => "\xce\x95", #Greek capital letter epsilon
  218. '&Zeta;' => "\xce\x96", #Greek capital letter zeta
  219. '&Eta;' => "\xce\x97", #Greek capital letter eta
  220. '&Theta;' => "\xce\x98", #Greek capital letter theta
  221. '&Iota;' => "\xce\x99", #Greek capital letter iota
  222. '&Kappa;' => "\xce\x9a", #Greek capital letter kappa
  223. '&Lambda;' => "\xce\x9b", #Greek capital letter lambda
  224. '&Mu;' => "\xce\x9c", #Greek capital letter mu
  225. '&Nu;' => "\xce\x9d", #Greek capital letter nu
  226. '&Xi;' => "\xce\x9e", #Greek capital letter xi
  227. '&Omicron;' => "\xce\x9f", #Greek capital letter omicron
  228. '&Pi;' => "\xce\xa0", #Greek capital letter pi
  229. '&Rho;' => "\xce\xa1", #Greek capital letter rho
  230. '&Sigma;' => "\xce\xa3", #Greek capital letter sigma
  231. '&Tau;' => "\xce\xa4", #Greek capital letter tau
  232. '&Upsilon;' => "\xce\xa5", #Greek capital letter upsilon
  233. '&Phi;' => "\xce\xa6", #Greek capital letter phi
  234. '&Chi;' => "\xce\xa7", #Greek capital letter chi
  235. '&Psi;' => "\xce\xa8", #Greek capital letter psi
  236. '&Omega;' => "\xce\xa9", #Greek capital letter omega
  237. '&alpha;' => "\xce\xb1", #Greek small letter alpha
  238. '&beta;' => "\xce\xb2", #Greek small letter beta
  239. '&gamma;' => "\xce\xb3", #Greek small letter gamma
  240. '&delta;' => "\xce\xb4", #Greek small letter delta
  241. '&epsilon;' => "\xce\xb5", #Greek small letter epsilon
  242. '&zeta;' => "\xce\xb6", #Greek small letter zeta
  243. '&eta;' => "\xce\xb7", #Greek small letter eta
  244. '&theta;' => "\xce\xb8", #Greek small letter theta
  245. '&iota;' => "\xce\xb9", #Greek small letter iota
  246. '&kappa;' => "\xce\xba", #Greek small letter kappa
  247. '&lambda;' => "\xce\xbb", #Greek small letter lambda
  248. '&mu;' => "\xce\xbc", #Greek small letter mu
  249. '&nu;' => "\xce\xbd", #Greek small letter nu
  250. '&xi;' => "\xce\xbe", #Greek small letter xi
  251. '&omicron;' => "\xce\xbf", #Greek small letter omicron
  252. '&pi;' => "\xcf\x80", #Greek small letter pi
  253. '&rho;' => "\xcf\x81", #Greek small letter rho
  254. '&sigmaf;' => "\xcf\x82", #Greek small letter final sigma
  255. '&sigma;' => "\xcf\x83", #Greek small letter sigma
  256. '&tau;' => "\xcf\x84", #Greek small letter tau
  257. '&upsilon;' => "\xcf\x85", #Greek small letter upsilon
  258. '&phi;' => "\xcf\x86", #Greek small letter phi
  259. '&chi;' => "\xcf\x87", #Greek small letter chi
  260. '&psi;' => "\xcf\x88", #Greek small letter psi
  261. '&omega;' => "\xcf\x89", #Greek small letter omega
  262. '&thetasym;'=> "\xcf\x91", #Greek small letter theta symbol
  263. '&upsih;' => "\xcf\x92", #Greek upsilon with hook symbol
  264. '&piv;' => "\xcf\x96", #U+03D6 [ϖ] Greek pi symbol
  265. '&bull;' => "\xe2\x80\xa2", #U+2022 [•] bullet = black small circle
  266. '&hellip;' => "\xe2\x80\xa6", #U+2026 […] horizontal ellipsis = three dot leader
  267. '&prime;' => "\xe2\x80\xb2", #U+2032 [′] prime = minutes = feet (used to denote minutes and feet)
  268. '&Prime;' => "\xe2\x80\xb3", #U+2033 [″] double prime = seconds = inches (used to denote seconds and inches).
  269. '&oline;' => "\xe2\x80\xbe", #U+203E [‾] overline = spacing overscore
  270. '&frasl;' => "\xe2\x81\x84", #U+2044 [⁄] fraction slash
  271. '&weierp;' => "\xe2\x84\x98", #U+2118 [℘] script capital P = power set = Weierstrass p
  272. '&image;' => "\xe2\x84\x91", #U+2111 [ℑ] blackletter capital I = imaginary part
  273. '&real;' => "\xe2\x84\x9c", #U+211C [ℜ] blackletter capital R = real part symbol
  274. '&trade;' => "\xe2\x84\xa2", #U+2122 [™] trade mark sign
  275. '&alefsym;' => "\xe2\x84\xb5", #U+2135 [ℵ] alef symbol = first transfinite cardinal
  276. '&larr;' => "\xe2\x86\x90", #U+2190 [←] leftwards arrow
  277. '&uarr;' => "\xe2\x86\x91", #U+2191 [↑] upwards arrow
  278. '&rarr;' => "\xe2\x86\x92", #U+2192 [→] rightwards arrow
  279. '&darr;' => "\xe2\x86\x93", #U+2193 [↓] downwards arrow
  280. '&harr;' => "\xe2\x86\x94", #U+2194 [↔] left right arrow
  281. '&crarr;' => "\xe2\x86\xb5", #U+21B5 [↵] downwards arrow with corner leftwards = carriage return
  282. '&lArr;' => "\xe2\x87\x90", #U+21D0 [⇐] leftwards double arrow
  283. '&uArr;' => "\xe2\x87\x91", #U+21D1 [⇑] upwards double arrow
  284. '&rArr;' => "\xe2\x87\x92", #U+21D2 [⇒] rightwards double arrow
  285. '&dArr;' => "\xe2\x87\x93", #U+21D3 [⇓] downwards double arrow
  286. '&hArr;' => "\xe2\x87\x94", #U+21D4 [⇔] left right double arrow
  287. '&forall;' => "\xe2\x88\x80", #U+2200 [∀] for all
  288. '&part;' => "\xe2\x88\x82", #U+2202 [∂] partial differential
  289. '&exist;' => "\xe2\x88\x83", #U+2203 [∃] there exists
  290. '&empty;' => "\xe2\x88\x85", #U+2205 [∅] empty set = null set = diameter
  291. '&nabla;' => "\xe2\x88\x87", #U+2207 [∇] nabla = backward difference
  292. '&isin;' => "\xe2\x88\x88", #U+2208 [∈] element of
  293. '&notin;' => "\xe2\x88\x89", #U+2209 [∉] not an element of
  294. '&ni;' => "\xe2\x88\x8b", #U+220B [∋] contains as member
  295. '&prod;' => "\xe2\x88\x8f", #U+220F [∏] n-ary product = product sign
  296. '&sum;' => "\xe2\x88\x91", #U+2211 [∑] n-ary summation
  297. '&minus;' => "\xe2\x88\x92", #U+2212 [−] minus sign
  298. '&lowast;' => "\xe2\x88\x97", #U+2217 [∗] asterisk operator
  299. '&radic;' => "\xe2\x88\x9a", #U+221A [√] square root = radical sign
  300. '&prop;' => "\xe2\x88\x9d", #U+221D [∝] proportional to
  301. '&infin;' => "\xe2\x88\x9e", #U+221E [∞] infinity
  302. '&ang;' => "\xe2\x88\xa0", #U+2220 [∠] angle
  303. '&and;' => "\xe2\x88\xa7", #U+2227 [∧] logical and = wedge
  304. '&or;' => "\xe2\x88\xa8", #U+2228 [∨] logical or = vee
  305. '&cap;' => "\xe2\x88\xa9", #U+2229 [∩] intersection = cap
  306. '&cup;' => "\xe2\x88\xaa", #U+222A [∪] union = cup
  307. '&int;' => "\xe2\x88\xab", #U+222B [∫] integral
  308. '&there4;' => "\xe2\x88\xb4", #U+2234 [∴] therefore
  309. '&sim;' => "\xe2\x88\xbc", #U+223C [∼] tilde operator = varies with = similar to
  310. '&cong;' => "\xe2\x89\x85", #U+2245 [≅] approximately equal to
  311. '&asymp;' => "\xe2\x89\x88", #U+2248 [≈] almost equal to = asymptotic to
  312. '&ne;' => "\xe2\x89\xa0", #U+2260 [≠] not equal to
  313. '&equiv;' => "\xe2\x89\xa1", #U+2261 [≡] identical to
  314. '&le;' => "\xe2\x89\xa4", #U+2264 [≤] less-than or equal to
  315. '&ge;' => "\xe2\x89\xa5", #U+2265 [≥] greater-than or equal to
  316. '&sub;' => "\xe2\x8a\x82", #U+2282 [⊂] subset of
  317. '&sup;' => "\xe2\x8a\x83", #U+2283 [⊃] superset of
  318. '&nsub;' => "\xe2\x8a\x84", #U+2284 [⊄] not a subset of
  319. '&sube;' => "\xe2\x8a\x86", #U+2286 [⊆] subset of or equal to
  320. '&supe;' => "\xe2\x8a\x87", #U+2287 [⊇] superset of or equal to
  321. '&oplus;' => "\xe2\x8a\x95", #U+2295 [⊕] circled plus = direct sum
  322. '&otimes;' => "\xe2\x8a\x97", #U+2297 [⊗] circled times = vector product
  323. '&perp;' => "\xe2\x8a\xa5", #U+22A5 [⊥] up tack = orthogonal to = perpendicular
  324. '&sdot;' => "\xe2\x8b\x85", #U+22C5 [⋅] dot operator
  325. '&lceil;' => "\xe2\x8c\x88", #U+2308 [⌈] left ceiling = APL upstile
  326. '&rceil;' => "\xe2\x8c\x89", #U+2309 [⌉] right ceiling
  327. '&lfloor;' => "\xe2\x8c\x8a", #U+230A [⌊] left floor = APL downstile
  328. '&rfloor;' => "\xe2\x8c\x8b", #U+230B [⌋] right floor
  329. '&lang;' => "\xe2\x8c\xa9", #U+2329 [〈] left-pointing angle bracket = bra
  330. '&rang;' => "\xe2\x8c\xaa", #U+232A [〉] right-pointing angle bracket = ket
  331. '&loz;' => "\xe2\x97\x8a", #U+25CA [◊] lozenge
  332. '&spades;' => "\xe2\x99\xa0", #U+2660 [♠] black spade suit
  333. '&clubs;' => "\xe2\x99\xa3", #U+2663 [♣] black club suit = shamrock
  334. '&hearts;' => "\xe2\x99\xa5", #U+2665 [♥] black heart suit = valentine
  335. '&diams;' => "\xe2\x99\xa6", #U+2666 [♦] black diamond suit
  336. #Other Special Characters:
  337. '&OElig;' => "\xc5\x92", #U+0152 [Œ] Latin capital ligature OE
  338. '&oelig;' => "\xc5\x93", #U+0153 [œ] Latin small ligature oe
  339. '&Scaron;' => "\xc5\xa0", #U+0160 [Š] Latin capital letter S with caron
  340. '&scaron;' => "\xc5\xa1", #U+0161 [š] Latin small letter s with caron
  341. '&Yuml;' => "\xc5\xb8", #U+0178 [Ÿ] Latin capital letter Y with diaeresis
  342. '&circ;' => "\xcb\x86", #U+02C6 [ˆ] modifier letter circumflex accent
  343. '&tilde;' => "\xcb\x9c", #U+02DC [˜] small tilde
  344. '&ensp;' => "\xe2\x80\x82", #U+2002 [] en space
  345. '&emsp;' => "\xe2\x80\x83", #U+2003 [] em space
  346. '&thinsp;' => "\xe2\x80\x89", #U+2009 [] thin space
  347. '&zwnj;' => "\xe2\x80\x8c", #U+200C [] zero width non-joiner
  348. '&zwj;' => "\xe2\x80\x8d", #U+200D [] zero width joiner
  349. '&lrm;' => "\xe2\x80\x8e", #U+200E [] left-to-right mark
  350. '&rlm;' => "\xe2\x80\x8f", #U+200F [] right-to-left mark
  351. '&ndash;' => "\xe2\x80\x93", #U+2013 [–] en dash
  352. '&mdash;' => "\xe2\x80\x94", #U+2014 [—] em dash
  353. '&lsquo;' => "\xe2\x80\x98", #U+2018 [‘] left single quotation mark
  354. '&rsquo;' => "\xe2\x80\x99", #U+2019 [’] right single quotation mark (and apostrophe!)
  355. '&sbquo;' => "\xe2\x80\x9a", #U+201A [‚] single low-9 quotation mark
  356. '&ldquo;' => "\xe2\x80\x9c", #U+201C [“] left double quotation mark
  357. '&rdquo;' => "\xe2\x80\x9d", #U+201D [”] right double quotation mark
  358. '&bdquo;' => "\xe2\x80\x9e", #U+201E [„] double low-9 quotation mark
  359. '&dagger;' => "\xe2\x80\xa0", #U+2020 [†] dagger
  360. '&Dagger;' => "\xe2\x80\xa1", #U+2021 [‡] double dagger
  361. '&permil;' => "\xe2\x80\xb0", #U+2030 [‰] per mille sign
  362. '&lsaquo;' => "\xe2\x80\xb9", #U+2039 [‹] single left-pointing angle quotation mark
  363. '&rsaquo;' => "\xe2\x80\xba", #U+203A [›] single right-pointing angle quotation mark
  364. '&euro;' => "\xe2\x82\xac", #U+20AC [€] euro sign
  365. );
  366. /**
  367. * This table contains the data on how cp1259 characters map into Unicode (UTF-8).
  368. * The cp1259 map describes the standard Tatar Cyrillic charset and is based on the cp1251 table.
  369. * cp1259 is an outdated single-byte encoding of the Tatar language,
  370. * which includes all the Russian letters from cp1251.
  371. *
  372. * @link http://search.cpan.org/CPAN/authors/id/A/AM/AMICHAUER/Lingua-TT-Yanalif-0.08.tar.gz
  373. * @link http://www.unicode.org/charts/PDF/U0400.pdf
  374. */
  375. public static $cp1259_table = array(
  376. #bytes from 0x00 to 0x7F (ASCII) saved as is
  377. "\x80" => "\xd3\x98", #U+04d8 CYRILLIC CAPITAL LETTER SCHWA
  378. "\x81" => "\xd0\x83", #U+0403 CYRILLIC CAPITAL LETTER GJE
  379. "\x82" => "\xe2\x80\x9a", #U+201a SINGLE LOW-9 QUOTATION MARK
  380. "\x83" => "\xd1\x93", #U+0453 CYRILLIC SMALL LETTER GJE
  381. "\x84" => "\xe2\x80\x9e", #U+201e DOUBLE LOW-9 QUOTATION MARK
  382. "\x85" => "\xe2\x80\xa6", #U+2026 HORIZONTAL ELLIPSIS
  383. "\x86" => "\xe2\x80\xa0", #U+2020 DAGGER
  384. "\x87" => "\xe2\x80\xa1", #U+2021 DOUBLE DAGGER
  385. "\x88" => "\xe2\x82\xac", #U+20ac EURO SIGN
  386. "\x89" => "\xe2\x80\xb0", #U+2030 PER MILLE SIGN
  387. "\x8a" => "\xd3\xa8", #U+04e8 CYRILLIC CAPITAL LETTER BARRED O
  388. "\x8b" => "\xe2\x80\xb9", #U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  389. "\x8c" => "\xd2\xae", #U+04ae CYRILLIC CAPITAL LETTER STRAIGHT U
  390. "\x8d" => "\xd2\x96", #U+0496 CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
  391. "\x8e" => "\xd2\xa2", #U+04a2 CYRILLIC CAPITAL LETTER EN WITH HOOK
  392. "\x8f" => "\xd2\xba", #U+04ba CYRILLIC CAPITAL LETTER SHHA
  393. "\x90" => "\xd3\x99", #U+04d9 CYRILLIC SMALL LETTER SCHWA
  394. "\x91" => "\xe2\x80\x98", #U+2018 LEFT SINGLE QUOTATION MARK
  395. "\x92" => "\xe2\x80\x99", #U+2019 RIGHT SINGLE QUOTATION MARK
  396. "\x93" => "\xe2\x80\x9c", #U+201c LEFT DOUBLE QUOTATION MARK
  397. "\x94" => "\xe2\x80\x9d", #U+201d RIGHT DOUBLE QUOTATION MARK
  398. "\x95" => "\xe2\x80\xa2", #U+2022 BULLET
  399. "\x96" => "\xe2\x80\x93", #U+2013 EN DASH
  400. "\x97" => "\xe2\x80\x94", #U+2014 EM DASH
  401. #"\x98" #UNDEFINED
  402. "\x99" => "\xe2\x84\xa2", #U+2122 TRADE MARK SIGN
  403. "\x9a" => "\xd3\xa9", #U+04e9 CYRILLIC SMALL LETTER BARRED O
  404. "\x9b" => "\xe2\x80\xba", #U+203a SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  405. "\x9c" => "\xd2\xaf", #U+04af CYRILLIC SMALL LETTER STRAIGHT U
  406. "\x9d" => "\xd2\x97", #U+0497 CYRILLIC SMALL LETTER ZHE WITH DESCENDER
  407. "\x9e" => "\xd2\xa3", #U+04a3 CYRILLIC SMALL LETTER EN WITH HOOK
  408. "\x9f" => "\xd2\xbb", #U+04bb CYRILLIC SMALL LETTER SHHA
  409. "\xa0" => "\xc2\xa0", #U+00a0 NO-BREAK SPACE
  410. "\xa1" => "\xd0\x8e", #U+040e CYRILLIC CAPITAL LETTER SHORT U
  411. "\xa2" => "\xd1\x9e", #U+045e CYRILLIC SMALL LETTER SHORT U
  412. "\xa3" => "\xd0\x88", #U+0408 CYRILLIC CAPITAL LETTER JE
  413. "\xa4" => "\xc2\xa4", #U+00a4 CURRENCY SIGN
  414. "\xa5" => "\xd2\x90", #U+0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN
  415. "\xa6" => "\xc2\xa6", #U+00a6 BROKEN BAR
  416. "\xa7" => "\xc2\xa7", #U+00a7 SECTION SIGN
  417. "\xa8" => "\xd0\x81", #U+0401 CYRILLIC CAPITAL LETTER IO
  418. "\xa9" => "\xc2\xa9", #U+00a9 COPYRIGHT SIGN
  419. "\xaa" => "\xd0\x84", #U+0404 CYRILLIC CAPITAL LETTER UKRAINIAN IE
  420. "\xab" => "\xc2\xab", #U+00ab LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
  421. "\xac" => "\xc2\xac", #U+00ac NOT SIGN
  422. "\xad" => "\xc2\xad", #U+00ad SOFT HYPHEN
  423. "\xae" => "\xc2\xae", #U+00ae REGISTERED SIGN
  424. "\xaf" => "\xd0\x87", #U+0407 CYRILLIC CAPITAL LETTER YI
  425. "\xb0" => "\xc2\xb0", #U+00b0 DEGREE SIGN
  426. "\xb1" => "\xc2\xb1", #U+00b1 PLUS-MINUS SIGN
  427. "\xb2" => "\xd0\x86", #U+0406 CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
  428. "\xb3" => "\xd1\x96", #U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
  429. "\xb4" => "\xd2\x91", #U+0491 CYRILLIC SMALL LETTER GHE WITH UPTURN
  430. "\xb5" => "\xc2\xb5", #U+00b5 MICRO SIGN
  431. "\xb6" => "\xc2\xb6", #U+00b6 PILCROW SIGN
  432. "\xb7" => "\xc2\xb7", #U+00b7 MIDDLE DOT
  433. "\xb8" => "\xd1\x91", #U+0451 CYRILLIC SMALL LETTER IO
  434. "\xb9" => "\xe2\x84\x96", #U+2116 NUMERO SIGN
  435. "\xba" => "\xd1\x94", #U+0454 CYRILLIC SMALL LETTER UKRAINIAN IE
  436. "\xbb" => "\xc2\xbb", #U+00bb RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
  437. "\xbc" => "\xd1\x98", #U+0458 CYRILLIC SMALL LETTER JE
  438. "\xbd" => "\xd0\x85", #U+0405 CYRILLIC CAPITAL LETTER DZE
  439. "\xbe" => "\xd1\x95", #U+0455 CYRILLIC SMALL LETTER DZE
  440. "\xbf" => "\xd1\x97", #U+0457 CYRILLIC SMALL LETTER YI
  441. "\xc0" => "\xd0\x90", #U+0410 CYRILLIC CAPITAL LETTER A
  442. "\xc1" => "\xd0\x91", #U+0411 CYRILLIC CAPITAL LETTER BE
  443. "\xc2" => "\xd0\x92", #U+0412 CYRILLIC CAPITAL LETTER VE
  444. "\xc3" => "\xd0\x93", #U+0413 CYRILLIC CAPITAL LETTER GHE
  445. "\xc4" => "\xd0\x94", #U+0414 CYRILLIC CAPITAL LETTER DE
  446. "\xc5" => "\xd0\x95", #U+0415 CYRILLIC CAPITAL LETTER IE
  447. "\xc6" => "\xd0\x96", #U+0416 CYRILLIC CAPITAL LETTER ZHE
  448. "\xc7" => "\xd0\x97", #U+0417 CYRILLIC CAPITAL LETTER ZE
  449. "\xc8" => "\xd0\x98", #U+0418 CYRILLIC CAPITAL LETTER I
  450. "\xc9" => "\xd0\x99", #U+0419 CYRILLIC CAPITAL LETTER SHORT I
  451. "\xca" => "\xd0\x9a", #U+041a CYRILLIC CAPITAL LETTER KA
  452. "\xcb" => "\xd0\x9b", #U+041b CYRILLIC CAPITAL LETTER EL
  453. "\xcc" => "\xd0\x9c", #U+041c CYRILLIC CAPITAL LETTER EM
  454. "\xcd" => "\xd0\x9d", #U+041d CYRILLIC CAPITAL LETTER EN
  455. "\xce" => "\xd0\x9e", #U+041e CYRILLIC CAPITAL LETTER O
  456. "\xcf" => "\xd0\x9f", #U+041f CYRILLIC CAPITAL LETTER PE
  457. "\xd0" => "\xd0\xa0", #U+0420 CYRILLIC CAPITAL LETTER ER
  458. "\xd1" => "\xd0\xa1", #U+0421 CYRILLIC CAPITAL LETTER ES
  459. "\xd2" => "\xd0\xa2", #U+0422 CYRILLIC CAPITAL LETTER TE
  460. "\xd3" => "\xd0\xa3", #U+0423 CYRILLIC CAPITAL LETTER U
  461. "\xd4" => "\xd0\xa4", #U+0424 CYRILLIC CAPITAL LETTER EF
  462. "\xd5" => "\xd0\xa5", #U+0425 CYRILLIC CAPITAL LETTER HA
  463. "\xd6" => "\xd0\xa6", #U+0426 CYRILLIC CAPITAL LETTER TSE
  464. "\xd7" => "\xd0\xa7", #U+0427 CYRILLIC CAPITAL LETTER CHE
  465. "\xd8" => "\xd0\xa8", #U+0428 CYRILLIC CAPITAL LETTER SHA
  466. "\xd9" => "\xd0\xa9", #U+0429 CYRILLIC CAPITAL LETTER SHCHA
  467. "\xda" => "\xd0\xaa", #U+042a CYRILLIC CAPITAL LETTER HARD SIGN
  468. "\xdb" => "\xd0\xab", #U+042b CYRILLIC CAPITAL LETTER YERU
  469. "\xdc" => "\xd0\xac", #U+042c CYRILLIC CAPITAL LETTER SOFT SIGN
  470. "\xdd" => "\xd0\xad", #U+042d CYRILLIC CAPITAL LETTER E
  471. "\xde" => "\xd0\xae", #U+042e CYRILLIC CAPITAL LETTER YU
  472. "\xdf" => "\xd0\xaf", #U+042f CYRILLIC CAPITAL LETTER YA
  473. "\xe0" => "\xd0\xb0", #U+0430 CYRILLIC SMALL LETTER A
  474. "\xe1" => "\xd0\xb1", #U+0431 CYRILLIC SMALL LETTER BE
  475. "\xe2" => "\xd0\xb2", #U+0432 CYRILLIC SMALL LETTER VE
  476. "\xe3" => "\xd0\xb3", #U+0433 CYRILLIC SMALL LETTER GHE
  477. "\xe4" => "\xd0\xb4", #U+0434 CYRILLIC SMALL LETTER DE
  478. "\xe5" => "\xd0\xb5", #U+0435 CYRILLIC SMALL LETTER IE
  479. "\xe6" => "\xd0\xb6", #U+0436 CYRILLIC SMALL LETTER ZHE
  480. "\xe7" => "\xd0\xb7", #U+0437 CYRILLIC SMALL LETTER ZE
  481. "\xe8" => "\xd0\xb8", #U+0438 CYRILLIC SMALL LETTER I
  482. "\xe9" => "\xd0\xb9", #U+0439 CYRILLIC SMALL LETTER SHORT I
  483. "\xea" => "\xd0\xba", #U+043a CYRILLIC SMALL LETTER KA
  484. "\xeb" => "\xd0\xbb", #U+043b CYRILLIC SMALL LETTER EL
  485. "\xec" => "\xd0\xbc", #U+043c CYRILLIC SMALL LETTER EM
  486. "\xed" => "\xd0\xbd", #U+043d CYRILLIC SMALL LETTER EN
  487. "\xee" => "\xd0\xbe", #U+043e CYRILLIC SMALL LETTER O
  488. "\xef" => "\xd0\xbf", #U+043f CYRILLIC SMALL LETTER PE
  489. "\xf0" => "\xd1\x80", #U+0440 CYRILLIC SMALL LETTER ER
  490. "\xf1" => "\xd1\x81", #U+0441 CYRILLIC SMALL LETTER ES
  491. "\xf2" => "\xd1\x82", #U+0442 CYRILLIC SMALL LETTER TE
  492. "\xf3" => "\xd1\x83", #U+0443 CYRILLIC SMALL LETTER U
  493. "\xf4" => "\xd1\x84", #U+0444 CYRILLIC SMALL LETTER EF
  494. "\xf5" => "\xd1\x85", #U+0445 CYRILLIC SMALL LETTER HA
  495. "\xf6" => "\xd1\x86", #U+0446 CYRILLIC SMALL LETTER TSE
  496. "\xf7" => "\xd1\x87", #U+0447 CYRILLIC SMALL LETTER CHE
  497. "\xf8" => "\xd1\x88", #U+0448 CYRILLIC SMALL LETTER SHA
  498. "\xf9" => "\xd1\x89", #U+0449 CYRILLIC SMALL LETTER SHCHA
  499. "\xfa" => "\xd1\x8a", #U+044a CYRILLIC SMALL LETTER HARD SIGN
  500. "\xfb" => "\xd1\x8b", #U+044b CYRILLIC SMALL LETTER YERU
  501. "\xfc" => "\xd1\x8c", #U+044c CYRILLIC SMALL LETTER SOFT SIGN
  502. "\xfd" => "\xd1\x8d", #U+044d CYRILLIC SMALL LETTER E
  503. "\xfe" => "\xd1\x8e", #U+044e CYRILLIC SMALL LETTER YU
  504. "\xff" => "\xd1\x8f", #U+044f CYRILLIC SMALL LETTER YA
  505. );
  506. /**
  507. * UTF-8 Case lookup table
  508. *
  509. * This lookup table maps the upper case letters to their corresponding
  510. * lower case letters in UTF-8
  511. *
  512. * @author Andreas Gohr <andi@splitbrain.org>
  513. */
  514. public static $convert_case_table = array(
  515. #CASE_UPPER => case_lower
  516. "\x41" => "\x61", #A a
  517. "\x42" => "\x62", #B b
  518. "\x43" => "\x63", #C c
  519. "\x44" => "\x64", #D d
  520. "\x45" => "\x65", #E e
  521. "\x46" => "\x66", #F f
  522. "\x47" => "\x67", #G g
  523. "\x48" => "\x68", #H h
  524. "\x49" => "\x69", #I i
  525. "\x4a" => "\x6a", #J j
  526. "\x4b" => "\x6b", #K k
  527. "\x4c" => "\x6c", #L l
  528. "\x4d" => "\x6d", #M m
  529. "\x4e" => "\x6e", #N n
  530. "\x4f" => "\x6f", #O o
  531. "\x50" => "\x70", #P p
  532. "\x51" => "\x71", #Q q
  533. "\x52" => "\x72", #R r
  534. "\x53" => "\x73", #S s
  535. "\x54" => "\x74", #T t
  536. "\x55" => "\x75", #U u
  537. "\x56" => "\x76", #V v
  538. "\x57" => "\x77", #W w
  539. "\x58" => "\x78", #X x
  540. "\x59" => "\x79", #Y y
  541. "\x5a" => "\x7a", #Z z
  542. "\xc3\x80" => "\xc3\xa0",
  543. "\xc3\x81" => "\xc3\xa1",
  544. "\xc3\x82" => "\xc3\xa2",
  545. "\xc3\x83" => "\xc3\xa3",
  546. "\xc3\x84" => "\xc3\xa4",
  547. "\xc3\x85" => "\xc3\xa5",
  548. "\xc3\x86" => "\xc3\xa6",
  549. "\xc3\x87" => "\xc3\xa7",
  550. "\xc3\x88" => "\xc3\xa8",
  551. "\xc3\x89" => "\xc3\xa9",
  552. "\xc3\x8a" => "\xc3\xaa",
  553. "\xc3\x8b" => "\xc3\xab",
  554. "\xc3\x8c" => "\xc3\xac",
  555. "\xc3\x8d" => "\xc3\xad",
  556. "\xc3\x8e" => "\xc3\xae",
  557. "\xc3\x8f" => "\xc3\xaf",
  558. "\xc3\x90" => "\xc3\xb0",
  559. "\xc3\x91" => "\xc3\xb1",
  560. "\xc3\x92" => "\xc3\xb2",
  561. "\xc3\x93" => "\xc3\xb3",
  562. "\xc3\x94" => "\xc3\xb4",
  563. "\xc3\x95" => "\xc3\xb5",
  564. "\xc3\x96" => "\xc3\xb6",
  565. "\xc3\x98" => "\xc3\xb8",
  566. "\xc3\x99" => "\xc3\xb9",
  567. "\xc3\x9a" => "\xc3\xba",
  568. "\xc3\x9b" => "\xc3\xbb",
  569. "\xc3\x9c" => "\xc3\xbc",
  570. "\xc3\x9d" => "\xc3\xbd",
  571. "\xc3\x9e" => "\xc3\xbe",
  572. "\xc4\x80" => "\xc4\x81",
  573. "\xc4\x82" => "\xc4\x83",
  574. "\xc4\x84" => "\xc4\x85",
  575. "\xc4\x86" => "\xc4\x87",
  576. "\xc4\x88" => "\xc4\x89",
  577. "\xc4\x8a" => "\xc4\x8b",
  578. "\xc4\x8c" => "\xc4\x8d",
  579. "\xc4\x8e" => "\xc4\x8f",
  580. "\xc4\x90" => "\xc4\x91",
  581. "\xc4\x92" => "\xc4\x93",
  582. "\xc4\x94" => "\xc4\x95",
  583. "\xc4\x96" => "\xc4\x97",
  584. "\xc4\x98" => "\xc4\x99",
  585. "\xc4\x9a" => "\xc4\x9b",
  586. "\xc4\x9c" => "\xc4\x9d",
  587. "\xc4\x9e" => "\xc4\x9f",
  588. "\xc4\xa0" => "\xc4\xa1",
  589. "\xc4\xa2" => "\xc4\xa3",
  590. "\xc4\xa4" => "\xc4\xa5",
  591. "\xc4\xa6" => "\xc4\xa7",
  592. "\xc4\xa8" => "\xc4\xa9",
  593. "\xc4\xaa" => "\xc4\xab",
  594. "\xc4\xac" => "\xc4\xad",
  595. "\xc4\xae" => "\xc4\xaf",
  596. "\xc4\xb2" => "\xc4\xb3",
  597. "\xc4\xb4" => "\xc4\xb5",
  598. "\xc4\xb6" => "\xc4\xb7",
  599. "\xc4\xb9" => "\xc4\xba",
  600. "\xc4\xbb" => "\xc4\xbc",
  601. "\xc4\xbd" => "\xc4\xbe",
  602. "\xc4\xbf" => "\xc5\x80",
  603. "\xc5\x81" => "\xc5\x82",
  604. "\xc5\x83" => "\xc5\x84",
  605. "\xc5\x85" => "\xc5\x86",
  606. "\xc5\x87" => "\xc5\x88",
  607. "\xc5\x8a" => "\xc5\x8b",
  608. "\xc5\x8c" => "\xc5\x8d",
  609. "\xc5\x8e" => "\xc5\x8f",
  610. "\xc5\x90" => "\xc5\x91",
  611. "\xc5\x92" => "\xc5\x93",
  612. "\xc5\x94" => "\xc5\x95",
  613. "\xc5\x96" => "\xc5\x97",
  614. "\xc5\x98" => "\xc5\x99",
  615. "\xc5\x9a" => "\xc5\x9b",
  616. "\xc5\x9c" => "\xc5\x9d",
  617. "\xc5\x9e" => "\xc5\x9f",
  618. "\xc5\xa0" => "\xc5\xa1",
  619. "\xc5\xa2" => "\xc5\xa3",
  620. "\xc5\xa4" => "\xc5\xa5",
  621. "\xc5\xa6" => "\xc5\xa7",
  622. "\xc5\xa8" => "\xc5\xa9",
  623. "\xc5\xaa" => "\xc5\xab",
  624. "\xc5\xac" => "\xc5\xad",
  625. "\xc5\xae" => "\xc5\xaf",
  626. "\xc5\xb0" => "\xc5\xb1",
  627. "\xc5\xb2" => "\xc5\xb3",
  628. "\xc5\xb4" => "\xc5\xb5",
  629. "\xc5\xb6" => "\xc5\xb7",
  630. "\xc5\xb8" => "\xc3\xbf",
  631. "\xc5\xb9" => "\xc5\xba",
  632. "\xc5\xbb" => "\xc5\xbc",
  633. "\xc5\xbd" => "\xc5\xbe",
  634. "\xc6\x81" => "\xc9\x93",
  635. "\xc6\x82" => "\xc6\x83",
  636. "\xc6\x84" => "\xc6\x85",
  637. "\xc6\x86" => "\xc9\x94",
  638. "\xc6\x87" => "\xc6\x88",
  639. "\xc6\x89" => "\xc9\x96",
  640. "\xc6\x8a" => "\xc9\x97",
  641. "\xc6\x8b" => "\xc6\x8c",
  642. "\xc6\x8e" => "\xc7\x9d",
  643. "\xc6\x8f" => "\xc9\x99",
  644. "\xc6\x90" => "\xc9\x9b",
  645. "\xc6\x91" => "\xc6\x92",
  646. "\xc6\x94" => "\xc9\xa3",
  647. "\xc6\x96" => "\xc9\xa9",
  648. "\xc6\x97" => "\xc9\xa8",
  649. "\xc6\x98" => "\xc6\x99",
  650. "\xc6\x9c" => "\xc9\xaf",
  651. "\xc6\x9d" => "\xc9\xb2",
  652. "\xc6\x9f" => "\xc9\xb5",
  653. "\xc6\xa0" => "\xc6\xa1",
  654. "\xc6\xa2" => "\xc6\xa3",
  655. "\xc6\xa4" => "\xc6\xa5",
  656. "\xc6\xa6" => "\xca\x80",
  657. "\xc6\xa7" => "\xc6\xa8",
  658. "\xc6\xa9" => "\xca\x83",
  659. "\xc6\xac" => "\xc6\xad",
  660. "\xc6\xae" => "\xca\x88",
  661. "\xc6\xaf" => "\xc6\xb0",
  662. "\xc6\xb1" => "\xca\x8a",
  663. "\xc6\xb2" => "\xca\x8b",
  664. "\xc6\xb3" => "\xc6\xb4",
  665. "\xc6\xb5" => "\xc6\xb6",
  666. "\xc6\xb7" => "\xca\x92",
  667. "\xc6\xb8" => "\xc6\xb9",
  668. "\xc6\xbc" => "\xc6\xbd",
  669. "\xc7\x85" => "\xc7\x86",
  670. "\xc7\x88" => "\xc7\x89",
  671. "\xc7\x8b" => "\xc7\x8c",
  672. "\xc7\x8d" => "\xc7\x8e",
  673. "\xc7\x8f" => "\xc7\x90",
  674. "\xc7\x91" => "\xc7\x92",
  675. "\xc7\x93" => "\xc7\x94",
  676. "\xc7\x95" => "\xc7\x96",
  677. "\xc7\x97" => "\xc7\x98",
  678. "\xc7\x99" => "\xc7\x9a",
  679. "\xc7\x9b" => "\xc7\x9c",
  680. "\xc7\x9e" => "\xc7\x9f",
  681. "\xc7\xa0" => "\xc7\xa1",
  682. "\xc7\xa2" => "\xc7\xa3",
  683. "\xc7\xa4" => "\xc7\xa5",
  684. "\xc7\xa6" => "\xc7\xa7",
  685. "\xc7\xa8" => "\xc7\xa9",
  686. "\xc7\xaa" => "\xc7\xab",
  687. "\xc7\xac" => "\xc7\xad",
  688. "\xc7\xae" => "\xc7\xaf",
  689. "\xc7\xb2" => "\xc7\xb3",
  690. "\xc7\xb4" => "\xc7\xb5",
  691. "\xc7\xb6" => "\xc6\x95",
  692. "\xc7\xb7" => "\xc6\xbf",
  693. "\xc7\xb8" => "\xc7\xb9",
  694. "\xc7\xba" => "\xc7\xbb",
  695. "\xc7\xbc" => "\xc7\xbd",
  696. "\xc7\xbe" => "\xc7\xbf",
  697. "\xc8\x80" => "\xc8\x81",
  698. "\xc8\x82" => "\xc8\x83",
  699. "\xc8\x84" => "\xc8\x85",
  700. "\xc8\x86" => "\xc8\x87",
  701. "\xc8\x88" => "\xc8\x89",
  702. "\xc8\x8a" => "\xc8\x8b",
  703. "\xc8\x8c" => "\xc8\x8d",
  704. "\xc8\x8e" => "\xc8\x8f",
  705. "\xc8\x90" => "\xc8\x91",
  706. "\xc8\x92" => "\xc8\x93",
  707. "\xc8\x94" => "\xc8\x95",
  708. "\xc8\x96" => "\xc8\x97",
  709. "\xc8\x98" => "\xc8\x99",
  710. "\xc8\x9a" => "\xc8\x9b",
  711. "\xc8\x9c" => "\xc8\x9d",
  712. "\xc8\x9e" => "\xc8\x9f",
  713. "\xc8\xa0" => "\xc6\x9e",
  714. "\xc8\xa2" => "\xc8\xa3",
  715. "\xc8\xa4" => "\xc8\xa5",
  716. "\xc8\xa6" => "\xc8\xa7",
  717. "\xc8\xa8" => "\xc8\xa9",
  718. "\xc8\xaa" => "\xc8\xab",
  719. "\xc8\xac" => "\xc8\xad",
  720. "\xc8\xae" => "\xc8\xaf",
  721. "\xc8\xb0" => "\xc8\xb1",
  722. "\xc8\xb2" => "\xc8\xb3",
  723. "\xce\x86" => "\xce\xac",
  724. "\xce\x88" => "\xce\xad",
  725. "\xce\x89" => "\xce\xae",
  726. "\xce\x8a" => "\xce\xaf",
  727. "\xce\x8c" => "\xcf\x8c",
  728. "\xce\x8e" => "\xcf\x8d",
  729. "\xce\x8f" => "\xcf\x8e",
  730. "\xce\x91" => "\xce\xb1",
  731. "\xce\x92" => "\xce\xb2",
  732. "\xce\x93" => "\xce\xb3",
  733. "\xce\x94" => "\xce\xb4",
  734. "\xce\x95" => "\xce\xb5",
  735. "\xce\x96" => "\xce\xb6",
  736. "\xce\x97" => "\xce\xb7",
  737. "\xce\x98" => "\xce\xb8",
  738. "\xce\x99" => "\xce\xb9",
  739. "\xce\x9a" => "\xce\xba",
  740. "\xce\x9b" => "\xce\xbb",
  741. "\xce\x9c" => "\xc2\xb5",
  742. "\xce\x9d" => "\xce\xbd",
  743. "\xce\x9e" => "\xce\xbe",
  744. "\xce\x9f" => "\xce\xbf",
  745. "\xce\xa0" => "\xcf\x80",
  746. "\xce\xa1" => "\xcf\x81",
  747. "\xce\xa3" => "\xcf\x82",
  748. "\xce\xa4" => "\xcf\x84",
  749. "\xce\xa5" => "\xcf\x85",
  750. "\xce\xa6" => "\xcf\x86",
  751. "\xce\xa7" => "\xcf\x87",
  752. "\xce\xa8" => "\xcf\x88",
  753. "\xce\xa9" => "\xcf\x89",
  754. "\xce\xaa" => "\xcf\x8a",
  755. "\xce\xab" => "\xcf\x8b",
  756. "\xcf\x98" => "\xcf\x99",
  757. "\xcf\x9a" => "\xcf\x9b",
  758. "\xcf\x9c" => "\xcf\x9d",
  759. "\xcf\x9e" => "\xcf\x9f",
  760. "\xcf\xa0" => "\xcf\xa1",
  761. "\xcf\xa2" => "\xcf\xa3",
  762. "\xcf\xa4" => "\xcf\xa5",
  763. "\xcf\xa6" => "\xcf\xa7",
  764. "\xcf\xa8" => "\xcf\xa9",
  765. "\xcf\xaa" => "\xcf\xab",
  766. "\xcf\xac" => "\xcf\xad",
  767. "\xcf\xae" => "\xcf\xaf",
  768. "\xd0\x80" => "\xd1\x90",
  769. "\xd0\x81" => "\xd1\x91",
  770. "\xd0\x82" => "\xd1\x92",
  771. "\xd0\x83" => "\xd1\x93",
  772. "\xd0\x84" => "\xd1\x94",
  773. "\xd0\x85" => "\xd1\x95",
  774. "\xd0\x86" => "\xd1\x96",
  775. "\xd0\x87" => "\xd1\x97",
  776. "\xd0\x88" => "\xd1\x98",
  777. "\xd0\x89" => "\xd1\x99",
  778. "\xd0\x8a" => "\xd1\x9a",
  779. "\xd0\x8b" => "\xd1\x9b",
  780. "\xd0\x8c" => "\xd1\x9c",
  781. "\xd0\x8d" => "\xd1\x9d",
  782. "\xd0\x8e" => "\xd1\x9e",
  783. "\xd0\x8f" => "\xd1\x9f",
  784. "\xd0\x90" => "\xd0\xb0",
  785. "\xd0\x91" => "\xd0\xb1",
  786. "\xd0\x92" => "\xd0\xb2",
  787. "\xd0\x93" => "\xd0\xb3",
  788. "\xd0\x94" => "\xd0\xb4",
  789. "\xd0\x95" => "\xd0\xb5",
  790. "\xd0\x96" => "\xd0\xb6",
  791. "\xd0\x97" => "\xd0\xb7",
  792. "\xd0\x98" => "\xd0\xb8",
  793. "\xd0\x99" => "\xd0\xb9",
  794. "\xd0\x9a" => "\xd0\xba",
  795. "\xd0\x9b" => "\xd0\xbb",
  796. "\xd0\x9c" => "\xd0\xbc",
  797. "\xd0\x9d" => "\xd0\xbd",
  798. "\xd0\x9e" => "\xd0\xbe",
  799. "\xd0\x9f" => "\xd0\xbf",
  800. "\xd0\xa0" => "\xd1\x80",
  801. "\xd0\xa1" => "\xd1\x81",
  802. "\xd0\xa2" => "\xd1\x82",
  803. "\xd0\xa3" => "\xd1\x83",
  804. "\xd0\xa4" => "\xd1\x84",
  805. "\xd0\xa5" => "\xd1\x85",
  806. "\xd0\xa6" => "\xd1\x86",
  807. "\xd0\xa7" => "\xd1\x87",
  808. "\xd0\xa8" => "\xd1\x88",
  809. "\xd0\xa9" => "\xd1\x89",
  810. "\xd0\xaa" => "\xd1\x8a",
  811. "\xd0\xab" => "\xd1\x8b",
  812. "\xd0\xac" => "\xd1\x8c",
  813. "\xd0\xad" => "\xd1\x8d",
  814. "\xd0\xae" => "\xd1\x8e",
  815. "\xd0\xaf" => "\xd1\x8f",
  816. "\xd1\xa0" => "\xd1\xa1",
  817. "\xd1\xa2" => "\xd1\xa3",
  818. "\xd1\xa4" => "\xd1\xa5",
  819. "\xd1\xa6" => "\xd1\xa7",
  820. "\xd1\xa8" => "\xd1\xa9",
  821. "\xd1\xaa" => "\xd1\xab",
  822. "\xd1\xac" => "\xd1\xad",
  823. "\xd1\xae" => "\xd1\xaf",
  824. "\xd1\xb0" => "\xd1\xb1",
  825. "\xd1\xb2" => "\xd1\xb3",
  826. "\xd1\xb4" => "\xd1\xb5",
  827. "\xd1\xb6" => "\xd1\xb7",
  828. "\xd1\xb8" => "\xd1\xb9",
  829. "\xd1\xba" => "\xd1\xbb",
  830. "\xd1\xbc" => "\xd1\xbd",
  831. "\xd1\xbe" => "\xd1\xbf",
  832. "\xd2\x80" => "\xd2\x81",
  833. "\xd2\x8a" => "\xd2\x8b",
  834. "\xd2\x8c" => "\xd2\x8d",
  835. "\xd2\x8e" => "\xd2\x8f",
  836. "\xd2\x90" => "\xd2\x91",
  837. "\xd2\x92" => "\xd2\x93",
  838. "\xd2\x94" => "\xd2\x95",
  839. "\xd2\x96" => "\xd2\x97",
  840. "\xd2\x98" => "\xd2\x99",
  841. "\xd2\x9a" => "\xd2\x9b",
  842. "\xd2\x9c" => "\xd2\x9d",
  843. "\xd2\x9e" => "\xd2\x9f",
  844. "\xd2\xa0" => "\xd2\xa1",
  845. "\xd2\xa2" => "\xd2\xa3",
  846. "\xd2\xa4" => "\xd2\xa5",
  847. "\xd2\xa6" => "\xd2\xa7",
  848. "\xd2\xa8" => "\xd2\xa9",
  849. "\xd2\xaa" => "\xd2\xab",
  850. "\xd2\xac" => "\xd2\xad",
  851. "\xd2\xae" => "\xd2\xaf",
  852. "\xd2\xb0" => "\xd2\xb1",
  853. "\xd2\xb2" => "\xd2\xb3",
  854. "\xd2\xb4" => "\xd2\xb5",
  855. "\xd2\xb6" => "\xd2\xb7",
  856. "\xd2\xb8" => "\xd2\xb9",
  857. "\xd2\xba" => "\xd2\xbb",
  858. "\xd2\xbc" => "\xd2\xbd",
  859. "\xd2\xbe" => "\xd2\xbf",
  860. "\xd3\x81" => "\xd3\x82",
  861. "\xd3\x83" => "\xd3\x84",
  862. "\xd3\x85" => "\xd3\x86",
  863. "\xd3\x87" => "\xd3\x88",
  864. "\xd3\x89" => "\xd3\x8a",
  865. "\xd3\x8b" => "\xd3\x8c",
  866. "\xd3\x8d" => "\xd3\x8e",
  867. "\xd3\x90" => "\xd3\x91",
  868. "\xd3\x92" => "\xd3\x93",
  869. "\xd3\x94" => "\xd3\x95",
  870. "\xd3\x96" => "\xd3\x97",
  871. "\xd3\x98" => "\xd3\x99",
  872. "\xd3\x9a" => "\xd3\x9b",
  873. "\xd3\x9c" => "\xd3\x9d",
  874. "\xd3\x9e" => "\xd3\x9f",
  875. "\xd3\xa0" => "\xd3\xa1",
  876. "\xd3\xa2" => "\xd3\xa3",
  877. "\xd3\xa4" => "\xd3\xa5",
  878. "\xd3\xa6" => "\xd3\xa7",
  879. "\xd3\xa8" => "\xd3\xa9",
  880. "\xd3\xaa" => "\xd3\xab",
  881. "\xd3\xac" => "\xd3\xad",
  882. "\xd3\xae" => "\xd3\xaf",
  883. "\xd3\xb0" => "\xd3\xb1",
  884. "\xd3\xb2" => "\xd3\xb3",
  885. "\xd3\xb4" => "\xd3\xb5",
  886. "\xd3\xb8" => "\xd3\xb9",
  887. "\xd4\x80" => "\xd4\x81",
  888. "\xd4\x82" => "\xd4\x83",
  889. "\xd4\x84" => "\xd4\x85",
  890. "\xd4\x86" => "\xd4\x87",
  891. "\xd4\x88" => "\xd4\x89",
  892. "\xd4\x8a" => "\xd4\x8b",
  893. "\xd4\x8c" => "\xd4\x8d",
  894. "\xd4\x8e" => "\xd4\x8f",
  895. "\xd4\xb1" => "\xd5\xa1",
  896. "\xd4\xb2" => "\xd5\xa2",
  897. "\xd4\xb3" => "\xd5\xa3",
  898. "\xd4\xb4" => "\xd5\xa4",
  899. "\xd4\xb5" => "\xd5\xa5",
  900. "\xd4\xb6" => "\xd5\xa6",
  901. "\xd4\xb7" => "\xd5\xa7",
  902. "\xd4\xb8" => "\xd5\xa8",
  903. "\xd4\xb9" => "\xd5\xa9",
  904. "\xd4\xba" => "\xd5\xaa",
  905. "\xd4\xbb" => "\xd5\xab",
  906. "\xd4\xbc" => "\xd5\xac",
  907. "\xd4\xbd" => "\xd5\xad",
  908. "\xd4\xbe" => "\xd5\xae",
  909. "\xd4\xbf" => "\xd5\xaf",
  910. "\xd5\x80" => "\xd5\xb0",
  911. "\xd5\x81" => "\xd5\xb1",
  912. "\xd5\x82" => "\xd5\xb2",
  913. "\xd5\x83" => "\xd5\xb3",
  914. "\xd5\x84" => "\xd5\xb4",
  915. "\xd5\x85" => "\xd5\xb5",
  916. "\xd5\x86" => "\xd5\xb6",
  917. "\xd5\x87" => "\xd5\xb7",
  918. "\xd5\x88" => "\xd5\xb8",
  919. "\xd5\x89" => "\xd5\xb9",
  920. "\xd5\x8a" => "\xd5\xba",
  921. "\xd5\x8b" => "\xd5\xbb",
  922. "\xd5\x8c" => "\xd5\xbc",
  923. "\xd5\x8d" => "\xd5\xbd",
  924. "\xd5\x8e" => "\xd5\xbe",
  925. "\xd5\x8f" => "\xd5\xbf",
  926. "\xd5\x90" => "\xd6\x80",
  927. "\xd5\x91" => "\xd6\x81",
  928. "\xd5\x92" => "\xd6\x82",
  929. "\xd5\x93" => "\xd6\x83",
  930. "\xd5\x94" => "\xd6\x84",
  931. "\xd5\x95" => "\xd6\x85",
  932. "\xd5\x96" => "\xd6\x86",
  933. "\xe1\xb8\x80" => "\xe1\xb8\x81",
  934. "\xe1\xb8\x82" => "\xe1\xb8\x83",
  935. "\xe1\xb8\x84" => "\xe1\xb8\x85",
  936. "\xe1\xb8\x86" => "\xe1\xb8\x87",
  937. "\xe1\xb8\x88" => "\xe1\xb8\x89",
  938. "\xe1\xb8\x8a" => "\xe1\xb8\x8b",
  939. "\xe1\xb8\x8c" => "\xe1\xb8\x8d",
  940. "\xe1\xb8\x8e" => "\xe1\xb8\x8f",
  941. "\xe1\xb8\x90" => "\xe1\xb8\x91",
  942. "\xe1\xb8\x92" => "\xe1\xb8\x93",
  943. "\xe1\xb8\x94" => "\xe1\xb8\x95",
  944. "\xe1\xb8\x96" => "\xe1\xb8\x97",
  945. "\xe1\xb8\x98" => "\xe1\xb8\x99",
  946. "\xe1\xb8\x9a" => "\xe1\xb8\x9b",
  947. "\xe1\xb8\x9c" => "\xe1\xb8\x9d",
  948. "\xe1\xb8\x9e" => "\xe1\xb8\x9f",
  949. "\xe1\xb8\xa0" => "\xe1\xb8\xa1",
  950. "\xe1\xb8\xa2" => "\xe1\xb8\xa3",
  951. "\xe1\xb8\xa4" => "\xe1\xb8\xa5",
  952. "\xe1\xb8\xa6" => "\xe1\xb8\xa7",
  953. "\xe1\xb8\xa8" => "\xe1\xb8\xa9",
  954. "\xe1\xb8\xaa" => "\xe1\xb8\xab",
  955. "\xe1\xb8\xac" => "\xe1\xb8\xad",
  956. "\xe1\xb8\xae" => "\xe1\xb8\xaf",
  957. "\xe1\xb8\xb0" => "\xe1\xb8\xb1",
  958. "\xe1\xb8\xb2" => "\xe1\xb8\xb3",
  959. "\xe1\xb8\xb4" => "\xe1\xb8\xb5",
  960. "\xe1\xb8\xb6" => "\xe1\xb8\xb7",
  961. "\xe1\xb8\xb8" => "\xe1\xb8\xb9",
  962. "\xe1\xb8\xba" => "\xe1\xb8\xbb",
  963. "\xe1\xb8\xbc" => "\xe1\xb8\xbd",
  964. "\xe1\xb8\xbe" => "\xe1\xb8\xbf",
  965. "\xe1\xb9\x80" => "\xe1\xb9\x81",
  966. "\xe1\xb9\x82" => "\xe1\xb9\x83",
  967. "\xe1\xb9\x84" => "\xe1\xb9\x85",
  968. "\xe1\xb9\x86" => "\xe1\xb9\x87",
  969. "\xe1\xb9\x88" => "\xe1\xb9\x89",
  970. "\xe1\xb9\x8a" => "\xe1\xb9\x8b",
  971. "\xe1\xb9\x8c" => "\xe1\xb9\x8d",
  972. "\xe1\xb9\x8e" => "\xe1\xb9\x8f",
  973. "\xe1\xb9\x90" => "\xe1\xb9\x91",
  974. "\xe1\xb9\x92" => "\xe1\xb9\x93",
  975. "\xe1\xb9\x94" => "\xe1\xb9\x95",
  976. "\xe1\xb9\x96" => "\xe1\xb9\x97",
  977. "\xe1\xb9\x98" => "\xe1\xb9\x99",
  978. "\xe1\xb9\x9a" => "\xe1\xb9\x9b",
  979. "\xe1\xb9\x9c" => "\xe1\xb9\x9d",
  980. "\xe1\xb9\x9e" => "\xe1\xb9\x9f",
  981. "\xe1\xb9\xa0" => "\xe1\xb9\xa1",
  982. "\xe1\xb9\xa2" => "\xe1\xb9\xa3",
  983. "\xe1\xb9\xa4" => "\xe1\xb9\xa5",
  984. "\xe1\xb9\xa6" => "\xe1\xb9\xa7",
  985. "\xe1\xb9\xa8" => "\xe1\xb9\xa9",
  986. "\xe1\xb9\xaa" => "\xe1\xb9\xab",
  987. "\xe1\xb9\xac" => "\xe1\xb9\xad",
  988. "\xe1\xb9\xae" => "\xe1\xb9\xaf",
  989. "\xe1\xb9\xb0" => "\xe1\xb9\xb1",
  990. "\xe1\xb9\xb2" => "\xe1\xb9\xb3",
  991. "\xe1\xb9\xb4" => "\xe1\xb9\xb5",
  992. "\xe1\xb9\xb6" => "\xe1\xb9\xb7",
  993. "\xe1\xb9\xb8" => "\xe1\xb9\xb9",
  994. "\xe1\xb9\xba" => "\xe1\xb9\xbb",
  995. "\xe1\xb9\xbc" => "\xe1\xb9\xbd",
  996. "\xe1\xb9\xbe" => "\xe1\xb9\xbf",
  997. "\xe1\xba\x80" => "\xe1\xba\x81",
  998. "\xe1\xba\x82" => "\xe1\xba\x83",
  999. "\xe1\xba\x84" => "\xe1\xba\x85",
  1000. "\xe1\xba\x86" => "\xe1\xba\x87",
  1001. "\xe1\xba\x88" => "\xe1\xba\x89",
  1002. "\xe1\xba\x8a" => "\xe1\xba\x8b",
  1003. "\xe1\xba\x8c" => "\xe1\xba\x8d",
  1004. "\xe1\xba\x8e" => "\xe1\xba\x8f",
  1005. "\xe1\xba\x90" => "\xe1\xba\x91",
  1006. "\xe1\xba\x92" => "\xe1\xba\x93",
  1007. "\xe1\xba\x94" => "\xe1\xba\x95",
  1008. "\xe1\xba\xa0" => "\xe1\xba\xa1",
  1009. "\xe1\xba\xa2" => "\xe1\xba\xa3",
  1010. "\xe1\xba\xa4" => "\xe1\xba\xa5",
  1011. "\xe1\xba\xa6" => "\xe1\xba\xa7",
  1012. "\xe1\xba\xa8" => "\xe1\xba\xa9",
  1013. "\xe1\xba\xaa" => "\xe1\xba\xab",
  1014. "\xe1\xba\xac" => "\xe1\xba\xad",
  1015. "\xe1\xba\xae" => "\xe1\xba\xaf",
  1016. "\xe1\xba\xb0" => "\xe1\xba\xb1",
  1017. "\xe1\xba\xb2" => "\xe1\xba\xb3",
  1018. "\xe1\xba\xb4" => "\xe1\xba\xb5",
  1019. "\xe1\xba\xb6" => "\xe1\xba\xb7",
  1020. "\xe1\xba\xb8" => "\xe1\xba\xb9",
  1021. "\xe1\xba\xba" => "\xe1\xba\xbb",
  1022. "\xe1\xba\xbc" => "\xe1\xba\xbd",
  1023. "\xe1\xba\xbe" => "\xe1\xba\xbf",
  1024. "\xe1\xbb\x80" => "\xe1\xbb\x81",
  1025. "\xe1\xbb\x82" => "\xe1\xbb\x83",
  1026. "\xe1\xbb\x84" => "\xe1\xbb\x85",
  1027. "\xe1\xbb\x86" => "\xe1\xbb\x87",
  1028. "\xe1\xbb\x88" => "\xe1\xbb\x89",
  1029. "\xe1\xbb\x8a" => "\xe1\xbb\x8b",
  1030. "\xe1\xbb\x8c" => "\xe1\xbb\x8d",
  1031. "\xe1\xbb\x8e" => "\xe1\xbb\x8f",
  1032. "\xe1\xbb\x90" => "\xe1\xbb\x91",
  1033. "\xe1\xbb\x92" => "\xe1\xbb\x93",
  1034. "\xe1\xbb\x94" => "\xe1\xbb\x95",
  1035. "\xe1\xbb\x96" => "\xe1\xbb\x97",
  1036. "\xe1\xbb\x98" => "\xe1\xbb\x99",
  1037. "\xe1\xbb\x9a" => "\xe1\xbb\x9b",
  1038. "\xe1\xbb\x9c" => "\xe1\xbb\x9d",
  1039. "\xe1\xbb\x9e" => "\xe1\xbb\x9f",
  1040. "\xe1\xbb\xa0" => "\xe1\xbb\xa1",
  1041. "\xe1\xbb\xa2" => "\xe1\xbb\xa3",
  1042. "\xe1\xbb\xa4" => "\xe1\xbb\xa5",
  1043. "\xe1\xbb\xa6" => "\xe1\xbb\xa7",
  1044. "\xe1\xbb\xa8" => "\xe1\xbb\xa9",
  1045. "\xe1\xbb\xaa" => "\xe1\xbb\xab",
  1046. "\xe1\xbb\xac" => "\xe1\xbb\xad",
  1047. "\xe1\xbb\xae" => "\xe1\xbb\xaf",
  1048. "\xe1\xbb\xb0" => "\xe1\xbb\xb1",
  1049. "\xe1\xbb\xb2" => "\xe1\xbb\xb3",
  1050. "\xe1\xbb\xb4" => "\xe1\xbb\xb5",
  1051. "\xe1\xbb\xb6" => "\xe1\xbb\xb7",
  1052. "\xe1\xbb\xb8" => "\xe1\xbb\xb9",
  1053. "\xe1\xbc\x88" => "\xe1\xbc\x80",
  1054. "\xe1\xbc\x89" => "\xe1\xbc\x81",
  1055. "\xe1\xbc\x8a" => "\xe1\xbc\x82",
  1056. "\xe1\xbc\x8b" => "\xe1\xbc\x83",
  1057. "\xe1\xbc\x8c" => "\xe1\xbc\x84",
  1058. "\xe1\xbc\x8d" => "\xe1\xbc\x85",
  1059. "\xe1\xbc\x8e" => "\xe1\xbc\x86",
  1060. "\xe1\xbc\x8f" => "\xe1\xbc\x87",
  1061. "\xe1\xbc\x98" => "\xe1\xbc\x90",
  1062. "\xe1\xbc\x99" => "\xe1\xbc\x91",
  1063. "\xe1\xbc\x9a" => "\xe1\xbc\x92",
  1064. "\xe1\xbc\x9b" => "\xe1\xbc\x93",
  1065. "\xe1\xbc\x9c" => "\xe1\xbc\x94",
  1066. "\xe1\xbc\x9d" => "\xe1\xbc\x95",
  1067. "\xe1\xbc\xa9" => "\xe1\xbc\xa1",
  1068. "\xe1\xbc\xaa" => "\xe1\xbc\xa2",
  1069. "\xe1\xbc\xab" => "\xe1\xbc\xa3",
  1070. "\xe1\xbc\xac" => "\xe1\xbc\xa4",
  1071. "\xe1\xbc\xad" => "\xe1\xbc\xa5",
  1072. "\xe1\xbc\xae" => "\xe1\xbc\xa6",
  1073. "\xe1\xbc\xaf" => "\xe1\xbc\xa7",
  1074. "\xe1\xbc\xb8" => "\xe1\xbc\xb0",
  1075. "\xe1\xbc\xb9" => "\xe1\xbc\xb1",
  1076. "\xe1\xbc\xba" => "\xe1\xbc\xb2",
  1077. "\xe1\xbc\xbb" => "\xe1\xbc\xb3",
  1078. "\xe1\xbc\xbc" => "\xe1\xbc\xb4",
  1079. "\xe1\xbc\xbd" => "\xe1\xbc\xb5",
  1080. "\xe1\xbc\xbe" => "\xe1\xbc\xb6",
  1081. "\xe1\xbc\xbf" => "\xe1\xbc\xb7",
  1082. "\xe1\xbd\x88" => "\xe1\xbd\x80",
  1083. "\xe1\xbd\x89" => "\xe1\xbd\x81",
  1084. "\xe1\xbd\x8a" => "\xe1\xbd\x82",
  1085. "\xe1\xbd\x8b" => "\xe1\xbd\x83",
  1086. "\xe1\xbd\x8c" => "\xe1\…

Large files files are truncated, but you can click here to view the full file