/src/Ofire/vendor/php-lang-correct/UTF8.php
PHP | 4072 lines | 3218 code | 134 blank | 720 comment | 312 complexity | 9b23432c441cb479c84c2c67d66a973d MD5 | raw file
Large files files are truncated, but you can click here to view the full file
- <?php
- /**
- * PHP5 UTF-8 is a UTF-8 aware library of functions mirroring PHP's own string functions.
- *
- * The powerful solution/contribution for UTF-8 support in your framework/CMS, written on PHP.
- * This library is a further development of http://sourceforge.net/projects/phputf8 (last updated in 2007).
- *
- * UTF-8 support in PHP 5.
- *
- * Features and benefits of using this class
- * * Compatibility with the interface of the standard PHP functions that deal with single-byte encodings
- * * Ability to work without the PHP extensions ICONV and MBSTRING; if they are available, they are actively used!
- * * Useful features that are missing from ICONV and MBSTRING
- * * The methods that take and return a string, are able to take and return null (useful for selects from a database)
- * * Several methods are able to process arrays recursively
- * * A single interface and encapsulation (you can inherit and override)
- * * High performance, reliability and quality code
- * * PHP >= 5.3.x
- *
- * In Russian:
- *
- * Поддержка UTF-8 в PHP 5.
- *
- * Возможности и преимущества использования этого класса
- * * Совместимость с интерфейсом стандартных PHP функций, работающих с однобайтовыми кодировками
- * * Возможность работы без PHP расширений ICONV и MBSTRING, если они есть, то активно используются!
- * * Полезные функции, отсутствующие в ICONV и MBSTRING
- * * Методы, которые принимают и возвращают строку, умеют принимать и возвращать null (удобно при выборках значений из базы данных)
- * * Несколько методов умеют обрабатывать массивы рекурсивно
- * * Единый интерфейс и инкапсуляция (можно унаследоваться и переопределить методы)
- * * Высокая производительность, надёжность и качественный код
- * * PHP >= 5.3.x
- *
- * Example:
- * $s = 'Hello, Привет';
- * if (UTF8::is_utf8($s)) echo UTF8::strlen($s);
- *
- * UTF-8 encoding scheme:
- * 2^7 0x00000000 — 0x0000007F 0xxxxxxx
- * 2^11 0x00000080 — 0x000007FF 110xxxxx 10xxxxxx
- * 2^16 0x00000800 — 0x0000FFFF 1110xxxx 10xxxxxx 10xxxxxx
- * 2^21 0x00010000 — 0x001FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- * 1-4 bytes length: 2^7 + 2^11 + 2^16 + 2^21 = 2 164 864
- *
- * If I were the owner of the world, I would leave only 2 encodings: UTF-8 and UTF-32 ;-)
- *
- * Useful links
- * http://ru.wikipedia.org/wiki/UTF8
- * http://www.madore.org/~david/misc/unitest/ A Unicode Test Page
- * http://www.unicode.org/
- * http://www.unicode.org/reports/
- * http://www.unicode.org/reports/tr10/ Unicode Collation Algorithm
- * http://www.unicode.org/Public/UCA/6.0.0/ Unicode Collation Algorithm
- * http://www.unicode.org/reports/tr6/ A Standard Compression Scheme for Unicode
- * http://www.fileformat.info/info/unicode/char/search.htm Unicode Character Search
- *
- * @link http://code.google.com/p/php5-utf8/
- * @license http://creativecommons.org/licenses/by-sa/3.0/
- * @author Nasibullin Rinat
- * @version 2.2.2
- */
- class UTF8
- {
- #Unicode REPLACEMENT CHARACTER in UTF-8, used in place of a broken (invalid) character
- const REPLACEMENT_CHAR = "\xEF\xBF\xBD"; #U+FFFD
-
- /**
- * Regular expression for a single character in UTF-8, usable without the /u flag.
- *
- * The alternatives enumerate the well-formed byte sequences only: printable ASCII
- * plus TAB, LF and CR, then 2-, 3- and 4-byte sequences excluding overlong forms,
- * surrogates (\xED\xA0-\xBF...) and anything above plane 16.
- * The pattern text contains insignificant whitespace and "#" comments, and its
- * alternatives are not wrapped in a group.
- * NOTE(review): callers presumably have to apply the /x flag and add their own
- * grouping around this fragment -- confirm against the places where it is used.
- *
- * @deprecated Instead, use a dot (".") and the flag /u, it works faster!
- * @var string
- */
- public static $char_re = ' [\x09\x0A\x0D\x20-\x7E] # ASCII strict
- # [\x00-\x7F] # ASCII non-strict (including control chars)
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
- | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
- ';
-
- /**
- * Combining diacritical marks (Unicode 5.1), as a regular expression fragment
- * that works on raw UTF-8 bytes (no /u flag needed).
- *
- * For example, russian letters in composed form: "Ё" (U+0401), "Й" (U+0419),
- * decomposed form: (U+0415 U+0308), (U+0418 U+0306)
- *
- * The first line of the pattern covers the whole block U+0300 — U+036F:
- * U+0300 — U+033F are encoded as \xcc\x80 — \xcc\xbf,
- * U+0340 — U+036F are encoded as \xcd\x80 — \xcd\xaf.
- * (The upper bound of the \xcc class must be \xbf; stopping at \xb9 would
- * silently skip the six marks U+033A — U+033F.)
- *
- * The property name keeps its historical spelling ("diactrical") because it is public.
- *
- * @link http://www.unicode.org/charts/PDF/U0300.pdf
- * @link http://www.unicode.org/charts/PDF/U1DC0.pdf
- * @link http://www.unicode.org/charts/PDF/UFE20.pdf
- * @var string
- */
- #public static $diactrical_re = '\p{M}'; #alternative, but only with /u flag
- public static $diactrical_re = ' \xcc[\x80-\xbf]|\xcd[\x80-\xaf] #UNICODE range: U+0300 — U+036F (for letters)
- | \xe2\x83[\x90-\xbf] #UNICODE range: U+20D0 — U+20FF (for symbols)
- | \xe1\xb7[\x80-\xbf] #UNICODE range: U+1DC0 — U+1DFF (supplement)
- | \xef\xb8[\xa0-\xaf] #UNICODE range: U+FE20 — U+FE2F (combining half marks)
- ';
-
- /**
- * HTML special characters: entity reference => character.
- *
- * Every key is an entity reference spelled out in plain ASCII (ampersand, name,
- * semicolon); every value is the single character it stands for. A key that is
- * the bare character itself means the file was entity-decoded somewhere on the
- * way and the table has degenerated into a useless identity map.
- *
- * @var array
- */
- public static $html_special_chars_table = array(
- '&quot;' => "\x22", #U+0022 ["] quotation mark = APL quote
- '&amp;' => "\x26", #U+0026 [&] ampersand
- '&lt;' => "\x3c", #U+003C [<] less-than sign
- '&gt;' => "\x3e", #U+003E [>] greater-than sign
- );
-
- /**
- * HTML 4 named character entities: entity reference => character in UTF-8.
- *
- * Every key is an entity reference spelled out in plain ASCII (ampersand, name,
- * semicolon); every value is the UTF-8 byte sequence of the character it stands for.
- * Keys must never be the literal characters themselves: that would turn the table
- * into an identity map and make the whitespace / zero-width entries
- * indistinguishable from each other.
- *
- * @link http://www.fileformat.info/format/w3c/entitytest.htm?sort=Unicode%20Character HTML Entity Browser Test Page
- * @var array
- */
- public static $html_entity_table = array(
- #Latin-1 Entities:
- '&nbsp;' => "\xc2\xa0", #U+00A0 [ ] no-break space = non-breaking space
- '&iexcl;' => "\xc2\xa1", #U+00A1 [¡] inverted exclamation mark
- '&cent;' => "\xc2\xa2", #U+00A2 [¢] cent sign
- '&pound;' => "\xc2\xa3", #U+00A3 [£] pound sign
- '&curren;' => "\xc2\xa4", #U+00A4 [¤] currency sign
- '&yen;' => "\xc2\xa5", #U+00A5 [¥] yen sign = yuan sign
- '&brvbar;' => "\xc2\xa6", #U+00A6 [¦] broken bar = broken vertical bar
- '&sect;' => "\xc2\xa7", #U+00A7 [§] section sign
- '&uml;' => "\xc2\xa8", #U+00A8 [¨] diaeresis = spacing diaeresis
- '&copy;' => "\xc2\xa9", #U+00A9 [©] copyright sign
- '&ordf;' => "\xc2\xaa", #U+00AA [ª] feminine ordinal indicator
- '&laquo;' => "\xc2\xab", #U+00AB [«] left-pointing double angle quotation mark = left pointing guillemet
- '&not;' => "\xc2\xac", #U+00AC [¬] not sign
- '&shy;' => "\xc2\xad", #U+00AD [ ] soft hyphen = discretionary hyphen
- '&reg;' => "\xc2\xae", #U+00AE [®] registered sign = registered trade mark sign
- '&macr;' => "\xc2\xaf", #U+00AF [¯] macron = spacing macron = overline = APL overbar
- '&deg;' => "\xc2\xb0", #U+00B0 [°] degree sign
- '&plusmn;' => "\xc2\xb1", #U+00B1 [±] plus-minus sign = plus-or-minus sign
- '&sup2;' => "\xc2\xb2", #U+00B2 [²] superscript two = superscript digit two = squared
- '&sup3;' => "\xc2\xb3", #U+00B3 [³] superscript three = superscript digit three = cubed
- '&acute;' => "\xc2\xb4", #U+00B4 [´] acute accent = spacing acute
- '&micro;' => "\xc2\xb5", #U+00B5 [µ] micro sign
- '&para;' => "\xc2\xb6", #U+00B6 [¶] pilcrow sign = paragraph sign
- '&middot;' => "\xc2\xb7", #U+00B7 [·] middle dot = Georgian comma = Greek middle dot
- '&cedil;' => "\xc2\xb8", #U+00B8 [¸] cedilla = spacing cedilla
- '&sup1;' => "\xc2\xb9", #U+00B9 [¹] superscript one = superscript digit one
- '&ordm;' => "\xc2\xba", #U+00BA [º] masculine ordinal indicator
- '&raquo;' => "\xc2\xbb", #U+00BB [»] right-pointing double angle quotation mark = right pointing guillemet
- '&frac14;' => "\xc2\xbc", #U+00BC [¼] vulgar fraction one quarter = fraction one quarter
- '&frac12;' => "\xc2\xbd", #U+00BD [½] vulgar fraction one half = fraction one half
- '&frac34;' => "\xc2\xbe", #U+00BE [¾] vulgar fraction three quarters = fraction three quarters
- '&iquest;' => "\xc2\xbf", #U+00BF [¿] inverted question mark = turned question mark
- #Latin capital letter
- '&Agrave;' => "\xc3\x80", #Latin capital letter A with grave = Latin capital letter A grave
- '&Aacute;' => "\xc3\x81", #Latin capital letter A with acute
- '&Acirc;' => "\xc3\x82", #Latin capital letter A with circumflex
- '&Atilde;' => "\xc3\x83", #Latin capital letter A with tilde
- '&Auml;' => "\xc3\x84", #Latin capital letter A with diaeresis
- '&Aring;' => "\xc3\x85", #Latin capital letter A with ring above = Latin capital letter A ring
- '&AElig;' => "\xc3\x86", #Latin capital letter AE = Latin capital ligature AE
- '&Ccedil;' => "\xc3\x87", #Latin capital letter C with cedilla
- '&Egrave;' => "\xc3\x88", #Latin capital letter E with grave
- '&Eacute;' => "\xc3\x89", #Latin capital letter E with acute
- '&Ecirc;' => "\xc3\x8a", #Latin capital letter E with circumflex
- '&Euml;' => "\xc3\x8b", #Latin capital letter E with diaeresis
- '&Igrave;' => "\xc3\x8c", #Latin capital letter I with grave
- '&Iacute;' => "\xc3\x8d", #Latin capital letter I with acute
- '&Icirc;' => "\xc3\x8e", #Latin capital letter I with circumflex
- '&Iuml;' => "\xc3\x8f", #Latin capital letter I with diaeresis
- '&ETH;' => "\xc3\x90", #Latin capital letter ETH
- '&Ntilde;' => "\xc3\x91", #Latin capital letter N with tilde
- '&Ograve;' => "\xc3\x92", #Latin capital letter O with grave
- '&Oacute;' => "\xc3\x93", #Latin capital letter O with acute
- '&Ocirc;' => "\xc3\x94", #Latin capital letter O with circumflex
- '&Otilde;' => "\xc3\x95", #Latin capital letter O with tilde
- '&Ouml;' => "\xc3\x96", #Latin capital letter O with diaeresis
- '&times;' => "\xc3\x97", #U+00D7 [×] multiplication sign
- '&Oslash;' => "\xc3\x98", #Latin capital letter O with stroke = Latin capital letter O slash
- '&Ugrave;' => "\xc3\x99", #Latin capital letter U with grave
- '&Uacute;' => "\xc3\x9a", #Latin capital letter U with acute
- '&Ucirc;' => "\xc3\x9b", #Latin capital letter U with circumflex
- '&Uuml;' => "\xc3\x9c", #Latin capital letter U with diaeresis
- '&Yacute;' => "\xc3\x9d", #Latin capital letter Y with acute
- '&THORN;' => "\xc3\x9e", #Latin capital letter THORN
- #Latin small letter
- '&szlig;' => "\xc3\x9f", #Latin small letter sharp s = ess-zed
- '&agrave;' => "\xc3\xa0", #Latin small letter a with grave = Latin small letter a grave
- '&aacute;' => "\xc3\xa1", #Latin small letter a with acute
- '&acirc;' => "\xc3\xa2", #Latin small letter a with circumflex
- '&atilde;' => "\xc3\xa3", #Latin small letter a with tilde
- '&auml;' => "\xc3\xa4", #Latin small letter a with diaeresis
- '&aring;' => "\xc3\xa5", #Latin small letter a with ring above = Latin small letter a ring
- '&aelig;' => "\xc3\xa6", #Latin small letter ae = Latin small ligature ae
- '&ccedil;' => "\xc3\xa7", #Latin small letter c with cedilla
- '&egrave;' => "\xc3\xa8", #Latin small letter e with grave
- '&eacute;' => "\xc3\xa9", #Latin small letter e with acute
- '&ecirc;' => "\xc3\xaa", #Latin small letter e with circumflex
- '&euml;' => "\xc3\xab", #Latin small letter e with diaeresis
- '&igrave;' => "\xc3\xac", #Latin small letter i with grave
- '&iacute;' => "\xc3\xad", #Latin small letter i with acute
- '&icirc;' => "\xc3\xae", #Latin small letter i with circumflex
- '&iuml;' => "\xc3\xaf", #Latin small letter i with diaeresis
- '&eth;' => "\xc3\xb0", #Latin small letter eth
- '&ntilde;' => "\xc3\xb1", #Latin small letter n with tilde
- '&ograve;' => "\xc3\xb2", #Latin small letter o with grave
- '&oacute;' => "\xc3\xb3", #Latin small letter o with acute
- '&ocirc;' => "\xc3\xb4", #Latin small letter o with circumflex
- '&otilde;' => "\xc3\xb5", #Latin small letter o with tilde
- '&ouml;' => "\xc3\xb6", #Latin small letter o with diaeresis
- '&divide;' => "\xc3\xb7", #U+00F7 [÷] division sign
- '&oslash;' => "\xc3\xb8", #Latin small letter o with stroke = Latin small letter o slash
- '&ugrave;' => "\xc3\xb9", #Latin small letter u with grave
- '&uacute;' => "\xc3\xba", #Latin small letter u with acute
- '&ucirc;' => "\xc3\xbb", #Latin small letter u with circumflex
- '&uuml;' => "\xc3\xbc", #Latin small letter u with diaeresis
- '&yacute;' => "\xc3\xbd", #Latin small letter y with acute
- '&thorn;' => "\xc3\xbe", #Latin small letter thorn
- '&yuml;' => "\xc3\xbf", #Latin small letter y with diaeresis
- #Symbols and Greek Letters:
- '&fnof;' => "\xc6\x92", #U+0192 [ƒ] Latin small f with hook = function = florin
- '&Alpha;' => "\xce\x91", #Greek capital letter alpha
- '&Beta;' => "\xce\x92", #Greek capital letter beta
- '&Gamma;' => "\xce\x93", #Greek capital letter gamma
- '&Delta;' => "\xce\x94", #Greek capital letter delta
- '&Epsilon;' => "\xce\x95", #Greek capital letter epsilon
- '&Zeta;' => "\xce\x96", #Greek capital letter zeta
- '&Eta;' => "\xce\x97", #Greek capital letter eta
- '&Theta;' => "\xce\x98", #Greek capital letter theta
- '&Iota;' => "\xce\x99", #Greek capital letter iota
- '&Kappa;' => "\xce\x9a", #Greek capital letter kappa
- '&Lambda;' => "\xce\x9b", #Greek capital letter lambda
- '&Mu;' => "\xce\x9c", #Greek capital letter mu
- '&Nu;' => "\xce\x9d", #Greek capital letter nu
- '&Xi;' => "\xce\x9e", #Greek capital letter xi
- '&Omicron;' => "\xce\x9f", #Greek capital letter omicron
- '&Pi;' => "\xce\xa0", #Greek capital letter pi
- '&Rho;' => "\xce\xa1", #Greek capital letter rho
- '&Sigma;' => "\xce\xa3", #Greek capital letter sigma
- '&Tau;' => "\xce\xa4", #Greek capital letter tau
- '&Upsilon;' => "\xce\xa5", #Greek capital letter upsilon
- '&Phi;' => "\xce\xa6", #Greek capital letter phi
- '&Chi;' => "\xce\xa7", #Greek capital letter chi
- '&Psi;' => "\xce\xa8", #Greek capital letter psi
- '&Omega;' => "\xce\xa9", #Greek capital letter omega
- '&alpha;' => "\xce\xb1", #Greek small letter alpha
- '&beta;' => "\xce\xb2", #Greek small letter beta
- '&gamma;' => "\xce\xb3", #Greek small letter gamma
- '&delta;' => "\xce\xb4", #Greek small letter delta
- '&epsilon;' => "\xce\xb5", #Greek small letter epsilon
- '&zeta;' => "\xce\xb6", #Greek small letter zeta
- '&eta;' => "\xce\xb7", #Greek small letter eta
- '&theta;' => "\xce\xb8", #Greek small letter theta
- '&iota;' => "\xce\xb9", #Greek small letter iota
- '&kappa;' => "\xce\xba", #Greek small letter kappa
- '&lambda;' => "\xce\xbb", #Greek small letter lambda
- '&mu;' => "\xce\xbc", #Greek small letter mu
- '&nu;' => "\xce\xbd", #Greek small letter nu
- '&xi;' => "\xce\xbe", #Greek small letter xi
- '&omicron;' => "\xce\xbf", #Greek small letter omicron
- '&pi;' => "\xcf\x80", #Greek small letter pi
- '&rho;' => "\xcf\x81", #Greek small letter rho
- '&sigmaf;' => "\xcf\x82", #Greek small letter final sigma
- '&sigma;' => "\xcf\x83", #Greek small letter sigma
- '&tau;' => "\xcf\x84", #Greek small letter tau
- '&upsilon;' => "\xcf\x85", #Greek small letter upsilon
- '&phi;' => "\xcf\x86", #Greek small letter phi
- '&chi;' => "\xcf\x87", #Greek small letter chi
- '&psi;' => "\xcf\x88", #Greek small letter psi
- '&omega;' => "\xcf\x89", #Greek small letter omega
- '&thetasym;' => "\xcf\x91", #Greek small letter theta symbol
- '&upsih;' => "\xcf\x92", #Greek upsilon with hook symbol
- '&piv;' => "\xcf\x96", #U+03D6 [ϖ] Greek pi symbol
-
- '&bull;' => "\xe2\x80\xa2", #U+2022 [•] bullet = black small circle
- '&hellip;' => "\xe2\x80\xa6", #U+2026 […] horizontal ellipsis = three dot leader
- '&prime;' => "\xe2\x80\xb2", #U+2032 [′] prime = minutes = feet (used to denote minutes and feet)
- '&Prime;' => "\xe2\x80\xb3", #U+2033 [″] double prime = seconds = inches (used to denote seconds and inches)
- '&oline;' => "\xe2\x80\xbe", #U+203E [‾] overline = spacing overscore
- '&frasl;' => "\xe2\x81\x84", #U+2044 [⁄] fraction slash
- '&weierp;' => "\xe2\x84\x98", #U+2118 [℘] script capital P = power set = Weierstrass p
- '&image;' => "\xe2\x84\x91", #U+2111 [ℑ] blackletter capital I = imaginary part
- '&real;' => "\xe2\x84\x9c", #U+211C [ℜ] blackletter capital R = real part symbol
- '&trade;' => "\xe2\x84\xa2", #U+2122 [™] trade mark sign
- '&alefsym;' => "\xe2\x84\xb5", #U+2135 [ℵ] alef symbol = first transfinite cardinal
- '&larr;' => "\xe2\x86\x90", #U+2190 [←] leftwards arrow
- '&uarr;' => "\xe2\x86\x91", #U+2191 [↑] upwards arrow
- '&rarr;' => "\xe2\x86\x92", #U+2192 [→] rightwards arrow
- '&darr;' => "\xe2\x86\x93", #U+2193 [↓] downwards arrow
- '&harr;' => "\xe2\x86\x94", #U+2194 [↔] left right arrow
- '&crarr;' => "\xe2\x86\xb5", #U+21B5 [↵] downwards arrow with corner leftwards = carriage return
- '&lArr;' => "\xe2\x87\x90", #U+21D0 [⇐] leftwards double arrow
- '&uArr;' => "\xe2\x87\x91", #U+21D1 [⇑] upwards double arrow
- '&rArr;' => "\xe2\x87\x92", #U+21D2 [⇒] rightwards double arrow
- '&dArr;' => "\xe2\x87\x93", #U+21D3 [⇓] downwards double arrow
- '&hArr;' => "\xe2\x87\x94", #U+21D4 [⇔] left right double arrow
- '&forall;' => "\xe2\x88\x80", #U+2200 [∀] for all
- '&part;' => "\xe2\x88\x82", #U+2202 [∂] partial differential
- '&exist;' => "\xe2\x88\x83", #U+2203 [∃] there exists
- '&empty;' => "\xe2\x88\x85", #U+2205 [∅] empty set = null set = diameter
- '&nabla;' => "\xe2\x88\x87", #U+2207 [∇] nabla = backward difference
- '&isin;' => "\xe2\x88\x88", #U+2208 [∈] element of
- '&notin;' => "\xe2\x88\x89", #U+2209 [∉] not an element of
- '&ni;' => "\xe2\x88\x8b", #U+220B [∋] contains as member
- '&prod;' => "\xe2\x88\x8f", #U+220F [∏] n-ary product = product sign
- '&sum;' => "\xe2\x88\x91", #U+2211 [∑] n-ary summation
- '&minus;' => "\xe2\x88\x92", #U+2212 [−] minus sign
- '&lowast;' => "\xe2\x88\x97", #U+2217 [∗] asterisk operator
- '&radic;' => "\xe2\x88\x9a", #U+221A [√] square root = radical sign
- '&prop;' => "\xe2\x88\x9d", #U+221D [∝] proportional to
- '&infin;' => "\xe2\x88\x9e", #U+221E [∞] infinity
- '&ang;' => "\xe2\x88\xa0", #U+2220 [∠] angle
- '&and;' => "\xe2\x88\xa7", #U+2227 [∧] logical and = wedge
- '&or;' => "\xe2\x88\xa8", #U+2228 [∨] logical or = vee
- '&cap;' => "\xe2\x88\xa9", #U+2229 [∩] intersection = cap
- '&cup;' => "\xe2\x88\xaa", #U+222A [∪] union = cup
- '&int;' => "\xe2\x88\xab", #U+222B [∫] integral
- '&there4;' => "\xe2\x88\xb4", #U+2234 [∴] therefore
- '&sim;' => "\xe2\x88\xbc", #U+223C [∼] tilde operator = varies with = similar to
- '&cong;' => "\xe2\x89\x85", #U+2245 [≅] approximately equal to
- '&asymp;' => "\xe2\x89\x88", #U+2248 [≈] almost equal to = asymptotic to
- '&ne;' => "\xe2\x89\xa0", #U+2260 [≠] not equal to
- '&equiv;' => "\xe2\x89\xa1", #U+2261 [≡] identical to
- '&le;' => "\xe2\x89\xa4", #U+2264 [≤] less-than or equal to
- '&ge;' => "\xe2\x89\xa5", #U+2265 [≥] greater-than or equal to
- '&sub;' => "\xe2\x8a\x82", #U+2282 [⊂] subset of
- '&sup;' => "\xe2\x8a\x83", #U+2283 [⊃] superset of
- '&nsub;' => "\xe2\x8a\x84", #U+2284 [⊄] not a subset of
- '&sube;' => "\xe2\x8a\x86", #U+2286 [⊆] subset of or equal to
- '&supe;' => "\xe2\x8a\x87", #U+2287 [⊇] superset of or equal to
- '&oplus;' => "\xe2\x8a\x95", #U+2295 [⊕] circled plus = direct sum
- '&otimes;' => "\xe2\x8a\x97", #U+2297 [⊗] circled times = vector product
- '&perp;' => "\xe2\x8a\xa5", #U+22A5 [⊥] up tack = orthogonal to = perpendicular
- '&sdot;' => "\xe2\x8b\x85", #U+22C5 [⋅] dot operator
- '&lceil;' => "\xe2\x8c\x88", #U+2308 [⌈] left ceiling = APL upstile
- '&rceil;' => "\xe2\x8c\x89", #U+2309 [⌉] right ceiling
- '&lfloor;' => "\xe2\x8c\x8a", #U+230A [⌊] left floor = APL downstile
- '&rfloor;' => "\xe2\x8c\x8b", #U+230B [⌋] right floor
- '&lang;' => "\xe2\x8c\xa9", #U+2329 [〈] left-pointing angle bracket = bra
- '&rang;' => "\xe2\x8c\xaa", #U+232A [〉] right-pointing angle bracket = ket
- '&loz;' => "\xe2\x97\x8a", #U+25CA [◊] lozenge
- '&spades;' => "\xe2\x99\xa0", #U+2660 [♠] black spade suit
- '&clubs;' => "\xe2\x99\xa3", #U+2663 [♣] black club suit = shamrock
- '&hearts;' => "\xe2\x99\xa5", #U+2665 [♥] black heart suit = valentine
- '&diams;' => "\xe2\x99\xa6", #U+2666 [♦] black diamond suit
- #Other Special Characters:
- '&OElig;' => "\xc5\x92", #U+0152 [Œ] Latin capital ligature OE
- '&oelig;' => "\xc5\x93", #U+0153 [œ] Latin small ligature oe
- '&Scaron;' => "\xc5\xa0", #U+0160 [Š] Latin capital letter S with caron
- '&scaron;' => "\xc5\xa1", #U+0161 [š] Latin small letter s with caron
- '&Yuml;' => "\xc5\xb8", #U+0178 [Ÿ] Latin capital letter Y with diaeresis
- '&circ;' => "\xcb\x86", #U+02C6 [ˆ] modifier letter circumflex accent
- '&tilde;' => "\xcb\x9c", #U+02DC [˜] small tilde
- '&ensp;' => "\xe2\x80\x82", #U+2002 [ ] en space
- '&emsp;' => "\xe2\x80\x83", #U+2003 [ ] em space
- '&thinsp;' => "\xe2\x80\x89", #U+2009 [ ] thin space
- '&zwnj;' => "\xe2\x80\x8c", #U+200C [] zero width non-joiner
- '&zwj;' => "\xe2\x80\x8d", #U+200D [] zero width joiner
- '&lrm;' => "\xe2\x80\x8e", #U+200E [] left-to-right mark
- '&rlm;' => "\xe2\x80\x8f", #U+200F [] right-to-left mark
- '&ndash;' => "\xe2\x80\x93", #U+2013 [–] en dash
- '&mdash;' => "\xe2\x80\x94", #U+2014 [—] em dash
- '&lsquo;' => "\xe2\x80\x98", #U+2018 [‘] left single quotation mark
- '&rsquo;' => "\xe2\x80\x99", #U+2019 [’] right single quotation mark (and apostrophe!)
- '&sbquo;' => "\xe2\x80\x9a", #U+201A [‚] single low-9 quotation mark
- '&ldquo;' => "\xe2\x80\x9c", #U+201C [“] left double quotation mark
- '&rdquo;' => "\xe2\x80\x9d", #U+201D [”] right double quotation mark
- '&bdquo;' => "\xe2\x80\x9e", #U+201E [„] double low-9 quotation mark
- '&dagger;' => "\xe2\x80\xa0", #U+2020 [†] dagger
- '&Dagger;' => "\xe2\x80\xa1", #U+2021 [‡] double dagger
- '&permil;' => "\xe2\x80\xb0", #U+2030 [‰] per mille sign
- '&lsaquo;' => "\xe2\x80\xb9", #U+2039 [‹] single left-pointing angle quotation mark
- '&rsaquo;' => "\xe2\x80\xba", #U+203A [›] single right-pointing angle quotation mark
- '&euro;' => "\xe2\x82\xac", #U+20AC [€] euro sign
- );
-
- /**
- * This table describes how cp1259 characters map into Unicode (UTF-8).
- * The cp1259 map describes the standard Tatar Cyrillic charset and is based on the cp1251 table.
- * cp1259 is an outdated single-byte encoding of the Tatar language,
- * which includes all the Russian letters from cp1251.
- *
- * Keys are single cp1259 bytes in the range 0x80 — 0xFF, values are the UTF-8
- * byte sequences of the corresponding Unicode characters.
- * Bytes 0x00 — 0x7F (ASCII) have no entries because they are kept as is,
- * and byte 0x98 has no entry because it is undefined in this code page.
- *
- * @link http://search.cpan.org/CPAN/authors/id/A/AM/AMICHAUER/Lingua-TT-Yanalif-0.08.tar.gz
- * @link http://www.unicode.org/charts/PDF/U0400.pdf
- * @var array
- */
- public static $cp1259_table = array(
- #bytes from 0x00 to 0x7F (ASCII) saved as is
- "\x80" => "\xd3\x98", #U+04d8 CYRILLIC CAPITAL LETTER SCHWA
- "\x81" => "\xd0\x83", #U+0403 CYRILLIC CAPITAL LETTER GJE
- "\x82" => "\xe2\x80\x9a", #U+201a SINGLE LOW-9 QUOTATION MARK
- "\x83" => "\xd1\x93", #U+0453 CYRILLIC SMALL LETTER GJE
- "\x84" => "\xe2\x80\x9e", #U+201e DOUBLE LOW-9 QUOTATION MARK
- "\x85" => "\xe2\x80\xa6", #U+2026 HORIZONTAL ELLIPSIS
- "\x86" => "\xe2\x80\xa0", #U+2020 DAGGER
- "\x87" => "\xe2\x80\xa1", #U+2021 DOUBLE DAGGER
- "\x88" => "\xe2\x82\xac", #U+20ac EURO SIGN
- "\x89" => "\xe2\x80\xb0", #U+2030 PER MILLE SIGN
- "\x8a" => "\xd3\xa8", #U+04e8 CYRILLIC CAPITAL LETTER BARRED O
- "\x8b" => "\xe2\x80\xb9", #U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
- "\x8c" => "\xd2\xae", #U+04ae CYRILLIC CAPITAL LETTER STRAIGHT U
- "\x8d" => "\xd2\x96", #U+0496 CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
- "\x8e" => "\xd2\xa2", #U+04a2 CYRILLIC CAPITAL LETTER EN WITH HOOK
- "\x8f" => "\xd2\xba", #U+04ba CYRILLIC CAPITAL LETTER SHHA
- "\x90" => "\xd3\x99", #U+04d9 CYRILLIC SMALL LETTER SCHWA
- "\x91" => "\xe2\x80\x98", #U+2018 LEFT SINGLE QUOTATION MARK
- "\x92" => "\xe2\x80\x99", #U+2019 RIGHT SINGLE QUOTATION MARK
- "\x93" => "\xe2\x80\x9c", #U+201c LEFT DOUBLE QUOTATION MARK
- "\x94" => "\xe2\x80\x9d", #U+201d RIGHT DOUBLE QUOTATION MARK
- "\x95" => "\xe2\x80\xa2", #U+2022 BULLET
- "\x96" => "\xe2\x80\x93", #U+2013 EN DASH
- "\x97" => "\xe2\x80\x94", #U+2014 EM DASH
- #"\x98" #UNDEFINED
- "\x99" => "\xe2\x84\xa2", #U+2122 TRADE MARK SIGN
- "\x9a" => "\xd3\xa9", #U+04e9 CYRILLIC SMALL LETTER BARRED O
- "\x9b" => "\xe2\x80\xba", #U+203a SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
- "\x9c" => "\xd2\xaf", #U+04af CYRILLIC SMALL LETTER STRAIGHT U
- "\x9d" => "\xd2\x97", #U+0497 CYRILLIC SMALL LETTER ZHE WITH DESCENDER
- "\x9e" => "\xd2\xa3", #U+04a3 CYRILLIC SMALL LETTER EN WITH HOOK
- "\x9f" => "\xd2\xbb", #U+04bb CYRILLIC SMALL LETTER SHHA
- "\xa0" => "\xc2\xa0", #U+00a0 NO-BREAK SPACE
- "\xa1" => "\xd0\x8e", #U+040e CYRILLIC CAPITAL LETTER SHORT U
- "\xa2" => "\xd1\x9e", #U+045e CYRILLIC SMALL LETTER SHORT U
- "\xa3" => "\xd0\x88", #U+0408 CYRILLIC CAPITAL LETTER JE
- "\xa4" => "\xc2\xa4", #U+00a4 CURRENCY SIGN
- "\xa5" => "\xd2\x90", #U+0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN
- "\xa6" => "\xc2\xa6", #U+00a6 BROKEN BAR
- "\xa7" => "\xc2\xa7", #U+00a7 SECTION SIGN
- "\xa8" => "\xd0\x81", #U+0401 CYRILLIC CAPITAL LETTER IO
- "\xa9" => "\xc2\xa9", #U+00a9 COPYRIGHT SIGN
- "\xaa" => "\xd0\x84", #U+0404 CYRILLIC CAPITAL LETTER UKRAINIAN IE
- "\xab" => "\xc2\xab", #U+00ab LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
- "\xac" => "\xc2\xac", #U+00ac NOT SIGN
- "\xad" => "\xc2\xad", #U+00ad SOFT HYPHEN
- "\xae" => "\xc2\xae", #U+00ae REGISTERED SIGN
- "\xaf" => "\xd0\x87", #U+0407 CYRILLIC CAPITAL LETTER YI
- "\xb0" => "\xc2\xb0", #U+00b0 DEGREE SIGN
- "\xb1" => "\xc2\xb1", #U+00b1 PLUS-MINUS SIGN
- "\xb2" => "\xd0\x86", #U+0406 CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
- "\xb3" => "\xd1\x96", #U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
- "\xb4" => "\xd2\x91", #U+0491 CYRILLIC SMALL LETTER GHE WITH UPTURN
- "\xb5" => "\xc2\xb5", #U+00b5 MICRO SIGN
- "\xb6" => "\xc2\xb6", #U+00b6 PILCROW SIGN
- "\xb7" => "\xc2\xb7", #U+00b7 MIDDLE DOT
- "\xb8" => "\xd1\x91", #U+0451 CYRILLIC SMALL LETTER IO
- "\xb9" => "\xe2\x84\x96", #U+2116 NUMERO SIGN
- "\xba" => "\xd1\x94", #U+0454 CYRILLIC SMALL LETTER UKRAINIAN IE
- "\xbb" => "\xc2\xbb", #U+00bb RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
- "\xbc" => "\xd1\x98", #U+0458 CYRILLIC SMALL LETTER JE
- "\xbd" => "\xd0\x85", #U+0405 CYRILLIC CAPITAL LETTER DZE
- "\xbe" => "\xd1\x95", #U+0455 CYRILLIC SMALL LETTER DZE
- "\xbf" => "\xd1\x97", #U+0457 CYRILLIC SMALL LETTER YI
- "\xc0" => "\xd0\x90", #U+0410 CYRILLIC CAPITAL LETTER A
- "\xc1" => "\xd0\x91", #U+0411 CYRILLIC CAPITAL LETTER BE
- "\xc2" => "\xd0\x92", #U+0412 CYRILLIC CAPITAL LETTER VE
- "\xc3" => "\xd0\x93", #U+0413 CYRILLIC CAPITAL LETTER GHE
- "\xc4" => "\xd0\x94", #U+0414 CYRILLIC CAPITAL LETTER DE
- "\xc5" => "\xd0\x95", #U+0415 CYRILLIC CAPITAL LETTER IE
- "\xc6" => "\xd0\x96", #U+0416 CYRILLIC CAPITAL LETTER ZHE
- "\xc7" => "\xd0\x97", #U+0417 CYRILLIC CAPITAL LETTER ZE
- "\xc8" => "\xd0\x98", #U+0418 CYRILLIC CAPITAL LETTER I
- "\xc9" => "\xd0\x99", #U+0419 CYRILLIC CAPITAL LETTER SHORT I
- "\xca" => "\xd0\x9a", #U+041a CYRILLIC CAPITAL LETTER KA
- "\xcb" => "\xd0\x9b", #U+041b CYRILLIC CAPITAL LETTER EL
- "\xcc" => "\xd0\x9c", #U+041c CYRILLIC CAPITAL LETTER EM
- "\xcd" => "\xd0\x9d", #U+041d CYRILLIC CAPITAL LETTER EN
- "\xce" => "\xd0\x9e", #U+041e CYRILLIC CAPITAL LETTER O
- "\xcf" => "\xd0\x9f", #U+041f CYRILLIC CAPITAL LETTER PE
- "\xd0" => "\xd0\xa0", #U+0420 CYRILLIC CAPITAL LETTER ER
- "\xd1" => "\xd0\xa1", #U+0421 CYRILLIC CAPITAL LETTER ES
- "\xd2" => "\xd0\xa2", #U+0422 CYRILLIC CAPITAL LETTER TE
- "\xd3" => "\xd0\xa3", #U+0423 CYRILLIC CAPITAL LETTER U
- "\xd4" => "\xd0\xa4", #U+0424 CYRILLIC CAPITAL LETTER EF
- "\xd5" => "\xd0\xa5", #U+0425 CYRILLIC CAPITAL LETTER HA
- "\xd6" => "\xd0\xa6", #U+0426 CYRILLIC CAPITAL LETTER TSE
- "\xd7" => "\xd0\xa7", #U+0427 CYRILLIC CAPITAL LETTER CHE
- "\xd8" => "\xd0\xa8", #U+0428 CYRILLIC CAPITAL LETTER SHA
- "\xd9" => "\xd0\xa9", #U+0429 CYRILLIC CAPITAL LETTER SHCHA
- "\xda" => "\xd0\xaa", #U+042a CYRILLIC CAPITAL LETTER HARD SIGN
- "\xdb" => "\xd0\xab", #U+042b CYRILLIC CAPITAL LETTER YERU
- "\xdc" => "\xd0\xac", #U+042c CYRILLIC CAPITAL LETTER SOFT SIGN
- "\xdd" => "\xd0\xad", #U+042d CYRILLIC CAPITAL LETTER E
- "\xde" => "\xd0\xae", #U+042e CYRILLIC CAPITAL LETTER YU
- "\xdf" => "\xd0\xaf", #U+042f CYRILLIC CAPITAL LETTER YA
- "\xe0" => "\xd0\xb0", #U+0430 CYRILLIC SMALL LETTER A
- "\xe1" => "\xd0\xb1", #U+0431 CYRILLIC SMALL LETTER BE
- "\xe2" => "\xd0\xb2", #U+0432 CYRILLIC SMALL LETTER VE
- "\xe3" => "\xd0\xb3", #U+0433 CYRILLIC SMALL LETTER GHE
- "\xe4" => "\xd0\xb4", #U+0434 CYRILLIC SMALL LETTER DE
- "\xe5" => "\xd0\xb5", #U+0435 CYRILLIC SMALL LETTER IE
- "\xe6" => "\xd0\xb6", #U+0436 CYRILLIC SMALL LETTER ZHE
- "\xe7" => "\xd0\xb7", #U+0437 CYRILLIC SMALL LETTER ZE
- "\xe8" => "\xd0\xb8", #U+0438 CYRILLIC SMALL LETTER I
- "\xe9" => "\xd0\xb9", #U+0439 CYRILLIC SMALL LETTER SHORT I
- "\xea" => "\xd0\xba", #U+043a CYRILLIC SMALL LETTER KA
- "\xeb" => "\xd0\xbb", #U+043b CYRILLIC SMALL LETTER EL
- "\xec" => "\xd0\xbc", #U+043c CYRILLIC SMALL LETTER EM
- "\xed" => "\xd0\xbd", #U+043d CYRILLIC SMALL LETTER EN
- "\xee" => "\xd0\xbe", #U+043e CYRILLIC SMALL LETTER O
- "\xef" => "\xd0\xbf", #U+043f CYRILLIC SMALL LETTER PE
- "\xf0" => "\xd1\x80", #U+0440 CYRILLIC SMALL LETTER ER
- "\xf1" => "\xd1\x81", #U+0441 CYRILLIC SMALL LETTER ES
- "\xf2" => "\xd1\x82", #U+0442 CYRILLIC SMALL LETTER TE
- "\xf3" => "\xd1\x83", #U+0443 CYRILLIC SMALL LETTER U
- "\xf4" => "\xd1\x84", #U+0444 CYRILLIC SMALL LETTER EF
- "\xf5" => "\xd1\x85", #U+0445 CYRILLIC SMALL LETTER HA
- "\xf6" => "\xd1\x86", #U+0446 CYRILLIC SMALL LETTER TSE
- "\xf7" => "\xd1\x87", #U+0447 CYRILLIC SMALL LETTER CHE
- "\xf8" => "\xd1\x88", #U+0448 CYRILLIC SMALL LETTER SHA
- "\xf9" => "\xd1\x89", #U+0449 CYRILLIC SMALL LETTER SHCHA
- "\xfa" => "\xd1\x8a", #U+044a CYRILLIC SMALL LETTER HARD SIGN
- "\xfb" => "\xd1\x8b", #U+044b CYRILLIC SMALL LETTER YERU
- "\xfc" => "\xd1\x8c", #U+044c CYRILLIC SMALL LETTER SOFT SIGN
- "\xfd" => "\xd1\x8d", #U+044d CYRILLIC SMALL LETTER E
- "\xfe" => "\xd1\x8e", #U+044e CYRILLIC SMALL LETTER YU
- "\xff" => "\xd1\x8f", #U+044f CYRILLIC SMALL LETTER YA
- );
-
- /**
- * UTF-8 Case lookup table
- *
- * This lookup table maps the upper case letters to their corresponding
- * lower case letters in UTF-8
- *
- * @author Andreas Gohr <andi@splitbrain.org>
- */
- public static $convert_case_table = array(
- #CASE_UPPER => case_lower
- "\x41" => "\x61", #A a
- "\x42" => "\x62", #B b
- "\x43" => "\x63", #C c
- "\x44" => "\x64", #D d
- "\x45" => "\x65", #E e
- "\x46" => "\x66", #F f
- "\x47" => "\x67", #G g
- "\x48" => "\x68", #H h
- "\x49" => "\x69", #I i
- "\x4a" => "\x6a", #J j
- "\x4b" => "\x6b", #K k
- "\x4c" => "\x6c", #L l
- "\x4d" => "\x6d", #M m
- "\x4e" => "\x6e", #N n
- "\x4f" => "\x6f", #O o
- "\x50" => "\x70", #P p
- "\x51" => "\x71", #Q q
- "\x52" => "\x72", #R r
- "\x53" => "\x73", #S s
- "\x54" => "\x74", #T t
- "\x55" => "\x75", #U u
- "\x56" => "\x76", #V v
- "\x57" => "\x77", #W w
- "\x58" => "\x78", #X x
- "\x59" => "\x79", #Y y
- "\x5a" => "\x7a", #Z z
- "\xc3\x80" => "\xc3\xa0",
- "\xc3\x81" => "\xc3\xa1",
- "\xc3\x82" => "\xc3\xa2",
- "\xc3\x83" => "\xc3\xa3",
- "\xc3\x84" => "\xc3\xa4",
- "\xc3\x85" => "\xc3\xa5",
- "\xc3\x86" => "\xc3\xa6",
- "\xc3\x87" => "\xc3\xa7",
- "\xc3\x88" => "\xc3\xa8",
- "\xc3\x89" => "\xc3\xa9",
- "\xc3\x8a" => "\xc3\xaa",
- "\xc3\x8b" => "\xc3\xab",
- "\xc3\x8c" => "\xc3\xac",
- "\xc3\x8d" => "\xc3\xad",
- "\xc3\x8e" => "\xc3\xae",
- "\xc3\x8f" => "\xc3\xaf",
- "\xc3\x90" => "\xc3\xb0",
- "\xc3\x91" => "\xc3\xb1",
- "\xc3\x92" => "\xc3\xb2",
- "\xc3\x93" => "\xc3\xb3",
- "\xc3\x94" => "\xc3\xb4",
- "\xc3\x95" => "\xc3\xb5",
- "\xc3\x96" => "\xc3\xb6",
- "\xc3\x98" => "\xc3\xb8",
- "\xc3\x99" => "\xc3\xb9",
- "\xc3\x9a" => "\xc3\xba",
- "\xc3\x9b" => "\xc3\xbb",
- "\xc3\x9c" => "\xc3\xbc",
- "\xc3\x9d" => "\xc3\xbd",
- "\xc3\x9e" => "\xc3\xbe",
- "\xc4\x80" => "\xc4\x81",
- "\xc4\x82" => "\xc4\x83",
- "\xc4\x84" => "\xc4\x85",
- "\xc4\x86" => "\xc4\x87",
- "\xc4\x88" => "\xc4\x89",
- "\xc4\x8a" => "\xc4\x8b",
- "\xc4\x8c" => "\xc4\x8d",
- "\xc4\x8e" => "\xc4\x8f",
- "\xc4\x90" => "\xc4\x91",
- "\xc4\x92" => "\xc4\x93",
- "\xc4\x94" => "\xc4\x95",
- "\xc4\x96" => "\xc4\x97",
- "\xc4\x98" => "\xc4\x99",
- "\xc4\x9a" => "\xc4\x9b",
- "\xc4\x9c" => "\xc4\x9d",
- "\xc4\x9e" => "\xc4\x9f",
- "\xc4\xa0" => "\xc4\xa1",
- "\xc4\xa2" => "\xc4\xa3",
- "\xc4\xa4" => "\xc4\xa5",
- "\xc4\xa6" => "\xc4\xa7",
- "\xc4\xa8" => "\xc4\xa9",
- "\xc4\xaa" => "\xc4\xab",
- "\xc4\xac" => "\xc4\xad",
- "\xc4\xae" => "\xc4\xaf",
- "\xc4\xb2" => "\xc4\xb3",
- "\xc4\xb4" => "\xc4\xb5",
- "\xc4\xb6" => "\xc4\xb7",
- "\xc4\xb9" => "\xc4\xba",
- "\xc4\xbb" => "\xc4\xbc",
- "\xc4\xbd" => "\xc4\xbe",
- "\xc4\xbf" => "\xc5\x80",
- "\xc5\x81" => "\xc5\x82",
- "\xc5\x83" => "\xc5\x84",
- "\xc5\x85" => "\xc5\x86",
- "\xc5\x87" => "\xc5\x88",
- "\xc5\x8a" => "\xc5\x8b",
- "\xc5\x8c" => "\xc5\x8d",
- "\xc5\x8e" => "\xc5\x8f",
- "\xc5\x90" => "\xc5\x91",
- "\xc5\x92" => "\xc5\x93",
- "\xc5\x94" => "\xc5\x95",
- "\xc5\x96" => "\xc5\x97",
- "\xc5\x98" => "\xc5\x99",
- "\xc5\x9a" => "\xc5\x9b",
- "\xc5\x9c" => "\xc5\x9d",
- "\xc5\x9e" => "\xc5\x9f",
- "\xc5\xa0" => "\xc5\xa1",
- "\xc5\xa2" => "\xc5\xa3",
- "\xc5\xa4" => "\xc5\xa5",
- "\xc5\xa6" => "\xc5\xa7",
- "\xc5\xa8" => "\xc5\xa9",
- "\xc5\xaa" => "\xc5\xab",
- "\xc5\xac" => "\xc5\xad",
- "\xc5\xae" => "\xc5\xaf",
- "\xc5\xb0" => "\xc5\xb1",
- "\xc5\xb2" => "\xc5\xb3",
- "\xc5\xb4" => "\xc5\xb5",
- "\xc5\xb6" => "\xc5\xb7",
- "\xc5\xb8" => "\xc3\xbf",
- "\xc5\xb9" => "\xc5\xba",
- "\xc5\xbb" => "\xc5\xbc",
- "\xc5\xbd" => "\xc5\xbe",
- "\xc6\x81" => "\xc9\x93",
- "\xc6\x82" => "\xc6\x83",
- "\xc6\x84" => "\xc6\x85",
- "\xc6\x86" => "\xc9\x94",
- "\xc6\x87" => "\xc6\x88",
- "\xc6\x89" => "\xc9\x96",
- "\xc6\x8a" => "\xc9\x97",
- "\xc6\x8b" => "\xc6\x8c",
- "\xc6\x8e" => "\xc7\x9d",
- "\xc6\x8f" => "\xc9\x99",
- "\xc6\x90" => "\xc9\x9b",
- "\xc6\x91" => "\xc6\x92",
- "\xc6\x94" => "\xc9\xa3",
- "\xc6\x96" => "\xc9\xa9",
- "\xc6\x97" => "\xc9\xa8",
- "\xc6\x98" => "\xc6\x99",
- "\xc6\x9c" => "\xc9\xaf",
- "\xc6\x9d" => "\xc9\xb2",
- "\xc6\x9f" => "\xc9\xb5",
- "\xc6\xa0" => "\xc6\xa1",
- "\xc6\xa2" => "\xc6\xa3",
- "\xc6\xa4" => "\xc6\xa5",
- "\xc6\xa6" => "\xca\x80",
- "\xc6\xa7" => "\xc6\xa8",
- "\xc6\xa9" => "\xca\x83",
- "\xc6\xac" => "\xc6\xad",
- "\xc6\xae" => "\xca\x88",
- "\xc6\xaf" => "\xc6\xb0",
- "\xc6\xb1" => "\xca\x8a",
- "\xc6\xb2" => "\xca\x8b",
- "\xc6\xb3" => "\xc6\xb4",
- "\xc6\xb5" => "\xc6\xb6",
- "\xc6\xb7" => "\xca\x92",
- "\xc6\xb8" => "\xc6\xb9",
- "\xc6\xbc" => "\xc6\xbd",
- "\xc7\x85" => "\xc7\x86",
- "\xc7\x88" => "\xc7\x89",
- "\xc7\x8b" => "\xc7\x8c",
- "\xc7\x8d" => "\xc7\x8e",
- "\xc7\x8f" => "\xc7\x90",
- "\xc7\x91" => "\xc7\x92",
- "\xc7\x93" => "\xc7\x94",
- "\xc7\x95" => "\xc7\x96",
- "\xc7\x97" => "\xc7\x98",
- "\xc7\x99" => "\xc7\x9a",
- "\xc7\x9b" => "\xc7\x9c",
- "\xc7\x9e" => "\xc7\x9f",
- "\xc7\xa0" => "\xc7\xa1",
- "\xc7\xa2" => "\xc7\xa3",
- "\xc7\xa4" => "\xc7\xa5",
- "\xc7\xa6" => "\xc7\xa7",
- "\xc7\xa8" => "\xc7\xa9",
- "\xc7\xaa" => "\xc7\xab",
- "\xc7\xac" => "\xc7\xad",
- "\xc7\xae" => "\xc7\xaf",
- "\xc7\xb2" => "\xc7\xb3",
- "\xc7\xb4" => "\xc7\xb5",
- "\xc7\xb6" => "\xc6\x95",
- "\xc7\xb7" => "\xc6\xbf",
- "\xc7\xb8" => "\xc7\xb9",
- "\xc7\xba" => "\xc7\xbb",
- "\xc7\xbc" => "\xc7\xbd",
- "\xc7\xbe" => "\xc7\xbf",
- "\xc8\x80" => "\xc8\x81",
- "\xc8\x82" => "\xc8\x83",
- "\xc8\x84" => "\xc8\x85",
- "\xc8\x86" => "\xc8\x87",
- "\xc8\x88" => "\xc8\x89",
- "\xc8\x8a" => "\xc8\x8b",
- "\xc8\x8c" => "\xc8\x8d",
- "\xc8\x8e" => "\xc8\x8f",
- "\xc8\x90" => "\xc8\x91",
- "\xc8\x92" => "\xc8\x93",
- "\xc8\x94" => "\xc8\x95",
- "\xc8\x96" => "\xc8\x97",
- "\xc8\x98" => "\xc8\x99",
- "\xc8\x9a" => "\xc8\x9b",
- "\xc8\x9c" => "\xc8\x9d",
- "\xc8\x9e" => "\xc8\x9f",
- "\xc8\xa0" => "\xc6\x9e",
- "\xc8\xa2" => "\xc8\xa3",
- "\xc8\xa4" => "\xc8\xa5",
- "\xc8\xa6" => "\xc8\xa7",
- "\xc8\xa8" => "\xc8\xa9",
- "\xc8\xaa" => "\xc8\xab",
- "\xc8\xac" => "\xc8\xad",
- "\xc8\xae" => "\xc8\xaf",
- "\xc8\xb0" => "\xc8\xb1",
- "\xc8\xb2" => "\xc8\xb3",
- "\xce\x86" => "\xce\xac",
- "\xce\x88" => "\xce\xad",
- "\xce\x89" => "\xce\xae",
- "\xce\x8a" => "\xce\xaf",
- "\xce\x8c" => "\xcf\x8c",
- "\xce\x8e" => "\xcf\x8d",
- "\xce\x8f" => "\xcf\x8e",
- "\xce\x91" => "\xce\xb1",
- "\xce\x92" => "\xce\xb2",
- "\xce\x93" => "\xce\xb3",
- "\xce\x94" => "\xce\xb4",
- "\xce\x95" => "\xce\xb5",
- "\xce\x96" => "\xce\xb6",
- "\xce\x97" => "\xce\xb7",
- "\xce\x98" => "\xce\xb8",
- "\xce\x99" => "\xce\xb9",
- "\xce\x9a" => "\xce\xba",
- "\xce\x9b" => "\xce\xbb",
- "\xce\x9c" => "\xc2\xb5",
- "\xce\x9d" => "\xce\xbd",
- "\xce\x9e" => "\xce\xbe",
- "\xce\x9f" => "\xce\xbf",
- "\xce\xa0" => "\xcf\x80",
- "\xce\xa1" => "\xcf\x81",
- "\xce\xa3" => "\xcf\x82",
- "\xce\xa4" => "\xcf\x84",
- "\xce\xa5" => "\xcf\x85",
- "\xce\xa6" => "\xcf\x86",
- "\xce\xa7" => "\xcf\x87",
- "\xce\xa8" => "\xcf\x88",
- "\xce\xa9" => "\xcf\x89",
- "\xce\xaa" => "\xcf\x8a",
- "\xce\xab" => "\xcf\x8b",
- "\xcf\x98" => "\xcf\x99",
- "\xcf\x9a" => "\xcf\x9b",
- "\xcf\x9c" => "\xcf\x9d",
- "\xcf\x9e" => "\xcf\x9f",
- "\xcf\xa0" => "\xcf\xa1",
- "\xcf\xa2" => "\xcf\xa3",
- "\xcf\xa4" => "\xcf\xa5",
- "\xcf\xa6" => "\xcf\xa7",
- "\xcf\xa8" => "\xcf\xa9",
- "\xcf\xaa" => "\xcf\xab",
- "\xcf\xac" => "\xcf\xad",
- "\xcf\xae" => "\xcf\xaf",
- "\xd0\x80" => "\xd1\x90",
- "\xd0\x81" => "\xd1\x91",
- "\xd0\x82" => "\xd1\x92",
- "\xd0\x83" => "\xd1\x93",
- "\xd0\x84" => "\xd1\x94",
- "\xd0\x85" => "\xd1\x95",
- "\xd0\x86" => "\xd1\x96",
- "\xd0\x87" => "\xd1\x97",
- "\xd0\x88" => "\xd1\x98",
- "\xd0\x89" => "\xd1\x99",
- "\xd0\x8a" => "\xd1\x9a",
- "\xd0\x8b" => "\xd1\x9b",
- "\xd0\x8c" => "\xd1\x9c",
- "\xd0\x8d" => "\xd1\x9d",
- "\xd0\x8e" => "\xd1\x9e",
- "\xd0\x8f" => "\xd1\x9f",
- "\xd0\x90" => "\xd0\xb0",
- "\xd0\x91" => "\xd0\xb1",
- "\xd0\x92" => "\xd0\xb2",
- "\xd0\x93" => "\xd0\xb3",
- "\xd0\x94" => "\xd0\xb4",
- "\xd0\x95" => "\xd0\xb5",
- "\xd0\x96" => "\xd0\xb6",
- "\xd0\x97" => "\xd0\xb7",
- "\xd0\x98" => "\xd0\xb8",
- "\xd0\x99" => "\xd0\xb9",
- "\xd0\x9a" => "\xd0\xba",
- "\xd0\x9b" => "\xd0\xbb",
- "\xd0\x9c" => "\xd0\xbc",
- "\xd0\x9d" => "\xd0\xbd",
- "\xd0\x9e" => "\xd0\xbe",
- "\xd0\x9f" => "\xd0\xbf",
- "\xd0\xa0" => "\xd1\x80",
- "\xd0\xa1" => "\xd1\x81",
- "\xd0\xa2" => "\xd1\x82",
- "\xd0\xa3" => "\xd1\x83",
- "\xd0\xa4" => "\xd1\x84",
- "\xd0\xa5" => "\xd1\x85",
- "\xd0\xa6" => "\xd1\x86",
- "\xd0\xa7" => "\xd1\x87",
- "\xd0\xa8" => "\xd1\x88",
- "\xd0\xa9" => "\xd1\x89",
- "\xd0\xaa" => "\xd1\x8a",
- "\xd0\xab" => "\xd1\x8b",
- "\xd0\xac" => "\xd1\x8c",
- "\xd0\xad" => "\xd1\x8d",
- "\xd0\xae" => "\xd1\x8e",
- "\xd0\xaf" => "\xd1\x8f",
- "\xd1\xa0" => "\xd1\xa1",
- "\xd1\xa2" => "\xd1\xa3",
- "\xd1\xa4" => "\xd1\xa5",
- "\xd1\xa6" => "\xd1\xa7",
- "\xd1\xa8" => "\xd1\xa9",
- "\xd1\xaa" => "\xd1\xab",
- "\xd1\xac" => "\xd1\xad",
- "\xd1\xae" => "\xd1\xaf",
- "\xd1\xb0" => "\xd1\xb1",
- "\xd1\xb2" => "\xd1\xb3",
- "\xd1\xb4" => "\xd1\xb5",
- "\xd1\xb6" => "\xd1\xb7",
- "\xd1\xb8" => "\xd1\xb9",
- "\xd1\xba" => "\xd1\xbb",
- "\xd1\xbc" => "\xd1\xbd",
- "\xd1\xbe" => "\xd1\xbf",
- "\xd2\x80" => "\xd2\x81",
- "\xd2\x8a" => "\xd2\x8b",
- "\xd2\x8c" => "\xd2\x8d",
- "\xd2\x8e" => "\xd2\x8f",
- "\xd2\x90" => "\xd2\x91",
- "\xd2\x92" => "\xd2\x93",
- "\xd2\x94" => "\xd2\x95",
- "\xd2\x96" => "\xd2\x97",
- "\xd2\x98" => "\xd2\x99",
- "\xd2\x9a" => "\xd2\x9b",
- "\xd2\x9c" => "\xd2\x9d",
- "\xd2\x9e" => "\xd2\x9f",
- "\xd2\xa0" => "\xd2\xa1",
- "\xd2\xa2" => "\xd2\xa3",
- "\xd2\xa4" => "\xd2\xa5",
- "\xd2\xa6" => "\xd2\xa7",
- "\xd2\xa8" => "\xd2\xa9",
- "\xd2\xaa" => "\xd2\xab",
- "\xd2\xac" => "\xd2\xad",
- "\xd2\xae" => "\xd2\xaf",
- "\xd2\xb0" => "\xd2\xb1",
- "\xd2\xb2" => "\xd2\xb3",
- "\xd2\xb4" => "\xd2\xb5",
- "\xd2\xb6" => "\xd2\xb7",
- "\xd2\xb8" => "\xd2\xb9",
- "\xd2\xba" => "\xd2\xbb",
- "\xd2\xbc" => "\xd2\xbd",
- "\xd2\xbe" => "\xd2\xbf",
- "\xd3\x81" => "\xd3\x82",
- "\xd3\x83" => "\xd3\x84",
- "\xd3\x85" => "\xd3\x86",
- "\xd3\x87" => "\xd3\x88",
- "\xd3\x89" => "\xd3\x8a",
- "\xd3\x8b" => "\xd3\x8c",
- "\xd3\x8d" => "\xd3\x8e",
- "\xd3\x90" => "\xd3\x91",
- "\xd3\x92" => "\xd3\x93",
- "\xd3\x94" => "\xd3\x95",
- "\xd3\x96" => "\xd3\x97",
- "\xd3\x98" => "\xd3\x99",
- "\xd3\x9a" => "\xd3\x9b",
- "\xd3\x9c" => "\xd3\x9d",
- "\xd3\x9e" => "\xd3\x9f",
- "\xd3\xa0" => "\xd3\xa1",
- "\xd3\xa2" => "\xd3\xa3",
- "\xd3\xa4" => "\xd3\xa5",
- "\xd3\xa6" => "\xd3\xa7",
- "\xd3\xa8" => "\xd3\xa9",
- "\xd3\xaa" => "\xd3\xab",
- "\xd3\xac" => "\xd3\xad",
- "\xd3\xae" => "\xd3\xaf",
- "\xd3\xb0" => "\xd3\xb1",
- "\xd3\xb2" => "\xd3\xb3",
- "\xd3\xb4" => "\xd3\xb5",
- "\xd3\xb8" => "\xd3\xb9",
- "\xd4\x80" => "\xd4\x81",
- "\xd4\x82" => "\xd4\x83",
- "\xd4\x84" => "\xd4\x85",
- "\xd4\x86" => "\xd4\x87",
- "\xd4\x88" => "\xd4\x89",
- "\xd4\x8a" => "\xd4\x8b",
- "\xd4\x8c" => "\xd4\x8d",
- "\xd4\x8e" => "\xd4\x8f",
- "\xd4\xb1" => "\xd5\xa1",
- "\xd4\xb2" => "\xd5\xa2",
- "\xd4\xb3" => "\xd5\xa3",
- "\xd4\xb4" => "\xd5\xa4",
- "\xd4\xb5" => "\xd5\xa5",
- "\xd4\xb6" => "\xd5\xa6",
- "\xd4\xb7" => "\xd5\xa7",
- "\xd4\xb8" => "\xd5\xa8",
- "\xd4\xb9" => "\xd5\xa9",
- "\xd4\xba" => "\xd5\xaa",
- "\xd4\xbb" => "\xd5\xab",
- "\xd4\xbc" => "\xd5\xac",
- "\xd4\xbd" => "\xd5\xad",
- "\xd4\xbe" => "\xd5\xae",
- "\xd4\xbf" => "\xd5\xaf",
- "\xd5\x80" => "\xd5\xb0",
- "\xd5\x81" => "\xd5\xb1",
- "\xd5\x82" => "\xd5\xb2",
- "\xd5\x83" => "\xd5\xb3",
- "\xd5\x84" => "\xd5\xb4",
- "\xd5\x85" => "\xd5\xb5",
- "\xd5\x86" => "\xd5\xb6",
- "\xd5\x87" => "\xd5\xb7",
- "\xd5\x88" => "\xd5\xb8",
- "\xd5\x89" => "\xd5\xb9",
- "\xd5\x8a" => "\xd5\xba",
- "\xd5\x8b" => "\xd5\xbb",
- "\xd5\x8c" => "\xd5\xbc",
- "\xd5\x8d" => "\xd5\xbd",
- "\xd5\x8e" => "\xd5\xbe",
- "\xd5\x8f" => "\xd5\xbf",
- "\xd5\x90" => "\xd6\x80",
- "\xd5\x91" => "\xd6\x81",
- "\xd5\x92" => "\xd6\x82",
- "\xd5\x93" => "\xd6\x83",
- "\xd5\x94" => "\xd6\x84",
- "\xd5\x95" => "\xd6\x85",
- "\xd5\x96" => "\xd6\x86",
- "\xe1\xb8\x80" => "\xe1\xb8\x81",
- "\xe1\xb8\x82" => "\xe1\xb8\x83",
- "\xe1\xb8\x84" => "\xe1\xb8\x85",
- "\xe1\xb8\x86" => "\xe1\xb8\x87",
- "\xe1\xb8\x88" => "\xe1\xb8\x89",
- "\xe1\xb8\x8a" => "\xe1\xb8\x8b",
- "\xe1\xb8\x8c" => "\xe1\xb8\x8d",
- "\xe1\xb8\x8e" => "\xe1\xb8\x8f",
- "\xe1\xb8\x90" => "\xe1\xb8\x91",
- "\xe1\xb8\x92" => "\xe1\xb8\x93",
- "\xe1\xb8\x94" => "\xe1\xb8\x95",
- "\xe1\xb8\x96" => "\xe1\xb8\x97",
- "\xe1\xb8\x98" => "\xe1\xb8\x99",
- "\xe1\xb8\x9a" => "\xe1\xb8\x9b",
- "\xe1\xb8\x9c" => "\xe1\xb8\x9d",
- "\xe1\xb8\x9e" => "\xe1\xb8\x9f",
- "\xe1\xb8\xa0" => "\xe1\xb8\xa1",
- "\xe1\xb8\xa2" => "\xe1\xb8\xa3",
- "\xe1\xb8\xa4" => "\xe1\xb8\xa5",
- "\xe1\xb8\xa6" => "\xe1\xb8\xa7",
- "\xe1\xb8\xa8" => "\xe1\xb8\xa9",
- "\xe1\xb8\xaa" => "\xe1\xb8\xab",
- "\xe1\xb8\xac" => "\xe1\xb8\xad",
- "\xe1\xb8\xae" => "\xe1\xb8\xaf",
- "\xe1\xb8\xb0" => "\xe1\xb8\xb1",
- "\xe1\xb8\xb2" => "\xe1\xb8\xb3",
- "\xe1\xb8\xb4" => "\xe1\xb8\xb5",
- "\xe1\xb8\xb6" => "\xe1\xb8\xb7",
- "\xe1\xb8\xb8" => "\xe1\xb8\xb9",
- "\xe1\xb8\xba" => "\xe1\xb8\xbb",
- "\xe1\xb8\xbc" => "\xe1\xb8\xbd",
- "\xe1\xb8\xbe" => "\xe1\xb8\xbf",
- "\xe1\xb9\x80" => "\xe1\xb9\x81",
- "\xe1\xb9\x82" => "\xe1\xb9\x83",
- "\xe1\xb9\x84" => "\xe1\xb9\x85",
- "\xe1\xb9\x86" => "\xe1\xb9\x87",
- "\xe1\xb9\x88" => "\xe1\xb9\x89",
- "\xe1\xb9\x8a" => "\xe1\xb9\x8b",
- "\xe1\xb9\x8c" => "\xe1\xb9\x8d",
- "\xe1\xb9\x8e" => "\xe1\xb9\x8f",
- "\xe1\xb9\x90" => "\xe1\xb9\x91",
- "\xe1\xb9\x92" => "\xe1\xb9\x93",
- "\xe1\xb9\x94" => "\xe1\xb9\x95",
- "\xe1\xb9\x96" => "\xe1\xb9\x97",
- "\xe1\xb9\x98" => "\xe1\xb9\x99",
- "\xe1\xb9\x9a" => "\xe1\xb9\x9b",
- "\xe1\xb9\x9c" => "\xe1\xb9\x9d",
- "\xe1\xb9\x9e" => "\xe1\xb9\x9f",
- "\xe1\xb9\xa0" => "\xe1\xb9\xa1",
- "\xe1\xb9\xa2" => "\xe1\xb9\xa3",
- "\xe1\xb9\xa4" => "\xe1\xb9\xa5",
- "\xe1\xb9\xa6" => "\xe1\xb9\xa7",
- "\xe1\xb9\xa8" => "\xe1\xb9\xa9",
- "\xe1\xb9\xaa" => "\xe1\xb9\xab",
- "\xe1\xb9\xac" => "\xe1\xb9\xad",
- "\xe1\xb9\xae" => "\xe1\xb9\xaf",
- "\xe1\xb9\xb0" => "\xe1\xb9\xb1",
- "\xe1\xb9\xb2" => "\xe1\xb9\xb3",
- "\xe1\xb9\xb4" => "\xe1\xb9\xb5",
- "\xe1\xb9\xb6" => "\xe1\xb9\xb7",
- "\xe1\xb9\xb8" => "\xe1\xb9\xb9",
- "\xe1\xb9\xba" => "\xe1\xb9\xbb",
- "\xe1\xb9\xbc" => "\xe1\xb9\xbd",
- "\xe1\xb9\xbe" => "\xe1\xb9\xbf",
- "\xe1\xba\x80" => "\xe1\xba\x81",
- "\xe1\xba\x82" => "\xe1\xba\x83",
- "\xe1\xba\x84" => "\xe1\xba\x85",
- "\xe1\xba\x86" => "\xe1\xba\x87",
- "\xe1\xba\x88" => "\xe1\xba\x89",
- "\xe1\xba\x8a" => "\xe1\xba\x8b",
- "\xe1\xba\x8c" => "\xe1\xba\x8d",
- "\xe1\xba\x8e" => "\xe1\xba\x8f",
- "\xe1\xba\x90" => "\xe1\xba\x91",
- "\xe1\xba\x92" => "\xe1\xba\x93",
- "\xe1\xba\x94" => "\xe1\xba\x95",
- "\xe1\xba\xa0" => "\xe1\xba\xa1",
- "\xe1\xba\xa2" => "\xe1\xba\xa3",
- "\xe1\xba\xa4" => "\xe1\xba\xa5",
- "\xe1\xba\xa6" => "\xe1\xba\xa7",
- "\xe1\xba\xa8" => "\xe1\xba\xa9",
- "\xe1\xba\xaa" => "\xe1\xba\xab",
- "\xe1\xba\xac" => "\xe1\xba\xad",
- "\xe1\xba\xae" => "\xe1\xba\xaf",
- "\xe1\xba\xb0" => "\xe1\xba\xb1",
- "\xe1\xba\xb2" => "\xe1\xba\xb3",
- "\xe1\xba\xb4" => "\xe1\xba\xb5",
- "\xe1\xba\xb6" => "\xe1\xba\xb7",
- "\xe1\xba\xb8" => "\xe1\xba\xb9",
- "\xe1\xba\xba" => "\xe1\xba\xbb",
- "\xe1\xba\xbc" => "\xe1\xba\xbd",
- "\xe1\xba\xbe" => "\xe1\xba\xbf",
- "\xe1\xbb\x80" => "\xe1\xbb\x81",
- "\xe1\xbb\x82" => "\xe1\xbb\x83",
- "\xe1\xbb\x84" => "\xe1\xbb\x85",
- "\xe1\xbb\x86" => "\xe1\xbb\x87",
- "\xe1\xbb\x88" => "\xe1\xbb\x89",
- "\xe1\xbb\x8a" => "\xe1\xbb\x8b",
- "\xe1\xbb\x8c" => "\xe1\xbb\x8d",
- "\xe1\xbb\x8e" => "\xe1\xbb\x8f",
- "\xe1\xbb\x90" => "\xe1\xbb\x91",
- "\xe1\xbb\x92" => "\xe1\xbb\x93",
- "\xe1\xbb\x94" => "\xe1\xbb\x95",
- "\xe1\xbb\x96" => "\xe1\xbb\x97",
- "\xe1\xbb\x98" => "\xe1\xbb\x99",
- "\xe1\xbb\x9a" => "\xe1\xbb\x9b",
- "\xe1\xbb\x9c" => "\xe1\xbb\x9d",
- "\xe1\xbb\x9e" => "\xe1\xbb\x9f",
- "\xe1\xbb\xa0" => "\xe1\xbb\xa1",
- "\xe1\xbb\xa2" => "\xe1\xbb\xa3",
- "\xe1\xbb\xa4" => "\xe1\xbb\xa5",
- "\xe1\xbb\xa6" => "\xe1\xbb\xa7",
- "\xe1\xbb\xa8" => "\xe1\xbb\xa9",
- "\xe1\xbb\xaa" => "\xe1\xbb\xab",
- "\xe1\xbb\xac" => "\xe1\xbb\xad",
- "\xe1\xbb\xae" => "\xe1\xbb\xaf",
- "\xe1\xbb\xb0" => "\xe1\xbb\xb1",
- "\xe1\xbb\xb2" => "\xe1\xbb\xb3",
- "\xe1\xbb\xb4" => "\xe1\xbb\xb5",
- "\xe1\xbb\xb6" => "\xe1\xbb\xb7",
- "\xe1\xbb\xb8" => "\xe1\xbb\xb9",
- "\xe1\xbc\x88" => "\xe1\xbc\x80",
- "\xe1\xbc\x89" => "\xe1\xbc\x81",
- "\xe1\xbc\x8a" => "\xe1\xbc\x82",
- "\xe1\xbc\x8b" => "\xe1\xbc\x83",
- "\xe1\xbc\x8c" => "\xe1\xbc\x84",
- "\xe1\xbc\x8d" => "\xe1\xbc\x85",
- "\xe1\xbc\x8e" => "\xe1\xbc\x86",
- "\xe1\xbc\x8f" => "\xe1\xbc\x87",
- "\xe1\xbc\x98" => "\xe1\xbc\x90",
- "\xe1\xbc\x99" => "\xe1\xbc\x91",
- "\xe1\xbc\x9a" => "\xe1\xbc\x92",
- "\xe1\xbc\x9b" => "\xe1\xbc\x93",
- "\xe1\xbc\x9c" => "\xe1\xbc\x94",
- "\xe1\xbc\x9d" => "\xe1\xbc\x95",
- "\xe1\xbc\xa9" => "\xe1\xbc\xa1",
- "\xe1\xbc\xaa" => "\xe1\xbc\xa2",
- "\xe1\xbc\xab" => "\xe1\xbc\xa3",
- "\xe1\xbc\xac" => "\xe1\xbc\xa4",
- "\xe1\xbc\xad" => "\xe1\xbc\xa5",
- "\xe1\xbc\xae" => "\xe1\xbc\xa6",
- "\xe1\xbc\xaf" => "\xe1\xbc\xa7",
- "\xe1\xbc\xb8" => "\xe1\xbc\xb0",
- "\xe1\xbc\xb9" => "\xe1\xbc\xb1",
- "\xe1\xbc\xba" => "\xe1\xbc\xb2",
- "\xe1\xbc\xbb" => "\xe1\xbc\xb3",
- "\xe1\xbc\xbc" => "\xe1\xbc\xb4",
- "\xe1\xbc\xbd" => "\xe1\xbc\xb5",
- "\xe1\xbc\xbe" => "\xe1\xbc\xb6",
- "\xe1\xbc\xbf" => "\xe1\xbc\xb7",
- "\xe1\xbd\x88" => "\xe1\xbd\x80",
- "\xe1\xbd\x89" => "\xe1\xbd\x81",
- "\xe1\xbd\x8a" => "\xe1\xbd\x82",
- "\xe1\xbd\x8b" => "\xe1\xbd\x83",
- "\xe1\xbd\x8c" => "\xe1\…
Large files files are truncated, but you can click here to view the full file