PageRenderTime 161ms CodeModel.GetById 12ms app.highlight 123ms RepoModel.GetById 1ms app.codeStats 1ms

/wiki/inc/utf8.php

https://github.com/gbby/folders2web
PHP | 1624 lines | 988 code | 149 blank | 487 comment | 168 complexity | f8750418e7437f331696aff455367f49 MD5 | raw file
   1<?php
   2/**
   3 * UTF8 helper functions
   4 *
   5 * @license    LGPL 2.1 (http://www.gnu.org/copyleft/lesser.html)
   6 * @author     Andreas Gohr <andi@splitbrain.org>
   7 */
   8
   9/**
  10 * check for mb_string support
  11 */
  12if(!defined('UTF8_MBSTRING')){
  13    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
  14        define('UTF8_MBSTRING',1);
  15    }else{
  16        define('UTF8_MBSTRING',0);
  17    }
  18}
  19
  20if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }
  21
  22if(!function_exists('utf8_isASCII')){
  23    /**
  24     * Checks if a string contains 7bit ASCII only
  25     *
  26     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
  27     */
  28    function utf8_isASCII($str){
  29        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
  30    }
  31}
  32
  33if(!function_exists('utf8_strip')){
  34    /**
  35     * Strips all highbyte chars
  36     *
  37     * Returns a pure ASCII7 string
  38     *
  39     * @author Andreas Gohr <andi@splitbrain.org>
  40     */
  41    function utf8_strip($str){
  42        $ascii = '';
  43        $len = strlen($str);
  44        for($i=0; $i<$len; $i++){
  45            if(ord($str{$i}) <128){
  46                $ascii .= $str{$i};
  47            }
  48        }
  49        return $ascii;
  50    }
  51}
  52
  53if(!function_exists('utf8_check')){
  54    /**
  55     * Tries to detect if a string is in Unicode encoding
  56     *
  57     * @author <bmorel@ssi.fr>
  58     * @link   http://www.php.net/manual/en/function.utf8-encode.php
  59     */
  60    function utf8_check($Str) {
  61        $len = strlen($Str);
  62        for ($i=0; $i<$len; $i++) {
  63            $b = ord($Str[$i]);
  64            if ($b < 0x80) continue; # 0bbbbbbb
  65            elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb
  66            elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb
  67            elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb
  68            elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb
  69            elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b
  70            else return false; # Does not match any model
  71
  72            for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
  73                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
  74                    return false;
  75            }
  76        }
  77        return true;
  78    }
  79}
  80
  81if(!function_exists('utf8_strlen')){
  82    /**
  83     * Unicode aware replacement for strlen()
  84     *
  85     * utf8_decode() converts characters that are not in ISO-8859-1
  86     * to '?', which, for the purpose of counting, is alright - It's
  87     * even faster than mb_strlen.
  88     *
  89     * @author <chernyshevsky at hotmail dot com>
  90     * @see    strlen()
  91     * @see    utf8_decode()
  92     */
  93    function utf8_strlen($string){
  94        return strlen(utf8_decode($string));
  95    }
  96}
  97
  98if(!function_exists('utf8_substr')){
  99    /**
 100     * UTF-8 aware alternative to substr
 101     *
 102     * Return part of a string given character offset (and optionally length)
 103     *
 104     * @author Harry Fuecks <hfuecks@gmail.com>
 105     * @author Chris Smith <chris@jalakai.co.uk>
 106     * @param string
 107     * @param integer number of UTF-8 characters offset (from left)
 108     * @param integer (optional) length in UTF-8 characters from offset
 109     * @return mixed string or false if failure
 110     */
 111    function utf8_substr($str, $offset, $length = null) {
 112        if(UTF8_MBSTRING){
 113            if( $length === null ){
 114                return mb_substr($str, $offset);
 115            }else{
 116                return mb_substr($str, $offset, $length);
 117            }
 118        }
 119
 120        /*
 121         * Notes:
 122         *
 123         * no mb string support, so we'll use pcre regex's with 'u' flag
 124         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
 125         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
 126         *
 127         * substr documentation states false can be returned in some cases (e.g. offset > string length)
 128         * mb_substr never returns false, it will return an empty string instead.
 129         *
 130         * calculating the number of characters in the string is a relatively expensive operation, so
 131         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
 132         */
 133
 134        // cast parameters to appropriate types to avoid multiple notices/warnings
 135        $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
 136        $offset = (int)$offset;
 137        if (!is_null($length)) $length = (int)$length;
 138
 139        // handle trivial cases
 140        if ($length === 0) return '';
 141        if ($offset < 0 && $length < 0 && $length < $offset) return '';
 142
 143        $offset_pattern = '';
 144        $length_pattern = '';
 145
 146        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
 147        if ($offset < 0) {
 148            $strlen = strlen(utf8_decode($str));        // see notes
 149            $offset = $strlen + $offset;
 150            if ($offset < 0) $offset = 0;
 151        }
 152
 153        // establish a pattern for offset, a non-captured group equal in length to offset
 154        if ($offset > 0) {
 155            $Ox = (int)($offset/65535);
 156            $Oy = $offset%65535;
 157
 158            if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
 159            $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
 160        } else {
 161            $offset_pattern = '^';                      // offset == 0; just anchor the pattern
 162        }
 163
 164        // establish a pattern for length
 165        if (is_null($length)) {
 166            $length_pattern = '(.*)$';                  // the rest of the string
 167        } else {
 168
 169            if (!isset($strlen)) $strlen = strlen(utf8_decode($str));    // see notes
 170            if ($offset > $strlen) return '';           // another trivial case
 171
 172            if ($length > 0) {
 173
 174                $length = min($strlen-$offset, $length);  // reduce any length that would go passed the end of the string
 175
 176                $Lx = (int)($length/65535);
 177                $Ly = $length%65535;
 178
 179                // +ve length requires ... a captured group of length characters
 180                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
 181                    $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';
 182
 183            } else if ($length < 0) {
 184
 185                if ($length < ($offset - $strlen)) return '';
 186
 187                $Lx = (int)((-$length)/65535);
 188                $Ly = (-$length)%65535;
 189
 190                // -ve length requires ... capture everything except a group of -length characters
 191                //                         anchored at the tail-end of the string
 192                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
 193                $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
 194            }
 195        }
 196
 197        if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
 198        return $match[1];
 199    }
 200}
 201
 202if(!function_exists('utf8_substr_replace')){
 203    /**
 204     * Unicode aware replacement for substr_replace()
 205     *
 206     * @author Andreas Gohr <andi@splitbrain.org>
 207     * @see    substr_replace()
 208     */
 209    function utf8_substr_replace($string, $replacement, $start , $length=0 ){
 210        $ret = '';
 211        if($start>0) $ret .= utf8_substr($string, 0, $start);
 212        $ret .= $replacement;
 213        $ret .= utf8_substr($string, $start+$length);
 214        return $ret;
 215    }
 216}
 217
 218if(!function_exists('utf8_ltrim')){
 219    /**
 220     * Unicode aware replacement for ltrim()
 221     *
 222     * @author Andreas Gohr <andi@splitbrain.org>
 223     * @see    ltrim()
 224     * @return string
 225     */
 226    function utf8_ltrim($str,$charlist=''){
 227        if($charlist == '') return ltrim($str);
 228
 229        //quote charlist for use in a characterclass
 230        $charlist = preg_replace('!([\\\\\\-\\]\\[/])!','\\\${1}',$charlist);
 231
 232        return preg_replace('/^['.$charlist.']+/u','',$str);
 233    }
 234}
 235
 236if(!function_exists('utf8_rtrim')){
 237    /**
 238     * Unicode aware replacement for rtrim()
 239     *
 240     * @author Andreas Gohr <andi@splitbrain.org>
 241     * @see    rtrim()
 242     * @return string
 243     */
 244    function  utf8_rtrim($str,$charlist=''){
 245        if($charlist == '') return rtrim($str);
 246
 247        //quote charlist for use in a characterclass
 248        $charlist = preg_replace('!([\\\\\\-\\]\\[/])!','\\\${1}',$charlist);
 249
 250        return preg_replace('/['.$charlist.']+$/u','',$str);
 251    }
 252}
 253
 254if(!function_exists('utf8_trim')){
 255    /**
 256     * Unicode aware replacement for trim()
 257     *
 258     * @author Andreas Gohr <andi@splitbrain.org>
 259     * @see    trim()
 260     * @return string
 261     */
 262    function  utf8_trim($str,$charlist='') {
 263        if($charlist == '') return trim($str);
 264
 265        return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
 266    }
 267}
 268
 269if(!function_exists('utf8_strtolower')){
 270    /**
 271     * This is a unicode aware replacement for strtolower()
 272     *
 273     * Uses mb_string extension if available
 274     *
 275     * @author Leo Feyer <leo@typolight.org>
 276     * @see    strtolower()
 277     * @see    utf8_strtoupper()
 278     */
 279    function utf8_strtolower($string){
 280        if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8');
 281
 282        global $UTF8_UPPER_TO_LOWER;
 283        return strtr($string,$UTF8_UPPER_TO_LOWER);
 284    }
 285}
 286
 287if(!function_exists('utf8_strtoupper')){
 288    /**
 289     * This is a unicode aware replacement for strtoupper()
 290     *
 291     * Uses mb_string extension if available
 292     *
 293     * @author Leo Feyer <leo@typolight.org>
 294     * @see    strtoupper()
 295     * @see    utf8_strtoupper()
 296     */
 297    function utf8_strtoupper($string){
 298        if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8');
 299
 300        global $UTF8_LOWER_TO_UPPER;
 301        return strtr($string,$UTF8_LOWER_TO_UPPER);
 302    }
 303}
 304
 305if(!function_exists('utf8_ucfirst')){
 306    /**
 307     * UTF-8 aware alternative to ucfirst
 308     * Make a string's first character uppercase
 309     *
 310     * @author Harry Fuecks
 311     * @param string
 312     * @return string with first character as upper case (if applicable)
 313     */
 314    function utf8_ucfirst($str){
 315        switch ( utf8_strlen($str) ) {
 316            case 0:
 317                return '';
 318            case 1:
 319                return utf8_strtoupper($str);
 320            default:
 321                preg_match('/^(.{1})(.*)$/us', $str, $matches);
 322                return utf8_strtoupper($matches[1]).$matches[2];
 323        }
 324    }
 325}
 326
 327if(!function_exists('utf8_ucwords')){
 328    /**
 329     * UTF-8 aware alternative to ucwords
 330     * Uppercase the first character of each word in a string
 331     *
 332     * @author Harry Fuecks
 333     * @param string
 334     * @return string with first char of each word uppercase
 335     * @see http://www.php.net/ucwords
 336     */
 337    function utf8_ucwords($str) {
 338        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
 339        // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
 340        // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords
 341        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';
 342
 343        return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str);
 344    }
 345
 346    /**
 347     * Callback function for preg_replace_callback call in utf8_ucwords
 348     * You don't need to call this yourself
 349     *
 350     * @author Harry Fuecks
 351     * @param array of matches corresponding to a single word
 352     * @return string with first char of the word in uppercase
 353     * @see utf8_ucwords
 354     * @see utf8_strtoupper
 355     */
 356    function utf8_ucwords_callback($matches) {
 357        $leadingws = $matches[2];
 358        $ucfirst = utf8_strtoupper($matches[3]);
 359        $ucword = utf8_substr_replace(ltrim($matches[0]),$ucfirst,0,1);
 360        return $leadingws . $ucword;
 361    }
 362}
 363
 364if(!function_exists('utf8_deaccent')){
 365    /**
 366     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
 367     *
 368     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
 369     * letters. Default is to deaccent both cases ($case = 0)
 370     *
 371     * @author Andreas Gohr <andi@splitbrain.org>
 372     */
 373    function utf8_deaccent($string,$case=0){
 374        if($case <= 0){
 375            global $UTF8_LOWER_ACCENTS;
 376            $string = strtr($string,$UTF8_LOWER_ACCENTS);
 377        }
 378        if($case >= 0){
 379            global $UTF8_UPPER_ACCENTS;
 380            $string = strtr($string,$UTF8_UPPER_ACCENTS);
 381        }
 382        return $string;
 383    }
 384}
 385
 386if(!function_exists('utf8_romanize')){
 387    /**
 388     * Romanize a non-latin string
 389     *
 390     * @author Andreas Gohr <andi@splitbrain.org>
 391     */
 392    function utf8_romanize($string){
 393        if(utf8_isASCII($string)) return $string; //nothing to do
 394
 395        global $UTF8_ROMANIZATION;
 396        return strtr($string,$UTF8_ROMANIZATION);
 397    }
 398}
 399
 400if(!function_exists('utf8_stripspecials')){
 401    /**
 402     * Removes special characters (nonalphanumeric) from a UTF-8 string
 403     *
 404     * This function adds the controlchars 0x00 to 0x19 to the array of
 405     * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
 406     *
 407     * @author Andreas Gohr <andi@splitbrain.org>
 408     * @param  string $string     The UTF8 string to strip of special chars
 409     * @param  string $repl       Replace special with this string
 410     * @param  string $additional Additional chars to strip (used in regexp char class)
 411     */
 412    function utf8_stripspecials($string,$repl='',$additional=''){
 413        global $UTF8_SPECIAL_CHARS;
 414        global $UTF8_SPECIAL_CHARS2;
 415
 416        static $specials = null;
 417        if(is_null($specials)){
 418            #$specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/');
 419            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
 420        }
 421
 422        return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string);
 423    }
 424}
 425
 426if(!function_exists('utf8_strpos')){
 427    /**
 428     * This is an Unicode aware replacement for strpos
 429     *
 430     * @author Leo Feyer <leo@typolight.org>
 431     * @see    strpos()
 432     * @param  string
 433     * @param  string
 434     * @param  integer
 435     * @return integer
 436     */
 437    function utf8_strpos($haystack, $needle, $offset=0){
 438        $comp = 0;
 439        $length = null;
 440
 441        while (is_null($length) || $length < $offset) {
 442            $pos = strpos($haystack, $needle, $offset + $comp);
 443
 444            if ($pos === false)
 445                return false;
 446
 447            $length = utf8_strlen(substr($haystack, 0, $pos));
 448
 449            if ($length < $offset)
 450                $comp = $pos - $length;
 451        }
 452
 453        return $length;
 454    }
 455}
 456
 457if(!function_exists('utf8_tohtml')){
 458    /**
 459     * Encodes UTF-8 characters to HTML entities
 460     *
 461     * @author Tom N Harris <tnharris@whoopdedo.org>
 462     * @author <vpribish at shopping dot com>
 463     * @link   http://www.php.net/manual/en/function.utf8-decode.php
 464     */
 465    function utf8_tohtml ($str) {
 466        $ret = '';
 467        foreach (utf8_to_unicode($str) as $cp) {
 468            if ($cp < 0x80)
 469                $ret .= chr($cp);
 470            elseif ($cp < 0x100)
 471                $ret .= "&#$cp;";
 472            else
 473                $ret .= '&#x'.dechex($cp).';';
 474        }
 475        return $ret;
 476    }
 477}
 478
 479if(!function_exists('utf8_unhtml')){
 480    /**
 481     * Decodes HTML entities to UTF-8 characters
 482     *
 483     * Convert any &#..; entity to a codepoint,
 484     * The entities flag defaults to only decoding numeric entities.
 485     * Pass HTML_ENTITIES and named entities, including &amp; &lt; etc.
 486     * are handled as well. Avoids the problem that would occur if you
 487     * had to decode "&amp;#38;&#38;amp;#38;"
 488     *
 489     * unhtmlspecialchars(utf8_unhtml($s)) -> "&#38;&#38;"
 490     * utf8_unhtml(unhtmlspecialchars($s)) -> "&&amp#38;"
 491     * what it should be                   -> "&#38;&amp#38;"
 492     *
 493     * @author Tom N Harris <tnharris@whoopdedo.org>
 494     * @param  string  $str      UTF-8 encoded string
 495     * @param  boolean $entities Flag controlling decoding of named entities.
 496     * @return UTF-8 encoded string with numeric (and named) entities replaced.
 497     */
 498    function utf8_unhtml($str, $entities=null) {
 499        static $decoder = null;
 500        if (is_null($decoder))
 501            $decoder = new utf8_entity_decoder();
 502        if (is_null($entities))
 503            return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m',
 504                                         'utf8_decode_numeric', $str);
 505        else
 506            return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m',
 507                                         array(&$decoder, 'decode'), $str);
 508    }
 509}
 510
 511if(!function_exists('utf8_decode_numeric')){
 512    function utf8_decode_numeric($ent) {
 513        switch ($ent[2]) {
 514            case 'X':
 515            case 'x':
 516                $cp = hexdec($ent[3]);
 517                break;
 518            default:
 519                $cp = intval($ent[3]);
 520                break;
 521        }
 522        return unicode_to_utf8(array($cp));
 523    }
 524}
 525
 526if(!class_exists('utf8_entity_decoder')){
 527    class utf8_entity_decoder {
 528        var $table;
 529        function utf8_entity_decoder() {
 530            $table = get_html_translation_table(HTML_ENTITIES);
 531            $table = array_flip($table);
 532            $this->table = array_map(array(&$this,'makeutf8'), $table);
 533        }
 534        function makeutf8($c) {
 535            return unicode_to_utf8(array(ord($c)));
 536        }
 537        function decode($ent) {
 538            if ($ent[1] == '#') {
 539                return utf8_decode_numeric($ent);
 540            } elseif (array_key_exists($ent[0],$this->table)) {
 541                return $this->table[$ent[0]];
 542            } else {
 543                return $ent[0];
 544            }
 545        }
 546    }
 547}
 548
 549if(!function_exists('utf8_to_unicode')){
 550    /**
 551     * Takes an UTF-8 string and returns an array of ints representing the
 552     * Unicode characters. Astral planes are supported ie. the ints in the
 553     * output can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
 554     * are not allowed.
 555     *
 556     * If $strict is set to true the function returns false if the input
 557     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
 558     * level E_USER_WARNING
 559     *
 560     * Note: this function has been modified slightly in this library to
 561     * trigger errors on encountering bad bytes
 562     *
 563     * @author <hsivonen@iki.fi>
 564     * @author Harry Fuecks <hfuecks@gmail.com>
 565     * @param  string  UTF-8 encoded string
 566     * @param  boolean Check for invalid sequences?
 567     * @return mixed array of unicode code points or false if UTF-8 invalid
 568     * @see    unicode_to_utf8
 569     * @link   http://hsivonen.iki.fi/php-utf8/
 570     * @link   http://sourceforge.net/projects/phputf8/
 571     */
 572    function utf8_to_unicode($str,$strict=false) {
 573        $mState = 0;     // cached expected number of octets after the current octet
 574                         // until the beginning of the next UTF8 character sequence
 575        $mUcs4  = 0;     // cached Unicode character
 576        $mBytes = 1;     // cached expected number of octets in the current sequence
 577
 578        $out = array();
 579
 580        $len = strlen($str);
 581
 582        for($i = 0; $i < $len; $i++) {
 583
 584            $in = ord($str{$i});
 585
 586            if ( $mState == 0) {
 587
 588                // When mState is zero we expect either a US-ASCII character or a
 589                // multi-octet sequence.
 590                if (0 == (0x80 & ($in))) {
 591                    // US-ASCII, pass straight through.
 592                    $out[] = $in;
 593                    $mBytes = 1;
 594
 595                } else if (0xC0 == (0xE0 & ($in))) {
 596                    // First octet of 2 octet sequence
 597                    $mUcs4 = ($in);
 598                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
 599                    $mState = 1;
 600                    $mBytes = 2;
 601
 602                } else if (0xE0 == (0xF0 & ($in))) {
 603                    // First octet of 3 octet sequence
 604                    $mUcs4 = ($in);
 605                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
 606                    $mState = 2;
 607                    $mBytes = 3;
 608
 609                } else if (0xF0 == (0xF8 & ($in))) {
 610                    // First octet of 4 octet sequence
 611                    $mUcs4 = ($in);
 612                    $mUcs4 = ($mUcs4 & 0x07) << 18;
 613                    $mState = 3;
 614                    $mBytes = 4;
 615
 616                } else if (0xF8 == (0xFC & ($in))) {
 617                    /* First octet of 5 octet sequence.
 618                     *
 619                     * This is illegal because the encoded codepoint must be either
 620                     * (a) not the shortest form or
 621                     * (b) outside the Unicode range of 0-0x10FFFF.
 622                     * Rather than trying to resynchronize, we will carry on until the end
 623                     * of the sequence and let the later error handling code catch it.
 624                     */
 625                    $mUcs4 = ($in);
 626                    $mUcs4 = ($mUcs4 & 0x03) << 24;
 627                    $mState = 4;
 628                    $mBytes = 5;
 629
 630                } else if (0xFC == (0xFE & ($in))) {
 631                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
 632                    $mUcs4 = ($in);
 633                    $mUcs4 = ($mUcs4 & 1) << 30;
 634                    $mState = 5;
 635                    $mBytes = 6;
 636
 637                } elseif($strict) {
 638                    /* Current octet is neither in the US-ASCII range nor a legal first
 639                     * octet of a multi-octet sequence.
 640                     */
 641                    trigger_error(
 642                            'utf8_to_unicode: Illegal sequence identifier '.
 643                                'in UTF-8 at byte '.$i,
 644                            E_USER_WARNING
 645                        );
 646                    return false;
 647
 648                }
 649
 650            } else {
 651
 652                // When mState is non-zero, we expect a continuation of the multi-octet
 653                // sequence
 654                if (0x80 == (0xC0 & ($in))) {
 655
 656                    // Legal continuation.
 657                    $shift = ($mState - 1) * 6;
 658                    $tmp = $in;
 659                    $tmp = ($tmp & 0x0000003F) << $shift;
 660                    $mUcs4 |= $tmp;
 661
 662                    /**
 663                     * End of the multi-octet sequence. mUcs4 now contains the final
 664                     * Unicode codepoint to be output
 665                     */
 666                    if (0 == --$mState) {
 667
 668                        /*
 669                         * Check for illegal sequences and codepoints.
 670                         */
 671                        // From Unicode 3.1, non-shortest form is illegal
 672                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
 673                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
 674                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
 675                            (4 < $mBytes) ||
 676                            // From Unicode 3.2, surrogate characters are illegal
 677                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
 678                            // Codepoints outside the Unicode range are illegal
 679                            ($mUcs4 > 0x10FFFF)) {
 680
 681                            if($strict){
 682                                trigger_error(
 683                                        'utf8_to_unicode: Illegal sequence or codepoint '.
 684                                            'in UTF-8 at byte '.$i,
 685                                        E_USER_WARNING
 686                                    );
 687
 688                                return false;
 689                            }
 690
 691                        }
 692
 693                        if (0xFEFF != $mUcs4) {
 694                            // BOM is legal but we don't want to output it
 695                            $out[] = $mUcs4;
 696                        }
 697
 698                        //initialize UTF8 cache
 699                        $mState = 0;
 700                        $mUcs4  = 0;
 701                        $mBytes = 1;
 702                    }
 703
 704                } elseif($strict) {
 705                    /**
 706                     *((0xC0 & (*in) != 0x80) && (mState != 0))
 707                     * Incomplete multi-octet sequence.
 708                     */
 709                    trigger_error(
 710                            'utf8_to_unicode: Incomplete multi-octet '.
 711                            '   sequence in UTF-8 at byte '.$i,
 712                            E_USER_WARNING
 713                        );
 714
 715                    return false;
 716                }
 717            }
 718        }
 719        return $out;
 720    }
 721}
 722
 723if(!function_exists('unicode_to_utf8')){
 724    /**
 725     * Takes an array of ints representing the Unicode characters and returns
 726     * a UTF-8 string. Astral planes are supported ie. the ints in the
 727     * input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
 728     * are not allowed.
 729     *
 730     * If $strict is set to true the function returns false if the input
 731     * array contains ints that represent surrogates or are outside the
 732     * Unicode range and raises a PHP error at level E_USER_WARNING
 733     *
 734     * Note: this function has been modified slightly in this library to use
 735     * output buffering to concatenate the UTF-8 string (faster) as well as
 736     * reference the array by it's keys
 737     *
 738     * @param  array of unicode code points representing a string
 739     * @param  boolean Check for invalid sequences?
 740     * @return mixed UTF-8 string or false if array contains invalid code points
 741     * @author <hsivonen@iki.fi>
 742     * @author Harry Fuecks <hfuecks@gmail.com>
 743     * @see    utf8_to_unicode
 744     * @link   http://hsivonen.iki.fi/php-utf8/
 745     * @link   http://sourceforge.net/projects/phputf8/
 746     */
 747    function unicode_to_utf8($arr,$strict=false) {
 748        if (!is_array($arr)) return '';
 749        ob_start();
 750
 751        foreach (array_keys($arr) as $k) {
 752
 753            if ( ($arr[$k] >= 0) && ($arr[$k] <= 0x007f) ) {
 754                # ASCII range (including control chars)
 755
 756                echo chr($arr[$k]);
 757
 758            } else if ($arr[$k] <= 0x07ff) {
 759                # 2 byte sequence
 760
 761                echo chr(0xc0 | ($arr[$k] >> 6));
 762                echo chr(0x80 | ($arr[$k] & 0x003f));
 763
 764            } else if($arr[$k] == 0xFEFF) {
 765                # Byte order mark (skip)
 766
 767                // nop -- zap the BOM
 768
 769            } else if ($arr[$k] >= 0xD800 && $arr[$k] <= 0xDFFF) {
 770                # Test for illegal surrogates
 771
 772                // found a surrogate
 773                if($strict){
 774                    trigger_error(
 775                        'unicode_to_utf8: Illegal surrogate '.
 776                            'at index: '.$k.', value: '.$arr[$k],
 777                        E_USER_WARNING
 778                        );
 779                    return false;
 780                }
 781
 782            } else if ($arr[$k] <= 0xffff) {
 783                # 3 byte sequence
 784
 785                echo chr(0xe0 | ($arr[$k] >> 12));
 786                echo chr(0x80 | (($arr[$k] >> 6) & 0x003f));
 787                echo chr(0x80 | ($arr[$k] & 0x003f));
 788
 789            } else if ($arr[$k] <= 0x10ffff) {
 790                # 4 byte sequence
 791
 792                echo chr(0xf0 | ($arr[$k] >> 18));
 793                echo chr(0x80 | (($arr[$k] >> 12) & 0x3f));
 794                echo chr(0x80 | (($arr[$k] >> 6) & 0x3f));
 795                echo chr(0x80 | ($arr[$k] & 0x3f));
 796
 797            } elseif($strict) {
 798
 799                trigger_error(
 800                    'unicode_to_utf8: Codepoint out of Unicode range '.
 801                        'at index: '.$k.', value: '.$arr[$k],
 802                    E_USER_WARNING
 803                    );
 804
 805                // out of range
 806                return false;
 807            }
 808        }
 809
 810        $result = ob_get_contents();
 811        ob_end_clean();
 812        return $result;
 813    }
 814}
 815
 816if(!function_exists('utf8_to_utf16be')){
 817    /**
 818     * UTF-8 to UTF-16BE conversion.
 819     *
 820     * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits
 821     */
 822    function utf8_to_utf16be(&$str, $bom = false) {
 823        $out = $bom ? "\xFE\xFF" : '';
 824        if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');
 825
 826        $uni = utf8_to_unicode($str);
 827        foreach($uni as $cp){
 828            $out .= pack('n',$cp);
 829        }
 830        return $out;
 831    }
 832}
 833
 834if(!function_exists('utf16be_to_utf8')){
 835    /**
 836     * UTF-8 to UTF-16BE conversion.
 837     *
 838     * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits
 839     */
 840    function utf16be_to_utf8(&$str) {
 841        $uni = unpack('n*',$str);
 842        return unicode_to_utf8($uni);
 843    }
 844}
 845
 846if(!function_exists('utf8_bad_replace')){
 847    /**
 848     * Replace bad bytes with an alternative character
 849     *
 850     * ASCII character is recommended for replacement char
 851     *
 852     * PCRE Pattern to locate bad bytes in a UTF-8 string
 853     * Comes from W3 FAQ: Multilingual Forms
 854     * Note: modified to include full ASCII range including control chars
 855     *
 856     * @author Harry Fuecks <hfuecks@gmail.com>
 857     * @see http://www.w3.org/International/questions/qa-forms-utf-8
 858     * @param string to search
 859     * @param string to replace bad bytes with (defaults to '?') - use ASCII
 860     * @return string
 861     */
 862    function utf8_bad_replace($str, $replace = '') {
 863        $UTF8_BAD =
 864         '([\x00-\x7F]'.                          # ASCII (including control chars)
 865         '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
 866         '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
 867         '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
 868         '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
 869         '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
 870         '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
 871         '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
 872         '|(.{1}))';                              # invalid byte
 873        ob_start();
 874        while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches)) {
 875            if ( !isset($matches[2])) {
 876                echo $matches[0];
 877            } else {
 878                echo $replace;
 879            }
 880            $str = substr($str,strlen($matches[0]));
 881        }
 882        $result = ob_get_contents();
 883        ob_end_clean();
 884        return $result;
 885    }
 886}
 887
 888if(!function_exists('utf8_correctIdx')){
 889    /**
 890     * adjust a byte index into a utf8 string to a utf8 character boundary
 891     *
 892     * @param $str   string   utf8 character string
 893     * @param $i     int      byte index into $str
 894     * @param $next  bool     direction to search for boundary,
 895     *                           false = up (current character)
 896     *                           true = down (next character)
 897     *
 898     * @return int            byte index into $str now pointing to a utf8 character boundary
 899     *
 900     * @author       chris smith <chris@jalakai.co.uk>
 901     */
 902    function utf8_correctIdx(&$str,$i,$next=false) {
 903
 904        if ($i <= 0) return 0;
 905
 906        $limit = strlen($str);
 907        if ($i>=$limit) return $limit;
 908
 909        if ($next) {
 910            while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
 911        } else {
 912            while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
 913        }
 914
 915        return $i;
 916    }
 917}
 918
 919// only needed if no mb_string available
 920if(!UTF8_MBSTRING){
 921    /**
 922     * UTF-8 Case lookup table
 923     *
     * This lookup table maps lower case letters to their corresponding
     * upper case letter in UTF-8
 926     *
 927     * @author Andreas Gohr <andi@splitbrain.org>
 928     */
 929    global $UTF8_LOWER_TO_UPPER;
 930    if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array(
 931            "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q",
 932            "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G",
 933            "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ",
 934            "ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ",
 935            "ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ",
 936            "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ",
 937            "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ",
 938            "ὲ"=>"Ὲ","ό"=>"ឝ","ὰ"=>"ឺ","ὧ"=>"Ὧ","Ὄ"=>"὎","ὼ"=>"Ὥ","ὤ"=>"὏","ὣ"=>"Ὅ","ὢ"=>"Ὢ","ὥ"=>"Ὂ",
 939            "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ",
 940            "១"=>"៿","ៜ"=>"៞","៾"=>"៽","៴"=>"៟","៳"=>"៝","៲"=>"៺","៹"=>"៚","៰"=>"៸","៧"=>"៯","៌"=>"៎",
 941            "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ",
 942            "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ",
 943            "ᝡ"=>"᝜","᝾"=>"᝴","ᝳ"=>"ᝲ","᝹"=>"ᝰ","ᝯ"=>"ᝎ","᝭"=>"ᝏ","ᝍ"=>"ᝪ","ᝊ"=>"ᝨ","ᝧ"=>"ᝌ","᝼"=>"ᝤ",
 944            "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố",
 945            "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ",
 946            "ẝ"=>"Ẻ","ẚ"=>"Ẹ","ạ"=>"ẜ","Ế"=>"Ẵ","ẳ"=>"Ẳ","ẹ"=>"Ằ","ắ"=>"Ẏ","ậ"=>"ẏ","ẍ"=>"Ẫ","Ẋ"=>"Ẩ",
 947            "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ",
 948            "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ",
 949            "ᚡ"=>"᚜","ᚾ"=>"ᚴ","ᚳ"=>"ᚲ","ᚹ"=>"ᚰ","ᚯ"=>"ᚎ","ᚭ"=>"ᚏ","ᚍ"=>"ᚪ","ᚊ"=>"ᚨ","ᚧ"=>"ᚌ","ᚼ"=>"ᚤ",
 950            "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ",
 951            "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ",
 952            "ḝ"=>"Ḻ","Ḛ"=>"Ḹ","ḡ"=>"Ḝ","Ḿ"=>"Ḵ","ḳ"=>"Ḳ","ḹ"=>"Ḱ","ḯ"=>"Ḏ","ḭ"=>"ḏ","ḍ"=>"Ḫ","Ḋ"=>"Ḩ",
 953            "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ",
 954            "ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ",
 955            "ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս",
 956            "ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ",
 957            "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ",
 958            "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ",
 959            "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ",
 960            "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ",
 961            "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ",
 962            "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ",
 963            "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ",
 964            "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ",
 965            "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ",
 966            "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ",
 967            "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ",
 968            "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ",
 969            "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П",
 970            "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е",
 971            "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ",
 972            "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ",
 973            "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ",
 974            "ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π",
 975            "ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ",
 976            "ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ",
 977            "ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ",
 978            "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ",
 979            "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș",
 980            "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ",
 981            "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ",
 982            "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ",
 983            "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ",
 984            "ƽ"=>"Ɵ","ƚ"=>"Ƹ","Ɯ"=>"ƾ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ə","ƨ"=>"Ƨ","Ƽ"=>"Ƥ","ƣ"=>"Ƣ","ƥ"=>"Ơ",
 985            "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž",
 986            "ş"=>"ŝ","ź"=>"Ś","š"=>"Ŝ","ž"=>"Ŵ","ų"=>"Ų","Ź"=>"Ű","ů"=>"Ŏ","ŭ"=>"ŏ","ō"=>"Ū","Ŋ"=>"Ũ",
 987            "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ",
 988            "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ",
 989            "Ğ"=>"Ľ","ğ"=>"ĝ","ĺ"=>"Ě","ġ"=>"Ĝ","ľ"=>"Ĵ","ij"=>"IJ","Ĺ"=>"I","į"=>"Ď","ĭ"=>"ď","č"=>"Ī",
 990            "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė",
 991            "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă",
 992            "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö",
 993            "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì",
 994            "ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â",
 995            "á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T",
 996            "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J",
 997            "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A"
 998                );
 999
1000    /**
1001     * UTF-8 Case lookup table
1002     *
     * This lookup table maps upper case letters to their corresponding
     * lower case letter in UTF-8
1005     *
1006     * @author Andreas Gohr <andi@splitbrain.org>
1007     */
1008    global $UTF8_UPPER_TO_LOWER;
1009    if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array (
1010            "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q",
1011            "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g",
1012            "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ",
1013            "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ",
1014            "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ",
1015            "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ",
1016            "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ",
1017            "Ὲ"=>"ὲ","ឝ"=>"ό","ឺ"=>"ὰ","Ὧ"=>"ὧ","὎"=>"Ὄ","Ὥ"=>"ὼ","὏"=>"ὤ","Ὅ"=>"ὣ","Ὢ"=>"ὢ","Ὂ"=>"ὥ",
1018            "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ",
1019            "៿"=>"១","៞"=>"ៜ","៽"=>"៾","៟"=>"៴","៝"=>"៳","៺"=>"៲","៚"=>"៹","៸"=>"៰","៯"=>"៧","៎"=>"៌",
1020            "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ",
1021            "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ",
1022            "᝜"=>"ᝡ","᝴"=>"᝾","ᝲ"=>"ᝳ","ᝰ"=>"᝹","ᝎ"=>"ᝯ","ᝏ"=>"᝭","ᝪ"=>"ᝍ","ᝨ"=>"ᝊ","ᝌ"=>"ᝧ","ᝤ"=>"᝼",
1023            "Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố",
1024            "Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ",
1025            "Ẻ"=>"ẝ","Ẹ"=>"ẚ","ẜ"=>"ạ","Ẵ"=>"Ế","Ẳ"=>"ẳ","Ằ"=>"ẹ","Ẏ"=>"ắ","ẏ"=>"ậ","Ẫ"=>"ẍ","Ẩ"=>"Ẋ",
1026            "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ",
1027            "Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ",
1028            "᚜"=>"ᚡ","ᚴ"=>"ᚾ","ᚲ"=>"ᚳ","ᚰ"=>"ᚹ","ᚎ"=>"ᚯ","ᚏ"=>"ᚭ","ᚪ"=>"ᚍ","ᚨ"=>"ᚊ","ᚌ"=>"ᚧ","ᚤ"=>"ᚼ",
1029            "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ",
1030            "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ",
1031            "Ḻ"=>"ḝ","Ḹ"=>"Ḛ","Ḝ"=>"ḡ","Ḵ"=>"Ḿ","Ḳ"=>"ḳ","Ḱ"=>"ḹ","Ḏ"=>"ḯ","ḏ"=>"ḭ","Ḫ"=>"ḍ","Ḩ"=>"Ḋ",
1032            "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ",
1033            "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ",
1034            "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս",
1035            "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ",
1036            "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ",
1037            "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ",
1038            "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ",
1039            "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ",
1040            "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ",
1041            "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ",
1042            "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ",
1043            "Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ",
1044            "Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ",
1045            "Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ",
1046            "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ",
1047            "Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ",
1048            "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п",
1049            "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е",
1050            "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ",
1051            "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ",
1052            "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ",
1053            "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Σ"=>"ς","Ρ"=>"ρ","Π"=>"π",
1054            "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ",
1055            "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ",
1056            "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ",
1057            "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ",
1058            "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș",
1059            "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ",
1060            "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ",
1061            "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ",
1062            "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ",
1063            "Ɵ"=>"ƽ","Ƹ"=>"ƚ","ƾ"=>"Ɯ","Ƴ"=>"ƴ","Ư"=>"ư","Ə"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"Ƽ","Ƣ"=>"ƣ","Ơ"=>"ƥ",
1064            "Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž",
1065            "ŝ"=>"ş","Ś"=>"ź","Ŝ"=>"š","Ŵ"=>"ž","Ų"=>"ų","Ű"=>"Ź","Ŏ"=>"ů","ŏ"=>"ŭ","Ū"=>"ō","Ũ"=>"Ŋ",
1066            "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ",
1067            "Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ",
1068            "Ľ"=>"Ğ","ĝ"=>"ğ","Ě"=>"ĺ","Ĝ"=>"ġ","Ĵ"=>"ľ","IJ"=>"ij","I"=>"Ĺ","Ď"=>"į","ď"=>"ĭ","Ī"=>"č",
1069            "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė",
1070            "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă",
1071            "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö",
1072            "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì",
1073            "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â",
1074            "Á"=>"á","À"=>"à","Μ"=>"µ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t",
1075            "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j",
1076            "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a"
1077                );
1078}; // end of case lookup tables
1079
1080/**
1081 * UTF-8 lookup table for lower case accented letters
1082 *
 * This lookup table maps accented characters to replacements from the ASCII-7
 * range. These are lower case letters only.
1085 *
1086 * @author Andreas Gohr <andi@splitbrain.org>
1087 * @see    utf8_deaccent()
1088 */
1089global $UTF8_LOWER_ACCENTS;
1090if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array(
1091  'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o',
1092  'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k',
1093  'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o',
1094  'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o',
1095  'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
1096  'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
1097  'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l',
1098  'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z',
1099  'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't',
1100  'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
1101  'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j',
1102  'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
1103  'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
1104  'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
1105  'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e',
1106);
1107
1108/**
1109 * UTF-8 lookup table for upper case accented letters
1110 *
 * This lookup table maps accented characters to replacements from the ASCII-7
 * range. These are upper case letters only.
1113 *
1114 * @author Andreas Gohr <andi@splitbrain.org>
1115 * @see    utf8_deaccent()
1116 */
1117global $UTF8_UPPER_ACCENTS;
1118if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array(
1119  'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O',
1120  'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K',
1121  'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O',
1122  'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O',
1123  'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C',
1124  'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T',
1125  'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L',
1126  'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z',
1127  'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T',
1128  'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O',
1129  'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J',
1130  'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O',
1131  'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
1132  'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A',
1133  'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E',
1134);
1135
1136/**
1137 * UTF-8 array of common special characters
1138 *
1139 * This array should contain all special characters (not a letter or digit)
1140 * defined in the various local charsets - it's not a complete list of non-alphanum
1141 * characters in UTF-8. It's not perfect but should match most cases of special
1142 * chars.
1143 *
1144 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
1145 * These chars are _not_ in the array either:  _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a
1146 *
1147 * @author Andreas Gohr <andi@splitbrain.org>
1148 * @see    utf8_stripspecials()
1149 */
1150global $UTF8_SPECIAL_CHARS;
1151if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array(
1152  0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
1153  0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029,         0x002b, 0x002c,
1154          0x002f,         0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
1155  0x005c, 0x005d, 0x005e,         0x0060, 0x007b, 0x007c, 0x007d, 0x007e,
1156  0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
1157  0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
1158  0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
1159  0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
1160  0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
1161  0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
1162  0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
1163  0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
1164  0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
1165  0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
1166  0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
1167  0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
1168  0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
1169  0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
1170  0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
1171  0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
1172  0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
1173  0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
1174  0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
1175  0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
1176  0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
1177  0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
1178  0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
1179  0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
1180  0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
1181  0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
1182  0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
1183  0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
1184  0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
1185  0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
1186  0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
1187  0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
1188  0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
1189  0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
1190  0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
1191  0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
1192  0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
1193  0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
1194  0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
1195  0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
1196  0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
1197  0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c,
1198  0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017,
1199  0x3018, 0x3019, 0x301a, 0x301b, 0x3036,
1200  0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
1201  0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
1202  0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
1203  0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
1204  0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
1205          0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09,
1206  0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c,
1207  0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b,
1208  0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65,
1209  0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea,
1210  0xffeb, 0xffec, 0xffed, 0xffee,
1211  0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703,
1212  0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b,
1213  0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713,
1214  0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b,
1215  0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf,
1216);
1217
1218// utf8 version of above data
1219global $UTF8_SPECIAL_CHARS2;
1220if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 =
1221    "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•�'.
1222    '�—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½�'.
1223    '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'.
1224    '�ׁׂ׃׳״،؛؟ـًٌٍَُِّْ٪฿‌‍‎‏–—―‗‘’‚“”�'.
1225    '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'.
1226    '⇐⇑⇒⇓⇔∀∂∃∅∆∇∈∉∋∏∑−∕∗∙√∝∞∠∧∨�'.
1227    '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'.
1228    '��┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠'.
1229    '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌▐░▒▓■▲▼◆◊●�'.
1230    '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'.
1231    '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'.
1232    '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'.
1233    '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'.
1234    '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'.
1235    '➷➸➹➺➻➼➽➾'.
1236    ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'.
1237    '�'.
1238    '�'.
1239    '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'.
1240    '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'.
1241    '𝛼𝛽𝛾𝛿𝜀𝜁𝜂𝜃𝜄𝜅𝜆𝜇𝜈𝜉𝜊𝜋𝜌𝜍𝜎𝜏𝜐𝜑𝜒𝜓𝜔𝜕𝜖𝜗𝜘𝜙𝜚𝜛'.
1242    '   ⁠';
1243
1244/**
1245 * Romanization lookup table
1246 *
 * This lookup table provides a way to transform strings written in languages
 * that are not based on Latin letters into plain ASCII.
 *
 * Please note: this is not a scientific transliteration table. It only works
 * one way, from non-Latin to ASCII, and it works by simple character
 * replacement only. Particularities of each language are not supported.
1253 *
1254 * @author Andreas Gohr <andi@splitbrain.org>
1255 * @author Vitaly Blokhin <vitinfo@vitn.com>
1256 * @link   http://www.uconv.com/translit.htm
1257 * @author Bisqwit <bisqwit@iki.fi>
1258 * @link   http://kanjidict.stc.cx/hiragana.php?src=2
1259 * @link   http://www.translatum.gr/converter/greek-transliteration.htm
1260 * @link   http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription
1261 * @link   http://www.btranslations.com/resources/romanization/korean.asp
1262 * @author Arthit Suriyawongkul <arthit@gmail.com>
1263 * @author Denis Scheither <amorphis@uni-bremen.de>
1264 * @author Eivind Morland <eivind.morland@gmail.com>
1265 */
1266global $UTF8_ROMANIZATION;
1267if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array(
1268  // scandinavian - differs from what we do in deaccent
1269  'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O',
1270
1271  //russian cyrillic
1272  'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G',
1273  'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh',
1274  'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K',
1275  'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O',
1276  'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T',
1277  'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C',
1278  'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'',
1279  'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju',
1280  'Ю'=>'Ju','я'=>'ja','Я'=>'Ja',
1281  // Ukrainian cyrillic
1282  'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji',
1283  // Georgian
1284  'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th',
1285  'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh',
1286  'რ'=>'r','ქ'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ჼ'=>'kh','჌'=>'gh','ყ'=>'q',
1287  'შ'=>'sh','჊'=>'ch','ც'=>'c','Ⴭ'=>'dh','჏'=>'w','ჭ'=>'j','჎'=>'x','ჯ'=>'jh',
1288  'ჰ'=>'xh',
1289  //Sanskrit
1290  'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry',
1291  'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw',
1292  'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh',
1293  'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh',
1294  'थ'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','़'=>'th','ऌ'=>'d','ध'=>'dh',
1295  'न'=>'n','प'=>'p','ऍ'=>'ph','ए'=>'b','भ'=>'bh','ऎ'=>'m','य'=>'z','र'=>'r',
1296  'ल'=>'l','ा'=>'v','ज'=>'sh','ड'=>'sqh','स'=>'s','च'=>'x',
1297  //Sanskrit diacritics
1298  'Ā'=>'A','Ī'=>'I','Ū'=>'U','Ṛ'=>'R','Ṝ'=>'R','Ṅ'=>'N','Ñ'=>'N','Ṭ'=>'T',
1299  'Ḍ'=>'D','Ṇ'=>'N','Ś'=>'S','Ṣ'=>'S','Ṁ'=>'M','Ṃ'=>'M','Ḥ'=>'H','Ḷ'=>'L','Ḹ'=>'L',
1300  'ā'=>'a','ī'=>'i','ū'=>'u','ṛ'=>'r','ṝ'=>'r','ṅ'=>'n','ñ'=>'n','ṭ'=>'t',
1301  'ḍ'=>'d','ṇ'=>'n','ś'=>'s','ṣ'=>'s','ṁ'=>'m','ṃ'=>'m','ḥ'=>'h','ḷ'=>'l','ḹ'=>'l',
1302  //Hebrew
1303  'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th',
1304  'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n',
1305  'ץ'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','׼'=>'c','׌'=>'c','ק'=>'q','ר'=>'r',
1306  '׊'=>'sh','ת'=>'t',
1307  //Arabic
1308  'ا'=>'a','ب'=>'b','ت'=>'t','؍'=>'th','؏'=>'g','ح'=>'xh','؎'=>'x','د'=>'d',
1309  'ذ'=>'dh','ع'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ؾ'=>'s\'','؜'=>'d\'',
1310  'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k',
1311  'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i',
1312
1313  // Japanese characters  (last update: 2008-05-09)
1314
1315  // Japanese hiragana
1316
1317  // 3 character syllables, っ doubles the consonant after
1318  'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu',
1319  'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu',
1320  'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu',
1321  'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu',
1322  // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu',
1323  'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu',
1324  'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu',
1325  'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu',
1326  'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu',
1327  'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu',
1328  'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu',
1329
1330  // separate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway)
1331  'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u',
1332  'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu',
1333
1334   // 2 character syllables - normal
1335  'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo',
1336  'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu',
1337  'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu',
1338  'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu',
1339  'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu',
1340  'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu',
1341  'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu',
1342  'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu',
1343  'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu',
1344  'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu',
1345  'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu',
1346  'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju',
1347  'うぇ'=>'we','うぃ'=>'wi',
1348  'いぇ'=>'ye',
1349
1350  // 2 character syllables, っ doubles the consonant after
1351  'っば'=>'bba','っず'=>'bbe','っび'=>'bbi','った'=>'bbo','っぜ'=>'bbu',
1352  'っべ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っち'=>'ppu',
1353  'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu',
1354  'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っお'=>'ddo','っぼ'=>'ddu',
1355  'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu',
1356  'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku',
1357  'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu',
1358  'っな'=>'nna','っね'=>'nne','っき'=>'nni','っぎ'=>'nno','っく'=>'nnu',
1359  'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru',
1360  'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu',
1361  'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu',
1362
1363  // 1 character syllables
1364  'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n',
1365  'は'=>'ha','へ'=>'he','ひ'=>'hi','そ'=>'ho','ま'=>'fu',
1366  'ば'=>'ba','ず'=>'be','び'=>'bi','た'=>'bo','ぜ'=>'bu',
1367  'べ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ち'=>'pu',
1368  'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu',
1369  'だ'=>'da','で'=>'de','ぢ'=>'di','お'=>'do','ぼ'=>'du',
1370  'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu',
1371  'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku',
1372  'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu',
1373  'な'=>'na','ね'=>'ne','き'=>'ni','ぎ'=>'no','く'=>'nu',
1374  'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru',
1375  'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su',
1376  'わ'=>'wa','を'=>'wo',
1377  'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu',
1378  'や'=>'ya','よ'=>'yo','ゆ'=>'yu',
1379  // old characters
1380  'ゑ'=>'we','ゐ'=>'wi',
1381
1382  //  convert what's left (probably only kicks in when something's missing above)
1383  // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u',
1384  // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu',
1385
1386  // never seen one of those (disabled for the moment)
1387  // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu',
1388  // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu',
1389  // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu',
1390  // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu',
1391  // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu',
1392  // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu',
1393  // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu',
1394  // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu',
1395  // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu',
1396  // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu',
1397  // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu',
1398  // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu',
1399  // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu',
1400  // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu',
1401
1402  // 'spare' characters from other romanization systems
1403  // 'だ'=>'da','で'=>'de','ぢ'=>'di','お'=>'do','ぼ'=>'du',
1404  // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu',
1405  // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su',
1406  // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu',
1407  //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu',
1408  //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu',
1409  //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu',
1410  //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu',
1411  //'し'=>'ci','い'=>'yi','ぢ'=>'dzi',
1412  //'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju',
1413
1414
1415  // Japanese katakana
1416
1417  // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usually written with macron, but we don't want that in our URLs)
1418  'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu',
1419  'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu',
1420  'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu',
1421  'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu',
1422  'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu',
1423  'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu',
1424  'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu',
1425  'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu',
1426  'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu',
1427  'ッティー'=>'ttii',
1428  'ッヂィー'=>'ddii',
1429
1430  // 3 character syllables - doubled vowels
1431  'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo',
1432  'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu',
1433  'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu',
1434  'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu',
1435  'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu',
1436  'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu',
1437  'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu',
1438  'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu',
1439  'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu',
1440  'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu',
1441  'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu',
1442  'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu',
1443  'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu',
1444  'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu',
1445  'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu',
1446  'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu',
1447  'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu',
1448  'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu',
1449  'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu',
1450  'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu',
1451  'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu',
1452  'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu',
1453  'ウェー'=>'wee','ウィー'=>'wii',
1454  'イェー'=>'yee',
1455  'ティー'=>'tii',
1456  'ヂィー'=>'dii',
1457
1458  // 3 character syllables - doubled consonants
1459  'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu',
1460  'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu',
1461  'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu',
1462  'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu',
1463  'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu',
1464  'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu',
1465  'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu',
1466  'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu',
1467  'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu',
1468  'ッティ'=>'tti',
1469  'ッヂィ'=>'ddi',
1470
1471  // 3 character syllables - doubled vowel and consonants
1472  'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu',
1473  'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu',
1474  'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa',
1475  'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu',
1476  'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu',
1477  'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu',
1478  'ッナミ'=>'rraa','ッハミ'=>'rree','ッリミ'=>'rrii','ッロミ'=>'rroo','ッネミ'=>'rruu',
1479  'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu',
1480  'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu',
1481  'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu',
1482  'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu',
1483
1484  // 2 character syllables - normal
1485  'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu',
1486  // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu',
1487  'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu',
1488  'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu',
1489  'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu',
1490  'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu',
1491  'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu',
1492  'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu',
1493  'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu',
1494  'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu',
1495  'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu',
1496  'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu',
1497  'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju',
1498  'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu',
1499  'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du',
1500  'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu',
1501  // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu',
1502  'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu',
1503  'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu',
1504  'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu',
1505  'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu',
1506  'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu',
1507  'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu',
1508  'ウェ'=>'we','ウィ'=>'wi',
1509  'イェ'=>'ye',
1510  'ティ'=>'ti',
1511  'ヂィ'=>'di',
1512
1513  // 2 character syllables - doubled vowel
1514  'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu',
1515  'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu',
1516  'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu',
1517  'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu',
1518  'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu',
1519  'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa',
1520  'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu',
1521  'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu',
1522  'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu',
1523  'ナミ'=>'raa','ハミ'=>'ree','リミ'=>'rii','ロミ'=>'roo','ネミ'=>'ruu',
1524  'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu',
1525  'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu',
1526  'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu',
1527  'ワミ'=>'waa','ヲミ'=>'woo',
1528  'ヤミ'=>'yaa','ヨミ'=>'yoo','ヌミ'=>'yuu',
1529  'ヾミ'=>'kaa','ボミ'=>'kee',
1530  // old characters
1531  'ヹミ'=>'wee','ヰミ'=>'wii',
1532
1533  // separate katakana 'n'
1534  'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u',
1535  'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンヌ'=>'n_yu',
1536
1537  // 2 character syllables - doubled consonants
1538  'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu',
1539  'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu',
1540  'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka',
1541  'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu',
1542  'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu',
1543  'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu',
1544  'ッナ'=>'rra','ッハ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッネ'=>'rru',
1545  'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu',
1546  'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu',
1547  'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu',
1548  'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu',
1549
1550  // 1 character syllables
1551  'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n',
1552  'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu',
1553  'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu',
1554  'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu',
1555  'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka',
1556  'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu',
1557  'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu',
1558  'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu',
1559  'ナ'=>'ra','ハ'=>'re','リ'=>'ri','ロ'=>'ro','ネ'=>'ru',
1560  'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su',
1561  'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu',
1562  'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu',
1563  'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du',
1564  'ワ'=>'wa','ヲ'=>'wo',
1565  'ヤ'=>'ya','ヨ'=>'yo','ヌ'=>'yu',
1566  'ヾ'=>'ka','ボ'=>'ke',
1567  // old characters
1568  'ヹ'=>'we','ヰ'=>'wi',
1569
1570  //  convert what's left (probably only kicks in when something's missing above)
1571  'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u',
1572  'ャ'=>'ya','ョ'=>'yo','ー'=>'yu',
1573
1574  // special characters
1575  '・'=>'_','、'=>'_',
1576  'ミ'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise
1577
1578  // 'ナ'=>'la','ハ'=>'le','リ'=>'li','ロ'=>'lo','ネ'=>'lu',
1579  // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu',
1580  //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu',
1581  // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu',
1582  // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu',
1583  //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu',
1584  //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu',
1585  // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu',
1586  // 'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu',
1587  //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu',
1588  //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu',
1589  //'シ'=>'ci','フ'=>'hu','シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi',
1590
1591  // "Greeklish"
1592  'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps',
1593  'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps',
1594
1595  // Thai
1596  'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch',
1597  'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th',
1598  'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th',
1599  'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph',
1600  'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue',
1601  'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h',
1602  'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am',
1603  'ิ'=>'i','฾'=>'i','ผ'=>'ue','฾'=>'ue','ุ'=>'u','บ'=>'u',
1604  'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o',
1605  '฾ยะ'=>'ia','฾ย'=>'ia','มอะ'=>'uea','มอ'=>'uea','ูวะ'=>'ua','ูว'=>'ua',
1606  'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao',
1607  'ุย'=>'ui','อย'=>'oi','มอย'=>'ueai','วย'=>'uai',
1608  'ิว'=>'io','็ว'=>'eo','ียว'=>'iao',
1609  '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'',
1610  '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'',
1611  'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-',
1612  '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4',
1613  '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9',
1614
1615  // Korean
1616  'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p',
1617  'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss',
1618  'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o',
1619  'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we',
1620  'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy',
1621  'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey',
1622);
1623
1624