/wiki/inc/utf8.php
PHP | 1624 lines | 988 code | 149 blank | 487 comment | 168 complexity | f8750418e7437f331696aff455367f49 MD5 | raw file
<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL 2.1 (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * check for mb_string support
 *
 * UTF8_MBSTRING is 1 when mb_substr() exists and UTF8_NOMBSTRING has not been
 * defined, otherwise 0. A value defined before this file is loaded is kept.
 */
if(!defined('UTF8_MBSTRING')){
    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
        define('UTF8_MBSTRING',1);
    }else{
        define('UTF8_MBSTRING',0);
    }
}

if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }

if(!function_exists('utf8_isASCII')){
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
     * @param  string $str
     * @return bool   true if no byte outside 0x00-0x7F was found
     */
    function utf8_isASCII($str){
        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
    }
}

if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Returns a pure ASCII7 string
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $str
     * @return string $str without any byte >= 0x80
     */
    function utf8_strip($str){
        $ascii = '';
        $len   = strlen($str);
        for($i=0; $i<$len; $i++){
            // square brackets: the {} offset syntax no longer parses on PHP 8
            if(ord($str[$i]) < 128){
                $ascii .= $str[$i];
            }
        }
        return $ascii;
    }
}

if(!function_exists('utf8_check')){
    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * Only the byte structure is checked (lead byte followed by the right
     * number of 10bbbbbb bytes); overlong forms are not rejected.
     *
     * @author <bmorel@ssi.fr>
     * @link   http://www.php.net/manual/en/function.utf8-encode.php
     * @param  string $Str
     * @return bool
     */
    function utf8_check($Str) {
        $len = strlen($Str);
        for ($i=0; $i<$len; $i++) {
            $b = ord($Str[$i]);
            if ($b < 0x80) continue; # 0bbbbbbb
            elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb
            elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb
            elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb
            elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb
            elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b
            else return false; # Does not match any model

            for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
                    return false;
            }
        }
        return true;
    }
}

if(!function_exists('utf8_strlen')){
    /**
     * Unicode aware replacement for strlen()
     *
     * utf8_decode() converts characters that are not in ISO-8859-1
     * to '?', which, for the purpose of counting, is alright - It's
     * even faster than mb_strlen.
     *
     * When utf8_decode() is not available the bytes that are not
     * continuation bytes (10bbbbbb) are counted instead, which gives the
     * same result for well-formed UTF-8.
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see    strlen()
     * @see    utf8_decode()
     * @param  string $string
     * @return int    number of characters
     */
    function utf8_strlen($string){
        if(function_exists('utf8_decode')){
            return strlen(utf8_decode($string));
        }
        return (int) preg_match_all('/[^\x80-\xBF]/', $string, $ignored);
    }
}

if(!function_exists('utf8_substr')){
    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     * @param  string  $str
     * @param  integer $offset number of UTF-8 characters offset (from left)
     * @param  integer $length (optional) length in UTF-8 characters from offset
     * @return mixed string or false if failure
     */
    function utf8_substr($str, $offset, $length = null) {
        if(UTF8_MBSTRING){
            if( $length === null ){
                return mb_substr($str, $offset);
            }else{
                return mb_substr($str, $offset, $length);
            }
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
        $offset = (int)$offset;
        if (!is_null($length)) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length < 0 && $length < $offset) return '';

        $offset_pattern = '';
        $length_pattern = '';

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = utf8_strlen($str);              // see notes
            $offset = $strlen + $offset;
            if ($offset < 0) $offset = 0;
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $Ox = (int)($offset/65535);
            $Oy = $offset%65535;

            if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
            $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
        } else {
            $offset_pattern = '^';                    // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if (is_null($length)) {
            $length_pattern = '(.*)$';                // the rest of the string
        } else {

            if (!isset($strlen)) $strlen = utf8_strlen($str);   // see notes
            if ($offset > $strlen) return '';         // another trivial case

            if ($length > 0) {

                $length = min($strlen-$offset, $length);  // reduce any length that would go passed the end of the string

                $Lx = (int)($length/65535);
                $Ly = $length%65535;

                // +ve length requires ... a captured group of length characters
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';

            } else if ($length < 0) {

                if ($length < ($offset - $strlen)) return '';

                $Lx = (int)((-$length)/65535);
                $Ly = (-$length)%65535;

                // -ve length requires ... capture everything except a group of -length characters
                //                         anchored at the tail-end of the string
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
            }
        }

        if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
        return $match[1];
    }
}

if(!function_exists('utf8_substr_replace')){
    /**
     * Unicode aware replacement for substr_replace()
     *
     * Keeps the first $start characters, appends $replacement and then the
     * rest of $string starting at character $start+$length.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    substr_replace()
     * @param  string $string
     * @param  string $replacement
     * @param  int    $start
     * @param  int    $length number of characters to replace (0 inserts)
     * @return string
     */
    function utf8_substr_replace($string, $replacement, $start , $length=0 ){
        $ret = '';
        if($start>0) $ret .= utf8_substr($string, 0, $start);
        $ret .= $replacement;
        $ret .= utf8_substr($string, $start+$length);
        return $ret;
    }
}

if(!function_exists('utf8_ltrim')){
    /**
     * Unicode aware replacement for ltrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    ltrim()
     * @param  string $str
     * @param  string $charlist characters to strip; empty falls back to ltrim()
     * @return string
     */
    function utf8_ltrim($str,$charlist=''){
        if($charlist == '') return ltrim($str);

        //quote charlist for use in a characterclass
        //('^' is quoted as well, unquoted it would negate or break the class)
        $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist);

        return preg_replace('/^['.$charlist.']+/u','',$str);
    }
}

if(!function_exists('utf8_rtrim')){
    /**
     * Unicode aware replacement for rtrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    rtrim()
     * @param  string $str
     * @param  string $charlist characters to strip; empty falls back to rtrim()
     * @return string
     */
    function utf8_rtrim($str,$charlist=''){
        if($charlist == '') return rtrim($str);

        //quote charlist for use in a characterclass
        //('^' is quoted as well, unquoted it would negate or break the class)
        $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist);

        return preg_replace('/['.$charlist.']+$/u','',$str);
    }
}

if(!function_exists('utf8_trim')){
    /**
     * Unicode aware replacement for trim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    trim()
     * @param  string $str
     * @param  string $charlist characters to strip; empty falls back to trim()
     * @return string
     */
    function utf8_trim($str,$charlist='') {
        if($charlist == '') return trim($str);

        return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
    }
}

if(!function_exists('utf8_strtolower')){
    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available, otherwise the global
     * $UTF8_UPPER_TO_LOWER lookup table.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtolower()
     * @see    utf8_strtoupper()
     * @param  string $string
     * @return string
     */
    function utf8_strtolower($string){
        if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8');

        global $UTF8_UPPER_TO_LOWER;
        return strtr($string,$UTF8_UPPER_TO_LOWER);
    }
}

if(!function_exists('utf8_strtoupper')){
    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available, otherwise the global
     * $UTF8_LOWER_TO_UPPER lookup table.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtoupper()
     * @see    utf8_strtolower()
     * @param  string $string
     * @return string
     */
    function utf8_strtoupper($string){
        if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8');

        global $UTF8_LOWER_TO_UPPER;
        return strtr($string,$UTF8_LOWER_TO_UPPER);
    }
}

if(!function_exists('utf8_ucfirst')){
    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first character as upper case (if applicable)
     */
    function utf8_ucfirst($str){
        switch ( utf8_strlen($str) ) {
            case 0:
                return '';
            case 1:
                return utf8_strtoupper($str);
            default:
                // the /u match fails on malformed UTF-8; hand the input back
                // instead of reading an unset $matches
                if(!preg_match('/^(.{1})(.*)$/us', $str, $matches)){
                    return $str;
                }
                return utf8_strtoupper($matches[1]).$matches[2];
        }
    }
}

if(!function_exists('utf8_ucwords')){
    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first char of each word uppercase
     * @see    http://www.php.net/ucwords
     */
    function utf8_ucwords($str) {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
        // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str);
    }

    /**
     * Callback function for preg_replace_callback call in utf8_ucwords
     * You don't need to call this yourself
     *
     * @author Harry Fuecks
     * @param  array $matches matches corresponding to a single word
     * @return string with first char of the word in uppercase
     * @see    utf8_ucwords
     * @see    utf8_strtoupper
     */
    function utf8_ucwords_callback($matches) {
        $leadingws = $matches[2];
        $ucfirst = utf8_strtoupper($matches[3]);
        $ucword = utf8_substr_replace(ltrim($matches[0]),$ucfirst,0,1);
        return $leadingws . $ucword;
    }
}

if(!function_exists('utf8_deaccent')){
    /**
     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
     *
     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
     * letters. Default is to deaccent both cases ($case = 0)
     *
     * The replacements come from the global $UTF8_LOWER_ACCENTS and
     * $UTF8_UPPER_ACCENTS tables.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string
     * @param  int    $case
     * @return string
     */
    function utf8_deaccent($string,$case=0){
        if($case <= 0){
            global $UTF8_LOWER_ACCENTS;
            $string = strtr($string,$UTF8_LOWER_ACCENTS);
        }
        if($case >= 0){
            global $UTF8_UPPER_ACCENTS;
            $string = strtr($string,$UTF8_UPPER_ACCENTS);
        }
        return $string;
    }
}

if(!function_exists('utf8_romanize')){
    /**
     * Romanize a non-latin string
     *
     * Pure ASCII input is returned as is, everything else is run through
     * the global $UTF8_ROMANIZATION table.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string
     * @return string
     */
    function utf8_romanize($string){
        if(utf8_isASCII($string)) return $string; //nothing to do

        global $UTF8_ROMANIZATION;
        return strtr($string,$UTF8_ROMANIZATION);
    }
}

if(!function_exists('utf8_stripspecials')){
    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string
     *
     * This function adds the controlchars 0x00 to 0x19 to the array of
     * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string     The UTF8 string to strip of special chars
     * @param  string $repl       Replace special with this string
     * @param  string $additional Additional chars to strip (used in regexp char class,
     *                            inserted unquoted)
     * @return string
     */
    function utf8_stripspecials($string,$repl='',$additional=''){
        global $UTF8_SPECIAL_CHARS;
        global $UTF8_SPECIAL_CHARS2;

        // quoted once per request, the table does not change
        static $specials = null;
        if(is_null($specials)){
            #$specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/');
            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
        }

        return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string);
    }
}

if(!function_exists('utf8_strpos')){
    /**
     * This is an Unicode aware replacement for strpos
     *
     * The needle is located bytewise with strpos() and the byte position is
     * converted to a character position; the search is repeated further to
     * the right until the hit lies at or behind character $offset.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strpos()
     * @param  string  $haystack
     * @param  string  $needle
     * @param  integer $offset   character offset to start searching at
     * @return integer|false     character position or false if not found
     */
    function utf8_strpos($haystack, $needle, $offset=0){
        $comp = 0;
        $length = null;

        while (is_null($length) || $length < $offset) {
            $pos = strpos($haystack, $needle, $offset + $comp);

            if ($pos === false)
                return false;

            $length = utf8_strlen(substr($haystack, 0, $pos));

            if ($length < $offset)
                $comp = $pos - $length;
        }

        return $length;
    }
}

if(!function_exists('utf8_tohtml')){
    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * ASCII is passed through, code points below 0x100 become decimal
     * references and everything else hexadecimal references.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author <vpribish at shopping dot com>
     * @link   http://www.php.net/manual/en/function.utf8-decode.php
     * @param  string $str
     * @return string
     */
    function utf8_tohtml ($str) {
        $ret = '';
        foreach (utf8_to_unicode($str) as $cp) {
            if ($cp < 0x80)
                $ret .= chr($cp);
            elseif ($cp < 0x100)
                $ret .= "&#$cp;";
            else
                $ret .= '&#x'.dechex($cp).';';
        }
        return $ret;
    }
}

if(!function_exists('utf8_unhtml')){
    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * By default only numeric character references (decimal and
     * hexadecimal) are decoded. When $entities is anything but null, named
     * entities known to get_html_translation_table(HTML_ENTITIES) are
     * decoded in the same single pass, so text produced by decoding one
     * entity is never decoded a second time.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @param  string  $str      UTF-8 encoded string
     * @param  boolean $entities Flag controlling decoding of named entities.
     * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
     */
    function utf8_unhtml($str, $entities=null) {
        static $decoder = null;

        if (is_null($entities))
            return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m',
                                         'utf8_decode_numeric', $str);

        // the lookup table is only built once named entities are requested
        if (is_null($decoder))
            $decoder = new utf8_entity_decoder();
        return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m',
                                     array(&$decoder, 'decode'), $str);
    }
}

if(!function_exists('utf8_decode_numeric')){
    /**
     * Callback for utf8_unhtml(): decodes one numeric character reference
     *
     * @param  array $ent regex match: [0] whole reference, [2] 'x'/'X' for
     *                    hexadecimal references, [3] the digits
     * @return string the UTF-8 character, or the reference unchanged when
     *                the digits are not valid for the given notation
     */
    function utf8_decode_numeric($ent) {
        switch ($ent[2]) {
            case 'X':
            case 'x':
                // hexdec() silently skips non hex digits, so check first
                if (!preg_match('/^[0-9A-Fa-f]+$/', $ent[3])) return $ent[0];
                $cp = hexdec($ent[3]);
                break;
            default:
                // intval() would turn things like "zz" into code point 0
                if (!preg_match('/^[0-9]+$/', $ent[3])) return $ent[0];
                $cp = intval($ent[3]);
                break;
        }
        return unicode_to_utf8(array($cp));
    }
}

if(!class_exists('utf8_entity_decoder')){
    /**
     * Decodes named and numeric HTML entities, used by utf8_unhtml()
     */
    class utf8_entity_decoder {
        /** @var array maps entities ("&auml;") to their UTF-8 character */
        var $table;

        /**
         * Builds the entity lookup table
         */
        function __construct() {
            $table = get_html_translation_table(HTML_ENTITIES);
            $table = array_flip($table);
            $this->table = array_map(array(&$this,'makeutf8'), $table);
        }

        /**
         * Old style constructor, kept for PHP versions without __construct()
         */
        function utf8_entity_decoder() {
            $this->__construct();
        }

        /**
         * Converts one character of the translation table to UTF-8
         *
         * @param  string $c
         * @return string
         */
        function makeutf8($c) {
            // more than one byte means the table already came in a multibyte
            // encoding, ord() would only look at the first byte
            if (strlen($c) > 1) return $c;
            return unicode_to_utf8(array(ord($c)));
        }

        /**
         * preg_replace_callback() callback
         *
         * @param  array $ent regex match, [1] is '#' for numeric references
         * @return string decoded character or the unchanged entity
         */
        function decode($ent) {
            if ($ent[1] == '#') {
                return utf8_decode_numeric($ent);
            } elseif (array_key_exists($ent[0],$this->table)) {
                return $this->table[$ent[0]];
            } else {
                return $ent[0];
            }
        }
    }
}

if(!function_exists('utf8_to_unicode')){
    /**
     * Takes an UTF-8 string and returns an array of ints representing the
     * Unicode characters. Astral planes are supported ie. the ints in the
     * output can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
     * level E_USER_WARNING
     *
     * Note: this function has been modified slightly in this library to
     * trigger errors on encountering bad bytes
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param  string  $str    UTF-8 encoded string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     * @see    unicode_to_utf8
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function utf8_to_unicode($str,$strict=false) {
        $mState = 0;     // cached expected number of octets after the current octet
                         // until the beginning of the next UTF8 character sequence
        $mUcs4  = 0;     // cached Unicode character
        $mBytes = 1;     // cached expected number of octets in the current sequence

        $out = array();

        $len = strlen($str);

        for($i = 0; $i < $len; $i++) {

            // square brackets: the {} offset syntax no longer parses on PHP 8
            $in = ord($str[$i]);

            if ( $mState == 0) {

                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if (0 == (0x80 & ($in))) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;

                } else if (0xC0 == (0xE0 & ($in))) {
                    // First octet of 2 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;

                } else if (0xE0 == (0xF0 & ($in))) {
                    // First octet of 3 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;

                } else if (0xF0 == (0xF8 & ($in))) {
                    // First octet of 4 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;

                } else if (0xF8 == (0xFC & ($in))) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;

                } else if (0xFC == (0xFE & ($in))) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;

                } elseif($strict) {
                    /* Current octet is neither in the US-ASCII range nor a legal first
                     * octet of a multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Illegal sequence identifier '.
                                'in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );
                    return false;

                }

            } else {

                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if (0x80 == (0xC0 & ($in))) {

                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;

                    /**
                     * End of the multi-octet sequence. mUcs4 now contains the final
                     * Unicode codepoint to be output
                     */
                    if (0 == --$mState) {

                        /*
                         * Check for illegal sequences and codepoints.
                         */
                        // From Unicode 3.1, non-shortest form is illegal
                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF)) {

                            if($strict){
                                trigger_error(
                                        'utf8_to_unicode: Illegal sequence or codepoint '.
                                            'in UTF-8 at byte '.$i,
                                        E_USER_WARNING
                                    );

                                return false;
                            }

                        }

                        if (0xFEFF != $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        //initialize UTF8 cache
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }

                } elseif($strict) {
                    /**
                     *((0xC0 & (*in) != 0x80) && (mState != 0))
                     * Incomplete multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Incomplete multi-octet '.
                            '   sequence in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );

                    return false;
                }
            }
        }
        return $out;
    }
}

if(!function_exists('unicode_to_utf8')){
    /**
     * Takes an array of ints representing the Unicode characters and returns
     * a UTF-8 string. Astral planes are supported ie. the ints in the
     * input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * array contains ints that represent surrogates or are outside the
     * Unicode range and raises a PHP error at level E_USER_WARNING
     *
     * The string is built by plain concatenation; no output buffer is
     * opened, so bailing out in strict mode leaves no buffer behind.
     *
     * @param  array   $arr    unicode code points representing a string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed UTF-8 string or false if array contains invalid code points
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    utf8_to_unicode
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function unicode_to_utf8($arr,$strict=false) {
        if (!is_array($arr)) return '';
        $result = '';

        foreach (array_keys($arr) as $k) {

            if ( ($arr[$k] >= 0) && ($arr[$k] <= 0x007f) ) {
                # ASCII range (including control chars)

                $result .= chr($arr[$k]);

            } else if ($arr[$k] <= 0x07ff) {
                # 2 byte sequence

                $result .= chr(0xc0 | ($arr[$k] >> 6));
                $result .= chr(0x80 | ($arr[$k] & 0x003f));

            } else if($arr[$k] == 0xFEFF) {
                # Byte order mark (skip)

                // nop -- zap the BOM

            } else if ($arr[$k] >= 0xD800 && $arr[$k] <= 0xDFFF) {
                # Test for illegal surrogates

                // found a surrogate
                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Illegal surrogate '.
                            'at index: '.$k.', value: '.$arr[$k],
                        E_USER_WARNING
                        );
                    return false;
                }

            } else if ($arr[$k] <= 0xffff) {
                # 3 byte sequence

                $result .= chr(0xe0 | ($arr[$k] >> 12));
                $result .= chr(0x80 | (($arr[$k] >> 6) & 0x003f));
                $result .= chr(0x80 | ($arr[$k] & 0x003f));

            } else if ($arr[$k] <= 0x10ffff) {
                # 4 byte sequence

                $result .= chr(0xf0 | ($arr[$k] >> 18));
                $result .= chr(0x80 | (($arr[$k] >> 12) & 0x3f));
                $result .= chr(0x80 | (($arr[$k] >> 6) & 0x3f));
                $result .= chr(0x80 | ($arr[$k] & 0x3f));

            } elseif($strict) {

                trigger_error(
                    'unicode_to_utf8: Codepoint out of Unicode range '.
                        'at index: '.$k.', value: '.$arr[$k],
                    E_USER_WARNING
                    );

                // out of range
                return false;
            }
        }

        return $result;
    }
}

if(!function_exists('utf8_to_utf16be')){
    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Without mb_string the string is converted code point by code point;
     * code points above U+FFFF are written as surrogate pairs.
     *
     * @param  string $str UTF-8 string (taken by reference, not modified)
     * @param  bool   $bom prepend the big endian byte order mark?
     * @return string
     */
    function utf8_to_utf16be(&$str, $bom = false) {
        $out = $bom ? "\xFE\xFF" : '';
        if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

        $uni = utf8_to_unicode($str);
        foreach($uni as $cp){
            if($cp > 0xFFFF && $cp <= 0x10FFFF){
                // does not fit into one 16 bit unit
                $cp  -= 0x10000;
                $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            }else{
                $out .= pack('n',$cp);
            }
        }
        return $out;
    }
}

if(!function_exists('utf16be_to_utf8')){
    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * Surrogate pairs are joined into one code point. Unpaired surrogates
     * are handed to unicode_to_utf8() as they are.
     *
     * @param  string $str UTF-16BE string (taken by reference, not modified)
     * @return string
     */
    function utf16be_to_utf8(&$str) {
        $units = unpack('n*',$str);
        if(!is_array($units)) return '';

        $uni  = array();
        $lead = null;   // lead surrogate waiting for its trail unit
        foreach($units as $unit){
            if($lead !== null){
                if($unit >= 0xDC00 && $unit <= 0xDFFF){
                    $uni[] = 0x10000 + (($lead - 0xD800) << 10) + ($unit - 0xDC00);
                    $lead  = null;
                    continue;
                }
                $uni[] = $lead;   // no trail unit followed
                $lead  = null;
            }
            if($unit >= 0xD800 && $unit <= 0xDBFF){
                $lead = $unit;
            }else{
                $uni[] = $unit;
            }
        }
        if($lead !== null) $uni[] = $lead;

        return unicode_to_utf8($uni);
    }
}

if(!function_exists('utf8_bad_replace')){
    /**
     * Replace bad bytes with an alternative character
     *
     * ASCII character is recommended for replacement char
     *
     * PCRE Pattern to locate bad bytes in a UTF-8 string
     * Comes from W3 FAQ: Multilingual Forms
     * Note: modified to include full ASCII range including control chars
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    http://www.w3.org/International/questions/qa-forms-utf-8
     * @param  string $str     string to search
     * @param  string $replace string to replace bad bytes with (defaults to
     *                         '', which removes them) - use ASCII
     * @return string
     */
    function utf8_bad_replace($str, $replace = '') {
        $UTF8_BAD =
         '([\x00-\x7F]'.                          # ASCII (including control chars)
         '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
         '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
         '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
         '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
         '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
         '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
         '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
         '|(.{1}))';                              # invalid byte

        $str    = (string) $str;
        $result = '';
        $len    = strlen($str);
        $pos    = 0;

        // Every byte is matched by one of the alternatives, so each match
        // starts right at $pos. Walking an offset avoids copying the rest of
        // the string for every character.
        while ($pos < $len && preg_match('/'.$UTF8_BAD.'/S', $str, $matches, 0, $pos)) {
            if ( !isset($matches[2])) {
                $result .= $matches[0];   // well formed character
            } else {
                $result .= $replace;      // only the catch-all group matched
            }
            $pos += strlen($matches[0]);
        }
        return $result;
    }
}

if(!function_exists('utf8_correctIdx')){
    /**
     * adjust a byte index into a utf8 string to a utf8 character boundary
     *
     * @param $str   string   utf8 character string
     * @param $i     int      byte index into $str
     * @param $next  bool     direction to search for boundary,
     *                           false = up (current character)
     *                           true = down (next character)
     *
     * @return int            byte index into $str now pointing to a utf8 character boundary
     *
     * @author       chris smith <chris@jalakai.co.uk>
     */
    function utf8_correctIdx(&$str,$i,$next=false) {

        if ($i <= 0) return 0;

        $limit = strlen($str);
        if ($i>=$limit) return $limit;

        // bytes of the form 10bbbbbb are continuation bytes, step over them
        if ($next) {
            while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
        } else {
            while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
        }

        return $i;
    }
}

// only needed if no mb_string available
if(!UTF8_MBSTRING){
    /**
     * UTF-8 Case lookup table
     *
     * This lookuptable defines the upper case letters to their correspponding
     * lower case letter in UTF-8
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_LOWER_TO_UPPER;
    if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array(
            "ď˝"=>"ďźş","ď˝"=>"ďźš","ď˝"=>"","ď˝"=>"","ď˝"=>"ďźś","ď˝"=>"ďźľ","ď˝"=>"ďź´","ď˝"=>"ďźł","ď˝"=>"","ď˝"=>"ďźą",
            "ď˝"=>"ďź°","ď˝"=>"","ď˝"=>"","ď˝"=>"ďź","ď˝"=>"","ď˝"=>"","ď˝"=>"","ď˝"=>"","ď˝"=>"","ď˝"=>"ďź§",
            "ď˝"=>"","ď˝ "=>"","ď˝"=>"","ď˝"=>"","ď˝"=>"","ď˝"=>"","áżł"=>"áżź","ῼ"=>"῏","ῥ"=>"Ὴ","áż"=>"áż",
"áż"=>"áż","áż"=>"áż","ážž"=>"Î","ážł"=>"ážź","ážą"=>"ážš","áž°"=>"ី","áž§"=>"ឯ","ឌ"=>"ណ","ូ"=>"áž","ឤ"=>"ត", 935 "ឣ"=>"ឍ","អ"=>"ឪ","ឥ"=>"ដ","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž", 936 "áž"=>"áž","áž"=>"áž","áž"=>"áž","áž "=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","á˝˝"=>"áżť", 937 "Ὗ"=>"áżş","Ὕ"=>"῍","ὺ"=>"Ὺ","὚"=>"áżš","ὸ"=>"Ὸ","ὡ"=>"áż","὜"=>"áż","὾"=>"áż","á˝´"=>"áż","έ"=>"áż", 938 "ὲ"=>"áż","ό"=>"ážť","á˝°"=>"ážş","á˝§"=>"Ὧ","Ὄ"=>"὎","ὼ"=>"á˝","ὤ"=>"὏","ὣ"=>"Ὅ","ὢ"=>"Ὢ","ὥ"=>"Ὂ", 939 "á˝"=>"á˝","á˝"=>"á˝","á˝"=>"á˝","á˝"=>"á˝","á˝ "=>"á˝","á˝"=>"á˝","á˝"=>"á˝","á˝"=>"á˝","á˝"=>"á˝","á˝"=>"á˝", 940 "១"=>"áźż","áźś"=>"áźž","áźľ"=>"áź˝","áź´"=>"áźź","áźł"=>"áźť","៲"=>"áźş","áźą"=>"áźš","áź°"=>"៸","áź§"=>"៯","៌"=>"៎", 941 "៼"=>"áź","៤"=>"៏","៣"=>"៍","២"=>"៪","៥"=>"៊","áź"=>"áź","áź"=>"áź","áź"=>"áź","áź"=>"áź","áź"=>"áź", 942 "áź"=>"áź","áź"=>"áź","áź"=>"áź","áź "=>"áź","áź"=>"áź","áź"=>"áź","áź"=>"áź","áź"=>"áź","áź"=>"áź","áťš"=>"᝸", 943 "ᝡ"=>"áťś","áťľ"=>"áť´","áťł"=>"ᝲ","áťą"=>"áť°","ᝯ"=>"ᝎ","áť"=>"ᝏ","ᝍ"=>"ᝪ","ᝊ"=>"ᝨ","áť§"=>"ᝌ","᝼"=>"ᝤ", 944 "ᝣ"=>"ᝢ","ᝥ"=>"áť ","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť", 945 "áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť "=>"áť","áť"=>"áť","áť"=>"áť","áşż"=>"áşž","áş˝"=>"áşź", 946 "áşť"=>"áşş","áşš"=>"Ẹ","ạ"=>"áşś","áşľ"=>"áş´","áşł"=>"Ẳ","áşą"=>"áş°","ắ"=>"Ẏ","áş"=>"ẏ","ẍ"=>"Ẫ","Ẋ"=>"Ẩ", 947 "áş§"=>"Ẍ","Ẽ"=>"Ấ","ả"=>"Ả","ấ"=>"áş ","áş"=>"áš ","áş"=>"áş","áş"=>"áş","áş"=>"áş","áş"=>"áş","áş"=>"áş", 948 "áş"=>"áş","áş"=>"áş","áş"=>"áş","áş "=>"áş","áş"=>"áş","áş"=>"áş","ášż"=>"ášž","áš˝"=>"ášź","ášť"=>"ášş","ášš"=>"ᚸ", 949 "ᚡ"=>"ášś","ášľ"=>"áš´","ášł"=>"ᚲ","ášą"=>"áš°","ᚯ"=>"ᚎ","áš"=>"ᚏ","ᚍ"=>"ᚪ","ᚊ"=>"ᚨ","áš§"=>"ᚌ","ᚼ"=>"ᚤ", 950 "ᚣ"=>"ᚢ","ᚥ"=>"áš ","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš", 951 "áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš 
"=>"áš","áš"=>"áš","áš"=>"áš","ḿ"=>"Ḟ","ḽ"=>"ḟ", 952 "ḝ"=>"Ḻ","Ḛ"=>"Ḹ","ḡ"=>"Ḝ","Ḿ"=>"Ḵ","ḳ"=>"Ḳ","ḹ"=>"Ḱ","ḯ"=>"Ḏ","á¸"=>"ḏ","ḍ"=>"Ḫ","Ḋ"=>"Ḩ", 953 "ḧ"=>"Ḍ","Ḽ"=>"Ḥ","ḣ"=>"Ḣ","ḥ"=>"Ḡ","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸", 954 "á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","Ḡ"=>"á¸","á¸"=>"á¸","á¸"=>"á¸", 955 "Ö"=>"Ő","Ö "=>"Ő","Ö"=>"Ő","Ö"=>"Ő","Ö"=>"Ő","Ö"=>"Ő","Ö"=>"Ő","Őż"=>"Ő","Őž"=>"Ő","Ő˝"=>"Ő", 956 "Őź"=>"Ő","Őť"=>"Ő","Őş"=>"Ő","Őš"=>"Ő","Ő¸"=>"Ő","Őˇ"=>"Ő","Őś"=>"Ő","Őľ"=>"Ő ","Ő´"=>"Ő","Őł"=>"Ő", 957 "Ő˛"=>"Ő","Őą"=>"Ő","Ő°"=>"Ő","ŐŻ"=>"Ôż","ŐŽ"=>"Ôž","Ő"=>"Ô˝","ŐŹ"=>"Ôź","ŐŤ"=>"Ôť","ŐŞ"=>"Ôş","ŐŠ"=>"Ôš", 958 "Ő¨"=>"Ô¸","Ő§"=>"Ôˇ","ŐŚ"=>"Ôś","ŐĽ"=>"Ôľ","Ő¤"=>"Ô´","ŐŁ"=>"Ôł","Ő˘"=>"Ô˛","ŐĄ"=>"Ôą","Ô"=>"Ô","Ô"=>"Ô", 959 "Ô"=>"Ô","Ô"=>"Ô","Ô"=>"Ô","Ô "=>"Ô","Ô"=>"Ô","Ô"=>"Ô","Óš"=>"Ó¸","Óľ"=>"Ó´","Ół"=>"Ó˛","Óą"=>"Ó°", 960 "ÓŻ"=>"ÓŽ","Ó"=>"ÓŹ","ÓŤ"=>"ÓŞ","ÓŠ"=>"Ó¨","Ó§"=>"ÓŚ","ÓĽ"=>"Ó¤","ÓŁ"=>"Ó˘","ÓĄ"=>"Ó ","Ó"=>"Ó","Ó"=>"Ó", 961 "Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó", 962 "Ó"=>"Ó ","Ó"=>"Ó","Ó"=>"Ó","Ňż"=>"Ňž","Ň˝"=>"Ňź","Ňť"=>"Ňş","Ňš"=>"Ҹ","ҡ"=>"Ňś","Ňľ"=>"Ň´","Ňł"=>"Ҳ", 963 "Ňą"=>"Ұ","ŇŻ"=>"ŇŽ","Ň"=>"ŇŹ","ŇŤ"=>"ŇŞ","ŇŠ"=>"Ҩ","ҧ"=>"ŇŚ","ŇĽ"=>"Ҥ","ŇŁ"=>"Ң","ŇĄ"=>"Ň ","Ň"=>"Ň", 964 "Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň", 965 "Ň"=>"Ň","Ńż"=>"Ńž","Ń˝"=>"Ńź","Ńť"=>"Ńş","Ńš"=>"Ѹ","ѡ"=>"Ńś","Ńľ"=>"Ń´","Ńł"=>"Ѳ","Ńą"=>"Ѱ","ŃŻ"=>"ŃŽ", 966 "Ń"=>"ŃŹ","ŃŤ"=>"ŃŞ","ŃŠ"=>"Ѩ","ѧ"=>"ŃŚ","ŃĽ"=>"Ѥ","ŃŁ"=>"Ѣ","ŃĄ"=>"Ń ","Ń"=>"Đ","Ń"=>"Đ","Ń"=>"Đ", 967 "Ń"=>"Đ","Ń"=>"Đ","Ń"=>"Đ","Ń"=>"Đ","Ń"=>"Đ","Ń"=>"Đ","Ń"=>"Đ","Ń"=>"Đ ","Ń"=>"Đ","Ń"=>"Đ", 968 "Ń"=>"Đ","Ń"=>"Đ","Ń"=>"Đ","Ń"=>"ĐŻ","Ń"=>"ĐŽ","Ń"=>"Đ","Ń"=>"ĐŹ","Ń"=>"ĐŤ","Ń"=>"ĐŞ","Ń"=>"ĐŠ", 969 "Ń"=>"Ш","Ń"=>"Ч","Ń"=>"ĐŚ","Ń "=>"ĐĽ","Ń"=>"Ф","Ń"=>"ĐŁ","Ń"=>"Т","Ń"=>"ĐĄ","Ń"=>"Đ ","Đż"=>"Đ", 970 
"Đž"=>"Đ","Đ˝"=>"Đ","Đź"=>"Đ","Đť"=>"Đ","Đş"=>"Đ","Đš"=>"Đ","и"=>"Đ","С"=>"Đ","Đś"=>"Đ","Đľ"=>"Đ", 971 "Đ´"=>"Đ","Đł"=>"Đ","в"=>"Đ","Đą"=>"Đ","а"=>"Đ","Ďľ"=>"Î","ϲ"=>"ÎŁ","Ďą"=>"ÎĄ","ϰ"=>"Î","ĎŻ"=>"ĎŽ", 972 "Ď"=>"ĎŹ","ĎŤ"=>"ĎŞ","ĎŠ"=>"Ϩ","ϧ"=>"ĎŚ","ĎĽ"=>"Ϥ","ĎŁ"=>"Ϣ","ĎĄ"=>"Ď ","Ď"=>"Ď","Ď"=>"Ď","Ď"=>"Ď", 973 "Ď"=>"Ď","Ď"=>"Î ","Ď"=>"ÎŚ","Ď"=>"Î","Ď"=>"Î","Ď"=>"Î","Ď"=>"Î","Ď"=>"Î","Ď"=>"ÎŤ","Ď"=>"ÎŞ", 974 "Ď"=>"Ί","Ď"=>"Ψ","Ď"=>"Χ","Ď"=>"ÎŚ","Ď "=>"ÎĽ","Ď"=>"Τ","Ď"=>"ÎŁ","Ď"=>"ÎŁ","Ď"=>"ÎĄ","Ď"=>"Î ", 975 "Îż"=>"Î","Ξ"=>"Î","ν"=>"Î","Îź"=>"Î","Îť"=>"Î","Îş"=>"Î","Κ"=>"Î","θ"=>"Î","Ρ"=>"Î","Îś"=>"Î", 976 "Îľ"=>"Î","δ"=>"Î","Îł"=>"Î","β"=>"Î","Îą"=>"Î","ÎŻ"=>"Î","ÎŽ"=>"Î","Î"=>"Î","ÎŹ"=>"Î","Ę"=>"ơ", 977 "Ę"=>"Ʋ","Ę"=>"Ćą","Ę"=>"ĆŽ","Ę"=>"ĆŠ","Ę"=>"ĆŚ","Éľ"=>"Ć","ɲ"=>"Ć","ÉŻ"=>"Ć","ÉŠ"=>"Ć","ɨ"=>"Ć", 978 "ÉŁ"=>"Ć","É"=>"Ć","É"=>"Ć","É"=>"Ć","É"=>"Ć","É"=>"Ć","É"=>"Ć","Čł"=>"Ȳ","Čą"=>"Ȱ","ČŻ"=>"ČŽ", 979 "Č"=>"ČŹ","ČŤ"=>"ČŞ","ČŠ"=>"Ȩ","ȧ"=>"ČŚ","ČĽ"=>"Ȥ","ČŁ"=>"Ȣ","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č", 980 "Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č "=>"Č", 981 "Č"=>"Č","Č"=>"Č","Çż"=>"Çž","Ç˝"=>"Çź","Çť"=>"Çş","Çš"=>"Ǹ","Çľ"=>"Ç´","Çł"=>"Dz","ÇŻ"=>"ÇŽ","Ç"=>"ÇŹ", 982 "ÇŤ"=>"ÇŞ","ÇŠ"=>"Ǩ","ǧ"=>"ÇŚ","ÇĽ"=>"Ǥ","ÇŁ"=>"Ǣ","ÇĄ"=>"Ç ","Ç"=>"Ç","Ç"=>"Ć","Ç"=>"Ç","Ç"=>"Ç", 983 "Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç ","Ćż"=>"ǡ", 984 "Ć˝"=>"Ćź","Ćš"=>"Ƹ","Ćś"=>"Ćľ","Ć´"=>"Ćł","ư"=>"ĆŻ","Ć"=>"ĆŹ","ƨ"=>"Ƨ","ĆĽ"=>"Ƥ","ĆŁ"=>"Ƣ","ĆĄ"=>"Ć ", 985 "Ć"=>"Č ","Ć"=>"Ć","Ć"=>"Çś","Ć"=>"Ć","Ć"=>"Ć","Ć"=>"Ć","Ć "=>"Ć","Ć"=>"Ć","Ĺż"=>"S","Ĺž"=>"Ĺ˝", 986 "Ĺź"=>"Ĺť","Ĺş"=>"Ĺš","š"=>"Ĺś","Ĺľ"=>"Ĺ´","Ĺł"=>"Ų","Ĺą"=>"Ű","ĹŻ"=>"ĹŽ","Ĺ"=>"ĹŹ","ĹŤ"=>"ĹŞ","ĹŠ"=>"Ũ", 987 "ŧ"=>"ĹŚ","ĹĽ"=>"Ť","ĹŁ"=>"Ţ","ĹĄ"=>"Ĺ ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ", 988 "Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Äż", 989 
"Äž"=>"Ä˝","Äź"=>"Äť","Äş"=>"Äš","ġ"=>"Äś","Äľ"=>"Ä´","Äł"=>"IJ","Äą"=>"I","ÄŻ"=>"ÄŽ","Ä"=>"ÄŹ","ÄŤ"=>"ÄŞ", 990 "ÄŠ"=>"Ĩ","ħ"=>"ÄŚ","ÄĽ"=>"Ĥ","ÄŁ"=>"Ģ","ÄĄ"=>"Ä ","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä", 991 "Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä "=>"Ä","Ä"=>"Ä", 992 "Ä"=>"Ä","Ăż"=>"Ÿ","Ăž"=>"Ă","Ă˝"=>"Ă","Ăź"=>"Ă","Ăť"=>"Ă","Ăş"=>"Ă","Ăš"=>"Ă","ø"=>"Ă","Ăś"=>"Ă", 993 "Ăľ"=>"Ă","Ă´"=>"Ă","Ăł"=>"Ă","ò"=>"Ă","Ăą"=>"Ă","ð"=>"Ă","ĂŻ"=>"Ă","ĂŽ"=>"Ă","Ă"=>"Ă","ĂŹ"=>"Ă", 994 "ĂŤ"=>"Ă","ĂŞ"=>"Ă","ĂŠ"=>"Ă","è"=>"Ă","ç"=>"Ă","ĂŚ"=>"Ă","ĂĽ"=>"Ă ","ä"=>"Ă","ĂŁ"=>"Ă","â"=>"Ă", 995 "ĂĄ"=>"Ă","Ă "=>"Ă","Âľ"=>"Î","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T", 996 "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J", 997 "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A" 998 ); 999 1000 /** 1001 * UTF-8 Case lookup table 1002 * 1003 * This lookuptable defines the lower case letters to their correspponding 1004 * upper case letter in UTF-8 1005 * 1006 * @author Andreas Gohr <andi@splitbrain.org> 1007 */ 1008 global $UTF8_UPPER_TO_LOWER; 1009 if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array ( 1010 "ďźş"=>"ď˝","ďźš"=>"ď˝",""=>"ď˝",""=>"ď˝","ďźś"=>"ď˝","ďźľ"=>"ď˝","ďź´"=>"ď˝","ďźł"=>"ď˝",""=>"ď˝","ďźą"=>"ď˝", 1011 "ďź°"=>"ď˝",""=>"ď˝",""=>"ď˝","ďź"=>"ď˝",""=>"ď˝",""=>"ď˝",""=>"ď˝",""=>"ď˝",""=>"ď˝","ďź§"=>"ď˝", 1012 ""=>"ď˝",""=>"ď˝ ",""=>"ď˝",""=>"ď˝",""=>"ď˝",""=>"ď˝","áżź"=>"áżł","῏"=>"ῼ","Ὴ"=>"ῥ","áż"=>"áż", 1013 "áż"=>"áż","áż"=>"áż","Î"=>"ážž","ážź"=>"ážł","ážš"=>"ážą","ី"=>"áž°","ឯ"=>"áž§","ណ"=>"ឌ","áž"=>"ូ","ត"=>"ឤ", 1014 "ឍ"=>"ឣ","ឪ"=>"អ","ដ"=>"ឥ","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž", 1015 "áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž ","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áżť"=>"á˝˝", 1016 
"áżş"=>"Ὗ","῍"=>"Ὕ","Ὺ"=>"ὺ","áżš"=>"὚","Ὸ"=>"ὸ","áż"=>"ὡ","áż"=>"὜","áż"=>"὾","áż"=>"á˝´","áż"=>"έ", 1017 "áż"=>"ὲ","ážť"=>"ό","ážş"=>"á˝°","Ὧ"=>"á˝§","὎"=>"Ὄ","á˝"=>"ὼ","὏"=>"ὤ","Ὅ"=>"ὣ","Ὢ"=>"ὢ","Ὂ"=>"ὥ", 1018 "á˝"=>"á˝","á˝"=>"á˝","á˝"=>"á˝","á˝"=>"á˝","á˝"=>"á˝ ","á˝"=>"á˝","á˝"=>"á˝","á˝"=>"á˝","á˝"=>"á˝","á˝"=>"á˝", 1019 "áźż"=>"១","áźž"=>"áźś","áź˝"=>"áźľ","áźź"=>"áź´","áźť"=>"áźł","áźş"=>"៲","áźš"=>"áźą","៸"=>"áź°","៯"=>"áź§","៎"=>"៌", 1020 "áź"=>"៼","៏"=>"៤","៍"=>"៣","៪"=>"២","៊"=>"៥","áź"=>"áź","áź"=>"áź","áź"=>"áź","áź"=>"áź","áź"=>"áź", 1021 "áź"=>"áź","áź"=>"áź","áź"=>"áź","áź"=>"áź ","áź"=>"áź","áź"=>"áź","áź"=>"áź","áź"=>"áź","áź"=>"áź","᝸"=>"áťš", 1022 "áťś"=>"ᝡ","áť´"=>"áťľ","ᝲ"=>"áťł","áť°"=>"áťą","ᝎ"=>"ᝯ","ᝏ"=>"áť","ᝪ"=>"ᝍ","ᝨ"=>"ᝊ","ᝌ"=>"áť§","ᝤ"=>"᝼", 1023 "ᝢ"=>"ᝣ","áť "=>"ᝥ","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť", 1024 "áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť","áť"=>"áť ","áť"=>"áť","áť"=>"áť","áşž"=>"áşż","áşź"=>"áş˝", 1025 "áşş"=>"áşť","Ẹ"=>"áşš","áşś"=>"ạ","áş´"=>"áşľ","Ẳ"=>"áşł","áş°"=>"áşą","Ẏ"=>"ắ","ẏ"=>"áş","Ẫ"=>"ẍ","Ẩ"=>"Ẋ", 1026 "Ẍ"=>"áş§","Ấ"=>"Ẽ","Ả"=>"ả","áş "=>"ấ","áš "=>"áş","áş"=>"áş","áş"=>"áş","áş"=>"áş","áş"=>"áş","áş"=>"áş", 1027 "áş"=>"áş","áş"=>"áş","áş"=>"áş","áş"=>"áş ","áş"=>"áş","áş"=>"áş","ášž"=>"ášż","ášź"=>"áš˝","ášş"=>"ášť","ᚸ"=>"ášš", 1028 "ášś"=>"ᚡ","áš´"=>"ášľ","ᚲ"=>"ášł","áš°"=>"ášą","ᚎ"=>"ᚯ","ᚏ"=>"áš","ᚪ"=>"ᚍ","ᚨ"=>"ᚊ","ᚌ"=>"áš§","ᚤ"=>"ᚼ", 1029 "ᚢ"=>"ᚣ","áš "=>"ᚥ","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš", 1030 "áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš","áš"=>"áš ","áš"=>"áš","áš"=>"áš","Ḟ"=>"ḿ","ḟ"=>"ḽ", 1031 "Ḻ"=>"ḝ","Ḹ"=>"Ḛ","Ḝ"=>"ḡ","Ḵ"=>"Ḿ","Ḳ"=>"ḳ","Ḱ"=>"ḹ","Ḏ"=>"ḯ","ḏ"=>"á¸","Ḫ"=>"ḍ","Ḩ"=>"Ḋ", 1032 "Ḍ"=>"ḧ","Ḥ"=>"Ḽ","Ḣ"=>"ḣ","Ḡ"=>"ḥ","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸", 1033 
"á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"á¸","á¸"=>"Ḡ","á¸"=>"á¸","á¸"=>"á¸", 1034 "Ő"=>"Ö","Ő"=>"Ö ","Ő"=>"Ö","Ő"=>"Ö","Ő"=>"Ö","Ő"=>"Ö","Ő"=>"Ö","Ő"=>"Őż","Ő"=>"Őž","Ő"=>"Ő˝", 1035 "Ő"=>"Őź","Ő"=>"Őť","Ő"=>"Őş","Ő"=>"Őš","Ő"=>"Ő¸","Ő"=>"Őˇ","Ő"=>"Őś","Ő "=>"Őľ","Ő"=>"Ő´","Ő"=>"Őł", 1036 "Ő"=>"Ő˛","Ő"=>"Őą","Ő"=>"Ő°","Ôż"=>"ŐŻ","Ôž"=>"ŐŽ","Ô˝"=>"Ő","Ôź"=>"ŐŹ","Ôť"=>"ŐŤ","Ôş"=>"ŐŞ","Ôš"=>"ŐŠ", 1037 "Ô¸"=>"Ő¨","Ôˇ"=>"Ő§","Ôś"=>"ŐŚ","Ôľ"=>"ŐĽ","Ô´"=>"Ő¤","Ôł"=>"ŐŁ","Ô˛"=>"Ő˘","Ôą"=>"ŐĄ","Ô"=>"Ô","Ô"=>"Ô", 1038 "Ô"=>"Ô","Ô"=>"Ô","Ô"=>"Ô","Ô"=>"Ô ","Ô"=>"Ô","Ô"=>"Ô","Ó¸"=>"Óš","Ó´"=>"Óľ","Ó˛"=>"Ół","Ó°"=>"Óą", 1039 "ÓŽ"=>"ÓŻ","ÓŹ"=>"Ó","ÓŞ"=>"ÓŤ","Ó¨"=>"ÓŠ","ÓŚ"=>"Ó§","Ó¤"=>"ÓĽ","Ó˘"=>"ÓŁ","Ó "=>"ÓĄ","Ó"=>"Ó","Ó"=>"Ó", 1040 "Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó", 1041 "Ó "=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ňž"=>"Ňż","Ňź"=>"Ň˝","Ňş"=>"Ňť","Ҹ"=>"Ňš","Ňś"=>"ҡ","Ň´"=>"Ňľ","Ҳ"=>"Ňł", 1042 "Ұ"=>"Ňą","ŇŽ"=>"ŇŻ","ŇŹ"=>"Ň","ŇŞ"=>"ŇŤ","Ҩ"=>"ŇŠ","ŇŚ"=>"ҧ","Ҥ"=>"ŇĽ","Ң"=>"ŇŁ","Ň "=>"ŇĄ","Ň"=>"Ň", 1043 "Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň","Ň"=>"Ň", 1044 "Ň"=>"Ň","Ńž"=>"Ńż","Ńź"=>"Ń˝","Ńş"=>"Ńť","Ѹ"=>"Ńš","Ńś"=>"ѡ","Ń´"=>"Ńľ","Ѳ"=>"Ńł","Ѱ"=>"Ńą","ŃŽ"=>"ŃŻ", 1045 "ŃŹ"=>"Ń","ŃŞ"=>"ŃŤ","Ѩ"=>"ŃŠ","ŃŚ"=>"ѧ","Ѥ"=>"ŃĽ","Ѣ"=>"ŃŁ","Ń "=>"ŃĄ","Đ"=>"Ń","Đ"=>"Ń","Đ"=>"Ń", 1046 "Đ"=>"Ń","Đ"=>"Ń","Đ"=>"Ń","Đ"=>"Ń","Đ"=>"Ń","Đ"=>"Ń","Đ"=>"Ń","Đ "=>"Ń","Đ"=>"Ń","Đ"=>"Ń", 1047 "Đ"=>"Ń","Đ"=>"Ń","Đ"=>"Ń","ĐŻ"=>"Ń","ĐŽ"=>"Ń","Đ"=>"Ń","ĐŹ"=>"Ń","ĐŤ"=>"Ń","ĐŞ"=>"Ń","ĐŠ"=>"Ń", 1048 "Ш"=>"Ń","Ч"=>"Ń","ĐŚ"=>"Ń","ĐĽ"=>"Ń ","Ф"=>"Ń","ĐŁ"=>"Ń","Т"=>"Ń","ĐĄ"=>"Ń","Đ "=>"Ń","Đ"=>"Đż", 1049 "Đ"=>"Đž","Đ"=>"Đ˝","Đ"=>"Đź","Đ"=>"Đť","Đ"=>"Đş","Đ"=>"Đš","Đ"=>"и","Đ"=>"С","Đ"=>"Đś","Đ"=>"Đľ", 1050 "Đ"=>"Đ´","Đ"=>"Đł","Đ"=>"в","Đ"=>"Đą","Đ"=>"а","Î"=>"Ďľ","ÎŁ"=>"ϲ","ÎĄ"=>"Ďą","Î"=>"ϰ","ĎŽ"=>"ĎŻ", 1051 "ĎŹ"=>"Ď","ĎŞ"=>"ĎŤ","Ϩ"=>"ĎŠ","ĎŚ"=>"ϧ","Ϥ"=>"ĎĽ","Ϣ"=>"ĎŁ","Ď "=>"ĎĄ","Ď"=>"Ď","Ď"=>"Ď","Ď"=>"Ď", 
1052 "Ď"=>"Ď","Î "=>"Ď","ÎŚ"=>"Ď","Î"=>"Ď","Î"=>"Ď","Î"=>"Ď","Î"=>"Ď","Î"=>"Ď","ÎŤ"=>"Ď","ÎŞ"=>"Ď", 1053 "Ί"=>"Ď","Ψ"=>"Ď","Χ"=>"Ď","ÎŚ"=>"Ď","ÎĽ"=>"Ď ","Τ"=>"Ď","ÎŁ"=>"Ď","ÎŁ"=>"Ď","ÎĄ"=>"Ď","Î "=>"Ď", 1054 "Î"=>"Îż","Î"=>"Ξ","Î"=>"ν","Î"=>"Îź","Î"=>"Îť","Î"=>"Îş","Î"=>"Κ","Î"=>"θ","Î"=>"Ρ","Î"=>"Îś", 1055 "Î"=>"Îľ","Î"=>"δ","Î"=>"Îł","Î"=>"β","Î"=>"Îą","Î"=>"ÎŻ","Î"=>"ÎŽ","Î"=>"Î","Î"=>"ÎŹ","ơ"=>"Ę", 1056 "Ʋ"=>"Ę","Ćą"=>"Ę","ĆŽ"=>"Ę","ĆŠ"=>"Ę","ĆŚ"=>"Ę","Ć"=>"Éľ","Ć"=>"ɲ","Ć"=>"ÉŻ","Ć"=>"ÉŠ","Ć"=>"ɨ", 1057 "Ć"=>"ÉŁ","Ć"=>"É","Ć"=>"É","Ć"=>"É","Ć"=>"É","Ć"=>"É","Ć"=>"É","Ȳ"=>"Čł","Ȱ"=>"Čą","ČŽ"=>"ČŻ", 1058 "ČŹ"=>"Č","ČŞ"=>"ČŤ","Ȩ"=>"ČŠ","ČŚ"=>"ȧ","Ȥ"=>"ČĽ","Ȣ"=>"ČŁ","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č", 1059 "Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č","Č"=>"Č ", 1060 "Č"=>"Č","Č"=>"Č","Çž"=>"Çż","Çź"=>"Ç˝","Çş"=>"Çť","Ǹ"=>"Çš","Ç´"=>"Çľ","Dz"=>"Çł","ÇŽ"=>"ÇŻ","ÇŹ"=>"Ç", 1061 "ÇŞ"=>"ÇŤ","Ǩ"=>"ÇŠ","ÇŚ"=>"ǧ","Ǥ"=>"ÇĽ","Ǣ"=>"ÇŁ","Ç "=>"ÇĄ","Ç"=>"Ç","Ć"=>"Ç","Ç"=>"Ç","Ç"=>"Ç", 1062 "Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç "=>"Ç","ǡ"=>"Ćż", 1063 "Ćź"=>"Ć˝","Ƹ"=>"Ćš","Ćľ"=>"Ćś","Ćł"=>"Ć´","ĆŻ"=>"ư","ĆŹ"=>"Ć","Ƨ"=>"ƨ","Ƥ"=>"ĆĽ","Ƣ"=>"ĆŁ","Ć "=>"ĆĄ", 1064 "Č "=>"Ć","Ć"=>"Ć","Çś"=>"Ć","Ć"=>"Ć","Ć"=>"Ć","Ć"=>"Ć","Ć"=>"Ć ","Ć"=>"Ć","S"=>"Ĺż","Ĺ˝"=>"Ĺž", 1065 "Ĺť"=>"Ĺź","Ĺš"=>"Ĺş","Ĺś"=>"š","Ĺ´"=>"Ĺľ","Ų"=>"Ĺł","Ű"=>"Ĺą","ĹŽ"=>"ĹŻ","ĹŹ"=>"Ĺ","ĹŞ"=>"ĹŤ","Ũ"=>"ĹŠ", 1066 "ĹŚ"=>"ŧ","Ť"=>"ĹĽ","Ţ"=>"ĹŁ","Ĺ "=>"ĹĄ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ", 1067 "Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Ĺ "=>"Ĺ","Ĺ"=>"Ĺ","Ĺ"=>"Ĺ","Äż"=>"Ĺ", 1068 "Ä˝"=>"Äž","Äť"=>"Äź","Äš"=>"Äş","Äś"=>"ġ","Ä´"=>"Äľ","IJ"=>"Äł","I"=>"Äą","ÄŽ"=>"ÄŻ","ÄŹ"=>"Ä","ÄŞ"=>"ÄŤ", 1069 "Ĩ"=>"ÄŠ","ÄŚ"=>"ħ","Ĥ"=>"ÄĽ","Ģ"=>"ÄŁ","Ä "=>"ÄĄ","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä", 1070 "Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä ","Ä"=>"Ä", 1071 
"Ä"=>"Ä","Ÿ"=>"Ăż","Ă"=>"Ăž","Ă"=>"Ă˝","Ă"=>"Ăź","Ă"=>"Ăť","Ă"=>"Ăş","Ă"=>"Ăš","Ă"=>"ø","Ă"=>"Ăś", 1072 "Ă"=>"Ăľ","Ă"=>"Ă´","Ă"=>"Ăł","Ă"=>"ò","Ă"=>"Ăą","Ă"=>"ð","Ă"=>"ĂŻ","Ă"=>"ĂŽ","Ă"=>"Ă","Ă"=>"ĂŹ", 1073 "Ă"=>"ĂŤ","Ă"=>"ĂŞ","Ă"=>"ĂŠ","Ă"=>"è","Ă"=>"ç","Ă"=>"ĂŚ","Ă "=>"ĂĽ","Ă"=>"ä","Ă"=>"ĂŁ","Ă"=>"â", 1074 "Ă"=>"ĂĄ","Ă"=>"Ă ","Î"=>"Âľ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t", 1075 "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j", 1076 "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a" 1077 ); 1078}; // end of case lookup tables 1079 1080/** 1081 * UTF-8 lookup table for lower case accented letters 1082 * 1083 * This lookuptable defines replacements for accented characters from the ASCII-7 1084 * range. This are lower case letters only. 1085 * 1086 * @author Andreas Gohr <andi@splitbrain.org> 1087 * @see utf8_deaccent() 1088 */ 1089global $UTF8_LOWER_ACCENTS; 1090if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array( 1091 'Ă ' => 'a', 'Ă´' => 'o', 'Ä' => 'd', 'á¸' => 'f', 'ĂŤ' => 'e', 'ĹĄ' => 's', 'ĆĄ' => 'o', 1092 'Ă' => 'ss', 'Ä' => 'a', 'Ĺ' => 'r', 'Č' => 't', 'Ĺ' => 'n', 'Ä' => 'a', 'ġ' => 'k', 1093 'Ĺ' => 's', 'áťł' => 'y', 'Ĺ' => 'n', 'Äş' => 'l', 'ħ' => 'h', 'áš' => 'p', 'Ăł' => 'o', 1094 'Ăş' => 'u', 'Ä' => 'e', 'ĂŠ' => 'e', 'ç' => 'c', 'áş' => 'w', 'Ä' => 'c', 'Ăľ' => 'o', 1095 'ᚥ' => 's', 'ø' => 'o', 'ÄŁ' => 'g', 'ŧ' => 't', 'Č' => 's', 'Ä' => 'e', 'Ä' => 'c', 1096 'Ĺ' => 's', 'ĂŽ' => 'i', 'Ĺą' => 'u', 'Ä' => 'c', 'Ä' => 'e', 'Ĺľ' => 'w', 'ᚍ' => 't', 1097 'ĹŤ' => 'u', 'Ä' => 'c', 'Ăś' => 'oe', 'è' => 'e', 'š' => 'y', 'Ä ' => 'a', 'Ĺ' => 'l', 1098 'Ĺł' => 'u', 'ĹŻ' => 'u', 'Ĺ' => 's', 'Ä' => 'g', 'Äź' => 'l', 'Ć' => 'f', 'Ĺž' => 'z', 1099 'áş' => 'w', 'á¸' => 'b', 'ĂĽ' => 'a', 'ĂŹ' => 'i', 'ĂŻ' => 'i', 'á¸' => 'd', 'ĹĽ' => 't', 1100 'Ĺ' => 'r', 'ä' => 'ae', 'Ă' => 'i', 'Ĺ' => 'r', 'ĂŞ' => 'e', 'Ăź' => 'ue', 'ò' => 'o', 1101 'Ä' => 'e', 'Ăą' => 'n', 'Ĺ' => 
'n', 'ÄĽ' => 'h', 'Ä' => 'g', 'Ä' => 'd', 'Äľ' => 'j', 1102 'Ăż' => 'y', 'ĹŠ' => 'u', 'Ĺ' => 'u', 'ư' => 'u', 'ĹŁ' => 't', 'Ă˝' => 'y', 'Ĺ' => 'o', 1103 'â' => 'a', 'Äž' => 'l', 'áş ' => 'w', 'Ĺź' => 'z', 'ÄŤ' => 'i', 'ĂŁ' => 'a', 'ÄĄ' => 'g', 1104 'áš' => 'm', 'Ĺ' => 'o', 'ÄŠ' => 'i', 'Ăš' => 'u', 'ÄŻ' => 'i', 'Ĺş' => 'z', 'ĂĄ' => 'a', 1105 'Ăť' => 'u', 'Ăž' => 'th', 'ð' => 'dh', 'ĂŚ' => 'ae', 'Âľ' => 'u', 'Ä' => 'e', 1106); 1107 1108/** 1109 * UTF-8 lookup table for upper case accented letters 1110 * 1111 * This lookuptable defines replacements for accented characters from the ASCII-7 1112 * range. This are upper case letters only. 1113 * 1114 * @author Andreas Gohr <andi@splitbrain.org> 1115 * @see utf8_deaccent() 1116 */ 1117global $UTF8_UPPER_ACCENTS; 1118if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array( 1119 'Ă' => 'A', 'Ă' => 'O', 'Ä' => 'D', 'á¸' => 'F', 'Ă' => 'E', 'Ĺ ' => 'S', 'Ć ' => 'O', 1120 'Ä' => 'A', 'Ĺ' => 'R', 'Č' => 'T', 'Ĺ' => 'N', 'Ä' => 'A', 'Äś' => 'K', 1121 'Ĺ' => 'S', 'ᝲ' => 'Y', 'Ĺ ' => 'N', 'Äš' => 'L', 'ÄŚ' => 'H', 'áš' => 'P', 'Ă' => 'O', 1122 'Ă' => 'U', 'Ä' => 'E', 'Ă' => 'E', 'Ă' => 'C', 'áş' => 'W', 'Ä' => 'C', 'Ă' => 'O', 1123 'áš ' => 'S', 'Ă' => 'O', 'Ģ' => 'G', 'ĹŚ' => 'T', 'Č' => 'S', 'Ä' => 'E', 'Ä' => 'C', 1124 'Ĺ' => 'S', 'Ă' => 'I', 'Ű' => 'U', 'Ä' => 'C', 'Ä' => 'E', 'Ĺ´' => 'W', 'ᚪ' => 'T', 1125 'ĹŞ' => 'U', 'Ä' => 'C', 'Ă' => 'Oe', 'Ă' => 'E', 'Ĺś' => 'Y', 'Ä' => 'A', 'Ĺ' => 'L', 1126 'Ų' => 'U', 'ĹŽ' => 'U', 'Ĺ' => 'S', 'Ä' => 'G', 'Äť' => 'L', 'Ć' => 'F', 'Ĺ˝' => 'Z', 1127 'áş' => 'W', 'á¸' => 'B', 'Ă ' => 'A', 'Ă' => 'I', 'Ă' => 'I', 'á¸' => 'D', 'Ť' => 'T', 1128 'Ĺ' => 'R', 'Ă' => 'Ae', 'Ă' => 'I', 'Ĺ' => 'R', 'Ă' => 'E', 'Ă' => 'Ue', 'Ă' => 'O', 1129 'Ä' => 'E', 'Ă' => 'N', 'Ĺ' => 'N', 'Ĥ' => 'H', 'Ä' => 'G', 'Ä' => 'D', 'Ä´' => 'J', 1130 'Ÿ' => 'Y', 'Ũ' => 'U', 'ĹŹ' => 'U', 'ĆŻ' => 'U', 'Ţ' => 'T', 'Ă' => 'Y', 'Ĺ' => 'O', 1131 'Ă' => 'A', 'Ä˝' => 'L', 'áş' => 'W', 'Ĺť' => 'Z', 'ÄŞ' => 'I', 'Ă' => 'A', 'Ä 
' => 'G', 1132 'áš' => 'M', 'Ĺ' => 'O', 'Ĩ' => 'I', 'Ă' => 'U', 'ÄŽ' => 'I', 'Ĺš' => 'Z', 'Ă' => 'A', 1133 'Ă' => 'U', 'Ă' => 'Th', 'Ă' => 'Dh', 'Ă' => 'Ae', 'Ä' => 'E', 1134); 1135 1136/** 1137 * UTF-8 array of common special characters 1138 * 1139 * This array should contain all special characters (not a letter or digit) 1140 * defined in the various local charsets - it's not a complete list of non-alphanum 1141 * characters in UTF-8. It's not perfect but should match most cases of special 1142 * chars. 1143 * 1144 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is! 1145 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a 1146 * 1147 * @author Andreas Gohr <andi@splitbrain.org> 1148 * @see utf8_stripspecials() 1149 */ 1150global $UTF8_SPECIAL_CHARS; 1151if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array( 1152 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 1153 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c, 1154 0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, 1155 0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 1156 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 1157 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 1158 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 1159 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 1160 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 1161 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 1162 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9, 1163 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384, 1164 0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1, 1165 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 
0x05b8, 0x05b9, 0x05bb, 0x05bc, 1166 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c, 1167 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651, 1168 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015, 1169 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022, 1170 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab, 1171 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193, 1172 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202, 1173 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, 1174 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, 1175 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, 1176 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, 1177 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 1178 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, 1179 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 1180 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 1181 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, 1182 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, 1183 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, 1184 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, 1185 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 1186 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, 1187 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 1188 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, 1189 0x2739, 
0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, 1190 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, 1191 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 1192 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, 1193 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 1194 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, 1195 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, 1196 0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 1197 0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 1198 0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017, 1199 0x3018, 0x3019, 0x301a, 0x301b, 0x3036, 1200 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc, 1201 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, 1202 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 1203 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, 1204 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, 1205 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 1206 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c, 1207 0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b, 1208 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 1209 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea, 1210 0xffeb, 0xffec, 0xffed, 0xffee, 1211 0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703, 1212 0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b, 1213 0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713, 1214 0x01d714, 
0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b, 1215 0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf, 1216); 1217 1218// utf8 version of above data 1219global $UTF8_SPECIAL_CHARS2; 1220if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 = 1221 "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~ ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂďż˝'. 1222 '� ¥¢£¤¼Œ§¨ŠªÂޝ°¹²³´¾œ¡¸šºŸ½�'. 1223 '�¿ĂáËËËËËËËĚĚĚĚĚŁÎÎ ÎĎÖ°ÖąÖ˛ÖłÖ´ÖľÖśÖˇÖ¸ÖšÖťÖźÖ˝ÖžÖżďż˝'. 1224 'ďż˝×××׳״ŘŘŘŮŮŮŮŮŮŮŮŮ٪฿âââââââââââââďż˝'. 1225 '��â âĄâ˘âŚâ°â˛âłâšâşââ§âŞâŤâŹâââ˘âŚâľâââââââľ'. 1226 'âââââââââ âââââââââââââââ â§â¨ďż˝'. 1227 'ďż˝âŞâŤâ´âźâ ââ âĄâ¤âĽââââââââĽâ ââ âĄâŠâŞâŠâďż˝'. 1228 '��ââââââ¤âŹâ´âźâââââââââââââââââ '. 1229 'âĄâ˘âŁâ¤âĽâŚâ§â¨âŠâŞâŤâŹâââââââââ â˛âźâââďż˝'. 1230 'ďż˝â ââââ âŁâĽâŚââââââââââââââââââďż˝'. 1231 '��ââââââââââ âĄâ˘âŁâ¤âĽâŚâ§âŠâŞâŤâŹââŽâŻâ°âą'. 1232 'â˛âłâ´âľâśâˇâ¸âšâşâťâźâ˝âžâżââââââ ââââââďż˝'. 1233 'ďż˝âââââââââââââĄâ˘âŁâ¤âĽâŚâ§âżââââââďż˝'. 1234 '��âââââ âĄâ˘âŁâ¤âĽâŚâ§â¨âŠâŞâŤâŹââŽâŻâąâ˛âłâ´âľâś'. 1235 'âˇâ¸âšâşâťâźâ˝âž'. 1236 'ăăăăăăăăăăăăăăăăăăăăăăăăś'. 1237 'ďďďďŁďŁďŁďŁďŁďŁďŁďŁďŁďŁ ďŁĄďŁ˘ďŁŁďŁ¤ďŁĽďż˝'. 1238 '�ďŁďŁŽďŁŻďŁ°ďŁąďŁ˛ďŁłďŁ´ďŁľďŁśďŁˇďŁ¸ďŁšďŁşďŁťďŁźďŁ˝ďŁžďšźďš˝'. 1239 'ďźďźďźďźďź ďźďźďźďźďźďźďźďźďźďźďźďźďźďźďźďźďź ď˝ď˝ď˝ď˝ď˝'. 1240 'ď˝ď˝ ď˝Ąď˝˘ď˝Łď˝¤ď˝Ľďż ďżĄďż˘ďżŁďż¤ďżĽďżŚďż¨ďżŠďżŞďżŤďżŹďżďżŽ'. 1241 'đźđ˝đžđżđđđđđđ đđđđđđđđđđđđđđđđđđđđđđ'. 1242 ' ââŻâ ďťż'; 1243 1244/** 1245 * Romanization lookup table 1246 * 1247 * This lookup tables provides a way to transform strings written in a language 1248 * different from the ones based upon latin letters into plain ASCII. 1249 * 1250 * Please note: this is not a scientific transliteration table. It only works 1251 * oneway from nonlatin to ASCII and it works by simple character replacement 1252 * only. Specialities of each language are not supported. 
1253 * 1254 * @author Andreas Gohr <andi@splitbrain.org> 1255 * @author Vitaly Blokhin <vitinfo@vitn.com> 1256 * @link http://www.uconv.com/translit.htm 1257 * @author Bisqwit <bisqwit@iki.fi> 1258 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1259 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1260 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1261 * @link http://www.btranslations.com/resources/romanization/korean.asp 1262 * @author Arthit Suriyawongkul <arthit@gmail.com> 1263 * @author Denis Scheither <amorphis@uni-bremen.de> 1264 * @author Eivind Morland <eivind.morland@gmail.com> 1265 */ 1266global $UTF8_ROMANIZATION; 1267if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1268 // scandinavian - differs from what we do in deaccent 1269 'ĂĽ'=>'a','Ă '=>'A','ä'=>'a','Ă'=>'A','Ăś'=>'o','Ă'=>'O', 1270 1271 //russian cyrillic 1272 'а'=>'a','Đ'=>'A','Đą'=>'b','Đ'=>'B','в'=>'v','Đ'=>'V','Đł'=>'g','Đ'=>'G', 1273 'Đ´'=>'d','Đ'=>'D','Đľ'=>'e','Đ'=>'E','Ń'=>'jo','Đ'=>'Jo','Đś'=>'zh','Đ'=>'Zh', 1274 'С'=>'z','Đ'=>'Z','и'=>'i','Đ'=>'I','Đš'=>'j','Đ'=>'J','Đş'=>'k','Đ'=>'K', 1275 'Đť'=>'l','Đ'=>'L','Đź'=>'m','Đ'=>'M','Đ˝'=>'n','Đ'=>'N','Đž'=>'o','Đ'=>'O', 1276 'Đż'=>'p','Đ'=>'P','Ń'=>'r','Đ '=>'R','Ń'=>'s','ĐĄ'=>'S','Ń'=>'t','Т'=>'T', 1277 'Ń'=>'u','ĐŁ'=>'U','Ń'=>'f','Ф'=>'F','Ń '=>'x','ĐĽ'=>'X','Ń'=>'c','ĐŚ'=>'C', 1278 'Ń'=>'ch','Ч'=>'Ch','Ń'=>'sh','Ш'=>'Sh','Ń'=>'sch','ĐŠ'=>'Sch','Ń'=>'', 1279 'ĐŞ'=>'','Ń'=>'y','ĐŤ'=>'Y','Ń'=>'','ĐŹ'=>'','Ń'=>'eh','Đ'=>'Eh','Ń'=>'ju', 1280 'ĐŽ'=>'Ju','Ń'=>'ja','ĐŻ'=>'Ja', 1281 // Ukrainian cyrillic 1282 'Ň'=>'Gh','Ň'=>'gh','Đ'=>'Je','Ń'=>'je','Đ'=>'I','Ń'=>'i','Đ'=>'Ji','Ń'=>'ji', 1283 // Georgian 1284 'á'=>'a','á'=>'b','á'=>'g','á'=>'d','á'=>'e','á'=>'v','á'=>'z','á'=>'th', 1285 'á'=>'i','á'=>'p','á'=>'l','á'=>'m','á'=>'n','á'=>'o','á'=>'p','á'=>'zh', 1286 'á '=>'r','áĄ'=>'s','á˘'=>'t','áŁ'=>'u','á¤'=>'ph','áĽ'=>'kh','áŚ'=>'gh','á§'=>'q', 1287 
'á¨'=>'sh','áŠ'=>'ch','áŞ'=>'c','áŤ'=>'dh','áŹ'=>'w','á'=>'j','áŽ'=>'x','áŻ'=>'jh', 1288 'á°'=>'xh', 1289 //Sanskrit 1290 'ठ'=>'a','ŕ¤'=>'ah','ŕ¤'=>'i','ŕ¤'=>'ih','ŕ¤'=>'u','ŕ¤'=>'uh','ŕ¤'=>'ry', 1291 '༠'=>'ryh','ŕ¤'=>'ly','༥'=>'lyh','ŕ¤'=>'e','ŕ¤'=>'ay','ŕ¤'=>'o','ŕ¤'=>'aw', 1292 'ठŕ¤'=>'amh','ठŕ¤'=>'aq','ŕ¤'=>'k','ŕ¤'=>'kh','ŕ¤'=>'g','ŕ¤'=>'gh','ŕ¤'=>'nh', 1293 'ŕ¤'=>'c','ŕ¤'=>'ch','ŕ¤'=>'j','ŕ¤'=>'jh','ŕ¤'=>'ny','ŕ¤'=>'tq','ठ'=>'tqh', 1294 'थ'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','़'=>'th','ऌ'=>'d','ध'=>'dh', 1295 'न'=>'n','प'=>'p','ऍ'=>'ph','ए'=>'b','ŕ¤'=>'bh','ऎ'=>'m','य'=>'z','र'=>'r', 1296 'ल'=>'l','ा'=>'v','ज'=>'sh','ड'=>'sqh','स'=>'s','च'=>'x', 1297 //Sanskrit diacritics 1298 'Ä'=>'A','ÄŞ'=>'I','ĹŞ'=>'U','áš'=>'R','áš'=>'R','áš'=>'N','Ă'=>'N','ᚏ'=>'T', 1299 'á¸'=>'D','áš'=>'N','Ĺ'=>'S','ᚢ'=>'S','áš'=>'M','áš'=>'M','Ḥ'=>'H','Ḝ'=>'L','Ḹ'=>'L', 1300 'Ä'=>'a','ÄŤ'=>'i','ĹŤ'=>'u','áš'=>'r','áš'=>'r','áš '=>'n','Ăą'=>'n','áš'=>'t', 1301 'á¸'=>'d','áš'=>'n','Ĺ'=>'s','ᚣ'=>'s','áš'=>'m','áš'=>'m','Ḽ'=>'h','ḡ'=>'l','Ḛ'=>'l', 1302 //Hebrew 1303 '×'=>'a', '×'=>'b','×'=>'g','×'=>'d','×'=>'h','×'=>'v','×'=>'z','×'=>'kh','×'=>'th', 1304 '×'=>'y','×'=>'h','×'=>'k','×'=>'l','×'=>'m','×'=>'m','×'=>'n','× '=>'n', 1305 'ץ'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','׼'=>'c','׌'=>'c','×§'=>'q','ר'=>'r', 1306 '׊'=>'sh','ת'=>'t', 1307 //Arabic 1308 'ا'=>'a','ب'=>'b','ŘŞ'=>'t','ŘŤ'=>'th','ŘŹ'=>'g','Ř'=>'xh','ŘŽ'=>'x','ŘŻ'=>'d', 1309 'ذ'=>'dh','Řą'=>'r','ز'=>'z','Řł'=>'s','Ř´'=>'sh','Řľ'=>'s\'','Řś'=>'d\'', 1310 'ء'=>'t\'','ظ'=>'z\'','Řš'=>'y','Řş'=>'gh','Ů'=>'f','Ů'=>'q','Ů'=>'k', 1311 'Ů'=>'l','Ů '=>'m','Ů'=>'n','Ů'=>'x\'','Ů'=>'u','Ů'=>'i', 1312 1313 // Japanese characters (last update: 2008-05-09) 1314 1315 // Japanese hiragana 1316 1317 // 3 character syllables, 㣠doubles the consonant after 1318 'ăŁăĄă'=>'ccha','ăŁăĄă'=>'cche','ăŁăĄă'=>'ccho','ăŁăĄă '=>'cchu', 1319 'ăŁăłă'=>'bbya','ăŁăłă'=>'bbye','ăŁăłă'=>'bbyi','ăŁăłă'=>'bbyo','ăŁăłă '=>'bbyu', 1320 
'ăŁă´ă'=>'ppya','ăŁă´ă'=>'ppye','ăŁă´ă'=>'ppyi','ăŁă´ă'=>'ppyo','ăŁă´ă '=>'ppyu', 1321 'ăŁăĄă'=>'ccha','ăŁăĄă'=>'cche','ăŁăĄ'=>'cchi','ăŁăĄă'=>'ccho','ăŁăĄă '=>'cchu', 1322 // 'ăŁă˛ă'=>'hya','ăŁă˛ă'=>'hye','ăŁă˛ă'=>'hyi','ăŁă˛ă'=>'hyo','ăŁă˛ă '=>'hyu', 1323 'ăŁăă'=>'kkya','ăŁăă'=>'kkye','ăŁăă'=>'kkyi','ăŁăă'=>'kkyo','ăŁăă '=>'kkyu', 1324 'ăŁăă'=>'ggya','ăŁăă'=>'ggye','ăŁăă'=>'ggyi','ăŁăă'=>'ggyo','ăŁăă '=>'ggyu', 1325 'ăŁăżă'=>'mmya','ăŁăżă'=>'mmye','ăŁăżă'=>'mmyi','ăŁăżă'=>'mmyo','ăŁăżă '=>'mmyu', 1326 'ăŁăŤă'=>'nnya','ăŁăŤă'=>'nnye','ăŁăŤă'=>'nnyi','ăŁăŤă'=>'nnyo','ăŁăŤă '=>'nnyu', 1327 'ăŁăă'=>'rrya','ăŁăă'=>'rrye','ăŁăă'=>'rryi','ăŁăă'=>'rryo','ăŁăă '=>'rryu', 1328 'ăŁăă'=>'ssha','ăŁăă'=>'sshe','ăŁă'=>'sshi','ăŁăă'=>'ssho','ăŁăă '=>'sshu', 1329 1330 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1331 'ăă'=>'n_a','ăă'=>'n_e','ăă'=>'n_i','ăă'=>'n_o','ăă'=>'n_u', 1332 'ăă'=>'n_ya','ăă'=>'n_yo','ăă'=>'n_yu', 1333 1334 // 2 character syllables - normal 1335 'ăľă'=>'fa','ăľă'=>'fe','ăľă'=>'fi','ăľă'=>'fo', 1336 'ăĄă'=>'cha','ăĄă'=>'che','ăĄ'=>'chi','ăĄă'=>'cho','ăĄă '=>'chu', 1337 'ă˛ă'=>'hya','ă˛ă'=>'hye','ă˛ă'=>'hyi','ă˛ă'=>'hyo','ă˛ă '=>'hyu', 1338 'ăłă'=>'bya','ăłă'=>'bye','ăłă'=>'byi','ăłă'=>'byo','ăłă '=>'byu', 1339 'ă´ă'=>'pya','ă´ă'=>'pye','ă´ă'=>'pyi','ă´ă'=>'pyo','ă´ă '=>'pyu', 1340 'ăă'=>'kya','ăă'=>'kye','ăă'=>'kyi','ăă'=>'kyo','ăă '=>'kyu', 1341 'ăă'=>'gya','ăă'=>'gye','ăă'=>'gyi','ăă'=>'gyo','ăă '=>'gyu', 1342 'ăżă'=>'mya','ăżă'=>'mye','ăżă'=>'myi','ăżă'=>'myo','ăżă '=>'myu', 1343 'ăŤă'=>'nya','ăŤă'=>'nye','ăŤă'=>'nyi','ăŤă'=>'nyo','ăŤă '=>'nyu', 1344 'ăă'=>'rya','ăă'=>'rye','ăă'=>'ryi','ăă'=>'ryo','ăă '=>'ryu', 1345 'ăă'=>'sha','ăă'=>'she','ă'=>'shi','ăă'=>'sho','ăă '=>'shu', 1346 'ăă'=>'ja','ăă'=>'je','ăă'=>'jo','ăă '=>'ju', 1347 'ăă'=>'we','ăă'=>'wi', 1348 'ăă'=>'ye', 1349 1350 // 2 character syllables, 㣠doubles the consonant after 1351 
'ăŁă°'=>'bba','ăŁăš'=>'bbe','ăŁăł'=>'bbi','ăŁăź'=>'bbo','ăŁăś'=>'bbu', 1352 'ăŁăą'=>'ppa','ăŁăş'=>'ppe','ăŁă´'=>'ppi','ăŁă˝'=>'ppo','ăŁăˇ'=>'ppu', 1353 'ăŁă'=>'tta','ăŁăŚ'=>'tte','ăŁăĄ'=>'cchi','ăŁă¨'=>'tto','ăŁă¤'=>'ttsu', 1354 'ăŁă '=>'dda','ăŁă§'=>'dde','ăŁă˘'=>'ddi','ăŁăŠ'=>'ddo','ăŁăĽ'=>'ddu', 1355 'ăŁă'=>'gga','ăŁă'=>'gge','ăŁă'=>'ggi','ăŁă'=>'ggo','ăŁă'=>'ggu', 1356 'ăŁă'=>'kka','ăŁă'=>'kke','ăŁă'=>'kki','ăŁă'=>'kko','ăŁă'=>'kku', 1357 'ăŁăž'=>'mma','ăŁă'=>'mme','ăŁăż'=>'mmi','ăŁă'=>'mmo','ăŁă'=>'mmu', 1358 'ăŁăŞ'=>'nna','ăŁă'=>'nne','ăŁăŤ'=>'nni','ăŁăŽ'=>'nno','ăŁăŹ'=>'nnu', 1359 'ăŁă'=>'rra','ăŁă'=>'rre','ăŁă'=>'rri','ăŁă'=>'rro','ăŁă'=>'rru', 1360 'ăŁă'=>'ssa','ăŁă'=>'sse','ăŁă'=>'sshi','ăŁă'=>'sso','ăŁă'=>'ssu', 1361 'ăŁă'=>'zza','ăŁă'=>'zze','ăŁă'=>'jji','ăŁă'=>'zzo','ăŁă'=>'zzu', 1362 1363 // 1 character syllabels 1364 'ă'=>'a','ă'=>'e','ă'=>'i','ă'=>'o','ă'=>'u','ă'=>'n', 1365 'ăŻ'=>'ha','ă¸'=>'he','ă˛'=>'hi','ăť'=>'ho','ăľ'=>'fu', 1366 'ă°'=>'ba','ăš'=>'be','ăł'=>'bi','ăź'=>'bo','ăś'=>'bu', 1367 'ăą'=>'pa','ăş'=>'pe','ă´'=>'pi','ă˝'=>'po','ăˇ'=>'pu', 1368 'ă'=>'ta','ăŚ'=>'te','ăĄ'=>'chi','ă¨'=>'to','ă¤'=>'tsu', 1369 'ă '=>'da','ă§'=>'de','ă˘'=>'di','ăŠ'=>'do','ăĽ'=>'du', 1370 'ă'=>'ga','ă'=>'ge','ă'=>'gi','ă'=>'go','ă'=>'gu', 1371 'ă'=>'ka','ă'=>'ke','ă'=>'ki','ă'=>'ko','ă'=>'ku', 1372 'ăž'=>'ma','ă'=>'me','ăż'=>'mi','ă'=>'mo','ă'=>'mu', 1373 'ăŞ'=>'na','ă'=>'ne','ăŤ'=>'ni','ăŽ'=>'no','ăŹ'=>'nu', 1374 'ă'=>'ra','ă'=>'re','ă'=>'ri','ă'=>'ro','ă'=>'ru', 1375 'ă'=>'sa','ă'=>'se','ă'=>'shi','ă'=>'so','ă'=>'su', 1376 'ă'=>'wa','ă'=>'wo', 1377 'ă'=>'za','ă'=>'ze','ă'=>'ji','ă'=>'zo','ă'=>'zu', 1378 'ă'=>'ya','ă'=>'yo','ă'=>'yu', 1379 // old characters 1380 'ă'=>'we','ă'=>'wi', 1381 1382 // convert what's left (probably only kicks in when something's missing above) 1383 // 'ă'=>'a','ă'=>'e','ă'=>'i','ă'=>'o','ă '=>'u', 1384 // 'ă'=>'ya','ă'=>'yo','ă '=>'yu', 1385 1386 // never seen one of those (disabled for the moment) 1387 // 
'ă´ă'=>'va','ă´ă'=>'ve','ă´ă'=>'vi','ă´ă'=>'vo','ă´'=>'vu', 1388 // 'ă§ă'=>'dha','ă§ă'=>'dhe','ă§ă'=>'dhi','ă§ă'=>'dho','ă§ă '=>'dhu', 1389 // 'ăŠă'=>'dwa','ăŠă'=>'dwe','ăŠă'=>'dwi','ăŠă'=>'dwo','ăŠă '=>'dwu', 1390 // 'ă˘ă'=>'dya','ă˘ă'=>'dye','ă˘ă'=>'dyi','ă˘ă'=>'dyo','ă˘ă '=>'dyu', 1391 // 'ăľă'=>'fwa','ăľă'=>'fwe','ăľă'=>'fwi','ăľă'=>'fwo','ăľă '=>'fwu', 1392 // 'ăľă'=>'fya','ăľă'=>'fye','ăľă'=>'fyi','ăľă'=>'fyo','ăľă '=>'fyu', 1393 // 'ăă'=>'swa','ăă'=>'swe','ăă'=>'swi','ăă'=>'swo','ăă '=>'swu', 1394 // 'ăŚă'=>'tha','ăŚă'=>'the','ăŚă'=>'thi','ăŚă'=>'tho','ăŚă '=>'thu', 1395 // 'ă¤ă'=>'tsa','ă¤ă'=>'tse','ă¤ă'=>'tsi','ă¤ă'=>'tso','ă¤'=>'tsu', 1396 // 'ă¨ă'=>'twa','ă¨ă'=>'twe','ă¨ă'=>'twi','ă¨ă'=>'two','ă¨ă '=>'twu', 1397 // 'ă´ă'=>'vya','ă´ă'=>'vye','ă´ă'=>'vyi','ă´ă'=>'vyo','ă´ă '=>'vyu', 1398 // 'ăă'=>'wha','ăă'=>'whe','ăă'=>'whi','ăă'=>'who','ăă '=>'whu', 1399 // 'ăă'=>'zha','ăă'=>'zhe','ăă'=>'zhi','ăă'=>'zho','ăă '=>'zhu', 1400 // 'ăă'=>'zya','ăă'=>'zye','ăă'=>'zyi','ăă'=>'zyo','ăă '=>'zyu', 1401 1402 // 'spare' characters from other romanization systems 1403 // 'ă '=>'da','ă§'=>'de','ă˘'=>'di','ăŠ'=>'do','ăĽ'=>'du', 1404 // 'ă'=>'la','ă'=>'le','ă'=>'li','ă'=>'lo','ă'=>'lu', 1405 // 'ă'=>'sa','ă'=>'se','ă'=>'si','ă'=>'so','ă'=>'su', 1406 // 'ăĄă'=>'cya','ăĄă'=>'cye','ăĄă'=>'cyi','ăĄă'=>'cyo','ăĄă '=>'cyu', 1407 //'ăă'=>'jya','ăă'=>'jye','ăă'=>'jyi','ăă'=>'jyo','ăă '=>'jyu', 1408 //'ăă'=>'lya','ăă'=>'lye','ăă'=>'lyi','ăă'=>'lyo','ăă '=>'lyu', 1409 //'ăă'=>'sya','ăă'=>'sye','ăă'=>'syi','ăă'=>'syo','ăă '=>'syu', 1410 //'ăĄă'=>'tya','ăĄă'=>'tye','ăĄă'=>'tyi','ăĄă'=>'tyo','ăĄă '=>'tyu', 1411 //'ă'=>'ci',,ă'=>'yi','ă˘'=>'dzi', 1412 //'ăŁăă'=>'jja','ăŁăă'=>'jje','ăŁă'=>'jji','ăŁăă'=>'jjo','ăŁăă '=>'jju', 1413 1414 1415 // Japanese katakana 1416 1417 // 4 character syllables: ă doubles the consonant after, ăź doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1418 
'ăăăŁăź'=>'bbyaa','ăăă§ăź'=>'bbyee','ăăăŁăź'=>'bbyii','ăăă§ăź'=>'bbyoo','ăăăĽăź'=>'bbyuu', 1419 'ăăăŁăź'=>'ppyaa','ăăă§ăź'=>'ppyee','ăăăŁăź'=>'ppyii','ăăă§ăź'=>'ppyoo','ăăăĽăź'=>'ppyuu', 1420 'ăăăŁăź'=>'kkyaa','ăăă§ăź'=>'kkyee','ăăăŁăź'=>'kkyii','ăăă§ăź'=>'kkyoo','ăăăĽăź'=>'kkyuu', 1421 'ăăŽăŁăź'=>'ggyaa','ăăŽă§ăź'=>'ggyee','ăăŽăŁăź'=>'ggyii','ăăŽă§ăź'=>'ggyoo','ăăŽăĽăź'=>'ggyuu', 1422 'ăăăŁăź'=>'mmyaa','ăăă§ăź'=>'mmyee','ăăăŁăź'=>'mmyii','ăăă§ăź'=>'mmyoo','ăăăĽăź'=>'mmyuu', 1423 'ăăăŁăź'=>'nnyaa','ăăă§ăź'=>'nnyee','ăăăŁăź'=>'nnyii','ăăă§ăź'=>'nnyoo','ăăăĽăź'=>'nnyuu', 1424 'ăăŞăŁăź'=>'rryaa','ăăŞă§ăź'=>'rryee','ăăŞăŁăź'=>'rryii','ăăŞă§ăź'=>'rryoo','ăăŞăĽăź'=>'rryuu', 1425 'ăăˇăŁăź'=>'sshaa','ăăˇă§ăź'=>'sshee','ăăˇăź'=>'sshii','ăăˇă§ăź'=>'sshoo','ăăˇăĽăź'=>'sshuu', 1426 'ăăăŁăź'=>'cchaa','ăăă§ăź'=>'cchee','ăăăź'=>'cchii','ăăă§ăź'=>'cchoo','ăăăĽăź'=>'cchuu', 1427 'ăăăŁăź'=>'ttii', 1428 'ăăăŁăź'=>'ddii', 1429 1430 // 3 character syllables - doubled vowels 1431 'ăăĄăź'=>'faa','ăă§ăź'=>'fee','ăăŁăź'=>'fii','ăăŠăź'=>'foo', 1432 'ăăŁăź'=>'fyaa','ăă§ăź'=>'fyee','ăăŁăź'=>'fyii','ăă§ăź'=>'fyoo','ăăĽăź'=>'fyuu', 1433 'ăăŁăź'=>'hyaa','ăă§ăź'=>'hyee','ăăŁăź'=>'hyii','ăă§ăź'=>'hyoo','ăăĽăź'=>'hyuu', 1434 'ăăŁăź'=>'byaa','ăă§ăź'=>'byee','ăăŁăź'=>'byii','ăă§ăź'=>'byoo','ăăĽăź'=>'byuu', 1435 'ăăŁăź'=>'pyaa','ăă§ăź'=>'pyee','ăăŁăź'=>'pyii','ăă§ăź'=>'pyoo','ăăĽăź'=>'pyuu', 1436 'ăăŁăź'=>'kyaa','ăă§ăź'=>'kyee','ăăŁăź'=>'kyii','ăă§ăź'=>'kyoo','ăăĽăź'=>'kyuu', 1437 'ăŽăŁăź'=>'gyaa','ăŽă§ăź'=>'gyee','ăŽăŁăź'=>'gyii','ăŽă§ăź'=>'gyoo','ăŽăĽăź'=>'gyuu', 1438 'ăăŁăź'=>'myaa','ăă§ăź'=>'myee','ăăŁăź'=>'myii','ăă§ăź'=>'myoo','ăăĽăź'=>'myuu', 1439 'ăăŁăź'=>'nyaa','ăă§ăź'=>'nyee','ăăŁăź'=>'nyii','ăă§ăź'=>'nyoo','ăăĽăź'=>'nyuu', 1440 'ăŞăŁăź'=>'ryaa','ăŞă§ăź'=>'ryee','ăŞăŁăź'=>'ryii','ăŞă§ăź'=>'ryoo','ăŞăĽăź'=>'ryuu', 1441 'ăˇăŁăź'=>'shaa','ăˇă§ăź'=>'shee','ăˇăź'=>'shii','ăˇă§ăź'=>'shoo','ăˇăĽăź'=>'shuu', 1442 'ă¸ăŁăź'=>'jaa','ă¸ă§ăź'=>'jee','ă¸ăź'=>'jii','ă¸ă§ăź'=>'joo','ă¸ăĽăź'=>'juu', 1443 
'ăšăĄăź'=>'swaa','ăšă§ăź'=>'swee','ăšăŁăź'=>'swii','ăšăŠăź'=>'swoo','ăšăĽăź'=>'swuu', 1444 'ăăĄăź'=>'daa','ăă§ăź'=>'dee','ăăŁăź'=>'dii','ăăŠăź'=>'doo','ăăĽăź'=>'duu', 1445 'ăăŁăź'=>'chaa','ăă§ăź'=>'chee','ăăź'=>'chii','ăă§ăź'=>'choo','ăăĽăź'=>'chuu', 1446 'ăăŁăź'=>'dyaa','ăă§ăź'=>'dyee','ăăŁăź'=>'dyii','ăă§ăź'=>'dyoo','ăăĽăź'=>'dyuu', 1447 'ăăŁăź'=>'tsaa','ăă§ăź'=>'tsee','ăăŁăź'=>'tsii','ăă§ăź'=>'tsoo','ăăź'=>'tsuu', 1448 'ăăĄăź'=>'twaa','ăă§ăź'=>'twee','ăăŁăź'=>'twii','ăăŠăź'=>'twoo','ăăĽăź'=>'twuu', 1449 'ăăĄăź'=>'dwaa','ăă§ăź'=>'dwee','ăăŁăź'=>'dwii','ăăŠăź'=>'dwoo','ăăĽăź'=>'dwuu', 1450 'ăŚăĄăź'=>'whaa','ăŚă§ăź'=>'whee','ăŚăŁăź'=>'whii','ăŚăŠăź'=>'whoo','ăŚăĽăź'=>'whuu', 1451 'ă´ăŁăź'=>'vyaa','ă´ă§ăź'=>'vyee','ă´ăŁăź'=>'vyii','ă´ă§ăź'=>'vyoo','ă´ăĽăź'=>'vyuu', 1452 'ă´ăĄăź'=>'vaa','ă´ă§ăź'=>'vee','ă´ăŁăź'=>'vii','ă´ăŠăź'=>'voo','ă´ăź'=>'vuu', 1453 'ăŚă§ăź'=>'wee','ăŚăŁăź'=>'wii', 1454 'ă¤ă§ăź'=>'yee', 1455 'ăăŁăź'=>'tii', 1456 'ăăŁăź'=>'dii', 1457 1458 // 3 character syllables - doubled consonants 1459 'ăăăŁ'=>'bbya','ăăă§'=>'bbye','ăăăŁ'=>'bbyi','ăăă§'=>'bbyo','ăăăĽ'=>'bbyu', 1460 'ăăăŁ'=>'ppya','ăăă§'=>'ppye','ăăăŁ'=>'ppyi','ăăă§'=>'ppyo','ăăăĽ'=>'ppyu', 1461 'ăăăŁ'=>'kkya','ăăă§'=>'kkye','ăăăŁ'=>'kkyi','ăăă§'=>'kkyo','ăăăĽ'=>'kkyu', 1462 'ăăŽăŁ'=>'ggya','ăăŽă§'=>'ggye','ăăŽăŁ'=>'ggyi','ăăŽă§'=>'ggyo','ăăŽăĽ'=>'ggyu', 1463 'ăăăŁ'=>'mmya','ăăă§'=>'mmye','ăăăŁ'=>'mmyi','ăăă§'=>'mmyo','ăăăĽ'=>'mmyu', 1464 'ăăăŁ'=>'nnya','ăăă§'=>'nnye','ăăăŁ'=>'nnyi','ăăă§'=>'nnyo','ăăăĽ'=>'nnyu', 1465 'ăăŞăŁ'=>'rrya','ăăŞă§'=>'rrye','ăăŞăŁ'=>'rryi','ăăŞă§'=>'rryo','ăăŞăĽ'=>'rryu', 1466 'ăăˇăŁ'=>'ssha','ăăˇă§'=>'sshe','ăăˇ'=>'sshi','ăăˇă§'=>'ssho','ăăˇăĽ'=>'sshu', 1467 'ăăăŁ'=>'ccha','ăăă§'=>'cche','ăă'=>'cchi','ăăă§'=>'ccho','ăăăĽ'=>'cchu', 1468 'ăăăŁ'=>'tti', 1469 'ăăăŁ'=>'ddi', 1470 1471 // 3 character syllables - doubled vowel and consonants 1472 'ăăăź'=>'bbaa','ăăăź'=>'bbee','ăăăź'=>'bbii','ăăăź'=>'bboo','ăăăź'=>'bbuu', 1473 
'ăăăź'=>'ppaa','ăăăź'=>'ppee','ăăăź'=>'ppii','ăăăź'=>'ppoo','ăăăź'=>'ppuu', 1474 'ăăąăź'=>'kkee','ăăăź'=>'kkii','ăăłăź'=>'kkoo','ăăŻăź'=>'kkuu','ăăŤăź'=>'kkaa', 1475 'ăăŹăź'=>'ggaa','ăă˛ăź'=>'ggee','ăăŽăź'=>'ggii','ăă´ăź'=>'ggoo','ăă°ăź'=>'gguu', 1476 'ăăăź'=>'maa','ăăĄăź'=>'mee','ăăăź'=>'mii','ăă˘ăź'=>'moo','ăă ăź'=>'muu', 1477 'ăăăź'=>'nnaa','ăăăź'=>'nnee','ăăăź'=>'nnii','ăăăź'=>'nnoo','ăăăź'=>'nnuu', 1478 'ăăŠăź'=>'rraa','ăăŹăź'=>'rree','ăăŞăź'=>'rrii','ăăăź'=>'rroo','ăăŤăź'=>'rruu', 1479 'ăăľăź'=>'ssaa','ăăťăź'=>'ssee','ăăˇăź'=>'sshii','ăă˝ăź'=>'ssoo','ăăšăź'=>'ssuu', 1480 'ăăśăź'=>'zzaa','ăăźăź'=>'zzee','ăă¸ăź'=>'jjii','ăăžăź'=>'zzoo','ăăşăź'=>'zzuu', 1481 'ăăżăź'=>'ttaa','ăăăź'=>'ttee','ăăăź'=>'chii','ăăăź'=>'ttoo','ăăăź'=>'ttsuu', 1482 'ăăăź'=>'ddaa','ăăăź'=>'ddee','ăăăź'=>'ddii','ăăăź'=>'ddoo','ăă ăź'=>'dduu', 1483 1484 // 2 character syllables - normal 1485 'ăăĄ'=>'fa','ăă§'=>'fe','ăăŁ'=>'fi','ăăŠ'=>'fo','ăăĽ'=>'fu', 1486 // 'ăăŁ'=>'fya','ăă§'=>'fye','ăăŁ'=>'fyi','ăă§'=>'fyo','ăăĽ'=>'fyu', 1487 'ăăŁ'=>'fa','ăă§'=>'fe','ăăŁ'=>'fi','ăă§'=>'fo','ăăĽ'=>'fu', 1488 'ăăŁ'=>'hya','ăă§'=>'hye','ăăŁ'=>'hyi','ăă§'=>'hyo','ăăĽ'=>'hyu', 1489 'ăăŁ'=>'bya','ăă§'=>'bye','ăăŁ'=>'byi','ăă§'=>'byo','ăăĽ'=>'byu', 1490 'ăăŁ'=>'pya','ăă§'=>'pye','ăăŁ'=>'pyi','ăă§'=>'pyo','ăăĽ'=>'pyu', 1491 'ăăŁ'=>'kya','ăă§'=>'kye','ăăŁ'=>'kyi','ăă§'=>'kyo','ăăĽ'=>'kyu', 1492 'ăŽăŁ'=>'gya','ăŽă§'=>'gye','ăŽăŁ'=>'gyi','ăŽă§'=>'gyo','ăŽăĽ'=>'gyu', 1493 'ăăŁ'=>'mya','ăă§'=>'mye','ăăŁ'=>'myi','ăă§'=>'myo','ăăĽ'=>'myu', 1494 'ăăŁ'=>'nya','ăă§'=>'nye','ăăŁ'=>'nyi','ăă§'=>'nyo','ăăĽ'=>'nyu', 1495 'ăŞăŁ'=>'rya','ăŞă§'=>'rye','ăŞăŁ'=>'ryi','ăŞă§'=>'ryo','ăŞăĽ'=>'ryu', 1496 'ăˇăŁ'=>'sha','ăˇă§'=>'she','ăˇă§'=>'sho','ăˇăĽ'=>'shu', 1497 'ă¸ăŁ'=>'ja','ă¸ă§'=>'je','ă¸ă§'=>'jo','ă¸ăĽ'=>'ju', 1498 'ăšăĄ'=>'swa','ăšă§'=>'swe','ăšăŁ'=>'swi','ăšăŠ'=>'swo','ăšăĽ'=>'swu', 1499 'ăăĄ'=>'da','ăă§'=>'de','ăăŁ'=>'di','ăăŠ'=>'do','ăăĽ'=>'du', 1500 'ăăŁ'=>'cha','ăă§'=>'che','ă'=>'chi','ăă§'=>'cho','ăăĽ'=>'chu', 1501 // 
'ăăŁ'=>'dya','ăă§'=>'dye','ăăŁ'=>'dyi','ăă§'=>'dyo','ăăĽ'=>'dyu', 1502 'ăăŁ'=>'tsa','ăă§'=>'tse','ăăŁ'=>'tsi','ăă§'=>'tso','ă'=>'tsu', 1503 'ăăĄ'=>'twa','ăă§'=>'twe','ăăŁ'=>'twi','ăăŠ'=>'two','ăăĽ'=>'twu', 1504 'ăăĄ'=>'dwa','ăă§'=>'dwe','ăăŁ'=>'dwi','ăăŠ'=>'dwo','ăăĽ'=>'dwu', 1505 'ăŚăĄ'=>'wha','ăŚă§'=>'whe','ăŚăŁ'=>'whi','ăŚăŠ'=>'who','ăŚăĽ'=>'whu', 1506 'ă´ăŁ'=>'vya','ă´ă§'=>'vye','ă´ăŁ'=>'vyi','ă´ă§'=>'vyo','ă´ăĽ'=>'vyu', 1507 'ă´ăĄ'=>'va','ă´ă§'=>'ve','ă´ăŁ'=>'vi','ă´ăŠ'=>'vo','ă´'=>'vu', 1508 'ăŚă§'=>'we','ăŚăŁ'=>'wi', 1509 'ă¤ă§'=>'ye', 1510 'ăăŁ'=>'ti', 1511 'ăăŁ'=>'di', 1512 1513 // 2 character syllables - doubled vocal 1514 'ă˘ăź'=>'aa','ă¨ăź'=>'ee','ă¤ăź'=>'ii','ăŞăź'=>'oo','ăŚăź'=>'uu', 1515 'ăăź'=>'daa','ăăź'=>'dee','ăăź'=>'dii','ăăź'=>'doo','ă ăź'=>'duu', 1516 'ăăź'=>'haa','ăăź'=>'hee','ăăź'=>'hii','ăăź'=>'hoo','ăăź'=>'fuu', 1517 'ăăź'=>'baa','ăăź'=>'bee','ăăź'=>'bii','ăăź'=>'boo','ăăź'=>'buu', 1518 'ăăź'=>'paa','ăăź'=>'pee','ăăź'=>'pii','ăăź'=>'poo','ăăź'=>'puu', 1519 'ăąăź'=>'kee','ăăź'=>'kii','ăłăź'=>'koo','ăŻăź'=>'kuu','ăŤăź'=>'kaa', 1520 'ăŹăź'=>'gaa','ă˛ăź'=>'gee','ăŽăź'=>'gii','ă´ăź'=>'goo','ă°ăź'=>'guu', 1521 'ăăź'=>'maa','ăĄăź'=>'mee','ăăź'=>'mii','ă˘ăź'=>'moo','ă ăź'=>'muu', 1522 'ăăź'=>'naa','ăăź'=>'nee','ăăź'=>'nii','ăăź'=>'noo','ăăź'=>'nuu', 1523 'ăŠăź'=>'raa','ăŹăź'=>'ree','ăŞăź'=>'rii','ăăź'=>'roo','ăŤăź'=>'ruu', 1524 'ăľăź'=>'saa','ăťăź'=>'see','ăˇăź'=>'shii','ă˝ăź'=>'soo','ăšăź'=>'suu', 1525 'ăśăź'=>'zaa','ăźăź'=>'zee','ă¸ăź'=>'jii','ăžăź'=>'zoo','ăşăź'=>'zuu', 1526 'ăżăź'=>'taa','ăăź'=>'tee','ăăź'=>'chii','ăăź'=>'too','ăăź'=>'tsuu', 1527 'ăŻăź'=>'waa','ă˛ăź'=>'woo', 1528 'ă¤ăź'=>'yaa','ă¨ăź'=>'yoo','ăŚăź'=>'yuu', 1529 'ăľăź'=>'kaa','ăśăź'=>'kee', 1530 // old characters 1531 'ăąăź'=>'wee','ă°ăź'=>'wii', 1532 1533 // seperate katakana 'n' 1534 'ăłă˘'=>'n_a','ăłă¨'=>'n_e','ăłă¤'=>'n_i','ăłăŞ'=>'n_o','ăłăŚ'=>'n_u', 1535 'ăłă¤'=>'n_ya','ăłă¨'=>'n_yo','ăłăŚ'=>'n_yu', 1536 1537 // 2 character syllables - doubled consonants 1538 
'ăă'=>'bba','ăă'=>'bbe','ăă'=>'bbi','ăă'=>'bbo','ăă'=>'bbu', 1539 'ăă'=>'ppa','ăă'=>'ppe','ăă'=>'ppi','ăă'=>'ppo','ăă'=>'ppu', 1540 'ăăą'=>'kke','ăă'=>'kki','ăăł'=>'kko','ăăŻ'=>'kku','ăăŤ'=>'kka', 1541 'ăăŹ'=>'gga','ăă˛'=>'gge','ăăŽ'=>'ggi','ăă´'=>'ggo','ăă°'=>'ggu', 1542 'ăă'=>'ma','ăăĄ'=>'me','ăă'=>'mi','ăă˘'=>'mo','ăă '=>'mu', 1543 'ăă'=>'nna','ăă'=>'nne','ăă'=>'nni','ăă'=>'nno','ăă'=>'nnu', 1544 'ăăŠ'=>'rra','ăăŹ'=>'rre','ăăŞ'=>'rri','ăă'=>'rro','ăăŤ'=>'rru', 1545 'ăăľ'=>'ssa','ăăť'=>'sse','ăăˇ'=>'sshi','ăă˝'=>'sso','ăăš'=>'ssu', 1546 'ăăś'=>'zza','ăăź'=>'zze','ăă¸'=>'jji','ăăž'=>'zzo','ăăş'=>'zzu', 1547 'ăăż'=>'tta','ăă'=>'tte','ăă'=>'cchi','ăă'=>'tto','ăă'=>'ttsu', 1548 'ăă'=>'dda','ăă'=>'dde','ăă'=>'ddi','ăă'=>'ddo','ăă '=>'ddu', 1549 1550 // 1 character syllables 1551 'ă˘'=>'a','ă¨'=>'e','ă¤'=>'i','ăŞ'=>'o','ăŚ'=>'u','ăł'=>'n', 1552 'ă'=>'ha','ă'=>'he','ă'=>'hi','ă'=>'ho','ă'=>'fu', 1553 'ă'=>'ba','ă'=>'be','ă'=>'bi','ă'=>'bo','ă'=>'bu', 1554 'ă'=>'pa','ă'=>'pe','ă'=>'pi','ă'=>'po','ă'=>'pu', 1555 'ăą'=>'ke','ă'=>'ki','ăł'=>'ko','ăŻ'=>'ku','ăŤ'=>'ka', 1556 'ăŹ'=>'ga','ă˛'=>'ge','ăŽ'=>'gi','ă´'=>'go','ă°'=>'gu', 1557 'ă'=>'ma','ăĄ'=>'me','ă'=>'mi','ă˘'=>'mo','ă '=>'mu', 1558 'ă'=>'na','ă'=>'ne','ă'=>'ni','ă'=>'no','ă'=>'nu', 1559 'ăŠ'=>'ra','ăŹ'=>'re','ăŞ'=>'ri','ă'=>'ro','ăŤ'=>'ru', 1560 'ăľ'=>'sa','ăť'=>'se','ăˇ'=>'shi','ă˝'=>'so','ăš'=>'su', 1561 'ăś'=>'za','ăź'=>'ze','ă¸'=>'ji','ăž'=>'zo','ăş'=>'zu', 1562 'ăż'=>'ta','ă'=>'te','ă'=>'chi','ă'=>'to','ă'=>'tsu', 1563 'ă'=>'da','ă'=>'de','ă'=>'di','ă'=>'do','ă '=>'du', 1564 'ăŻ'=>'wa','ă˛'=>'wo', 1565 'ă¤'=>'ya','ă¨'=>'yo','ăŚ'=>'yu', 1566 'ăľ'=>'ka','ăś'=>'ke', 1567 // old characters 1568 'ăą'=>'we','ă°'=>'wi', 1569 1570 // convert what's left (probably only kicks in when something's missing above) 1571 'ăĄ'=>'a','ă§'=>'e','ăŁ'=>'i','ăŠ'=>'o','ăĽ'=>'u', 1572 'ăŁ'=>'ya','ă§'=>'yo','ăĽ'=>'yu', 1573 1574 // special characters 1575 'ăť'=>'_','ă'=>'_', 1576 'ăź'=>'_', // when used with hiragana (seldom), this 
character would not be converted otherwise 1577 1578 // 'ăŠ'=>'la','ăŹ'=>'le','ăŞ'=>'li','ă'=>'lo','ăŤ'=>'lu', 1579 // 'ăăŁ'=>'cya','ăă§'=>'cye','ăăŁ'=>'cyi','ăă§'=>'cyo','ăăĽ'=>'cyu', 1580 //'ăăŁ'=>'dha','ăă§'=>'dhe','ăăŁ'=>'dhi','ăă§'=>'dho','ăăĽ'=>'dhu', 1581 // 'ăŞăŁ'=>'lya','ăŞă§'=>'lye','ăŞăŁ'=>'lyi','ăŞă§'=>'lyo','ăŞăĽ'=>'lyu', 1582 // 'ăăŁ'=>'tha','ăă§'=>'the','ăăŁ'=>'thi','ăă§'=>'tho','ăăĽ'=>'thu', 1583 //'ăăĄ'=>'fwa','ăă§'=>'fwe','ăăŁ'=>'fwi','ăăŠ'=>'fwo','ăăĽ'=>'fwu', 1584 //'ăăŁ'=>'tya','ăă§'=>'tye','ăăŁ'=>'tyi','ăă§'=>'tyo','ăăĽ'=>'tyu', 1585 // 'ă¸ăŁ'=>'jya','ă¸ă§'=>'jye','ă¸ăŁ'=>'jyi','ă¸ă§'=>'jyo','ă¸ăĽ'=>'jyu', 1586 // 'ă¸ăŁ'=>'zha','ă¸ă§'=>'zhe','ă¸ăŁ'=>'zhi','ă¸ă§'=>'zho','ă¸ăĽ'=>'zhu', 1587 //'ă¸ăŁ'=>'zya','ă¸ă§'=>'zye','ă¸ăŁ'=>'zyi','ă¸ă§'=>'zyo','ă¸ăĽ'=>'zyu', 1588 //'ăˇăŁ'=>'sya','ăˇă§'=>'sye','ăˇăŁ'=>'syi','ăˇă§'=>'syo','ăˇăĽ'=>'syu', 1589 //'ăˇ'=>'ci','ă'=>'hu',ăˇ'=>'si','ă'=>'ti','ă'=>'tu','ă¤'=>'yi','ă'=>'dzi', 1590 1591 // "Greeklish" 1592 'Î'=>'G','Î'=>'E','Î'=>'Th','Î'=>'L','Î'=>'X','Î '=>'P','ÎŁ'=>'S','ÎŚ'=>'F','Ψ'=>'Ps', 1593 'Îł'=>'g','δ'=>'e','θ'=>'th','Îť'=>'l','Ξ'=>'x','Ď'=>'p','Ď'=>'s','Ď'=>'f','Ď'=>'ps', 1594 1595 // Thai 1596 'ŕ¸'=>'k','ŕ¸'=>'kh','ŕ¸'=>'kh','ŕ¸'=>'kh','ภ'=>'kh','ŕ¸'=>'kh','ŕ¸'=>'ng','ŕ¸'=>'ch', 1597 'ŕ¸'=>'ch','ŕ¸'=>'ch','ŕ¸'=>'s','ŕ¸'=>'ch','ŕ¸'=>'y','ŕ¸'=>'d','ŕ¸'=>'t','ŕ¸'=>'th', 1598 'ŕ¸'=>'d','ŕ¸'=>'th','ŕ¸'=>'n','ŕ¸'=>'d','ŕ¸'=>'t','ŕ¸'=>'th','ŕ¸'=>'th','ŕ¸'=>'th', 1599 'ŕ¸'=>'n','ŕ¸'=>'b','ŕ¸'=>'p','ŕ¸'=>'ph','ŕ¸'=>'f','ŕ¸'=>'ph','ŕ¸'=>'f','ภ'=>'ph', 1600 'ล'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤ๠'=>'rue','฼'=>'l','ฌ'=>'lue', 1601 'ฦ๠'=>'lue','ว'=>'w','ศ'=>'s','ช'=>'s','ส'=>'s','ญ'=>'h','ฏ'=>'l','ฎ'=>'h', 1602 'ะ'=>'a','ู'=>'a','รร'=>'a','า'=>'a','ŕš '=>'a','ำ'=>'am','ŕšŕ¸˛'=>'am', 1603 'ิ'=>'i','฾'=>'i','ผ'=>'ue','฾'=>'ue','ุ'=>'u','บ'=>'u', 1604 'ŕš'=>'e','ŕš'=>'ae','ŕš'=>'o','ŕ¸'=>'o', 1605 '฾ยะ'=>'ia','฾ย'=>'ia','มŕ¸ŕ¸°'=>'uea','มŕ¸'=>'uea','ูวะ'=>'ua','ูว'=>'ua', 1606 
'ŕš'=>'ai','ŕš'=>'ai','ูย'=>'ai','าย'=>'ai','าว'=>'ao', 1607 'ุย'=>'ui','ŕ¸ŕ¸˘'=>'oi','มŕ¸ŕ¸˘'=>'ueai','วย'=>'uai', 1608 'ิว'=>'io','ŕšŕ¸§'=>'eo','฾ยว'=>'iao', 1609 'ŕš'=>'','ŕš'=>'','ŕš'=>'','ŕš'=>'','ŕš'=>'', 1610 'ŕš'=>'','ŕš'=>'','ŕš'=>'','ฺ'=>'', 1611 'ŕš'=>'2','ŕš'=>'o','ฯ'=>'-','ŕš'=>'-','ŕš'=>'-', 1612 'ŕš'=>'0','ŕš'=>'1','ŕš'=>'2','ŕš'=>'3','ŕš'=>'4', 1613 'ŕš'=>'5','ŕš'=>'6','ŕš'=>'7','ŕš'=>'8','ŕš'=>'9', 1614 1615 // Korean 1616 'ăą'=>'k','ă '=>'kh','ă˛'=>'kk','ăˇ'=>'t','ă '=>'th','ă¸'=>'tt','ă '=>'p', 1617 'ă '=>'ph','ă '=>'pp','ă '=>'c','ă '=>'ch','ă '=>'cc','ă '=>'s','ă '=>'ss', 1618 'ă '=>'h','ă '=>'ng','ă´'=>'n','ăš'=>'l','ă '=>'m', 'ă '=>'a','ă '=>'e','ă '=>'o', 1619 'ă '=>'wu','ă Ą'=>'u','ă Ł'=>'i','ă '=>'ay','ă '=>'ey','ă '=>'oy','ă '=>'wa','ă '=>'we', 1620 'ă '=>'wi','ă '=>'way','ă '=>'wey','ă ˘'=>'uy','ă '=>'ya','ă '=>'ye','ă '=>'oy', 1621 'ă '=>'yu','ă '=>'yay','ă '=>'yey', 1622); 1623 1624