PageRenderTime 143ms CodeModel.GetById 49ms app.highlight 79ms RepoModel.GetById 1ms app.codeStats 1ms

/base/lib/flourishlib/fUTF8.php

https://bitbucket.org/thanhtungnguyenphp/monitos
PHP | 1623 lines | 1009 code | 185 blank | 429 comment | 113 complexity | be1b877c3c091ae9fec8a7a85836a3d9 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1<?php
  2/**
  3 * Provides string functions for UTF-8 strings
  4 * 
  5 * This class is implemented to provide a UTF-8 version of almost every built-in
  6 * PHP string function. For more information about UTF-8, please visit
  7 * http://flourishlib.com/docs/UTF-8.
  8 * 
  9 * @copyright  Copyright (c) 2008-2010 Will Bond
 10 * @author     Will Bond [wb] <will@flourishlib.com>
 11 * @license    http://flourishlib.com/license
 12 * 
 13 * @package    Flourish
 14 * @link       http://flourishlib.com/fUTF8
 15 * 
 16 * @version    1.0.0b11
 17 * @changes    1.0.0b11  Updated the class to not use phpinfo() to determine the iconv implementation [wb, 2010-11-04]
 18 * @changes    1.0.0b10  Fixed a bug with capitalizing a lowercase i resulting in a dotted upper-case I [wb, 2010-11-01]
 19 * @changes    1.0.0b9   Updated class to use fCore::startErrorCapture() instead of `error_reporting()` [wb, 2010-08-09]
 20 * @changes    1.0.0b8   Removed `e` flag from preg_replace() calls [wb, 2010-06-08]
 21 * @changes    1.0.0b7   Added the methods ::trim(), ::rtrim() and ::ltrim() [wb, 2010-05-11]
 22 * @changes    1.0.0b6   Fixed ::clean() to work with PHP installs that use an iconv library that doesn't support //IGNORE [wb, 2010-03-02]
 23 * @changes    1.0.0b5   Changed ::ucwords() to also uppercase words right after various punctuation [wb, 2009-09-18]
 24 * @changes    1.0.0b4   Changed replacement values in preg_replace() calls to be properly escaped [wb, 2009-06-11]
 25 * @changes    1.0.0b3   Fixed a parameter name in ::rpos() from `$search` to `$needle` [wb, 2009-02-06]
 26 * @changes    1.0.0b2   Fixed a bug in ::explode() with newlines and zero-length delimiters [wb, 2009-02-05]
 27 * @changes    1.0.0b    The initial implementation [wb, 2008-06-01]
 28 */
 29class fUTF8
 30{
 31	// Callback-string constants: each holds the 'fUTF8::<name>' string for the static method of the same name, so a callback can be written as fUTF8::lower instead of a quoted string
 32	const ascii    = 'fUTF8::ascii';
 33	const chr      = 'fUTF8::chr';
 34	const clean    = 'fUTF8::clean';
 35	const cmp      = 'fUTF8::cmp';
 36	const explode  = 'fUTF8::explode';
 37	const icmp     = 'fUTF8::icmp';
 38	const inatcmp  = 'fUTF8::inatcmp';
 39	const ipos     = 'fUTF8::ipos';
 40	const ireplace = 'fUTF8::ireplace';
 41	const irpos    = 'fUTF8::irpos';
 42	const istr     = 'fUTF8::istr';
 43	const len      = 'fUTF8::len';
 44	const lower    = 'fUTF8::lower';
 45	const ltrim    = 'fUTF8::ltrim';
 46	const natcmp   = 'fUTF8::natcmp';
 47	const ord      = 'fUTF8::ord';
 48	const pad      = 'fUTF8::pad';
 49	const pos      = 'fUTF8::pos';
 50	const replace  = 'fUTF8::replace';
 51	const reset    = 'fUTF8::reset';
 52	const rev      = 'fUTF8::rev';
 53	const rpos     = 'fUTF8::rpos';
 54	const rtrim    = 'fUTF8::rtrim';
 55	const str      = 'fUTF8::str';
 56	const sub      = 'fUTF8::sub';
 57	const trim     = 'fUTF8::trim';
 58	const ucfirst  = 'fUTF8::ucfirst';
 59	const ucwords  = 'fUTF8::ucwords';
 60	const upper    = 'fUTF8::upper';
 61	const wordwrap = 'fUTF8::wordwrap';
 62	
 63	
 64	/**
 65	 * Depending on how things are compiled, NetBSD and Solaris don't support //IGNORE in iconv()
 66	 * 
 67	 * If //IGNORE support is not provided, strings with invalid characters will be truncated
 68	 * 
 69	 * @var boolean
 70	 */
 71	static private $can_ignore_invalid = NULL;
 72	
 73	/**
 74	 * All lowercase UTF-8 characters mapped to uppercase characters
 75	 * 
 76	 * @var array
 77	 */
 78	static private $lower_to_upper = array(
 79		'a' => 'A', 'b' => 'B', 'c' => 'C', 'd' => 'D', 'e' => 'E', 'f' => 'F',
 80		'g' => 'G', 'h' => 'H', 'i' => 'I', 'j' => 'J', 'k' => 'K', 'l' => 'L',
 81		'm' => 'M', 'n' => 'N', 'o' => 'O', 'p' => 'P', 'q' => 'Q', 'r' => 'R',
 82		's' => 'S', 't' => 'T', 'u' => 'U', 'v' => 'V', 'w' => 'W', 'x' => 'X',
 83		'y' => 'Y', 'z' => 'Z', 'à' => 'À', 'á' => 'Á', 'â' => 'Â', 'ã' => 'Ã',
 84		'ä' => 'Ä', 'å' => 'Å', 'æ' => 'Æ', 'ç' => 'Ç', 'è' => 'È', 'é' => 'É',
 85		'ê' => 'Ê', 'ë' => 'Ë', 'ì' => 'Ì', 'í' => 'Í', 'î' => 'Î', 'ï' => 'Ï',
 86		'ð' => 'Ð', 'ñ' => 'Ñ', 'ò' => 'Ò', 'ó' => 'Ó', 'ô' => 'Ô', 'õ' => 'Õ',
 87		'ö' => 'Ö', 'ø' => 'Ø', 'ù' => 'Ù', 'ú' => 'Ú', 'û' => 'Û', 'ü' => 'Ü',
 88		'ý' => 'Ý', 'þ' => 'Þ', 'ā' => 'Ā', 'ă' => 'Ă', 'ą' => 'Ą', 'ć' => 'Ć',
 89		'ĉ' => 'Ĉ', 'ċ' => 'Ċ', 'č' => 'Č', 'ď' => 'Ď', 'đ' => 'Đ', 'ē' => 'Ē',
 90		'ĕ' => 'Ĕ', 'ė' => 'Ė', 'ę' => 'Ę', 'ě' => 'Ě', 'ĝ' => 'Ĝ', 'ğ' => 'Ğ',
 91		'ġ' => 'Ġ', 'ģ' => 'Ģ', 'ĥ' => 'Ĥ', 'ħ' => 'Ħ', 'ĩ' => 'Ĩ', 'ī' => 'Ī',
 92		'ĭ' => 'Ĭ', 'į' => 'Į', 'ij' => 'IJ', 'ĵ' => 'Ĵ', 'ķ' => 'Ķ', 'ĺ' => 'Ĺ',
 93		'ļ' => 'Ļ', 'ľ' => 'Ľ', 'ŀ' => 'Ŀ', 'ł' => 'Ł', 'ń' => 'Ń', 'ņ' => 'Ņ',
 94		'ň' => 'Ň', 'ŋ' => 'Ŋ', 'ō' => 'Ō', 'ŏ' => 'Ŏ', 'ő' => 'Ő', 'œ' => 'Œ',
 95		'ŕ' => 'Ŕ', 'ŗ' => 'Ŗ', 'ř' => 'Ř', 'ś' => 'Ś', 'ŝ' => 'Ŝ', 'ş' => 'Ş',
 96		'š' => 'Š', 'ţ' => 'Ţ', 'ť' => 'Ť', 'ŧ' => 'Ŧ', 'ũ' => 'Ũ', 'ū' => 'Ū',
 97		'ŭ' => 'Ŭ', 'ů' => 'Ů', 'ű' => 'Ű', 'ų' => 'Ų', 'ŵ' => 'Ŵ', 'ŷ' => 'Ŷ',
 98		'ÿ' => 'Ÿ', 'ź' => 'Ź', 'ż' => 'Ż', 'ž' => 'Ž', 'ɓ' => 'Ɓ', 'ƃ' => 'Ƃ',
 99		'ƅ' => 'Ƅ', 'ɔ' => 'Ɔ', 'ƈ' => 'Ƈ', 'ɗ' => 'Ɗ', 'ƌ' => 'Ƌ', 'ɘ' => 'Ǝ',
100		'ə' => 'Ə', 'ɛ' => 'Ɛ', 'ƒ' => 'Ƒ', 'ɠ' => 'Ɠ', 'ɣ' => 'Ɣ', 'ɩ' => 'Ɩ',
101		'ɨ' => 'Ɨ', 'ƙ' => 'Ƙ', 'ɯ' => 'Ɯ', 'ɲ' => 'Ɲ', 'ɵ' => 'Ɵ', 'ơ' => 'Ơ',
102		'ƣ' => 'Ƣ', 'ƥ' => 'Ƥ', 'ƨ' => 'Ƨ', 'ʃ' => 'Ʃ', 'ƭ' => 'Ƭ', 'ʈ' => 'Ʈ',
103		'ư' => 'Ư', 'ʊ' => 'Ʊ', 'ʋ' => 'Ʋ', 'ƴ' => 'Ƴ', 'ƶ' => 'Ƶ', 'ʒ' => 'Ʒ',
104		'ƹ' => 'Ƹ', 'ƽ' => 'Ƽ', 'dž' => 'DŽ', 'dž' => 'Dž', 'lj' => 'LJ', 'lj' => 'Lj',
105		'nj' => 'NJ', 'nj' => 'Nj', 'ǎ' => 'Ǎ', 'ǐ' => 'Ǐ', 'ǒ' => 'Ǒ', 'ǔ' => 'Ǔ',
106		'ǖ' => 'Ǖ', 'ǘ' => 'Ǘ', 'ǚ' => 'Ǚ', 'ǜ' => 'Ǜ', 'ǟ' => 'Ǟ', 'ǡ' => 'Ǡ',
107		'ǣ' => 'Ǣ', 'ǥ' => 'Ǥ', 'ǧ' => 'Ǧ', 'ǩ' => 'Ǩ', 'ǫ' => 'Ǫ', 'ǭ' => 'Ǭ',
108		'ǯ' => 'Ǯ', 'dz' => 'DZ', 'ǵ' => 'Ǵ', 'ǻ' => 'Ǻ', 'ǽ' => 'Ǽ', 'ǿ' => 'Ǿ',
109		'ȁ' => 'Ȁ', 'ȃ' => 'Ȃ', 'ȅ' => 'Ȅ', 'ȇ' => 'Ȇ', 'ȉ' => 'Ȉ', 'ȋ' => 'Ȋ',
110		'ȍ' => 'Ȍ', 'ȏ' => 'Ȏ', 'ȑ' => 'Ȑ', 'ȓ' => 'Ȓ', 'ȕ' => 'Ȕ', 'ȗ' => 'Ȗ',
111		'ά' => 'Ά', 'έ' => 'Έ', 'ή' => 'Ή', 'ί' => 'Ί', 'ό' => 'Ό', 'ύ' => 'Ύ',
112		'ώ' => 'Ώ', 'α' => 'Α', 'β' => 'Β', 'γ' => 'Γ', 'δ' => 'Δ', 'ε' => 'Ε',
113		'ζ' => 'Ζ', 'η' => 'Η', 'θ' => 'Θ', 'ι' => 'Ι', 'κ' => 'Κ', 'λ' => 'Λ',
114		'μ' => 'Μ', 'ν' => 'Ν', 'ξ' => 'Ξ', 'ο' => 'Ο', 'π' => 'Π', 'ρ' => 'Ρ',
115		'σ' => 'Σ', 'τ' => 'Τ', 'υ' => 'Υ', 'φ' => 'Φ', 'χ' => 'Χ', 'ψ' => 'Ψ',
116		'ω' => 'Ω', 'ϊ' => 'Ϊ', 'ϋ' => 'Ϋ', 'ϣ' => 'Ϣ', 'ϥ' => 'Ϥ', 'ϧ' => 'Ϧ',
117		'ϩ' => 'Ϩ', 'ϫ' => 'Ϫ', 'ϭ' => 'Ϭ', 'ϯ' => 'Ϯ', 'ё' => 'Ё', 'ђ' => 'Ђ',
118		'ѓ' => 'Ѓ', 'є' => 'Є', 'ѕ' => 'Ѕ', 'і' => 'І', 'ї' => 'Ї', 'ј' => 'Ј',
119		'љ' => 'Љ', 'њ' => 'Њ', 'ћ' => 'Ћ', 'ќ' => 'Ќ', 'ў' => 'Ў', 'џ' => 'Џ',
120		'а' => 'А', 'б' => 'Б', 'в' => 'В', 'г' => 'Г', 'д' => 'Д', 'е' => 'Е',
121		'ж' => 'Ж', 'з' => 'З', 'и' => 'И', 'й' => 'Й', 'к' => 'К', 'л' => 'Л',
122		'м' => 'М', 'н' => 'Н', 'о' => 'О', 'п' => 'П', 'р' => 'Р', 'с' => 'С',
123		'т' => 'Т', 'у' => 'У', 'ф' => 'Ф', 'х' => 'Х', 'ц' => 'Ц', 'ч' => 'Ч',
124		'ш' => 'Ш', 'щ' => 'Щ', 'ъ' => 'Ъ', 'ы' => 'Ы', 'ь' => 'Ь', 'э' => 'Э',
125		'ю' => 'Ю', 'я' => 'Я', 'ѡ' => 'Ѡ', 'ѣ' => 'Ѣ', 'ѥ' => 'Ѥ', 'ѧ' => 'Ѧ',
126		'ѩ' => 'Ѩ', 'ѫ' => 'Ѫ', 'ѭ' => 'Ѭ', 'ѯ' => 'Ѯ', 'ѱ' => 'Ѱ', 'ѳ' => 'Ѳ',
127		'ѵ' => 'Ѵ', 'ѷ' => 'Ѷ', 'ѹ' => 'Ѹ', 'ѻ' => 'Ѻ', 'ѽ' => 'Ѽ', 'ѿ' => 'Ѿ',
128		'ҁ' => 'Ҁ', 'ґ' => 'Ґ', 'ғ' => 'Ғ', 'ҕ' => 'Ҕ', 'җ' => 'Җ', 'ҙ' => 'Ҙ',
129		'қ' => 'Қ', 'ҝ' => 'Ҝ', 'ҟ' => 'Ҟ', 'ҡ' => 'Ҡ', 'ң' => 'Ң', 'ҥ' => 'Ҥ',
130		'ҧ' => 'Ҧ', 'ҩ' => 'Ҩ', 'ҫ' => 'Ҫ', 'ҭ' => 'Ҭ', 'ү' => 'Ү', 'ұ' => 'Ұ',
131		'ҳ' => 'Ҳ', 'ҵ' => 'Ҵ', 'ҷ' => 'Ҷ', 'ҹ' => 'Ҹ', 'һ' => 'Һ', 'ҽ' => 'Ҽ',
132		'ҿ' => 'Ҿ', 'ӂ' => 'Ӂ', 'ӄ' => 'Ӄ', 'ӈ' => 'Ӈ', 'ӌ' => 'Ӌ', 'ӑ' => 'Ӑ',
133		'ӓ' => 'Ӓ', 'ӕ' => 'Ӕ', 'ӗ' => 'Ӗ', 'ә' => 'Ә', 'ӛ' => 'Ӛ', 'ӝ' => 'Ӝ',
134		'ӟ' => 'Ӟ', 'ӡ' => 'Ӡ', 'ӣ' => 'Ӣ', 'ӥ' => 'Ӥ', 'ӧ' => 'Ӧ', 'ө' => 'Ө',
135		'ӫ' => 'Ӫ', 'ӯ' => 'Ӯ', 'ӱ' => 'Ӱ', 'ӳ' => 'Ӳ', 'ӵ' => 'Ӵ', 'ӹ' => 'Ӹ',
136		'ա' => 'Ա', 'բ' => 'Բ', 'գ' => 'Գ', 'դ' => 'Դ', 'ե' => 'Ե', 'զ' => 'Զ',
137		'է' => 'Է', 'ը' => 'Ը', 'թ' => 'Թ', 'ժ' => 'Ժ', 'ի' => 'Ի', 'լ' => 'Լ',
138		'խ' => 'Խ', 'ծ' => 'Ծ', 'կ' => 'Կ', 'հ' => 'Հ', 'ձ' => 'Ձ', 'ղ' => 'Ղ',
139		'ճ' => 'Ճ', 'մ' => 'Մ', 'յ' => 'Յ', 'ն' => 'Ն', 'շ' => 'Շ', 'ո' => 'Ո',
140		'չ' => 'Չ', 'պ' => 'Պ', 'ջ' => 'Ջ', 'ռ' => 'Ռ', 'ս' => 'Ս', 'վ' => 'Վ',
141		'տ' => 'Տ', 'ր' => 'Ր', 'ց' => 'Ց', 'ւ' => 'Ւ', 'փ' => 'Փ', 'ք' => 'Ք',
142		'օ' => 'Օ', 'ֆ' => 'Ֆ', 'ა' => 'Ⴀ', 'ბ' => 'Ⴁ', 'გ' => 'Ⴂ', 'დ' => 'Ⴃ',
143		'ე' => 'Ⴄ', 'ვ' => 'Ⴅ', 'ზ' => 'Ⴆ', 'თ' => 'Ⴇ', 'ი' => 'Ⴈ', 'კ' => 'Ⴉ',
144		'ლ' => 'Ⴊ', 'მ' => 'Ⴋ', 'ნ' => 'Ⴌ', 'ო' => 'Ⴍ', 'პ' => 'Ⴎ', 'ჟ' => 'Ⴏ',
145		'რ' => 'Ⴐ', 'ს' => 'Ⴑ', 'ტ' => 'Ⴒ', 'უ' => 'Ⴓ', 'ფ' => 'Ⴔ', 'ქ' => 'Ⴕ',
146		'ღ' => 'Ⴖ', 'ყ' => 'Ⴗ', 'შ' => 'Ⴘ', 'ჩ' => 'Ⴙ', 'ც' => 'Ⴚ', 'ძ' => 'Ⴛ',
147		'წ' => 'Ⴜ', 'ჭ' => 'Ⴝ', 'ხ' => 'Ⴞ', 'ჯ' => 'Ⴟ', 'ჰ' => 'Ⴠ', 'ჱ' => 'Ⴡ',
148		'ჲ' => 'Ⴢ', 'ჳ' => 'Ⴣ', 'ჴ' => 'Ⴤ', 'ჵ' => 'Ⴥ', 'ḁ' => 'Ḁ', 'ḃ' => 'Ḃ',
149		'ḅ' => 'Ḅ', 'ḇ' => 'Ḇ', 'ḉ' => 'Ḉ', 'ḋ' => 'Ḋ', 'ḍ' => 'Ḍ', 'ḏ' => 'Ḏ',
150		'ḑ' => 'Ḑ', 'ḓ' => 'Ḓ', 'ḕ' => 'Ḕ', 'ḗ' => 'Ḗ', 'ḙ' => 'Ḙ', 'ḛ' => 'Ḛ',
151		'ḝ' => 'Ḝ', 'ḟ' => 'Ḟ', 'ḡ' => 'Ḡ', 'ḣ' => 'Ḣ', 'ḥ' => 'Ḥ', 'ḧ' => 'Ḧ',
152		'ḩ' => 'Ḩ', 'ḫ' => 'Ḫ', 'ḭ' => 'Ḭ', 'ḯ' => 'Ḯ', 'ḱ' => 'Ḱ', 'ḳ' => 'Ḳ',
153		'ḵ' => 'Ḵ', 'ḷ' => 'Ḷ', 'ḹ' => 'Ḹ', 'ḻ' => 'Ḻ', 'ḽ' => 'Ḽ', 'ḿ' => 'Ḿ',
154		'ṁ' => 'Ṁ', 'ṃ' => 'Ṃ', 'ṅ' => 'Ṅ', 'ṇ' => 'Ṇ', 'ṉ' => 'Ṉ', 'ṋ' => 'Ṋ',
155		'ṍ' => 'Ṍ', 'ṏ' => 'Ṏ', 'ṑ' => 'Ṑ', 'ṓ' => 'Ṓ', 'ṕ' => 'Ṕ', 'ṗ' => 'Ṗ',
156		'ṙ' => 'Ṙ', 'ṛ' => 'Ṛ', 'ṝ' => 'Ṝ', 'ṟ' => 'Ṟ', 'ṡ' => 'Ṡ', 'ṣ' => 'Ṣ',
157		'ṥ' => 'Ṥ', 'ṧ' => 'Ṧ', 'ṩ' => 'Ṩ', 'ṫ' => 'Ṫ', 'ṭ' => 'Ṭ', 'ṯ' => 'Ṯ',
158		'ṱ' => 'Ṱ', 'ṳ' => 'Ṳ', 'ṵ' => 'Ṵ', 'ṷ' => 'Ṷ', 'ṹ' => 'Ṹ', 'ṻ' => 'Ṻ',
159		'ṽ' => 'Ṽ', 'ṿ' => 'Ṿ', 'ẁ' => 'Ẁ', 'ẃ' => 'Ẃ', 'ẅ' => 'Ẅ', 'ẇ' => 'Ẇ',
160		'ẉ' => 'Ẉ', 'ẋ' => 'Ẋ', 'ẍ' => 'Ẍ', 'ẏ' => 'Ẏ', 'ẑ' => 'Ẑ', 'ẓ' => 'Ẓ',
161		'ẕ' => 'Ẕ', 'ạ' => 'Ạ', 'ả' => 'Ả', 'ấ' => 'Ấ', 'ầ' => 'Ầ', 'ẩ' => 'Ẩ',
162		'ẫ' => 'Ẫ', 'ậ' => 'Ậ', 'ắ' => 'Ắ', 'ằ' => 'Ằ', 'ẳ' => 'Ẳ', 'ẵ' => 'Ẵ',
163		'ặ' => 'Ặ', 'ẹ' => 'Ẹ', 'ẻ' => 'Ẻ', 'ẽ' => 'Ẽ', 'ế' => 'Ế', 'ề' => 'Ề',
164		'ể' => 'Ể', 'ễ' => 'Ễ', 'ệ' => 'Ệ', 'ỉ' => 'Ỉ', 'ị' => 'Ị', 'ọ' => 'Ọ',
165		'ỏ' => 'Ỏ', 'ố' => 'Ố', 'ồ' => 'Ồ', 'ổ' => 'Ổ', 'ỗ' => 'Ỗ', 'ộ' => 'Ộ',
166		'ớ' => 'Ớ', 'ờ' => 'Ờ', 'ở' => 'Ở', 'ỡ' => 'Ỡ', 'ợ' => 'Ợ', 'ụ' => 'Ụ',
167		'ủ' => 'Ủ', 'ứ' => 'Ứ', 'ừ' => 'Ừ', 'ử' => 'Ử', 'ữ' => 'Ữ', 'ự' => 'Ự',
168		'ỳ' => 'Ỳ', 'ỵ' => 'Ỵ', 'ỷ' => 'Ỷ', 'ỹ' => 'Ỹ', 'ἀ' => 'Ἀ', 'ἁ' => 'Ἁ',
169		'ἂ' => 'Ἂ', 'ἃ' => 'Ἃ', 'ἄ' => 'Ἄ', 'ἅ' => 'Ἅ', 'ἆ' => 'Ἆ', 'ἇ' => 'Ἇ',
170		'ἐ' => 'Ἐ', 'ἑ' => 'Ἑ', 'ἒ' => 'Ἒ', 'ἓ' => 'Ἓ', 'ἔ' => 'Ἔ', 'ἕ' => 'Ἕ',
171		'ἠ' => 'Ἠ', 'ἡ' => 'Ἡ', 'ἢ' => 'Ἢ', 'ἣ' => 'Ἣ', 'ἤ' => 'Ἤ', 'ἥ' => 'Ἥ',
172		'ἦ' => 'Ἦ', 'ἧ' => 'Ἧ', 'ἰ' => 'Ἰ', 'ἱ' => 'Ἱ', 'ἲ' => 'Ἲ', 'ἳ' => 'Ἳ',
173		'ἴ' => 'Ἴ', 'ἵ' => 'Ἵ', 'ἶ' => 'Ἶ', 'ἷ' => 'Ἷ', 'ὀ' => 'Ὀ', 'ὁ' => 'Ὁ',
174		'ὂ' => 'Ὂ', 'ὃ' => 'Ὃ', 'ὄ' => 'Ὄ', 'ὅ' => 'Ὅ', 'ὑ' => 'Ὑ', 'ὓ' => 'Ὓ',
175		'ὕ' => 'Ὕ', 'ὗ' => 'Ὗ', 'ὠ' => 'Ὠ', 'ὡ' => 'Ὡ', 'ὢ' => 'Ὢ', 'ὣ' => 'Ὣ',
176		'ὤ' => 'Ὤ', 'ὥ' => 'Ὥ', 'ὦ' => 'Ὦ', 'ὧ' => 'Ὧ', 'ᾀ' => 'ᾈ', 'ᾁ' => 'ᾉ',
177		'ᾂ' => 'ᾊ', 'ᾃ' => 'ᾋ', 'ᾄ' => 'ᾌ', 'ᾅ' => 'ᾍ', 'ᾆ' => 'ᾎ', 'ᾇ' => 'ᾏ',
178		'ᾐ' => 'ᾘ', 'ᾑ' => 'ᾙ', 'ᾒ' => 'ᾚ', 'ᾓ' => 'ᾛ', 'ᾔ' => 'ᾜ', 'ᾕ' => 'ᾝ',
179		'ᾖ' => 'ᾞ', 'ᾗ' => 'ᾟ', 'ᾠ' => 'ᾨ', 'ᾡ' => 'ᾩ', 'ᾢ' => 'ᾪ', 'ᾣ' => 'ᾫ',
180		'ᾤ' => 'ᾬ', 'ᾥ' => 'ᾭ', 'ᾦ' => 'ᾮ', 'ᾧ' => 'ᾯ', 'ᾰ' => 'Ᾰ', 'ᾱ' => 'Ᾱ',
181		'ῐ' => 'Ῐ', 'ῑ' => 'Ῑ', 'ῠ' => 'Ῠ', 'ῡ' => 'Ῡ', 'ⓐ' => 'Ⓐ', 'ⓑ' => 'Ⓑ',
182		'ⓒ' => 'Ⓒ', 'ⓓ' => 'Ⓓ', 'ⓔ' => 'Ⓔ', 'ⓕ' => 'Ⓕ', 'ⓖ' => 'Ⓖ', 'ⓗ' => 'Ⓗ',
183		'ⓘ' => 'Ⓘ', 'ⓙ' => 'Ⓙ', 'ⓚ' => 'Ⓚ', 'ⓛ' => 'Ⓛ', 'ⓜ' => 'Ⓜ', 'ⓝ' => 'Ⓝ',
184		'ⓞ' => 'Ⓞ', 'ⓟ' => 'Ⓟ', 'ⓠ' => 'Ⓠ', 'ⓡ' => 'Ⓡ', 'ⓢ' => 'Ⓢ', 'ⓣ' => 'Ⓣ',
185		'ⓤ' => 'Ⓤ', 'ⓥ' => 'Ⓥ', 'ⓦ' => 'Ⓦ', 'ⓧ' => 'Ⓧ', 'ⓨ' => 'Ⓨ', 'ⓩ' => 'Ⓩ',
186		'a' => 'A', 'b' => 'B', 'c' => 'C', 'd' => 'D', 'e' => 'E', 'f' => 'F',
187		'g' => 'G', 'h' => 'H', 'i' => 'I', 'j' => 'J', 'k' => 'K', 'l' => 'L',
188		'm' => 'M', 'n' => 'N', 'o' => 'O', 'p' => 'P', 'q' => 'Q', 'r' => 'R',
189		's' => 'S', 't' => 'T', 'u' => 'U', 'v' => 'V', 'w' => 'W', 'x' => 'X',
190		'y' => 'Y', 'z' => 'Z'
191	);
192	
193	/**
194	 * All lowercase UTF-8 characters not properly handled by [http://php.net/mb_strtoupper mb_strtoupper()] mapped to uppercase characters
195	 * 
196	 * @var array
197	 */
198	static private $mb_lower_to_upper_fix = array(
199		'ɘ' => 'Ǝ', 'Dz' => 'DZ', 'ა' => 'Ⴀ', 'ბ' => 'Ⴁ', 'გ' => 'Ⴂ', 'დ' => 'Ⴃ',
200		'ე' => 'Ⴄ', 'ვ' => 'Ⴅ', 'ზ' => 'Ⴆ', 'თ' => 'Ⴇ', 'ი' => 'Ⴈ', 'კ' => 'Ⴉ',
201		'ლ' => 'Ⴊ', 'მ' => 'Ⴋ', 'ნ' => 'Ⴌ', 'ო' => 'Ⴍ', 'პ' => 'Ⴎ', 'ჟ' => 'Ⴏ',
202		'რ' => 'Ⴐ', 'ს' => 'Ⴑ', 'ტ' => 'Ⴒ', 'უ' => 'Ⴓ', 'ფ' => 'Ⴔ', 'ქ' => 'Ⴕ',
203		'ღ' => 'Ⴖ', 'ყ' => 'Ⴗ', 'შ' => 'Ⴘ', 'ჩ' => 'Ⴙ', 'ც' => 'Ⴚ', 'ძ' => 'Ⴛ',
204		'წ' => 'Ⴜ', 'ჭ' => 'Ⴝ', 'ხ' => 'Ⴞ', 'ჯ' => 'Ⴟ', 'ჰ' => 'Ⴠ', 'ჱ' => 'Ⴡ',
205		'ჲ' => 'Ⴢ', 'ჳ' => 'Ⴣ', 'ჴ' => 'Ⴤ', 'ჵ' => 'Ⴥ', 'ⓐ' => 'Ⓐ', 'ⓑ' => 'Ⓑ',
206		'ⓒ' => 'Ⓒ', 'ⓓ' => 'Ⓓ', 'ⓔ' => 'Ⓔ', 'ⓕ' => 'Ⓕ', 'ⓖ' => 'Ⓖ', 'ⓗ' => 'Ⓗ',
207		'ⓘ' => 'Ⓘ', 'ⓙ' => 'Ⓙ', 'ⓚ' => 'Ⓚ', 'ⓛ' => 'Ⓛ', 'ⓜ' => 'Ⓜ', 'ⓝ' => 'Ⓝ',
208		'ⓞ' => 'Ⓞ', 'ⓟ' => 'Ⓟ', 'ⓠ' => 'Ⓠ', 'ⓡ' => 'Ⓡ', 'ⓢ' => 'Ⓢ', 'ⓣ' => 'Ⓣ',
209		'ⓤ' => 'Ⓤ', 'ⓥ' => 'Ⓥ', 'ⓦ' => 'Ⓦ', 'ⓧ' => 'Ⓧ', 'ⓨ' => 'Ⓨ', 'ⓩ' => 'Ⓩ'
210	);
211	
212	/**
213	 * All uppercase UTF-8 characters not properly handled by [http://php.net/mb_strtolower mb_strtolower()] mapped to lowercase characters
214	 * 
215	 * @var array
216	 */
217	static private $mb_upper_to_lower_fix = array(
218		'ǝ' => 'ɘ', 'Dž' => 'dž', 'Lj' => 'lj', 'Nj' => 'nj', 'Ⴀ' => 'ა', 'Ⴁ' => 'ბ',
219		'Ⴂ' => 'გ', 'Ⴃ' => 'დ', 'Ⴄ' => 'ე', 'Ⴅ' => 'ვ', 'Ⴆ' => 'ზ', 'Ⴇ' => 'თ',
220		'Ⴈ' => 'ი', 'Ⴉ' => 'კ', 'Ⴊ' => 'ლ', 'Ⴋ' => 'მ', 'Ⴌ' => 'ნ', 'Ⴍ' => 'ო',
221		'Ⴎ' => 'პ', 'Ⴏ' => 'ჟ', 'Ⴐ' => 'რ', 'Ⴑ' => 'ს', 'Ⴒ' => 'ტ', 'Ⴓ' => 'უ',
222		'Ⴔ' => 'ფ', 'Ⴕ' => 'ქ', 'Ⴖ' => 'ღ', 'Ⴗ' => 'ყ', 'Ⴘ' => 'შ', 'Ⴙ' => 'ჩ',
223		'Ⴚ' => 'ც', 'Ⴛ' => 'ძ', 'Ⴜ' => 'წ', 'Ⴝ' => 'ჭ', 'Ⴞ' => 'ხ', 'Ⴟ' => 'ჯ',
224		'Ⴠ' => 'ჰ', 'Ⴡ' => 'ჱ', 'Ⴢ' => 'ჲ', 'Ⴣ' => 'ჳ', 'Ⴤ' => 'ჴ', 'Ⴥ' => 'ჵ',
225		'ᾈ' => 'ᾀ', 'ᾉ' => 'ᾁ', 'ᾊ' => 'ᾂ', 'ᾋ' => 'ᾃ', 'ᾌ' => 'ᾄ', 'ᾍ' => 'ᾅ',
226		'ᾎ' => 'ᾆ', 'ᾏ' => 'ᾇ', 'ᾘ' => 'ᾐ', 'ᾙ' => 'ᾑ', 'ᾚ' => 'ᾒ', 'ᾛ' => 'ᾓ',
227		'ᾜ' => 'ᾔ', 'ᾝ' => 'ᾕ', 'ᾞ' => 'ᾖ', 'ᾟ' => 'ᾗ', 'ᾨ' => 'ᾠ', 'ᾩ' => 'ᾡ',
228		'ᾪ' => 'ᾢ', 'ᾫ' => 'ᾣ', 'ᾬ' => 'ᾤ', 'ᾭ' => 'ᾥ', 'ᾮ' => 'ᾦ', 'ᾯ' => 'ᾧ',
229		'Ⓐ' => 'ⓐ', 'Ⓑ' => 'ⓑ', 'Ⓒ' => 'ⓒ', 'Ⓓ' => 'ⓓ', 'Ⓔ' => 'ⓔ', 'Ⓕ' => 'ⓕ',
230		'Ⓖ' => 'ⓖ', 'Ⓗ' => 'ⓗ', 'Ⓘ' => 'ⓘ', 'Ⓙ' => 'ⓙ', 'Ⓚ' => 'ⓚ', 'Ⓛ' => 'ⓛ',
231		'Ⓜ' => 'ⓜ', 'Ⓝ' => 'ⓝ', 'Ⓞ' => 'ⓞ', 'Ⓟ' => 'ⓟ', 'Ⓠ' => 'ⓠ', 'Ⓡ' => 'ⓡ',
232		'Ⓢ' => 'ⓢ', 'Ⓣ' => 'ⓣ', 'Ⓤ' => 'ⓤ', 'Ⓥ' => 'ⓥ', 'Ⓦ' => 'ⓦ', 'Ⓧ' => 'ⓧ',
233		'Ⓨ' => 'ⓨ', 'Ⓩ' => 'ⓩ'
234	);
235	
236	/**
237	 * All uppercase UTF-8 characters mapped to lowercase characters
238	 * 
239	 * @var array
240	 */
241	static private $upper_to_lower = array(
242		'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', 'F' => 'f',
243		'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k', 'L' => 'l',
244		'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q', 'R' => 'r',
245		'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x',
246		'Y' => 'y', 'Z' => 'z', 'À' => 'à', 'Á' => 'á', 'Â' => 'â', 'Ã' => 'ã',
247		'Ä' => 'ä', 'Å' => 'å', 'Æ' => 'æ', 'Ç' => 'ç', 'È' => 'è', 'É' => 'é',
248		'Ê' => 'ê', 'Ë' => 'ë', 'Ì' => 'ì', 'Í' => 'í', 'Î' => 'î', 'Ï' => 'ï',
249		'Ð' => 'ð', 'Ñ' => 'ñ', 'Ò' => 'ò', 'Ó' => 'ó', 'Ô' => 'ô', 'Õ' => 'õ',
250		'Ö' => 'ö', 'Ø' => 'ø', 'Ù' => 'ù', 'Ú' => 'ú', 'Û' => 'û', 'Ü' => 'ü',
251		'Ý' => 'ý', 'Þ' => 'þ', 'Ā' => 'ā', 'Ă' => 'ă', 'Ą' => 'ą', 'Ć' => 'ć',
252		'Ĉ' => 'ĉ', 'Ċ' => 'ċ', 'Č' => 'č', 'Ď' => 'ď', 'Đ' => 'đ', 'Ē' => 'ē',
253		'Ĕ' => 'ĕ', 'Ė' => 'ė', 'Ę' => 'ę', 'Ě' => 'ě', 'Ĝ' => 'ĝ', 'Ğ' => 'ğ',
254		'Ġ' => 'ġ', 'Ģ' => 'ģ', 'Ĥ' => 'ĥ', 'Ħ' => 'ħ', 'Ĩ' => 'ĩ', 'Ī' => 'ī',
255		'Ĭ' => 'ĭ', 'Į' => 'į', 'İ' => 'i', 'IJ' => 'ij', 'Ĵ' => 'ĵ', 'Ķ' => 'ķ',
256		'Ĺ' => 'ĺ', 'Ļ' => 'ļ', 'Ľ' => 'ľ', 'Ŀ' => 'ŀ', 'Ł' => 'ł', 'Ń' => 'ń',
257		'Ņ' => 'ņ', 'Ň' => 'ň', 'Ŋ' => 'ŋ', 'Ō' => 'ō', 'Ŏ' => 'ŏ', 'Ő' => 'ő',
258		'Œ' => 'œ', 'Ŕ' => 'ŕ', 'Ŗ' => 'ŗ', 'Ř' => 'ř', 'Ś' => 'ś', 'Ŝ' => 'ŝ',
259		'Ş' => 'ş', 'Š' => 'š', 'Ţ' => 'ţ', 'Ť' => 'ť', 'Ŧ' => 'ŧ', 'Ũ' => 'ũ',
260		'Ū' => 'ū', 'Ŭ' => 'ŭ', 'Ů' => 'ů', 'Ű' => 'ű', 'Ų' => 'ų', 'Ŵ' => 'ŵ',
261		'Ŷ' => 'ŷ', 'Ÿ' => 'ÿ', 'Ź' => 'ź', 'Ż' => 'ż', 'Ž' => 'ž', 'Ɓ' => 'ɓ',
262		'Ƃ' => 'ƃ', 'Ƅ' => 'ƅ', 'Ɔ' => 'ɔ', 'Ƈ' => 'ƈ', 'Ɗ' => 'ɗ', 'Ƌ' => 'ƌ',
263		'Ǝ' => 'ɘ', 'Ə' => 'ə', 'Ɛ' => 'ɛ', 'Ƒ' => 'ƒ', 'Ɠ' => 'ɠ', 'Ɣ' => 'ɣ',
264		'Ɩ' => 'ɩ', 'Ɨ' => 'ɨ', 'Ƙ' => 'ƙ', 'Ɯ' => 'ɯ', 'Ɲ' => 'ɲ', 'Ɵ' => 'ɵ',
265		'Ơ' => 'ơ', 'Ƣ' => 'ƣ', 'Ƥ' => 'ƥ', 'Ƨ' => 'ƨ', 'Ʃ' => 'ʃ', 'Ƭ' => 'ƭ',
266		'Ʈ' => 'ʈ', 'Ư' => 'ư', 'Ʊ' => 'ʊ', 'Ʋ' => 'ʋ', 'Ƴ' => 'ƴ', 'Ƶ' => 'ƶ',
267		'Ʒ' => 'ʒ', 'Ƹ' => 'ƹ', 'Ƽ' => 'ƽ', 'DŽ' => 'dž', 'Dž' => 'dž', 'LJ' => 'lj',
268		'Lj' => 'lj', 'NJ' => 'nj', 'Nj' => 'nj', 'Ǎ' => 'ǎ', 'Ǐ' => 'ǐ', 'Ǒ' => 'ǒ',
269		'Ǔ' => 'ǔ', 'Ǖ' => 'ǖ', 'Ǘ' => 'ǘ', 'Ǚ' => 'ǚ', 'Ǜ' => 'ǜ', 'Ǟ' => 'ǟ',
270		'Ǡ' => 'ǡ', 'Ǣ' => 'ǣ', 'Ǥ' => 'ǥ', 'Ǧ' => 'ǧ', 'Ǩ' => 'ǩ', 'Ǫ' => 'ǫ',
271		'Ǭ' => 'ǭ', 'Ǯ' => 'ǯ', 'DZ' => 'dz', 'Ǵ' => 'ǵ', 'Ǻ' => 'ǻ', 'Ǽ' => 'ǽ',
272		'Ǿ' => 'ǿ', 'Ȁ' => 'ȁ', 'Ȃ' => 'ȃ', 'Ȅ' => 'ȅ', 'Ȇ' => 'ȇ', 'Ȉ' => 'ȉ',
273		'Ȋ' => 'ȋ', 'Ȍ' => 'ȍ', 'Ȏ' => 'ȏ', 'Ȑ' => 'ȑ', 'Ȓ' => 'ȓ', 'Ȕ' => 'ȕ',
274		'Ȗ' => 'ȗ', 'Ά' => 'ά', 'Έ' => 'έ', 'Ή' => 'ή', 'Ί' => 'ί', 'Ό' => 'ό',
275		'Ύ' => 'ύ', 'Ώ' => 'ώ', 'Α' => 'α', 'Β' => 'β', 'Γ' => 'γ', 'Δ' => 'δ',
276		'Ε' => 'ε', 'Ζ' => 'ζ', 'Η' => 'η', 'Θ' => 'θ', 'Ι' => 'ι', 'Κ' => 'κ',
277		'Λ' => 'λ', 'Μ' => 'μ', 'Ν' => 'ν', 'Ξ' => 'ξ', 'Ο' => 'ο', 'Π' => 'π',
278		'Ρ' => 'ρ', 'Σ' => 'σ', 'Τ' => 'τ', 'Υ' => 'υ', 'Φ' => 'φ', 'Χ' => 'χ',
279		'Ψ' => 'ψ', 'Ω' => 'ω', 'Ϊ' => 'ϊ', 'Ϋ' => 'ϋ', 'Ϣ' => 'ϣ', 'Ϥ' => 'ϥ',
280		'Ϧ' => 'ϧ', 'Ϩ' => 'ϩ', 'Ϫ' => 'ϫ', 'Ϭ' => 'ϭ', 'Ϯ' => 'ϯ', 'Ё' => 'ё',
281		'Ђ' => 'ђ', 'Ѓ' => 'ѓ', 'Є' => 'є', 'Ѕ' => 'ѕ', 'І' => 'і', 'Ї' => 'ї',
282		'Ј' => 'ј', 'Љ' => 'љ', 'Њ' => 'њ', 'Ћ' => 'ћ', 'Ќ' => 'ќ', 'Ў' => 'ў',
283		'Џ' => 'џ', 'А' => 'а', 'Б' => 'б', 'В' => 'в', 'Г' => 'г', 'Д' => 'д',
284		'Е' => 'е', 'Ж' => 'ж', 'З' => 'з', 'И' => 'и', 'Й' => 'й', 'К' => 'к',
285		'Л' => 'л', 'М' => 'м', 'Н' => 'н', 'О' => 'о', 'П' => 'п', 'Р' => 'р',
286		'С' => 'с', 'Т' => 'т', 'У' => 'у', 'Ф' => 'ф', 'Х' => 'х', 'Ц' => 'ц',
287		'Ч' => 'ч', 'Ш' => 'ш', 'Щ' => 'щ', 'Ъ' => 'ъ', 'Ы' => 'ы', 'Ь' => 'ь',
288		'Э' => 'э', 'Ю' => 'ю', 'Я' => 'я', 'Ѡ' => 'ѡ', 'Ѣ' => 'ѣ', 'Ѥ' => 'ѥ',
289		'Ѧ' => 'ѧ', 'Ѩ' => 'ѩ', 'Ѫ' => 'ѫ', 'Ѭ' => 'ѭ', 'Ѯ' => 'ѯ', 'Ѱ' => 'ѱ',
290		'Ѳ' => 'ѳ', 'Ѵ' => 'ѵ', 'Ѷ' => 'ѷ', 'Ѹ' => 'ѹ', 'Ѻ' => 'ѻ', 'Ѽ' => 'ѽ',
291		'Ѿ' => 'ѿ', 'Ҁ' => 'ҁ', 'Ґ' => 'ґ', 'Ғ' => 'ғ', 'Ҕ' => 'ҕ', 'Җ' => 'җ',
292		'Ҙ' => 'ҙ', 'Қ' => 'қ', 'Ҝ' => 'ҝ', 'Ҟ' => 'ҟ', 'Ҡ' => 'ҡ', 'Ң' => 'ң',
293		'Ҥ' => 'ҥ', 'Ҧ' => 'ҧ', 'Ҩ' => 'ҩ', 'Ҫ' => 'ҫ', 'Ҭ' => 'ҭ', 'Ү' => 'ү',
294		'Ұ' => 'ұ', 'Ҳ' => 'ҳ', 'Ҵ' => 'ҵ', 'Ҷ' => 'ҷ', 'Ҹ' => 'ҹ', 'Һ' => 'һ',
295		'Ҽ' => 'ҽ', 'Ҿ' => 'ҿ', 'Ӂ' => 'ӂ', 'Ӄ' => 'ӄ', 'Ӈ' => 'ӈ', 'Ӌ' => 'ӌ',
296		'Ӑ' => 'ӑ', 'Ӓ' => 'ӓ', 'Ӕ' => 'ӕ', 'Ӗ' => 'ӗ', 'Ә' => 'ә', 'Ӛ' => 'ӛ',
297		'Ӝ' => 'ӝ', 'Ӟ' => 'ӟ', 'Ӡ' => 'ӡ', 'Ӣ' => 'ӣ', 'Ӥ' => 'ӥ', 'Ӧ' => 'ӧ',
298		'Ө' => 'ө', 'Ӫ' => 'ӫ', 'Ӯ' => 'ӯ', 'Ӱ' => 'ӱ', 'Ӳ' => 'ӳ', 'Ӵ' => 'ӵ',
299		'Ӹ' => 'ӹ', 'Ա' => 'ա', 'Բ' => 'բ', 'Գ' => 'գ', 'Դ' => 'դ', 'Ե' => 'ե',
300		'Զ' => 'զ', 'Է' => 'է', 'Ը' => 'ը', 'Թ' => 'թ', 'Ժ' => 'ժ', 'Ի' => 'ի',
301		'Լ' => 'լ', 'Խ' => 'խ', 'Ծ' => 'ծ', 'Կ' => 'կ', 'Հ' => 'հ', 'Ձ' => 'ձ',
302		'Ղ' => 'ղ', 'Ճ' => 'ճ', 'Մ' => 'մ', 'Յ' => 'յ', 'Ն' => 'ն', 'Շ' => 'շ',
303		'Ո' => 'ո', 'Չ' => 'չ', 'Պ' => 'պ', 'Ջ' => 'ջ', 'Ռ' => 'ռ', 'Ս' => 'ս',
304		'Վ' => 'վ', 'Տ' => 'տ', 'Ր' => 'ր', 'Ց' => 'ց', 'Ւ' => 'ւ', 'Փ' => 'փ',
305		'Ք' => 'ք', 'Օ' => 'օ', 'Ֆ' => 'ֆ', 'Ⴀ' => 'ა', 'Ⴁ' => 'ბ', 'Ⴂ' => 'გ',
306		'Ⴃ' => 'დ', 'Ⴄ' => 'ე', 'Ⴅ' => 'ვ', 'Ⴆ' => 'ზ', 'Ⴇ' => 'თ', 'Ⴈ' => 'ი',
307		'Ⴉ' => 'კ', 'Ⴊ' => 'ლ', 'Ⴋ' => 'მ', 'Ⴌ' => 'ნ', 'Ⴍ' => 'ო', 'Ⴎ' => 'პ',
308		'Ⴏ' => 'ჟ', 'Ⴐ' => 'რ', 'Ⴑ' => 'ს', 'Ⴒ' => 'ტ', 'Ⴓ' => 'უ', 'Ⴔ' => 'ფ',
309		'Ⴕ' => 'ქ', 'Ⴖ' => 'ღ', 'Ⴗ' => 'ყ', 'Ⴘ' => 'შ', 'Ⴙ' => 'ჩ', 'Ⴚ' => 'ც',
310		'Ⴛ' => 'ძ', 'Ⴜ' => 'წ', 'Ⴝ' => 'ჭ', 'Ⴞ' => 'ხ', 'Ⴟ' => 'ჯ', 'Ⴠ' => 'ჰ',
311		'Ⴡ' => 'ჱ', 'Ⴢ' => 'ჲ', 'Ⴣ' => 'ჳ', 'Ⴤ' => 'ჴ', 'Ⴥ' => 'ჵ', 'Ḁ' => 'ḁ',
312		'Ḃ' => 'ḃ', 'Ḅ' => 'ḅ', 'Ḇ' => 'ḇ', 'Ḉ' => 'ḉ', 'Ḋ' => 'ḋ', 'Ḍ' => 'ḍ',
313		'Ḏ' => 'ḏ', 'Ḑ' => 'ḑ', 'Ḓ' => 'ḓ', 'Ḕ' => 'ḕ', 'Ḗ' => 'ḗ', 'Ḙ' => 'ḙ',
314		'Ḛ' => 'ḛ', 'Ḝ' => 'ḝ', 'Ḟ' => 'ḟ', 'Ḡ' => 'ḡ', 'Ḣ' => 'ḣ', 'Ḥ' => 'ḥ',
315		'Ḧ' => 'ḧ', 'Ḩ' => 'ḩ', 'Ḫ' => 'ḫ', 'Ḭ' => 'ḭ', 'Ḯ' => 'ḯ', 'Ḱ' => 'ḱ',
316		'Ḳ' => 'ḳ', 'Ḵ' => 'ḵ', 'Ḷ' => 'ḷ', 'Ḹ' => 'ḹ', 'Ḻ' => 'ḻ', 'Ḽ' => 'ḽ',
317		'Ḿ' => 'ḿ', 'Ṁ' => 'ṁ', 'Ṃ' => 'ṃ', 'Ṅ' => 'ṅ', 'Ṇ' => 'ṇ', 'Ṉ' => 'ṉ',
318		'Ṋ' => 'ṋ', 'Ṍ' => 'ṍ', 'Ṏ' => 'ṏ', 'Ṑ' => 'ṑ', 'Ṓ' => 'ṓ', 'Ṕ' => 'ṕ',
319		'Ṗ' => 'ṗ', 'Ṙ' => 'ṙ', 'Ṛ' => 'ṛ', 'Ṝ' => 'ṝ', 'Ṟ' => 'ṟ', 'Ṡ' => 'ṡ',
320		'Ṣ' => 'ṣ', 'Ṥ' => 'ṥ', 'Ṧ' => 'ṧ', 'Ṩ' => 'ṩ', 'Ṫ' => 'ṫ', 'Ṭ' => 'ṭ',
321		'Ṯ' => 'ṯ', 'Ṱ' => 'ṱ', 'Ṳ' => 'ṳ', 'Ṵ' => 'ṵ', 'Ṷ' => 'ṷ', 'Ṹ' => 'ṹ',
322		'Ṻ' => 'ṻ', 'Ṽ' => 'ṽ', 'Ṿ' => 'ṿ', 'Ẁ' => 'ẁ', 'Ẃ' => 'ẃ', 'Ẅ' => 'ẅ',
323		'Ẇ' => 'ẇ', 'Ẉ' => 'ẉ', 'Ẋ' => 'ẋ', 'Ẍ' => 'ẍ', 'Ẏ' => 'ẏ', 'Ẑ' => 'ẑ',
324		'Ẓ' => 'ẓ', 'Ẕ' => 'ẕ', 'Ạ' => 'ạ', 'Ả' => 'ả', 'Ấ' => 'ấ', 'Ầ' => 'ầ',
325		'Ẩ' => 'ẩ', 'Ẫ' => 'ẫ', 'Ậ' => 'ậ', 'Ắ' => 'ắ', 'Ằ' => 'ằ', 'Ẳ' => 'ẳ',
326		'Ẵ' => 'ẵ', 'Ặ' => 'ặ', 'Ẹ' => 'ẹ', 'Ẻ' => 'ẻ', 'Ẽ' => 'ẽ', 'Ế' => 'ế',
327		'Ề' => 'ề', 'Ể' => 'ể', 'Ễ' => 'ễ', 'Ệ' => 'ệ', 'Ỉ' => 'ỉ', 'Ị' => 'ị',
328		'Ọ' => 'ọ', 'Ỏ' => 'ỏ', 'Ố' => 'ố', 'Ồ' => 'ồ', 'Ổ' => 'ổ', 'Ỗ' => 'ỗ',
329		'Ộ' => 'ộ', 'Ớ' => 'ớ', 'Ờ' => 'ờ', 'Ở' => 'ở', 'Ỡ' => 'ỡ', 'Ợ' => 'ợ',
330		'Ụ' => 'ụ', 'Ủ' => 'ủ', 'Ứ' => 'ứ', 'Ừ' => 'ừ', 'Ử' => 'ử', 'Ữ' => 'ữ',
331		'Ự' => 'ự', 'Ỳ' => 'ỳ', 'Ỵ' => 'ỵ', 'Ỷ' => 'ỷ', 'Ỹ' => 'ỹ', 'Ἀ' => 'ἀ',
332		'Ἁ' => 'ἁ', 'Ἂ' => 'ἂ', 'Ἃ' => 'ἃ', 'Ἄ' => 'ἄ', 'Ἅ' => 'ἅ', 'Ἆ' => 'ἆ',
333		'Ἇ' => 'ἇ', 'Ἐ' => 'ἐ', 'Ἑ' => 'ἑ', 'Ἒ' => 'ἒ', 'Ἓ' => 'ἓ', 'Ἔ' => 'ἔ',
334		'Ἕ' => 'ἕ', 'Ἠ' => 'ἠ', 'Ἡ' => 'ἡ', 'Ἢ' => 'ἢ', 'Ἣ' => 'ἣ', 'Ἤ' => 'ἤ',
335		'Ἥ' => 'ἥ', 'Ἦ' => 'ἦ', 'Ἧ' => 'ἧ', 'Ἰ' => 'ἰ', 'Ἱ' => 'ἱ', 'Ἲ' => 'ἲ',
336		'Ἳ' => 'ἳ', 'Ἴ' => 'ἴ', 'Ἵ' => 'ἵ', 'Ἶ' => 'ἶ', 'Ἷ' => 'ἷ', 'Ὀ' => 'ὀ',
337		'Ὁ' => 'ὁ', 'Ὂ' => 'ὂ', 'Ὃ' => 'ὃ', 'Ὄ' => 'ὄ', 'Ὅ' => 'ὅ', 'Ὑ' => 'ὑ',
338		'Ὓ' => 'ὓ', 'Ὕ' => 'ὕ', 'Ὗ' => 'ὗ', 'Ὠ' => 'ὠ', 'Ὡ' => 'ὡ', 'Ὢ' => 'ὢ',
339		'Ὣ' => 'ὣ', 'Ὤ' => 'ὤ', 'Ὥ' => 'ὥ', 'Ὦ' => 'ὦ', 'Ὧ' => 'ὧ', 'ᾈ' => 'ᾀ',
340		'ᾉ' => 'ᾁ', 'ᾊ' => 'ᾂ', 'ᾋ' => 'ᾃ', 'ᾌ' => 'ᾄ', 'ᾍ' => 'ᾅ', 'ᾎ' => 'ᾆ',
341		'ᾏ' => 'ᾇ', 'ᾘ' => 'ᾐ', 'ᾙ' => 'ᾑ', 'ᾚ' => 'ᾒ', 'ᾛ' => 'ᾓ', 'ᾜ' => 'ᾔ',
342		'ᾝ' => 'ᾕ', 'ᾞ' => 'ᾖ', 'ᾟ' => 'ᾗ', 'ᾨ' => 'ᾠ', 'ᾩ' => 'ᾡ', 'ᾪ' => 'ᾢ',
343		'ᾫ' => 'ᾣ', 'ᾬ' => 'ᾤ', 'ᾭ' => 'ᾥ', 'ᾮ' => 'ᾦ', 'ᾯ' => 'ᾧ', 'Ᾰ' => 'ᾰ',
344		'Ᾱ' => 'ᾱ', 'Ῐ' => 'ῐ', 'Ῑ' => 'ῑ', 'Ῠ' => 'ῠ', 'Ῡ' => 'ῡ', 'Ⓐ' => 'ⓐ',
345		'Ⓑ' => 'ⓑ', 'Ⓒ' => 'ⓒ', 'Ⓓ' => 'ⓓ', 'Ⓔ' => 'ⓔ', 'Ⓕ' => 'ⓕ', 'Ⓖ' => 'ⓖ',
346		'Ⓗ' => 'ⓗ', 'Ⓘ' => 'ⓘ', 'Ⓙ' => 'ⓙ', 'Ⓚ' => 'ⓚ', 'Ⓛ' => 'ⓛ', 'Ⓜ' => 'ⓜ',
347		'Ⓝ' => 'ⓝ', 'Ⓞ' => 'ⓞ', 'Ⓟ' => 'ⓟ', 'Ⓠ' => 'ⓠ', 'Ⓡ' => 'ⓡ', 'Ⓢ' => 'ⓢ',
348		'Ⓣ' => 'ⓣ', 'Ⓤ' => 'ⓤ', 'Ⓥ' => 'ⓥ', 'Ⓦ' => 'ⓦ', 'Ⓧ' => 'ⓧ', 'Ⓨ' => 'ⓨ',
349		'Ⓩ' => 'ⓩ', 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e',
350		'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k',
351		'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q',
352		'R' => 'r', 'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w',
353		'X' => 'x', 'Y' => 'y', 'Z' => 'z'
354	);
355	
356	/**
357	 * A mapping of all ASCII-based Latin characters, punctuation, symbols and number forms to ASCII.
358	 * 
359	 * Includes elements from the following Unicode blocks:
360	 * 
361	 *  - Latin-1 Supplement
362	 *  - Latin Extended-A
363	 *  - Latin Extended-B
364	 *  - IPA Extensions
365	 *  - Latin Extended Additional
366	 *  - General Punctuation
367	 *  - Letterlike symbols
368	 *  - Number Forms
369	 * 
370	 * @var array
371	 */
372	static private $utf8_to_ascii = array(
373		// Latin-1 Supplement
374		'©' => '(c)', '«' => '<<',  '®' => '(R)', '»' => '>>',  '¼' => '1/4',
375		'½' => '1/2', '¾' => '3/4', 'À' => 'A',   'Á' => 'A',   'Â' => 'A',
376		'Ã' => 'A',   'Ä' => 'A',   'Å' => 'A',   'Æ' => 'AE',  'Ç' => 'C',
377		'È' => 'E',   'É' => 'E',   'Ê' => 'E',   'Ë' => 'E',   'Ì' => 'I',
378		'Í' => 'I',   'Î' => 'I',   'Ï' => 'I',   'Ñ' => 'N',   'Ò' => 'O',
379		'Ó' => 'O',   'Ô' => 'O',   'Õ' => 'O',   'Ö' => 'O',   'Ø' => 'O',
380		'Ù' => 'U',   'Ú' => 'U',   'Û' => 'U',   'Ü' => 'U',   'Ý' => 'Y',
381		'à' => 'a',   'á' => 'a',   'â' => 'a',   'ã' => 'a',   'ä' => 'a',
382		'å' => 'a',   'æ' => 'ae',  'ç' => 'c',   'è' => 'e',   'é' => 'e',
383		'ê' => 'e',   'ë' => 'e',   'ì' => 'i',   'í' => 'i',   'î' => 'i',
384		'ï' => 'i',   'ñ' => 'n',   'ò' => 'o',   'ó' => 'o',   'ô' => 'o',
385		'õ' => 'o',   'ö' => 'o',   'ø' => 'o',   'ù' => 'u',   'ú' => 'u',
386		'û' => 'u',   'ü' => 'u',   'ý' => 'y',   'ÿ' => 'y',
387		// Latin Extended-A
388		'Ā' => 'A',   'ā' => 'a',   'Ă' => 'A',   'ă' => 'a',   'Ą' => 'A',
389		'ą' => 'a',   'Ć' => 'C',   'ć' => 'c',   'Ĉ' => 'C',   'ĉ' => 'c',
390		'Ċ' => 'C',   'ċ' => 'c',   'Č' => 'C',   'č' => 'c',   'Ď' => 'D',
391		'ď' => 'd',   'Đ' => 'D',   'đ' => 'd',   'Ē' => 'E',   'ē' => 'e',
392		'Ĕ' => 'E',   'ĕ' => 'e',   'Ė' => 'E',   'ė' => 'e',   'Ę' => 'E',
393		'ę' => 'e',   'Ě' => 'E',   'ě' => 'e',   'Ĝ' => 'G',   'ĝ' => 'g',
394		'Ğ' => 'G',   'ğ' => 'g',   'Ġ' => 'G',   'ġ' => 'g',   'Ģ' => 'G',
395		'ģ' => 'g',   'Ĥ' => 'H',   'ĥ' => 'h',   'Ħ' => 'H',   'ħ' => 'h',
396		'Ĩ' => 'I',   'ĩ' => 'i',   'Ī' => 'I',   'ī' => 'i',   'Ĭ' => 'I',
397		'ĭ' => 'i',   'Į' => 'I',   'į' => 'i',   'İ' => 'I',   'ı' => 'i',
398		'IJ' => 'IJ',  'ij' => 'ij',  'Ĵ' => 'J',   'ĵ' => 'j',   'Ķ' => 'K',
399		'ķ' => 'k',   'Ĺ' => 'L',   'ĺ' => 'l',   'Ļ' => 'L',   'ļ' => 'l',
400		'Ľ' => 'L',   'ľ' => 'l',   'Ŀ' => 'L',   'ŀ' => 'l',   'Ł' => 'L',
401		'ł' => 'l',   'Ń' => 'N',   'ń' => 'n',   'Ņ' => 'N',   'ņ' => 'n',
402		'Ň' => 'N',   'ň' => 'n',   'ʼn' => "'n", 'Ŋ' => 'N',   'ŋ' => 'n',
403		'Ō' => 'O',   'ō' => 'o',   'Ŏ' => 'O',   'ŏ' => 'o',   'Ő' => 'O',
404		'ő' => 'o',   'Œ' => 'OE',  'œ' => 'oe',  'Ŕ' => 'R',   'ŕ' => 'r',
405		'Ŗ' => 'R',   'ŗ' => 'r',   'Ř' => 'R',   'ř' => 'r',   'Ś' => 'S',
406		'ś' => 's',   'Ŝ' => 'S',   'ŝ' => 's',   'Ş' => 'S',   'ş' => 's',
407		'Š' => 'S',   'š' => 's',   'Ţ' => 'T',   'ţ' => 't',   'Ť' => 'T',
408		'ť' => 't',   'Ŧ' => 'T',   'ŧ' => 't',   'Ũ' => 'U',   'ũ' => 'u',
409		'Ū' => 'U',   'ū' => 'u',   'Ŭ' => 'U',   'ŭ' => 'u',   'Ů' => 'U',
410		'ů' => 'u',   'Ű' => 'U',   'ű' => 'u',   'Ų' => 'U',   'ų' => 'u',
411		'Ŵ' => 'W',   'ŵ' => 'w',   'Ŷ' => 'Y',   'ŷ' => 'y',   'Ÿ' => 'Y',
412		'Ź' => 'Z',   'ź' => 'z',   'Ż' => 'Z',   'ż' => 'z',   'Ž' => 'Z',
413		'ž' => 'z',
414		// Latin Extended-B
415		'ƀ' => 'b',   'Ɓ' => 'B',   'Ƃ' => 'B',   'ƃ' => 'b',   'Ɔ' => 'O',
416		'Ƈ' => 'C',   'ƈ' => 'c',   'Ɖ' => 'D',   'Ɗ' => 'D',   'Ƌ' => 'D',
417		'ƌ' => 'd',   'Ǝ' => 'E',   'Ɛ' => 'E',   'Ƒ' => 'F',   'ƒ' => 'f',
418		'Ɠ' => 'G',   'Ɨ' => 'I',   'Ƙ' => 'K',   'ƙ' => 'k',   'ƚ' => 'l',
419		'Ɯ' => 'M',   'Ɲ' => 'N',   'ƞ' => 'n',   'Ɵ' => 'O',   'Ơ' => 'O',
420		'ơ' => 'o',   'Ƣ' => 'OI',  'ƣ' => 'oi',  'Ƥ' => 'P',   'ƥ' => 'p',
421		'ƫ' => 't',   'Ƭ' => 'T',   'ƭ' => 't',   'Ʈ' => 'T',   'Ư' => 'U',
422		'ư' => 'u',   'Ʋ' => 'V',   'Ƴ' => 'Y',   'ƴ' => 'y',   'Ƶ' => 'Z',
423		'ƶ' => 'z',   'ƻ' => '2',   'DŽ' => 'DZ',  'Dž' => 'Dz',  'dž' => 'dz',
424		'LJ' => 'LJ',  'Lj' => 'Lj',  'lj' => 'lj',  'NJ' => 'Nj',  'Nj' => 'Nj',
425		'nj' => 'nj',  'Ǎ' => 'A',   'ǎ' => 'a',   'Ǐ' => 'I',   'ǐ' => 'i',
426		'Ǒ' => 'O',   'ǒ' => 'o',   'Ǔ' => 'U',   'ǔ' => 'u',   'Ǖ' => 'U',
427		'ǖ' => 'u',   'Ǘ' => 'U',   'ǘ' => 'u',   'Ǚ' => 'U',   'ǚ' => 'u',
428		'Ǜ' => 'U',   'ǜ' => 'u',   'ǝ' => 'e',   'Ǟ' => 'A',   'ǟ' => 'a',
429		'Ǡ' => 'A',   'ǡ' => 'a',   'Ǣ' => 'AE',  'ǣ' => 'ae',  'Ǥ' => 'G',
430		'ǥ' => 'g',   'Ǧ' => 'G',   'ǧ' => 'g',   'Ǩ' => 'K',   'ǩ' => 'k',
431		'Ǫ' => 'O',   'ǫ' => 'o',   'Ǭ' => 'O',   'ǭ' => 'o',   'ǰ' => 'j',
432		'DZ' => 'DZ',  'Dz' => 'Dz',  'dz' => 'dz',  'Ǵ' => 'G',   'ǵ' => 'g',
433		'Ǹ' => 'N',   'ǹ' => 'n',   'Ǻ' => 'A',   'ǻ' => 'a',   'Ǽ' => 'AE',
434		'ǽ' => 'ae',  'Ǿ' => 'O',   'ǿ' => 'o',   'Ȁ' => 'A',   'ȁ' => 'a',
435		'Ȃ' => 'A',   'ȃ' => 'a',   'Ȅ' => 'E',   'ȅ' => 'e',   'Ȇ' => 'E',
436		'ȇ' => 'e',   'Ȉ' => 'I',   'ȉ' => 'i',   'Ȋ' => 'I',   'ȋ' => 'i',
437		'Ȍ' => 'O',   'ȍ' => 'o',   'Ȏ' => 'O',   'ȏ' => 'o',   'Ȑ' => 'R',
438		'ȑ' => 'r',   'Ȓ' => 'R',   'ȓ' => 'r',   'Ȕ' => 'U',   'ȕ' => 'u',
439		'Ȗ' => 'U',   'ȗ' => 'u',   'Ș' => 'S',   'ș' => 's',   'Ț' => 'T',
440		'ț' => 't',   'Ȟ' => 'H',   'ȟ' => 'h',   'Ƞ' => 'N',   'ȡ' => 'd',
441		'Ȥ' => 'Z',   'ȥ' => 'z',   'Ȧ' => 'A',   'ȧ' => 'a',   'Ȩ' => 'E',
442		'ȩ' => 'e',   'Ȫ' => 'O',   'ȫ' => 'o',   'Ȭ' => 'O',   'ȭ' => 'o',
443		'Ȯ' => 'O',   'ȯ' => 'o',   'Ȱ' => 'O',   'ȱ' => 'o',   'Ȳ' => 'Y',
444		'ȳ' => 'y',   'ȴ' => 'l',   'ȵ' => 'n',   'ȶ' => 't',   'ȷ' => 'j',
445		'ȸ' => 'db',  'ȹ' => 'qp',  'Ⱥ' => 'A',   'Ȼ' => 'C',   'ȼ' => 'c',
446		'Ƚ' => 'L',   'Ⱦ' => 'T',   'ȿ' => 's',   'ɀ' => 'z',   'Ƀ' => 'B',
447		'Ʉ' => 'U',   'Ʌ' => 'V',   'Ɇ' => 'E',   'ɇ' => 'e',   'Ɉ' => 'J',
448		'ɉ' => 'j',   'Ɋ' => 'Q',   'ɋ' => 'q',   'Ɍ' => 'R',   'ɍ' => 'r',
449		'Ɏ' => 'Y',   'ɏ' => 'y',
450		// IPA Extensions
451		'ɐ' => 'a',   'ɓ' => 'b',   'ɔ' => 'o',   'ɕ' => 'c',   'ɖ' => 'd',
452		'ɗ' => 'd',   'ɘ' => 'e',   'ɛ' => 'e',   'ɜ' => 'e',   'ɝ' => 'e',
453		'ɞ' => 'e',   'ɟ' => 'j',   'ɠ' => 'g',   'ɡ' => 'g',   'ɢ' => 'G',
454		'ɥ' => 'h',   'ɦ' => 'h',   'ɨ' => 'i',   'ɪ' => 'I',   'ɫ' => 'l',
455		'ɬ' => 'l',   'ɭ' => 'l',   'ɯ' => 'm',   'ɰ' => 'm',   'ɱ' => 'm',
456		'ɲ' => 'n',   'ɳ' => 'n',   'ɴ' => 'N',   'ɵ' => 'o',   'ɶ' => 'OE',
457		'ɹ' => 'r',   'ɺ' => 'r',   'ɻ' => 'r',   'ɼ' => 'r',   'ɽ' => 'r',
458		'ɾ' => 'r',   'ɿ' => 'r',   'ʀ' => 'R',   'ʁ' => 'R',   'ʂ' => 's',
459		'ʇ' => 't',   'ʈ' => 't',   'ʉ' => 'u',   'ʋ' => 'v',   'ʌ' => 'v',
460		'ʍ' => 'w',   'ʎ' => 'y',   'ʏ' => 'Y',   'ʐ' => 'z',   'ʑ' => 'z',
461		'ʗ' => 'C',   'ʙ' => 'B',   'ʚ' => 'e',   'ʛ' => 'G',   'ʜ' => 'H',
462		'ʝ' => 'j',   'ʞ' => 'k',   'ʟ' => 'L',   'ʠ' => 'q',   'ʣ' => 'dz',
463		'ʥ' => 'dz',  'ʦ' => 'ts',  'ʨ' => 'tc',  'ʪ' => 'ls',  'ʫ' => 'lz',
464		'ʮ' => 'h',   'ʯ' => 'h',
465		// Latin Extended Additional
466		'Ḁ' => 'A',   'ḁ' => 'a',   'Ḃ' => 'B',   'ḃ' => 'b',   'Ḅ' => 'B',
467		'ḅ' => 'b',   'Ḇ' => 'B',   'ḇ' => 'b',   'Ḉ' => 'C',   'ḉ' => 'c',
468		'Ḋ' => 'D',   'ḋ' => 'd',   'Ḍ' => 'D',   'ḍ' => 'd',   'Ḏ' => 'D',
469		'ḏ' => 'd',   'Ḑ' => 'D',   'ḑ' => 'd',   'Ḓ' => 'D',   'ḓ' => 'd',
470		'Ḕ' => 'E',   'ḕ' => 'e',   'Ḗ' => 'E',   'ḗ' => 'e',   'Ḙ' => 'E',
471		'ḙ' => 'e',   'Ḛ' => 'E',   'ḛ' => 'e',   'Ḝ' => 'E',   'ḝ' => 'e',
472		'Ḟ' => 'F',   'ḟ' => 'f',   'Ḡ' => 'G',   'ḡ' => 'g',   'Ḣ' => 'H',
473		'ḣ' => 'h',   'Ḥ' => 'H',   'ḥ' => 'h',   'Ḧ' => 'H',   'ḧ' => 'h',
474		'Ḩ' => 'H',   'ḩ' => 'h',   'Ḫ' => 'H',   'ḫ' => 'h',   'Ḭ' => 'I',
475		'ḭ' => 'i',   'Ḯ' => 'I',   'ḯ' => 'i',   'Ḱ' => 'K',   'ḱ' => 'k',
476		'Ḳ' => 'K',   'ḳ' => 'k',   'Ḵ' => 'K',   'ḵ' => 'k',   'Ḷ' => 'L',
477		'ḷ' => 'l',   'Ḹ' => 'L',   'ḹ' => 'l',   'Ḻ' => 'L',   'ḻ' => 'l',
478		'Ḽ' => 'L',   'ḽ' => 'l',   'Ḿ' => 'M',   'ḿ' => 'm',   'Ṁ' => 'M',
479		'ṁ' => 'm',   'Ṃ' => 'M',   'ṃ' => 'm',   'Ṅ' => 'N',   'ṅ' => 'n',
480		'Ṇ' => 'N',   'ṇ' => 'n',   'Ṉ' => 'N',   'ṉ' => 'n',   'Ṋ' => 'N',
481		'ṋ' => 'n',   'Ṍ' => 'O',   'ṍ' => 'o',   'Ṏ' => 'O',   'ṏ' => 'o',
482		'Ṑ' => 'O',   'ṑ' => 'o',   'Ṓ' => 'O',   'ṓ' => 'o',   'Ṕ' => 'P',
483		'ṕ' => 'p',   'Ṗ' => 'P',   'ṗ' => 'p',   'Ṙ' => 'R',   'ṙ' => 'r',
484		'Ṛ' => 'R',   'ṛ' => 'r',   'Ṝ' => 'R',   'ṝ' => 'r',   'Ṟ' => 'R',
485		'ṟ' => 'r',   'Ṡ' => 'S',   'ṡ' => 's',   'Ṣ' => 'S',   'ṣ' => 's',
486		'Ṥ' => 'S',   'ṥ' => 's',   'Ṧ' => 'S',   'ṧ' => 's',   'Ṩ' => 'S',
487		'ṩ' => 's',   'Ṫ' => 'T',   'ṫ' => 't',   'Ṭ' => 'T',   'ṭ' => 't',
488		'Ṯ' => 'T',   'ṯ' => 't',   'Ṱ' => 'T',   'ṱ' => 't',   'Ṳ' => 'U',
489		'ṳ' => 'u',   'Ṵ' => 'U',   'ṵ' => 'u',   'Ṷ' => 'U',   'ṷ' => 'u',
490		'Ṹ' => 'U',   'ṹ' => 'u',   'Ṻ' => 'U',   'ṻ' => 'u',   'Ṽ' => 'V',
491		'ṽ' => 'v',   'Ṿ' => 'V',   'ṿ' => 'v',   'Ẁ' => 'W',   'ẁ' => 'w',
492		'Ẃ' => 'W',   'ẃ' => 'w',   'Ẅ' => 'W',   'ẅ' => 'w',   'Ẇ' => 'W',
493		'ẇ' => 'w',   'Ẉ' => 'W',   'ẉ' => 'w',   'Ẋ' => 'X',   'ẋ' => 'x',
494		'Ẍ' => 'X',   'ẍ' => 'x',   'Ẏ' => 'Y',   'ẏ' => 'y',   'Ẑ' => 'Z',
495		'ẑ' => 'z',   'Ẓ' => 'Z',   'ẓ' => 'z',   'Ẕ' => 'Z',   'ẕ' => 'z',
496		'ẖ' => 'h',   'ẗ' => 't',   'ẘ' => 'w',   'ẙ' => 'y',   'ẚ' => 'a',
497		'Ạ' => 'A',   'ạ' => 'a',   'Ả' => 'A',   'ả' => 'a',   'Ấ' => 'A',
498		'ấ' => 'a',   'Ầ' => 'A',   'ầ' => 'a',   'Ẩ' => 'A',   'ẩ' => 'a',
499		'Ẫ' => 'A',   'ẫ' => 'a',   'Ậ' => 'A',   'ậ' => 'a',   'Ắ' => 'A',
500		'ắ' => 'a',   'Ằ' => 'A',   'ằ' => 'a',   'Ẳ' => 'A',   'ẳ' => 'a',
501		'Ẵ' => 'A',   'ẵ' => 'a',   'Ặ' => 'A',   'ặ' => 'a',   'Ẹ' => 'E',
502		'ẹ' => 'e',   'Ẻ' => 'E',   'ẻ' => 'e',   'Ẽ' => 'E',   'ẽ' => 'e',
503		'Ế' => 'E',   'ế' => 'e',   'Ề' => 'E',   'ề' => 'e',   'Ể' => 'E',
504		'ể' => 'e',   'Ễ' => 'E',   'ễ' => 'e',   'Ệ' => 'E',   'ệ' => 'e',
505		'Ỉ' => 'I',   'ỉ' => 'i',   'Ị' => 'I',   'ị' => 'i',   'Ọ' => 'O',
506		'ọ' => 'o',   'Ỏ' => 'O',   'ỏ' => 'o',   'Ố' => 'O',   'ố' => 'o',
507		'Ồ' => 'O',   'ồ' => 'o',   'Ổ' => 'O',   'ổ' => 'o',   'Ỗ' => 'O',
508		'ỗ' => 'o',   'Ộ' => 'O',   'ộ' => 'o',   'Ớ' => 'O',   'ớ' => 'o',
509		'Ờ' => 'O',   'ờ' => 'o',   'Ở' => 'O',   'ở' => 'o',   'Ỡ' => 'O',
510		'ỡ' => 'o',   'Ợ' => 'O',   'ợ' => 'o',   'Ụ' => 'U',   'ụ' => 'u',
511		'Ủ' => 'U',   'ủ' => 'u',   'Ứ' => 'U',   'ứ' => 'u',   'Ừ' => 'U',
512		'ừ' => 'u',   'Ử' => 'U',   'ử' => 'u',   'Ữ' => 'U',   'ữ' => 'u',
513		'Ự' => 'U',   'ự' => 'u',   'Ỳ' => 'Y',   'ỳ' => 'y',   'Ỵ' => 'Y',
514		'ỵ' => 'y',   'Ỷ' => 'Y',   'ỷ' => 'y',   'Ỹ' => 'Y',   'ỹ' => 'y',
515		// General Punctuation
516		' ' => ' ',   ' ' => ' ',   ' ' => ' ',   ' ' => ' ',   ' ' => ' ',
517		' ' => ' ',   ' ' => ' ',   ' ' => ' ',   ' ' => ' ',   ' ' => ' ',
518		' ' => ' ',   '​' => '',    '‌' => '',    '‍' => '',    '‐' => '-',
519		'‑' => '-',   '‒' => '-',   '–' => '-',   '—' => '-',   '―' => '-',
520		'‖' => '||',  '‘' => "'",   '’' => "'",   '‚' => ',',   '‛' => "'",
521		'“' => '"',   '”' => '"',   '‟' => '"',   '․' => '.',   '‥' => '..',
522		'…' => '...', ' ' => ' ',   '′' => "'",   '″' => '"',   '‴' => '\'"',
523		'‵' => "'",   '‶' => '"',   '‷' => '"\'', '‹' => '<',   '›' => '>',
524		'‼' => '!!',  '‽' => '?!',  '⁄' => '/',   '⁇' => '?/',  '⁈' => '?!',
525		'⁉' => '!?',
526		// Letterlike Symbols
527		'℠' => 'SM',  '™' => 'TM',
528		// Number Forms
529		'⅓' => '1/3', '⅔' => '2/3', '⅕' => '1/5', '⅖' => '2/5', '⅗' => '3/5',
530		'⅘' => '4/5', '⅙' => '1/6', '⅚' => '5/6', '⅛' => '1/8', '⅜' => '3/8',
531		'⅝' => '5/8', '⅞' => '7/8', 'Ⅰ' => 'I',   'Ⅱ' => 'II',  'Ⅲ' => 'III',
532		'Ⅳ' => 'IV',  'Ⅴ' => 'V',   'Ⅵ' => 'Vi',  'Ⅶ' => 'VII', 'Ⅷ' => 'VIII',
533		'Ⅸ' => 'IX',  'Ⅹ' => 'X',   'Ⅺ' => 'XI',  'Ⅻ' => 'XII', 'Ⅼ' => 'L',
534		'Ⅽ' => 'C',   'Ⅾ' => 'D',   'Ⅿ' => 'M',   'ⅰ' => 'i',   'ⅱ' => 'ii',
535		'ⅲ' => 'iii', 'ⅳ' => 'iv',  'ⅴ' => 'v',   'ⅵ' => 'vi',  'ⅶ' => 'vii',
536		'ⅷ' => 'viii','ⅸ' => 'ix',  'ⅹ' => 'x',   'ⅺ' => 'xi',  'ⅻ' => 'xii',
537		'ⅼ' => 'l',   'ⅽ' => 'c',   'ⅾ' => 'd',   'ⅿ' => 'm'
538	);
539	
/**
 * Caches whether the [http://php.net/mbstring mbstring] extension is loaded
 * 
 * The value is `NULL` until ::checkMbString() stores the result of
 * `extension_loaded('mbstring')` in it.
 * 
 * @var boolean
 */
private static $mbstring_available = null;
546	
547	
/**
 * Maps UTF-8 ASCII-based latin characters, punctuation, symbols and number forms to ASCII
 * 
 * Any characters or symbols that can not be translated will be removed.
 * 
 * This method is most useful for situations that only allow ASCII, such
 * as in URLs.
 * 
 * Translates elements from the following unicode blocks:
 * 
 *  - Latin-1 Supplement
 *  - Latin Extended-A
 *  - Latin Extended-B
 *  - IPA Extensions
 *  - Latin Extended Additional
 *  - General Punctuation
 *  - Letterlike symbols
 *  - Number Forms
 * 
 * @internal
 * 
 * @param  string $string  The string to convert
 * @return string  The input string in pure ASCII
 */
static public function ascii($string)
{
	// Only strings containing bytes outside of the ASCII range need any work
	if (self::detect($string)) {
		$translated = strtr($string, self::$utf8_to_ascii);
		
		// Whatever the translation table did not cover is dropped byte-by-byte
		$string = preg_replace('#[^\x00-\x7F]#', '', $translated);
	}
	
	return $string;
}
581	
582	
/**
 * Records whether the [http://php.net/mbstring mbstring] extension is loaded
 * 
 * The result of `extension_loaded('mbstring')` is stored in
 * `self::$mbstring_available`; nothing is returned.
 * 
 * @return void
 */
static private function checkMbString()
{
	$is_loaded = extension_loaded('mbstring');
	
	self::$mbstring_available = $is_loaded;
}
592	
593	
/**
 * Converts a unicode value into a UTF-8 character
 * 
 * The code point is validated before it is encoded: only values from
 * `U+0000` through `U+10FFFF`, excluding the surrogate range `U+D800` through
 * `U+DFFF`, can be represented in UTF-8.
 * 
 * @throws fProgrammerException  When the code point is not numeric, is negative, is greater than `U+10FFFF` or falls in the surrogate range
 * 
 * @param  mixed $unicode_code_point  The character to create, either the `U+hex` or decimal code point
 * @return string  The UTF-8 character
 */
static public function chr($unicode_code_point)
{
	if (is_string($unicode_code_point) && substr($unicode_code_point, 0, 2) == 'U+') {
		$unicode_code_point = hexdec(substr($unicode_code_point, 2));
	}
	
	// Checking the numeric range directly (instead of inspecting the first
	// encoded byte) also rejects 0x110000-0x13FFFF, which share the 0xF4 lead
	// byte with valid code points, and the surrogates 0xD800-0xDFFF
	$is_valid = is_numeric($unicode_code_point)
		&& $unicode_code_point >= 0
		&& $unicode_code_point <= 0x10FFFF
		&& ($unicode_code_point < 0xD800 || $unicode_code_point > 0xDFFF);
	
	if (!$is_valid) {
		throw new fProgrammerException(
			'The code point specified, %s, is invalid.',
			$unicode_code_point
		);
	}
	
	// The cast is safe here since the value is known to be within 0-0x10FFFF
	$code = (int) $unicode_code_point;
	
	// One byte characters: 0xxxxxxx
	if ($code < 0x80) {
		return chr($code);
	}
	
	// Two byte characters: 110xxxxx 10xxxxxx
	if ($code < 0x800) {
		return chr(0xC0 | ($code >> 6))
			. chr(0x80 | ($code & 0x3F));
	}
	
	// Three byte characters: 1110xxxx 10xxxxxx 10xxxxxx
	if ($code < 0x10000) {
		return chr(0xE0 | ($code >> 12))
			. chr(0x80 | (($code >> 6) & 0x3F))
			. chr(0x80 | ($code & 0x3F));
	}
	
	// Four byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	return chr(0xF0 | ($code >> 18))
		. chr(0x80 | (($code >> 12) & 0x3F))
		. chr(0x80 | (($code >> 6) & 0x3F))
		. chr(0x80 | ($code & 0x3F));
}
645	
646	
/**
 * Removes any invalid UTF-8 characters from a string or array of strings
 * 
 * Arrays are processed recursively, with the keys being left untouched.
 * Non-array values are cast to a string before being cleaned.
 * 
 * @param  array|string $value  The string or array of strings to clean
 * @return array|string  The cleaned string, or the array with every element cleaned
 */
static public function clean($value)
{
	if (is_array($value)) {
		foreach ($value as $key => $element) {
			$value[$key] = self::clean($element);
		}
		return $value;
	}
	
	// The iconv implementation is only inspected once per request
	if (self::$can_ignore_invalid === NULL) {
		self::$can_ignore_invalid = strtolower(ICONV_IMPL) != 'unknown';
	}
	
	if (self::$can_ignore_invalid) {
		return iconv('UTF-8', 'UTF-8//IGNORE', (string) $value);
	}
	
	// Without //IGNORE the conversion is wrapped in an E_NOTICE error capture.
	// The capture has to be stopped before returning - previously the call to
	// stopErrorCapture() sat after the return statement and was never reached.
	fCore::startErrorCapture(E_NOTICE);
	$cleaned = iconv('UTF-8', 'UTF-8', (string) $value);
	fCore::stopErrorCapture();
	
	return $cleaned;
}
676	
677	
/**
 * Compares strings, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
 * 
 * Please note that this method sorts based on English language sorting
 * rules only. Locale-specific sorting is done by
 * [http://php.net/strcoll strcoll()], however there are technical
 * limitations.
 * 
 * @param  string $str1  The first string to compare
 * @param  string $str2  The second string to compare
 * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
 */
static public function cmp($str1, $str2)
{
	// The primary ordering is decided by the ASCII translations of both strings
	$order = strcmp(
		strtr($str1, self::$utf8_to_ascii),
		strtr($str2, self::$utf8_to_ascii)
	);
	
	if ($order !== 0) {
		return $order;
	}
	
	// Identical ASCII translations are ordered by the original UTF-8 bytes
	return strcmp($str1, $str2);
}
704	
705	
/**
 * Converts an offset in characters to an offset in bytes so that we can use the built-in functions for some operations
 * 
 * A positive offset is measured from the beginning of the string and a
 * negative offset from the end; the sign of the result matches the sign of
 * `$offset`. An offset reaching past the string is capped at the string's
 * byte length.
 * 
 * @param  string  $string  The string to base the offset on
 * @param  integer $offset  The character offset to convert to bytes
 * @return integer  The converted offset
 */
static private function convertOffsetToBytes($string, $offset)
{
	if ($offset == 0) {
		return 0;
	}
	
	$len      = strlen($string);
	$backward = $offset < 0;
	$target   = abs($offset);
	
	// Negative offsets are measured by walking the reversed string
	if ($backward) {
		$string = strrev($string);
	}
	
	$byte_offset = 0;
	$chars_seen  = 0;
	
	// Every byte that is not a continuation byte (10xxxxxx) begins a character
	while ($byte_offset < $len && $chars_seen < $target) {
		if ((ord($string[$byte_offset]) & 0xC0) != 0x80) {
			++$chars_seen;
		}
		++$byte_offset;
	}
	
	// Walking forward, a character's lead byte is seen before its continuation
	// bytes, so the loop above stops in the middle of a multi-byte character.
	// The remaining bytes of that character are skipped so the offset lands on
	// a character boundary. Walking backward the lead byte comes last, so no
	// adjustment is needed.
	if (!$backward) {
		while ($byte_offset < $len && (ord($string[$byte_offset]) & 0xC0) == 0x80) {
			++$byte_offset;
		}
	}
	
	return $backward ? -$byte_offset : $byte_offset;
}
751	
752	
/**
 * Detects if a UTF-8 string contains any non-ASCII characters
 * 
 * The check looks for any byte outside of the range `0x00`-`0x7F`.
 * 
 * @param  string $string  The string to check
 * @return boolean  If the string contains any non-ASCII characters
 */
static private function detect($string)
{
	$found = preg_match('#[^\x00-\x7F]#', $string);
	
	// preg_match() yields 1 on a match, 0 on no match and FALSE on failure
	return $found === 1;
}
763	
764	
/**
 * Explodes a string on a delimiter
 * 
 * If no delimiter is provided, the string will be exploded with each
 * character being an element in the array.
 * 
 * @param  string  $string     The string to explode
 * @param  string  $delimiter  The string to explode on. If `NULL` or `''` this method will return one character per array index.
 * @return array  The exploded string
 */
static public function explode($string, $delimiter=NULL)
{
	// A falsy delimiter only counts as "provided" when it is numeric, such as `'0'`
	if (!$delimiter && !is_numeric($delimiter)) {
		// No delimiter: split into individual UTF-8 characters. The `^\z`
		// alternative makes an empty string produce a single empty element.
		preg_match_all('#.|^\z#us', $string, $characters);
		return $characters[0];
	}
	
	return explode($delimiter, $string);
}
786	
787	
/**
 * Compares strings in a case-insensitive manner, with the resulting order having characters that are based on ASCII letters placed after the relative ASCII characters
 * 
 * Both strings are lowercased with ::lower() and then handed to ::cmp().
 * 
 * Please note that this method sorts based on English language sorting
 * rules only. Locale-specific sorting is done by
 * [http://php.net/strcoll strcoll()], however there are technical
 * limitations.
 * 
 * @param  string $str1  The first string to compare
 * @param  string $str2  The second string to compare
 * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
 */
static public function icmp($str1, $str2)
{
	return self::cmp(self::lower($str1), self::lower($str2));
}
807	
808	
/**
 * Compares strings using a natural order algorithm in a case-insensitive manner, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
 * 
 * Both strings are lowercased with ::lower() and then handed to ::natcmp().
 * 
 * Please note that this method sorts based on English language sorting
 * rules only. Locale-specific sorting is done by
 * [http://php.net/strcoll strcoll()], however there are technical
 * limitations.
 * 
 * @param  string $str1  The first string to compare
 * @param  string $str2  The second string to compare
 * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
 */
static public function inatcmp($str1, $str2)
{
	return self::natcmp(self::lower($str1), self::lower($str2));
}
828	
829	
/**
 * Finds the first position (in characters) of the search value in the string - case is ignored when performing a match
 * 
 * @param  string  $haystack  The string to search in
 * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
 * @param  integer $offset    The character position to start searching from
 * @return mixed  The integer character position of the first occurrence of the needle or `FALSE` if no match
 */
static public function ipos($haystack, $needle, $offset=0)
{
	if (self::detect($haystack)) {
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		// Prefer the native multi-byte implementation whenever it exists
		if (self::$mbstring_available && function_exists('mb_stripos')) {
			return mb_stripos($haystack, $needle, $offset, 'UTF-8');
		}
		
		// Otherwise lowercase both strings and do a case-sensitive search
		return self::pos(self::lower($haystack), self::lower($needle), $offset);
	}
	
	// We get better performance falling back for ASCII strings
	return stripos($haystack, $needle, $offset);
}
858	
859	
/**
 * Replaces matching parts of the string, with matches being done in a case-insensitive manner
 * 
 * If `$search` and `$replace` are both arrays and `$replace` is shorter,
 * the extra `$search` strings will be replaced with an empty string. If
 * `$search` is an array and `$replace` is a string, all `$search` values
 * will be replaced with the string specified.
 * 
 * Both the search and replacement values are treated as literal text: the
 * search values are quoted for use in a regular expression and the `\` and
 * `$` characters in replacement values are escaped.
 * 
 * @param  string $string   The string to perform the replacements on
 * @param  mixed  $search   The string (or array of strings) to search for - see method description for details
 * @param  mixed  $replace  The string (or array of strings) to replace with - see method description for details
 * @return string  The input string with the specified replacements
 */
static public function ireplace($string, $search, $replace)
{
	// Turn each search value into a literal, case-insensitive UTF-8 pattern
	if (is_array($search)) {
		foreach ($search as $key => $needle) {
			$search[$key] = '#' . preg_quote($needle, '#') . '#ui';
		}
	} else {
		$search = '#' . preg_quote($search, '#') . '#ui';
	}
	
	// Escape the characters preg_replace() treats specially in replacements.
	// An array of replacements has to be escaped element by element since
	// strtr() only accepts strings.
	$escapes = array('\\' => '\\\\', '$' => '\\$');
	if (is_array($replace)) {
		foreach ($replace as $key => $replacement) {
			$replace[$key] = strtr($replacement, $escapes);
		}
	} else {
		$replace = strtr($replace, $escapes);
	}
	
	return preg_replace($search, $replace, $string);
}
888	
889	
890	/**
891	 * Finds the last position (in characters) of the search value in the string - case is ignored when doing performing a match
892	 * 
893	 * @param  string  $haystack  The string to search in
894	 * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
895	 * @param  integer $offset    The character position to sta…

Large files are truncated, but you can click here to view the full file