PageRenderTime 1120ms CodeModel.GetById 223ms app.highlight 781ms RepoModel.GetById 17ms app.codeStats 5ms

/classes/fUTF8.php

https://bitbucket.org/wbond/flourish/
PHP | 1628 lines | 1010 code | 187 blank | 431 comment | 110 complexity | c9f6b48fb2a19f8390197d74b4538bcb MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1<?php
   2/**
   3 * Provides string functions for UTF-8 strings
   4 * 
   5 * This class is implemented to provide a UTF-8 version of almost every built-in
   6 * PHP string function. For more information about UTF-8, please visit
   7 * http://flourishlib.com/docs/UTF-8.
   8 * 
   9 * @copyright  Copyright (c) 2008-2011 Will Bond
  10 * @author     Will Bond [wb] <will@flourishlib.com>
  11 * @license    http://flourishlib.com/license
  12 * 
  13 * @package    Flourish
  14 * @link       http://flourishlib.com/fUTF8
  15 * 
  16 * @version    1.0.0b15
  17 * @changes    1.0.0b15  Fixed a bug with using IBM's iconv implementation on AIX [wb, 2011-07-29]
  18 * @changes    1.0.0b14  Added a workaround for iconv having issues in MAMP 1.9.4+ [wb, 2011-07-26]
  19 * @changes    1.0.0b13  Fixed notices from being thrown when invalid data is sent to ::clean() [wb, 2011-06-10]
  20 * @changes    1.0.0b12  Fixed a variable name typo in ::sub() [wb, 2011-05-09]
  21 * @changes    1.0.0b11  Updated the class to not use phpinfo() to determine the iconv implementation [wb, 2010-11-04]
  22 * @changes    1.0.0b10  Fixed a bug with capitalizing a lowercase i resulting in a dotted upper-case I [wb, 2010-11-01]
  23 * @changes    1.0.0b9   Updated class to use fCore::startErrorCapture() instead of `error_reporting()` [wb, 2010-08-09]
  24 * @changes    1.0.0b8   Removed `e` flag from preg_replace() calls [wb, 2010-06-08]
  25 * @changes    1.0.0b7   Added the methods ::trim(), ::rtrim() and ::ltrim() [wb, 2010-05-11]
  26 * @changes    1.0.0b6   Fixed ::clean() to work with PHP installs that use an iconv library that doesn't support //IGNORE [wb, 2010-03-02]
  27 * @changes    1.0.0b5   Changed ::ucwords() to also uppercase words right after various punctuation [wb, 2009-09-18]
  28 * @changes    1.0.0b4   Changed replacement values in preg_replace() calls to be properly escaped [wb, 2009-06-11]
  29 * @changes    1.0.0b3   Fixed a parameter name in ::rpos() from `$search` to `$needle` [wb, 2009-02-06]
  30 * @changes    1.0.0b2   Fixed a bug in ::explode() with newlines and zero-length delimiters [wb, 2009-02-05]
  31 * @changes    1.0.0b    The initial implementation [wb, 2008-06-01]
  32 */
  33class fUTF8
  34{
  35	// The following constants allow for nice looking callbacks to static methods.
    	// Each constant holds the string 'fUTF8::<name>', so a callback can be written
    	// as fUTF8::trim (the constant) instead of the quoted string 'fUTF8::trim'.
  36	const ascii    = 'fUTF8::ascii';
  37	const chr      = 'fUTF8::chr';
  38	const clean    = 'fUTF8::clean';
  39	const cmp      = 'fUTF8::cmp';
  40	const explode  = 'fUTF8::explode';
  41	const icmp     = 'fUTF8::icmp';
  42	const inatcmp  = 'fUTF8::inatcmp';
  43	const ipos     = 'fUTF8::ipos';
  44	const ireplace = 'fUTF8::ireplace';
  45	const irpos    = 'fUTF8::irpos';
  46	const istr     = 'fUTF8::istr';
  47	const len      = 'fUTF8::len';
  48	const lower    = 'fUTF8::lower';
  49	const ltrim    = 'fUTF8::ltrim';
  50	const natcmp   = 'fUTF8::natcmp';
  51	const ord      = 'fUTF8::ord';
  52	const pad      = 'fUTF8::pad';
  53	const pos      = 'fUTF8::pos';
  54	const replace  = 'fUTF8::replace';
  55	const reset    = 'fUTF8::reset';
  56	const rev      = 'fUTF8::rev';
  57	const rpos     = 'fUTF8::rpos';
  58	const rtrim    = 'fUTF8::rtrim';
  59	const str      = 'fUTF8::str';
  60	const sub      = 'fUTF8::sub';
  61	const trim     = 'fUTF8::trim';
  62	const ucfirst  = 'fUTF8::ucfirst';
  63	const ucwords  = 'fUTF8::ucwords';
  64	const upper    = 'fUTF8::upper';
  65	const wordwrap = 'fUTF8::wordwrap';
  66	
  67	
  68	/**
  69	 * Depending on how things are compiled, NetBSD and Solaris don't support //IGNORE in iconv()
  70	 * 
  71	 * If //IGNORE support is not provided, strings with invalid characters will be truncated.
    	 * 
    	 * The value starts out as NULL. NOTE(review): presumably it is replaced by a boolean
    	 * once //IGNORE support has been checked - the code that does so is not visible in
    	 * this part of the file, so confirm against the methods that use it.
  72	 * 
  73	 * @var boolean
  74	 */
  75	static private $can_ignore_invalid = NULL;
  76	
  77	/**
  78	 * All lowercase UTF-8 characters mapped to uppercase characters
    	 * 
    	 * Array keys are the lowercase characters and array values are the matching
    	 * uppercase characters.
    	 * 
    	 * NOTE(review): in this copy of the file most non-ASCII entries read '?' => '?' and
    	 * several of the remaining accented entries look mis-decoded (for example a spacing
    	 * dot-above character is mapped to an uppercase Y with diaeresis). This looks like
    	 * encoding damage to the listing rather than intended data. Because PHP keeps only
    	 * the last value for a repeated array key, all of the '?' entries collapse into a
    	 * single element - restore the table from a correctly encoded (UTF-8) copy of the
    	 * file before relying on it.
  79	 * 
  80	 * @var array
  81	 */
  82	static private $lower_to_upper = array(
  83		'a' => 'A', 'b' => 'B', 'c' => 'C', 'd' => 'D', 'e' => 'E', 'f' => 'F',
  84		'g' => 'G', 'h' => 'H', 'i' => 'I', 'j' => 'J', 'k' => 'K', 'l' => 'L',
  85		'm' => 'M', 'n' => 'N', 'o' => 'O', 'p' => 'P', 'q' => 'Q', 'r' => 'R',
  86		's' => 'S', 't' => 'T', 'u' => 'U', 'v' => 'V', 'w' => 'W', 'x' => 'X',
  87		'y' => 'Y', 'z' => 'Z', 'ŕ' => 'Ŕ', 'á' => 'Á', 'â' => 'Â', 'ă' => 'Ă',
  88		'ä' => 'Ä', 'ĺ' => 'Ĺ', 'ć' => 'Ć', 'ç' => 'Ç', 'č' => 'Č', 'é' => 'É',
  89		'ę' => 'Ę', 'ë' => 'Ë', 'ě' => 'Ě', 'í' => 'Í', 'î' => 'Î', 'ď' => 'Ď',
  90		'đ' => 'Đ', 'ń' => 'Ń', 'ň' => 'Ň', 'ó' => 'Ó', 'ô' => 'Ô', 'ő' => 'Ő',
  91		'ö' => 'Ö', 'ř' => 'Ř', 'ů' => 'Ů', 'ú' => 'Ú', 'ű' => 'Ű', 'ü' => 'Ü',
  92		'ý' => 'Ý', 'ţ' => 'Ţ', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  93		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  94		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  95		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  96		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  97		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  98		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', 'œ' => 'Œ',
  99		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 100		'š' => 'Š', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 101		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 102		'˙' => 'Ÿ', '?' => '?', '?' => '?', 'ž' => 'Ž', '?' => '?', '?' => '?',
 103		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 104		'?' => '?', '?' => '?', 'ƒ' => '?', '?' => '?', '?' => '?', '?' => '?',
 105		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 106		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 107		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 108		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 109		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 110		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 111		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 112		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 113		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 114		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 115		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 116		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 117		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 118		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 119		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 120		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 121		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 122		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 123		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 124		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 125		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 126		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 127		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 128		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 129		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 130		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 131		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 132		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 133		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 134		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 135		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 136		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 137		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 138		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 139		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 140		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 141		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 142		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 143		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 144		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 145		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 146		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 147		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 148		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 149		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 150		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 151		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 152		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 153		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 154		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 155		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 156		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 157		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 158		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 159		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 160		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 161		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 162		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 163		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 164		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 165		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 166		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 167		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 168		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 169		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 170		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 171		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 172		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 173		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 174		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 175		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 176		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 177		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 178		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 179		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 180		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 181		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 182		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 183		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 184		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 185		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 186		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 187		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 188		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 189		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 190		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 191		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 192		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 193		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 194		'?' => '?', '?' => '?'
 195	);
 196	
 197	/**
 198	 * All lowercase UTF-8 characters not properly handled by [http://php.net/mb_strtoupper mb_strtoupper()] mapped to uppercase characters
    	 * 
    	 * Array keys are the lowercase characters and array values are the uppercase
    	 * replacements. NOTE(review): presumably applied as a correction on top of
    	 * mb_strtoupper() - the code that uses it is not visible in this part of the file.
    	 * 
    	 * NOTE(review): every entry in this copy of the file reads '?' => '?', which looks
    	 * like the original characters were lost to an encoding problem. Since PHP keeps
    	 * only the last value for a repeated key, the array as written holds one element;
    	 * restore it from a correctly encoded (UTF-8) copy of the file.
 199	 * 
 200	 * @var array
 201	 */
 202	static private $mb_lower_to_upper_fix = array(
 203		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 204		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 205		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 206		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 207		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 208		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 209		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 210		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 211		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 212		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 213		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?'
 214	);
 215	
 216	/**
 217	 * All uppercase UTF-8 characters not properly handled by [http://php.net/mb_strtolower mb_strtolower()] mapped to lowercase characters
    	 * 
    	 * Array keys are the uppercase characters and array values are the lowercase
    	 * replacements. NOTE(review): presumably applied as a correction on top of
    	 * mb_strtolower() - the code that uses it is not visible in this part of the file.
    	 * 
    	 * NOTE(review): every entry in this copy of the file reads '?' => '?', which looks
    	 * like the original characters were lost to an encoding problem. Since PHP keeps
    	 * only the last value for a repeated key, the array as written holds one element;
    	 * restore it from a correctly encoded (UTF-8) copy of the file.
 218	 * 
 219	 * @var array
 220	 */
 221	static private $mb_upper_to_lower_fix = array(
 222		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 223		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 224		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 225		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 226		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 227		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 228		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 229		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 230		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 231		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 232		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 233		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 234		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 235		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 236		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 237		'?' => '?', '?' => '?'
 238	);
 239	
 240	/**
 241	 * All uppercase UTF-8 characters mapped to lowercase characters
    	 * 
    	 * Array keys are the uppercase characters and array values are the matching
    	 * lowercase characters.
    	 * 
    	 * NOTE(review): in this copy of the file most non-ASCII entries read '?' => '?' and
    	 * several of the remaining accented entries look mis-decoded (for example an
    	 * uppercase Y with diaeresis is mapped to a spacing dot-above character). This looks
    	 * like encoding damage to the listing rather than intended data. Because PHP keeps
    	 * only the last value for a repeated array key, all of the '?' keys collapse into a
    	 * single element - restore the table from a correctly encoded (UTF-8) copy of the
    	 * file before relying on it.
    	 * 
    	 * NOTE(review): one damaged entry maps to a plain 'i'; presumably its key was the
    	 * dotted uppercase I referred to in the 1.0.0b10 changelog entry - confirm against
    	 * an intact copy.
 242	 * 
 243	 * @var array
 244	 */
 245	static private $upper_to_lower = array(
 246		'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', 'F' => 'f',
 247		'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k', 'L' => 'l',
 248		'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q', 'R' => 'r',
 249		'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x',
 250		'Y' => 'y', 'Z' => 'z', 'Ŕ' => 'ŕ', 'Á' => 'á', 'Â' => 'â', 'Ă' => 'ă',
 251		'Ä' => 'ä', 'Ĺ' => 'ĺ', 'Ć' => 'ć', 'Ç' => 'ç', 'Č' => 'č', 'É' => 'é',
 252		'Ę' => 'ę', 'Ë' => 'ë', 'Ě' => 'ě', 'Í' => 'í', 'Î' => 'î', 'Ď' => 'ď',
 253		'Đ' => 'đ', 'Ń' => 'ń', 'Ň' => 'ň', 'Ó' => 'ó', 'Ô' => 'ô', 'Ő' => 'ő',
 254		'Ö' => 'ö', 'Ř' => 'ř', 'Ů' => 'ů', 'Ú' => 'ú', 'Ű' => 'ű', 'Ü' => 'ü',
 255		'Ý' => 'ý', 'Ţ' => 'ţ', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 256		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 257		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 258		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 259		'?' => '?', '?' => '?', '?' => 'i', '?' => '?', '?' => '?', '?' => '?',
 260		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 261		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 262		'Œ' => 'œ', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 263		'?' => '?', 'Š' => 'š', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 264		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 265		'?' => '?', 'Ÿ' => '˙', '?' => '?', '?' => '?', 'Ž' => 'ž', '?' => '?',
 266		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 267		'?' => '?', '?' => '?', '?' => '?', '?' => 'ƒ', '?' => '?', '?' => '?',
 268		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 269		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 270		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 271		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 272		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 273		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 274		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 275		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 276		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 277		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 278		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 279		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 280		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 281		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 282		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 283		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 284		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 285		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 286		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 287		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 288		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 289		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 290		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 291		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 292		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 293		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 294		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 295		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 296		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 297		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 298		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 299		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 300		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 301		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 302		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 303		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 304		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 305		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 306		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 307		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 308		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 309		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 310		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 311		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 312		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 313		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 314		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 315		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 316		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 317		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 318		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 319		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 320		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 321		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 322		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 323		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 324		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 325		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 326		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 327		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 328		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 329		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 330		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 331		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 332		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 333		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 334		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 335		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 336		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 337		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 338		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 339		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 340		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 341		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 342		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 343		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 344		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 345		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 346		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 347		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 348		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 349		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 350		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 351		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 352		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 353		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 354		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 355		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 356		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
 357		'?' => '?', '?' => '?', '?' => '?'
 358	);
 359	
 360	/**
 361	 * A mapping of all ASCII-based latin characters, punctuation, symbols and number forms to ASCII.
 362	 * 
 363	 * Includes elements from the following unicode blocks:
 364	 * 
 365	 *  - Latin-1 Supplement
 366	 *  - Latin Extended-A
 367	 *  - Latin Extended-B
 368	 *  - IPA Extensions
 369	 *  - Latin Extended Additional
 370	 *  - General Punctuation
 371	 *  - Letterlike symbols
 372	 *  - Number Forms
 373	 * 
 374	 * @var array
 375	 */
 376	static private $utf8_to_ascii = array(
 377		// Latin-1 Supplement
 378		'Š' => '(c)', 'Ť' => '<<',  'Ž' => '(R)', 'ť' => '>>',  'ź' => '1/4',
 379		'˝' => '1/2', 'ž' => '3/4', 'Ŕ' => 'A',   'Á' => 'A',   'Â' => 'A',
 380		'Ă' => 'A',   'Ä' => 'A',   'Ĺ' => 'A',   'Ć' => 'AE',  'Ç' => 'C',
 381		'Č' => 'E',   'É' => 'E',   'Ę' => 'E',   'Ë' => 'E',   'Ě' => 'I',
 382		'Í' => 'I',   'Î' => 'I',   'Ď' => 'I',   'Ń' => 'N',   'Ň' => 'O',
 383		'Ó' => 'O',   'Ô' => 'O',   'Ő' => 'O',   'Ö' => 'O',   'Ř' => 'O',
 384		'Ů' => 'U',   'Ú' => 'U',   'Ű' => 'U',   'Ü' => 'U',   'Ý' => 'Y',
 385		'ŕ' => 'a',   'á' => 'a',   'â' => 'a',   'ă' => 'a',   'ä' => 'a',
 386		'ĺ' => 'a',   'ć' => 'ae',  'ç' => 'c',   'č' => 'e',   'é' => 'e',
 387		'ę' => 'e',   'ë' => 'e',   'ě' => 'i',   'í' => 'i',   'î' => 'i',
 388		'ď' => 'i',   'ń' => 'n',   'ň' => 'o',   'ó' => 'o',   'ô' => 'o',
 389		'ő' => 'o',   'ö' => 'o',   'ř' => 'o',   'ů' => 'u',   'ú' => 'u',
 390		'ű' => 'u',   'ü' => 'u',   'ý' => 'y',   '˙' => 'y',
 391		// Latin Extended-A
 392		'?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'A',
 393		'?' => 'a',   '?' => 'C',   '?' => 'c',   '?' => 'C',   '?' => 'c',
 394		'?' => 'C',   '?' => 'c',   '?' => 'C',   '?' => 'c',   '?' => 'D',
 395		'?' => 'd',   '?' => 'D',   '?' => 'd',   '?' => 'E',   '?' => 'e',
 396		'?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',
 397		'?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'G',   '?' => 'g',
 398		'?' => 'G',   '?' => 'g',   '?' => 'G',   '?' => 'g',   '?' => 'G',
 399		'?' => 'g',   '?' => 'H',   '?' => 'h',   '?' => 'H',   '?' => 'h',
 400		'?' => 'I',   '?' => 'i',   '?' => 'I',   '?' => 'i',   '?' => 'I',
 401		'?' => 'i',   '?' => 'I',   '?' => 'i',   '?' => 'I',   '?' => 'i',
 402		'?' => 'IJ',  '?' => 'ij',  '?' => 'J',   '?' => 'j',   '?' => 'K',
 403		'?' => 'k',   '?' => 'L',   '?' => 'l',   '?' => 'L',   '?' => 'l',
 404		'?' => 'L',   '?' => 'l',   '?' => 'L',   '?' => 'l',   '?' => 'L',
 405		'?' => 'l',   '?' => 'N',   '?' => 'n',   '?' => 'N',   '?' => 'n',
 406		'?' => 'N',   '?' => 'n',   '?' => "'n", '?' => 'N',   '?' => 'n',
 407		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'O',
 408		'?' => 'o',   'Œ' => 'OE',  'œ' => 'oe',  '?' => 'R',   '?' => 'r',
 409		'?' => 'R',   '?' => 'r',   '?' => 'R',   '?' => 'r',   '?' => 'S',
 410		'?' => 's',   '?' => 'S',   '?' => 's',   '?' => 'S',   '?' => 's',
 411		'Š' => 'S',   'š' => 's',   '?' => 'T',   '?' => 't',   '?' => 'T',
 412		'?' => 't',   '?' => 'T',   '?' => 't',   '?' => 'U',   '?' => 'u',
 413		'?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',
 414		'?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',
 415		'?' => 'W',   '?' => 'w',   '?' => 'Y',   '?' => 'y',   'Ÿ' => 'Y',
 416		'?' => 'Z',   '?' => 'z',   '?' => 'Z',   '?' => 'z',   'Ž' => 'Z',
 417		'ž' => 'z',
 418		// Latin Extended-B
 419		'?' => 'b',   '?' => 'B',   '?' => 'B',   '?' => 'b',   '?' => 'O',
 420		'?' => 'C',   '?' => 'c',   '?' => 'D',   '?' => 'D',   '?' => 'D',
 421		'?' => 'd',   '?' => 'E',   '?' => 'E',   '?' => 'F',   'ƒ' => 'f',
 422		'?' => 'G',   '?' => 'I',   '?' => 'K',   '?' => 'k',   '?' => 'l',
 423		'?' => 'M',   '?' => 'N',   '?' => 'n',   '?' => 'O',   '?' => 'O',
 424		'?' => 'o',   '?' => 'OI',  '?' => 'oi',  '?' => 'P',   '?' => 'p',
 425		'?' => 't',   '?' => 'T',   '?' => 't',   '?' => 'T',   '?' => 'U',
 426		'?' => 'u',   '?' => 'V',   '?' => 'Y',   '?' => 'y',   '?' => 'Z',
 427		'?' => 'z',   '?' => '2',   '?' => 'DZ',  '?' => 'Dz',  '?' => 'dz',
 428		'?' => 'LJ',  '?' => 'Lj',  '?' => 'lj',  '?' => 'Nj',  '?' => 'Nj',
 429		'?' => 'nj',  '?' => 'A',   '?' => 'a',   '?' => 'I',   '?' => 'i',
 430		'?' => 'O',   '?' => 'o',   '?' => 'U',   '?' => 'u',   '?' => 'U',
 431		'?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',
 432		'?' => 'U',   '?' => 'u',   '?' => 'e',   '?' => 'A',   '?' => 'a',
 433		'?' => 'A',   '?' => 'a',   '?' => 'AE',  '?' => 'ae',  '?' => 'G',
 434		'?' => 'g',   '?' => 'G',   '?' => 'g',   '?' => 'K',   '?' => 'k',
 435		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'j',
 436		'?' => 'DZ',  '?' => 'Dz',  '?' => 'dz',  '?' => 'G',   '?' => 'g',
 437		'?' => 'N',   '?' => 'n',   '?' => 'A',   '?' => 'a',   '?' => 'AE',
 438		'?' => 'ae',  '?' => 'O',   '?' => 'o',   '?' => 'A',   '?' => 'a',
 439		'?' => 'A',   '?' => 'a',   '?' => 'E',   '?' => 'e',   '?' => 'E',
 440		'?' => 'e',   '?' => 'I',   '?' => 'i',   '?' => 'I',   '?' => 'i',
 441		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'R',
 442		'?' => 'r',   '?' => 'R',   '?' => 'r',   '?' => 'U',   '?' => 'u',
 443		'?' => 'U',   '?' => 'u',   '?' => 'S',   '?' => 's',   '?' => 'T',
 444		'?' => 't',   '?' => 'H',   '?' => 'h',   '?' => 'N',   '?' => 'd',
 445		'?' => 'Z',   '?' => 'z',   '?' => 'A',   '?' => 'a',   '?' => 'E',
 446		'?' => 'e',   '?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',
 447		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'Y',
 448		'?' => 'y',   '?' => 'l',   '?' => 'n',   '?' => 't',   '?' => 'j',
 449		'?' => 'db',  '?' => 'qp',  '?' => 'A',   '?' => 'C',   '?' => 'c',
 450		'?' => 'L',   '?' => 'T',   '?' => 's',   '?' => 'z',   '?' => 'B',
 451		'?' => 'U',   '?' => 'V',   '?' => 'E',   '?' => 'e',   '?' => 'J',
 452		'?' => 'j',   '?' => 'Q',   '?' => 'q',   '?' => 'R',   '?' => 'r',
 453		'?' => 'Y',   '?' => 'y',
 454		// IPA Extensions
 455		'?' => 'a',   '?' => 'b',   '?' => 'o',   '?' => 'c',   '?' => 'd',
 456		'?' => 'd',   '?' => 'e',   '?' => 'e',   '?' => 'e',   '?' => 'e',
 457		'?' => 'e',   '?' => 'j',   '?' => 'g',   '?' => 'g',   '?' => 'G',
 458		'?' => 'h',   '?' => 'h',   '?' => 'i',   '?' => 'I',   '?' => 'l',
 459		'?' => 'l',   '?' => 'l',   '?' => 'm',   '?' => 'm',   '?' => 'm',
 460		'?' => 'n',   '?' => 'n',   '?' => 'N',   '?' => 'o',   '?' => 'OE',
 461		'?' => 'r',   '?' => 'r',   '?' => 'r',   '?' => 'r',   '?' => 'r',
 462		'?' => 'r',   '?' => 'r',   '?' => 'R',   '?' => 'R',   '?' => 's',
 463		'?' => 't',   '?' => 't',   '?' => 'u',   '?' => 'v',   '?' => 'v',
 464		'?' => 'w',   '?' => 'y',   '?' => 'Y',   '?' => 'z',   '?' => 'z',
 465		'?' => 'C',   '?' => 'B',   '?' => 'e',   '?' => 'G',   '?' => 'H',
 466		'?' => 'j',   '?' => 'k',   '?' => 'L',   '?' => 'q',   '?' => 'dz',
 467		'?' => 'dz',  '?' => 'ts',  '?' => 'tc',  '?' => 'ls',  '?' => 'lz',
 468		'?' => 'h',   '?' => 'h',
 469		// Latin Extended Additional
 470		'?' => 'A',   '?' => 'a',   '?' => 'B',   '?' => 'b',   '?' => 'B',
 471		'?' => 'b',   '?' => 'B',   '?' => 'b',   '?' => 'C',   '?' => 'c',
 472		'?' => 'D',   '?' => 'd',   '?' => 'D',   '?' => 'd',   '?' => 'D',
 473		'?' => 'd',   '?' => 'D',   '?' => 'd',   '?' => 'D',   '?' => 'd',
 474		'?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',
 475		'?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',
 476		'?' => 'F',   '?' => 'f',   '?' => 'G',   '?' => 'g',   '?' => 'H',
 477		'?' => 'h',   '?' => 'H',   '?' => 'h',   '?' => 'H',   '?' => 'h',
 478		'?' => 'H',   '?' => 'h',   '?' => 'H',   '?' => 'h',   '?' => 'I',
 479		'?' => 'i',   '?' => 'I',   '?' => 'i',   '?' => 'K',   '?' => 'k',
 480		'?' => 'K',   '?' => 'k',   '?' => 'K',   '?' => 'k',   '?' => 'L',
 481		'?' => 'l',   '?' => 'L',   '?' => 'l',   '?' => 'L',   '?' => 'l',
 482		'?' => 'L',   '?' => 'l',   '?' => 'M',   '?' => 'm',   '?' => 'M',
 483		'?' => 'm',   '?' => 'M',   '?' => 'm',   '?' => 'N',   '?' => 'n',
 484		'?' => 'N',   '?' => 'n',   '?' => 'N',   '?' => 'n',   '?' => 'N',
 485		'?' => 'n',   '?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',
 486		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'P',
 487		'?' => 'p',   '?' => 'P',   '?' => 'p',   '?' => 'R',   '?' => 'r',
 488		'?' => 'R',   '?' => 'r',   '?' => 'R',   '?' => 'r',   '?' => 'R',
 489		'?' => 'r',   '?' => 'S',   '?' => 's',   '?' => 'S',   '?' => 's',
 490		'?' => 'S',   '?' => 's',   '?' => 'S',   '?' => 's',   '?' => 'S',
 491		'?' => 's',   '?' => 'T',   '?' => 't',   '?' => 'T',   '?' => 't',
 492		'?' => 'T',   '?' => 't',   '?' => 'T',   '?' => 't',   '?' => 'U',
 493		'?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',
 494		'?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'V',
 495		'?' => 'v',   '?' => 'V',   '?' => 'v',   '?' => 'W',   '?' => 'w',
 496		'?' => 'W',   '?' => 'w',   '?' => 'W',   '?' => 'w',   '?' => 'W',
 497		'?' => 'w',   '?' => 'W',   '?' => 'w',   '?' => 'X',   '?' => 'x',
 498		'?' => 'X',   '?' => 'x',   '?' => 'Y',   '?' => 'y',   '?' => 'Z',
 499		'?' => 'z',   '?' => 'Z',   '?' => 'z',   '?' => 'Z',   '?' => 'z',
 500		'?' => 'h',   '?' => 't',   '?' => 'w',   '?' => 'y',   '?' => 'a',
 501		'?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'A',
 502		'?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',
 503		'?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'A',
 504		'?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',
 505		'?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'E',
 506		'?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',
 507		'?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',
 508		'?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',
 509		'?' => 'I',   '?' => 'i',   '?' => 'I',   '?' => 'i',   '?' => 'O',
 510		'?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',
 511		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'O',
 512		'?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',
 513		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'O',
 514		'?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'U',   '?' => 'u',
 515		'?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',
 516		'?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',
 517		'?' => 'U',   '?' => 'u',   '?' => 'Y',   '?' => 'y',   '?' => 'Y',
 518		'?' => 'y',   '?' => 'Y',   '?' => 'y',   '?' => 'Y',   '?' => 'y',
 519		// General Punctuation
 520		'?' => ' ',   '?' => ' ',   '?' => ' ',   '?' => ' ',   '?' => ' ',
 521		'?' => ' ',   '?' => ' ',   '?' => ' ',   '?' => ' ',   '?' => ' ',
 522		'?' => ' ',   '?' => '',    '?' => '',    '?' => '',    '?' => '-',
 523		'?' => '-',   '?' => '-',   '–' => '-',   '—' => '-',   '?' => '-',
 524		'?' => '||',  '‘' => "'",   '’' => "'",   '‚' => ',',   '?' => "'",
 525		'“' => '"',   '”' => '"',   '?' => '"',   '?' => '.',   '?' => '..',
 526		'…' => '...', '?' => ' ',   '?' => "'",   '?' => '"',   '?' => '\'"',
 527		'?' => "'",   '?' => '"',   '?' => '"\'', '‹' => '<',   '›' => '>',
 528		'?' => '!!',  '?' => '?!',  '?' => '/',   '?' => '?/',  '?' => '?!',
 529		'?' => '!?',
 530		// Letterlike Symbols
 531		'?' => 'SM',  '™' => 'TM',
 532		// Number Forms
 533		'?' => '1/3', '?' => '2/3', '?' => '1/5', '?' => '2/5', '?' => '3/5',
 534		'?' => '4/5', '?' => '1/6', '?' => '5/6', '?' => '1/8', '?' => '3/8',
 535		'?' => '5/8', '?' => '7/8', '?' => 'I',   '?' => 'II',  '?' => 'III',
 536		'?' => 'IV',  '?' => 'V',   '?' => 'Vi',  '?' => 'VII', '?' => 'VIII',
 537		'?' => 'IX',  '?' => 'X',   '?' => 'XI',  '?' => 'XII', '?' => 'L',
 538		'?' => 'C',   '?' => 'D',   '?' => 'M',   '?' => 'i',   '?' => 'ii',
 539		'?' => 'iii', '?' => 'iv',  '?' => 'v',   '?' => 'vi',  '?' => 'vii',
 540		'?' => 'viii','?' => 'ix',  '?' => 'x',   '?' => 'xi',  '?' => 'xii',
 541		'?' => 'l',   '?' => 'c',   '?' => 'd',   '?' => 'm'
 542	);
 543	
 544	/**
 545	 * If the [http://php.net/mbstring mbstring] extension is available - `NULL` until ::checkMbString() has been run
 546	 * 
 547	 * @var boolean|NULL
 548	 */
 549	static private $mbstring_available = NULL;
 550	
 551	
	/**
	 * Maps UTF-8 ASCII-based latin characters, punctuation, symbols and number forms to ASCII
	 * 
	 * Any characters or symbols that can not be translated will be removed.
	 * 
	 * This function is most useful for situations that only allow ASCII, such
	 * as in URLs.
	 * 
	 * Translates elements from the following unicode blocks:
	 * 
	 *  - Latin-1 Supplement
	 *  - Latin Extended-A
	 *  - Latin Extended-B
	 *  - IPA Extensions
	 *  - Latin Extended Additional
	 *  - General Punctuation
	 *  - Letterlike symbols
	 *  - Number Forms
	 * 
	 * @internal
	 * 
	 * @param  string $string  The string to convert
	 * @return string  The input string in pure ASCII
	 */
	static public function ascii($string)
	{
		// Only strings containing bytes outside of the ASCII range need any work
		if (self::detect($string)) {
			$translated = strtr($string, self::$utf8_to_ascii);
			
			// Whatever could not be translated is dropped
			$string = preg_replace('#[^\x00-\x7F]#', '', $translated);
		}
		
		return $string;
	}
 585	
 586	
	/**
	 * Records whether the [http://php.net/mbstring mbstring] extension is loaded
	 * 
	 * The result is stored in `self::$mbstring_available`.
	 * 
	 * @return void
	 */
	static private function checkMbString()
	{
		$is_loaded = extension_loaded('mbstring');
		self::$mbstring_available = $is_loaded;
	}
 596	
 597	
	/**
	 * Converts a unicode value into a UTF-8 character
	 * 
	 * Code points below zero or above `U+10FFFF` (the highest value UTF-8 can
	 * encode) are rejected. Surrogate code points (`U+D800` to `U+DFFF`) are
	 * still encoded as three bytes, which matches the previous behavior.
	 * 
	 * @throws fProgrammerException  When the code point is outside of the range `0` to `0x10FFFF`
	 * 
	 * @param  mixed $unicode_code_point  The character to create, either the `U+hex` or decimal code point
	 * @return string  The UTF-8 character
	 */
	static public function chr($unicode_code_point)
	{
		if (is_string($unicode_code_point) && substr($unicode_code_point, 0, 2) == 'U+') {
			$unicode_code_point = hexdec(substr($unicode_code_point, 2));
		}
		
		// The range is checked explicitly since checking only the first byte
		// lets 0x110000 through 0x13FFFF slip by with a lead byte of 0xF4
		if ($unicode_code_point < 0 || $unicode_code_point > 0x10FFFF) {
			throw new fProgrammerException(
				'The code point specified, %s, is invalid.',
				$unicode_code_point
			);
		}
		
		$code_point = (int) $unicode_code_point;
		
		// One byte characters
		if ($code_point < 0x80) {
			return chr($code_point);
		}
		
		// Two byte characters
		if ($code_point < 0x800) {
			return chr(0xC0 | ($code_point >> 6)) .
				chr(0x80 | ($code_point & 0x3F));
		}
		
		// Three byte characters
		if ($code_point < 0x10000) {
			return chr(0xE0 | ($code_point >> 12)) .
				chr(0x80 | (($code_point >> 6) & 0x3F)) .
				chr(0x80 | ($code_point & 0x3F));
		}
		
		// Four byte characters
		return chr(0xF0 | ($code_point >> 18)) .
			chr(0x80 | (($code_point >> 12) & 0x3F)) .
			chr(0x80 | (($code_point >> 6) & 0x3F)) .
			chr(0x80 | ($code_point & 0x3F));
	}
 649	
 650	
	/**
	 * Removes any invalid UTF-8 characters from a string or array of strings
	 * 
	 * Arrays are processed recursively with their keys left untouched.
	 * 
	 * @param  array|string $value  The string or array of strings to clean
	 * @return array|string  The cleaned string, or the array with every element cleaned
	 */
	static public function clean($value)
	{
		if (is_array($value)) {
			foreach ($value as $key => $element) {
				$value[$key] = self::clean($element);
			}
			return $value;
		}
		
		// The check of the iconv implementation is only done once
		if (self::$can_ignore_invalid === NULL) {
			self::$can_ignore_invalid = !in_array(strtolower(ICONV_IMPL), array('unknown', 'ibm iconv'));
		}
		
		$out_charset = 'UTF-8' . (self::$can_ignore_invalid ? '//IGNORE' : '');
		
		// Notices are captured while the conversion runs
		fCore::startErrorCapture(E_NOTICE);
		$cleaned = self::iconv('UTF-8', $out_charset, (string) $value);
		fCore::stopErrorCapture();
		
		return $cleaned;
	}
 677	
 678	
	/**
	 * Compares strings, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
	 * 
	 * Please note that this function sorts based on English language sorting
	 * rules only. Locale-specific sorting is done by
	 * [http://php.net/strcoll strcoll()], however there are technical
	 * limitations.
	 * 
	 * @param  string $str1  The first string to compare
	 * @param  string $str2  The second string to compare
	 * @return integer  < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
	 */
	static public function cmp($str1, $str2)
	{
		$ascii_order = strcmp(
			strtr($str1, self::$utf8_to_ascii),
			strtr($str2, self::$utf8_to_ascii)
		);
		
		if ($ascii_order !== 0) {
			return $ascii_order;
		}
		
		// Identical ASCII representations are ordered by their UTF-8 bytes
		return strcmp($str1, $str2);
	}
 705	
 706	
	/**
	 * Converts an offset in characters to an offset in bytes so that we can use the built-in functions for some operations
	 * 
	 * A positive offset is measured from the beginning of the string and a
	 * negative offset from the end. The byte offset returned always falls on a
	 * character boundary and never exceeds the length of the string in bytes.
	 * 
	 * @param  string  $string  The string to base the offset on
	 * @param  integer $offset  The character offset to convert to bytes
	 * @return integer  The converted offset, carrying the same sign as `$offset`
	 */
	static private function convertOffsetToBytes($string, $offset)
	{
		if ($offset == 0) {
			return 0;
		}
		
		$len = strlen($string);
		
		$byte_offset     = 0;
		$measured_offset = 0;
		$sign            = 1;
		
		// Negative offsets are measured by walking the reversed bytes
		if ($offset < 0) {
			$string = strrev($string);
			$sign   = -1;
			$offset = abs($offset);
		}
		
		for ($i=0; $i<$len && $measured_offset<$offset; $i++) {
			++$byte_offset;
			
			// ASCII bytes and lead bytes each mark one character, while
			// continuation bytes (0x80-0xBF) belong to a character already seen
			$ord = ord($string[$i]);
			if ($ord < 0x80 || $ord >= 0xC0) {
				++$measured_offset;
			}
		}
		
		// Walking forward, the loop stops right after the lead byte of the last
		// character, so its continuation bytes still need to be skipped. When
		// walking the reversed string the lead byte is the last byte seen.
		if ($sign == 1) {
			while ($byte_offset < $len && (ord($string[$byte_offset]) & 0xC0) == 0x80) {
				++$byte_offset;
			}
		}
		
		return $byte_offset * $sign;
	}
 752	
 753	
	/**
	 * Detects if a UTF-8 string contains any non-ASCII characters
	 * 
	 * @param  string $string  The string to check
	 * @return boolean  `TRUE` if at least one byte falls outside of the range `0x00` to `0x7F`
	 */
	static private function detect($string)
	{
		return preg_match('#[^\x00-\x7F]#', $string) === 1;
	}
 764	
 765	
	/**
	 * Explodes a string on a delimiter
	 * 
	 * If no delimiter is provided, the string will be exploded with each
	 * character being an element in the array.
	 * 
	 * @param  string  $string     The string to explode
	 * @param  string  $delimiter  The string to explode on. If `NULL` or `''` this method will return one character per array index.
	 * @return array  The exploded string
	 */
	static public function explode($string, $delimiter=NULL)
	{
		// A numeric delimiter such as `0` or `'0'` is falsy, but still a real delimiter
		$split_by_character = !$delimiter && !is_numeric($delimiter);
		
		if ($split_by_character) {
			// The `^\z` alternative makes an empty string produce a single empty element
			preg_match_all('#.|^\z#us', $string, $characters);
			return $characters[0];
		}
		
		return explode($delimiter, $string);
	}
 787
 788
 789	/**
 790	 * Thin wrapper that returns the result of the native iconv() directly - this
 791	 * works around a bug in MAMP 1.9.4+ and PHP 5.3 where iconv() does not seem to
 792	 * properly assign its return value to a variable, so keep the bare return.
 793	 *
 794	 * @param  string $in_charset   The incoming character encoding
 795	 * @param  string $out_charset  The outgoing character encoding
 796	 * @param  string $string       The string to convert
 797	 * @return string  The converted string, exactly as returned by the native iconv()
 798	 */
 799	static private function iconv($in_charset, $out_charset, $string)
 800	{
 801		return iconv($in_charset, $out_charset, $string);
 802	}
 803	
 804	
	/**
	 * Compares strings in a case-insensitive manner, with the resulting order having characters that are based on ASCII letters placed after the relative ASCII characters
	 * 
	 * Both strings are converted to lowercase and then passed to ::cmp().
	 * 
	 * Please note that this function sorts based on English language sorting
	 * rules only. Locale-specific sorting is done by
	 * [http://php.net/strcoll strcoll()], however there are technical
	 * limitations.
	 * 
	 * @param  string $str1  The first string to compare
	 * @param  string $str2  The second string to compare
	 * @return integer  < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
	 */
	static public function icmp($str1, $str2)
	{
		return self::cmp(self::lower($str1), self::lower($str2));
	}
 824	
 825	
	/**
	 * Compares strings using a natural order algorithm in a case-insensitive manner, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
	 * 
	 * Both strings are converted to lowercase and then passed to ::natcmp().
	 * 
	 * Please note that this function sorts based on English language sorting
	 * rules only. Locale-specific sorting is done by
	 * [http://php.net/strcoll strcoll()], however there are technical
	 * limitations.
	 * 
	 * @param  string $str1  The first string to compare
	 * @param  string $str2  The second string to compare
	 * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
	 */
	static public function inatcmp($str1, $str2)
	{
		return self::natcmp(self::lower($str1), self::lower($str2));
	}
 845	
 846	
	/**
	 * Finds the first position (in characters) of the search value in the string - case is ignored when performing a match
	 * 
	 * @param  string  $haystack  The string to search in
	 * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
	 * @param  integer $offset    The character position to start searching from
	 * @return mixed  The integer character position of the first occurrence of the needle or `FALSE` if no match
	 */
	static public function ipos($haystack, $needle, $offset=0)
	{
		// The native function is faster and sufficient for an ASCII-only haystack
		$is_ascii = !self::detect($haystack);
		if ($is_ascii) {
			return stripos($haystack, $needle, $offset);
		}
		
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		$use_mbstring = self::$mbstring_available && function_exists('mb_stripos');
		
		if (!$use_mbstring) {
			// Without mbstring both strings are lowercased and searched case-sensitively
			return self::pos(self::lower($haystack), self::lower($needle), $offset);
		}
		
		return mb_stripos($haystack, $needle, $offset, 'UTF-8');
	}
 875	
 876	
	/**
	 * Replaces matching parts of the string, with matches being done in a case-insensitive manner
	 * 
	 * If `$search` and `$replace` are both arrays and `$replace` is shorter,
	 * the extra `$search` string will be replaced with an empty string. If
	 * `$search` is an array and `$replace` is a string, all `$search` values
	 * will be replaced with the string specified.
	 * 
	 * The search values are matched literally, and the replacement values are
	 * inserted literally - `$` and `\` have no special meaning in either.
	 * 
	 * @param  string $string   The string to perform the replacements on
	 * @param  mixed  $search   The string (or array of strings) to search for - see method description for details
	 * @param  mixed  $replace  The string (or array of strings) to replace with - see method description for details
	 * @return string  The input string with the specified replacements
	 */
	static public function ireplace($string, $search, $replace)
	{
		if (is_array($search)) {
			$patterns = array();
			foreach ($search as $needle) {
				$patterns[] = '#' . preg_quote($needle, '#') . '#ui';
			}
		} else {
			$patterns = '#' . preg_quote($search, '#') . '#ui';
		}
		
		// Backslashes and dollar signs are escaped so preg_replace() does not
		// treat them as back references
		$escapes = array('\\' => '\\\\', '$' => '\\$');
		
		// strtr() only accepts strings, so an array of replacements has to be
		// escaped one element at a time
		if (is_array($replace)) {
			$replacements = array();
			foreach ($replace as $replacement) {
				$replacements[] = strtr($replacement, $escapes);
			}
		} else {
			$replacements = strtr($replace, $escapes);
		}
		
		return preg_replace($patterns, $replacements, $string);
	}
 905	
 906	
	/**
	 * Finds the last position (in characters) of the search value in the string - case is ignored when performing a match
	 * 
	 * @param  string  $haystack  The string to search in
	 * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
	 * @param  integer $offset    The character position to start searching from. A negative value will stop looking that many characters from the end of the string
	 * @return mixed  The integer character position of the last occurrence of the needle or `FALSE` if no match
	 */
	static public function irpos($haystack, $needle, $offset=0)
	{
		// The native function is faster and sufficient for an ASCII-only haystack
		$is_ascii = !self::detect($haystack);
		if ($is_ascii) {
			return strripos($haystack, $needle, $offset);
		}
		
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		$use_mbstring = self::$mbstring_available && function_exists('mb_strripos');
		
		if (!$use_mbstring) {
			// Without mbstring both strings are lowercased and searched case-sensitively
			return self::rpos(self::lower($haystack), self::lower($needle), $offset);
		}
		
		return mb_strripos($haystack, $needle, $offset, 'UTF-8');
	}
 935	
 936	
	/**
	 * Matches a string needle in the string haystack, returning a substring from the beginning of the needle to the end of the haystack
	 * 
	 * Can optionally return the part of the haystack before the needle. Matching
	 * is done in a case-insensitive manner.
	 * 
	 * @param  string  $haystack       The string to search in
	 * @param  string  $needle         The string to search for. This match will be done in a case-insensitive manner.
	 * @param  boolean $before_needle  If a substring of the haystack before the needle should be returned instead of the substring from the needle to the end of the haystack
	 * @return mixed  The specified part of the haystack, or `FALSE` if the needle was not found
	 */
	static public function istr($haystack, $needle, $before_needle=FALSE)
	{
		// We get better performance falling back for ASCII strings
		if ($before_needle == FALSE && !self::detect($haystack)) {
			return stristr($haystack, $needle);
		}
		
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		if (self::$mbstring_available && function_exists('mb_stristr')) {
			return mb_stristr($haystack, $needle, $before_needle, 'UTF-8');
		}
		
		// The position is found in the lowercase strings, and then used to cut
		// the original haystack so the case of the result is preserved.
		// NOTE(review): this assumes ::lower() never changes the byte length of
		// a character - confirm against the lowercase maps
		$pos = strpos(self::lower($haystack), self::lower($needle));
		
		// Without this check a missing needle would have FALSE used as an
		// offset, returning the whole haystack (or an empty string)
		if ($pos === FALSE) {
			return FALSE;
		}
		
		if ($before_needle) {
			return substr($haystack, 0, $pos);
		}
		
		return substr($haystack, $pos);
	}
 974	
 975	
	/**
	 * Determines the length (in characters) of a string
	 * 
	 * @param  string $string  The string to measure
	 * @return integer  The number of characters in the string
	 */
	static public function len($string)
	{
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		if (!self::$mbstring_available) {
			// Decoding leaves one byte per character, so the byte count is the character count
			return strlen(utf8_decode($string));
		}
		
		return mb_strlen($string, 'UTF-8');
	}
 994	
 995	
	/**
	 * Converts all uppercase characters to lowercase
	 * 
	 * @param  string $string  The string to convert
	 * @return string  The input string with all uppercase characters in lowercase
	 */
	static public function lower($string)
	{
		if (self::detect($string)) {
			if (self::$mbstring_available === NULL) {
				self::checkMbString();
			}
			
			// Without mbstring the conversion relies completely on the lookup table
			if (!self::$mbstring_available) {
				return strtr($string, self::$upper_to_lower);
			}
			
			// mb_strtolower() misses some characters, which are fixed with a second pass
			$lowered = mb_strtolower($string, 'utf-8');
			return strtr($lowered, self::$mb_upper_to_lower_fix);
		}
		
		// The native function is faster and sufficient for ASCII-only strings
		return strtolower($string);
	}
1021	
1022	
1023	/**
1024	 * Trims whitespace, or any specified characters, from the beginning of a string
1025	 * 
1026	 * @param  string $string    The string to trim
1027	 * @param  string $charlist  The characters to trim
1028	 * @return string  The trimmed string
1029	 */
1030	static public function ltrim($string, $charlist=NULL)
1031	{
1032		if (strlen($charlist) === 0) {
1033			return ltrim($string);
1034		}
1035		
1036		$search = preg_quote($charlist, '#');
1037		$search = str_replace('-', '\-', $search);
1038		$search = str_replace('\.\.', '-', $search);
1039		return preg_replace('#^[' . $search . ']+#Du', '', $string);
1040	}
1041	
1042	
1043	/**
1044	 * Compares strings using a natural order algorithm, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
1045	 * 
1046	 * Please note that this function sorts based on English language sorting
1047	 * rules only. Locale-sepcific sorting is done by
1048	 * [http://php.net/strcoll strcoll()], however there are technical
1049	 * limitations.
1050	 * 
1051	 * @param 

Large files files are truncated, but you can click here to view the full file