fUTF8.php - This PHP code defines a class `fUTF8` to provide string functions for UTF-8 strings

/classes/fUTF8.php

https://bitbucket.org/wbond/flourish/ · PHP · 1628 lines · 1010 code · 187 blank · 431 comment · 110 complexity · c9f6b48fb2a19f8390197d74b4538bcb MD5 · raw file
Large files are truncated; click here to view the full file.

<?php
/**
 * Provides string functions for UTF-8 strings
 * 
 * This class is implemented to provide a UTF-8 version of almost every built-in
 * PHP string function. For more information about UTF-8, please visit
 * http://flourishlib.com/docs/UTF-8.
 * 
 * @copyright  Copyright (c) 2008-2011 Will Bond
 * @author     Will Bond [wb] <will@flourishlib.com>
 * @license    http://flourishlib.com/license
 * 
 * @package    Flourish
 * @link       http://flourishlib.com/fUTF8
 * 
 * @version    1.0.0b15
 * @changes    1.0.0b15  Fixed a bug with using IBM's iconv implementation on AIX [wb, 2011-07-29]
 * @changes    1.0.0b14  Added a workaround for iconv having issues in MAMP 1.9.4+ [wb, 2011-07-26]
 * @changes    1.0.0b13  Fixed notices from being thrown when invalid data is sent to ::clean() [wb, 2011-06-10]
 * @changes    1.0.0b12  Fixed a variable name typo in ::sub() [wb, 2011-05-09]
 * @changes    1.0.0b11  Updated the class to not use phpinfo() to determine the iconv implementation [wb, 2010-11-04]
 * @changes    1.0.0b10  Fixed a bug with capitalizing a lowercase i resulting in a dotted upper-case I [wb, 2010-11-01]
 * @changes    1.0.0b9   Updated class to use fCore::startErrorCapture() instead of `error_reporting()` [wb, 2010-08-09]
 * @changes    1.0.0b8   Removed `e` flag from preg_replace() calls [wb, 2010-06-08]
 * @changes    1.0.0b7   Added the methods ::trim(), ::rtrim() and ::ltrim() [wb, 2010-05-11]
 * @changes    1.0.0b6   Fixed ::clean() to work with PHP installs that use an iconv library that doesn't support //IGNORE [wb, 2010-03-02]
 * @changes    1.0.0b5   Changed ::ucwords() to also uppercase words right after various punctuation [wb, 2009-09-18]
 * @changes    1.0.0b4   Changed replacement values in preg_replace() calls to be properly escaped [wb, 2009-06-11]
 * @changes    1.0.0b3   Fixed a parameter name in ::rpos() from `$search` to `$needle` [wb, 2009-02-06]
 * @changes    1.0.0b2   Fixed a bug in ::explode() with newlines and zero-length delimiters [wb, 2009-02-05]
 * @changes    1.0.0b    The initial implementation [wb, 2008-06-01]
 */
class fUTF8
{
	// Callback-name constants.
	//
	// Each constant holds the string 'fUTF8::<name>', which is the string form
	// PHP accepts as a callable for a static method. They allow for nice looking
	// callbacks: calling code can pass fUTF8::lower (for example) wherever a
	// callback is expected instead of spelling out the quoted string
	// 'fUTF8::lower'.
	//
	// NOTE(review): the methods these strings name are not visible in this part
	// of the file - confirm every constant still has a matching static method.
	const ascii    = 'fUTF8::ascii';
	const chr      = 'fUTF8::chr';
	const clean    = 'fUTF8::clean';
	const cmp      = 'fUTF8::cmp';
	const explode  = 'fUTF8::explode';
	const icmp     = 'fUTF8::icmp';
	const inatcmp  = 'fUTF8::inatcmp';
	const ipos     = 'fUTF8::ipos';
	const ireplace = 'fUTF8::ireplace';
	const irpos    = 'fUTF8::irpos';
	const istr     = 'fUTF8::istr';
	const len      = 'fUTF8::len';
	const lower    = 'fUTF8::lower';
	const ltrim    = 'fUTF8::ltrim';
	const natcmp   = 'fUTF8::natcmp';
	const ord      = 'fUTF8::ord';
	const pad      = 'fUTF8::pad';
	const pos      = 'fUTF8::pos';
	const replace  = 'fUTF8::replace';
	const reset    = 'fUTF8::reset';
	const rev      = 'fUTF8::rev';
	const rpos     = 'fUTF8::rpos';
	const rtrim    = 'fUTF8::rtrim';
	const str      = 'fUTF8::str';
	const sub      = 'fUTF8::sub';
	const trim     = 'fUTF8::trim';
	const ucfirst  = 'fUTF8::ucfirst';
	const ucwords  = 'fUTF8::ucwords';
	const upper    = 'fUTF8::upper';
	const wordwrap = 'fUTF8::wordwrap';
	
	
	/**
	 * Flag for whether iconv() can be asked to skip invalid characters via //IGNORE
	 * 
	 * Depending on how things are compiled, NetBSD and Solaris don't support
	 * //IGNORE in iconv(). If //IGNORE support is not provided, strings with
	 * invalid characters will be truncated.
	 * 
	 * NOTE(review): initialized to NULL; presumably NULL means "not yet detected"
	 * and the real value is filled in by a method outside this view - confirm.
	 * 
	 * @var boolean|NULL
	 */
	static private $can_ignore_invalid = NULL;
	
	/**
	 * All lowercase UTF-8 characters mapped to uppercase characters
	 * 
	 * @var array
	 */
	static private $lower_to_upper = array(
		'a' => 'A', 'b' => 'B', 'c' => 'C', 'd' => 'D', 'e' => 'E', 'f' => 'F',
		'g' => 'G', 'h' => 'H', 'i' => 'I', 'j' => 'J', 'k' => 'K', 'l' => 'L',
		'm' => 'M', 'n' => 'N', 'o' => 'O', 'p' => 'P', 'q' => 'Q', 'r' => 'R',
		's' => 'S', 't' => 'T', 'u' => 'U', 'v' => 'V', 'w' => 'W', 'x' => 'X',
		'y' => 'Y', 'z' => 'Z', 'ŕ' => 'Ŕ', 'á' => 'Á', 'â' => 'Â', 'ă' => 'Ă',
		'ä' => 'Ä', 'ĺ' => 'Ĺ', 'ć' => 'Ć', 'ç' => 'Ç', 'č' => 'Č', 'é' => 'É',
		'ę' => 'Ę', 'ë' => 'Ë', 'ě' => 'Ě', 'í' => 'Í', 'î' => 'Î', 'ď' => 'Ď',
		'đ' => 'Đ', 'ń' => 'Ń', 'ň' => 'Ň', 'ó' => 'Ó', 'ô' => 'Ô', 'ő' => 'Ő',
		'ö' => 'Ö', 'ř' => 'Ř', 'ů' => 'Ů', 'ú' => 'Ú', 'ű' => 'Ű', 'ü' => 'Ü',
		'ý' => 'Ý', 'ţ' => 'Ţ', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '' => '',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'' => '', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'˙' => '', '?' => '?', '?' => '?', '' => '', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?'
	);
	
	/**
	 * All lowercase UTF-8 characters not properly handled by [http://php.net/mb_strtoupper mb_strtoupper()] mapped to uppercase characters
	 * 
	 * NOTE(review): in this copy of the file every key and value below is the
	 * literal '?'. PHP keeps only the last value for a repeated array key, so
	 * this table evaluates to the single entry '?' => '?'. The original
	 * multi-byte characters appear to have been lost in a charset conversion;
	 * restore this table from an intact UTF-8 copy of the file.
	 * 
	 * @var array
	 */
	static private $mb_lower_to_upper_fix = array(
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?'
	);
	
	/**
	 * All uppercase UTF-8 characters not properly handled by [http://php.net/mb_strtolower mb_strtolower()] mapped to lowercase characters
	 * 
	 * NOTE(review): in this copy of the file every key and value below is the
	 * literal '?'. PHP keeps only the last value for a repeated array key, so
	 * this table evaluates to the single entry '?' => '?'. The original
	 * multi-byte characters appear to have been lost in a charset conversion;
	 * restore this table from an intact UTF-8 copy of the file.
	 * 
	 * @var array
	 */
	static private $mb_upper_to_lower_fix = array(
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?'
	);
	
	/**
	 * All uppercase UTF-8 characters mapped to lowercase characters
	 * 
	 * @var array
	 */
	static private $upper_to_lower = array(
		'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', 'F' => 'f',
		'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k', 'L' => 'l',
		'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q', 'R' => 'r',
		'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x',
		'Y' => 'y', 'Z' => 'z', 'Ŕ' => 'ŕ', 'Á' => 'á', 'Â' => 'â', 'Ă' => 'ă',
		'Ä' => 'ä', 'Ĺ' => 'ĺ', 'Ć' => 'ć', 'Ç' => 'ç', 'Č' => 'č', 'É' => 'é',
		'Ę' => 'ę', 'Ë' => 'ë', 'Ě' => 'ě', 'Í' => 'í', 'Î' => 'î', 'Ď' => 'ď',
		'Đ' => 'đ', 'Ń' => 'ń', 'Ň' => 'ň', 'Ó' => 'ó', 'Ô' => 'ô', 'Ő' => 'ő',
		'Ö' => 'ö', 'Ř' => 'ř', 'Ů' => 'ů', 'Ú' => 'ú', 'Ű' => 'ű', 'Ü' => 'ü',
		'Ý' => 'ý', 'Ţ' => 'ţ', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => 'i', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'' => '', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '' => '', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '' => '˙', '?' => '?', '?' => '?', '' => '', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
		'?' => '?', '?' => '?', '?' => '?'
	);
	
	/**
	 * A mapping of all ASCII-based Latin characters, punctuation, symbols and number forms to ASCII.
	 * 
	 * Includes elements from the following Unicode blocks:
	 * 
	 *  - Latin-1 Supplement
	 *  - Latin Extended-A
	 *  - Latin Extended-B
	 *  - IPA Extensions
	 *  - Latin Extended Additional
	 *  - General Punctuation
	 *  - Letterlike Symbols
	 *  - Number Forms
	 * 
	 * @var array
	 */
	static private $utf8_to_ascii = array(
		// Latin-1 Supplement
		'Š' => '(c)', 'Ť' => '<<',  'Ž' => '(R)', 'ť' => '>>',  'ź' => '1/4',
		'˝' => '1/2', 'ž' => '3/4', 'Ŕ' => 'A',   'Á' => 'A',   'Â' => 'A',
		'Ă' => 'A',   'Ä' => 'A',   'Ĺ' => 'A',   'Ć' => 'AE',  'Ç' => 'C',
		'Č' => 'E',   'É' => 'E',   'Ę' => 'E',   'Ë' => 'E',   'Ě' => 'I',
		'Í' => 'I',   'Î' => 'I',   'Ď' => 'I',   'Ń' => 'N',   'Ň' => 'O',
		'Ó' => 'O',   'Ô' => 'O',   'Ő' => 'O',   'Ö' => 'O',   'Ř' => 'O',
		'Ů' => 'U',   'Ú' => 'U',   'Ű' => 'U',   'Ü' => 'U',   'Ý' => 'Y',
		'ŕ' => 'a',   'á' => 'a',   'â' => 'a',   'ă' => 'a',   'ä' => 'a',
		'ĺ' => 'a',   'ć' => 'ae',  'ç' => 'c',   'č' => 'e',   'é' => 'e',
		'ę' => 'e',   'ë' => 'e',   'ě' => 'i',   'í' => 'i',   'î' => 'i',
		'ď' => 'i',   'ń' => 'n',   'ň' => 'o',   'ó' => 'o',   'ô' => 'o',
		'ő' => 'o',   'ö' => 'o',   'ř' => 'o',   'ů' => 'u',   'ú' => 'u',
		'ű' => 'u',   'ü' => 'u',   'ý' => 'y',   '˙' => 'y',
		// Latin Extended-A
		'?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'A',
		'?' => 'a',   '?' => 'C',   '?' => 'c',   '?' => 'C',   '?' => 'c',
		'?' => 'C',   '?' => 'c',   '?' => 'C',   '?' => 'c',   '?' => 'D',
		'?' => 'd',   '?' => 'D',   '?' => 'd',   '?' => 'E',   '?' => 'e',
		'?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',
		'?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'G',   '?' => 'g',
		'?' => 'G',   '?' => 'g',   '?' => 'G',   '?' => 'g',   '?' => 'G',
		'?' => 'g',   '?' => 'H',   '?' => 'h',   '?' => 'H',   '?' => 'h',
		'?' => 'I',   '?' => 'i',   '?' => 'I',   '?' => 'i',   '?' => 'I',
		'?' => 'i',   '?' => 'I',   '?' => 'i',   '?' => 'I',   '?' => 'i',
		'?' => 'IJ',  '?' => 'ij',  '?' => 'J',   '?' => 'j',   '?' => 'K',
		'?' => 'k',   '?' => 'L',   '?' => 'l',   '?' => 'L',   '?' => 'l',
		'?' => 'L',   '?' => 'l',   '?' => 'L',   '?' => 'l',   '?' => 'L',
		'?' => 'l',   '?' => 'N',   '?' => 'n',   '?' => 'N',   '?' => 'n',
		'?' => 'N',   '?' => 'n',   '?' => "'n", '?' => 'N',   '?' => 'n',
		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'O',
		'?' => 'o',   '' => 'OE',  '' => 'oe',  '?' => 'R',   '?' => 'r',
		'?' => 'R',   '?' => 'r',   '?' => 'R',   '?' => 'r',   '?' => 'S',
		'?' => 's',   '?' => 'S',   '?' => 's',   '?' => 'S',   '?' => 's',
		'' => 'S',   '' => 's',   '?' => 'T',   '?' => 't',   '?' => 'T',
		'?' => 't',   '?' => 'T',   '?' => 't',   '?' => 'U',   '?' => 'u',
		'?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',
		'?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',
		'?' => 'W',   '?' => 'w',   '?' => 'Y',   '?' => 'y',   '' => 'Y',
		'?' => 'Z',   '?' => 'z',   '?' => 'Z',   '?' => 'z',   '' => 'Z',
		'' => 'z',
		// Latin Extended-B
		'?' => 'b',   '?' => 'B',   '?' => 'B',   '?' => 'b',   '?' => 'O',
		'?' => 'C',   '?' => 'c',   '?' => 'D',   '?' => 'D',   '?' => 'D',
		'?' => 'd',   '?' => 'E',   '?' => 'E',   '?' => 'F',   '' => 'f',
		'?' => 'G',   '?' => 'I',   '?' => 'K',   '?' => 'k',   '?' => 'l',
		'?' => 'M',   '?' => 'N',   '?' => 'n',   '?' => 'O',   '?' => 'O',
		'?' => 'o',   '?' => 'OI',  '?' => 'oi',  '?' => 'P',   '?' => 'p',
		'?' => 't',   '?' => 'T',   '?' => 't',   '?' => 'T',   '?' => 'U',
		'?' => 'u',   '?' => 'V',   '?' => 'Y',   '?' => 'y',   '?' => 'Z',
		'?' => 'z',   '?' => '2',   '?' => 'DZ',  '?' => 'Dz',  '?' => 'dz',
		'?' => 'LJ',  '?' => 'Lj',  '?' => 'lj',  '?' => 'Nj',  '?' => 'Nj',
		'?' => 'nj',  '?' => 'A',   '?' => 'a',   '?' => 'I',   '?' => 'i',
		'?' => 'O',   '?' => 'o',   '?' => 'U',   '?' => 'u',   '?' => 'U',
		'?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',
		'?' => 'U',   '?' => 'u',   '?' => 'e',   '?' => 'A',   '?' => 'a',
		'?' => 'A',   '?' => 'a',   '?' => 'AE',  '?' => 'ae',  '?' => 'G',
		'?' => 'g',   '?' => 'G',   '?' => 'g',   '?' => 'K',   '?' => 'k',
		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'j',
		'?' => 'DZ',  '?' => 'Dz',  '?' => 'dz',  '?' => 'G',   '?' => 'g',
		'?' => 'N',   '?' => 'n',   '?' => 'A',   '?' => 'a',   '?' => 'AE',
		'?' => 'ae',  '?' => 'O',   '?' => 'o',   '?' => 'A',   '?' => 'a',
		'?' => 'A',   '?' => 'a',   '?' => 'E',   '?' => 'e',   '?' => 'E',
		'?' => 'e',   '?' => 'I',   '?' => 'i',   '?' => 'I',   '?' => 'i',
		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'R',
		'?' => 'r',   '?' => 'R',   '?' => 'r',   '?' => 'U',   '?' => 'u',
		'?' => 'U',   '?' => 'u',   '?' => 'S',   '?' => 's',   '?' => 'T',
		'?' => 't',   '?' => 'H',   '?' => 'h',   '?' => 'N',   '?' => 'd',
		'?' => 'Z',   '?' => 'z',   '?' => 'A',   '?' => 'a',   '?' => 'E',
		'?' => 'e',   '?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',
		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'Y',
		'?' => 'y',   '?' => 'l',   '?' => 'n',   '?' => 't',   '?' => 'j',
		'?' => 'db',  '?' => 'qp',  '?' => 'A',   '?' => 'C',   '?' => 'c',
		'?' => 'L',   '?' => 'T',   '?' => 's',   '?' => 'z',   '?' => 'B',
		'?' => 'U',   '?' => 'V',   '?' => 'E',   '?' => 'e',   '?' => 'J',
		'?' => 'j',   '?' => 'Q',   '?' => 'q',   '?' => 'R',   '?' => 'r',
		'?' => 'Y',   '?' => 'y',
		// IPA Extensions
		'?' => 'a',   '?' => 'b',   '?' => 'o',   '?' => 'c',   '?' => 'd',
		'?' => 'd',   '?' => 'e',   '?' => 'e',   '?' => 'e',   '?' => 'e',
		'?' => 'e',   '?' => 'j',   '?' => 'g',   '?' => 'g',   '?' => 'G',
		'?' => 'h',   '?' => 'h',   '?' => 'i',   '?' => 'I',   '?' => 'l',
		'?' => 'l',   '?' => 'l',   '?' => 'm',   '?' => 'm',   '?' => 'm',
		'?' => 'n',   '?' => 'n',   '?' => 'N',   '?' => 'o',   '?' => 'OE',
		'?' => 'r',   '?' => 'r',   '?' => 'r',   '?' => 'r',   '?' => 'r',
		'?' => 'r',   '?' => 'r',   '?' => 'R',   '?' => 'R',   '?' => 's',
		'?' => 't',   '?' => 't',   '?' => 'u',   '?' => 'v',   '?' => 'v',
		'?' => 'w',   '?' => 'y',   '?' => 'Y',   '?' => 'z',   '?' => 'z',
		'?' => 'C',   '?' => 'B',   '?' => 'e',   '?' => 'G',   '?' => 'H',
		'?' => 'j',   '?' => 'k',   '?' => 'L',   '?' => 'q',   '?' => 'dz',
		'?' => 'dz',  '?' => 'ts',  '?' => 'tc',  '?' => 'ls',  '?' => 'lz',
		'?' => 'h',   '?' => 'h',
		// Latin Extended Additional
		'?' => 'A',   '?' => 'a',   '?' => 'B',   '?' => 'b',   '?' => 'B',
		'?' => 'b',   '?' => 'B',   '?' => 'b',   '?' => 'C',   '?' => 'c',
		'?' => 'D',   '?' => 'd',   '?' => 'D',   '?' => 'd',   '?' => 'D',
		'?' => 'd',   '?' => 'D',   '?' => 'd',   '?' => 'D',   '?' => 'd',
		'?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',
		'?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',
		'?' => 'F',   '?' => 'f',   '?' => 'G',   '?' => 'g',   '?' => 'H',
		'?' => 'h',   '?' => 'H',   '?' => 'h',   '?' => 'H',   '?' => 'h',
		'?' => 'H',   '?' => 'h',   '?' => 'H',   '?' => 'h',   '?' => 'I',
		'?' => 'i',   '?' => 'I',   '?' => 'i',   '?' => 'K',   '?' => 'k',
		'?' => 'K',   '?' => 'k',   '?' => 'K',   '?' => 'k',   '?' => 'L',
		'?' => 'l',   '?' => 'L',   '?' => 'l',   '?' => 'L',   '?' => 'l',
		'?' => 'L',   '?' => 'l',   '?' => 'M',   '?' => 'm',   '?' => 'M',
		'?' => 'm',   '?' => 'M',   '?' => 'm',   '?' => 'N',   '?' => 'n',
		'?' => 'N',   '?' => 'n',   '?' => 'N',   '?' => 'n',   '?' => 'N',
		'?' => 'n',   '?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',
		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'P',
		'?' => 'p',   '?' => 'P',   '?' => 'p',   '?' => 'R',   '?' => 'r',
		'?' => 'R',   '?' => 'r',   '?' => 'R',   '?' => 'r',   '?' => 'R',
		'?' => 'r',   '?' => 'S',   '?' => 's',   '?' => 'S',   '?' => 's',
		'?' => 'S',   '?' => 's',   '?' => 'S',   '?' => 's',   '?' => 'S',
		'?' => 's',   '?' => 'T',   '?' => 't',   '?' => 'T',   '?' => 't',
		'?' => 'T',   '?' => 't',   '?' => 'T',   '?' => 't',   '?' => 'U',
		'?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',
		'?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'V',
		'?' => 'v',   '?' => 'V',   '?' => 'v',   '?' => 'W',   '?' => 'w',
		'?' => 'W',   '?' => 'w',   '?' => 'W',   '?' => 'w',   '?' => 'W',
		'?' => 'w',   '?' => 'W',   '?' => 'w',   '?' => 'X',   '?' => 'x',
		'?' => 'X',   '?' => 'x',   '?' => 'Y',   '?' => 'y',   '?' => 'Z',
		'?' => 'z',   '?' => 'Z',   '?' => 'z',   '?' => 'Z',   '?' => 'z',
		'?' => 'h',   '?' => 't',   '?' => 'w',   '?' => 'y',   '?' => 'a',
		'?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'A',
		'?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',
		'?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'A',
		'?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',
		'?' => 'A',   '?' => 'a',   '?' => 'A',   '?' => 'a',   '?' => 'E',
		'?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',
		'?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',
		'?' => 'e',   '?' => 'E',   '?' => 'e',   '?' => 'E',   '?' => 'e',
		'?' => 'I',   '?' => 'i',   '?' => 'I',   '?' => 'i',   '?' => 'O',
		'?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',
		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'O',
		'?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',
		'?' => 'O',   '?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'O',
		'?' => 'o',   '?' => 'O',   '?' => 'o',   '?' => 'U',   '?' => 'u',
		'?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',
		'?' => 'u',   '?' => 'U',   '?' => 'u',   '?' => 'U',   '?' => 'u',
		'?' => 'U',   '?' => 'u',   '?' => 'Y',   '?' => 'y',   '?' => 'Y',
		'?' => 'y',   '?' => 'Y',   '?' => 'y',   '?' => 'Y',   '?' => 'y',
		// General Punctuation
		'?' => ' ',   '?' => ' ',   '?' => ' ',   '?' => ' ',   '?' => ' ',
		'?' => ' ',   '?' => ' ',   '?' => ' ',   '?' => ' ',   '?' => ' ',
		'?' => ' ',   '?' => '',    '?' => '',    '?' => '',    '?' => '-',
		'?' => '-',   '?' => '-',   '' => '-',   '' => '-',   '?' => '-',
		'?' => '||',  '' => "'",   '' => "'",   '' => ',',   '?' => "'",
		'' => '"',   '' => '"',   '?' => '"',   '?' => '.',   '?' => '..',
		'' => '...', '?' => ' ',   '?' => "'",   '?' => '"',   '?' => '\'"',
		'?' => "'",   '?' => '"',   '?' => '"\'', '' => '<',   '' => '>',
		'?' => '!!',  '?' => '?!',  '?' => '/',   '?' => '?/',  '?' => '?!',
		'?' => '!?',
		// Letterlike Symbols
		'?' => 'SM',  '' => 'TM',
		// Number Forms
		'?' => '1/3', '?' => '2/3', '?' => '1/5', '?' => '2/5', '?' => '3/5',
		'?' => '4/5', '?' => '1/6', '?' => '5/6', '?' => '1/8', '?' => '3/8',
		'?' => '5/8', '?' => '7/8', '?' => 'I',   '?' => 'II',  '?' => 'III',
		'?' => 'IV',  '?' => 'V',   '?' => 'Vi',  '?' => 'VII', '?' => 'VIII',
		'?' => 'IX',  '?' => 'X',   '?' => 'XI',  '?' => 'XII', '?' => 'L',
		'?' => 'C',   '?' => 'D',   '?' => 'M',   '?' => 'i',   '?' => 'ii',
		'?' => 'iii', '?' => 'iv',  '?' => 'v',   '?' => 'vi',  '?' => 'vii',
		'?' => 'viii','?' => 'ix',  '?' => 'x',   '?' => 'xi',  '?' => 'xii',
		'?' => 'l',   '?' => 'c',   '?' => 'd',   '?' => 'm'
	);
	
	/**
	 * If the [http://php.net/mbstring mbstring] extension is available
	 * 
	 * Starts out as `NULL`, which means the check has not been run yet;
	 * ::checkMbString() replaces it with the result of
	 * `extension_loaded('mbstring')`.
	 * 
	 * @var boolean
	 */
	static private $mbstring_available = NULL;
	
	
	/**
	 * Maps UTF-8 ASCII-based latin characters, punctuation, symbols and number forms to ASCII
	 * 
	 * Any characters or symbols that can not be translated will be removed.
	 * 
	 * This function is most useful for situations that only allow ASCII, such
	 * as in URLs.
	 * 
	 * Translates elements from the following unicode blocks:
	 * 
	 *  - Latin-1 Supplement
	 *  - Latin Extended-A
	 *  - Latin Extended-B
	 *  - IPA Extensions
	 *  - Latin Extended Additional
	 *  - General Punctuation
	 *  - Letterlike symbols
	 *  - Number Forms
	 * 
	 * @internal
	 * 
	 * @param  string $string  The string to convert
	 * @return string  The input string in pure ASCII
	 */
	static public function ascii($string)
	{
		// Only strings that contain non-ASCII bytes need any work
		if (self::detect($string)) {
			// Transliterate everything the map knows about, then drop whatever is left over
			$transliterated = strtr($string, self::$utf8_to_ascii);
			return preg_replace('#[^\x00-\x7F]#', '', $transliterated);
		}
		
		return $string;
	}
	
	
	/**
	 * Checks to see if the [http://php.net/mbstring mbstring] extension is available
	 * 
	 * @return void
	 */
	static private function checkMbString()
	{
		// Store the result in the static flag that the other methods compare against NULL
		$is_loaded = extension_loaded('mbstring');
		self::$mbstring_available = $is_loaded;
	}
	
	
	/**
	 * Converts a unicode value into a UTF-8 character
	 * 
	 * @param  mixed $unicode_code_point  The character to create, either the `U+hex` or decimal code point
	 * @return string  The UTF-8 character
	 */
	static public function chr($unicode_code_point)
	{
		// Accept the `U+hex` notation by converting it to a decimal code point
		if (is_string($unicode_code_point) && substr($unicode_code_point, 0, 2) == 'U+') {
			$unicode_code_point = hexdec(substr($unicode_code_point, 2));
		}
		
		// Floats (hexdec() returns one for very large values) are range-checked
		// before the integer cast so they can not wrap around into the valid range
		$code = is_float($unicode_code_point) ? $unicode_code_point : (int) $unicode_code_point;
		
		// Unicode ends at U+10FFFF, and U+D800 through U+DFFF are surrogates
		// that must never be encoded as UTF-8
		if ($code < 0 || $code > 0x10FFFF || ($code >= 0xD800 && $code <= 0xDFFF)) {
			throw new fProgrammerException(
				'The code point specified, %s, is invalid.',
				$unicode_code_point
			);
		}
		
		$code = (int) $code;
		
		// One byte characters: 0xxxxxxx
		if ($code < 0x80) {
			return chr($code);
		}
		
		// Two byte characters: 110xxxxx 10xxxxxx
		if ($code < 0x800) {
			return chr(0xC0 | ($code >> 6))
				. chr(0x80 | ($code & 0x3F));
		}
		
		// Three byte characters: 1110xxxx 10xxxxxx 10xxxxxx
		if ($code < 0x10000) {
			return chr(0xE0 | ($code >> 12))
				. chr(0x80 | (($code >> 6) & 0x3F))
				. chr(0x80 | ($code & 0x3F));
		}
		
		// Four byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		return chr(0xF0 | ($code >> 18))
			. chr(0x80 | (($code >> 12) & 0x3F))
			. chr(0x80 | (($code >> 6) & 0x3F))
			. chr(0x80 | ($code & 0x3F));
	}
	
	
	/**
	 * Removes any invalid UTF-8 characters from a string or array of strings
	 * 
	 * @param  array|string $value  The string or array of strings to clean
	 * @return array|string  The cleaned string, or the array with every string in it cleaned
	 */
	static public function clean($value)
	{
		// Arrays are cleaned element by element, recursing into nested arrays
		if (is_array($value)) {
			foreach (array_keys($value) as $key) {
				$value[$key] = self::clean($value[$key]);
			}
			return $value;
		}
		
		// Work out once whether this iconv implementation is allowed to use //IGNORE
		if (self::$can_ignore_invalid === NULL) {
			$implementation = strtolower(ICONV_IMPL);
			self::$can_ignore_invalid = !in_array($implementation, array('unknown', 'ibm iconv'));
		}
		
		$out_charset = 'UTF-8';
		if (self::$can_ignore_invalid) {
			$out_charset .= '//IGNORE';
		}
		
		// Notices raised during the conversion are captured instead of being shown
		fCore::startErrorCapture(E_NOTICE);
		$cleaned = self::iconv('UTF-8', $out_charset, (string) $value);
		fCore::stopErrorCapture();
		
		return $cleaned;
	}
	
	
	/**
	 * Compares strings, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
	 * 
	 * Please note that this function sorts based on English language sorting
	 * rules only. Locale-specific sorting is done by
	 * [http://php.net/strcoll strcoll()], however there are technical
	 * limitations.
	 * 
	 * @param  string $str1  The first string to compare
	 * @param  string $str2  The second string to compare
	 * @return integer  < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
	 */
	static public function cmp($str1, $str2)
	{
		// The primary ordering is decided by the ASCII transliterations
		$ascii_order = strcmp(
			strtr($str1, self::$utf8_to_ascii),
			strtr($str2, self::$utf8_to_ascii)
		);
		
		if ($ascii_order !== 0) {
			return $ascii_order;
		}
		
		// Identical transliterations are ordered by their raw UTF-8 bytes
		return strcmp($str1, $str2);
	}
	
	
	/**
	 * Converts an offset in characters to an offset in bytes so that we can use the built-in functions for some operations
	 * 
	 * @param  string  $string  The string to base the offset on
	 * @param  integer $offset  The character offset to convert to bytes
	 * @return integer  The converted offset
	 */
	static private function convertOffsetToBytes($string, $offset)
	{
		if ($offset == 0) {
			return 0;
		}
		
		$len = strlen($string);
		
		// Negative offsets are measured from the end, so the bytes are walked in reverse
		$reverse = $offset < 0;
		if ($reverse) {
			$string = strrev($string);
			$offset = abs($offset);
		}
		
		$byte_offset     = 0;
		$measured_offset = 0;
		
		while ($byte_offset < $len && $measured_offset < $offset) {
			// Every byte that is not a continuation byte (10xxxxxx) represents one character
			if ((ord($string[$byte_offset]) & 0xC0) != 0x80) {
				++$measured_offset;
			}
			++$byte_offset;
		}
		
		// Walking forward, a multi-byte character is counted at its lead byte,
		// so its continuation bytes still have to be skipped to land on a
		// character boundary. Walking in reverse the lead byte comes last, so
		// the character is already complete when it is counted.
		if (!$reverse) {
			while ($byte_offset < $len && (ord($string[$byte_offset]) & 0xC0) == 0x80) {
				++$byte_offset;
			}
		}
		
		return $reverse ? -$byte_offset : $byte_offset;
	}
	
	
	/**
	 * Detects if a UTF-8 string contains any non-ASCII characters
	 * 
	 * @param  string $string  The string to check
	 * @return boolean  If the string contains any non-ASCII characters
	 */
	static private function detect($string)
	{
		// A single byte outside of the 7-bit range is enough to call the string non-ASCII
		$found = preg_match('#[^\x00-\x7F]#', $string);
		return $found ? TRUE : FALSE;
	}
	
	
	/**
	 * Explodes a string on a delimiter
	 * 
	 * If no delimiter is provided, the string will be exploded with each
	 * character being an element in the array.
	 * 
	 * @param  string  $string     The string to explode
	 * @param  string  $delimiter  The string to explode on. If `NULL` or `''` this method will return one character per array index.
	 * @return array  The exploded string
	 */
	static public function explode($string, $delimiter=NULL)
	{
		// A falsy but numeric delimiter, such as '0', still counts as a real delimiter
		$has_delimiter = $delimiter || is_numeric($delimiter);
		
		if (!$has_delimiter) {
			// Without a delimiter every UTF-8 character becomes its own element
			preg_match_all('#.|^\z#us', $string, $characters);
			return $characters[0];
		}
		
		return explode($delimiter, $string);
	}


	/**
	 * This works around a bug in MAMP 1.9.4+ and PHP 5.3 where iconv()
	 * does not seem to properly assign the return value to a variable, but
	 * does work when returning the value.
	 *
	 * @param string $in_charset   The incoming character encoding
	 * @param string $out_charset  The outgoing character encoding
	 * @param string $string       The string to convert
	 * @return string  The converted string
	 */
	static private function iconv($in_charset, $out_charset, $string)
	{
		// The result is returned directly and never stored in a local variable
		// first, since the assignment is what misbehaves in the affected setups
		return iconv($in_charset, $out_charset, $string);
	}
	
	
	/**
	 * Compares strings in a case-insensitive manner, with the resulting order having characters that are based on ASCII letters placed after the relative ASCII characters
	 * 
	 * Please note that this function sorts based on English language sorting
	 * rules only. Locale-specific sorting is done by
	 * [http://php.net/strcoll strcoll()], however there are technical
	 * limitations.
	 * 
	 * @param  string $str1  The first string to compare
	 * @param  string $str2  The second string to compare
	 * @return integer  < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
	 */
	static public function icmp($str1, $str2)
	{
		// Fold both sides to lowercase so the comparison ignores case
		return self::cmp(
			self::lower($str1),
			self::lower($str2)
		);
	}
	
	
	/**
	 * Compares strings using a natural order algorithm in a case-insensitive manner, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
	 * 
	 * Please note that this function sorts based on English language sorting
	 * rules only. Locale-specific sorting is done by
	 * [http://php.net/strcoll strcoll()], however there are technical
	 * limitations.
	 * 
	 * @param  string $str1  The first string to compare
	 * @param  string $str2  The second string to compare
	 * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
	 */
	static public function inatcmp($str1, $str2)
	{
		// Fold both sides to lowercase so the natural order comparison ignores case
		return self::natcmp(
			self::lower($str1),
			self::lower($str2)
		);
	}
	
	
	/**
	 * Finds the first position (in characters) of the search value in the string - case is ignored when performing a match
	 * 
	 * @param  string  $haystack  The string to search in
	 * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
	 * @param  integer $offset    The character position to start searching from
	 * @return mixed  The integer character position of the first occurrence of the needle or `FALSE` if no match
	 */
	static public function ipos($haystack, $needle, $offset=0)
	{
		// Pure ASCII haystacks can use the faster native function
		$is_ascii = !self::detect($haystack);
		if ($is_ascii) {
			return stripos($haystack, $needle, $offset);
		}
		
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		$use_mbstring = self::$mbstring_available && function_exists('mb_stripos');
		if (!$use_mbstring) {
			// No native multibyte search, so lowercase both sides and search case-sensitively
			return self::pos(self::lower($haystack), self::lower($needle), $offset);
		}
		
		return mb_stripos($haystack, $needle, $offset, 'UTF-8');
	}
	
	
	/**
	 * Replaces matching parts of the string, with matches being done in a case-insensitive manner
	 * 
	 * If `$search` and `$replace` are both arrays and `$replace` is shorter,
	 * the extra `$search` strings will be replaced with an empty string. If
	 * `$search` is an array and `$replace` is a string, all `$search` values
	 * will be replaced with the string specified.
	 * 
	 * @param  string $string   The string to perform the replacements on
	 * @param  mixed  $search   The string (or array of strings) to search for - see method description for details
	 * @param  mixed  $replace  The string (or array of strings) to replace with - see method description for details
	 * @return string  The input string with the specified replacements
	 */
	static public function ireplace($string, $search, $replace)
	{
		// Every search value becomes a literal, case-insensitive, UTF-8 aware pattern
		if (is_array($search)) {
			foreach ($search as $key => $needle) {
				$search[$key] = '#' . preg_quote($needle, '#') . '#ui';
			}
		} else {
			$search = '#' . preg_quote($search, '#') . '#ui';
		}
		
		// Backslashes and dollar signs are escaped so that the replacement is
		// inserted literally instead of being read as a backreference
		$escapes = array('\\' => '\\\\', '$' => '\\$');
		
		// strtr() only accepts strings, so an array of replacements has to be
		// escaped one element at a time
		if (is_array($replace)) {
			foreach ($replace as $key => $replacement) {
				$replace[$key] = strtr($replacement, $escapes);
			}
		} else {
			$replace = strtr($replace, $escapes);
		}
		
		return preg_replace($search, $replace, $string);
	}
	
	
	/**
	 * Finds the last position (in characters) of the search value in the string - case is ignored when performing a match
	 * 
	 * @param  string  $haystack  The string to search in
	 * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
	 * @param  integer $offset    The character position to start searching from. A negative value will stop looking that many characters from the end of the string
	 * @return mixed  The integer character position of the last occurrence of the needle or `FALSE` if no match
	 */
	static public function irpos($haystack, $needle, $offset=0)
	{
		// Pure ASCII haystacks can use the faster native function
		$is_ascii = !self::detect($haystack);
		if ($is_ascii) {
			return strripos($haystack, $needle, $offset);
		}
		
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		$use_mbstring = self::$mbstring_available && function_exists('mb_strripos');
		if (!$use_mbstring) {
			// No native multibyte search, so lowercase both sides and search case-sensitively
			return self::rpos(self::lower($haystack), self::lower($needle), $offset);
		}
		
		return mb_strripos($haystack, $needle, $offset, 'UTF-8');
	}
	
	
	/**
	 * Matches a string needle in the string haystack, returning a substring from the beginning of the needle to the end of the haystack
	 * 
	 * Can optionally return the part of the haystack before the needle. Matching
	 * is done in a case-insensitive manner.
	 * 
	 * @param  string  $haystack       The string to search in
	 * @param  string  $needle         The string to search for. This match will be done in a case-insensitive manner.
	 * @param  boolean $before_needle  If a substring of the haystack before the needle should be returned instead of the substring from the needle to the end of the haystack
	 * @return mixed  The specified part of the haystack, or `FALSE` if the needle was not found
	 */
	static public function istr($haystack, $needle, $before_needle=FALSE)
	{
		// We get better performance falling back for ASCII strings
		if ($before_needle == FALSE && !self::detect($haystack)) {
			return stristr($haystack, $needle);
		}
		
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		if (self::$mbstring_available && function_exists('mb_stristr')) {
			return mb_stristr($haystack, $needle, $before_needle, 'UTF-8');
		}
		
		// Without mbstring the match is located in lowercased copies and the
		// resulting byte position is then applied to the original haystack
		$lower_haystack = self::lower($haystack);
		$lower_needle   = self::lower($needle);
		
		$pos = strpos($lower_haystack, $lower_needle);
		
		// A missing needle has to be reported as FALSE - passing the FALSE
		// position on to substr() would return '' or the whole haystack
		if ($pos === FALSE) {
			return FALSE;
		}
		
		if ($before_needle) {
			return substr($haystack, 0, $pos);
		}
		
		return substr($haystack, $pos);
	}
	
	
	/**
	 * Determines the length (in characters) of a string
	 * 
	 * @param  string $string  The string to measure
	 * @return integer  The number of characters in the string
	 */
	static public function len($string)
	{
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		if (!self::$mbstring_available) {
			// utf8_decode() turns every character into a single byte, so the
			// byte length of its output is the character count
			return strlen(utf8_decode($string));
		}
		
		return mb_strlen($string, 'UTF-8');
	}
	
	
	/**
	 * Converts all uppercase characters to lowercase
	 * 
	 * @param  string $string  The string to convert
	 * @return string  The input string with all uppercase characters in lowercase
	 */
	static public function lower($string)
	{
		// Pure ASCII strings can use the faster native function
		$is_ascii = !self::detect($string);
		if ($is_ascii) {
			return strtolower($string);
		}
		
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		if (!self::$mbstring_available) {
			// Without mbstring the whole conversion is done with the lookup table
			return strtr($string, self::$upper_to_lower);
		}
		
		// mb_strtolower() misses some characters, so those are mapped by hand afterwards
		$lowered = mb_strtolower($string, 'utf-8');
		return strtr($lowered, self::$mb_upper_to_lower_fix);
	}
	
	
	/**
	 * Trims whitespace, or any specified characters, from the beginning of a string
	 * 
	 * @param  string $string    The string to trim
	 * @param  string $charlist  The characters to trim
	 * @return string  The trimmed string
	 */
	static public function ltrim($string, $charlist=NULL)
	{
		// With no character list the native whitespace trimming is used. The
		// cast keeps NULL from being handed to a string function.
		$charlist = (string) $charlist;
		if ($charlist === '') {
			return ltrim($string);
		}
		
		// The list is quoted in hyphen-free pieces that are then joined with an
		// escaped hyphen. Depending on the PHP version preg_quote() may or may
		// not escape `-` itself, and escaping it a second time would produce
		// `\\-`, which a character class reads as a literal backslash followed
		// by the start of a range.
		$pieces = explode('-', $charlist);
		foreach ($pieces as $index => $piece) {
			$pieces[$index] = preg_quote($piece, '#');
		}
		$search = implode('\-', $pieces);
		
		// The `a..z` range notation of the native trim functions is turned
		// into the equivalent character class range
		$search = str_replace('\.\.', '-', $search);
		
		return preg_replace('#^[' . $search . ']+#Du', '', $string);
	}
	
	
	/**
	 * Compares strings using a natural order algorithm, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
	 * 
	 * Please note that this function sorts based on English language sorting
	 * rules only. Locale-specific sorting is done by
	 * [http://php.net/strcoll strcoll()], however there are technical
	 * limitations.
	 * 
	 * @param…
Summary ✨

This PHP code defines a class fUTF8 to provide UTF-8 string manipulation functions, mirroring many built-in PHP string functions. It handles operations like character length calculation, case conversion, substring extraction, and string comparison, ensuring correct handling of multi-byte UTF-8 characters. It also includes functions for cleaning invalid UTF-8 characters and converting UTF-8 strings to ASCII. The class checks for the availability of the mbstring extension for improved performance.
Alerts (1)

Complexity hotspot; line 640 (total complexity: 6)
640