/classes/fUTF8.php
PHP | 1640 lines | 1010 code | 187 blank | 443 comment | 110 complexity | c9f6b48fb2a19f8390197d74b4538bcb MD5 | raw file
Large files are truncated, but you can click here to view the full file
- <?php
- /**
- * Provides string functions for UTF-8 strings
- *
- * This class is implemented to provide a UTF-8 version of almost every built-in
- * PHP string function. For more information about UTF-8, please visit
- * http://flourishlib.com/docs/UTF-8.
- *
- * @copyright Copyright (c) 2008-2011 Will Bond
- * @author Will Bond [wb] <will@flourishlib.com>
- * @license http://flourishlib.com/license
- *
- * @package Flourish
- * @link http://flourishlib.com/fUTF8
- *
- * @version 1.0.0b15
- * @changes 1.0.0b15 Fixed a bug with using IBM's iconv implementation on AIX [wb, 2011-07-29]
- * @changes 1.0.0b14 Added a workaround for iconv having issues in MAMP 1.9.4+ [wb, 2011-07-26]
- * @changes 1.0.0b13 Fixed notices from being thrown when invalid data is sent to ::clean() [wb, 2011-06-10]
- * @changes 1.0.0b12 Fixed a variable name typo in ::sub() [wb, 2011-05-09]
- * @changes 1.0.0b11 Updated the class to not use phpinfo() to determine the iconv implementation [wb, 2010-11-04]
- * @changes 1.0.0b10 Fixed a bug with capitalizing a lowercase i resulting in a dotted upper-case I [wb, 2010-11-01]
- * @changes 1.0.0b9 Updated class to use fCore::startErrorCapture() instead of `error_reporting()` [wb, 2010-08-09]
- * @changes 1.0.0b8 Removed `e` flag from preg_replace() calls [wb, 2010-06-08]
- * @changes 1.0.0b7 Added the methods ::trim(), ::rtrim() and ::ltrim() [wb, 2010-05-11]
- * @changes 1.0.0b6 Fixed ::clean() to work with PHP installs that use an iconv library that doesn't support //IGNORE [wb, 2010-03-02]
- * @changes 1.0.0b5 Changed ::ucwords() to also uppercase words right after various punctuation [wb, 2009-09-18]
- * @changes 1.0.0b4 Changed replacement values in preg_replace() calls to be properly escaped [wb, 2009-06-11]
- * @changes 1.0.0b3 Fixed a parameter name in ::rpos() from `$search` to `$needle` [wb, 2009-02-06]
- * @changes 1.0.0b2 Fixed a bug in ::explode() with newlines and zero-length delimiters [wb, 2009-02-05]
- * @changes 1.0.0b The initial implementation [wb, 2008-06-01]
- */
- class fUTF8
- {
- // The following constants allow for nice looking callbacks to static methods
- const ascii = 'fUTF8::ascii';
- const chr = 'fUTF8::chr';
- const clean = 'fUTF8::clean';
- const cmp = 'fUTF8::cmp';
- const explode = 'fUTF8::explode';
- const icmp = 'fUTF8::icmp';
- const inatcmp = 'fUTF8::inatcmp';
- const ipos = 'fUTF8::ipos';
- const ireplace = 'fUTF8::ireplace';
- const irpos = 'fUTF8::irpos';
- const istr = 'fUTF8::istr';
- const len = 'fUTF8::len';
- const lower = 'fUTF8::lower';
- const ltrim = 'fUTF8::ltrim';
- const natcmp = 'fUTF8::natcmp';
- const ord = 'fUTF8::ord';
- const pad = 'fUTF8::pad';
- const pos = 'fUTF8::pos';
- const replace = 'fUTF8::replace';
- const reset = 'fUTF8::reset';
- const rev = 'fUTF8::rev';
- const rpos = 'fUTF8::rpos';
- const rtrim = 'fUTF8::rtrim';
- const str = 'fUTF8::str';
- const sub = 'fUTF8::sub';
- const trim = 'fUTF8::trim';
- const ucfirst = 'fUTF8::ucfirst';
- const ucwords = 'fUTF8::ucwords';
- const upper = 'fUTF8::upper';
- const wordwrap = 'fUTF8::wordwrap';
-
-
- /**
- * Depending on how things are compiled, NetBSD and Solaris don't support //IGNORE in iconv()
- *
- * If //IGNORE support is not provided, strings with invalid characters will be truncated
- *
- * @var boolean
- */
- static private $can_ignore_invalid = NULL;
-
- /**
- * All lowercase UTF-8 characters mapped to uppercase characters
- *
- * @var array
- */
- static private $lower_to_upper = array(
- 'a' => 'A', 'b' => 'B', 'c' => 'C', 'd' => 'D', 'e' => 'E', 'f' => 'F',
- 'g' => 'G', 'h' => 'H', 'i' => 'I', 'j' => 'J', 'k' => 'K', 'l' => 'L',
- 'm' => 'M', 'n' => 'N', 'o' => 'O', 'p' => 'P', 'q' => 'Q', 'r' => 'R',
- 's' => 'S', 't' => 'T', 'u' => 'U', 'v' => 'V', 'w' => 'W', 'x' => 'X',
- 'y' => 'Y', 'z' => 'Z', 'à' => 'À', 'á' => 'Á', 'â' => 'Â', 'ã' => 'Ã',
- 'ä' => 'Ä', 'å' => 'Å', 'æ' => 'Æ', 'ç' => 'Ç', 'è' => 'È', 'é' => 'É',
- 'ê' => 'Ê', 'ë' => 'Ë', 'ì' => 'Ì', 'í' => 'Í', 'î' => 'Î', 'ï' => 'Ï',
- 'ð' => 'Ð', 'ñ' => 'Ñ', 'ò' => 'Ò', 'ó' => 'Ó', 'ô' => 'Ô', 'õ' => 'Õ',
- 'ö' => 'Ö', 'ø' => 'Ø', 'ù' => 'Ù', 'ú' => 'Ú', 'û' => 'Û', 'ü' => 'Ü',
- 'ý' => 'Ý', 'þ' => 'Þ', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', 'œ' => 'Œ',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- 'š' => 'Š', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- 'ÿ' => 'Ÿ', '?' => '?', '?' => '?', 'ž' => 'Ž', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', 'ƒ' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?'
- );
-
- /**
- * All lowercase UTF-8 characters not properly handled by [http://php.net/mb_strtoupper mb_strtoupper()] mapped to uppercase characters
- *
- * @var array
- */
- static private $mb_lower_to_upper_fix = array(
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?'
- );
-
- /**
- * All uppercase UTF-8 characters not properly handled by [http://php.net/mb_strtolower mb_strtolower()] mapped to lowercase characters
- *
- * @var array
- */
- static private $mb_upper_to_lower_fix = array(
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?'
- );
-
- /**
- * All uppercase UTF-8 characters mapped to lowercase characters
- *
- * @var array
- */
- static private $upper_to_lower = array(
- 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', 'F' => 'f',
- 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k', 'L' => 'l',
- 'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q', 'R' => 'r',
- 'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x',
- 'Y' => 'y', 'Z' => 'z', 'À' => 'à', 'Á' => 'á', 'Â' => 'â', 'Ã' => 'ã',
- 'Ä' => 'ä', 'Å' => 'å', 'Æ' => 'æ', 'Ç' => 'ç', 'È' => 'è', 'É' => 'é',
- 'Ê' => 'ê', 'Ë' => 'ë', 'Ì' => 'ì', 'Í' => 'í', 'Î' => 'î', 'Ï' => 'ï',
- 'Ð' => 'ð', 'Ñ' => 'ñ', 'Ò' => 'ò', 'Ó' => 'ó', 'Ô' => 'ô', 'Õ' => 'õ',
- 'Ö' => 'ö', 'Ø' => 'ø', 'Ù' => 'ù', 'Ú' => 'ú', 'Û' => 'û', 'Ü' => 'ü',
- 'Ý' => 'ý', 'Þ' => 'þ', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => 'i', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- 'Œ' => 'œ', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', 'Š' => 'š', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', 'Ÿ' => 'ÿ', '?' => '?', '?' => '?', 'Ž' => 'ž', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => 'ƒ', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
- '?' => '?', '?' => '?', '?' => '?'
- );
-
- /**
- * A mapping of all ASCII-based Latin characters, punctuation, symbols and number forms to ASCII.
- *
- * Includes elements from the following Unicode blocks:
- *
- * - Latin-1 Supplement
- * - Latin Extended-A
- * - Latin Extended-B
- * - IPA Extensions
- * - Latin Extended Additional
- * - General Punctuation
- * - Letterlike symbols
- * - Number Forms
- *
- * @var array
- */
- static private $utf8_to_ascii = array(
- // Latin-1 Supplement
- '©' => '(c)', '«' => '<<', '®' => '(R)', '»' => '>>', '¼' => '1/4',
- '½' => '1/2', '¾' => '3/4', 'À' => 'A', 'Á' => 'A', 'Â' => 'A',
- 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE', 'Ç' => 'C',
- 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I',
- 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ñ' => 'N', 'Ò' => 'O',
- 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ø' => 'O',
- 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ý' => 'Y',
- 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a',
- 'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e',
- 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i',
- 'ï' => 'i', 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o',
- 'õ' => 'o', 'ö' => 'o', 'ø' => 'o', 'ù' => 'u', 'ú' => 'u',
- 'û' => 'u', 'ü' => 'u', 'ý' => 'y', 'ÿ' => 'y',
- // Latin Extended-A
- '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'A',
- '?' => 'a', '?' => 'C', '?' => 'c', '?' => 'C', '?' => 'c',
- '?' => 'C', '?' => 'c', '?' => 'C', '?' => 'c', '?' => 'D',
- '?' => 'd', '?' => 'D', '?' => 'd', '?' => 'E', '?' => 'e',
- '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E',
- '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'G', '?' => 'g',
- '?' => 'G', '?' => 'g', '?' => 'G', '?' => 'g', '?' => 'G',
- '?' => 'g', '?' => 'H', '?' => 'h', '?' => 'H', '?' => 'h',
- '?' => 'I', '?' => 'i', '?' => 'I', '?' => 'i', '?' => 'I',
- '?' => 'i', '?' => 'I', '?' => 'i', '?' => 'I', '?' => 'i',
- '?' => 'IJ', '?' => 'ij', '?' => 'J', '?' => 'j', '?' => 'K',
- '?' => 'k', '?' => 'L', '?' => 'l', '?' => 'L', '?' => 'l',
- '?' => 'L', '?' => 'l', '?' => 'L', '?' => 'l', '?' => 'L',
- '?' => 'l', '?' => 'N', '?' => 'n', '?' => 'N', '?' => 'n',
- '?' => 'N', '?' => 'n', '?' => "'n", '?' => 'N', '?' => 'n',
- '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'O',
- '?' => 'o', 'Œ' => 'OE', 'œ' => 'oe', '?' => 'R', '?' => 'r',
- '?' => 'R', '?' => 'r', '?' => 'R', '?' => 'r', '?' => 'S',
- '?' => 's', '?' => 'S', '?' => 's', '?' => 'S', '?' => 's',
- 'Š' => 'S', 'š' => 's', '?' => 'T', '?' => 't', '?' => 'T',
- '?' => 't', '?' => 'T', '?' => 't', '?' => 'U', '?' => 'u',
- '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U',
- '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u',
- '?' => 'W', '?' => 'w', '?' => 'Y', '?' => 'y', 'Ÿ' => 'Y',
- '?' => 'Z', '?' => 'z', '?' => 'Z', '?' => 'z', 'Ž' => 'Z',
- 'ž' => 'z',
- // Latin Extended-B
- '?' => 'b', '?' => 'B', '?' => 'B', '?' => 'b', '?' => 'O',
- '?' => 'C', '?' => 'c', '?' => 'D', '?' => 'D', '?' => 'D',
- '?' => 'd', '?' => 'E', '?' => 'E', '?' => 'F', 'ƒ' => 'f',
- '?' => 'G', '?' => 'I', '?' => 'K', '?' => 'k', '?' => 'l',
- '?' => 'M', '?' => 'N', '?' => 'n', '?' => 'O', '?' => 'O',
- '?' => 'o', '?' => 'OI', '?' => 'oi', '?' => 'P', '?' => 'p',
- '?' => 't', '?' => 'T', '?' => 't', '?' => 'T', '?' => 'U',
- '?' => 'u', '?' => 'V', '?' => 'Y', '?' => 'y', '?' => 'Z',
- '?' => 'z', '?' => '2', '?' => 'DZ', '?' => 'Dz', '?' => 'dz',
- '?' => 'LJ', '?' => 'Lj', '?' => 'lj', '?' => 'Nj', '?' => 'Nj',
- '?' => 'nj', '?' => 'A', '?' => 'a', '?' => 'I', '?' => 'i',
- '?' => 'O', '?' => 'o', '?' => 'U', '?' => 'u', '?' => 'U',
- '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u',
- '?' => 'U', '?' => 'u', '?' => 'e', '?' => 'A', '?' => 'a',
- '?' => 'A', '?' => 'a', '?' => 'AE', '?' => 'ae', '?' => 'G',
- '?' => 'g', '?' => 'G', '?' => 'g', '?' => 'K', '?' => 'k',
- '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'j',
- '?' => 'DZ', '?' => 'Dz', '?' => 'dz', '?' => 'G', '?' => 'g',
- '?' => 'N', '?' => 'n', '?' => 'A', '?' => 'a', '?' => 'AE',
- '?' => 'ae', '?' => 'O', '?' => 'o', '?' => 'A', '?' => 'a',
- '?' => 'A', '?' => 'a', '?' => 'E', '?' => 'e', '?' => 'E',
- '?' => 'e', '?' => 'I', '?' => 'i', '?' => 'I', '?' => 'i',
- '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'R',
- '?' => 'r', '?' => 'R', '?' => 'r', '?' => 'U', '?' => 'u',
- '?' => 'U', '?' => 'u', '?' => 'S', '?' => 's', '?' => 'T',
- '?' => 't', '?' => 'H', '?' => 'h', '?' => 'N', '?' => 'd',
- '?' => 'Z', '?' => 'z', '?' => 'A', '?' => 'a', '?' => 'E',
- '?' => 'e', '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o',
- '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'Y',
- '?' => 'y', '?' => 'l', '?' => 'n', '?' => 't', '?' => 'j',
- '?' => 'db', '?' => 'qp', '?' => 'A', '?' => 'C', '?' => 'c',
- '?' => 'L', '?' => 'T', '?' => 's', '?' => 'z', '?' => 'B',
- '?' => 'U', '?' => 'V', '?' => 'E', '?' => 'e', '?' => 'J',
- '?' => 'j', '?' => 'Q', '?' => 'q', '?' => 'R', '?' => 'r',
- '?' => 'Y', '?' => 'y',
- // IPA Extensions
- '?' => 'a', '?' => 'b', '?' => 'o', '?' => 'c', '?' => 'd',
- '?' => 'd', '?' => 'e', '?' => 'e', '?' => 'e', '?' => 'e',
- '?' => 'e', '?' => 'j', '?' => 'g', '?' => 'g', '?' => 'G',
- '?' => 'h', '?' => 'h', '?' => 'i', '?' => 'I', '?' => 'l',
- '?' => 'l', '?' => 'l', '?' => 'm', '?' => 'm', '?' => 'm',
- '?' => 'n', '?' => 'n', '?' => 'N', '?' => 'o', '?' => 'OE',
- '?' => 'r', '?' => 'r', '?' => 'r', '?' => 'r', '?' => 'r',
- '?' => 'r', '?' => 'r', '?' => 'R', '?' => 'R', '?' => 's',
- '?' => 't', '?' => 't', '?' => 'u', '?' => 'v', '?' => 'v',
- '?' => 'w', '?' => 'y', '?' => 'Y', '?' => 'z', '?' => 'z',
- '?' => 'C', '?' => 'B', '?' => 'e', '?' => 'G', '?' => 'H',
- '?' => 'j', '?' => 'k', '?' => 'L', '?' => 'q', '?' => 'dz',
- '?' => 'dz', '?' => 'ts', '?' => 'tc', '?' => 'ls', '?' => 'lz',
- '?' => 'h', '?' => 'h',
- // Latin Extended Additional
- '?' => 'A', '?' => 'a', '?' => 'B', '?' => 'b', '?' => 'B',
- '?' => 'b', '?' => 'B', '?' => 'b', '?' => 'C', '?' => 'c',
- '?' => 'D', '?' => 'd', '?' => 'D', '?' => 'd', '?' => 'D',
- '?' => 'd', '?' => 'D', '?' => 'd', '?' => 'D', '?' => 'd',
- '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E',
- '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e',
- '?' => 'F', '?' => 'f', '?' => 'G', '?' => 'g', '?' => 'H',
- '?' => 'h', '?' => 'H', '?' => 'h', '?' => 'H', '?' => 'h',
- '?' => 'H', '?' => 'h', '?' => 'H', '?' => 'h', '?' => 'I',
- '?' => 'i', '?' => 'I', '?' => 'i', '?' => 'K', '?' => 'k',
- '?' => 'K', '?' => 'k', '?' => 'K', '?' => 'k', '?' => 'L',
- '?' => 'l', '?' => 'L', '?' => 'l', '?' => 'L', '?' => 'l',
- '?' => 'L', '?' => 'l', '?' => 'M', '?' => 'm', '?' => 'M',
- '?' => 'm', '?' => 'M', '?' => 'm', '?' => 'N', '?' => 'n',
- '?' => 'N', '?' => 'n', '?' => 'N', '?' => 'n', '?' => 'N',
- '?' => 'n', '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o',
- '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'P',
- '?' => 'p', '?' => 'P', '?' => 'p', '?' => 'R', '?' => 'r',
- '?' => 'R', '?' => 'r', '?' => 'R', '?' => 'r', '?' => 'R',
- '?' => 'r', '?' => 'S', '?' => 's', '?' => 'S', '?' => 's',
- '?' => 'S', '?' => 's', '?' => 'S', '?' => 's', '?' => 'S',
- '?' => 's', '?' => 'T', '?' => 't', '?' => 'T', '?' => 't',
- '?' => 'T', '?' => 't', '?' => 'T', '?' => 't', '?' => 'U',
- '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u',
- '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'V',
- '?' => 'v', '?' => 'V', '?' => 'v', '?' => 'W', '?' => 'w',
- '?' => 'W', '?' => 'w', '?' => 'W', '?' => 'w', '?' => 'W',
- '?' => 'w', '?' => 'W', '?' => 'w', '?' => 'X', '?' => 'x',
- '?' => 'X', '?' => 'x', '?' => 'Y', '?' => 'y', '?' => 'Z',
- '?' => 'z', '?' => 'Z', '?' => 'z', '?' => 'Z', '?' => 'z',
- '?' => 'h', '?' => 't', '?' => 'w', '?' => 'y', '?' => 'a',
- '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'A',
- '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a',
- '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'A',
- '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a',
- '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'E',
- '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e',
- '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E',
- '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e',
- '?' => 'I', '?' => 'i', '?' => 'I', '?' => 'i', '?' => 'O',
- '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o',
- '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'O',
- '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o',
- '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'O',
- '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'U', '?' => 'u',
- '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U',
- '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u',
- '?' => 'U', '?' => 'u', '?' => 'Y', '?' => 'y', '?' => 'Y',
- '?' => 'y', '?' => 'Y', '?' => 'y', '?' => 'Y', '?' => 'y',
- // General Punctuation
- '?' => ' ', '?' => ' ', '?' => ' ', '?' => ' ', '?' => ' ',
- '?' => ' ', '?' => ' ', '?' => ' ', '?' => ' ', '?' => ' ',
- '?' => ' ', '?' => '', '?' => '', '?' => '', '?' => '-',
- '?' => '-', '?' => '-', '–' => '-', '—' => '-', '?' => '-',
- '?' => '||', '‘' => "'", '’' => "'", '‚' => ',', '?' => "'",
- '“' => '"', '”' => '"', '?' => '"', '?' => '.', '?' => '..',
- '…' => '...', '?' => ' ', '?' => "'", '?' => '"', '?' => '\'"',
- '?' => "'", '?' => '"', '?' => '"\'', '‹' => '<', '›' => '>',
- '?' => '!!', '?' => '?!', '?' => '/', '?' => '?/', '?' => '?!',
- '?' => '!?',
- // Letterlike Symbols
- '?' => 'SM', '™' => 'TM',
- // Number Forms
- '?' => '1/3', '?' => '2/3', '?' => '1/5', '?' => '2/5', '?' => '3/5',
- '?' => '4/5', '?' => '1/6', '?' => '5/6', '?' => '1/8', '?' => '3/8',
- '?' => '5/8', '?' => '7/8', '?' => 'I', '?' => 'II', '?' => 'III',
- '?' => 'IV', '?' => 'V', '?' => 'Vi', '?' => 'VII', '?' => 'VIII',
- '?' => 'IX', '?' => 'X', '?' => 'XI', '?' => 'XII', '?' => 'L',
- '?' => 'C', '?' => 'D', '?' => 'M', '?' => 'i', '?' => 'ii',
- '?' => 'iii', '?' => 'iv', '?' => 'v', '?' => 'vi', '?' => 'vii',
- '?' => 'viii','?' => 'ix', '?' => 'x', '?' => 'xi', '?' => 'xii',
- '?' => 'l', '?' => 'c', '?' => 'd', '?' => 'm'
- );
-
/**
 * Cached flag telling if the [http://php.net/mbstring mbstring] extension is available
 *
 * Starts out as `NULL`, meaning "not checked yet". Methods that can use
 * mbstring call ::checkMbString() when they find `NULL` here, which replaces
 * it with the boolean result of the check.
 *
 * @var boolean|NULL
 */
static private $mbstring_available = NULL;
-
-
/**
 * Maps UTF-8 ASCII-based latin characters, punctuation, symbols and number forms to ASCII
 *
 * Every character found in the translation map is replaced with its ASCII
 * equivalent. Any non-ASCII byte that is still present after the translation
 * is removed from the result.
 *
 * This method is most useful for situations that only allow ASCII, such as
 * URLs.
 *
 * Translates elements from the following unicode blocks:
 *
 *  - Latin-1 Supplement
 *  - Latin Extended-A
 *  - Latin Extended-B
 *  - IPA Extensions
 *  - Latin Extended Additional
 *  - General Punctuation
 *  - Letterlike symbols
 *  - Number Forms
 *
 * @internal
 *
 * @param  string $string  The string to convert
 * @return string  The input string in pure ASCII
 */
static public function ascii($string)
{
    // Only strings that contain non-ASCII bytes need any work
    if (self::detect($string)) {
        $translated = strtr($string, self::$utf8_to_ascii);

        // Whatever could not be translated is dropped
        return preg_replace('#[^\x00-\x7F]#', '', $translated);
    }

    return $string;
}
-
-
/**
 * Records whether the [http://php.net/mbstring mbstring] extension is loaded
 *
 * The boolean result is stored in `self::$mbstring_available` so that the
 * methods of this class can pick between mbstring and their own fallbacks.
 *
 * @return void
 */
static private function checkMbString()
{
    $is_loaded = extension_loaded('mbstring');

    self::$mbstring_available = $is_loaded;
}
-
-
/**
 * Converts a unicode code point into the UTF-8 character that encodes it
 *
 * Only Unicode scalar values can be encoded: the code point has to be
 * between `U+0000` and `U+10FFFF` and must not be one of the UTF-16
 * surrogates (`U+D800` through `U+DFFF`), since neither surrogates nor
 * values above `U+10FFFF` have a valid UTF-8 representation.
 *
 * @throws fProgrammerException  When the code point is not numeric, is negative, is a surrogate or is greater than `U+10FFFF`
 *
 * @param  mixed $unicode_code_point  The character to create, either the `U+hex` or decimal code point
 * @return string  The UTF-8 character
 */
static public function chr($unicode_code_point)
{
    // The `U+hex` notation is turned into the decimal code point
    if (is_string($unicode_code_point) && substr($unicode_code_point, 0, 2) == 'U+') {
        $unicode_code_point = hexdec(substr($unicode_code_point, 2));
    }

    $is_valid = is_numeric($unicode_code_point)
        && $unicode_code_point >= 0
        && $unicode_code_point <= 0x10FFFF
        && ($unicode_code_point < 0xD800 || $unicode_code_point > 0xDFFF);

    if (!$is_valid) {
        throw new fProgrammerException(
            'The code point specified, %s, is invalid.',
            $unicode_code_point
        );
    }

    $code_point = (int) $unicode_code_point;

    // One byte characters: 0xxxxxxx
    if ($code_point < 0x80) {
        return chr($code_point);
    }

    // Two byte characters: 110xxxxx 10xxxxxx
    if ($code_point < 0x800) {
        return chr(0xC0 | ($code_point >> 6))
            . chr(0x80 | ($code_point & 0x3F));
    }

    // Three byte characters: 1110xxxx 10xxxxxx 10xxxxxx
    if ($code_point < 0x10000) {
        return chr(0xE0 | ($code_point >> 12))
            . chr(0x80 | (($code_point >> 6) & 0x3F))
            . chr(0x80 | ($code_point & 0x3F));
    }

    // Four byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($code_point >> 18))
        . chr(0x80 | (($code_point >> 12) & 0x3F))
        . chr(0x80 | (($code_point >> 6) & 0x3F))
        . chr(0x80 | ($code_point & 0x3F));
}
-
-
/**
 * Removes any invalid UTF-8 characters from a string or array of strings
 *
 * Arrays are processed recursively, with every element being replaced by
 * its cleaned version under the same key. Any other value is cast to a
 * string and passed through iconv from UTF-8 to UTF-8. The `//IGNORE` flag
 * is only added when `ICONV_IMPL` is neither `unknown` nor `ibm iconv`.
 *
 * @param  array|string $value  The string or array of strings to clean
 * @return array|string  The cleaned string, or the array with all of its values cleaned
 */
static public function clean($value)
{
    // Arrays are cleaned one element at a time, keeping the keys as they are
    if (is_array($value)) {
        foreach ($value as $key => $element) {
            $value[$key] = self::clean($element);
        }
        return $value;
    }

    // The iconv implementation is only inspected on the first call
    if (self::$can_ignore_invalid === NULL) {
        $implementation = strtolower(ICONV_IMPL);
        self::$can_ignore_invalid = !in_array($implementation, array('unknown', 'ibm iconv'));
    }

    $out_charset = 'UTF-8';
    if (self::$can_ignore_invalid) {
        $out_charset .= '//IGNORE';
    }

    // Notices raised by iconv for invalid data are captured instead of shown
    fCore::startErrorCapture(E_NOTICE);
    $value = self::iconv('UTF-8', $out_charset, (string) $value);
    fCore::stopErrorCapture();

    return $value;
}
-
-
/**
 * Compares strings, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
 *
 * Both strings are first compared by their ASCII translations. When those
 * are identical, the original UTF-8 strings are compared to break the tie.
 *
 * Please note that this function sorts based on English language sorting
 * rules only. Locale-specific sorting is done by
 * [http://php.net/strcoll strcoll()], however there are technical
 * limitations.
 *
 * @param  string $str1  The first string to compare
 * @param  string $str2  The second string to compare
 * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
 */
static public function cmp($str1, $str2)
{
    $result = strcmp(
        strtr($str1, self::$utf8_to_ascii),
        strtr($str2, self::$utf8_to_ascii)
    );

    // Equal ASCII representations fall back to the UTF-8 representations
    return ($result === 0) ? strcmp($str1, $str2) : $result;
}
-
-
/**
 * Converts an offset in characters to an offset in bytes so that the built-in functions can be used for some operations
 *
 * A positive offset is counted from the beginning of the string and results
 * in the byte index at which that character starts. A negative offset is
 * counted from the end of the string and results in a negative byte offset.
 * Offsets that reach beyond the string are limited to the byte length of the
 * string (or its negative).
 *
 * Characters are recognized by their first byte: every byte that is not a
 * UTF-8 continuation byte (`10xxxxxx`) starts a new character.
 *
 * @param  string  $string  The string to base the offset on
 * @param  integer $offset  The character offset to convert to bytes
 * @return integer  The converted offset
 */
static private function convertOffsetToBytes($string, $offset)
{
    if ($offset == 0) {
        return 0;
    }

    $len = strlen($string);

    if ($offset > 0) {
        $characters_passed = 0;

        for ($i=0; $i<$len; $i++) {
            // Continuation bytes belong to the character that was already counted
            if ((ord($string[$i]) & 0xC0) == 0x80) {
                continue;
            }
            // The first byte of the character right after the skipped ones is the answer
            if ($characters_passed >= $offset) {
                return $i;
            }
            ++$characters_passed;
        }

        return $len;
    }

    // Negative offsets are resolved by walking backwards from the end
    $characters_wanted = abs($offset);
    $characters_passed = 0;

    for ($i=$len-1; $i>=0; $i--) {
        if ((ord($string[$i]) & 0xC0) == 0x80) {
            continue;
        }
        ++$characters_passed;
        if ($characters_passed >= $characters_wanted) {
            return $i - $len;
        }
    }

    return -$len;
}
-
-
/**
 * Detects if a UTF-8 string contains any non-ASCII characters
 *
 * The check looks for at least one byte outside of the `0x00`-`0x7F` range.
 *
 * @param  string $string  The string to check
 * @return boolean  If the string contains any non-ASCII characters
 */
static private function detect($string)
{
    $found = preg_match('#[^\x00-\x7F]#', $string);

    return (boolean) $found;
}
-
-
/**
 * Explodes a string on a delimiter
 *
 * If no delimiter is provided, the string will be split into its individual
 * characters, with each character being an element in the array.
 *
 * @param  string $string     The string to explode
 * @param  string $delimiter  The string to explode on. If `NULL` or `''` this method will return one character per array index.
 * @return array  The exploded string
 */
static public function explode($string, $delimiter=NULL)
{
    // Numeric delimiters such as `0` or `'0'` are falsy, but still count as a delimiter
    $has_delimiter = $delimiter || is_numeric($delimiter);

    if (!$has_delimiter) {
        // Without a delimiter, every character becomes an element of its own
        preg_match_all('#.|^\z#us', $string, $characters);
        return $characters[0];
    }

    return explode($delimiter, $string);
}
/**
 * Converts a string from one character encoding to another using iconv()
 *
 * This wrapper exists to work around a bug in MAMP 1.9.4+ and PHP 5.3 where
 * iconv() does not seem to properly assign the return value to a variable,
 * but does work when returning the value. For that reason the result of
 * iconv() must be returned directly and never stored in a variable first.
 *
 * @param  string $in_charset   The incoming character encoding
 * @param  string $out_charset  The outgoing character encoding
 * @param  string $string       The string to convert
 * @return string  The converted string
 */
static private function iconv($in_charset, $out_charset, $string)
{
    return iconv(
        $in_charset,
        $out_charset,
        $string
    );
}
-
-
/**
 * Compares strings in a case-insensitive manner, with the resulting order having characters that are based on ASCII letters placed after the relative ASCII characters
 *
 * Both strings are converted to lowercase and then compared with ::cmp().
 *
 * Please note that this function sorts based on English language sorting
 * rules only. Locale-specific sorting is done by
 * [http://php.net/strcoll strcoll()], however there are technical
 * limitations.
 *
 * @param  string $str1  The first string to compare
 * @param  string $str2  The second string to compare
 * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
 */
static public function icmp($str1, $str2)
{
    return self::cmp(
        self::lower($str1),
        self::lower($str2)
    );
}
-
-
/**
 * Compares strings using a natural order algorithm in a case-insensitive manner, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
 *
 * Both strings are converted to lowercase and then compared with ::natcmp().
 *
 * Please note that this function sorts based on English language sorting
 * rules only. Locale-specific sorting is done by
 * [http://php.net/strcoll strcoll()], however there are technical
 * limitations.
 *
 * @param  string $str1  The first string to compare
 * @param  string $str2  The second string to compare
 * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
 */
static public function inatcmp($str1, $str2)
{
    return self::natcmp(
        self::lower($str1),
        self::lower($str2)
    );
}
-
-
/**
 * Finds the first position (in characters) of the search value in the string - case is ignored when performing a match
 *
 * Haystacks without any non-ASCII characters are passed to stripos(). For
 * all others mb_stripos() is used if it exists, with the fallback being a
 * ::pos() search on lowercase copies of the haystack and needle.
 *
 * @param  string  $haystack  The string to search in
 * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
 * @param  integer $offset    The character position to start searching from
 * @return mixed  The integer character position of the first occurrence of the needle or `FALSE` if no match
 */
static public function ipos($haystack, $needle, $offset=0)
{
    // The native function is faster whenever the haystack is plain ASCII
    if (!self::detect($haystack)) {
        return stripos($haystack, $needle, $offset);
    }

    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    $use_mbstring = self::$mbstring_available && function_exists('mb_stripos');

    if (!$use_mbstring) {
        return self::pos(self::lower($haystack), self::lower($needle), $offset);
    }

    return mb_stripos($haystack, $needle, $offset, 'UTF-8');
}
-
-
/**
 * Replaces matching parts of the string, with matches being done in a case-insensitive manner
 *
 * If `$search` and `$replace` are both arrays and `$replace` is shorter,
 * the extra `$search` strings will be replaced with an empty string. If
 * `$search` is an array and `$replace` is a string, all `$search` values
 * will be replaced with the string specified.
 *
 * Search values are always matched literally and replacement values are
 * always inserted literally - regular expression syntax and back references
 * such as `$1` or `\1` have no special meaning in either of them.
 *
 * @param  string $string   The string to perform the replacements on
 * @param  mixed  $search   The string (or array of strings) to search for - see method description for details
 * @param  mixed  $replace  The string (or array of strings) to replace with - see method description for details
 * @return string  The input string with the specified replacements
 */
static public function ireplace($string, $search, $replace)
{
    // Each search string becomes a literal, case-insensitive UTF-8 pattern
    if (is_array($search)) {
        $patterns = array();
        foreach ($search as $key => $needle) {
            $patterns[$key] = '#' . preg_quote($needle, '#') . '#ui';
        }
    } else {
        $patterns = '#' . preg_quote($search, '#') . '#ui';
    }

    // Backslashes and dollar signs are escaped so preg_replace() does not
    // treat them as back references
    $escapes = array('\\' => '\\\\', '$' => '\\$');

    if (is_array($replace)) {
        // strtr() only accepts strings, so arrays are escaped element by element
        $replacements = array();
        foreach ($replace as $key => $replacement) {
            $replacements[$key] = strtr($replacement, $escapes);
        }
    } else {
        $replacements = strtr($replace, $escapes);
    }

    return preg_replace($patterns, $replacements, $string);
}
-
-
/**
 * Finds the last position (in characters) of the search value in the string - case is ignored when performing a match
 *
 * Haystacks without any non-ASCII characters are passed to strripos(). For
 * all others mb_strripos() is used if it exists, with the fallback being a
 * ::rpos() search on lowercase copies of the haystack and needle.
 *
 * @param  string  $haystack  The string to search in
 * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
 * @param  integer $offset    The character position to start searching from. A negative value will stop looking that many characters from the end of the string
 * @return mixed  The integer character position of the last occurrence of the needle or `FALSE` if no match
 */
static public function irpos($haystack, $needle, $offset=0)
{
    // The native function is faster whenever the haystack is plain ASCII
    if (!self::detect($haystack)) {
        return strripos($haystack, $needle, $offset);
    }

    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    $use_mbstring = self::$mbstring_available && function_exists('mb_strripos');

    if (!$use_mbstring) {
        return self::rpos(self::lower($haystack), self::lower($needle), $offset);
    }

    return mb_strripos($haystack, $needle, $offset, 'UTF-8');
}
-
-
/**
 * Matches a string needle in the string haystack, returning a substring from the beginning of the needle to the end of the haystack
 *
 * Can optionally return the part of the haystack before the needle. Matching
 * is done in a case-insensitive manner.
 *
 * @param  string  $haystack       The string to search in
 * @param  string  $needle         The string to search for. This match will be done in a case-insensitive manner.
 * @param  boolean $before_needle  If a substring of the haystack before the needle should be returned instead of the substring from the needle to the end of the haystack
 * @return mixed  The specified part of the haystack, or `FALSE` if the needle was not found
 */
static public function istr($haystack, $needle, $before_needle=FALSE)
{
    // We get better performance falling back for ASCII strings
    if ($before_needle == FALSE && !self::detect($haystack)) {
        return stristr($haystack, $needle);
    }

    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    if (self::$mbstring_available && function_exists('mb_stristr')) {
        return mb_stristr($haystack, $needle, $before_needle, 'UTF-8');
    }

    // Without mbstring the needle is located in lowercase copies of the strings.
    // NOTE(review): the byte position found in the lowercase haystack is applied
    // to the original haystack, which assumes lowercasing never changes the
    // byte length of a character - confirm against the ::lower() mapping.
    $lower_haystack = self::lower($haystack);
    $lower_needle   = self::lower($needle);

    $pos = strpos($lower_haystack, $lower_needle);

    // A missing needle has to be reported, otherwise substr() would treat
    // the `FALSE` position as `0` and hand back part of the haystack
    if ($pos === FALSE) {
        return FALSE;
    }

    if ($before_needle) {
        return substr($haystack, 0, $pos);
    }

    return substr($haystack, $pos);
}
-
-
/**
 * Determines the length (in characters) of a string
 *
 * mb_strlen() is used when the mbstring extension is available, otherwise
 * the string is converted with utf8_decode() and its bytes are counted.
 *
 * @param  string $string  The string to measure
 * @return integer  The number of characters in the string
 */
static public function len($string)
{
    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    if (!self::$mbstring_available) {
        // utf8_decode() converts to the single-byte ISO-8859-1 encoding, so
        // the number of bytes afterwards matches the number of characters
        return strlen(utf8_decode($string));
    }

    return mb_strlen($string, 'UTF-8');
}
-
-
/**
 * Converts all uppercase characters to lowercase
 *
 * Strings without any non-ASCII characters are passed to strtolower(). For
 * all others mb_strtolower() is used if the mbstring extension is available,
 * followed by a fix-up translation; otherwise the conversion is done with
 * the uppercase to lowercase map of this class.
 *
 * @param  string $string  The string to convert
 * @return string  The input string with all uppercase characters in lowercase
 */
static public function lower($string)
{
    // The native function is faster whenever the string is plain ASCII
    if (!self::detect($string)) {
        return strtolower($string);
    }

    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    if (!self::$mbstring_available) {
        return strtr($string, self::$upper_to_lower);
    }

    // mb_strtolower() misses some characters, so those are translated afterwards
    return strtr(
        mb_strtolower($string, 'utf-8'),
        self::$mb_upper_to_lower_fix
    );
}
-
-
/**
 * Trims whitespace, or any specified characters, from the beginning of a string
 *
 * When no characters are specified, the work is handed to the built-in
 * ltrim(). Otherwise every character in `$charlist` is trimmed literally,
 * with the exception of `..` between two characters, which denotes a range
 * of characters, such as `a..z`.
 *
 * @param  string $string    The string to trim
 * @param  string $charlist  The characters to trim
 * @return string  The trimmed string
 */
static public function ltrim($string, $charlist=NULL)
{
    // The cast keeps a `NULL` charlist from being passed to a string function
    if ((string) $charlist === '') {
        return ltrim($string);
    }

    $search = preg_quote($charlist, '#');

    // preg_quote() escapes `-` itself since PHP 5.3. Escaping it a second
    // time would create an escaped backslash followed by an active range
    // operator inside the character class, so it is only done when needed.
    if (preg_quote('-') === '-') {
        $search = str_replace('-', '\-', $search);
    }

    // A quoted `..` is the range syntax of the charlist
    $search = str_replace('\.\.', '-', $search);

    return preg_replace('#^[' . $search . ']+#Du', '', $string);
}
-
-
- /**
- * Compares strings using a natural order algorithm, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
- *
- * Please note that this function sorts based on English language sorting
- * rules only. Locale-sepcific sorting is done by
- * [http://php.net/strcoll strcoll()], however there are technical
- * limitations.
- *
- * @param string $str1 The first string to compare
- * @param string $str2 The second string to compare
- * @return integer `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
- */
- static public function…
Large files are truncated, but you can click here to view the full file