/Phergie/Plugin/Encoding.php
PHP | 182 lines | 104 code | 7 blank | 71 comment | 3 complexity | e4de89bad9f1c130bbd9644a7890a313 MD5 | raw file
<?php
/**
 * Phergie
 *
 * PHP version 5
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.
 * It is also available through the world-wide-web at this URL:
 * http://phergie.org/license
 *
 * @category  Phergie
 * @package   Phergie_Plugin_Encoding
 * @author    Phergie Development Team <team@phergie.org>
 * @copyright 2008-2010 Phergie Development Team (http://phergie.org)
 * @license   http://phergie.org/license New BSD License
 * @link      http://pear.phergie.org/package/Phergie_Plugin_Encoding
 */

/**
 * Handles decoding markup entities and converting text between character
 * encodings.
 *
 * @category Phergie
 * @package  Phergie_Plugin_Encoding
 * @author   Phergie Development Team <team@phergie.org>
 * @license  http://phergie.org/license New BSD License
 * @link     http://pear.phergie.org/package/Phergie_Plugin_Encoding
 */
class Phergie_Plugin_Encoding extends Phergie_Plugin_Abstract
{
    /**
     * Named entities that decodeEntities() rewrites before handing the
     * string to html_entity_decode(), mapped to the code point each one
     * decodes to.
     *
     * Keys are matched case-sensitively. Values are Unicode code points,
     * except that &circ;, &minus; and &tilde; map to the ASCII characters
     * ^, - and ~ rather than to their own code points.
     *
     * @var array
     * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73409
     * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73410
     */
    protected static $entities = array(
        '&alpha;' => 945,
        '&apos;' => 39,
        '&beta;' => 946,
        '&bull;' => 8226,
        '&chi;' => 967,
        '&circ;' => 94,
        '&delta;' => 948,
        '&epsilon;' => 949,
        '&eta;' => 951,
        '&fnof;' => 402,
        '&gamma;' => 947,
        '&iota;' => 953,
        '&kappa;' => 954,
        '&lambda;' => 955,
        '&ldquo;' => 8220,
        '&lsaquo;' => 8249,
        '&lsquo;' => 8216,
        '&mdash;' => 8212,
        '&minus;' => 45,
        '&mu;' => 956,
        '&ndash;' => 8211,
        '&nu;' => 957,
        '&oelig;' => 339,
        '&omega;' => 969,
        '&omicron;' => 959,
        '&phi;' => 966,
        '&pi;' => 960,
        '&piv;' => 982,
        '&psi;' => 968,
        '&rdquo;' => 8221,
        '&rho;' => 961,
        '&rsaquo;' => 8250,
        '&rsquo;' => 8217,
        '&scaron;' => 353,
        '&sigma;' => 963,
        '&sigmaf;' => 962,
        '&tau;' => 964,
        '&theta;' => 952,
        '&thetasym;' => 977,
        '&tilde;' => 126,
        '&trade;' => 8482,
        '&upsih;' => 978,
        '&upsilon;' => 965,
        '&xi;' => 958,
        '&yuml;' => 255,
        '&zeta;' => 950,
    );

    /**
     * Decodes markup entities in a given string.
     *
     * Decoding happens in three passes:
     *  1. entities listed in self::$entities are rewritten as decimal
     *     character references;
     *  2. html_entity_decode() decodes named and numeric references into
     *     $charset;
     *  3. any numeric reference still present afterwards is converted with
     *     codeToUtf8(), so that pass always emits UTF-8 bytes regardless of
     *     $charset. References whose value is not an encodable code point
     *     are left in the string unchanged.
     *
     * @param string $string  String containing markup entities
     * @param string $charset Optional character set name to use in decoding
     *        entities, defaults to UTF-8
     *
     * @return string String with markup entities decoded
     */
    public function decodeEntities($string, $charset = 'UTF-8')
    {
        // chr() only yields single bytes, so the table entries are handed
        // to html_entity_decode() as numeric references instead; it then
        // produces the right byte sequence for the requested charset.
        $references = array();
        foreach (self::$entities as $entity => $code) {
            $references[$entity] = '&#' . $code . ';';
        }
        $string = str_replace(
            array_keys($references),
            array_values($references),
            $string
        );

        $string = html_entity_decode($string, ENT_QUOTES, $charset);

        // Hexadecimal alternative first: group 1 holds hex digits, group 2
        // decimal digits. Leading zeros are consumed outside the groups.
        $decoded = preg_replace_callback(
            '/&#(?:x0*([a-f0-9]+)|0*([0-9]+));/i',
            array($this, 'decodeNumericReference'),
            $string
        );

        // preg_replace_callback() returns null on a PCRE failure; keep the
        // partially decoded string in that case rather than losing it.
        return $decoded === null ? $string : $decoded;
    }

    /**
     * Callback for decodeEntities(): converts one matched numeric character
     * reference to UTF-8.
     *
     * @param array $matches Match data; index 0 is the whole reference,
     *        index 1 the hexadecimal digits (empty for a decimal
     *        reference), index 2 the decimal digits (absent for a
     *        hexadecimal reference)
     *
     * @return string UTF-8 character, or the reference itself when its
     *         value cannot be encoded
     */
    protected function decodeNumericReference(array $matches)
    {
        $isDecimal = isset($matches[2]);
        $digits = $isDecimal ? $matches[2] : $matches[1];

        // 0x10FFFF is 6 hex digits / 7 decimal digits; anything longer is
        // out of range and would overflow the integer conversion.
        if (strlen($digits) > ($isDecimal ? 7 : 6)) {
            return $matches[0];
        }

        $code = $isDecimal ? (int) $digits : hexdec($digits);
        $character = $this->codeToUtf8($code);

        return $character === null ? $matches[0] : $character;
    }

    /**
     * Converts a Unicode code point to its UTF-8 byte sequence.
     *
     * @param int $code Code point to convert
     *
     * @return string|null UTF-8 encoded character, or null when $code is
     *         negative, a surrogate (0xD800-0xDFFF) or above 0x10FFFF
     */
    public function codeToUtf8($code)
    {
        $code = (int) $code;

        if ($code < 0 || $code > 0x10FFFF) {
            return null;
        }

        // 1 byte, 7 bits
        if ($code < 0x80) {
            return chr($code);
        }

        // 2 bytes, 11 bits
        if ($code < 0x800) {
            return chr(0xC0 | ($code >> 6))
                . chr(0x80 | ($code & 0x3F));
        }

        // 3 bytes, 16 bits
        if ($code < 0x10000) {
            // Surrogate halves are not valid on their own in UTF-8
            if ($code >= 0xD800 && $code <= 0xDFFF) {
                return null;
            }
            return chr(0xE0 | ($code >> 12))
                . chr(0x80 | (($code >> 6) & 0x3F))
                . chr(0x80 | ($code & 0x3F));
        }

        // 4 bytes, 21 bits
        return chr(0xF0 | ($code >> 18))
            . chr(0x80 | (($code >> 12) & 0x3F))
            . chr(0x80 | (($code >> 6) & 0x3F))
            . chr(0x80 | ($code & 0x3F));
    }

    /**
     * Transliterates characters in a given string where possible.
     *
     * Uses the first available of: the transliterate() function (PECL
     * translit), iconv() with //TRANSLIT, or an entity-based fallback that
     * strips accent/ligature suffixes from named entities. The fallback
     * does not convert to $charsetTo; its result stays in $charsetFrom.
     *
     * @param string $string      String containing characters to
     *        transliterate
     * @param string $charsetFrom Optional character set of the string,
     *        defaults to UTF-8
     * @param string $charsetTo   Optional character set to which the string
     *        should be converted, defaults to ISO-8859-1
     *
     * @return string String with characters transliterated or the original
     *         string if transliteration was not possible
     */
    public function transliterate($string, $charsetFrom = 'UTF-8', $charsetTo = 'ISO-8859-1')
    {
        // @link http://pecl.php.net/package/translit
        if (function_exists('transliterate')) {
            $result = transliterate(
                $string,
                array('han_transliterate', 'diacritical_remove'),
                $charsetFrom,
                $charsetTo
            );
        } elseif (function_exists('iconv')) {
            // iconv() returns false when the conversion fails
            $result = iconv($charsetFrom, $charsetTo . '//TRANSLIT', $string);
        } else {
            // @link http://stackoverflow.com/questions/1284535/php-transliteration/1285491#1285491
            $encoded = htmlentities($string, ENT_COMPAT, $charsetFrom);
            if ($encoded === '') {
                // Either the input was empty or htmlentities() rejected it
                // as invalid for $charsetFrom; nothing to transliterate.
                return $string;
            }

            // "&eacute;" -> "e", "&aelig;" -> "ae", "&szlig;" -> "sz", ...
            $stripped = preg_replace(
                '~&([a-z]{1,2})(acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i',
                '$1',
                $encoded
            );

            // Undo htmlentities() for everything the pattern did not touch
            // so that "&", quotes, "<" etc. are not returned HTML-encoded.
            $result = is_string($stripped)
                ? html_entity_decode($stripped, ENT_COMPAT, $charsetFrom)
                : false;
        }

        return is_string($result) ? $result : $string;
    }
}