ezcharsetinfo.php | searchcode

/lib/ezi18n/classes/ezcharsetinfo.php

https://bitbucket.org/ericsagnes/ezpublish-multisite · PHP · 219 lines · 148 code · 19 blank · 52 comment · 8 complexity · a4e1a27893640a44a279047d9b924346 MD5 · raw file

<?php
/**
 * File containing the eZCharsetInfo class.
 *
 * @copyright Copyright (C) 1999-2012 eZ Systems AS. All rights reserved.
 * @license http://www.gnu.org/licenses/gpl-2.0.txt GNU General Public License v2
 * @version  2012.8
 * @package lib
 */

/*!
  \class eZCharsetInfo ezcharsetinfo.php
  \ingroup eZI18N
  \brief Allows for quering information about charsets

  A charset can be known by multiple names but the internationlization
  system only works with one name. To fetch the real internal name use
  the static realCharsetCode() function.
  Each charset also has a specific encoding scheme associated with it
  which can be fetched with characterEncodingScheme().

*/

class eZCharsetInfo
{
    /*!
     \private
     \static
     \return the hash table with aliases, creates if it doesn't already exist.
    */
    static function &aliasTable()
    {
        $aliasTable =& $GLOBALS['eZCharsetInfoTable'];
        if ( !is_array( $aliasTable ) )
        {
            $aliasTable = array( 'ascii' => 'us-ascii',
                                 'latin1' => 'iso-8859-1',
                                 'latin2' => 'iso-8859-2',
                                 'latin3' => 'iso-8859-3',
                                 'latin4' => 'iso-8859-4',
                                 'latin5' => 'iso-8859-9',
                                 'latin6' => 'iso-8859-10',
                                 'latin7' => 'iso-8859-13',
                                 'latin8' => 'iso-8859-14',
                                 'latin9' => 'iso-8859-15',
                                 'cyrillic' => 'iso-8859-5',
                                 'arabic' => 'iso-8859-6',
                                 'greek' => 'iso-8859-7',
                                 'hebrew' => 'iso-8859-8',
                                 'thai' => 'iso-8859-11',

                                 'koi8-r' => 'koi8-r',
                                 'koi-8-r' => 'koi8-r',
                                 'koi8r' => 'koi8-r',

                                 'koi8-u' => 'koi8-u',
                                 'koi-8-u' => 'koi8-u',
                                 'koi8u' => 'koi8-u',

                                 'cp1250' => 'windows-1250',
                                 'cp1251' => 'windows-1251',
                                 'cp1252' => 'windows-1252',
                                 'cp1253' => 'windows-1253',
                                 'cp1254' => 'windows-1254',
                                 'cp1255' => 'windows-1255',
                                 'cp1256' => 'windows-1256',
                                 'cp1257' => 'windows-1257',
                                 'cp1258' => 'windows-1258',
                                 'winlatin1' => 'windows-1252',
                                 'winlatin2' => 'windows-1250',
                                 'wincyrillic' => 'windows-1251',
                                 'wingreek' => 'windows-1253',
                                 'winturkish' => 'windows-1254',
                                 'winhebrew' => 'windows-1255',
                                 'winarabic' => 'windows-1256',
                                 'winbaltic' => 'windows-1257',
                                 'winvietnamese' => 'windows-1258',

                                 'doslatinus' => 'cp437',
                                 'dosgreek' => 'cp737',
                                 'dosbaltrim' => 'cp775',
                                 'doslatin1' => 'cp850',
                                 'doslatin2' => 'cp852',
                                 'doscyrillic' => 'cp855',
                                 'dosturkish' => 'cp857',
                                 'dosportuguese' => 'cp860',
                                 'dosicelandic' => 'cp861',
                                 'doshebrew' => 'cp862',
                                 'doscanadaf' => 'cp863',
                                 'dosarabic' => 'cp864',
                                 'dosnordic' => 'cp865',
                                 'dosgreek2' => 'cp869',
                                 'doscyrillicrussian' => 'cp866',
                                 'dosthai' => 'cp874',

                                 'macroman' => 'macintosh',
                                 'nextstep' => 'next',

                                 'utf8' => 'utf-8',
                                 'utf7' => 'utf-7',

                                 'utf16' => 'utf-16',
                                 'utf16be' => 'utf-16be',
                                 'utf16le' => 'utf-16le',

                                 'utf32' => 'utf-32',
                                 'utf32be' => 'utf-32be',
                                 'utf32le' => 'utf-32le',

                                 'ucs2le' => 'ucs-2le',

                                 'ucs4' => 'ucs-4',
                                 'ucs4be' => 'ucs-4be',
                                 'ucs4le' => 'ucs-4le',

                                 'ucs2' => 'ucs-2',
                                 'ucs2be' => 'ucs-2be',
                                 'ucs2le' => 'ucs-2le',

                                 'shift-jis' => 'cp932',
                                 'gbk' => 'gbk',
                                 'euc-cn' => 'euc-cn',
                                 'unifiedhangul' => 'cp849',
                                 'uhc' => 'cp849',
                                 'big5' => 'cp850'
                                 );
            for ( $i = 1; $i <= 15; ++$i )
            {
                $aliasTable["iso8859-$i"] = "iso-8859-$i";
                $aliasTable["iso8859$i"] = "iso-8859-$i";
            }
            $aliasTable['unicode'] = 'unicode';
        }
        return $aliasTable;
    }

    /*!
     \private
     \static
     \return the character encoding hash table, creates it if it does not exist.
     The table will map from a character encoding scheme to an array of character sets.
     \sa reverseEncodingTable
    */
    static function &encodingTable()
    {
        $encodingTable =& $GLOBALS['eZCharsetInfoEncodingTable'];
        if ( !is_array( $encodingTable ) )
        {
            $encodingTable = array( 'doublebyte' => array( 'cp932',
                                                           'GBK',
                                                           'euc-cn',
                                                           'cp849',
                                                           'cp850' ),
                                    'unicode' => array( 'unicode' ),
                                    'utf-8' => array( 'utf-8' ) );
        }
        return $encodingTable;
    }

    /*!
     \private
     \static
     \return the reverse character encoding hash table, creates it if it does not exist.
     The table will map from a character set to a character encoding scheme.
     \sa encodingTable
    */
    static function &reverseEncodingTable()
    {
        $reverseEncodingTable =& $GLOBALS['eZCharsetInfoReverseEncodingTable'];
        if ( !is_array( $reverseEncodingTable ) )
        {
            $encodingTable =& eZCharsetInfo::encodingTable();
            $reverseEncodingTable = array();
            foreach( $encodingTable as $encodingScheme => $charsetMatches )
            {
                foreach( $charsetMatches as $charsetMatch )
                    $reverseEncodingTable[$charsetMatch] = $encodingScheme;
            }
        }
        return $reverseEncodingTable;
    }

    /*!
     Tries to find an alias for the charset code and returns it. If no
     alias code could be find the original charset code is returned.
     \note The resulting charset code will be an all lowercase letters.
    */
    static function realCharsetCode( $charsetCode )
    {
        $aliasTable =& eZCharsetInfo::aliasTable();
        $charsetCode = strtolower( $charsetCode );
        if ( isset( $aliasTable[$charsetCode] ) )
            return $aliasTable[$charsetCode];
        // Check alias without any dashes
        $charsetCodeNoDash = str_replace( '-', '', $charsetCode );
        if ( isset( $aliasTable[$charsetCodeNoDash] ) )
            return $aliasTable[$charsetCodeNoDash];
        return $charsetCode;
    }

    /*!
     Tries to figure out the character encoding scheme for the given character set.
     It uses realCharsetCode() to get the correct internal charset so any charset
     can be given to this function.
     Either returns the found encoding scheme or 'singlebyte' if no scheme was found.
     \sa realCharsetCode
    */
    static function characterEncodingScheme( $charsetCode, $isRealCharset = false )
    {
        if ( !$isRealCharset )
            $charsetCode = eZCharsetInfo::realCharsetCode( $charsetCode );
        $reverseEncodingTable =& eZCharsetInfo::reverseEncodingTable();
        if ( isset( $reverseEncodingTable[$charsetCode] ) )
            return $reverseEncodingTable[$charsetCode];
        return 'singlebyte';
    }
}

?>