/lib/ezi18n/classes/ezcharsetinfo.php

https://bitbucket.org/ericsagnes/ezpublish-multisite · PHP · 219 lines · 148 code · 19 blank · 52 comment · 8 complexity · a4e1a27893640a44a279047d9b924346 MD5 · raw file

  1. <?php
  2. /**
  3. * File containing the eZCharsetInfo class.
  4. *
  5. * @copyright Copyright (C) 1999-2012 eZ Systems AS. All rights reserved.
  6. * @license http://www.gnu.org/licenses/gpl-2.0.txt GNU General Public License v2
  7. * @version 2012.8
  8. * @package lib
  9. */
  /*!
    \class eZCharsetInfo ezcharsetinfo.php
    \ingroup eZI18N
    \brief Allows for querying information about charsets

    A charset can be known by multiple names but the internationalization
    system only works with one name. To fetch the real internal name use
    the static realCharsetCode() function.

    Each charset also has a specific encoding scheme associated with it
    which can be fetched with characterEncodingScheme().
  */
  class eZCharsetInfo
  {
      /*!
       \private
       \static
       \return the hash table with aliases, creates it if it doesn't already exist.

       The table maps a lowercase alias to the internal charset code. It is
       cached in $GLOBALS['eZCharsetInfoTable'] and returned by reference, so
       the array is only built on the first call.
      */
      static function &aliasTable()
      {
          $aliasTable =& $GLOBALS['eZCharsetInfoTable'];
          if ( !is_array( $aliasTable ) )
          {
              $aliasTable = array( 'ascii' => 'us-ascii',
                                   'latin1' => 'iso-8859-1',
                                   'latin2' => 'iso-8859-2',
                                   'latin3' => 'iso-8859-3',
                                   'latin4' => 'iso-8859-4',
                                   'latin5' => 'iso-8859-9',
                                   'latin6' => 'iso-8859-10',
                                   'latin7' => 'iso-8859-13',
                                   'latin8' => 'iso-8859-14',
                                   'latin9' => 'iso-8859-15',
                                   'cyrillic' => 'iso-8859-5',
                                   'arabic' => 'iso-8859-6',
                                   'greek' => 'iso-8859-7',
                                   'hebrew' => 'iso-8859-8',
                                   'thai' => 'iso-8859-11',

                                   'koi8-r' => 'koi8-r',
                                   'koi-8-r' => 'koi8-r',
                                   'koi8r' => 'koi8-r',
                                   'koi8-u' => 'koi8-u',
                                   'koi-8-u' => 'koi8-u',
                                   'koi8u' => 'koi8-u',

                                   'cp1250' => 'windows-1250',
                                   'cp1251' => 'windows-1251',
                                   'cp1252' => 'windows-1252',
                                   'cp1253' => 'windows-1253',
                                   'cp1254' => 'windows-1254',
                                   'cp1255' => 'windows-1255',
                                   'cp1256' => 'windows-1256',
                                   'cp1257' => 'windows-1257',
                                   'cp1258' => 'windows-1258',
                                   'winlatin1' => 'windows-1252',
                                   'winlatin2' => 'windows-1250',
                                   'wincyrillic' => 'windows-1251',
                                   'wingreek' => 'windows-1253',
                                   'winturkish' => 'windows-1254',
                                   'winhebrew' => 'windows-1255',
                                   'winarabic' => 'windows-1256',
                                   'winbaltic' => 'windows-1257',
                                   'winvietnamese' => 'windows-1258',

                                   'doslatinus' => 'cp437',
                                   'dosgreek' => 'cp737',
                                   'dosbaltrim' => 'cp775',
                                   'doslatin1' => 'cp850',
                                   'doslatin2' => 'cp852',
                                   'doscyrillic' => 'cp855',
                                   'dosturkish' => 'cp857',
                                   'dosportuguese' => 'cp860',
                                   'dosicelandic' => 'cp861',
                                   'doshebrew' => 'cp862',
                                   'doscanadaf' => 'cp863',
                                   'dosarabic' => 'cp864',
                                   'dosnordic' => 'cp865',
                                   'dosgreek2' => 'cp869',
                                   'doscyrillicrussian' => 'cp866',
                                   'dosthai' => 'cp874',

                                   'macroman' => 'macintosh',
                                   'nextstep' => 'next',

                                   'utf8' => 'utf-8',
                                   'utf7' => 'utf-7',
                                   'utf16' => 'utf-16',
                                   'utf16be' => 'utf-16be',
                                   'utf16le' => 'utf-16le',
                                   'utf32' => 'utf-32',
                                   'utf32be' => 'utf-32be',
                                   'utf32le' => 'utf-32le',
                                   'ucs2' => 'ucs-2',
                                   'ucs2be' => 'ucs-2be',
                                   'ucs2le' => 'ucs-2le',
                                   'ucs4' => 'ucs-4',
                                   'ucs4be' => 'ucs-4be',
                                   'ucs4le' => 'ucs-4le',

                                   'shift-jis' => 'cp932',
                                   'gbk' => 'gbk',
                                   'euc-cn' => 'euc-cn',
                                   // Unified Hangul Code is Windows code page 949 and
                                   // Big5 is code page 950. They must not share a code
                                   // with the single-byte DOS pages above (cp850 is
                                   // already 'doslatin1').
                                   'unifiedhangul' => 'cp949',
                                   'uhc' => 'cp949',
                                   'big5' => 'cp950'
                                   );

              // Accept the dash-less spellings of the ISO-8859 family,
              // e.g. "iso8859-1" and "iso88591" both become "iso-8859-1".
              for ( $i = 1; $i <= 15; ++$i )
              {
                  $aliasTable["iso8859-$i"] = "iso-8859-$i";
                  $aliasTable["iso8859$i"] = "iso-8859-$i";
              }

              $aliasTable['unicode'] = 'unicode';
          }
          return $aliasTable;
      }

      /*!
       \private
       \static
       \return the character encoding hash table, creates it if it does not exist.

       The table will map from a character encoding scheme to an array of character sets.
       The character sets must be written exactly as realCharsetCode() returns them,
       i.e. in lowercase, otherwise the reverse lookup cannot match them.
       The table is cached in $GLOBALS['eZCharsetInfoEncodingTable'].
       \sa reverseEncodingTable
      */
      static function &encodingTable()
      {
          $encodingTable =& $GLOBALS['eZCharsetInfoEncodingTable'];
          if ( !is_array( $encodingTable ) )
          {
              $encodingTable = array( 'doublebyte' => array( 'cp932',
                                                             'gbk',
                                                             'euc-cn',
                                                             'cp949',
                                                             'cp950' ),
                                      'unicode' => array( 'unicode' ),
                                      'utf-8' => array( 'utf-8' ) );
          }
          return $encodingTable;
      }

      /*!
       \private
       \static
       \return the reverse character encoding hash table, creates it if it does not exist.

       The table will map from a character set to a character encoding scheme.
       It is derived from encodingTable() and cached in
       $GLOBALS['eZCharsetInfoReverseEncodingTable'].
       \sa encodingTable
      */
      static function &reverseEncodingTable()
      {
          $reverseEncodingTable =& $GLOBALS['eZCharsetInfoReverseEncodingTable'];
          if ( !is_array( $reverseEncodingTable ) )
          {
              $reverseEncodingTable = array();
              foreach ( self::encodingTable() as $encodingScheme => $charsetMatches )
              {
                  foreach ( $charsetMatches as $charsetMatch )
                  {
                      $reverseEncodingTable[$charsetMatch] = $encodingScheme;
                  }
              }
          }
          return $reverseEncodingTable;
      }

      /*!
       \static
       Tries to find an alias for the charset code and returns it. If no
       alias code could be found the original charset code is returned
       in lowercase.

       \param $charsetCode the charset name to resolve, matched case-insensitively.
       \return the internal charset code as a string.
       \note The resulting charset code will be in all lowercase letters.
      */
      static function realCharsetCode( $charsetCode )
      {
          $aliasTable =& self::aliasTable();
          $charsetCode = strtolower( $charsetCode );
          if ( isset( $aliasTable[$charsetCode] ) )
          {
              return $aliasTable[$charsetCode];
          }

          // Check alias without any dashes, so that e.g. "utf-8" matches the "utf8" alias
          $charsetCodeNoDash = str_replace( '-', '', $charsetCode );
          if ( isset( $aliasTable[$charsetCodeNoDash] ) )
          {
              return $aliasTable[$charsetCodeNoDash];
          }

          return $charsetCode;
      }

      /*!
       \static
       Tries to figure out the character encoding scheme for the given character set.
       It uses realCharsetCode() to get the correct internal charset so any charset
       can be given to this function.

       \param $charsetCode the charset name, or the internal charset code if
                           \a $isRealCharset is \c true.
       \param $isRealCharset if \c true the charset code is used as-is and the
                             alias lookup is skipped.
       \return the found encoding scheme or 'singlebyte' if no scheme was found.
       \sa realCharsetCode
      */
      static function characterEncodingScheme( $charsetCode, $isRealCharset = false )
      {
          if ( !$isRealCharset )
          {
              $charsetCode = self::realCharsetCode( $charsetCode );
          }

          $reverseEncodingTable =& self::reverseEncodingTable();
          if ( isset( $reverseEncodingTable[$charsetCode] ) )
          {
              return $reverseEncodingTable[$charsetCode];
          }

          return 'singlebyte';
      }
  }
  198. ?>