PageRenderTime 40ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/Phergie/Plugin/Encoding.php

https://github.com/jellydoughnut/phergie
PHP | 182 lines | 104 code | 7 blank | 71 comment | 3 complexity | e4de89bad9f1c130bbd9644a7890a313 MD5 | raw file
  1. <?php
  2. /**
  3. * Phergie
  4. *
  5. * PHP version 5
  6. *
  7. * LICENSE
  8. *
  9. * This source file is subject to the new BSD license that is bundled
  10. * with this package in the file LICENSE.
  11. * It is also available through the world-wide-web at this URL:
  12. * http://phergie.org/license
  13. *
  14. * @category Phergie
  15. * @package Phergie_Plugin_Encoding
  16. * @author Phergie Development Team <team@phergie.org>
  17. * @copyright 2008-2010 Phergie Development Team (http://phergie.org)
  18. * @license http://phergie.org/license New BSD License
  19. * @link http://pear.phergie.org/package/Phergie_Plugin_Encoding
  20. */
  21. /**
  22. * Handles decoding markup entities and converting text between character
  23. * encodings.
  24. *
  25. * @category Phergie
  26. * @package Phergie_Plugin_Encoding
  27. * @author Phergie Development Team <team@phergie.org>
  28. * @license http://phergie.org/license New BSD License
  29. * @link http://pear.phergie.org/package/Phergie_Plugin_Encoding
  30. */
  class Phergie_Plugin_Encoding extends Phergie_Plugin_Abstract
  {
      /**
       * Lookup table for entity conversions not supported by
       * html_entity_decode()
       *
       * Keys are named entities; values are the integer codes that
       * decodeEntities() turns into a replacement via chr().
       *
       * NOTE(review): chr() yields exactly one byte and wraps values above
       * 255 modulo 256, so entries such as 913 come out as byte 0x91 rather
       * than a Greek letter. The values in the 128-159 range look like
       * Windows-1252 byte values rather than Unicode code points, and the
       * lookup is case-insensitive, so upper- and lower-case entity names
       * share one entry. Confirm the intended output before changing this
       * table; it is kept as-is here to preserve existing behavior.
       *
       * @var array
       * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73409
       * @link http://us.php.net/manual/en/function.get-html-translation-table.php#73410
       */
      protected static $entities = array(
          '&alpha;' => 913,
          '&apos;' => 39,
          '&beta;' => 914,
          '&bull;' => 149,
          '&chi;' => 935,
          '&circ;' => 94,
          '&delta;' => 916,
          '&epsilon;' => 917,
          '&eta;' => 919,
          '&fnof;' => 402,
          '&gamma;' => 915,
          '&iota;' => 921,
          '&kappa;' => 922,
          '&lambda;' => 923,
          '&ldquo;' => 147,
          '&lsaquo;' => 139,
          '&lsquo;' => 145,
          '&mdash;' => 151,
          '&minus;' => 45,
          '&mu;' => 924,
          '&ndash;' => 150,
          '&nu;' => 925,
          '&oelig;' => 140,
          '&omega;' => 937,
          '&omicron;' => 927,
          '&phi;' => 934,
          '&pi;' => 928,
          '&piv;' => 982,
          '&psi;' => 936,
          '&rdquo;' => 148,
          '&rho;' => 929,
          '&rsaquo;' => 155,
          '&rsquo;' => 146,
          '&scaron;' => 138,
          '&sigma;' => 931,
          '&sigmaf;' => 962,
          '&tau;' => 932,
          '&theta;' => 920,
          '&thetasym;' => 977,
          '&tilde;' => 126,
          '&trade;' => 153,
          '&upsih;' => 978,
          '&upsilon;' => 933,
          '&xi;' => 926,
          '&yuml;' => 159,
          '&zeta;' => 918,
      );

      /**
       * Decodes markup entities in a given string.
       *
       * Decoding happens in three steps: named entities from the internal
       * lookup table, then html_entity_decode(), then any numeric character
       * references (decimal or hexadecimal) that are still present, which
       * are converted to UTF-8 byte sequences via codeToUtf8().
       *
       * @param string $string  String containing markup entities
       * @param string $charset Optional character set name to use in decoding
       *        entities, defaults to UTF-8
       *
       * @return string String with markup entities decoded
       */
      public function decodeEntities($string, $charset = 'UTF-8')
      {
          // Named entities from the lookup table, matched case-insensitively.
          $string = str_ireplace(
              array_keys(self::$entities),
              array_map('chr', self::$entities),
              $string
          );

          $string = html_entity_decode($string, ENT_QUOTES, $charset);

          // Numeric references left over after html_entity_decode(). A
          // callback is used instead of the "e" (eval) pattern modifier,
          // which no longer exists in PHP 7+, and both notations are handled
          // in a single pass so decoded output is never re-scanned.
          $string = preg_replace_callback(
              '/&#(?:x0*([a-f0-9]+)|0*([0-9]+));/i',
              array($this, 'decodeNumericEntity'),
              $string
          );

          return $string;
      }

      /**
       * Callback for decodeEntities(): converts one matched numeric
       * character reference to its UTF-8 byte sequence.
       *
       * @param array $matches Regex match; index 1 holds the hexadecimal
       *        digits (empty string for a decimal reference), index 2 holds
       *        the decimal digits
       *
       * @return string UTF-8 bytes for the referenced code, or an empty
       *         string when the code is too large to encode
       */
      protected function decodeNumericEntity(array $matches)
      {
          $code = ($matches[1] !== '')
              ? hexdec($matches[1])
              : (int) $matches[2];

          // hexdec() returns a float for very long digit runs; reject those
          // before they reach the integer cast in codeToUtf8().
          if ($code > 0x1FFFFF) {
              return '';
          }

          return $this->codeToUtf8($code);
      }

      /**
       * Converts a given unicode to its UTF-8 equivalent.
       *
       * Codes are encoded using one to four bytes depending on magnitude.
       * No check is made for surrogates or for values above U+10FFFF.
       *
       * @param int $code Code to convert
       *
       * @return string Character corresponding to code, or an empty string
       *         if the code is negative or needs more than 21 bits
       */
      public function codeToUtf8($code)
      {
          $code = (int) $code;

          if ($code < 0 || $code > 0x1FFFFF) {
              return '';
          }

          // 1 byte, 7 bits
          if ($code <= 0x7F) {
              return chr($code);
          }

          // 2 bytes, 11 bits
          if ($code <= 0x7FF) {
              return chr(0xC0 | (($code >> 6) & 0x1F)) .
                  chr(0x80 | ($code & 0x3F));
          }

          // 3 bytes, 16 bits
          if ($code <= 0xFFFF) {
              return chr(0xE0 | (($code >> 12) & 0x0F)) .
                  chr(0x80 | (($code >> 6) & 0x3F)) .
                  chr(0x80 | ($code & 0x3F));
          }

          // 4 bytes, 21 bits
          return chr(0xF0 | ($code >> 18)) .
              chr(0x80 | (($code >> 12) & 0x3F)) .
              chr(0x80 | (($code >> 6) & 0x3F)) .
              chr(0x80 | ($code & 0x3F));
      }

      /**
       * Transliterates characters in a given string where possible.
       *
       * Uses the PECL translit extension when its transliterate() function
       * exists, otherwise iconv() with //TRANSLIT, otherwise a regex-based
       * fallback that strips diacritic suffixes from HTML entity names.
       *
       * @param string $string      String containing characters to
       *        transliterate
       * @param string $charsetFrom Optional character set of the string,
       *        defaults to UTF-8
       * @param string $charsetTo   Optional character set to which the string
       *        should be converted, defaults to ISO-8859-1
       *
       * @return string String with characters transliterated or the original
       *         string if transliteration was not possible
       */
      public function transliterate($string, $charsetFrom = 'UTF-8', $charsetTo = 'ISO-8859-1')
      {
          // @link http://pecl.php.net/package/translit
          if (function_exists('transliterate')) {
              $string = transliterate(
                  $string,
                  array('han_transliterate', 'diacritical_remove'),
                  $charsetFrom,
                  $charsetTo
              );
          } elseif (function_exists('iconv')) {
              // iconv() returns false on failure; keep the original string
              // in that case, as the documented contract promises.
              $converted = iconv($charsetFrom, $charsetTo . '//TRANSLIT', $string);
              if ($converted !== false) {
                  $string = $converted;
              }
          } else {
              // @link http://stackoverflow.com/questions/1284535/php-transliteration/1285491#1285491
              $string = preg_replace(
                  '~&([a-z]{1,2})(acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i',
                  '$1',
                  htmlentities($string, ENT_COMPAT, $charsetFrom)
              );
          }

          return $string;
      }
  }