PageRenderTime 43ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/Phergie/Plugin/Encoding.php

https://github.com/zburnham/phergie
PHP | 190 lines | 111 code | 7 blank | 72 comment | 3 complexity | 23c5405a672451a0db28fb775b654622 MD5 | raw file
  1. <?php
  2. /**
  3. * Phergie
  4. *
  5. * PHP version 5
  6. *
  7. * LICENSE
  8. *
  9. * This source file is subject to the new BSD license that is bundled
  10. * with this package in the file LICENSE.
  11. * It is also available through the world-wide-web at this URL:
  12. * http://phergie.org/license
  13. *
  14. * @category Phergie
  15. * @package Phergie_Plugin_Encoding
  16. * @author Phergie Development Team <team@phergie.org>
  17. * @copyright 2008-2012 Phergie Development Team (http://phergie.org)
  18. * @license http://phergie.org/license New BSD License
  19. * @link http://pear.phergie.org/package/Phergie_Plugin_Encoding
  20. */
  /**
   * Handles decoding markup entities and converting text between character
   * encodings.
   *
   * @category Phergie
   * @package  Phergie_Plugin_Encoding
   * @author   Phergie Development Team <team@phergie.org>
   * @license  http://phergie.org/license New BSD License
   * @link     http://pear.phergie.org/package/Phergie_Plugin_Encoding
   */
  class Phergie_Plugin_Encoding extends Phergie_Plugin_Abstract
  {
      /**
       * Lookup table of named entities that are resolved by this class
       * before html_entity_decode() is consulted.
       *
       * Keys are complete, case-sensitive entity references (entity names
       * are case-sensitive in HTML, e.g. &alpha; and &Alpha; are different
       * characters). Values are Unicode code points; they are turned into
       * characters by decodeEntity(), never by chr(), because chr() only
       * handles single bytes and wraps larger values modulo 256.
       *
       * NOTE(review): &circ;, &minus; and &tilde; map to their ASCII
       * look-alikes (^, -, ~) rather than U+02C6, U+2212 and U+02DC. This
       * looks deliberate for plain-text output; confirm before changing.
       *
       * @var array
       * @link http://php.net/get_html_translation_table#73409
       * @link http://php.net/get_html_translation_table#73410
       */
      protected static $entities = array(
          '&alpha;'    => 945,
          '&apos;'     => 39,
          '&beta;'     => 946,
          '&bull;'     => 8226,
          '&chi;'      => 967,
          '&circ;'     => 94,
          '&delta;'    => 948,
          '&epsilon;'  => 949,
          '&eta;'      => 951,
          '&fnof;'     => 402,
          '&gamma;'    => 947,
          '&iota;'     => 953,
          '&kappa;'    => 954,
          '&lambda;'   => 955,
          '&ldquo;'    => 8220,
          '&lsaquo;'   => 8249,
          '&lsquo;'    => 8216,
          '&mdash;'    => 8212,
          '&minus;'    => 45,
          '&mu;'       => 956,
          '&ndash;'    => 8211,
          '&nu;'       => 957,
          '&oelig;'    => 339,
          '&omega;'    => 969,
          '&omicron;'  => 959,
          '&phi;'      => 966,
          '&pi;'       => 960,
          '&piv;'      => 982,
          '&psi;'      => 968,
          '&rdquo;'    => 8221,
          '&rho;'      => 961,
          '&rsaquo;'   => 8250,
          '&rsquo;'    => 8217,
          '&scaron;'   => 353,
          '&sigma;'    => 963,
          '&sigmaf;'   => 962,
          '&tau;'      => 964,
          '&theta;'    => 952,
          '&thetasym;' => 977,
          '&tilde;'    => 126,
          '&trade;'    => 8482,
          '&upsih;'    => 978,
          '&upsilon;'  => 965,
          '&xi;'       => 958,
          '&yuml;'     => 255,
          '&zeta;'     => 950,
      );

      /**
       * Decodes markup entities in a given string.
       *
       * The string is split on entity references and each reference is
       * decoded exactly once, so already-escaped text such as "&amp;#8226;"
       * comes out as the literal "&#8226;" instead of being decoded twice.
       *
       * @param string $string  String containing markup entities
       * @param string $charset Optional character set name to use in decoding
       *        entities, defaults to UTF-8
       *
       * @return string String with markup entities decoded
       */
      public function decodeEntities($string, $charset = 'UTF-8')
      {
          // With a single capture group and PREG_SPLIT_DELIM_CAPTURE the
          // result alternates: even indexes are plain text, odd indexes are
          // the entity references that separated them.
          $pieces = preg_split(
              '/(&(?:#[0-9]+|#x[0-9a-f]+|[a-z][a-z0-9]*);)/i',
              $string,
              -1,
              PREG_SPLIT_DELIM_CAPTURE
          );

          if ($pieces === false) {
              // PCRE could not process the input; fall back to the built-in
              // decoder rather than returning nothing.
              return html_entity_decode($string, ENT_QUOTES, $charset);
          }

          $decoded = '';
          foreach ($pieces as $index => $piece) {
              $decoded .= ($index % 2 === 1)
                  ? $this->decodeEntity($piece, $charset)
                  : $piece;
          }

          return $decoded;
      }

      /**
       * Decodes a single entity reference.
       *
       * Resolution order: the class lookup table (rewritten to a numeric
       * reference), then html_entity_decode() for the requested character
       * set, then codeToUtf8() for numeric references that
       * html_entity_decode() left untouched. Unknown named entities are
       * returned unchanged.
       *
       * @param string $entity  Complete entity reference, e.g. "&amp;" or
       *        "&#x2022;"
       * @param string $charset Character set name to use in decoding
       *
       * @return string Decoded character(s), or the entity itself if it could
       *         not be decoded
       */
      protected function decodeEntity($entity, $charset)
      {
          if (isset(self::$entities[$entity])) {
              $entity = '&#' . self::$entities[$entity] . ';';
          }

          $decoded = html_entity_decode($entity, ENT_QUOTES, $charset);
          if ($decoded !== $entity) {
              return $decoded;
          }

          // html_entity_decode() leaves a numeric reference alone when it
          // cannot represent it; emit the UTF-8 encoding in that case.
          if (preg_match('/^&#x([0-9a-f]+);$/i', $entity, $match)) {
              return $this->codeToUtf8(hexdec($match[1]));
          }
          if (preg_match('/^&#([0-9]+);$/', $entity, $match)) {
              return $this->codeToUtf8($match[1]);
          }

          return $entity;
      }

      /**
       * Converts a given unicode to its UTF-8 equivalent.
       *
       * Values that are not Unicode scalar values (negative numbers, the
       * surrogate range U+D800..U+DFFF, anything above U+10FFFF) yield the
       * replacement character U+FFFD so that the result is always a valid
       * UTF-8 string.
       *
       * @param int $code Code to convert
       *
       * @return string Character corresponding to code
       */
      public function codeToUtf8($code)
      {
          $replacement = "\xEF\xBF\xBD";

          // hexdec() returns a float for very large input; reject those
          // before the integer cast, whose result would be unreliable.
          if (is_float($code) && !($code >= 0 && $code <= 0x10FFFF)) {
              return $replacement;
          }

          $code = (int) $code;

          if ($code < 0 || $code > 0x10FFFF
              || ($code >= 0xD800 && $code <= 0xDFFF)
          ) {
              return $replacement;
          }

          // 1 byte, 7 bits
          if ($code < 0x80) {
              return chr($code);
          }

          // 2 bytes, 11 bits
          if ($code < 0x800) {
              return chr(0xC0 | ($code >> 6))
                  . chr(0x80 | ($code & 0x3F));
          }

          // 3 bytes, 16 bits
          if ($code < 0x10000) {
              return chr(0xE0 | ($code >> 12))
                  . chr(0x80 | (($code >> 6) & 0x3F))
                  . chr(0x80 | ($code & 0x3F));
          }

          // 4 bytes, 21 bits
          return chr(0xF0 | ($code >> 18))
              . chr(0x80 | (($code >> 12) & 0x3F))
              . chr(0x80 | (($code >> 6) & 0x3F))
              . chr(0x80 | ($code & 0x3F));
      }

      /**
       * Transliterates characters in a given string where possible.
       *
       * Uses the PECL translit extension when available, then iconv. When
       * neither is available, accented Latin letters are reduced to their
       * base letters via their entity names; in that last case the result
       * stays in $charsetFrom and $charsetTo is not applied.
       *
       * @param string $string      String containing characters to
       *        transliterate
       * @param string $charsetFrom Optional character set of the string,
       *        defaults to UTF-8
       * @param string $charsetTo   Optional character set to which the string
       *        should be converted, defaults to ISO-8859-1
       *
       * @return string String with characters transliterated or the original
       *         string if transliteration was not possible
       */
      public function transliterate($string, $charsetFrom = 'UTF-8',
          $charsetTo = 'ISO-8859-1'
      ) {
          // @link http://pecl.php.net/package/translit
          if (function_exists('transliterate')) {
              return transliterate(
                  $string,
                  array('han_transliterate', 'diacritical_remove'),
                  $charsetFrom,
                  $charsetTo
              );
          }

          if (function_exists('iconv')) {
              $converted = iconv(
                  $charsetFrom, $charsetTo . '//TRANSLIT', $string
              );
              // iconv() returns false on failure; honour the documented
              // contract of handing back the original string.
              return ($converted === false) ? $string : $converted;
          }

          // @link http://stackoverflow.com/questions/1284535/php-transliteration/1285491#1285491
          $encoded = htmlentities($string, ENT_COMPAT, $charsetFrom);
          if ($encoded === '') {
              // Either the input was empty or it could not be encoded.
              return $string;
          }

          $stripped = preg_replace(
              '~&([a-z]{1,2})'
              . '(acute|cedil|circ|grave|lig|orn|ring|slash|th|tilde|uml);~i',
              '$1',
              $encoded
          );
          if ($stripped === null) {
              return $string;
          }

          // Undo the remaining escaping added by htmlentities() (&amp;,
          // &quot;, &euro;, ...) so that only the accents are affected.
          return html_entity_decode($stripped, ENT_COMPAT, $charsetFrom);
      }
  }