PageRenderTime 45ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/includes/utf8/utf8_functions.php

https://gitlab.com/truongdacngoc1993/tuanviet
PHP | 303 lines | 168 code | 40 blank | 95 comment | 23 complexity | 657f454299b98eb605f3a84c36878151 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  2. /**
  3. * @Project NUKEVIET 4.x
  4. * @Author VINADES.,JSC (contact@vinades.vn)
  5. * @Copyright (C) 2014 VINADES.,JSC. All rights reserved
  6. * @License GNU/GPL version 2 or any later version
  7. * @Createdate 23/8/2010, 0:13
  8. */
  // Halt immediately when the NV_MAINFILE constant has not been defined.
  if( ! defined( 'NV_MAINFILE' ) )
  {
      exit( 'Stop!!!' );
  }
  /**
   * utf8_to_unicode()
   * Decodes a UTF-8 byte string into a list of Unicode code points.
   * Example: "Việt Nam" => Array ( [0] => 86 [1] => 105 [2] => 7879 [3] => 116 [4] => 32 [5] => 78 [6] => 97 [7] => 109 )
   *
   * Sequences of 1 to 4 bytes are supported. The input is not validated:
   * the length of a sequence is taken from its first byte only, and the
   * payload bits of each following byte are used as-is.
   *
   * @param mixed $str UTF-8 encoded string
   * @return array list of integer code points
   */
  function utf8_to_unicode( $str )
  {
      $unicode = array();
      $values = array();
      $lookingFor = 1;
      $strlen = strlen( $str );

      for( $i = 0; $i < $strlen; ++$i )
      {
          $thisValue = ord( $str[$i] );

          if( $thisValue < 128 )
          {
              // Single-byte (ASCII) character: the byte value is the code point.
              $unicode[] = $thisValue;
              continue;
          }

          if( sizeof( $values ) == 0 )
          {
              // First byte of a multi-byte sequence: its range tells how many bytes to collect.
              if( $thisValue < 224 )
              {
                  $lookingFor = 2;
              }
              elseif( $thisValue < 240 )
              {
                  $lookingFor = 3;
              }
              else
              {
                  // Lead bytes from 0xF0 upwards open a 4-byte sequence (code points above U+FFFF).
                  $lookingFor = 4;
              }
          }

          $values[] = $thisValue;

          if( sizeof( $values ) == $lookingFor )
          {
              // Combine the payload bits: the lead byte keeps its low 3, 4 or 5 bits,
              // every following byte contributes its low 6 bits.
              if( $lookingFor == 4 )
              {
                  $number = ( ( $values[0] % 8 ) * 262144 ) + ( ( $values[1] % 64 ) * 4096 ) + ( ( $values[2] % 64 ) * 64 ) + ( $values[3] % 64 );
              }
              elseif( $lookingFor == 3 )
              {
                  $number = ( ( $values[0] % 16 ) * 4096 ) + ( ( $values[1] % 64 ) * 64 ) + ( $values[2] % 64 );
              }
              else
              {
                  $number = ( ( $values[0] % 32 ) * 64 ) + ( $values[1] % 64 );
              }

              $unicode[] = $number;
              $values = array();
              $lookingFor = 1;
          }
      }

      return $unicode;
  }
  /**
   * unicode_to_entities()
   * Renders every code point of the list as a decimal numeric entity.
   * Array ( [0] => 86 [1] => 105 [2] => 7879 [3] => 116 ) => &#86;&#105;&#7879;&#116;
   *
   * @param mixed $unicode list of code points
   * @return string the concatenated entities ('' for an empty list)
   */
  function unicode_to_entities( $unicode )
  {
      $pieces = array();

      foreach( $unicode as $codepoint )
      {
          $pieces[] = '&#' . $codepoint . ';';
      }

      return implode( '', $pieces );
  }
  /**
   * unicode_to_entities_preserving_ascii()
   * Like unicode_to_entities(), but values up to 127 are emitted as the
   * literal character and only larger values become numeric entities.
   * Array ( [0] => 86 [1] => 105 [2] => 7879 [3] => 116 [4] => 32 [5] => 78 [6] => 97 [7] => 109 ) => Vi&#7879;t Nam
   *
   * @param mixed $unicode list of code points
   * @return string
   */
  function unicode_to_entities_preserving_ascii( $unicode )
  {
      $output = '';

      foreach( $unicode as $codepoint )
      {
          if( $codepoint > 127 )
          {
              $output .= '&#' . $codepoint . ';';
              continue;
          }

          $output .= chr( $codepoint );
      }

      return $output;
  }
  /**
   * unicode_to_utf8()
   * Encodes a list of Unicode code points as a UTF-8 string.
   * Array ( [0] => 86 [1] => 105 [2] => 7879 [3] => 116 [4] => 32 [5] => 78 [6] => 97 [7] => 109 ) => "Việt Nam"
   *
   * Code points below 128 take one byte, below 2048 two bytes, below 65536
   * three bytes and everything above four bytes. Values are not range-checked.
   *
   * @param mixed $str list of integer code points (despite the parameter name)
   * @return string UTF-8 encoded string
   */
  function unicode_to_utf8( $str )
  {
      $utf8 = '';

      foreach( $str as $unicode )
      {
          if( $unicode < 128 )
          {
              $utf8 .= chr( $unicode );
          }
          elseif( $unicode < 2048 )
          {
              // 110xxxxx 10xxxxxx
              $utf8 .= chr( 192 | ( $unicode >> 6 ) );
              $utf8 .= chr( 128 | ( $unicode & 63 ) );
          }
          elseif( $unicode < 65536 )
          {
              // 1110xxxx 10xxxxxx 10xxxxxx
              $utf8 .= chr( 224 | ( $unicode >> 12 ) );
              $utf8 .= chr( 128 | ( ( $unicode >> 6 ) & 63 ) );
              $utf8 .= chr( 128 | ( $unicode & 63 ) );
          }
          else
          {
              // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - needed for code points above U+FFFF
              $utf8 .= chr( 240 | ( $unicode >> 18 ) );
              $utf8 .= chr( 128 | ( ( $unicode >> 12 ) & 63 ) );
              $utf8 .= chr( 128 | ( ( $unicode >> 6 ) & 63 ) );
              $utf8 .= chr( 128 | ( $unicode & 63 ) );
          }
      }

      return $utf8;
  }
  /**
   * nv_str_split()
   * Splits a UTF-8 string into chunks of at most $split_len characters.
   *
   * @param mixed $str string to split
   * @param integer $split_len maximum number of characters per chunk; must be an integer >= 1
   * @return mixed false when $split_len is not an integer >= 1,
   *               array( $str ) when the string is not longer than $split_len,
   *               otherwise the list of chunks
   */
  function nv_str_split( $str, $split_len = 1 )
  {
      if( ! is_int( $split_len ) || $split_len < 1 )
      {
          return false;
      }

      $len = nv_strlen( $str );
      if( $len <= $split_len )
      {
          return array( $str );
      }

      // A single greedy ".{1,N}" (with /s so the dot also matches newlines) yields
      // full chunks followed by the shorter remainder, and keeps NUL bytes in the
      // last chunk instead of dropping them.
      preg_match_all( '/.{1,' . $split_len . '}/us', $str, $ar );

      return $ar[0];
  }
  /**
   * nv_strspn()
   * Counts the characters at the beginning of $str that all belong to $mask.
   *
   * $mask is placed verbatim inside a regular-expression character class,
   * so it is interpreted with character-class syntax and is not escaped here.
   * When $start or $length is given, the string is first cut with nv_substr().
   *
   * @param mixed $str
   * @param mixed $mask
   * @param mixed $start
   * @param mixed $length
   * @return mixed result of nv_strlen() on the matched prefix, or 0 when nothing matches
   */
  function nv_strspn( $str, $mask, $start = null, $length = null )
  {
      $use_whole_string = ( $start === null && $length === null );
      if( ! $use_whole_string )
      {
          $str = nv_substr( $str, $start, $length );
      }

      $pattern = '/^[' . $mask . ']+/u';
      preg_match( $pattern, $str, $found );

      return isset( $found[0] ) ? nv_strlen( $found[0] ) : 0;
  }
  /**
   * nv_ucfirst()
   * Upper-cases the first character of a UTF-8 string via nv_strtoupper()
   * and leaves the rest of the string as it is.
   *
   * @param mixed $str
   * @return mixed '' for a zero-length string, otherwise the converted string
   */
  function nv_ucfirst( $str )
  {
      $length = nv_strlen( $str );

      if( $length == 0 )
      {
          return '';
      }

      if( $length == 1 )
      {
          return nv_strtoupper( $str );
      }

      // Group 1 is the first character, group 2 everything after it (newlines included).
      preg_match( '/^(.{1})(.*)$/us', $str, $parts );

      return nv_strtoupper( $parts[1] ) . $parts[2];
  }
  /**
   * nv_ltrim()
   * UTF-8 aware ltrim(): removes the characters of $charlist from the start of $str.
   *
   * @param mixed $str
   * @param mixed $charlist characters to strip; false (default) falls back to
   *                        PHP's ltrim() with its default whitespace list
   * @return mixed the trimmed string
   */
  function nv_ltrim( $str, $charlist = false )
  {
      if( $charlist === false )
      {
          return ltrim( $str );
      }

      // An empty list would build the invalid pattern "/^[]+/u" and make
      // preg_replace() return null; there is nothing to strip, so keep $str.
      if( $charlist === '' )
      {
          return $str;
      }

      // Escape \ - ] [ / ^ so they are taken literally inside the character class.
      $charlist = preg_replace( '!([\\\\\\-\\]\\[/^])!', '\\\${1}', $charlist );

      return preg_replace( '/^[' . $charlist . ']+/u', '', $str );
  }
  /**
   * nv_rtrim()
   * UTF-8 aware rtrim(): removes the characters of $charlist from the end of $str.
   *
   * @param mixed $str
   * @param mixed $charlist characters to strip; false (default) falls back to
   *                        PHP's rtrim() with its default whitespace list
   * @return mixed the trimmed string
   */
  function nv_rtrim( $str, $charlist = false )
  {
      if( $charlist === false )
      {
          return rtrim( $str );
      }

      // An empty list would build the invalid pattern "/[]+$/u" and make
      // preg_replace() return null; there is nothing to strip, so keep $str.
      if( $charlist === '' )
      {
          return $str;
      }

      // Escape \ - ] [ / ^ so they are taken literally inside the character class.
      $charlist = preg_replace( '!([\\\\\\-\\]\\[/^])!', '\\\${1}', $charlist );

      // The D modifier anchors "$" at the very end of the subject; without it "$"
      // also matches before a final newline and characters in front of that
      // newline would be stripped.
      return preg_replace( '/[' . $charlist . ']+$/uD', '', $str );
  }
  /**
   * nv_trim()
   * Trims both ends of a string. With a character list the work is delegated
   * to nv_rtrim() followed by nv_ltrim(); without one PHP's trim() is used.
   *
   * @param mixed $str
   * @param mixed $charlist characters to strip, or false (default) for trim()'s default list
   * @return mixed the trimmed string
   */
  function nv_trim( $str, $charlist = false )
  {
      if( $charlist !== false )
      {
          $right_trimmed = nv_rtrim( $str, $charlist );

          return nv_ltrim( $right_trimmed, $charlist );
      }

      return trim( $str );
  }
  /**
   * nv_EncString()
   * Transliterates a string with the lookup tables under includes/utf8/.
   *
   * If lookup_<NV_LANG_DATA>.php exists it is included and its
   * $utf8_lookup_lang table is applied first; afterwards lookup.php is
   * included and its $utf8_lookup['romanize'] table is applied.
   * Both tables are expected to be defined by the included files.
   *
   * @param mixed $string
   * @return string the string after both strtr() passes
   */
  function nv_EncString( $string )
  {
      $lang_lookup_file = NV_ROOTDIR . '/includes/utf8/lookup_' . NV_LANG_DATA . '.php';

      if( file_exists( $lang_lookup_file ) )
      {
          include $lang_lookup_file;
          $string = strtr( $string, $utf8_lookup_lang );
      }

      include NV_ROOTDIR . '/includes/utf8/lookup.php';
      $romanize_map = $utf8_lookup['romanize'];

      return strtr( $string, $romanize_map );
  }
  /**
   * change_alias()
   * Builds an alias made only of ASCII letters, digits and hyphens.
   *
   * Steps: drop the combining marks U+0300, U+0301, U+0303, U+0309 and U+0323
   * (decomposed Vietnamese tone marks), transliterate with nv_EncString(),
   * blank out a fixed list of HTML entities, turn every remaining
   * non-alphanumeric character into a space, collapse runs of spaces into one
   * hyphen and strip hyphens from both ends.
   *
   * @param mixed $alias
   * @return mixed the generated alias
   */
  function change_alias( $alias )
  {
      $alias = preg_replace( '/[\x{0300}\x{0301}\x{0303}\x{0309}\x{0323}]/u', '', $alias );
      $romanized = nv_EncString( $alias );

      $entities = array( '&amp;', '&#039;', '&quot;', '&lt;', '&gt;', '&#x005C;', '&#x002F;', '&#40;', '&#41;', '&#42;', '&#91;', '&#93;', '&#33;', '&#x3D;', '&#x23;', '&#x25;', '&#x5E;', '&#x3A;', '&#x7B;', '&#x7D;', '&#x60;', '&#x7E;' );
      $without_entities = str_replace( $entities, ' ', $romanized );

      // Applied in this order, each rule working on the result of the previous one.
      $rules = array(
          '/[^a-zA-Z0-9]/' => ' ',
          '/[ ]+/' => '-',
          '/^[\-]+/' => '',
          '/[\-]+$/' => ''
      );

      return preg_replace( array_keys( $rules ), array_values( $rules ), $without_entities );
  }
  /**
   * nv_clean60()
   * Shortens a string to at most $num characters, preferring to cut at a space.
   *
   * The string is decoded once with nv_unhtmlspecialchars(). If it is longer
   * than $num, it is cut at the last space found at character index 1..$num
   * and '...' is appended; when there is no such space it is cut hard at
   * $num characters without the ellipsis. A $num that is zero or negative
   * disables shortening.
   *
   * @param mixed $string
   * @param integer $num maximum number of characters kept (default 60)
   * @param bool $specialchars when true the result is passed through nv_htmlspecialchars()
   * @return mixed the shortened string
   */
  function nv_clean60( $string, $num = 60, $specialchars = false )
  {
      $string = nv_unhtmlspecialchars( $string );

      if( $num > 0 && $num < nv_strlen( $string ) )
      {
          // Only the first $num + 1 characters can hold a usable break point.
          $head = nv_substr( $string, 0, $num + 1 );

          // A space is a single byte in UTF-8, so this byte offset always falls
          // on a character boundary (assumes nv_substr() returns UTF-8 text).
          $cut = strrpos( $head, ' ' );

          if( $cut === false || $cut === 0 )
          {
              // No break point inside the limit: cut hard instead of returning the full text.
              $string = nv_substr( $string, 0, $num );
          }
          else
          {
              $string = substr( $head, 0, $cut ) . '...';
          }
      }

      if( $specialchars )
      {
          $string = nv_htmlspecialchars( $string );
      }

      return $string;
  }