PageRenderTime 42ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/nimbus/lib/utf8/functions.php

https://github.com/codepassive/dev-nimbus
PHP | 408 lines | 304 code | 76 blank | 28 comment | 79 complexity | 069e25f66307f5654b85b8b997176481 MD5 | raw file
  1. <?php
  2. /**
  3. * Nimbus - Manage, Share & Collaborate
  4. *
  5. * Nimbus is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. * see LICENSE for more Copyright goodness.
  10. *
  11. * @package: Nimbus
  12. * @subpackage: Nimbus_utf8
  13. * @copyright: 2009-2010, Nimbus Dev Group, All rights reserved.
  14. * @license: GNU/GPLv3, see LICENSE
  15. * @version: 1.0.0 Alpha
  16. */
  17. /**
  18. * Copyright (c) PHPUTF8
  19. * http://sourceforge.net/projects/phputf8/
  20. */
  /**
   * Upper-cases the first character of a UTF-8 string.
   *
   * @param string $str UTF-8 encoded input
   * @return string '' for a zero-length input, otherwise the input with its
   *                first character passed through utf8_strtoupper()
   */
  function utf8_ucfirst($str){
      $charCount = utf8_strlen($str);

      if ($charCount == 0) {
          return '';
      }

      if ($charCount == 1) {
          return utf8_strtoupper($str);
      }

      // Split into "first character" and "everything else" (dot matches newlines too).
      preg_match('/^(.{1})(.*)$/us', $str, $parts);
      return utf8_strtoupper($parts[1]) . $parts[2];
  }
  /**
   * UTF-8 aware ltrim().
   *
   * Without a character list this defers to the native ltrim(). With one, the
   * listed characters are stripped from the start of the string using a
   * Unicode-mode regular expression.
   *
   * @param string       $str      UTF-8 encoded input
   * @param string|false $charlist characters to strip, or FALSE for the native default
   * @return string the left-trimmed string
   */
  function utf8_ltrim($str, $charlist = FALSE){
      if ($charlist === FALSE) {
          return ltrim($str);
      }

      // An empty list would produce the invalid pattern /^[]+/u (preg_replace
      // then returns NULL); there is nothing to strip, so return the input.
      if ((string) $charlist === '') {
          return $str;
      }

      //quote charlist for use in a characterclass
      $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $charlist);
      return preg_replace('/^['.$charlist.']+/u', '', $str);
  }
  /**
   * UTF-8 aware rtrim().
   *
   * Without a character list this defers to the native rtrim(). With one, the
   * listed characters are stripped from the end of the string using a
   * Unicode-mode regular expression.
   *
   * @param string       $str      UTF-8 encoded input
   * @param string|false $charlist characters to strip, or FALSE for the native default
   * @return string the right-trimmed string
   */
  function utf8_rtrim($str, $charlist = FALSE){
      if ($charlist === FALSE) {
          return rtrim($str);
      }

      // An empty list would produce the invalid pattern /[]+$/u (preg_replace
      // then returns NULL); there is nothing to strip, so return the input.
      if ((string) $charlist === '') {
          return $str;
      }

      //quote charlist for use in a characterclass
      $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $charlist);
      return preg_replace('/['.$charlist.']+$/u', '', $str);
  }
  /**
   * UTF-8 aware trim().
   *
   * @param string       $str      UTF-8 encoded input
   * @param string|false $charlist characters to strip, or FALSE to use native trim()
   * @return string the string trimmed on both ends
   */
  function utf8_trim($str, $charlist = FALSE){
      if ($charlist !== FALSE) {
          // Right side first, then left, using the sibling helpers.
          $rightTrimmed = utf8_rtrim($str, $charlist);
          return utf8_ltrim($rightTrimmed, $charlist);
      }

      return trim($str);
  }
  /**
   * UTF-8 aware substr_replace(): replaces a run of characters (not bytes).
   *
   * Both strings are split into arrays of characters and the replacement is
   * spliced in with array_splice(), so $start and $length follow
   * array_splice() offset/length rules.
   *
   * @param string   $str    UTF-8 encoded input
   * @param string   $repl   UTF-8 encoded replacement
   * @param int      $start  character offset where replacement begins
   * @param int|null $length number of characters to replace; NULL means utf8_strlen($str)
   * @return string the resulting string
   */
  function utf8_substr_replace($str, $repl, $start , $length = NULL){
      preg_match_all('/./us', $str, $strChars);
      preg_match_all('/./us', $repl, $replChars);

      $span = ($length === NULL) ? utf8_strlen($str) : $length;

      $chars = $strChars[0];
      array_splice($chars, $start, $span, $replChars[0]);

      return implode('', $chars);
  }
  /**
   * UTF-8 aware strspn(): length, in characters, of the leading run of $str
   * made up only of characters from $mask.
   *
   * @param string   $str    UTF-8 encoded input
   * @param string   $mask   characters allowed in the leading run
   * @param int|null $start  optional offset handed to utf8_substr()
   * @param int|null $length optional length handed to utf8_substr()
   * @return int length of the matching run, 0 when nothing matches
   */
  function utf8_strspn($str, $mask, $start = NULL, $length = NULL){
      // Escape characters that are special inside a regex character class.
      $mask = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $mask);

      $wantsSlice = !($start === NULL && $length === NULL);
      if ($wantsSlice) {
          $str = utf8_substr($str, $start, $length);
      }

      preg_match('/^['.$mask.']+/u', $str, $found);

      return isset($found[0]) ? utf8_strlen($found[0]) : 0;
  }
  /**
   * UTF-8 aware str_split(): splits a string into chunks of $split_len characters.
   *
   * @param string $str       UTF-8 encoded input
   * @param int    $split_len chunk size in characters; must be all digits and at least 1
   * @return array|false list of chunks (the last may be shorter), or FALSE for
   *                     an invalid $split_len
   */
  function utf8_str_split($str, $split_len = 1){
      $isDigits = preg_match('/^[0-9]+$/', $split_len);
      if (!$isDigits || $split_len < 1) {
          return FALSE;
      }

      // A string no longer than one chunk is returned as a single-element list.
      if (utf8_strlen($str) <= $split_len) {
          return array($str);
      }

      $chunkPattern = '/.{'.$split_len.'}|[^\x00]{1,'.$split_len.'}$/us';
      preg_match_all($chunkPattern, $str, $chunks);

      return $chunks[0];
  }
  /**
   * UTF-8 aware strrev(): reverses a string character by character.
   *
   * @param string $str UTF-8 encoded input
   * @return string the characters of $str in reverse order
   */
  function utf8_strrev($str){
      // Collect every character (newlines included), then glue them back reversed.
      preg_match_all('/./us', $str, $found);
      $reversed = array_reverse($found[0]);

      return implode('', $reversed);
  }
  /**
   * UTF-8 aware str_pad(): pads $input to $length characters.
   *
   * Fixes over the previous version:
   *  - STR_PAD_BOTH cut the right-hand padding to the LEFT amount, so an odd
   *    amount of padding produced a result one character too short.
   *  - An empty pad string caused a division by zero; it now raises a
   *    warning and returns the input unchanged.
   *
   * @param string $input  UTF-8 encoded input
   * @param int    $length target length in characters
   * @param string $padStr UTF-8 encoded padding, repeated and truncated as needed
   * @param int    $type   STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
   * @return string the padded string; $input unchanged when it is already long enough
   */
  function utf8_str_pad($input, $length, $padStr = ' ', $type = STR_PAD_RIGHT){
      $inputLen = utf8_strlen($input);
      if ($length <= $inputLen) {
          return $input;
      }

      $padStrLen = utf8_strlen($padStr);
      if ($padStrLen == 0) {
          trigger_error('utf8_str_pad: Padding string cannot be empty', E_USER_WARNING);
          return $input;
      }

      $padLen = $length - $inputLen;

      if ($type == STR_PAD_RIGHT) {
          $repeatTimes = (int) ceil($padLen / $padStrLen);
          return utf8_substr($input . str_repeat($padStr, $repeatTimes), 0, $length);
      }

      if ($type == STR_PAD_LEFT) {
          $repeatTimes = (int) ceil($padLen / $padStrLen);
          return utf8_substr(str_repeat($padStr, $repeatTimes), 0, (int) floor($padLen)) . $input;
      }

      if ($type == STR_PAD_BOTH) {
          // With an odd amount the extra character goes on the right.
          $half = $padLen / 2;
          $padAmountLeft = (int) floor($half);
          $padAmountRight = (int) ceil($half);

          $repeatTimesLeft = (int) ceil($padAmountLeft / $padStrLen);
          $repeatTimesRight = (int) ceil($padAmountRight / $padStrLen);

          $paddingLeft = utf8_substr(str_repeat($padStr, $repeatTimesLeft), 0, $padAmountLeft);
          $paddingRight = utf8_substr(str_repeat($padStr, $repeatTimesRight), 0, $padAmountRight);

          return $paddingLeft . $input . $paddingRight;
      }

      trigger_error('utf8_str_pad: Unknown padding type (' . $type . ')',E_USER_ERROR);
  }
  /**
   * UTF-8 aware stristr(): case-insensitive search for $search in $str.
   *
   * Both strings are lower-cased with utf8_strtolower(); the first occurrence
   * is located with an ungreedy regex and the corresponding tail of the
   * ORIGINAL string is returned.
   *
   * Fix: the needle is quoted with the '/' delimiter so that a needle
   * containing a slash no longer breaks the pattern.
   *
   * @param string $str    UTF-8 encoded haystack
   * @param string $search UTF-8 encoded needle
   * @return string|false $str itself for an empty needle, the part of $str
   *                      from the first match onwards, or FALSE if not found
   */
  function utf8_stristr($str, $search){
      if (utf8_strlen($search) == 0) {
          return $str;
      }

      $lstr = utf8_strtolower($str);
      $lsearch = utf8_strtolower($search);

      // Group 1 captures everything before the first occurrence of the needle.
      preg_match('/^(.*)'.preg_quote($lsearch, '/').'/Us', $lstr, $matches);

      if (count($matches) == 2) {
          return utf8_substr($str, utf8_strlen($matches[1]));
      }

      return FALSE;
  }
  /**
   * UTF-8 aware, case-insensitive str_replace().
   *
   * For a scalar $search, matches are found in a lower-cased copy of $str and
   * the replacement is applied at the same character offsets in the original.
   * For an array $search, each term is processed in turn; $replace may be a
   * string (used for every term) or an array keyed like $search (missing keys
   * replace with '').
   *
   * Note: $count is an input here - a cap on the number of replacements per
   * search term - and is compared strictly (===), so it only takes effect
   * when passed as an integer.
   *
   * Fix: the search term is quoted with the '/' delimiter so that a term
   * containing a slash no longer breaks the pattern (which previously left
   * the string unreplaced).
   *
   * @param string|array $search  term(s) to look for
   * @param string|array $replace replacement(s)
   * @param string       $str     UTF-8 encoded subject
   * @param int|null     $count   maximum replacements per term, NULL for no limit
   * @return string the subject with replacements applied
   */
  function utf8_ireplace($search, $replace, $str, $count = NULL){
      if (!is_array($search)) {
          $slen = utf8_strlen($search);
          if ($slen == 0) {
              return $str;
          }

          // Character-length change caused by each replacement.
          $lendif = utf8_strlen($replace) - utf8_strlen($search);

          $search = utf8_strtolower($search);
          $search = preg_quote($search, '/');
          $lstr = utf8_strtolower($str);

          $i = 0;
          // Character offset into $str of the already-processed part.
          $matched = 0;

          while ( preg_match('/(.*)'.$search.'/Us', $lstr, $matches) ) {
              if ( $i === $count ) {
                  break;
              }
              $mlen = utf8_strlen($matches[0]);
              // Consume the lower-cased copy up to and including this match.
              $lstr = utf8_substr($lstr, $mlen);
              $str = utf8_substr_replace($str, $replace, $matched+utf8_strlen($matches[1]), $slen);
              $matched += $mlen + $lendif;
              $i++;
          }

          return $str;
      } else {
          foreach (array_keys($search) as $k) {
              if (is_array($replace)) {
                  if (array_key_exists($k,$replace)) {
                      $str = utf8_ireplace($search[$k], $replace[$k], $str, $count);
                  } else {
                      $str = utf8_ireplace($search[$k], '', $str, $count);
                  }
              } else {
                  $str = utf8_ireplace($search[$k], $replace, $str, $count);
              }
          }

          return $str;
      }
  }
  /**
   * UTF-8 aware strcspn(): length, in characters, of the leading run of $str
   * that contains none of the characters in $mask.
   *
   * Fix: the emptiness check used empty(), which also treats the perfectly
   * valid mask '0' as empty and returned NULL for it. Only a genuinely empty
   * (or non-scalar / FALSE) mask is rejected now.
   *
   * @param string   $str    UTF-8 encoded input
   * @param string   $mask   characters that end the run
   * @param int|null $start  optional offset handed to utf8_substr()
   * @param int|null $length optional length handed to utf8_substr()
   * @return int|null NULL for an empty mask, otherwise the run length
   *                  (0 when the first character is already in the mask)
   */
  function utf8_strcspn($str, $mask, $start = NULL, $length = NULL){
      if (!is_scalar($mask) || $mask === FALSE || (string) $mask === '') {
          return NULL;
      }

      // Escape characters that are special inside a regex character class.
      $mask = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', (string) $mask);

      if ($start !== NULL || $length !== NULL) {
          $str = utf8_substr($str, $start, $length);
      }

      preg_match('/^[^'.$mask.']+/u', $str, $matches);

      if (isset($matches[0])) {
          return utf8_strlen($matches[0]);
      }

      return 0;
  }
  /**
   * UTF-8 aware ucwords(): upper-cases the first character of every word.
   *
   * @param string $str UTF-8 encoded input
   * @return string the result of preg_replace_callback() over every word
   */
  function utf8_ucwords($str){
      // The class [\x0c\x09\x0b\x0a\x0d\x20] covers form feed, horizontal tab,
      // vertical tab, line feed, carriage return and space - the separators
      // that delimit a "word" as described at http://www.php.net/ucwords
      // Each match is: optional leading whitespace, one first character, rest of the word.
      return preg_replace_callback(
          '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u',
          'utf8_ucwords_callback',
          $str
      );
  }
  /**
   * preg_replace_callback() handler used by utf8_ucwords().
   *
   * Expects the match layout produced by the utf8_ucwords() pattern:
   *   [0] whole match, [2] leading whitespace (may be empty),
   *   [3] first character of the word.
   *
   * Fix: the previous version removed the leading whitespace with ltrim(),
   * whose default set does not include form feed (which the pattern treats as
   * whitespace) but does include NUL (which the pattern treats as a word
   * character). In both cases the wrong character was replaced. The word tail
   * is now taken by offset. Byte offsets are safe here because groups 2 and 3
   * are exact prefixes of the whole match.
   *
   * @param array $matches regex match groups
   * @return string the leading whitespace followed by the capitalised word
   */
  function utf8_ucwords_callback($matches){
      $leadingws = $matches[2];
      $ucfirst = utf8_strtoupper($matches[3]);

      // Everything after "whitespace + first character".
      $rest = substr($matches[0], strlen($leadingws) + strlen($matches[3]));

      return $leadingws . $ucfirst . $rest;
  }
  /**
   * UTF-8 aware strcasecmp(): compares two strings after lower-casing both
   * with utf8_strtolower().
   *
   * @param string $strX first UTF-8 encoded string
   * @param string $strY second UTF-8 encoded string
   * @return int the result of strcmp() on the lower-cased strings
   */
  function utf8_strcasecmp($strX, $strY){
      return strcmp(utf8_strtolower($strX), utf8_strtolower($strY));
  }
  /**
   * UTF-8 aware ord(): returns the code point encoded by the first character
   * of $chr.
   *
   * The lead byte decides how many bytes are consumed (1 to 6, following the
   * original long-form UTF-8 layout). Continuation bytes are not validated
   * beyond being present.
   *
   * Fixes over the previous version:
   *  - the 6-byte branch read the undefined variable $c instead of $chr;
   *  - string offsets use [] instead of the {} form removed in PHP 8;
   *  - a continuation byte (128-191) used as lead byte no longer falls off
   *    the end of the function returning NULL; it now reports an error and
   *    returns FALSE like the other failure paths.
   *
   * @param string $chr UTF-8 encoded character (extra bytes are ignored)
   * @return int|false the code point, or FALSE (after trigger_error) when the
   *                   sequence is too short or the lead byte is invalid
   */
  function utf8_ord($chr){
      $ord0 = ord($chr);

      // 1 byte: 0xxxxxxx
      if ($ord0 >= 0 && $ord0 <= 127) {
          return $ord0;
      }

      if (!isset($chr[1])) {
          trigger_error('Short sequence - at least 2 bytes expected, only 1 seen');
          return FALSE;
      }
      $ord1 = ord($chr[1]);

      // 2 bytes: 110xxxxx 10xxxxxx
      if ($ord0 >= 192 && $ord0 <= 223) {
          return ($ord0 - 192) * 64
              + ($ord1 - 128);
      }

      if (!isset($chr[2])) {
          trigger_error('Short sequence - at least 3 bytes expected, only 2 seen');
          return FALSE;
      }
      $ord2 = ord($chr[2]);

      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      if ($ord0 >= 224 && $ord0 <= 239) {
          return ($ord0 - 224) * 4096
              + ($ord1 - 128) * 64
              + ($ord2 - 128);
      }

      if (!isset($chr[3])) {
          trigger_error('Short sequence - at least 4 bytes expected, only 3 seen');
          return FALSE;
      }
      $ord3 = ord($chr[3]);

      // 4 bytes: 11110xxx + 3 continuation bytes
      if ($ord0 >= 240 && $ord0 <= 247) {
          return ($ord0 - 240) * 262144
              + ($ord1 - 128) * 4096
              + ($ord2 - 128) * 64
              + ($ord3 - 128);
      }

      if (!isset($chr[4])) {
          trigger_error('Short sequence - at least 5 bytes expected, only 4 seen');
          return FALSE;
      }
      $ord4 = ord($chr[4]);

      // 5 bytes: 111110xx + 4 continuation bytes
      if ($ord0 >= 248 && $ord0 <= 251) {
          return ($ord0 - 248) * 16777216
              + ($ord1 - 128) * 262144
              + ($ord2 - 128) * 4096
              + ($ord3 - 128) * 64
              + ($ord4 - 128);
      }

      if (!isset($chr[5])) {
          trigger_error('Short sequence - at least 6 bytes expected, only 5 seen');
          return FALSE;
      }
      $ord5 = ord($chr[5]);

      // 6 bytes: 1111110x + 5 continuation bytes
      if ($ord0 >= 252 && $ord0 <= 253) {
          return ($ord0 - 252) * 1073741824
              + ($ord1 - 128) * 16777216
              + ($ord2 - 128) * 262144
              + ($ord3 - 128) * 4096
              + ($ord4 - 128) * 64
              + ($ord5 - 128);
      }

      if ($ord0 >= 254 && $ord0 <= 255) {
          trigger_error('Invalid UTF-8 with surrogate ordinal '.$ord0);
          return FALSE;
      }

      // Only 128-191 can reach this point: a continuation byte in lead position.
      trigger_error('Invalid UTF-8 lead byte '.$ord0);
      return FALSE;
  }
  /**
   * UTF-8 aware wordwrap().
   *
   * Walks the string one character at a time, keeping a column counter:
   *  - with $cut true, $break is inserted after every $width-th character;
   *  - otherwise $break is inserted after the first space found once the
   *    current line has reached $width characters (the space itself is kept).
   *
   * Fixes over the previous version:
   *  - the space test read $str{$x}, which is a BYTE offset indexed with a
   *    CHARACTER position (wrong character after any multi-byte character)
   *    and uses the {} offset syntax removed in PHP 8;
   *  - in non-cut mode the column counter only advanced on spaces, so lines
   *    were broken every $width spaces rather than at $width characters;
   *  - the string length is computed once instead of on every iteration.
   *
   * NOTE(review): the default $break is the single-quoted '\n', i.e. a literal
   * backslash followed by "n", not a newline. It is left untouched because it
   * is part of the signature - callers should pass "\n" explicitly.
   *
   * @param string $str   UTF-8 encoded input
   * @param int    $width target line width in characters
   * @param string $break string inserted at each break point
   * @param bool   $cut   TRUE to break exactly every $width characters
   * @return string the wrapped string
   */
  function utf8_wordwrap($str, $width=75, $break='\n', $cut=false){
      $newString = '';
      $total = utf8_strlen($str);
      $y = 1;

      for ($x = 0; $x < $total; $x++) {
          $char = utf8_substr($str, $x, 1);
          $newString .= $char;

          if ($cut == true) {
              $breakHere = ($y == $width);
          } else {
              $breakHere = ($char === ' ' && $y >= $width);
          }

          if ($breakHere) {
              $newString .= $break;
              $y = 1;
          } else {
              $y++;
          }
      }

      return $newString;
  }
  /**
   * UTF-8 aware pathinfo().
   *
   * @param string $string UTF-8 encoded path
   * @return array map with the keys 'basename', 'dirname', 'filename' and
   *               'extension', in that order
   */
  function utf8_pathinfo($string){
      return array(
          'basename'  => utf8_basename($string),
          'dirname'   => dirname($string),
          'filename'  => utf8_getFileName($string),
          'extension' => utf8_getExtension($string),
      );
  }
  /**
   * UTF-8 aware basename(): returns the trailing name component of a path.
   *
   * Backslashes are treated as directory separators, trailing separators are
   * ignored, and an optional suffix is removed from the end of the name.
   *
   * Byte-oriented string functions are used on purpose: '/', '\' and the cut
   * points they define are single-byte ASCII, which never occurs inside a
   * multi-byte UTF-8 sequence, so no character can be split.
   *
   * Fixes over the previous version:
   *  - the suffix was cut at its LAST OCCURRENCE even when the name did not
   *    end with it ('file.txt.bak' with suffix '.txt' gave 'file'); it is now
   *    removed only when the name ends with it and is longer than it;
   *  - trailing-slash cleanup was skipped whenever the first slash was at
   *    offset 0, so '/a///' produced an empty string.
   *
   * @param string $string UTF-8 encoded path
   * @param string $suffix optional suffix to strip from the end of the name
   * @return string the name component
   */
  function utf8_basename($string, $suffix=''){
      // Normalise Windows separators and drop every trailing slash.
      $path = rtrim(str_replace('\\', '/', $string), '/');

      // Keep only what follows the last separator, if there is one.
      $slashPos = strrpos($path, '/');
      $name = ($slashPos === false) ? $path : substr($path, $slashPos + 1);

      if ($suffix != '') {
          $suffix = (string) $suffix;
          $suffixLen = strlen($suffix);
          if (strlen($name) > $suffixLen && substr($name, -$suffixLen) === $suffix) {
              $name = substr($name, 0, -$suffixLen);
          }
      }

      return $name;
  }
  /**
   * Returns the base name of a path without its extension.
   *
   * Fix: when the name contains no dot, utf8_strrpos() reports "not found"
   * (FALSE, as relied on elsewhere in this file) and the previous code passed
   * that straight to utf8_substr() as the length. The whole name is now
   * returned in that case.
   *
   * @param string $string UTF-8 encoded path
   * @return string the base name up to, but not including, its last dot
   */
  function utf8_getFileName($string){
      $name = utf8_basename($string);

      $dotPos = utf8_strrpos($name, '.');
      if ($dotPos === false) {
          return $name;
      }

      return utf8_substr($name, 0, $dotPos);
  }
  /**
   * Returns the extension of a path (the text after the last dot of its
   * base name).
   *
   * Fixes over the previous version:
   *  - with no dot present, "not found" (FALSE) + 1 evaluated to offset 1 and
   *    the string minus its first character was returned; '' is returned now;
   *  - the dot was searched in the whole path, so a dot in a directory name
   *    ('/a.b/c') was mistaken for the extension separator; only the base
   *    name is inspected now, matching utf8_getFileName().
   *
   * @param string $string UTF-8 encoded path
   * @return string the extension without the dot, or '' when there is none
   */
  function utf8_getExtension($string){
      $name = utf8_basename($string);

      $dotPos = utf8_strrpos($name, '.');
      if ($dotPos === false) {
          return '';
      }

      return utf8_substr($name, $dotPos + 1);
  }
  332. ?>