PageRenderTime 53ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/src/PurString.php

https://github.com/pop/pur
PHP | 289 lines | 188 code | 10 blank | 91 comment | 27 complexity | bd9a90d9e76d0f6678a9be99cc2ed074 MD5 | raw file
  1. <?php
  2. /**
  3. * General static helper methods for string manipulation.
  4. */
  5. class PurString{
  6. /**
  7. * Convert a word to its singular form.
  8. *
  9. * Exemples:
  10. * - friends -> friend
  11. * - processes -> process
  12. * - families -> family
  13. *
  14. * @return mixed Singular word or null if no available singular transformation
  15. * @param string $word
  16. */
  17. public static function toSingular($word){
  18. if(substr($word,strlen($word)-4,4)=='sses'){
  19. return substr($word,0,strlen($word)-2);
  20. }else if(substr($word,strlen($word)-3,3)=='ies'){
  21. return substr($word,0,strlen($word)-3).'y';
  22. }else if(substr($word,strlen($word)-1,1)=='s'){
  23. return substr($word,0,strlen($word)-1);
  24. }
  25. }
  26. /**
  27. * Convert a word to its plural form.
  28. *
  29. * Note, this is an english centric method with obvious limitations.
  30. *
  31. * Exemples:
  32. * assert( "friends" === PurString::toPlural("friend") );
  33. * assert( "processes" === PurString::toPlural("process") );
  34. * assert( "families" === PurString::toPlural("family") );
  35. *
  36. * @return string Plural word
  37. * @param string $word
  38. */
  39. public static function toPlural($word){
  40. if(substr($word,strlen($word)-2,2)=='ss'){
  41. return $word.'es';
  42. }else if(substr($word,strlen($word)-1,1)=='y'){
  43. return substr($word,0,strlen($word)-1).'ies';
  44. }else{
  45. return $word.'s';
  46. }
  47. }
  48. /**
  49. * Returns the camel cased form of a word.
  50. *
  51. * Exemple:
  52. * assert( "LastName" === PurString::camelize("last_name") );
  53. *
  54. * @param string $word Word to camelize
  55. * @return string Camelized word. likeThis.
  56. */
  57. public static function camelize($word) {
  58. return str_replace(" ", "", ucwords(str_replace("_", " ", $word)));
  59. }
  60. /**
  61. * Returns an underscore syntaxed version of a camel cased word.
  62. *
  63. * Exemple:
  64. * assert( "last_name" === PurString::underscore("LastName") );
  65. *
  66. * @param string $word Camel-cased word to be transform as underscore
  67. * @return string Underscore-syntaxed version of the word
  68. */
  69. public static function underscore($word) {
  70. return strtolower(preg_replace('/(?<=\\w)([A-Z])/', '_\\1', $word));
  71. }
  72. /**
  73. * Sanitize a string with symbols and non ascii charateres.
  74. *
  75. * Exemple:
  76. *
  77. * * $this->assertEquals(
  78. * * 'abc-def-ghi',
  79. * * PurString::path( 'àbc dêf@ghî' ));
  80. *
  81. * @return
  82. * @param object $path
  83. */
  84. public static function path($path){
  85. $path = preg_replace("/[^\w-_]/","-",self::toAscii($path));
  86. $path = strtolower($path);
  87. $path = str_replace('--','-',$path);
  88. if(substr($path,0,1)=='-') $path = substr($path,1);
  89. if(substr($path,-1)=='-') $path = substr($path,0,-1);
  90. return $path;
  91. }
  92. /**
  93. * Format bytes into a readable string.
  94. *
  95. * @return
  96. * @param object $size
  97. */
  98. public static function prettyBytes($size){
  99. $mb = 1024*1024;
  100. $gb = $mb*1024;
  101. if($size>$gb) {
  102. $mysize = sprintf("%01.2f",$size/$gb).' GB';
  103. }else if( $size>$mb && $size<=$gb) {
  104. $mysize = sprintf("%01.2f",$size/$mb).' MB';
  105. }else if( $size>=1024 ) {
  106. $mysize = sprintf("%01.2f",$size/1024).' Kb';
  107. }else{
  108. $mysize = $size.' bytes';
  109. }
  110. return $mysize;
  111. }
  112. /**
  113. * Tries to detect if a string is in Unicode encoding.
  114. *
  115. * @param text string Text to check
  116. * @link http://www.php.net/manual/en/function.utf8-encode.php
  117. */
  118. public static function isUtf8($text) {
  119. for ($i=0; $i<strlen($text); $i++) {
  120. if (ord($text[$i]) < 0x80) continue; # 0bbbbbbb
  121. elseif ((ord($text[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb
  122. elseif ((ord($text[$i]) & 0xF0) == 0xE0) $n=2; # 1110bbbb
  123. elseif ((ord($text[$i]) & 0xF8) == 0xF0) $n=3; # 11110bbb
  124. elseif ((ord($text[$i]) & 0xFC) == 0xF8) $n=4; # 111110bb
  125. elseif ((ord($text[$i]) & 0xFE) == 0xFC) $n=5; # 1111110b
  126. else return false; # Does not match any model
  127. for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
  128. if ((++$i == strlen($text)) || ((ord($text[$i]) & 0xC0) != 0x80))
  129. return false;
  130. }
  131. }
  132. return true;
  133. }
  134. /**
  135. * Validate a provided email.
  136. *
  137. * Note, does not check that the length of the local part is less than 65 nor
  138. * that the length of the domain is less than 256.
  139. *
  140. * @return boolean True if valid, false otherwise
  141. * @param string $email
  142. */
  143. public static function isEmail($email){
  144. //return preg_match('/^[A-Za-z0-9\._-]+[@][A-Za-z0-9\._-]+[\.].[A-Za-z0-9]+$/',$email);
  145. return (bool) preg_match('/^[a-z0-9,!#\$%&\'\*\+\/\=\?\^_`\{\|}~-]+(\.[a-z0-9,!#\$%&\'\*\+\/\=\?\^_`\{\|}~-]+)*@[a-z0-9-]+(\.[a-z0-9-]+)*\.([a-z]{2,})$/',$email);
  146. }
  147. /**
  148. * Remove accent from string by converting it from a utf-8 or a ISO-8859-1 to ASCII.
  149. *
  150. * @return
  151. * @param object $string
  152. */
  153. public static function toAscii($string){
  154. // If string is clean, no need to process it
  155. if(!preg_match('/[\x80-\xff]/',$string))
  156. return $string;
  157. if(self::isUtf8($string)){
  158. $chars = array(
  159. // Latin-1 Supplement
  160. chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
  161. chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
  162. chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
  163. chr(195).chr(135) => 'C', chr(195).chr(136) => 'E',
  164. chr(195).chr(137) => 'E', chr(195).chr(138) => 'E',
  165. chr(195).chr(139) => 'E', chr(195).chr(140) => 'I',
  166. chr(195).chr(141) => 'I', chr(195).chr(142) => 'I',
  167. chr(195).chr(143) => 'I', chr(195).chr(145) => 'N',
  168. chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
  169. chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
  170. chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
  171. chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
  172. chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
  173. chr(195).chr(159) => 's', chr(195).chr(160) => 'a',
  174. chr(195).chr(161) => 'a', chr(195).chr(162) => 'a',
  175. chr(195).chr(163) => 'a', chr(195).chr(164) => 'a',
  176. chr(195).chr(165) => 'a', chr(195).chr(167) => 'c',
  177. chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
  178. chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
  179. chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
  180. chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
  181. chr(195).chr(177) => 'n', chr(195).chr(178) => 'o',
  182. chr(195).chr(179) => 'o', chr(195).chr(180) => 'o',
  183. chr(195).chr(181) => 'o', chr(195).chr(182) => 'o',
  184. chr(195).chr(182) => 'o', chr(195).chr(185) => 'u',
  185. chr(195).chr(186) => 'u', chr(195).chr(187) => 'u',
  186. chr(195).chr(188) => 'u', chr(195).chr(189) => 'y',
  187. chr(195).chr(191) => 'y',
  188. // Latin Extended-A
  189. chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
  190. chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
  191. chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
  192. chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
  193. chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
  194. chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
  195. chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
  196. chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
  197. chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
  198. chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
  199. chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
  200. chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
  201. chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
  202. chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
  203. chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
  204. chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
  205. chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
  206. chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
  207. chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
  208. chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
  209. chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
  210. chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
  211. chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
  212. chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
  213. chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
  214. chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij',
  215. chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
  216. chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
  217. chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
  218. chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
  219. chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
  220. chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
  221. chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
  222. chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
  223. chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
  224. chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
  225. chr(197).chr(136) => 'n', chr(197).chr(137) => 'N',
  226. chr(197).chr(138) => 'n', chr(197).chr(139) => 'N',
  227. chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
  228. chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
  229. chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
  230. chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe',
  231. chr(197).chr(148) => 'R',chr(197).chr(149) => 'r',
  232. chr(197).chr(150) => 'R',chr(197).chr(151) => 'r',
  233. chr(197).chr(152) => 'R',chr(197).chr(153) => 'r',
  234. chr(197).chr(154) => 'S',chr(197).chr(155) => 's',
  235. chr(197).chr(156) => 'S',chr(197).chr(157) => 's',
  236. chr(197).chr(158) => 'S',chr(197).chr(159) => 's',
  237. chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
  238. chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
  239. chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
  240. chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
  241. chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
  242. chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
  243. chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
  244. chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
  245. chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
  246. chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
  247. chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
  248. chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
  249. chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
  250. chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
  251. chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
  252. chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
  253. // Euro Sign
  254. chr(226).chr(130).chr(172) => 'E',
  255. // GBP (Pound) Sign
  256. chr(194).chr(163) => '');
  257. $string = strtr($string, $chars);
  258. } else {
  259. // Assume ISO-8859-1 if not UTF-8
  260. $chars['in'] =
  261. chr(128).chr(131).chr(138).chr(142).chr(154).chr(158).
  262. chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194).
  263. chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202).
  264. chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210).
  265. chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218).
  266. chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227).
  267. chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235).
  268. chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243).
  269. chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251).
  270. chr(252).chr(253).chr(255);
  271. $chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";
  272. $string = strtr($string,$chars['in'],$chars['out']);
  273. $double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254));
  274. $double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th');
  275. $string = str_replace($double_chars['in'], $double_chars['out'], $string);
  276. }
  277. return $string;
  278. }
  279. }