PageRenderTime 43ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/classes/kohana/text.php

https://github.com/popovag/kohana_core
PHP | 469 lines | 212 code | 55 blank | 202 comment | 28 complexity | fac7776aef8ca0892dd2cb7bdf8af0a5 MD5 | raw file
  1. <?php defined('SYSPATH') or die('No direct access allowed.');
  2. /**
  3. * Text helper class. Provides simple methods for working with text.
  4. *
  5. * @package Kohana
  6. * @category Helpers
  7. * @author Kohana Team
  8. * @copyright (c) 2007-2008 Kohana Team
  9. * @license http://kohanaphp.com/license
  10. */
  11. class Kohana_Text {
  /**
   * Limits a phrase to a given number of words.
   *
   *     $text = Text::limit_words($text);
   *
   * Blank (whitespace-only) phrases are returned untouched and a limit of
   * zero or less yields only the end character. The end character is only
   * appended when the phrase was actually shortened.
   *
   * @param   string   $str       phrase to limit words of
   * @param   integer  $limit     number of words to limit to
   * @param   string   $end_char  end character or entity (defaults to "&#8230;")
   * @return  string
   */
  public static function limit_words($str, $limit = 100, $end_char = NULL)
  {
      $limit = (int) $limit;
      $end_char = ($end_char === NULL) ? '&#8230;' : $end_char;

      if (trim($str) === '')
          return $str;

      if ($limit <= 0)
          return $end_char;

      // PCRE rejects repetition counts above 65535, so cap the quantifier
      $pattern = '/^\s*+(?:\S++\s*+){1,'.min($limit, 65535).'}/';

      // With the "u" modifier preg_match() fails on malformed UTF-8. Retry
      // byte-wise in that case, and hand the phrase back unchanged when no
      // word can be matched at all instead of reading a missing $matches[0].
      if ( ! preg_match($pattern.'u', $str, $matches) AND ! preg_match($pattern, $str, $matches))
          return $str;

      // Only attach the end character if the matched string is shorter
      // than the starting string.
      return rtrim($matches[0]).(strlen($matches[0]) === strlen($str) ? '' : $end_char);
  }
  /**
   * Truncates a phrase to a maximum number of characters.
   *
   *     $text = Text::limit_chars($text);
   *
   * Blank phrases and phrases that already fit within the limit are returned
   * unchanged. A limit of zero or less yields only the end character.
   *
   * @param   string   $str             phrase to limit characters of
   * @param   integer  $limit           number of characters to limit to
   * @param   string   $end_char        end character or entity (defaults to "&#8230;")
   * @param   boolean  $preserve_words  when true, the cut is extended to the end of the word in progress
   * @return  string
   * @uses    UTF8::strlen
   * @uses    UTF8::substr
   */
  public static function limit_chars($str, $limit = 100, $end_char = NULL, $preserve_words = FALSE)
  {
      if ($end_char === NULL)
      {
          $end_char = '&#8230;';
      }

      $limit = (int) $limit;

      // Nothing to shorten
      if (trim($str) === '' OR UTF8::strlen($str) <= $limit)
      {
          return $str;
      }

      if ($limit <= 0)
      {
          return $end_char;
      }

      if ($preserve_words)
      {
          // Take ($limit - 1) characters, then run on to the end of the current word
          preg_match('/^.{'.($limit - 1).'}\S*/us', $str, $matches);

          // The end character is skipped when the whole phrase was matched
          $suffix = (strlen($matches[0]) == strlen($str)) ? '' : $end_char;

          return rtrim($matches[0]).$suffix;
      }

      // Hard cut at the character limit
      return rtrim(UTF8::substr($str, 0, $limit)).$end_char;
  }
  /**
   * Cycles through the given strings, returning the next one on every call.
   *
   *     echo Text::alternate('one', 'two'); // "one"
   *     echo Text::alternate('one', 'two'); // "two"
   *     echo Text::alternate('one', 'two'); // "one"
   *
   * Calling the method without arguments resets the cycle and returns an
   * empty string. All calls share one counter, so interleaving calls with
   * different sets of strings may produce unexpected results.
   *
   * @param   string  ...  strings to alternate between
   * @return  string
   */
  public static function alternate()
  {
      // Number of strings handed out since the last reset
      static $calls;

      $choices = func_get_args();

      if ($choices === array())
      {
          // No arguments: restart the cycle
          $calls = 0;
          return '';
      }

      $picked = $choices[$calls % count($choices)];
      $calls++;

      return $picked;
  }
  /**
   * Generates a random string of a given type and length.
   *
   *     $str = Text::random(); // 8 character random string
   *
   * The following types are supported:
   *
   * alnum
   * :  Upper and lower case a-z, 0-9
   *
   * alpha
   * :  Upper and lower case a-z
   *
   * hexdec
   * :  Hexadecimal characters a-f, 0-9
   *
   * numeric
   * :  Digits 0-9
   *
   * nozero
   * :  Digits 1-9
   *
   * distinct
   * :  Uppercase characters and numbers that cannot be confused
   *
   * You can also create a custom type by providing the "pool" of characters
   * as the type. An empty pool produces an empty string.
   *
   * [!!] Characters are picked with mt_rand(), which is not a
   * cryptographically secure source; do not use the result as a secret.
   *
   * @param   string   $type    a type of pool, or a string of characters to use as the pool
   * @param   integer  $length  length of string to return
   * @return  string
   * @uses    UTF8::is_ascii
   * @uses    UTF8::str_split
   */
  public static function random($type = 'alnum', $length = 8)
  {
      $utf8 = FALSE;

      switch ($type)
      {
          case 'alnum':
              $pool = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
          break;
          case 'alpha':
              $pool = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
          break;
          case 'hexdec':
              $pool = '0123456789abcdef';
          break;
          case 'numeric':
              $pool = '0123456789';
          break;
          case 'nozero':
              $pool = '123456789';
          break;
          case 'distinct':
              $pool = '2345679ACDEFHJKLMNPRSTUVWXYZ';
          break;
          default:
              $pool = (string) $type;

              // An empty custom pool has nothing to draw from; bail out before
              // mt_rand() is asked for the invalid range 0..-1.
              if ($pool === '')
                  return '';

              $utf8 = ! UTF8::is_ascii($pool);
          break;
      }

      // Split the pool into an array of characters
      $pool = ($utf8 === TRUE) ? UTF8::str_split($pool, 1) : str_split($pool, 1);

      // Largest pool key
      $max = count($pool) - 1;

      $str = '';
      for ($i = 0; $i < $length; $i++)
      {
          // Select a random character from the pool and add it to the string
          $str .= $pool[mt_rand(0, $max)];
      }

      // Make sure alnum strings contain at least one letter and one digit
      if ($type === 'alnum' AND $length > 1)
      {
          if (ctype_alpha($str))
          {
              // Overwrite a random position with a random digit (ASCII 48-57)
              $str[mt_rand(0, $length - 1)] = chr(mt_rand(48, 57));
          }
          elseif (ctype_digit($str))
          {
              // Overwrite a random position with a random uppercase letter (ASCII 65-90)
              $str[mt_rand(0, $length - 1)] = chr(mt_rand(65, 90));
          }
      }

      return $str;
  }
  /**
   * Collapses runs of slashes into a single slash.
   *
   *     $str = Text::reduce_slashes('foo//bar/baz'); // "foo/bar/baz"
   *
   * A run that directly follows a colon (as in "http://") is left alone.
   *
   * @param   string  $str  string to reduce slashes of
   * @return  string
   */
  public static function reduce_slashes($str)
  {
      // Two or more consecutive slashes, unless the run starts right after a colon
      $repeated_slashes = '#(?<!:)//+#';

      return preg_replace($repeated_slashes, '/', $str);
  }
  /**
   * Replaces the given words with a string.
   *
   *     // Displays "What the #####, man!"
   *     echo Text::censor('What the frick, man!', array('frick'));
   *
   * A "*" inside a bad word acts as a wildcard for any run of non-whitespace
   * characters. When the replacement is exactly one character long it is
   * repeated to the length of the censored word; longer replacements are
   * handed to preg_replace() as the replacement string.
   *
   * @param   string        $str                    phrase to replace words in
   * @param   array|string  $badwords               word or list of words to replace
   * @param   string        $replacement            replacement string
   * @param   boolean       $replace_partial_words  TRUE replaces matches anywhere, FALSE only at word boundaries (space, period, etc)
   * @return  string
   * @uses    UTF8::strlen
   */
  public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = TRUE)
  {
      $patterns = array();

      foreach ((array) $badwords as $badword)
      {
          $badword = (string) $badword;

          // An empty alternative would match at every position of the phrase
          if ($badword === '')
              continue;

          // Quote the word for the "!" delimited regex, then turn "*" into a wildcard
          $patterns[] = str_replace('\*', '\S*?', preg_quote($badword, '!'));
      }

      // Nothing to censor
      if (empty($patterns))
          return $str;

      $regex = '('.implode('|', $patterns).')';

      if ($replace_partial_words === FALSE)
      {
          // Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
          $regex = '(?<=\b|\s|^)'.$regex.'(?=\b|\s|$)';
      }

      $regex = '!'.$regex.'!ui';

      if (UTF8::strlen($replacement) == 1)
      {
          // The "e" modifier was removed in PHP 7 and evaluated code built from
          // the subject; a callback does the same job without eval.
          return preg_replace_callback($regex, function ($matches) use ($replacement)
          {
              return str_repeat($replacement, UTF8::strlen($matches[1]));
          }, $str);
      }

      return preg_replace($regex, $replacement, $str);
  }
  /**
   * Finds the text that is similar between a set of words, i.e. the prefix
   * that all of the words start with.
   *
   *     $match = Text::similar(array('fred', 'fran', 'free')); // "fr"
   *
   * An empty list yields an empty string.
   *
   * @param   array  $words  words to find similar text of
   * @return  string
   */
  public static function similar(array $words)
  {
      // Nothing to compare
      if (empty($words))
          return '';

      // First word is the word to match against; reset() does not depend on
      // where the array's internal pointer currently is.
      $word = reset($words);

      for ($i = 0, $max = strlen($word); $i < $max; ++$i)
      {
          foreach ($words as $w)
          {
              // Once a difference is found, break out of the loops
              if ( ! isset($w[$i]) OR $w[$i] !== $word[$i])
                  break 2;
          }
      }

      $prefix = (string) substr($word, 0, $i);

      // The comparison above works on bytes, so it can stop in the middle of a
      // multibyte character. When the reference word is valid UTF-8, drop the
      // dangling bytes so the result is valid UTF-8 too.
      if ($prefix !== '' AND preg_match('//u', $word) === 1)
      {
          while ($prefix !== '' AND preg_match('//u', $prefix) !== 1)
          {
              $prefix = (string) substr($prefix, 0, -1);
          }
      }

      // Return the similar text
      return $prefix;
  }
  /**
   * Turns plain email addresses and URLs found in a text into links.
   * Existing links will not be altered.
   *
   *     echo Text::auto_link($text);
   *
   * @param   string  $text  text to auto link
   * @return  string
   * @uses    Text::auto_link_urls
   * @uses    Text::auto_link_emails
   */
  public static function auto_link($text)
  {
      // Emails are handled before URLs to prevent problems with
      // "www.domain.com@example.com"
      $with_emails = Text::auto_link_emails($text);

      return Text::auto_link_urls($with_emails);
  }
  /**
   * Converts text anchors into links. Existing links will not be altered.
   *
   *     echo Text::auto_link_urls($text);
   *
   * Every occurrence is replaced in a single pass per pattern, so a URL that
   * appears several times (or that is the prefix of another URL) is linked
   * exactly once and freshly generated anchors are never rewritten.
   *
   * @param   string  $text  text to auto link
   * @return  string
   * @uses    HTML::anchor
   */
  public static function auto_link_urls($text)
  {
      // Link all http/https/ftp/ftps URLs that are not part of an existing html anchor
      $text = preg_replace_callback(
          '~\b(?<!href="|">)(?:ht|f)tps?://\S+(?:/|\b)~i',
          function ($matches)
          {
              return HTML::anchor($matches[0]);
          },
          $text
      );

      // Link all naked www.links.com (without http://); the lookbehind keeps
      // the anchors created above out of this pass
      return preg_replace_callback(
          '~\b(?<!://)www(?:\.[a-z0-9][-a-z0-9]*+)+\.[a-z]{2,6}\b~i',
          function ($matches)
          {
              return HTML::anchor('http://'.$matches[0], $matches[0]);
          },
          $text
      );
  }
  /**
   * Converts text email addresses into links. Existing links will not
   * be altered.
   *
   *     echo Text::auto_link_emails($text);
   *
   * All addresses are replaced in a single pass, so an address that occurs
   * more than once is linked exactly once per occurrence and the generated
   * markup is never scanned again.
   *
   * @param   string  $text  text to auto link
   * @return  string
   * @uses    HTML::mailto
   */
  public static function auto_link_emails($text)
  {
      // Finds all email addresses that are not part of an existing html mailto anchor
      // Note: The "58;" negative lookbehind prevents matching of existing encoded html mailto anchors
      //       The html entity for a colon (:) is &#58; or &#058; or &#0058; etc.
      return preg_replace_callback(
          '~\b(?<!href="mailto:|">|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b~i',
          function ($matches)
          {
              // Replace the address with the markup returned by HTML::mailto()
              return HTML::mailto($matches[0]);
          },
          $text
      );
  }
  /**
   * Wraps paragraphs of text in "p" tags and, optionally, converts single
   * line breaks to "br" tags.
   * Basically [nl2br](http://php.net/nl2br) on steroids.
   *
   *     echo Text::auto_p($text);
   *
   * [!!] This method is not foolproof since it uses regex to parse HTML.
   *
   * @param   string   $str  subject
   * @param   boolean  $br   convert single linebreaks to <br />
   * @return  string
   */
  public static function auto_p($str, $br = TRUE)
  {
      $str = trim($str);

      // Nothing but whitespace: there is nothing to wrap
      if ($str === '')
      {
          return '';
      }

      // Use "\n" as the only kind of line ending
      $str = str_replace(array("\r\n", "\r"), "\n", $str);

      // Strip leading, then trailing, spaces and tabs on every line
      $str = preg_replace(array('~^[ \t]+~m', '~[ \t]+$~m'), '', $str);

      // Elements that should not be surrounded by p tags
      $no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|olgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';

      // The tag-aware passes only run when the text contains a "<" at all
      $html_found = (strpos($str, '<') !== FALSE);

      if ($html_found)
      {
          // Add a line break before an opening $no_p tag at the start of a line ...
          $str = preg_replace('~^<'.$no_p.'[^>]*+>~im', "\n$0", $str);

          // ... and after a closing $no_p tag at the end of a line
          $str = preg_replace('~</'.$no_p.'\s*+>$~im', "$0\n", $str);
      }

      // Wrap everything in one paragraph, then split it wherever two or more
      // line breaks follow each other
      $str = preg_replace('~\n{2,}~', "</p>\n\n<p>", '<p>'.trim($str).'</p>');

      if ($html_found)
      {
          // Drop the p tags that ended up directly around $no_p elements
          $str = preg_replace('~<p>(?=</?'.$no_p.'[^>]*+>)~i', '', $str);
          $str = preg_replace('~(</?'.$no_p.'[^>]*+>)</p>~i', '$1', $str);
      }

      if ($br === TRUE)
      {
          // A line break with no other line break next to it becomes a <br />
          $str = preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);
      }

      return $str;
  }
  /**
   * Returns human readable sizes. Based on original functions written by
   * [Aidan Lister](http://aidanlister.com/repos/v/function.size_readable.php)
   * and [Quentin Zervaas](http://www.phpriot.com/d/code/strings/filesize-format/).
   *
   *     echo Text::bytes(filesize($file));
   *
   * Unless a unit is forced, the largest unit that keeps the value at 1 or
   * above is used; values beyond the largest known unit are expressed in
   * that largest unit (PB or PiB).
   *
   * @param   integer  $bytes       size in bytes
   * @param   string   $force_unit  a definitive unit (e.g. "MB" or "MiB")
   * @param   string   $format      the return string format (defaults to "%01.2f %s")
   * @param   boolean  $si          whether to use SI prefixes or IEC
   * @return  string
   */
  public static function bytes($bytes, $force_unit = NULL, $format = NULL, $si = TRUE)
  {
      // Format string
      $format = ($format === NULL) ? '%01.2f %s' : (string) $format;

      // Normalise once so that NULL is never handed to strpos()
      $force_unit = (string) $force_unit;

      // IEC prefixes (binary)
      if ($si == FALSE OR strpos($force_unit, 'i') !== FALSE)
      {
          $units = array('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB');
          $mod   = 1024;
      }
      // SI prefixes (decimal)
      else
      {
          $units = array('B', 'kB', 'MB', 'GB', 'TB', 'PB');
          $mod   = 1000;
      }

      // Determine unit to use
      $power = array_search($force_unit, $units, TRUE);

      if ($power === FALSE)
      {
          // Step up one unit at a time. Comparing against exact powers avoids
          // the rounding errors of floor(log()), and stopping at the last unit
          // keeps the index inside $units for very large values.
          $power   = 0;
          $largest = count($units) - 1;

          while ($power < $largest AND $bytes >= pow($mod, $power + 1))
          {
              ++$power;
          }
      }

      return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
  }
  /**
   * Prevents [widow words](http://www.shauninman.com/archive/2006/08/22/widont_wordpress_plugin)
   * by replacing the last space of the text with a non-breaking space, which
   * keeps the final two words together.
   *
   *     echo Text::widont($text);
   *
   * Trailing whitespace is removed first; text without any space is returned
   * as it is after that trimming.
   *
   * @param   string  $str  text to remove widows from
   * @return  string
   */
  public static function widont($str)
  {
      $str = rtrim($str);

      $last_space = strrpos($str, ' ');

      // A single word cannot leave a widow behind
      if ($last_space === FALSE)
      {
          return $str;
      }

      // Swap the one-character space for the entity
      return substr_replace($str, '&nbsp;', $last_space, 1);
  }
  414. } // End text