PageRenderTime 50ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/system/helpers/text.php

https://github.com/Toushi/flow
PHP | 389 lines | 196 code | 54 blank | 139 comment | 26 complexity | 2d9b10fce2504e4090b70491040e1a8c MD5 | raw file
  1. <?php defined('SYSPATH') or die('No direct script access.');
  2. /**
  3. * Text helper class.
  4. *
  5. * $Id: text.php 3228 2008-07-28 20:47:04Z dlib $
  6. *
  7. * @package Core
  8. * @author Kohana Team
  9. * @copyright (c) 2007-2008 Kohana Team
  10. * @license http://kohanaphp.com/license.html
  11. */
  12. class text_Core {
  /**
   * Limits a phrase to a given number of words.
   *
   * A "word" is a run of non-whitespace characters. The end character is only
   * appended when something was actually cut off.
   *
   * @param string phrase to limit words of
   * @param integer number of words to limit to
   * @param string end character or entity (NULL uses the &#8230; entity)
   * @return string
   */
  public static function limit_words($str, $limit = 100, $end_char = NULL)
  {
    $limit = (int) $limit;
    $end_char = ($end_char === NULL) ? '&#8230;' : $end_char;

    // Empty or whitespace-only input is handed back untouched
    if (trim($str) === '')
    {
      return $str;
    }

    // Nothing may be kept, so only the end character remains
    if ($limit <= 0)
    {
      return $end_char;
    }

    $pattern = '/^\s*+(?:\S++\s*+){1,'.$limit.'}/';

    // Match in UTF-8 mode first. If that fails (for instance because the
    // subject is not valid UTF-8) retry byte-wise, and if even that fails
    // return the phrase unchanged instead of reading an undefined match.
    if ( ! preg_match($pattern.'u', $str, $matches) AND ! preg_match($pattern, $str, $matches))
    {
      return $str;
    }

    // Only attach the end character if the matched string is shorter
    // than the starting string.
    $truncated = (strlen($matches[0]) !== strlen($str));

    return rtrim($matches[0]).($truncated ? $end_char : '');
  }
  /**
   * Limits a phrase to a given number of characters.
   *
   * Phrases that are empty, whitespace-only, or already within the limit
   * (as measured by utf8::strlen) are returned unchanged.
   *
   * @param string phrase to limit characters of
   * @param integer number of characters to limit to
   * @param string end character or entity (NULL uses the &#8230; entity)
   * @param boolean enable or disable the preservation of words while limiting
   * @return string
   */
  public static function limit_chars($str, $limit = 100, $end_char = NULL, $preserve_words = FALSE)
  {
    $end_char = ($end_char === NULL) ? '&#8230;' : $end_char;
    $limit = (int) $limit;

    // The trim() test comes first so blank input never reaches utf8::strlen
    if (trim($str) === '' OR utf8::strlen($str) <= $limit)
    {
      return $str;
    }

    if ($limit <= 0)
    {
      return $end_char;
    }

    // Word-preserving cut: take ($limit - 1) characters, then run on to the
    // end of the word that is in progress. When the match fails (for instance
    // on malformed UTF-8) fall through to the hard cut below instead of
    // reading an undefined match.
    if ($preserve_words AND preg_match('/^.{'.($limit - 1).'}\S*/us', $str, $matches))
    {
      $truncated = (strlen($matches[0]) != strlen($str));

      return rtrim($matches[0]).($truncated ? $end_char : '');
    }

    // Hard cut at exactly $limit characters
    return rtrim(utf8::substr($str, 0, $limit)).$end_char;
  }
  /**
   * Alternates between two or more strings.
   *
   * Each call with arguments returns the next argument in rotation, driven by
   * a counter that persists between calls. Calling the method without any
   * arguments resets that counter and returns an empty string.
   *
   * @param string strings to alternate between
   * @return string
   */
  public static function alternate()
  {
    // Rotation counter, kept across calls
    static $i;

    $choices = func_get_args();

    // No arguments: rewind the rotation
    if ($choices === array())
    {
      $i = 0;
      return '';
    }

    // Use the current counter value, then advance it for the next call
    $position = $i++;

    return $choices[$position % count($choices)];
  }
  /**
   * Generates a random string of a given type and length.
   *
   * Characters are picked with mt_rand(), which is not a cryptographically
   * secure generator; do not use the result for passwords, tokens or other
   * secrets.
   *
   * @param string a type of pool, or a string of characters to use as the pool
   * @param integer length of string to return
   * @return string random string, or an empty string when the pool is empty
   *
   * @tutorial alnum - alpha-numeric characters
   * @tutorial alpha - alphabetical characters
   * @tutorial numeric - digit characters, 0-9
   * @tutorial nozero - digit characters, 1-9
   * @tutorial distinct - clearly distinct alpha-numeric characters
   */
  public static function random($type = 'alnum', $length = 8)
  {
    // Only a custom pool can contain multi-byte characters
    $utf8 = FALSE;

    switch ($type)
    {
      case 'alnum':
        $pool = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
      break;
      case 'alpha':
        $pool = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
      break;
      case 'numeric':
        $pool = '0123456789';
      break;
      case 'nozero':
        $pool = '123456789';
      break;
      case 'distinct':
        $pool = '2345679ACDEFHJKLMNPRSTUVWXYZ';
      break;
      default:
        // Anything else is used verbatim as the character pool
        $pool = (string) $type;

        // An empty pool has nothing to pick from; bail out before
        // mt_rand(0, -1) can be reached
        if ($pool === '')
        {
          return '';
        }

        $utf8 = ! utf8::is_ascii($pool);
      break;
    }

    // Highest valid offset into the pool, computed once
    $max_offset = (($utf8 === TRUE) ? utf8::strlen($pool) : strlen($pool)) - 1;

    $str = '';
    for ($i = 0; $i < $length; $i++)
    {
      $offset = mt_rand(0, $max_offset);

      $str .= ($utf8 === TRUE)
        ? utf8::substr($pool, $offset, 1)
        : substr($pool, $offset, 1);
    }

    return $str;
  }
  /**
   * Reduces multiple slashes in a string to single slashes.
   *
   * A run of slashes that begins directly after a colon is not matched at
   * that position, which leaves the "//" of "scheme://" in place.
   *
   * @param string string to reduce slashes of
   * @return string
   */
  public static function reduce_slashes($str)
  {
    // Two or more consecutive slashes, not immediately preceded by a colon
    $repeated_slashes = '#(?<!:)//+#';

    $reduced = preg_replace($repeated_slashes, '/', $str);

    return $reduced;
  }
  /**
   * Replaces the given words with a string.
   *
   * Matching is case-insensitive. An asterisk inside a bad word acts as a
   * wildcard for a run of non-whitespace characters. When the replacement is
   * exactly one character long (per utf8::strlen) it is repeated once for
   * every character of the matched word; otherwise each match is replaced by
   * the replacement string as a whole.
   *
   * NOTE(review): when $replace_partial_words is TRUE the pattern is anchored
   * to word boundaries / whitespace, i.e. only whole words are replaced, and
   * when it is FALSE (the default) matches inside longer words are replaced
   * too. That reads as the opposite of what the parameter name suggests; the
   * behaviour is left as-is because existing callers may rely on it.
   *
   * @param string phrase to replace words in
   * @param array words to replace (a single word may be given as a string)
   * @param string replacement string
   * @param boolean replace words across word boundries (space, period, etc)
   * @return string
   */
  public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = FALSE)
  {
    // Build the alternatives in a separate list so that a string passed as
    // $badwords is never written into by offset
    $alternatives = array();
    foreach ((array) $badwords as $badword)
    {
      // Quote the word for literal matching, then turn "*" into a lazy wildcard
      $alternatives[] = str_replace('\*', '\S*?', preg_quote((string) $badword));
    }

    // A single capture group holding every bad word
    $regex = '('.implode('|', $alternatives).')';

    if ($replace_partial_words == TRUE)
    {
      // Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
      $regex = '(?<=\b|\s|^)'.$regex.'(?=\b|\s|$)';
    }

    $regex = '!'.$regex.'!ui';

    if (utf8::strlen($replacement) == 1)
    {
      // Split the phrase on the bad words while keeping them: the pattern has
      // exactly one capture group, so even indexes hold the untouched text and
      // odd indexes hold the matched words. This replaces the former "/e"
      // (eval) replacement, which was malformed and is unavailable in PHP 7+.
      $pieces = preg_split($regex, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

      if ($pieces === FALSE)
      {
        // Same failure value preg_replace() yields in the branch below
        return NULL;
      }

      foreach ($pieces as $index => $piece)
      {
        if ($index % 2 === 1)
        {
          // Mask the word with one replacement character per character
          $pieces[$index] = str_repeat($replacement, utf8::strlen($piece));
        }
      }

      return implode('', $pieces);
    }

    return preg_replace($regex, $replacement, $str);
  }
  /**
   * Finds the text that is similar between a set of words.
   *
   * The result is the leading part shared by every word, compared position
   * by position using string offsets against the current element of the list.
   *
   * @param array words to find similar text of
   * @return string
   */
  public static function similar(array $words)
  {
    // The current element serves as the reference all words are compared to
    $reference = current($words);
    $length = strlen($reference);

    $shared = 0;
    while ($shared < $length)
    {
      foreach ($words as $candidate)
      {
        // Stop scanning entirely at the first word that is too short or differs here
        if ( ! isset($candidate[$shared]) OR $candidate[$shared] !== $reference[$shared])
        {
          break 2;
        }
      }

      ++$shared;
    }

    // Everything before the first difference is common to all words
    return substr($reference, 0, $shared);
  }
  /**
   * Converts text email addresses and anchors into links.
   *
   * Runs text::auto_link_emails() on the text and passes the result through
   * text::auto_link_urls().
   *
   * @param string text to auto link
   * @return string
   */
  public static function auto_link($text)
  {
    // Auto link emails first to prevent problems with "www.domain.com@example.com"
    $with_emails = text::auto_link_emails($text);

    return text::auto_link_urls($with_emails);
  }
  /**
   * Converts text anchors into links.
   *
   * Two passes are made: one for http/https/ftp/ftps URLs and one for naked
   * "www." host names, which get an "http://" prefix in the link target.
   * Every occurrence is replaced exactly where it was found, so a URL that
   * appears more than once, or that is the beginning of another URL in the
   * same text, is never replaced a second time inside a generated anchor.
   *
   * @param string text to auto link
   * @return string
   */
  public static function auto_link_urls($text)
  {
    // Finds all http/https/ftp/ftps links that are not part of an existing html anchor
    $linked = preg_replace_callback('~\b(?<!href="|">)(?:ht|f)tps?://\S+(?:/|\b)~i', array(__CLASS__, '_auto_link_url_callback'), $text);

    // preg_replace_callback() returns NULL on error; keep the text in that case
    if ($linked !== NULL)
    {
      $text = $linked;
    }

    // Find all naked www.links.com (without http://)
    $linked = preg_replace_callback('~\b(?<!://)www(?:\.[a-z0-9][-a-z0-9]*+)+\.[a-z]{2,6}\b~i', array(__CLASS__, '_auto_link_www_callback'), $text);

    if ($linked !== NULL)
    {
      $text = $linked;
    }

    return $text;
  }

  /**
   * Callback for auto_link_urls(): turns a matched URL into an anchor.
   *
   * @param array regex matches, index 0 is the URL
   * @return string
   */
  protected static function _auto_link_url_callback($matches)
  {
    return html::anchor($matches[0]);
  }

  /**
   * Callback for auto_link_urls(): turns a matched naked www host into an
   * anchor that points at its http:// form while keeping the original text.
   *
   * @param array regex matches, index 0 is the host name
   * @return string
   */
  protected static function _auto_link_www_callback($matches)
  {
    return html::anchor('http://'.$matches[0], $matches[0]);
  }
  /**
   * Converts text email addresses into links.
   *
   * Every address is replaced exactly where it was found, so an address that
   * is contained in another address of the same text (for example
   * "a@b.co" and "a@b.com") cannot corrupt the longer one.
   *
   * @param string text to auto link
   * @return string
   */
  public static function auto_link_emails($text)
  {
    // Finds all email addresses that are not part of an existing html mailto anchor
    // Note: The "58;" negative lookbehind prevents matching of existing encoded html mailto anchors
    // The html entity for a colon (:) is &#58; or &#058; or &#0058; etc.
    $linked = preg_replace_callback('~\b(?<!href="mailto:|">|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b~i', array(__CLASS__, '_auto_link_email_callback'), $text);

    // preg_replace_callback() returns NULL on error; keep the text in that case
    return ($linked === NULL) ? $text : $linked;
  }

  /**
   * Callback for auto_link_emails(): turns a matched address into a mailto
   * link via html::mailto().
   *
   * @param array regex matches, index 0 is the email address
   * @return string
   */
  protected static function _auto_link_email_callback($matches)
  {
    return html::mailto($matches[0]);
  }
  /**
   * Automatically applies <p> and <br /> markup to text. Basically nl2br() on steroids.
   *
   * Runs of two or more newlines become paragraph breaks and single newlines
   * become <br />. Block-level elements listed in the pattern below are kept
   * outside of the generated <p> tags.
   *
   * @param string subject
   * @return string
   */
  public static function auto_p($str)
  {
    $str = trim($str);

    // Nothing but whitespace: nothing to mark up
    if ($str === '')
    {
      return '';
    }

    // Standardize newlines
    $str = str_replace(array("\r\n", "\r"), "\n", $str);

    // Strip spaces and tabs from the start, then from the end, of every line
    $str = preg_replace(array('~^[ \t]+~m', '~[ \t]+$~m'), '', $str);

    // The html-specific regexes are only worth running when a tag may be present
    $has_markup = (strpos($str, '<') !== FALSE);

    // Elements that should not be surrounded by p tags
    $block_tags = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|olgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';

    if ($has_markup)
    {
      // Add a linebreak before such an element opening a line and after one closing a line
      $str = preg_replace('~^<'.$block_tags.'[^>]*+>~im', "\n$0", $str);
      $str = preg_replace('~</'.$block_tags.'\s*+>$~im', "$0\n", $str);
    }

    // Wrap everything in one paragraph, then split it at blank lines
    $str = '<p>'.trim($str).'</p>';
    $str = preg_replace('~\n{2,}~', "</p>\n\n<p>", $str);

    if ($has_markup)
    {
      // Drop the <p> directly in front of, and the </p> directly behind, those elements
      $str = preg_replace('~<p>(?=</?'.$block_tags.'[^>]*+>)~i', '', $str);
      $str = preg_replace('~(</?'.$block_tags.'[^>]*+>)</p>~i', '$1', $str);
    }

    // Convert single linebreaks to <br />
    return preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);
  }
  /**
   * Returns human readable sizes.
   * @see Based on original functions written by:
   * @see Aidan Lister: http://aidanlister.com/repos/v/function.size_readable.php
   * @see Quentin Zervaas: http://www.phpriot.com/d/code/strings/filesize-format/
   *
   * When no (known) unit is forced, the largest unit that keeps the value at
   * or above 1 is chosen, limited to the units listed below. Sizes beyond the
   * largest unit are expressed in that largest unit and sizes below one byte
   * are expressed in bytes.
   *
   * @param integer size in bytes
   * @param string a definitive unit
   * @param string the return string format
   * @param boolean whether to use SI prefixes or IEC
   * @return string
   */
  public static function bytes($bytes, $force_unit = NULL, $format = NULL, $si = TRUE)
  {
    // Format string
    $format = ($format === NULL) ? '%01.2f %s' : (string) $format;

    // Work with a string from here on so NULL never reaches strpos()
    $force_unit = (string) $force_unit;

    // IEC prefixes (binary)
    if ($si == FALSE OR strpos($force_unit, 'i') !== FALSE)
    {
      $units = array('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB');
      $mod = 1024;
    }
    // SI prefixes (decimal)
    else
    {
      $units = array('B', 'kB', 'MB', 'GB', 'TB', 'PB');
      $mod = 1000;
    }

    // Determine unit to use
    $power = array_search($force_unit, $units, TRUE);

    if ($power === FALSE)
    {
      // Step up one unit at a time using exact powers of the base. This keeps
      // the index inside $units for huge and fractional sizes alike and avoids
      // the rounding of a floating point logarithm at exact unit boundaries.
      $largest = count($units) - 1;
      $power = 0;

      while ($power < $largest AND $bytes >= pow($mod, $power + 1))
      {
        $power++;
      }
    }

    return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
  }
  /**
   * Prevents widow words by inserting a non-breaking space between the last two words.
   * @see http://www.shauninman.com/archive/2006/08/22/widont_wordpress_plugin
   *
   * Trailing whitespace is removed first; the last remaining space character,
   * if there is one, is then swapped for the &nbsp; entity.
   *
   * @param string string to remove widows from
   * @return string
   */
  public static function widont($str)
  {
    $trimmed = rtrim($str);

    // Position of the space separating the final word from the rest
    $last_space = strrpos($trimmed, ' ');

    // A single word (or nothing at all): there is no gap to glue
    if ($last_space === FALSE)
    {
      return $trimmed;
    }

    $before = substr($trimmed, 0, $last_space);
    $last_word = substr($trimmed, $last_space + 1);

    return $before.'&nbsp;'.$last_word;
  }
  335. } // End text