PageRenderTime 44ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/system/helpers/text.php

http://github.com/ushahidi/Ushahidi_Web
PHP | 420 lines | 213 code | 61 blank | 146 comment | 27 complexity | 3818cacc6213ddbbf4351c420d028125 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php defined('SYSPATH') OR die('No direct access allowed.'); // Refuse direct access: stop with a message unless the SYSPATH constant is already defined.
  2. /**
  3. * Text helper class.
  4. *
  5. * $Id: text.php 3917 2009-01-21 03:06:22Z zombor $
  6. *
  7. * @package Core
  8. * @author Kohana Team
  9. * @copyright (c) 2007-2008 Kohana Team
  10. * @license http://kohanaphp.com/license.html
  11. */
  12. class text_Core {
  13. /**
  14. * Limits a phrase to a given number of words.
  15. *
  16. * @param string phrase to limit words of
  17. * @param integer number of words to limit to
  18. * @param string end character or entity
  19. * @return string
  20. */
  21. public static function limit_words($str, $limit = 100, $end_char = NULL)
  22. {
  23. $limit = (int) $limit;
  24. $end_char = ($end_char === NULL) ? '&#8230;' : $end_char;
  25. if (trim($str) === '')
  26. return $str;
  27. if ($limit <= 0)
  28. return $end_char;
  29. preg_match('/^\s*+(?:\S++\s*+){1,'.$limit.'}/u', $str, $matches);
  30. // Only attach the end character if the matched string is shorter
  31. // than the starting string.
  32. return rtrim($matches[0]).(strlen($matches[0]) === strlen($str) ? '' : $end_char);
  33. }
  34. /**
  35. * Limits a phrase to a given number of characters.
  36. *
  37. * @param string phrase to limit characters of
  38. * @param integer number of characters to limit to
  39. * @param string end character or entity
  40. * @param boolean enable or disable the preservation of words while limiting
  41. * @return string
  42. */
  43. public static function limit_chars($str, $limit = 100, $end_char = NULL, $preserve_words = FALSE)
  44. {
  45. $end_char = ($end_char === NULL) ? '&#8230;' : $end_char;
  46. $limit = (int) $limit;
  47. if (trim($str) === '' OR utf8::strlen($str) <= $limit)
  48. return $str;
  49. if ($limit <= 0)
  50. return $end_char;
  51. if ($preserve_words == FALSE)
  52. {
  53. return rtrim(utf8::substr($str, 0, $limit)).$end_char;
  54. }
  55. preg_match('/^.{'.($limit - 1).'}\S*/us', $str, $matches);
  56. return rtrim($matches[0]).(strlen($matches[0]) == strlen($str) ? '' : $end_char);
  57. }
  58. /**
  59. * Alternates between two or more strings.
  60. *
  61. * @param string strings to alternate between
  62. * @return string
  63. */
  64. public static function alternate()
  65. {
  66. static $i;
  67. if (func_num_args() === 0)
  68. {
  69. $i = 0;
  70. return '';
  71. }
  72. $args = func_get_args();
  73. return $args[($i++ % count($args))];
  74. }
  75. /**
  76. * Generates a random string of a given type and length.
  77. *
  78. * @param string a type of pool, or a string of characters to use as the pool
  79. * @param integer length of string to return
  80. * @return string
  81. *
  82. * @tutorial alnum alpha-numeric characters
  83. * @tutorial alpha alphabetical characters
  84. * @tutorial hexdec hexadecimal characters, 0-9 plus a-f
  85. * @tutorial numeric digit characters, 0-9
  86. * @tutorial nozero digit characters, 1-9
  87. * @tutorial distinct clearly distinct alpha-numeric characters
  88. */
  89. public static function random($type = 'alnum', $length = 8)
  90. {
  91. $utf8 = FALSE;
  92. switch ($type)
  93. {
  94. case 'alnum':
  95. $pool = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
  96. break;
  97. case 'alpha':
  98. $pool = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
  99. break;
  100. case 'hexdec':
  101. $pool = '0123456789abcdef';
  102. break;
  103. case 'numeric':
  104. $pool = '0123456789';
  105. break;
  106. case 'nozero':
  107. $pool = '123456789';
  108. break;
  109. case 'distinct':
  110. $pool = '2345679ACDEFHJKLMNPRSTUVWXYZ';
  111. break;
  112. default:
  113. $pool = (string) $type;
  114. $utf8 = ! utf8::is_ascii($pool);
  115. break;
  116. }
  117. // Split the pool into an array of characters
  118. $pool = ($utf8 === TRUE) ? utf8::str_split($pool, 1) : str_split($pool, 1);
  119. // Largest pool key
  120. $max = count($pool) - 1;
  121. $str = '';
  122. for ($i = 0; $i < $length; $i++)
  123. {
  124. // Select a random character from the pool and add it to the string
  125. $str .= $pool[mt_rand(0, $max)];
  126. }
  127. // Make sure alnum strings contain at least one letter and one digit
  128. if ($type === 'alnum' AND $length > 1)
  129. {
  130. if (ctype_alpha($str))
  131. {
  132. // Add a random digit
  133. $str[mt_rand(0, $length - 1)] = chr(mt_rand(48, 57));
  134. }
  135. elseif (ctype_digit($str))
  136. {
  137. // Add a random letter
  138. $str[mt_rand(0, $length - 1)] = chr(mt_rand(65, 90));
  139. }
  140. }
  141. return $str;
  142. }
  143. /**
  144. * Reduces multiple slashes in a string to single slashes.
  145. *
  146. * @param string string to reduce slashes of
  147. * @return string
  148. */
  149. public static function reduce_slashes($str)
  150. {
  151. return preg_replace('#(?<!:)//+#', '/', $str);
  152. }
  153. /**
  154. * Replaces the given words with a string.
  155. *
  156. * @param string phrase to replace words in
  157. * @param array words to replace
  158. * @param string replacement string
  159. * @param boolean replace words across word boundries (space, period, etc)
  160. * @return string
  161. */
  162. public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = FALSE)
  163. {
  164. foreach ((array) $badwords as $key => $badword)
  165. {
  166. $badwords[$key] = str_replace('\*', '\S*?', preg_quote((string) $badword));
  167. }
  168. $regex = '('.implode('|', $badwords).')';
  169. if ($replace_partial_words == TRUE)
  170. {
  171. // Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
  172. $regex = '(?<=\b|\s|^)'.$regex.'(?=\b|\s|$)';
  173. }
  174. $regex = '!'.$regex.'!ui';
  175. if (utf8::strlen($replacement) == 1)
  176. {
  177. $regex .= 'e';
  178. return preg_replace($regex, 'str_repeat($replacement, utf8::strlen(\'$1\'))', $str);
  179. }
  180. return preg_replace($regex, $replacement, $str);
  181. }
  182. /**
  183. * Finds the text that is similar between a set of words.
  184. *
  185. * @param array words to find similar text of
  186. * @return string
  187. */
  188. public static function similar(array $words)
  189. {
  190. // First word is the word to match against
  191. $word = current($words);
  192. for ($i = 0, $max = strlen($word); $i < $max; ++$i)
  193. {
  194. foreach ($words as $w)
  195. {
  196. // Once a difference is found, break out of the loops
  197. if ( ! isset($w[$i]) OR $w[$i] !== $word[$i])
  198. break 2;
  199. }
  200. }
  201. // Return the similar text
  202. return substr($word, 0, $i);
  203. }
  204. /**
  205. * Converts text email addresses and anchors into links.
  206. *
  207. * @param string text to auto link
  208. * @return string
  209. */
  210. public static function auto_link($text)
  211. {
  212. // Auto link emails first to prevent problems with "www.domain.com@example.com"
  213. return text::auto_link_urls(text::auto_link_emails($text));
  214. }
  215. /**
  216. * Converts text anchors into links.
  217. *
  218. * @param string text to auto link
  219. * @return string
  220. */
  221. public static function auto_link_urls($text)
  222. {
  223. $regex = '~\\b'
  224. .'((?:ht|f)tps?://)?' // protocol
  225. .'(?:[-a-zA-Z0-9]{1,63}\.)+' // host name
  226. .'(?:[0-9]{1,3}|aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)' // tlds
  227. .'(?:/[!$-/0-9:;=@_\':;!a-zA-Z\x7f-\xff]*?)?' // path
  228. .'(?:\?[!$-/0-9:;=@_\':;!a-zA-Z\x7f-\xff]+?)?' // query
  229. .'(?:#[!$-/0-9:;=@_\':;!a-zA-Z\x7f-\xff]+?)?' // fragment
  230. .'(?=[?.!,;:"]?(?:\s|$))~'; // punctuation and url end
  231. $result = "";
  232. $position = 0;
  233. while (preg_match($regex, $text, $match, PREG_OFFSET_CAPTURE, $position))
  234. {
  235. list($url, $url_pos) = $match[0];
  236. // Add the text before the url
  237. $result .= substr($text, $position, $url_pos - $position);
  238. // Default to http://
  239. $full_url = empty($match[1][0]) ? 'http://'.$url : $url;
  240. // Add the hyperlink.
  241. $result .= html::anchor($full_url, $url);
  242. // New position to start parsing
  243. $position = $url_pos + strlen($url);
  244. }
  245. return $result.substr($text, $position);
  246. }
  247. /**
  248. * Converts text email addresses into links.
  249. *
  250. * @param string text to auto link
  251. * @return string
  252. */
  253. public static function auto_link_emails($text)
  254. {
  255. // Finds all email addresses that are not part of an existing html mailto anchor
  256. // Note: The "58;" negative lookbehind prevents matching of existing encoded html mailto anchors
  257. // The html entity for a colon (:) is &#58; or &#058; or &#0058; etc.
  258. if (preg_match_all('~\b(?<!href="mailto:|">|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b~i', $text, $matches))
  259. {
  260. foreach ($matches[0] as $match)
  261. {
  262. // Replace each email with an encoded mailto
  263. $text = str_replace($match, html::mailto($match), $text);
  264. }
  265. }
  266. return $text;
  267. }
  268. /**
  269. * Automatically applies <p> and <br /> markup to text. Basically nl2br() on steroids.
  270. *
  271. * @param string subject
  272. * @return string
  273. */
  274. public static function auto_p($str)
  275. {
  276. // Trim whitespace
  277. if (($str = trim($str)) === '')
  278. return '';
  279. // Standardize newlines
  280. $str = str_replace(array("\r\n", "\r"), "\n", $str);
  281. // Trim whitespace on each line
  282. $str = preg_replace('~^[ \t]+~m', '', $str);
  283. $str = preg_replace('~[ \t]+$~m', '', $str);
  284. // The following regexes only need to be executed if the string contains html
  285. if ($html_found = (strpos($str, '<') !== FALSE))
  286. {
  287. // Elements that should not be surrounded by p tags
  288. $no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|olgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';
  289. // Put at least two linebreaks before and after $no_p elements
  290. $str = preg_replace('~^<'.$no_p.'[^>]*+>~im', "\n$0", $str);
  291. $str = preg_replace('~</'.$no_p.'\s*+>$~im', "$0\n", $str);
  292. }
  293. // Do the <p> magic!
  294. $str = '<p>'.trim($str).'</p>';
  295. $str = preg_replace('~\n{2,}~', "</p>\n\n<p>", $str);
  296. // The following regexes only need to be executed if the string contains html
  297. if ($html_found !== FALSE)
  298. {
  299. // Remove p tags around $no_p elements
  300. $str = preg_replace('~<p>(?=</?'.$no_p.'[^>]*+>)~i', '', $str);
  301. $str = preg_replace('~(</?'.$no_p.'[^>]*+>)</p>~i', '$1', $str);
  302. }
  303. // Convert single linebreaks to <br />
  304. $str = preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);
  305. return $str;
  306. }
  307. /**
  308. * Returns human readable sizes.
  309. * @see Based on original functions written by:
  310. * @see Aidan Lister: http://aidanlister.com/repos/v/function.size_readable.php
  311. * @see Quentin Zervaas: http://www.phpriot.com/d/code/strings/filesize-format/
  312. *
  313. * @param integer size in bytes
  314. * @param string a definitive unit
  315. * @param string the return string format
  316. * @param boolean whether to use SI prefixes or IEC
  317. * @return string
  318. */
  319. public static function bytes($bytes, $force_unit = NULL, $format = NULL, $si = TRUE)
  320. {
  321. // Format string
  322. $format = ($format === NULL) ? '%01.2f %s' : (string) $format;
  323. // IEC prefixes (binary)
  324. if ($si == FALSE OR strpos($force_unit, 'i') !== FALSE)
  325. {
  326. $units = array('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB');
  327. $mod = 1024;
  328. }
  329. // SI prefixes (decimal)
  330. else
  331. {
  332. $units = array('B', 'kB', 'MB', 'GB', 'TB', 'PB');
  333. $mod = 1000;
  334. }
  335. // Determine unit to use
  336. if (($power = array_search((string) $force_unit, $units)) === FALSE)
  337. {
  338. $power = ($bytes > 0) ? floor(log($bytes, $mod)) : 0;
  339. }
  340. return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
  341. }
  342. /**
  343. * Prevents widow words by inserting a non-breaking space between the last two words.
  344. * @see http://www.shauninman.com/archive/2006/08/22/widont_wordpress_plugin
  345. *
  346. * @param string string to remove widows from
  347. * @return string
  348. */
  349. public static function widont($str)
  350. {
  351. $str = rtrim($str);
  352. $space = strrpos($str, ' ');
  353. if ($space !== FALSE)
  354. {
  355. $str = substr($str, 0, $space).'&nbsp;'.substr($str, $space + 1);
  356. }
  357. return $str;
  358. }
  359. } // End text