/system/helpers/text.php
PHP | 389 lines | 196 code | 54 blank | 139 comment | 26 complexity | 2d9b10fce2504e4090b70491040e1a8c MD5 | raw file
- <?php defined('SYSPATH') or die('No direct script access.');
- /**
- * Text helper class.
- *
- * $Id: text.php 3228 2008-07-28 20:47:04Z dlib $
- *
- * @package Core
- * @author Kohana Team
- * @copyright (c) 2007-2008 Kohana Team
- * @license http://kohanaphp.com/license.html
- */
class text_Core {

    /**
     * Limits a phrase to a given number of words.
     *
     * @param   string   phrase to limit words of
     * @param   integer  number of words to limit to
     * @param   string   end character or entity (an ellipsis when NULL)
     * @return  string
     */
    public static function limit_words($str, $limit = 100, $end_char = NULL)
    {
        $limit = (int) $limit;
        $end_char = ($end_char === NULL) ? '…' : $end_char;

        if (trim($str) === '')
            return $str;

        if ($limit <= 0)
            return $end_char;

        // preg_match() fails on malformed UTF-8 (the "u" modifier) or when the
        // limit exceeds what PCRE accepts as a quantifier; in that case there
        // is nothing reliable to truncate, so hand the input back untouched.
        if ( ! preg_match('/^\s*+(?:\S++\s*+){1,'.$limit.'}/u', $str, $matches))
            return $str;

        // Only attach the end character if the matched string is shorter
        // than the starting string.
        return rtrim($matches[0]).(strlen($matches[0]) === strlen($str) ? '' : $end_char);
    }

    /**
     * Limits a phrase to a given number of characters.
     *
     * @param   string   phrase to limit characters of
     * @param   integer  number of characters to limit to
     * @param   string   end character or entity (an ellipsis when NULL)
     * @param   boolean  enable or disable the preservation of words while limiting
     * @return  string
     */
    public static function limit_chars($str, $limit = 100, $end_char = NULL, $preserve_words = FALSE)
    {
        $end_char = ($end_char === NULL) ? '…' : $end_char;
        $limit = (int) $limit;

        if (trim($str) === '' OR utf8::strlen($str) <= $limit)
            return $str;

        if ($limit <= 0)
            return $end_char;

        if ($preserve_words == FALSE)
            return rtrim(utf8::substr($str, 0, $limit)).$end_char;

        // Take ($limit - 1) characters, then run on to the end of the current word.
        // A failed match (malformed UTF-8, oversized quantifier) returns the input as-is.
        if ( ! preg_match('/^.{'.($limit - 1).'}\S*/us', $str, $matches))
            return $str;

        return rtrim($matches[0]).(strlen($matches[0]) == strlen($str) ? '' : $end_char);
    }

    /**
     * Alternates between two or more strings. Each call returns the next
     * argument in turn; calling with no arguments resets the internal
     * counter and returns an empty string.
     *
     * @param   string  strings to alternate between
     * @return  string
     */
    public static function alternate()
    {
        // Call counter shared by every call site of this method
        static $i = 0;

        if (func_num_args() === 0)
        {
            $i = 0;
            return '';
        }

        $args = func_get_args();
        return $args[($i++ % count($args))];
    }

    /**
     * Generates a random string of a given type and length.
     * Characters are drawn with mt_rand(), so the result is NOT suitable
     * for security-sensitive tokens.
     *
     * @param   string   a type of pool, or a string of characters to use as the pool
     * @param   integer  length of string to return
     * @return  string
     *
     * @tutorial  alnum     alpha-numeric characters
     * @tutorial  alpha     alphabetical characters
     * @tutorial  numeric   digit characters, 0-9
     * @tutorial  nozero    digit characters, 1-9
     * @tutorial  distinct  clearly distinct alpha-numeric characters
     */
    public static function random($type = 'alnum', $length = 8)
    {
        $utf8 = FALSE;

        switch ($type)
        {
            case 'alnum':
                $pool = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
            break;
            case 'alpha':
                $pool = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
            break;
            case 'numeric':
                $pool = '0123456789';
            break;
            case 'nozero':
                $pool = '123456789';
            break;
            case 'distinct':
                $pool = '2345679ACDEFHJKLMNPRSTUVWXYZ';
            break;
            default:
                $pool = (string) $type;

                // Nothing to pick from; avoids calling mt_rand(0, -1) below
                if ($pool === '')
                    return '';

                $utf8 = ! utf8::is_ascii($pool);
            break;
        }

        $str = '';
        $pool_size = ($utf8 === TRUE) ? utf8::strlen($pool) : strlen($pool);

        for ($i = 0; $i < $length; $i++)
        {
            $str .= ($utf8 === TRUE)
                ? utf8::substr($pool, mt_rand(0, $pool_size - 1), 1)
                :       substr($pool, mt_rand(0, $pool_size - 1), 1);
        }

        return $str;
    }

    /**
     * Reduces multiple slashes in a string to single slashes.
     * Slashes directly following a colon (as in "http://") are left alone.
     *
     * @param   string  string to reduce slashes of
     * @return  string
     */
    public static function reduce_slashes($str)
    {
        return preg_replace('#(?<!:)//+#', '/', $str);
    }

    /**
     * Replaces the given words with a string. A "*" inside a bad word acts
     * as a wildcard for any run of non-whitespace characters. When the
     * replacement is a single character it is repeated to the length of the
     * censored word; otherwise it replaces the word once.
     *
     * NOTE: despite its name, passing TRUE for the last parameter makes the
     * match stricter: a bad word is then only replaced where it is delimited
     * by a word boundary, whitespace, or the start/end of the string.
     *
     * @param   string        phrase to replace words in
     * @param   array|string  words to replace
     * @param   string        replacement string
     * @param   boolean       require word boundaries around each match (see note)
     * @return  string
     */
    public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = FALSE)
    {
        // Build into a fresh array so a scalar $badwords is handled correctly,
        // and quote the "!" delimiter used for the final pattern.
        $patterns = array();
        foreach ((array) $badwords as $badword)
        {
            $patterns[] = str_replace('\*', '\S*?', preg_quote((string) $badword, '!'));
        }

        // An empty alternation would match at every position
        if ($patterns === array())
            return $str;

        $regex = '('.implode('|', $patterns).')';

        if ($replace_partial_words == TRUE)
        {
            // Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
            $regex = '(?<=\b|\s|^)'.$regex.'(?=\b|\s|$)';
        }

        $regex = '!'.$regex.'!ui';

        if (utf8::strlen($replacement) == 1)
        {
            // Rebuild the string around each match instead of relying on the
            // removed /e (eval) modifier. Offsets reported by PCRE are in bytes.
            if ( ! preg_match_all($regex, $str, $matches, PREG_OFFSET_CAPTURE))
                return $str;

            $censored = '';
            $cursor = 0;

            foreach ($matches[1] as $match)
            {
                list($word, $offset) = $match;

                $censored .= substr($str, $cursor, $offset - $cursor)
                    .str_repeat($replacement, utf8::strlen($word));

                $cursor = $offset + strlen($word);
            }

            return $censored.substr($str, $cursor);
        }

        return preg_replace($regex, $replacement, $str);
    }

    /**
     * Finds the text that is similar between a set of words, i.e. their
     * common leading part. The comparison is byte-wise and case-sensitive.
     *
     * @param   array  words to find similar text of
     * @return  string
     */
    public static function similar(array $words)
    {
        if ($words === array())
            return '';

        // First word is the word to match against
        $word = (string) reset($words);

        for ($i = 0, $max = strlen($word); $i < $max; ++$i)
        {
            foreach ($words as $w)
            {
                // Once a difference is found, break out of the loops
                if ( ! isset($w[$i]) OR $w[$i] !== $word[$i])
                    break 2;
            }
        }

        // $i now holds the number of leading bytes shared by every word
        return substr($word, 0, $i);
    }

    /**
     * Converts text email addresses and anchors into links.
     *
     * @param   string  text to auto link
     * @return  string
     */
    public static function auto_link($text)
    {
        // Auto link emails first to prevent problems with "www.domain.com@example.com"
        return text::auto_link_urls(text::auto_link_emails($text));
    }

    /**
     * Converts text anchors into links.
     *
     * Each URL is replaced where it was matched, so URLs that are already
     * part of an anchor, repeated URLs, and URLs that are prefixes of one
     * another are not rewritten a second time.
     *
     * @param   string  text to auto link
     * @return  string
     */
    public static function auto_link_urls($text)
    {
        // Finds all http/https/ftp/ftps links that are not part of an existing html anchor
        $linked = preg_replace_callback('~\b(?<!href="|">)(?:ht|f)tps?://\S+(?:/|\b)~i', array(__CLASS__, '_link_url'), $text);

        // preg_replace_callback() returns NULL on a PCRE error; keep the text in that case
        if ($linked !== NULL)
        {
            $text = $linked;
        }

        // Find all naked www.links.com (without http://)
        $linked = preg_replace_callback('~\b(?<!://)www(?:\.[a-z0-9][-a-z0-9]*+)+\.[a-z]{2,6}\b~i', array(__CLASS__, '_link_www'), $text);

        return ($linked !== NULL) ? $linked : $text;
    }

    /**
     * Converts text email addresses into links.
     *
     * @param   string  text to auto link
     * @return  string
     */
    public static function auto_link_emails($text)
    {
        // Finds all email addresses that are not part of an existing html mailto anchor
        // Note: The "58;" negative lookbehind prevents matching of existing encoded html mailto anchors
        //       The html entity for a colon (:) is &#58; or &#058; or &#0058; etc.
        $linked = preg_replace_callback('~\b(?<!href="mailto:|">|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b~i', array(__CLASS__, '_link_email'), $text);

        // preg_replace_callback() returns NULL on a PCRE error; keep the text in that case
        return ($linked !== NULL) ? $linked : $text;
    }

    /**
     * Callback for auto_link_urls(): wraps a full URL match in an anchor.
     *
     * @param   array  preg match
     * @return  string
     */
    protected static function _link_url($matches)
    {
        return html::anchor($matches[0]);
    }

    /**
     * Callback for auto_link_urls(): wraps a naked "www." match in an anchor,
     * prefixing the target with "http://" and keeping the matched text as title.
     *
     * @param   array  preg match
     * @return  string
     */
    protected static function _link_www($matches)
    {
        return html::anchor('http://'.$matches[0], $matches[0]);
    }

    /**
     * Callback for auto_link_emails(): replaces an email match with the
     * output of html::mailto().
     *
     * @param   array  preg match
     * @return  string
     */
    protected static function _link_email($matches)
    {
        return html::mailto($matches[0]);
    }

    /**
     * Automatically applies <p> and <br /> markup to text. Basically nl2br() on steroids.
     *
     * @param   string  subject
     * @return  string
     */
    public static function auto_p($str)
    {
        // Trim whitespace
        if (($str = trim($str)) === '')
            return '';

        // Standardize newlines
        $str = str_replace(array("\r\n", "\r"), "\n", $str);

        // Trim whitespace on each line
        $str = preg_replace('~^[ \t]+~m', '', $str);
        $str = preg_replace('~[ \t]+$~m', '', $str);

        // The following regexes only need to be executed if the string contains html
        if ($html_found = (strpos($str, '<') !== FALSE))
        {
            // Elements that should not be surrounded by p tags
            $no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|olgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';

            // Put at least two linebreaks before and after $no_p elements
            $str = preg_replace('~^<'.$no_p.'[^>]*+>~im', "\n$0", $str);
            $str = preg_replace('~</'.$no_p.'\s*+>$~im', "$0\n", $str);
        }

        // Do the <p> magic!
        $str = '<p>'.trim($str).'</p>';
        $str = preg_replace('~\n{2,}~', "</p>\n\n<p>", $str);

        // The following regexes only need to be executed if the string contains html
        if ($html_found !== FALSE)
        {
            // Remove p tags around $no_p elements
            $str = preg_replace('~<p>(?=</?'.$no_p.'[^>]*+>)~i', '', $str);
            $str = preg_replace('~(</?'.$no_p.'[^>]*+>)</p>~i', '$1', $str);
        }

        // Convert single linebreaks to <br />
        $str = preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);

        return $str;
    }

    /**
     * Returns human readable sizes.
     * @see  Based on original functions written by:
     * @see  Aidan Lister: http://aidanlister.com/repos/v/function.size_readable.php
     * @see  Quentin Zervaas: http://www.phpriot.com/d/code/strings/filesize-format/
     *
     * @param   integer  size in bytes
     * @param   string   a definitive unit (e.g. "MB" or "MiB"); chosen automatically when NULL or unknown
     * @param   string   the return string format (sprintf: value, unit)
     * @param   boolean  whether to use SI prefixes or IEC
     * @return  string
     */
    public static function bytes($bytes, $force_unit = NULL, $format = NULL, $si = TRUE)
    {
        // Format string
        $format = ($format === NULL) ? '%01.2f %s' : (string) $format;

        // Normalize once; avoids passing NULL to strpos()
        $force_unit = (string) $force_unit;

        // IEC prefixes (binary)
        if ($si == FALSE OR strpos($force_unit, 'i') !== FALSE)
        {
            $units = array('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB');
            $mod   = 1024;
        }
        // SI prefixes (decimal)
        else
        {
            $units = array('B', 'kB', 'MB', 'GB', 'TB', 'PB');
            $mod   = 1000;
        }

        // Determine unit to use
        if (($power = array_search($force_unit, $units, TRUE)) === FALSE)
        {
            // Step up one unit at a time rather than using floor(log()):
            // this cannot run past the largest unit, cannot go negative for
            // fractional sizes, and is immune to logarithm rounding.
            $power = 0;
            $largest = count($units) - 1;

            for ($scaled = $bytes; $scaled >= $mod AND $power < $largest; $scaled /= $mod)
            {
                ++$power;
            }
        }

        return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
    }

    /**
     * Prevents widow words by inserting a non-breaking space between the last two words.
     * @see  http://www.shauninman.com/archive/2006/08/22/widont_wordpress_plugin
     *
     * @param   string  string to remove widows from
     * @return  string
     */
    public static function widont($str)
    {
        $str = rtrim($str);
        $space = strrpos($str, ' ');

        if ($space !== FALSE)
        {
            // "\xC2\xA0" is U+00A0 (no-break space) encoded as UTF-8, spelled
            // out so the character cannot be mistaken for a regular space.
            $str = substr($str, 0, $space)."\xC2\xA0".substr($str, $space + 1);
        }

        return $str;
    }

} // End text