PageRenderTime 319ms CodeModel.GetById 131ms app.highlight 72ms RepoModel.GetById 109ms app.codeStats 1ms

/system/classes/Kohana/Text.php

https://bitbucket.org/chrispiechowicz/zepto
PHP | 686 lines | 336 code | 81 blank | 269 comment | 37 complexity | d1ba78be677a6e87507e79ddde928195 MD5 | raw file
  1<?php defined('SYSPATH') OR die('No direct script access.');
  2/**
  3 * Text helper class. Provides simple methods for working with text.
  4 *
  5 * @package    Kohana
  6 * @category   Helpers
  7 * @author     Kohana Team
  8 * @copyright  (c) 2007-2012 Kohana Team
  9 * @license    http://kohanaframework.org/license
 10 */
 11class Kohana_Text {
 12
	/**
	 * @var  array   number units and their English names, keyed by the
	 *               numeric value and listed from the largest unit down to
	 *               the smallest
	 */
	public static $units = array(
		1000000000 => 'billion',
		1000000    => 'million',
		1000       => 'thousand',
		100        => 'hundred',
		90 => 'ninety',
		80 => 'eighty',
		70 => 'seventy',
		60 => 'sixty',
		50 => 'fifty',
		40 => 'forty',
		30 => 'thirty',
		20 => 'twenty',
		19 => 'nineteen',
		18 => 'eighteen',
		17 => 'seventeen',
		16 => 'sixteen',
		15 => 'fifteen',
		14 => 'fourteen',
		13 => 'thirteen',
		12 => 'twelve',
		11 => 'eleven',
		10 => 'ten',
		9  => 'nine',
		8  => 'eight',
		7  => 'seven',
		6  => 'six',
		5  => 'five',
		4  => 'four',
		3  => 'three',
		2  => 'two',
		1  => 'one',
	);
 49
	/**
	 * Truncates a phrase after a maximum number of words.
	 *
	 *     $text = Text::limit_words($text);
	 *
	 * A phrase made only of whitespace is returned untouched, and a limit of
	 * zero or less yields just the end character.
	 *
	 * @param   string  $str        phrase to truncate
	 * @param   integer $limit      maximum number of words to keep
	 * @param   string  $end_char   suffix appended when text was cut off (defaults to "…")
	 * @return  string
	 */
	public static function limit_words($str, $limit = 100, $end_char = NULL)
	{
		if ($end_char === NULL)
		{
			$end_char = '…';
		}

		$limit = (int) $limit;

		if (trim($str) === '')
		{
			return $str;
		}

		if ($limit <= 0)
		{
			return $end_char;
		}

		// Leading whitespace, then up to $limit runs of "word + trailing whitespace"
		preg_match('/^\s*+(?:\S++\s*+){1,'.$limit.'}/u', $str, $matches);

		$kept = $matches[0];

		// The suffix is only needed when something was actually left out
		$suffix = (strlen($kept) !== strlen($str)) ? $end_char : '';

		return rtrim($kept).$suffix;
	}
 77
	/**
	 * Truncates a phrase after a maximum number of characters.
	 *
	 *     $text = Text::limit_chars($text);
	 *
	 * Blank phrases and phrases that already fit within the limit are
	 * returned untouched. A limit of zero or less yields just the end
	 * character.
	 *
	 * @param   string  $str            phrase to truncate
	 * @param   integer $limit          maximum number of characters to keep
	 * @param   string  $end_char       suffix appended when text was cut off (defaults to "…")
	 * @param   boolean $preserve_words cut at whitespace instead of in the middle of a word
	 * @return  string
	 * @uses    UTF8::strlen
	 * @uses    UTF8::substr
	 */
	public static function limit_chars($str, $limit = 100, $end_char = NULL, $preserve_words = FALSE)
	{
		if ($end_char === NULL)
		{
			$end_char = '…';
		}

		$limit = (int) $limit;

		if (trim($str) === '' OR UTF8::strlen($str) <= $limit)
		{
			return $str;
		}

		if ($limit <= 0)
		{
			return $end_char;
		}

		if ($preserve_words !== FALSE)
		{
			// Preserve words: keep at most $limit characters that are followed
			// by whitespace, so the cut never lands inside a word.
			if (preg_match('/^.{0,'.$limit.'}\s/us', $str, $matches))
			{
				$kept = $matches[0];
				$suffix = (strlen($kept) !== strlen($str)) ? $end_char : '';

				return rtrim($kept).$suffix;
			}

			// No whitespace within reach: nothing can be kept
			return $end_char;
		}

		// Hard cut at exactly $limit characters
		return rtrim(UTF8::substr($str, 0, $limit)).$end_char;
	}
112
113	/**
114	 * Alternates between two or more strings.
115	 *
116	 *     echo Text::alternate('one', 'two'); // "one"
117	 *     echo Text::alternate('one', 'two'); // "two"
118	 *     echo Text::alternate('one', 'two'); // "one"
119	 *
120	 * Note that using multiple iterations of different strings may produce
121	 * unexpected results.
122	 *
123	 * @param   string  $str,...    strings to alternate between
124	 * @return  string
125	 */
126	public static function alternate()
127	{
128		static $i;
129
130		if (func_num_args() === 0)
131		{
132			$i = 0;
133			return '';
134		}
135
136		$args = func_get_args();
137		return $args[($i++ % count($args))];
138	}
139
140	/**
141	 * Generates a random string of a given type and length.
142	 *
143	 *
144	 *     $str = Text::random(); // 8 character random string
145	 *
146	 * The following types are supported:
147	 *
148	 * alnum
149	 * :  Upper and lower case a-z, 0-9 (default)
150	 *
151	 * alpha
152	 * :  Upper and lower case a-z
153	 *
154	 * hexdec
155	 * :  Hexadecimal characters a-f, 0-9
156	 *
157	 * distinct
158	 * :  Uppercase characters and numbers that cannot be confused
159	 *
160	 * You can also create a custom type by providing the "pool" of characters
161	 * as the type.
162	 *
163	 * @param   string  $type   a type of pool, or a string of characters to use as the pool
164	 * @param   integer $length length of string to return
165	 * @return  string
166	 * @uses    UTF8::split
167	 */
168	public static function random($type = NULL, $length = 8)
169	{
170		if ($type === NULL)
171		{
172			// Default is to generate an alphanumeric string
173			$type = 'alnum';
174		}
175
176		$utf8 = FALSE;
177
178		switch ($type)
179		{
180			case 'alnum':
181				$pool = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
182			break;
183			case 'alpha':
184				$pool = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
185			break;
186			case 'hexdec':
187				$pool = '0123456789abcdef';
188			break;
189			case 'numeric':
190				$pool = '0123456789';
191			break;
192			case 'nozero':
193				$pool = '123456789';
194			break;
195			case 'distinct':
196				$pool = '2345679ACDEFHJKLMNPRSTUVWXYZ';
197			break;
198			default:
199				$pool = (string) $type;
200				$utf8 = ! UTF8::is_ascii($pool);
201			break;
202		}
203
204		// Split the pool into an array of characters
205		$pool = ($utf8 === TRUE) ? UTF8::str_split($pool, 1) : str_split($pool, 1);
206
207		// Largest pool key
208		$max = count($pool) - 1;
209
210		$str = '';
211		for ($i = 0; $i < $length; $i++)
212		{
213			// Select a random character from the pool and add it to the string
214			$str .= $pool[mt_rand(0, $max)];
215		}
216
217		// Make sure alnum strings contain at least one letter and one digit
218		if ($type === 'alnum' AND $length > 1)
219		{
220			if (ctype_alpha($str))
221			{
222				// Add a random digit
223				$str[mt_rand(0, $length - 1)] = chr(mt_rand(48, 57));
224			}
225			elseif (ctype_digit($str))
226			{
227				// Add a random letter
228				$str[mt_rand(0, $length - 1)] = chr(mt_rand(65, 90));
229			}
230		}
231
232		return $str;
233	}
234
235	/**
236	 * Uppercase words that are not separated by spaces, using a custom
237	 * delimiter or the default.
238	 * 
239	 *      $str = Text::ucfirst('content-type'); // returns "Content-Type" 
240	 *
241	 * @param   string  $string     string to transform
242	 * @param   string  $delimiter  delemiter to use
243	 * @return  string
244	 */
245	public static function ucfirst($string, $delimiter = '-')
246	{
247		// Put the keys back the Case-Convention expected
248		return implode($delimiter, array_map('ucfirst', explode($delimiter, $string)));
249	}
250
251	/**
252	 * Reduces multiple slashes in a string to single slashes.
253	 *
254	 *     $str = Text::reduce_slashes('foo//bar/baz'); // "foo/bar/baz"
255	 *
256	 * @param   string  $str    string to reduce slashes of
257	 * @return  string
258	 */
259	public static function reduce_slashes($str)
260	{
261		return preg_replace('#(?<!:)//+#', '/', $str);
262	}
263
264	/**
265	 * Replaces the given words with a string.
266	 *
267	 *     // Displays "What the #####, man!"
268	 *     echo Text::censor('What the frick, man!', array(
269	 *         'frick' => '#####',
270	 *     ));
271	 *
272	 * @param   string  $str                    phrase to replace words in
273	 * @param   array   $badwords               words to replace
274	 * @param   string  $replacement            replacement string
275	 * @param   boolean $replace_partial_words  replace words across word boundries (space, period, etc)
276	 * @return  string
277	 * @uses    UTF8::strlen
278	 */
279	public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = TRUE)
280	{
281		foreach ( (array) $badwords as $key => $badword)
282		{
283			$badwords[$key] = str_replace('\*', '\S*?', preg_quote( (string) $badword));
284		}
285
286		$regex = '('.implode('|', $badwords).')';
287
288		if ($replace_partial_words === FALSE)
289		{
290			// Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
291			$regex = '(?<=\b|\s|^)'.$regex.'(?=\b|\s|$)';
292		}
293
294		$regex = '!'.$regex.'!ui';
295
296		if (UTF8::strlen($replacement) == 1)
297		{
298			$regex .= 'e';
299			return preg_replace($regex, 'str_repeat($replacement, UTF8::strlen(\'$1\'))', $str);
300		}
301
302		return preg_replace($regex, $replacement, $str);
303	}
304
305	/**
306	 * Finds the text that is similar between a set of words.
307	 *
308	 *     $match = Text::similar(array('fred', 'fran', 'free'); // "fr"
309	 *
310	 * @param   array   $words  words to find similar text of
311	 * @return  string
312	 */
313	public static function similar(array $words)
314	{
315		// First word is the word to match against
316		$word = current($words);
317
318		for ($i = 0, $max = strlen($word); $i < $max; ++$i)
319		{
320			foreach ($words as $w)
321			{
322				// Once a difference is found, break out of the loops
323				if ( ! isset($w[$i]) OR $w[$i] !== $word[$i])
324					break 2;
325			}
326		}
327
328		// Return the similar text
329		return substr($word, 0, $i);
330	}
331
332	/**
333	 * Converts text email addresses and anchors into links. Existing links
334	 * will not be altered.
335	 *
336	 *     echo Text::auto_link($text);
337	 *
338	 * [!!] This method is not foolproof since it uses regex to parse HTML.
339	 *
340	 * @param   string  $text   text to auto link
341	 * @return  string
342	 * @uses    Text::auto_link_urls
343	 * @uses    Text::auto_link_emails
344	 */
345	public static function auto_link($text)
346	{
347		// Auto link emails first to prevent problems with "www.domain.com@example.com"
348		return Text::auto_link_urls(Text::auto_link_emails($text));
349	}
350
351	/**
352	 * Converts text anchors into links. Existing links will not be altered.
353	 *
354	 *     echo Text::auto_link_urls($text);
355	 *
356	 * [!!] This method is not foolproof since it uses regex to parse HTML.
357	 *
358	 * @param   string  $text   text to auto link
359	 * @return  string
360	 * @uses    HTML::anchor
361	 */
362	public static function auto_link_urls($text)
363	{
364		// Find and replace all http/https/ftp/ftps links that are not part of an existing html anchor
365		$text = preg_replace_callback('~\b(?<!href="|">)(?:ht|f)tps?://[^<\s]+(?:/|\b)~i', 'Text::_auto_link_urls_callback1', $text);
366
367		// Find and replace all naked www.links.com (without http://)
368		return preg_replace_callback('~\b(?<!://|">)www(?:\.[a-z0-9][-a-z0-9]*+)+\.[a-z]{2,6}[^<\s]*\b~i', 'Text::_auto_link_urls_callback2', $text);
369	}
370
371	protected static function _auto_link_urls_callback1($matches)
372	{
373		return HTML::anchor($matches[0]);
374	}
375
376	protected static function _auto_link_urls_callback2($matches)
377	{
378		return HTML::anchor('http://'.$matches[0], $matches[0]);
379	}
380
381	/**
382	 * Converts text email addresses into links. Existing links will not
383	 * be altered.
384	 *
385	 *     echo Text::auto_link_emails($text);
386	 *
387	 * [!!] This method is not foolproof since it uses regex to parse HTML.
388	 *
389	 * @param   string  $text   text to auto link
390	 * @return  string
391	 * @uses    HTML::mailto
392	 */
393	public static function auto_link_emails($text)
394	{
395		// Find and replace all email addresses that are not part of an existing html mailto anchor
396		// Note: The "58;" negative lookbehind prevents matching of existing encoded html mailto anchors
397		//       The html entity for a colon (:) is &#58; or &#058; or &#0058; etc.
398		return preg_replace_callback('~\b(?<!href="mailto:|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b(?!</a>)~i', 'Text::_auto_link_emails_callback', $text);
399	}
400
401	protected static function _auto_link_emails_callback($matches)
402	{
403		return HTML::mailto($matches[0]);
404	}
405
406	/**
407	 * Automatically applies "p" and "br" markup to text.
408	 * Basically [nl2br](http://php.net/nl2br) on steroids.
409	 *
410	 *     echo Text::auto_p($text);
411	 *
412	 * [!!] This method is not foolproof since it uses regex to parse HTML.
413	 *
414	 * @param   string  $str    subject
415	 * @param   boolean $br     convert single linebreaks to <br />
416	 * @return  string
417	 */
418	public static function auto_p($str, $br = TRUE)
419	{
420		// Trim whitespace
421		if (($str = trim($str)) === '')
422			return '';
423
424		// Standardize newlines
425		$str = str_replace(array("\r\n", "\r"), "\n", $str);
426
427		// Trim whitespace on each line
428		$str = preg_replace('~^[ \t]+~m', '', $str);
429		$str = preg_replace('~[ \t]+$~m', '', $str);
430
431		// The following regexes only need to be executed if the string contains html
432		if ($html_found = (strpos($str, '<') !== FALSE))
433		{
434			// Elements that should not be surrounded by p tags
435			$no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|olgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';
436
437			// Put at least two linebreaks before and after $no_p elements
438			$str = preg_replace('~^<'.$no_p.'[^>]*+>~im', "\n$0", $str);
439			$str = preg_replace('~</'.$no_p.'\s*+>$~im', "$0\n", $str);
440		}
441
442		// Do the <p> magic!
443		$str = '<p>'.trim($str).'</p>';
444		$str = preg_replace('~\n{2,}~', "</p>\n\n<p>", $str);
445
446		// The following regexes only need to be executed if the string contains html
447		if ($html_found !== FALSE)
448		{
449			// Remove p tags around $no_p elements
450			$str = preg_replace('~<p>(?=</?'.$no_p.'[^>]*+>)~i', '', $str);
451			$str = preg_replace('~(</?'.$no_p.'[^>]*+>)</p>~i', '$1', $str);
452		}
453
454		// Convert single linebreaks to <br />
455		if ($br === TRUE)
456		{
457			$str = preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);
458		}
459
460		return $str;
461	}
462
463	/**
464	 * Returns human readable sizes. Based on original functions written by
465	 * [Aidan Lister](http://aidanlister.com/repos/v/function.size_readable.php)
466	 * and [Quentin Zervaas](http://www.phpriot.com/d/code/strings/filesize-format/).
467	 *
468	 *     echo Text::bytes(filesize($file));
469	 *
470	 * @param   integer $bytes      size in bytes
471	 * @param   string  $force_unit a definitive unit
472	 * @param   string  $format     the return string format
473	 * @param   boolean $si         whether to use SI prefixes or IEC
474	 * @return  string
475	 */
476	public static function bytes($bytes, $force_unit = NULL, $format = NULL, $si = TRUE)
477	{
478		// Format string
479		$format = ($format === NULL) ? '%01.2f %s' : (string) $format;
480
481		// IEC prefixes (binary)
482		if ($si == FALSE OR strpos($force_unit, 'i') !== FALSE)
483		{
484			$units = array('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB');
485			$mod   = 1024;
486		}
487		// SI prefixes (decimal)
488		else
489		{
490			$units = array('B', 'kB', 'MB', 'GB', 'TB', 'PB');
491			$mod   = 1000;
492		}
493
494		// Determine unit to use
495		if (($power = array_search( (string) $force_unit, $units)) === FALSE)
496		{
497			$power = ($bytes > 0) ? floor(log($bytes, $mod)) : 0;
498		}
499
500		return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
501	}
502
503	/**
504	 * Format a number to human-readable text.
505	 *
506	 *     // Display: one thousand and twenty-four
507	 *     echo Text::number(1024);
508	 *
509	 *     // Display: five million, six hundred and thirty-two
510	 *     echo Text::number(5000632);
511	 *
512	 * @param   integer $number number to format
513	 * @return  string
514	 * @since   3.0.8
515	 */
516	public static function number($number)
517	{
518		// The number must always be an integer
519		$number = (int) $number;
520
521		// Uncompiled text version
522		$text = array();
523
524		// Last matched unit within the loop
525		$last_unit = NULL;
526
527		// The last matched item within the loop
528		$last_item = '';
529
530		foreach (Text::$units as $unit => $name)
531		{
532			if ($number / $unit >= 1)
533			{
534				// $value = the number of times the number is divisble by unit
535				$number -= $unit * ($value = (int) floor($number / $unit));
536				// Temporary var for textifying the current unit
537				$item = '';
538
539				if ($unit < 100)
540				{
541					if ($last_unit < 100 AND $last_unit >= 20)
542					{
543						$last_item .= '-'.$name;
544					}
545					else
546					{
547						$item = $name;
548					}
549				}
550				else
551				{
552					$item = Text::number($value).' '.$name;
553				}
554
555				// In the situation that we need to make a composite number (i.e. twenty-three)
556				// then we need to modify the previous entry
557				if (empty($item))
558				{
559					array_pop($text);
560
561					$item = $last_item;
562				}
563
564				$last_item = $text[] = $item;
565				$last_unit = $unit;
566			}
567		}
568
569		if (count($text) > 1)
570		{
571			$and = array_pop($text);
572		}
573
574		$text = implode(', ', $text);
575
576		if (isset($and))
577		{
578			$text .= ' and '.$and;
579		}
580
581		return $text;
582	}
583
584	/**
585	 * Prevents [widow words](http://www.shauninman.com/archive/2006/08/22/widont_wordpress_plugin)
586	 * by inserting a non-breaking space between the last two words.
587	 *
588	 *     echo Text::widont($text);
589	 *
590	 * @param   string  $str    text to remove widows from
591	 * @return  string
592	 */
593	public static function widont($str)
594	{
595		$str = rtrim($str);
596		$space = strrpos($str, ' ');
597
598		if ($space !== FALSE)
599		{
600			$str = substr($str, 0, $space).'&nbsp;'.substr($str, $space + 1);
601		}
602
603		return $str;
604	}
605
606	/**
607	 * Returns information about the client user agent.
608	 *
609	 *     // Returns "Chrome" when using Google Chrome
610	 *     $browser = Text::user_agent('browser');
611	 *
612	 * Multiple values can be returned at once by using an array:
613	 *
614	 *     // Get the browser and platform with a single call
615	 *     $info = Text::user_agent(array('browser', 'platform'));
616	 *
617	 * When using an array for the value, an associative array will be returned.
618	 *
619	 * @param   mixed   $value  array or string to return: browser, version, robot, mobile, platform
620	 * @return  mixed   requested information, FALSE if nothing is found
621	 * @uses    Kohana::$config
622	 */
623	public static function user_agent($agent, $value)
624	{
625		if (is_array($value))
626		{
627			$data = array();
628			foreach ($value as $part)
629			{
630				// Add each part to the set
631				$data[$part] = Text::user_agent($agent, $part);
632			}
633
634			return $data;
635		}
636
637		if ($value === 'browser' OR $value == 'version')
638		{
639			// Extra data will be captured
640			$info = array();
641
642			// Load browsers
643			$browsers = Kohana::$config->load('user_agents')->browser;
644
645			foreach ($browsers as $search => $name)
646			{
647				if (stripos($agent, $search) !== FALSE)
648				{
649					// Set the browser name
650					$info['browser'] = $name;
651
652					if (preg_match('#'.preg_quote($search).'[^0-9.]*+([0-9.][0-9.a-z]*)#i', Request::$user_agent, $matches))
653					{
654						// Set the version number
655						$info['version'] = $matches[1];
656					}
657					else
658					{
659						// No version number found
660						$info['version'] = FALSE;
661					}
662
663					return $info[$value];
664				}
665			}
666		}
667		else
668		{
669			// Load the search group for this type
670			$group = Kohana::$config->load('user_agents')->$value;
671
672			foreach ($group as $search => $name)
673			{
674				if (stripos($agent, $search) !== FALSE)
675				{
676					// Set the value name
677					return $name;
678				}
679			}
680		}
681
682		// The value requested could not be found
683		return FALSE;
684	}
685
686} // End text