PageRenderTime 24ms CodeModel.GetById 9ms app.highlight 11ms RepoModel.GetById 1ms app.codeStats 0ms

/system/helpers/text_helper.php

https://github.com/betchi/CodeIgniter
PHP | 541 lines | 327 code | 60 blank | 154 comment | 39 complexity | 82762de9da97d73736801db201bb32d8 MD5 | raw file
  1<?php
  2/**
  3 * CodeIgniter
  4 *
  5 * An open source application development framework for PHP 5.2.4 or newer
  6 *
  7 * NOTICE OF LICENSE
  8 *
  9 * Licensed under the Open Software License version 3.0
 10 *
 11 * This source file is subject to the Open Software License (OSL 3.0) that is
 12 * bundled with this package in the files license.txt / license.rst.  It is
 13 * also available through the world wide web at this URL:
 14 * http://opensource.org/licenses/OSL-3.0
 15 * If you did not receive a copy of the license and are unable to obtain it
 16 * through the world wide web, please send an email to
 17 * licensing@ellislab.com so we can send you a copy immediately.
 18 *
 19 * @package		CodeIgniter
 20 * @author		EllisLab Dev Team
 21 * @copyright	Copyright (c) 2008 - 2014, EllisLab, Inc. (http://ellislab.com/)
 22 * @license		http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0)
 23 * @link		http://codeigniter.com
 24 * @since		Version 1.0
 25 * @filesource
 26 */
 27defined('BASEPATH') OR exit('No direct script access allowed');
 28
 29/**
 30 * CodeIgniter Text Helpers
 31 *
 32 * @package		CodeIgniter
 33 * @subpackage	Helpers
 34 * @category	Helpers
 35 * @author		EllisLab Dev Team
 36 * @link		http://codeigniter.com/user_guide/helpers/text_helper.html
 37 */
 38
 39// ------------------------------------------------------------------------
 40
 41if ( ! function_exists('word_limiter'))
 42{
 43	/**
 44	 * Word Limiter
 45	 *
 46	 * Limits a string to X number of words.
 47	 *
 48	 * @param	string
 49	 * @param	int
 50	 * @param	string	the end character. Usually an ellipsis
 51	 * @return	string
 52	 */
 53	function word_limiter($str, $limit = 100, $end_char = '&#8230;')
 54	{
 55		if (trim($str) === '')
 56		{
 57			return $str;
 58		}
 59
 60		preg_match('/^\s*+(?:\S++\s*+){1,'.(int) $limit.'}/', $str, $matches);
 61
 62		if (strlen($str) === strlen($matches[0]))
 63		{
 64			$end_char = '';
 65		}
 66
 67		return rtrim($matches[0]).$end_char;
 68	}
 69}
 70
 71// ------------------------------------------------------------------------
 72
 73if ( ! function_exists('character_limiter'))
 74{
 75	/**
 76	 * Character Limiter
 77	 *
 78	 * Limits the string based on the character count.  Preserves complete words
 79	 * so the character count may not be exactly as specified.
 80	 *
 81	 * @param	string
 82	 * @param	int
 83	 * @param	string	the end character. Usually an ellipsis
 84	 * @return	string
 85	 */
 86	function character_limiter($str, $n = 500, $end_char = '&#8230;')
 87	{
 88		if (mb_strlen($str) < $n)
 89		{
 90			return $str;
 91		}
 92
 93		// a bit complicated, but faster than preg_replace with \s+
 94		$str = preg_replace('/ {2,}/', ' ', str_replace(array("\r", "\n", "\t", "\x0B", "\x0C"), ' ', $str));
 95
 96		if (mb_strlen($str) <= $n)
 97		{
 98			return $str;
 99		}
100
101		$out = '';
102		foreach (explode(' ', trim($str)) as $val)
103		{
104			$out .= $val.' ';
105
106			if (mb_strlen($out) >= $n)
107			{
108				$out = trim($out);
109				return (mb_strlen($out) === mb_strlen($str)) ? $out : $out.$end_char;
110			}
111		}
112	}
113}
114
115// ------------------------------------------------------------------------
116
if ( ! function_exists('ascii_to_entities'))
{
	/**
	 * High ASCII to Entities
	 *
	 * Converts high ASCII text and MS Word special characters to character entities.
	 * The input is walked byte by byte; bytes below 128 are copied as-is and
	 * multi-byte UTF-8 sequences (2, 3 or 4 bytes) are replaced by a numeric
	 * entity for the code point they encode.
	 *
	 * @param	string	$str
	 * @return	string
	 */
	function ascii_to_entities($str)
	{
		$out = '';
		for ($i = 0, $s = strlen($str) - 1, $count = 1, $temp = array(); $i <= $s; $i++)
		{
			$ordinal = ord($str[$i]);

			if ($ordinal < 128)
			{
				/*
					If the $temp array has a value but we have moved on, then it seems only
					fair that we output that entity and restart $temp before continuing. -Paul
				*/
				if (count($temp) === 1)
				{
					$out .= '&#'.array_shift($temp).';';
					$count = 1;
				}

				$out .= $str[$i];
			}
			else
			{
				// The lead byte tells how many bytes the sequence occupies:
				// below 224 => 2 bytes, below 240 => 3 bytes, otherwise 4 bytes.
				if (count($temp) === 0)
				{
					if ($ordinal < 224)
					{
						$count = 2;
					}
					elseif ($ordinal < 240)
					{
						$count = 3;
					}
					else
					{
						$count = 4;
					}
				}

				$temp[] = $ordinal;

				if (count($temp) === $count)
				{
					// Strip the marker bits from each byte and stitch the
					// remaining payload bits together into the code point.
					if ($count === 4)
					{
						$number = (($temp[0] % 8) * 262144) + (($temp[1] % 64) * 4096) + (($temp[2] % 64) * 64) + ($temp[3] % 64);
					}
					elseif ($count === 3)
					{
						$number = (($temp[0] % 16) * 4096) + (($temp[1] % 64) * 64) + ($temp[2] % 64);
					}
					else
					{
						$number = (($temp[0] % 32) * 64) + ($temp[1] % 64);
					}

					$out .= '&#'.$number.';';
					$count = 1;
					$temp = array();
				}
				// If this is the last iteration, just output whatever we have.
				// Only reached for a sequence truncated at the end of the input;
				// the output format for that case is kept unchanged.
				elseif ($i === $s)
				{
					$out .= '&#'.implode(';', $temp).';';
				}
			}
		}

		return $out;
	}
}
178
179// ------------------------------------------------------------------------
180
if ( ! function_exists('entities_to_ascii'))
{
	/**
	 * Entities to ASCII
	 *
	 * Converts decimal character entities (&#NNN;) back to their UTF-8 byte
	 * sequence. Code points up to U+10FFFF are supported; larger numbers are
	 * not valid Unicode and are left in the text untouched.
	 *
	 * @param	string	$str	text containing entities
	 * @param	bool	$all	also decode &amp; &lt; &gt; &quot; &apos; and &#45;
	 * @return	string
	 */
	function entities_to_ascii($str, $all = TRUE)
	{
		if (preg_match_all('/\&#(\d+)\;/', $str, $matches))
		{
			for ($i = 0, $s = count($matches[0]); $i < $s; $i++)
			{
				$digits = (int) $matches[1][$i];

				if ($digits < 128)
				{
					$out = chr($digits);
				}
				elseif ($digits < 2048)
				{
					// 2-byte sequence: 110xxxxx 10xxxxxx
					$out = chr(192 | ($digits >> 6))
						.chr(128 | ($digits & 63));
				}
				elseif ($digits < 65536)
				{
					// 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
					$out = chr(224 | ($digits >> 12))
						.chr(128 | (($digits >> 6) & 63))
						.chr(128 | ($digits & 63));
				}
				elseif ($digits <= 1114111)
				{
					// 4-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
					$out = chr(240 | ($digits >> 18))
						.chr(128 | (($digits >> 12) & 63))
						.chr(128 | (($digits >> 6) & 63))
						.chr(128 | ($digits & 63));
				}
				else
				{
					// Beyond U+10FFFF there is no character to produce
					continue;
				}

				$str = str_replace($matches[0][$i], $out, $str);
			}
		}

		if ($all)
		{
			return str_replace(
				array('&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#45;'),
				array('&', '<', '>', '"', "'", '-'),
				$str
			);
		}

		return $str;
	}
}
233
234// ------------------------------------------------------------------------
235
if ( ! function_exists('word_censor'))
{
	/**
	 * Word Censoring Function
	 *
	 * Supply a string and an array of disallowed words and any
	 * matched words will be converted to #### or to the replacement
	 * word you've submitted. Matching is case-insensitive and a
	 * trailing/embedded * in a disallowed word acts as a wildcard.
	 *
	 * @param	string	$str		the text string
	 * @param	array	$censored	the array of censored words
	 * @param	string	$replacement	the optional replacement value
	 * @return	string
	 */
	function word_censor($str, $censored, $replacement = '')
	{
		if ( ! is_array($censored))
		{
			return $str;
		}

		// Pad with spaces so words at the very start/end still sit between delimiters
		$str = ' '.$str.' ';

		// \w, \b and a few others do not match on a unicode character
		// set for performance reasons. As a result words like über
		// will not match on a word boundary. Instead, we'll assume that
		// a bad word will be bookended by any of these characters.
		$delim = '[-_\'\"`(){}<>\[\]|!?@#%&,.:;^~*+=\/ 0-9\n\r\t]';

		foreach ($censored as $badword)
		{
			// Escape the word for use in a pattern, then turn an escaped * back into a wildcard
			$badword = str_replace('\*', '\w*?', preg_quote($badword, '/'));

			if ($replacement !== '')
			{
				$str = preg_replace(
					"/({$delim})(".$badword.")({$delim})/i",
					"\\1{$replacement}\\3",
					$str
				);
			}
			// The /e (eval) modifier no longer exists in current PHP versions, so the
			// "one # per byte" masking is done by locating each match and overwriting it.
			elseif (preg_match_all("/{$delim}(".$badword."){$delim}/i", $str, $matches, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE))
			{
				$matches = $matches[1];

				for ($i = count($matches) - 1; $i >= 0; $i--)
				{
					$length = strlen($matches[$i][0]);
					$str = substr_replace(
						$str,
						str_repeat('#', $length),
						$matches[$i][1],
						$length
					);
				}
			}
		}

		return trim($str);
	}
}
280
281// ------------------------------------------------------------------------
282
if ( ! function_exists('highlight_code'))
{
	/**
	 * Code Highlighter
	 *
	 * Colorizes a code string by running it through PHP's highlight_string()
	 * and then removing the wrapper PHP tags that had to be added for it.
	 *
	 * @param	string	$str	the code to colorize
	 * @return	string	HTML markup as produced by highlight_string()
	 */
	function highlight_code($str)
	{
		// highlight_string() encodes angle brackets itself, so they must go in
		// raw. PHP/ASP tags, backslashes and </script> are swapped for plain-word
		// markers so they cannot end the PHP block early and break highlighting.
		$to_markers = array(
			'&lt;'		=> '<',
			'&gt;'		=> '>',
			'<?'		=> 'phptagopen',
			'?>'		=> 'phptagclose',
			'<%'		=> 'asptagopen',
			'%>'		=> 'asptagclose',
			'\\'		=> 'backslashtmp',
			'</script>'	=> 'scriptclose'
		);
		$prepared = str_replace(array_keys($to_markers), array_values($to_markers), $str);

		// highlight_string() only colorizes text inside PHP tags, so wrap it
		$html = highlight_string('<?php '.$prepared.' ?>', TRUE);

		// Strip the artificial PHP tags again, together with the markup
		// that highlight_string() generated around them
		$cleanup = array(
			'/<span style="color: #([A-Z0-9]+)">&lt;\?php(&nbsp;| )/i'
				=> '<span style="color: #$1">',
			'/(<span style="color: #[A-Z0-9]+">.*?)\?&gt;<\/span>\n<\/span>\n<\/code>/is'
				=> "$1</span>\n</span>\n</code>",
			'/<span style="color: #[A-Z0-9]+"\><\/span>/i'
				=> ''
		);
		$html = preg_replace(array_keys($cleanup), array_values($cleanup), $html);

		// Finally turn the markers into the encoded form of what they stood for
		$from_markers = array(
			'phptagopen'	=> '&lt;?',
			'phptagclose'	=> '?&gt;',
			'asptagopen'	=> '&lt;%',
			'asptagclose'	=> '%&gt;',
			'backslashtmp'	=> '\\',
			'scriptclose'	=> '&lt;/script&gt;'
		);

		return str_replace(array_keys($from_markers), array_values($from_markers), $html);
	}
}
335
336// ------------------------------------------------------------------------
337
if ( ! function_exists('highlight_phrase'))
{
	/**
	 * Phrase Highlighter
	 *
	 * Wraps every case-insensitive occurrence of a phrase in the given tags.
	 * When either the text or the phrase is an empty string the text is
	 * returned unchanged.
	 *
	 * @param	string	$str		the text string
	 * @param	string	$phrase		the phrase you'd like to highlight
	 * @param	string	$tag_open	the opening tag to precede the phrase with
	 * @param	string	$tag_close	the closing tag to end the phrase with
	 * @return	string
	 */
	function highlight_phrase($str, $phrase, $tag_open = '<mark>', $tag_close = '</mark>')
	{
		if ($str === '' OR $phrase === '')
		{
			return $str;
		}

		// Match in UTF-8 mode only when the application has it enabled
		$modifiers = UTF8_ENABLED ? 'iu' : 'i';
		$pattern = '/('.preg_quote($phrase, '/').')/'.$modifiers;

		return preg_replace($pattern, $tag_open.'\\1'.$tag_close, $str);
	}
}
358
359// ------------------------------------------------------------------------
360
if ( ! function_exists('convert_accented_characters'))
{
	/**
	 * Convert Accented Foreign Characters to ASCII
	 *
	 * Applies the pattern => replacement map found in the application's
	 * foreign_chars.php config file(s). The map is loaded once per request
	 * and kept in static variables for later calls.
	 *
	 * @param	string	$str	Input string
	 * @return	string
	 */
	function convert_accented_characters($str)
	{
		static $array_from, $array_to;

		// Map already loaded on an earlier call
		if (is_array($array_from))
		{
			return preg_replace($array_from, $array_to, $str);
		}

		// The environment-specific file is included last so that it can
		// replace what the general file defined
		$candidates = array(
			APPPATH.'config/foreign_chars.php',
			APPPATH.'config/'.ENVIRONMENT.'/foreign_chars.php'
		);

		foreach ($candidates as $config_file)
		{
			if (file_exists($config_file))
			{
				include($config_file);
			}
		}

		// The included files are expected to populate $foreign_characters
		if ( ! empty($foreign_characters) && is_array($foreign_characters))
		{
			$array_from = array_keys($foreign_characters);
			$array_to = array_values($foreign_characters);

			return preg_replace($array_from, $array_to, $str);
		}

		// No usable map: remember that, so the files are not probed again
		$array_from = array();
		$array_to = array();

		return $str;
	}
}
400
401// ------------------------------------------------------------------------
402
if ( ! function_exists('word_wrap'))
{
	/**
	 * Word Wrap
	 *
	 * Wraps text at the specified character. Maintains the integrity of words.
	 * Anything placed between {unwrap}{/unwrap} will not be word wrapped, nor
	 * will URLs. Words longer than the limit are cut into pieces of
	 * ($charlim - 1) characters, one piece per line. The result always ends
	 * with a newline.
	 *
	 * @param	string	$str		the text string
	 * @param	int	$charlim = 76	the number of characters to wrap at
	 *					(non-numeric values fall back to 76,
	 *					values below 1 are treated as 1)
	 * @return	string
	 */
	function word_wrap($str, $charlim = 76)
	{
		// Set the character limit. Anything below 1 would keep the splitting
		// loop further down from ever making progress.
		$charlim = is_numeric($charlim) ? max(1, (int) $charlim) : 76;

		// Size of the pieces an over-length word is cut into; never less than
		// one character so each pass of the splitting loop shortens the line.
		$chunk = max(1, $charlim - 1);

		// Reduce multiple spaces
		$str = preg_replace('| +|', ' ', $str);

		// Standardize newlines
		if (strpos($str, "\r") !== FALSE)
		{
			$str = str_replace(array("\r\n", "\r"), "\n", $str);
		}

		// If the current word is surrounded by {unwrap} tags we'll
		// strip the entire chunk and replace it with a marker.
		$unwrap = array();
		if (preg_match_all('|\{unwrap\}(.+?)\{/unwrap\}|s', $str, $matches))
		{
			for ($i = 0, $c = count($matches[0]); $i < $c; $i++)
			{
				$unwrap[] = $matches[1][$i];
				$str = str_replace($matches[0][$i], '{{unwrapped'.$i.'}}', $str);
			}
		}

		// Use PHP's native function to do the initial wordwrap.
		// We set the cut flag to FALSE so that any individual words that are
		// too long get left alone. In the next step we'll deal with them.
		$str = wordwrap($str, $charlim, "\n", FALSE);

		// Split the string into individual lines of text and cycle through them
		$output = '';
		foreach (explode("\n", $str) as $line)
		{
			// Is the line within the allowed character count?
			// If so we'll join it to the output and continue
			if (mb_strlen($line) <= $charlim)
			{
				$output .= $line."\n";
				continue;
			}

			$temp = '';
			while (mb_strlen($line) > $charlim)
			{
				// If the over-length word is a URL we won't wrap it
				if (preg_match('!\[url.+\]|://|www\.!', $line))
				{
					break;
				}

				// Cut the next piece off the front and put it on a line of its own
				$temp .= mb_substr($line, 0, $chunk)."\n";
				$line = mb_substr($line, $chunk);
			}

			// $temp holds the pieces that were cut off (each already terminated
			// by a newline); $line is whatever remained of the word
			$output .= $temp.$line."\n";
		}

		// Put our markers back
		foreach ($unwrap as $key => $val)
		{
			$output = str_replace('{{unwrapped'.$key.'}}', $val, $output);
		}

		return $output;
	}
}
497
498// ------------------------------------------------------------------------
499
if ( ! function_exists('ellipsize'))
{
	/**
	 * Ellipsize String
	 *
	 * This function will strip tags from a string, split it at its max_length and ellipsize.
	 * The result keeps $max_length characters of the text in total: the share given
	 * by $position from the start and the rest from the end, with the ellipsis
	 * placed in between.
	 *
	 * @param	string	$str		string to ellipsize
	 * @param	int	$max_length	max length of string
	 * @param	mixed	$position	int (1|0) or float, .5, .2, etc for position to split;
	 *					values outside 0..1 are clamped to that range
	 * @param	string	$ellipsis	ellipsis ; Default '&hellip;'
	 * @return	string	ellipsized string
	 */
	function ellipsize($str, $max_length, $position = 1, $ellipsis = '&hellip;')
	{
		// Strip tags
		$str = trim(strip_tags($str));

		// Is the string long enough to ellipsize?
		if (mb_strlen($str) <= $max_length)
		{
			return $str;
		}

		// A negative length cannot be honoured; treat it as "keep nothing"
		$max_length = max(0, (int) $max_length);

		// Clamp before use so an out-of-range position cannot make the leading
		// part longer than $max_length or turn into a negative offset
		$position = max(0, min(1, $position));

		$beg = mb_substr($str, 0, (int) floor($max_length * $position));

		// Characters still available for the tail. Checked explicitly because
		// an offset of -0 would make mb_substr() return the whole string.
		$remaining = $max_length - mb_strlen($beg);
		$end = ($remaining > 0) ? mb_substr($str, -$remaining) : '';

		return $beg.$ellipsis.$end;
	}
}
539
540/* End of file text_helper.php */
541/* Location: ./system/helpers/text_helper.php */