PageRenderTime 57ms CodeModel.GetById 8ms app.highlight 41ms RepoModel.GetById 1ms app.codeStats 1ms

/app/controllers/TextHelper.php

https://github.com/BabelZilla/WTS
PHP | 495 lines | 291 code | 66 blank | 138 comment | 46 complexity | fd3a19afed25f913e6da0f7620327d4e MD5 | raw file
  1<?php
  2
  3class TextHelper
  4{
  5    /**
  6     * CodeIgniter
  7     *
  8     * An open source application development framework for PHP 5.1.6 or newer
  9     *
 10     * @package        CodeIgniter
 11     * @author        ExpressionEngine Dev Team
 12     * @copyright    Copyright (c) 2008 - 2011, EllisLab, Inc.
 13     * @license        http://codeigniter.com/user_guide/license.html
 14     * @link        http://codeigniter.com
 15     * @since        Version 1.0
 16     * @filesource
 17     */
 18
 19// ------------------------------------------------------------------------
 20
 21    /**
 22     * CodeIgniter Text Helpers
 23     *
 24     * @package        CodeIgniter
 25     * @subpackage    Helpers
 26     * @category    Helpers
 27     * @author        ExpressionEngine Dev Team
 28     * @link        http://codeigniter.com/user_guide/helpers/text_helper.html
 29     */
 30
 31// ------------------------------------------------------------------------
 32
 33    /**
 34     * Word Limiter
 35     *
 36     * Limits a string to X number of words.
 37     *
 38     * @access    public
 39     * @param    string
 40     * @param    integer
 41     * @param    string    the end character. Usually an ellipsis
 42     * @return    string
 43     */
 44    public static function word_limiter($str, $limit = 100, $end_char = '&#8230;')
 45    {
 46        if (trim($str) == '') {
 47            return $str;
 48        }
 49
 50        preg_match('/^\s*+(?:\S++\s*+){1,' . (int)$limit . '}/', $str, $matches);
 51
 52        if (strlen($str) == strlen($matches[0])) {
 53            $end_char = '';
 54        }
 55
 56        return rtrim($matches[0]) . $end_char;
 57    }
 58
 59// ------------------------------------------------------------------------
 60
 61    /**
 62     * Character Limiter
 63     *
 64     * Limits the string based on the character count.  Preserves complete words
 65     * so the character count may not be exactly as specified.
 66     *
 67     * @access    public
 68     * @param    string
 69     * @param    integer
 70     * @param    string    the end character. Usually an ellipsis
 71     * @return    string
 72     */
 73    public static function character_limiter($str, $n = 500, $end_char = '&#8230;')
 74    {
 75        if (strlen($str) < $n) {
 76            return $str;
 77        }
 78
 79        $str = preg_replace("/\s+/", ' ', str_replace(array("\r\n", "\r", "\n"), ' ', $str));
 80
 81        if (strlen($str) <= $n) {
 82            return $str;
 83        }
 84
 85        $out = "";
 86        foreach (explode(' ', trim($str)) as $val) {
 87            $out .= $val . ' ';
 88
 89            if (strlen($out) >= $n) {
 90                $out = trim($out);
 91                return (strlen($out) == strlen($str)) ? $out : $out . $end_char;
 92            }
 93        }
 94    }
 95
 96// ------------------------------------------------------------------------
 97
 98    /**
 99     * High ASCII to Entities
100     *
101     * Converts High ascii text and MS Word special characters to character entities
102     *
103     * @access    public
104     * @param    string
105     * @return    string
106     */
107    public static function ascii_to_entities($str)
108    {
109        $count = 1;
110        $out = '';
111        $temp = array();
112
113        for ($i = 0, $s = strlen($str); $i < $s; $i++) {
114            $ordinal = ord($str[$i]);
115
116            if ($ordinal < 128) {
117                /*
118                    If the $temp array has a value but we have moved on, then it seems only
119                    fair that we output that entity and restart $temp before continuing. -Paul
120                */
121                if (count($temp) == 1) {
122                    $out .= '&#' . array_shift($temp) . ';';
123                    $count = 1;
124                }
125
126                $out .= $str[$i];
127            } else {
128                if (count($temp) == 0) {
129                    $count = ($ordinal < 224) ? 2 : 3;
130                }
131
132                $temp[] = $ordinal;
133
134                if (count($temp) == $count) {
135                    $number = ($count == 3) ? (($temp['0'] % 16) * 4096) + (($temp['1'] % 64) * 64) + ($temp['2'] % 64) : (($temp['0'] % 32) * 64) + ($temp['1'] % 64);
136
137                    $out .= '&#' . $number . ';';
138                    $count = 1;
139                    $temp = array();
140                }
141            }
142        }
143
144        return $out;
145    }
146
147// ------------------------------------------------------------------------
148
149    /**
150     * Entities to ASCII
151     *
152     * Converts character entities back to ASCII
153     *
154     * @access    public
155     * @param    string
156     * @param    bool
157     * @return    string
158     */
159    public static function entities_to_ascii($str, $all = true)
160    {
161        if (preg_match_all('/\&#(\d+)\;/', $str, $matches)) {
162            for ($i = 0, $s = count($matches['0']); $i < $s; $i++) {
163                $digits = $matches['1'][$i];
164
165                $out = '';
166
167                if ($digits < 128) {
168                    $out .= chr($digits);
169
170                } elseif ($digits < 2048) {
171                    $out .= chr(192 + (($digits - ($digits % 64)) / 64));
172                    $out .= chr(128 + ($digits % 64));
173                } else {
174                    $out .= chr(224 + (($digits - ($digits % 4096)) / 4096));
175                    $out .= chr(128 + ((($digits % 4096) - ($digits % 64)) / 64));
176                    $out .= chr(128 + ($digits % 64));
177                }
178
179                $str = str_replace($matches['0'][$i], $out, $str);
180            }
181        }
182
183        if ($all) {
184            $str = str_replace(
185                array("&amp;", "&lt;", "&gt;", "&quot;", "&apos;", "&#45;"),
186                array("&", "<", ">", "\"", "'", "-"),
187                $str
188            );
189        }
190
191        return $str;
192    }
193
194// ------------------------------------------------------------------------
195
196    /**
197     * Word Censoring Function
198     *
199     * Supply a string and an array of disallowed words and any
200     * matched words will be converted to #### or to the replacement
201     * word you've submitted.
202     *
203     * @access    public
204     * @param    string    the text string
205     * @param    string    the array of censoered words
206     * @param    string    the optional replacement value
207     * @return    string
208     */
209    public static function word_censor($str, $censored, $replacement = '')
210    {
211        if (!is_array($censored)) {
212            return $str;
213        }
214
215        $str = ' ' . $str . ' ';
216
217        // \w, \b and a few others do not match on a unicode character
218        // set for performance reasons. As a result words like über
219        // will not match on a word boundary. Instead, we'll assume that
220        // a bad word will be bookeneded by any of these characters.
221        $delim = '[-_\'\"`(){}<>\[\]|!?@#%&,.:;^~*+=\/ 0-9\n\r\t]';
222
223        foreach ($censored as $badword) {
224            if ($replacement != '') {
225                $str = preg_replace(
226                    "/({$delim})(" . str_replace('\*', '\w*?', preg_quote($badword, '/')) . ")({$delim})/i",
227                    "\\1{$replacement}\\3",
228                    $str
229                );
230            } else {
231                $str = preg_replace(
232                    "/({$delim})(" . str_replace('\*', '\w*?', preg_quote($badword, '/')) . ")({$delim})/ie",
233                    "'\\1'.str_repeat('#', strlen('\\2')).'\\3'",
234                    $str
235                );
236            }
237        }
238
239        return trim($str);
240    }
241
242// ------------------------------------------------------------------------
243
244    /**
245     * Code Highlighter
246     *
247     * Colorizes code strings
248     *
249     * @access    public
250     * @param    string    the text string
251     * @return    string
252     */
253    public static function highlight_code($str)
254    {
255        // The highlight string public static function encodes and highlights
256        // brackets so we need them to start raw
257        $str = str_replace(array('&lt;', '&gt;'), array('<', '>'), $str);
258
259        // Replace any existing PHP tags to temporary markers so they don't accidentally
260        // break the string out of PHP, and thus, thwart the highlighting.
261
262        $str = str_replace(
263            array('<?', '?>', '<%', '%>', '\\', '</script>'),
264            array('phptagopen', 'phptagclose', 'asptagopen', 'asptagclose', 'backslashtmp', 'scriptclose'),
265            $str
266        );
267
268        // The highlight_string public static function requires that the text be surrounded
269        // by PHP tags, which we will remove later
270        $str = '<?php ' . $str . ' ?>'; // <?
271
272        // All the magic happens here, baby!
273        $str = highlight_string($str, true);
274
275        // Prior to PHP 5, the highligh public static function used icky <font> tags
276        // so we'll replace them with <span> tags.
277
278        if (abs(PHP_VERSION) < 5) {
279            $str = str_replace(array('<font ', '</font>'), array('<span ', '</span>'), $str);
280            $str = preg_replace('#color="(.*?)"#', 'style="color: \\1"', $str);
281        }
282
283        // Remove our artificially added PHP, and the syntax highlighting that came with it
284        $str = preg_replace(
285            '/<span style="color: #([A-Z0-9]+)">&lt;\?php(&nbsp;| )/i',
286            '<span style="color: #$1">',
287            $str
288        );
289        $str = preg_replace(
290            '/(<span style="color: #[A-Z0-9]+">.*?)\?&gt;<\/span>\n<\/span>\n<\/code>/is',
291            "$1</span>\n</span>\n</code>",
292            $str
293        );
294        $str = preg_replace('/<span style="color: #[A-Z0-9]+"\><\/span>/i', '', $str);
295
296        // Replace our markers back to PHP tags.
297        $str = str_replace(
298            array('phptagopen', 'phptagclose', 'asptagopen', 'asptagclose', 'backslashtmp', 'scriptclose'),
299            array('&lt;?', '?&gt;', '&lt;%', '%&gt;', '\\', '&lt;/script&gt;'),
300            $str
301        );
302
303        return $str;
304    }
305
306// ------------------------------------------------------------------------
307
308    /**
309     * Phrase Highlighter
310     *
311     * Highlights a phrase within a text string
312     *
313     * @access    public
314     * @param    string    the text string
315     * @param    string    the phrase you'd like to highlight
316     * @param    string    the openging tag to precede the phrase with
317     * @param    string    the closing tag to end the phrase with
318     * @return    string
319     */
320    public static function highlight_phrase($str, $phrase, $tag_open = '<strong>', $tag_close = '</strong>')
321    {
322        if ($str == '') {
323            return '';
324        }
325
326        if ($phrase != '') {
327            return preg_replace('/(' . preg_quote($phrase, '/') . ')/i', $tag_open . "\\1" . $tag_close, $str);
328        }
329
330        return $str;
331    }
332
333
334// ------------------------------------------------------------------------
335
336    /**
337     * Word Wrap
338     *
339     * Wraps text at the specified character.  Maintains the integrity of words.
340     * Anything placed between {unwrap}{/unwrap} will not be word wrapped, nor
341     * will URLs.
342     *
343     * @access    public
344     * @param    string    the text string
345     * @param    integer    the number of characters to wrap at
346     * @return    string
347     */
348    public static function word_wrap($str, $charlim = '76')
349    {
350        // Se the character limit
351        if (!is_numeric($charlim)) {
352            $charlim = 76;
353        }
354
355        // Reduce multiple spaces
356        $str = preg_replace("| +|", " ", $str);
357
358        // Standardize newlines
359        if (strpos($str, "\r") !== false) {
360            $str = str_replace(array("\r\n", "\r"), "\n", $str);
361        }
362
363        // If the current word is surrounded by {unwrap} tags we'll
364        // strip the entire chunk and replace it with a marker.
365        $unwrap = array();
366        if (preg_match_all("|(\{unwrap\}.+?\{/unwrap\})|s", $str, $matches)) {
367            for ($i = 0; $i < count($matches['0']); $i++) {
368                $unwrap[] = $matches['1'][$i];
369                $str = str_replace($matches['1'][$i], "{{unwrapped" . $i . "}}", $str);
370            }
371        }
372
373        // Use PHP's native public static function to do the initial wordwrap.
374        // We set the cut flag to FALSE so that any individual words that are
375        // too long get left alone.  In the next step we'll deal with them.
376        $str = wordwrap($str, $charlim, "\n", false);
377
378        // Split the string into individual lines of text and cycle through them
379        $output = "";
380        foreach (explode("\n", $str) as $line) {
381            // Is the line within the allowed character count?
382            // If so we'll join it to the output and continue
383            if (strlen($line) <= $charlim) {
384                $output .= $line . "\n";
385                continue;
386            }
387
388            $temp = '';
389            while ((strlen($line)) > $charlim) {
390                // If the over-length word is a URL we won't wrap it
391                if (preg_match("!\[url.+\]|://|wwww.!", $line)) {
392                    break;
393                }
394
395                // Trim the word down
396                $temp .= substr($line, 0, $charlim - 1);
397                $line = substr($line, $charlim - 1);
398            }
399
400            // If $temp contains data it means we had to split up an over-length
401            // word into smaller chunks so we'll add it back to our current line
402            if ($temp != '') {
403                $output .= $temp . "\n" . $line;
404            } else {
405                $output .= $line;
406            }
407
408            $output .= "\n";
409        }
410
411        // Put our markers back
412        if (count($unwrap) > 0) {
413            foreach ($unwrap as $key => $val) {
414                $output = str_replace("{{unwrapped" . $key . "}}", $val, $output);
415            }
416        }
417
418        // Remove the unwrap tags
419        $output = str_replace(array('{unwrap}', '{/unwrap}'), '', $output);
420
421        return $output;
422    }
423
424// ------------------------------------------------------------------------
425
426    /**
427     * Ellipsize String
428     *
429     * This public static function will strip tags from a string, split it at its max_length and ellipsize
430     *
431     * @param    string        string to ellipsize
432     * @param    integer        max length of string
433     * @param    mixed        int (1|0) or float, .5, .2, etc for position to split
434     * @param    string        ellipsis ; Default '...'
435     * @return    string        ellipsized string
436     */
437    public static function ellipsize($str, $max_length, $position = 1, $ellipsis = '&hellip;')
438    {
439        // Strip tags
440        $str = trim(strip_tags($str));
441
442        // Is the string long enough to ellipsize?
443        if (strlen($str) <= $max_length) {
444            return $str;
445        }
446
447        $beg = substr($str, 0, floor($max_length * $position));
448
449        $position = ($position > 1) ? 1 : $position;
450
451        if ($position === 1) {
452            $end = substr($str, 0, -($max_length - strlen($beg)));
453        } else {
454            $end = substr($str, -($max_length - strlen($beg)));
455        }
456
457        return $beg . $ellipsis . $end;
458    }
459
460    /**
461     * Convert foreign character into ascii relevant
462     */
463    public static function utf2ascii($str, $replacespace = false, $by = '')
464    {
465        $chars = array(
466            'a' => array('ấ', 'ầ', 'ẩ', 'ẫ', 'ậ', 'ắ', 'ằ', 'ẳ', 'ẵ', 'ặ', 'á', 'à', 'ả', 'ã', 'ạ', 'â', 'ă'),
467            'A' => array('Ấ', 'Ầ', 'Ẩ', 'Ẫ', 'Ậ', 'Ắ', 'Ằ', 'Ẳ', 'Ẵ', 'Ặ', 'Á', 'À', 'Ả', 'Ã', 'Ạ', 'Â', 'Ă'),
468            'e' => array('ế', 'ề', 'ể', 'ễ', 'ệ', 'é', 'è', 'ẻ', 'ẽ', 'ẹ', 'ê'),
469            'E' => array('Ế', 'Ề', 'Ể', 'Ễ', 'Ệ', 'É', 'È', 'Ẻ', 'Ẽ', 'Ẹ', 'Ê'),
470            'i' => array('í', 'ì', 'ỉ', 'ĩ', 'ị'),
471            'I' => array('Í', 'Ì', 'Ỉ', 'Ĩ', 'Ị'),
472            'o' => array('ố', 'ồ', 'ổ', 'ỗ', 'ộ', 'ớ', 'ờ', 'ở', 'ỡ', 'ợ', 'ó', 'ò', 'ỏ', 'õ', 'ọ', 'ô', 'ơ'),
473            'O' => array('Ố', 'Ồ', 'Ổ', 'Ô', 'Ộ', 'Ớ', 'Ờ', 'Ở', 'Ỡ', 'Ợ', 'Ó', 'Ò', 'Ỏ', 'Õ', 'Ọ', 'Ô', 'Ơ'),
474            'u' => array('ứ', 'ừ', 'ử', 'ữ', 'ự', 'ú', 'ù', 'ủ', 'ũ', 'ụ', 'ư'),
475            'U' => array('Ứ', 'Ừ', 'Ử', 'Ữ', 'Ự', 'Ú', 'Ù', 'Ủ', 'Ũ', 'Ụ', 'Ư'),
476            'y' => array('ý', 'ỳ', 'ỷ', 'ỹ', 'ỵ'),
477            'Y' => array('Ý', 'Ỳ', 'Ỷ', 'Ỹ', 'Ỵ'),
478            'd' => array('Ä‘'),
479            'D' => array('Đ')
480        );
481
482        foreach ($chars as $key => $arr) {
483            foreach ($arr as $val) {
484                $str = str_replace($val, $key, $str);
485            }
486        }
487        if ($replacespace) {
488            $str = str_replace(' ', $by, $str);
489        }
490        return $str;
491    }
492}
/* End of file TextHelper.php */
/* Location: ./app/controllers/TextHelper.php */
495