TextHelper.php | searchcode

/app/controllers/TextHelper.php

https://github.com/BabelZilla/WTS · PHP · 495 lines · 267 code · 72 blank · 156 comment · 46 complexity · fd3a19afed25f913e6da0f7620327d4e MD5 · raw file

<?php

/**
 * Static text utilities: the CodeIgniter text helper functions exposed as
 * static methods, plus a Vietnamese-to-ASCII transliteration helper.
 *
 * All length calculations are byte based (strlen/substr). This file must be
 * saved as UTF-8 because utf2ascii() contains literal UTF-8 characters.
 */
class TextHelper
{
    /**
     * CodeIgniter
     *
     * An open source application development framework for PHP 5.1.6 or newer
     *
     * @package        CodeIgniter
     * @author        ExpressionEngine Dev Team
     * @copyright    Copyright (c) 2008 - 2011, EllisLab, Inc.
     * @license        http://codeigniter.com/user_guide/license.html
     * @link        http://codeigniter.com
     * @since        Version 1.0
     * @filesource
     */

    // ------------------------------------------------------------------------

    /**
     * CodeIgniter Text Helpers
     *
     * @package        CodeIgniter
     * @subpackage    Helpers
     * @category    Helpers
     * @author        ExpressionEngine Dev Team
     * @link        http://codeigniter.com/user_guide/helpers/text_helper.html
     */

    // ------------------------------------------------------------------------

    /**
     * Word Limiter
     *
     * Limits a string to X number of words. Leading whitespace is kept,
     * trailing whitespace of the kept part is removed.
     *
     * @param    string     $str         the text to shorten
     * @param    integer    $limit       maximum number of words to keep
     * @param    string     $end_char    appended when words were cut off. Usually an ellipsis
     * @return   string     the input unchanged when it is blank; only $end_char
     *                      when $limit is smaller than 1
     */
    public static function word_limiter($str, $limit = 100, $end_char = '&#8230;')
    {
        if (trim((string) $str) === '') {
            return $str;
        }

        $limit = (int) $limit;
        if ($limit < 1) {
            // A "{1,0}" quantifier is not a valid pattern, so a limit below one
            // word is answered directly: nothing is kept, everything was cut.
            return $end_char;
        }

        // Possessive quantifiers keep the match linear on long inputs.
        if (!preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/', $str, $matches)) {
            return $str;
        }

        // The whole string fitted into the limit: nothing was cut off.
        if (strlen($str) === strlen($matches[0])) {
            $end_char = '';
        }

        return rtrim($matches[0]) . $end_char;
    }

    // ------------------------------------------------------------------------

    /**
     * Character Limiter
     *
     * Limits the string based on the character count.  Preserves complete words
     * so the character count may not be exactly as specified. Lengths are
     * measured in bytes.
     *
     * @param    string     $str         the text to shorten
     * @param    integer    $n           approximate maximum length
     * @param    string     $end_char    appended when text was cut off. Usually an ellipsis
     * @return   string
     */
    public static function character_limiter($str, $n = 500, $end_char = '&#8230;')
    {
        $str = (string) $str;

        if (strlen($str) < $n) {
            return $str;
        }

        // Collapse every run of whitespace (including line breaks) to one space.
        $str = preg_replace('/\s+/', ' ', $str);

        if (strlen($str) <= $n) {
            return $str;
        }

        $trimmed = trim($str);
        $out = '';

        foreach (explode(' ', $trimmed) as $word) {
            $out .= $word . ' ';

            if (strlen($out) >= $n) {
                break;
            }
        }

        $out = trim($out);

        // Compare against the trimmed text so surrounding whitespace alone never
        // triggers the end character, and always return a string even when the
        // loop runs out of words before reaching $n.
        return (strlen($out) === strlen($trimmed)) ? $out : $out . $end_char;
    }

    // ------------------------------------------------------------------------

    /**
     * High ASCII to Entities
     *
     * Converts High ascii text and MS Word special characters to character entities.
     * Bytes below 128 are copied through. Bytes from 128 up are collected into
     * UTF-8 sequences of 2, 3 or 4 bytes (length chosen from the lead byte) and
     * emitted as one decimal entity. A sequence that is interrupted by an ASCII
     * byte or by the end of the string is emitted as one entity per raw byte.
     *
     * @param    string    $str
     * @return   string
     */
    public static function ascii_to_entities($str)
    {
        $str = (string) $str;
        $out = '';
        $pending = array(); // byte values of the multi-byte sequence being collected
        $expected = 1;      // number of bytes the current sequence should have

        for ($i = 0, $length = strlen($str); $i < $length; $i++) {
            $ordinal = ord($str[$i]);

            if ($ordinal < 128) {
                // An unfinished sequence cannot continue past an ASCII byte;
                // output what was collected instead of carrying it along.
                if (count($pending) > 0) {
                    $out .= '&#' . implode(';&#', $pending) . ';';
                    $pending = array();
                }

                $out .= $str[$i];
                continue;
            }

            if (count($pending) === 0) {
                if ($ordinal < 224) {
                    $expected = 2;
                } elseif ($ordinal < 240) {
                    $expected = 3;
                } else {
                    $expected = 4;
                }
            }

            $pending[] = $ordinal;

            if (count($pending) === $expected) {
                // Payload bits of the lead byte: 5, 4 or 3 bits for 2, 3 or 4 byte sequences.
                $lead_modulus = ($expected === 2) ? 32 : (($expected === 3) ? 16 : 8);
                $number = $pending[0] % $lead_modulus;

                // Each following byte contributes its low 6 bits.
                for ($j = 1; $j < $expected; $j++) {
                    $number = ($number * 64) + ($pending[$j] % 64);
                }

                $out .= '&#' . $number . ';';
                $pending = array();
            }
        }

        // The string ended in the middle of a sequence.
        if (count($pending) > 0) {
            $out .= '&#' . implode(';&#', $pending) . ';';
        }

        return $out;
    }

    // ------------------------------------------------------------------------

    /**
     * Entities to ASCII
     *
     * Converts decimal character entities (&#NNN;) back to UTF-8 bytes and,
     * when $all is true, also the named entities &amp; &lt; &gt; &quot; &apos;.
     * Every entity is decoded exactly once in a single pass, so "&amp;lt;"
     * becomes "&lt;" rather than "<". Numbers above 0x10FFFF are left untouched.
     *
     * @param    string    $str
     * @param    bool      $all    also decode the named entities listed above
     * @return   string
     */
    public static function entities_to_ascii($str, $all = true)
    {
        $named = array('amp' => '&', 'lt' => '<', 'gt' => '>', 'quot' => '"', 'apos' => "'");

        $pattern = $all
            ? '/&(?:#(\d+)|(amp|lt|gt|quot|apos));/'
            : '/&#(\d+);/';

        $decoded = preg_replace_callback(
            $pattern,
            function ($match) use ($named) {
                // Group 1 is empty when the named alternative matched.
                if (!isset($match[1]) || $match[1] === '') {
                    return $named[$match[2]];
                }

                $digits = ltrim($match[1], '0');
                if (strlen($digits) > 7) {
                    // Far beyond the Unicode range; keep the entity as written.
                    return $match[0];
                }

                $code = (int) $digits;

                if ($code < 0x80) {
                    return chr($code);
                }
                if ($code < 0x800) {
                    return chr(0xC0 | ($code >> 6))
                        . chr(0x80 | ($code & 0x3F));
                }
                if ($code < 0x10000) {
                    return chr(0xE0 | ($code >> 12))
                        . chr(0x80 | (($code >> 6) & 0x3F))
                        . chr(0x80 | ($code & 0x3F));
                }
                if ($code <= 0x10FFFF) {
                    return chr(0xF0 | ($code >> 18))
                        . chr(0x80 | (($code >> 12) & 0x3F))
                        . chr(0x80 | (($code >> 6) & 0x3F))
                        . chr(0x80 | ($code & 0x3F));
                }

                return $match[0];
            },
            $str
        );

        // preg_replace_callback() yields null on a PCRE error; fall back to the input.
        return ($decoded === null) ? $str : $decoded;
    }

    // ------------------------------------------------------------------------

    /**
     * Word Censoring Function
     *
     * Supply a string and an array of disallowed words and any
     * matched words will be converted to #### or to the replacement
     * word you've submitted. A "*" inside a disallowed word acts as a
     * wildcard for word characters. Matching is case-insensitive. The
     * returned text is trimmed.
     *
     * @param    string    $str            the text string
     * @param    array     $censored       the array of censored words
     * @param    string    $replacement    the optional replacement value, inserted literally
     * @return   string    $str unchanged when $censored is not an array
     */
    public static function word_censor($str, $censored, $replacement = '')
    {
        if (!is_array($censored)) {
            return $str;
        }

        // Pad so that words at either end of the text have a delimiter next to them.
        $str = ' ' . $str . ' ';
        $replacement = (string) $replacement;

        // \w, \b and a few others do not match on a unicode character
        // set for performance reasons. As a result words like über
        // will not match on a word boundary. Instead, we'll assume that
        // a bad word will be bookended by any of these characters.
        $delim = '[-_\'\"`(){}<>\[\]|!?@#%&,.:;^~*+=\/ 0-9\n\r\t]';

        foreach ($censored as $badword) {
            $badword = (string) $badword;
            if ($badword === '') {
                continue;
            }

            $word = str_replace('\*', '\w*?', preg_quote($badword, '/'));

            // Look-arounds test the delimiters without consuming them, so two
            // bad words separated by a single delimiter are both caught.
            $result = preg_replace_callback(
                "/(?<={$delim}){$word}(?={$delim})/i",
                function ($match) use ($replacement) {
                    return ($replacement !== '')
                        ? $replacement
                        : str_repeat('#', strlen($match[0]));
                },
                $str
            );

            if ($result !== null) {
                $str = $result;
            }
        }

        return trim($str);
    }

    // ------------------------------------------------------------------------

    /**
     * Code Highlighter
     *
     * Colorizes code strings using highlight_string() and removes the PHP
     * open/close tags that have to be added for it to work.
     *
     * NOTE(review): the clean-up patterns below expect the exact markup that
     * highlight_string() emits (span/newline layout). Confirm they still match
     * on the PHP version in use.
     *
     * @param    string    $str    the text string
     * @return   string
     */
    public static function highlight_code($str)
    {
        // The highlight string function encodes and highlights
        // brackets so we need them to start raw
        $str = str_replace(array('&lt;', '&gt;'), array('<', '>'), $str);

        // Replace any existing PHP tags to temporary markers so they don't accidentally
        // break the string out of PHP, and thus, thwart the highlighting.
        $str = str_replace(
            array('<?', '?>', '<%', '%>', '\\', '</script>'),
            array('phptagopen', 'phptagclose', 'asptagopen', 'asptagclose', 'backslashtmp', 'scriptclose'),
            $str
        );

        // highlight_string() requires that the text be surrounded
        // by PHP tags, which we will remove later
        $str = highlight_string('<?php ' . $str . ' ?>', true);

        // Remove our artificially added PHP, and the syntax highlighting that came with it.
        // The patterns are applied in order.
        $str = preg_replace(
            array(
                '/<span style="color: #([A-Z0-9]+)">&lt;\?php(&nbsp;| )/i',
                '/(<span style="color: #[A-Z0-9]+">.*?)\?&gt;<\/span>\n<\/span>\n<\/code>/is',
                '/<span style="color: #[A-Z0-9]+"\><\/span>/i',
            ),
            array(
                '<span style="color: #$1">',
                "$1</span>\n</span>\n</code>",
                '',
            ),
            $str
        );

        // Replace our markers back to PHP tags.
        $str = str_replace(
            array('phptagopen', 'phptagclose', 'asptagopen', 'asptagclose', 'backslashtmp', 'scriptclose'),
            array('&lt;?', '?&gt;', '&lt;%', '%&gt;', '\\', '&lt;/script&gt;'),
            $str
        );

        return $str;
    }

    // ------------------------------------------------------------------------

    /**
     * Phrase Highlighter
     *
     * Highlights a phrase within a text string. Matching is case-insensitive
     * and the matched text keeps its original casing. The tags are inserted
     * literally.
     *
     * @param    string    $str          the text string
     * @param    string    $phrase       the phrase you'd like to highlight
     * @param    string    $tag_open     the opening tag to precede the phrase with
     * @param    string    $tag_close    the closing tag to end the phrase with
     * @return   string
     */
    public static function highlight_phrase($str, $phrase, $tag_open = '<strong>', $tag_close = '</strong>')
    {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        $phrase = (string) $phrase;
        if ($phrase === '') {
            return $str;
        }

        // A callback keeps "$1", "\1" and digits inside the tags from being
        // read as back-references.
        $result = preg_replace_callback(
            '/' . preg_quote($phrase, '/') . '/i',
            function ($match) use ($tag_open, $tag_close) {
                return $tag_open . $match[0] . $tag_close;
            },
            $str
        );

        return ($result === null) ? $str : $result;
    }

    // ------------------------------------------------------------------------

    /**
     * Word Wrap
     *
     * Wraps text at the specified character.  Maintains the integrity of words.
     * Anything placed between {unwrap}{/unwrap} will not be word wrapped, nor
     * will URLs. Words longer than the limit are cut into pieces of
     * ($charlim - 1) bytes. Every output line, including the last, ends in "\n".
     *
     * @param    string     $str        the text string
     * @param    integer    $charlim    the number of characters to wrap at;
     *                                  non-numeric or smaller than 1 falls back to 76
     * @return   string
     */
    public static function word_wrap($str, $charlim = '76')
    {
        // Set the character limit
        $charlim = (is_numeric($charlim) && $charlim >= 1) ? (int) $charlim : 76;

        // Reduce multiple spaces
        $str = preg_replace('| +|', ' ', $str);

        // Standardize newlines
        if (strpos($str, "\r") !== false) {
            $str = str_replace(array("\r\n", "\r"), "\n", $str);
        }

        // If the current word is surrounded by {unwrap} tags we'll
        // strip the entire chunk and replace it with a marker.
        $unwrap = array();
        if (preg_match_all('|(\{unwrap\}.+?\{/unwrap\})|s', $str, $matches)) {
            foreach ($matches[1] as $index => $chunk) {
                $unwrap[] = $chunk;
                $str = str_replace($chunk, '{{unwrapped' . $index . '}}', $str);
            }
        }

        // Use PHP's native function to do the initial wordwrap.
        // We set the cut flag to FALSE so that any individual words that are
        // too long get left alone.  In the next step we'll deal with them.
        $str = wordwrap($str, $charlim, "\n", false);

        // Never cut zero bytes at a time, otherwise the loop below cannot finish.
        $piece = max(1, $charlim - 1);

        // Split the string into individual lines of text and cycle through them
        $output = '';
        foreach (explode("\n", $str) as $line) {
            // Is the line within the allowed character count?
            // If so we'll join it to the output and continue
            if (strlen($line) <= $charlim) {
                $output .= $line . "\n";
                continue;
            }

            // Over-length URLs and unwrap markers are left in one piece; a cut
            // marker could no longer be swapped back for its original text.
            if (!preg_match('!\[url.+\]|://|www\.|\{\{unwrapped\d+\}\}!', $line)) {
                while (strlen($line) > $charlim) {
                    $output .= substr($line, 0, $piece) . "\n";
                    $line = substr($line, $piece);
                }
            }

            $output .= $line . "\n";
        }

        // Put our markers back
        foreach ($unwrap as $key => $val) {
            $output = str_replace('{{unwrapped' . $key . '}}', $val, $output);
        }

        // Remove the unwrap tags
        $output = str_replace(array('{unwrap}', '{/unwrap}'), '', $output);

        return $output;
    }

    // ------------------------------------------------------------------------

    /**
     * Ellipsize String
     *
     * This function will strip tags from a string, split it at its max_length and ellipsize.
     * The result keeps floor($max_length * $position) bytes from the start and
     * the remaining allowance from the end of the text, joined by $ellipsis.
     *
     * @param    string     $str           string to ellipsize
     * @param    integer    $max_length    max length of string (bytes, ellipsis not counted)
     * @param    mixed      $position      int (1|0) or float, .5, .2, etc for position to split;
     *                                     values outside 0..1 are clamped
     * @param    string     $ellipsis      ellipsis ; Default '&hellip;'
     * @return   string     ellipsized string
     */
    public static function ellipsize($str, $max_length, $position = 1, $ellipsis = '&hellip;')
    {
        // Strip tags
        $str = trim(strip_tags($str));
        $max_length = max(0, (int) $max_length);

        // Is the string long enough to ellipsize?
        if (strlen($str) <= $max_length) {
            return $str;
        }

        // Clamp before use so the leading part can never exceed $max_length.
        $position = min(1, max(0, (float) $position));

        $head_length = (int) floor($max_length * $position);
        $tail_length = $max_length - $head_length;

        $beg = substr($str, 0, $head_length);

        // substr($str, -0) would return the whole string, so an empty tail is
        // handled explicitly (this is the case for position 1, 1.0 or '1').
        $end = ($tail_length > 0) ? substr($str, -$tail_length) : '';

        return $beg . $ellipsis . $end;
    }

    // ------------------------------------------------------------------------

    /**
     * Convert foreign character into ascii relevant
     *
     * Replaces precomposed Vietnamese letters (UTF-8) with their unaccented
     * ASCII base letter and optionally replaces spaces.
     *
     * @param    string    $str             the text to transliterate
     * @param    bool      $replacespace    when true every space is replaced by $by
     * @param    string    $by              replacement for spaces
     * @return   string
     */
    public static function utf2ascii($str, $replacespace = false, $by = '')
    {
        static $search = null;
        static $replace = null;

        if ($search === null) {
            $chars = array(
                'a' => array('ấ', 'ầ', 'ẩ', 'ẫ', 'ậ', 'ắ', 'ằ', 'ẳ', 'ẵ', 'ặ', 'á', 'à', 'ả', 'ã', 'ạ', 'â', 'ă'),
                'A' => array('Ấ', 'Ầ', 'Ẩ', 'Ẫ', 'Ậ', 'Ắ', 'Ằ', 'Ẳ', 'Ẵ', 'Ặ', 'Á', 'À', 'Ả', 'Ã', 'Ạ', 'Â', 'Ă'),
                'e' => array('ế', 'ề', 'ể', 'ễ', 'ệ', 'é', 'è', 'ẻ', 'ẽ', 'ẹ', 'ê'),
                'E' => array('Ế', 'Ề', 'Ể', 'Ễ', 'Ệ', 'É', 'È', 'Ẻ', 'Ẽ', 'Ẹ', 'Ê'),
                'i' => array('í', 'ì', 'ỉ', 'ĩ', 'ị'),
                'I' => array('Í', 'Ì', 'Ỉ', 'Ĩ', 'Ị'),
                'o' => array('ố', 'ồ', 'ổ', 'ỗ', 'ộ', 'ớ', 'ờ', 'ở', 'ỡ', 'ợ', 'ó', 'ò', 'ỏ', 'õ', 'ọ', 'ô', 'ơ'),
                'O' => array('Ố', 'Ồ', 'Ổ', 'Ỗ', 'Ộ', 'Ớ', 'Ờ', 'Ở', 'Ỡ', 'Ợ', 'Ó', 'Ò', 'Ỏ', 'Õ', 'Ọ', 'Ô', 'Ơ'),
                'u' => array('ứ', 'ừ', 'ử', 'ữ', 'ự', 'ú', 'ù', 'ủ', 'ũ', 'ụ', 'ư'),
                'U' => array('Ứ', 'Ừ', 'Ử', 'Ữ', 'Ự', 'Ú', 'Ù', 'Ủ', 'Ũ', 'Ụ', 'Ư'),
                'y' => array('ý', 'ỳ', 'ỷ', 'ỹ', 'ỵ'),
                'Y' => array('Ý', 'Ỳ', 'Ỷ', 'Ỹ', 'Ỵ'),
                'd' => array('đ'),
                'D' => array('Đ'),
            );

            // Flatten once into parallel search/replace lists.
            $search = array();
            $replace = array();
            foreach ($chars as $ascii => $variants) {
                foreach ($variants as $variant) {
                    $search[] = $variant;
                    $replace[] = $ascii;
                }
            }
        }

        $str = str_replace($search, $replace, $str);

        if ($replacespace) {
            $str = str_replace(' ', $by, $str);
        }

        return $str;
    }
}
/* End of file TextHelper.php */
/* Location: ./app/controllers/TextHelper.php */
Alerts (4)

Complexity hotspot; lines 127 to 128 (total complexity: 3)
127 128
Complexity hotspot; lines 134 to 135 (total complexity: 3)
134 135