/classes/multibyte.php
https://github.com/rynodivino/system · PHP · 820 lines · 374 code · 146 blank · 300 comment · 98 complexity · 83b3db99b81c628855d88707642c69cc MD5 · raw file
- <?php
- /*
- * @package Habari
- *
- */
/**
 * Habari MultiByte Class
 *
 * Provides multibyte character set services, a necessity since all of
 * Habari's internal string manipulations are done in UTF-8. Currently this
 * class is a wrapper around the mbstring functions; when the library is
 * disabled through MultiByte::library( false ) most methods fall back to
 * PHP's native, byte-oriented string functions.
 */
class MultiByte
{
    const USE_MBSTRING = 1;

    /**
     * @var string The encoding used whenever a method is not given one explicitly.
     */
    public static $hab_enc = 'UTF-8';

    /**
     * @var mixed The multibyte library in use: self::USE_MBSTRING, or false
     * when multibyte support has been switched off.
     */
    private static $use_library = self::USE_MBSTRING;

    /**
     * An empty, private constructor: every method is static, so the class
     * is never instantiated.
     */
    private function __construct()
    {
    }

    /**
     * Picks the encoding a method should work with.
     *
     * @param string|null $use_enc The encoding requested by the caller.
     * @return string $use_enc when it is not null, the internal encoding otherwise.
     */
    private static function resolve_encoding( $use_enc )
    {
        return ( $use_enc === null ) ? self::$hab_enc : $use_enc;
    }

    /**
     * Tells whether the mbstring library is the active backend.
     *
     * @return bool
     */
    private static function using_mbstring()
    {
        return self::$use_library === self::USE_MBSTRING;
    }

    /**
     * Sets and returns the internal encoding.
     *
     * @param string|null $use_enc The encoding to be used, or null to only read it.
     * @return string If $use_enc is null, the current encoding. Otherwise the
     * encoding that was in effect before the change.
     */
    public static function hab_encoding( $use_enc = null )
    {
        $previous = self::$hab_enc;
        if ( $use_enc !== null ) {
            self::$hab_enc = $use_enc;
        }
        return $previous;
    }

    /**
     * Sets and returns the multibyte library being used internally.
     *
     * @param mixed $new_library One of the self::USE_* constants, null to
     * simply return the current library, or false to disable multibyte
     * support and use the native non-multibyte-safe PHP functions.
     * @return mixed If $new_library is null, the current library. If
     * $new_library is a valid value, the library that was previously in use.
     * False for any other value (note that false is also returned when the
     * previous library was "disabled").
     */
    public static function library( $new_library = null )
    {
        if ( $new_library === null ) {
            return self::$use_library;
        }

        if ( $new_library !== self::USE_MBSTRING && $new_library !== false ) {
            return false;
        }

        $old_library = self::$use_library;
        self::$use_library = $new_library;
        return $old_library;
    }

    /**
     * Converts a string's encoding to a new encoding.
     *
     * @param string $str The string whose encoding is being changed.
     * @param string|null $use_enc The encoding to convert to. If null, the
     * internal encoding is used.
     * @param string|null $from_enc The encoding before conversion. If not
     * set, it is detected with detect_encoding().
     * @return mixed The source string in the new encoding, or boolean false
     * when mbstring is not the active library or the source encoding could
     * not be detected.
     */
    public static function convert_encoding( $str, $use_enc = null, $from_enc = null )
    {
        if ( !self::using_mbstring() ) {
            return false;
        }

        $enc = self::resolve_encoding( $use_enc );

        // loose comparison on purpose: '' and false also trigger detection
        if ( $from_enc == null ) {
            $from_enc = self::detect_encoding( $str );
            if ( $from_enc === false ) {
                // nothing sensible to hand to mb_convert_encoding()
                return false;
            }
        }

        return mb_convert_encoding( $str, $enc, $from_enc );
    }

    /**
     * Detects the encoding being used for a string.
     *
     * The candidate list is passed straight to mb_detect_encoding() so the
     * global detection order is never touched. ISO-8859-1 is deliberately
     * part of the list. The detected encoding may be wrong, but it is better
     * than guessing.
     *
     * @param string $str The string whose encoding is being detected.
     * @return mixed The detected encoding, or boolean false when detection
     * fails or mbstring is not the active library.
     */
    public static function detect_encoding( $str )
    {
        if ( !self::using_mbstring() ) {
            return false;
        }

        $candidates = array( 'ASCII', 'JIS', 'UTF-8', 'ISO-8859-1', 'EUC-JP', 'SJIS' );
        return mb_detect_encoding( $str, $candidates );
    }

    /**
     * Detects the encoding being used for a string from its byte order mark.
     *
     * Works on raw bytes and therefore does not depend on the active library.
     * NOTE: a UTF-32LE BOM (FF FE 00 00) also starts with FF FE and is
     * reported as UTF-16LE by this method.
     *
     * @param string $str The string whose encoding is being detected.
     * @return mixed 'UTF-16BE', 'UTF-16LE' or 'UTF-8' when the matching BOM
     * starts the string, boolean false otherwise.
     */
    public static function detect_bom_encoding( $str )
    {
        $first_two = substr( $str, 0, 2 );

        if ( $first_two === "\xFE\xFF" ) {
            return 'UTF-16BE';
        }
        if ( $first_two === "\xFF\xFE" ) {
            return 'UTF-16LE';
        }
        if ( substr( $str, 0, 3 ) === "\xEF\xBB\xBF" ) {
            return 'UTF-8';
        }

        return false;
    }

    /**
     * Gets a section of a string.
     *
     * @param string $str The original string.
     * @param int $begin The position of the first character to return.
     * @param int|null $len How long the returned string should be. If null,
     * everything from $begin to the end of the string is returned.
     * @param string|null $use_enc The encoding to be used. If null, the
     * internal encoding is used.
     * @return mixed The requested section of the string, or whatever the
     * underlying substr function reports on failure.
     */
    public static function substr( $str, $begin, $len = null, $use_enc = null )
    {
        if ( !self::using_mbstring() ) {
            // before PHP 8 a null length is coerced to 0, so leave the argument out instead
            return ( $len === null ) ? substr( $str, $begin ) : substr( $str, $begin, $len );
        }

        $enc = self::resolve_encoding( $use_enc );

        if ( $len === null ) {
            // measure in the encoding we are cutting in, not the internal one
            $len = self::strlen( $str, $enc ) - $begin;
        }

        return mb_substr( $str, $begin, $len, $enc );
    }

    /**
     * Gets the length of a string in characters.
     *
     * @param string $str The string whose length is being returned.
     * @param string|null $use_enc The encoding to be used. If null, the
     * internal encoding is used.
     * @return int The length in characters, or the length in bytes when
     * mbstring is not the active library.
     */
    public static function strlen( $str, $use_enc = null )
    {
        if ( self::using_mbstring() ) {
            return mb_strlen( $str, self::resolve_encoding( $use_enc ) );
        }

        return strlen( $str );
    }

    /**
     * Finds the position of the first occurrence of a string in a string.
     *
     * @param string $haysack The string being searched.
     * @param string $needle The string to look for.
     * @param int $offset The search offset. Defaults to 0.
     * @param string|null $use_enc The encoding to be used. If null, the
     * internal encoding is used.
     * @return mixed The position of the first match (in characters with
     * mbstring, in bytes otherwise), or false when $needle is not found.
     */
    public static function strpos( $haysack, $needle, $offset = 0, $use_enc = null )
    {
        if ( self::using_mbstring() ) {
            return mb_strpos( $haysack, $needle, $offset, self::resolve_encoding( $use_enc ) );
        }

        return strpos( $haysack, $needle, $offset );
    }

    /**
     * Finds the position of the first occurrence of a string in a string,
     * ignoring case.
     *
     * @param string $haysack The string being searched.
     * @param string $needle The string to look for.
     * @param int $offset The search offset. Defaults to 0.
     * @param string|null $use_enc The encoding to be used. If null, the
     * internal encoding is used.
     * @return mixed The position of the first match (in characters with
     * mbstring, in bytes otherwise), or false when $needle is not found.
     */
    public static function stripos( $haysack, $needle, $offset = 0, $use_enc = null )
    {
        if ( self::using_mbstring() ) {
            return mb_stripos( $haysack, $needle, $offset, self::resolve_encoding( $use_enc ) );
        }

        return stripos( $haysack, $needle, $offset );
    }

    /**
     * Finds the position of the last occurrence of a string in a string.
     *
     * @param string $haysack The string being searched.
     * @param string $needle The string to look for.
     * @param int $offset The search offset. Defaults to 0.
     * @param string|null $use_enc The encoding to be used. If null, the
     * internal encoding is used.
     * @return mixed The position of the last match (in characters with
     * mbstring, in bytes otherwise), or false when $needle is not found.
     */
    public static function strrpos( $haysack, $needle, $offset = 0, $use_enc = null )
    {
        if ( self::using_mbstring() ) {
            return mb_strrpos( $haysack, $needle, $offset, self::resolve_encoding( $use_enc ) );
        }

        return strrpos( $haysack, $needle, $offset );
    }

    /**
     * Finds the position of the last occurrence of a string in a string,
     * ignoring case.
     *
     * @param string $haysack The string being searched.
     * @param string $needle The string to look for.
     * @param int $offset The search offset. Defaults to 0.
     * @param string|null $use_enc The encoding to be used. If null, the
     * internal encoding is used.
     * @return mixed The position of the last match (in characters with
     * mbstring, in bytes otherwise), or false when $needle is not found.
     */
    public static function strripos( $haysack, $needle, $offset = 0, $use_enc = null )
    {
        if ( self::using_mbstring() ) {
            return mb_strripos( $haysack, $needle, $offset, self::resolve_encoding( $use_enc ) );
        }

        return strripos( $haysack, $needle, $offset );
    }

    /**
     * Converts a multibyte string to lowercase. If mbstring is not the
     * active library, strtolower() is used, which can lead to unexpected
     * results.
     *
     * @param string $str The string to lowercase.
     * @param string|null $use_enc The encoding to be used. If null, the
     * internal encoding is used.
     * @return string The lowercased string.
     */
    public static function strtolower( $str, $use_enc = null )
    {
        if ( self::using_mbstring() ) {
            return mb_strtolower( $str, self::resolve_encoding( $use_enc ) );
        }

        return strtolower( $str );
    }

    /**
     * Converts a multibyte string to uppercase. If mbstring is not the
     * active library, strtoupper() is used, which can lead to unexpected
     * results.
     *
     * @param string $str The string to uppercase.
     * @param string|null $use_enc The encoding to be used. If null, the
     * internal encoding is used.
     * @return string The uppercased string.
     */
    public static function strtoupper( $str, $use_enc = null )
    {
        if ( self::using_mbstring() ) {
            return mb_strtoupper( $str, self::resolve_encoding( $use_enc ) );
        }

        return strtoupper( $str );
    }

    /**
     * Determines if the passed string is valid character data (according to mbstring).
     *
     * @param string $str The string to check.
     * @param string|null $use_enc The encoding to check against. If null,
     * the internal encoding is used.
     * @return bool The result of mb_check_encoding(), or true when mbstring
     * is not the active library (no check is possible then).
     */
    public static function valid_data( $str, $use_enc = null )
    {
        if ( self::using_mbstring() ) {
            return mb_check_encoding( $str, self::resolve_encoding( $use_enc ) );
        }

        return true;
    }

    /**
     * Makes a string's first character uppercase.
     *
     * @see http://php.net/ucfirst
     * @param string $str The string to capitalize.
     * @param string|null $use_enc The encoding to be used. If null, the internal encoding will be used.
     * @return string The capitalized string.
     */
    public static function ucfirst( $str, $use_enc = null )
    {
        if ( !self::using_mbstring() ) {
            return ucfirst( $str );
        }

        $enc = self::resolve_encoding( $use_enc );

        // uppercase the first character and glue the untouched remainder back on
        $head = self::strtoupper( self::substr( $str, 0, 1, $enc ), $enc );
        $tail = self::substr( $str, 1, null, $enc );

        return $head . $tail;
    }

    /**
     * Makes a string's first character lowercase.
     *
     * @see http://php.net/lcfirst
     * @param string $str The string to lowercase.
     * @param string|null $use_enc The encoding to be used. If null, the internal encoding will be used.
     * @return string The string with its first character lowercased.
     */
    public static function lcfirst( $str, $use_enc = null )
    {
        if ( !self::using_mbstring() ) {
            // lcfirst() is php 5.3+ so we emulate it
            return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
        }

        $enc = self::resolve_encoding( $use_enc );

        // lowercase the first character and glue the untouched remainder back on
        $head = self::strtolower( self::substr( $str, 0, 1, $enc ), $enc );
        $tail = self::substr( $str, 1, null, $enc );

        return $head . $tail;
    }

    /**
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/str_replace
     * @param mixed $search A string or an array of strings to search for.
     * @param mixed $replace A string or an array of strings to replace search values with.
     * @param mixed $subject The string, or array of strings, to perform the search and replace on.
     * @param int $count If passed, this is set to the number of replacements performed by this call.
     * @param string|null $use_enc The encoding to be used. If null, the internal encoding will be used.
     * @return mixed The subject with replaced values.
     */
    public static function str_replace( $search, $replace, $subject, &$count = 0, $use_enc = null )
    {
        if ( !self::using_mbstring() ) {
            return str_replace( $search, $replace, $subject, $count );
        }

        // like the native function, report only the replacements made by this call
        $count = 0;
        return self::replace_all( $search, $replace, $subject, $count, self::resolve_encoding( $use_enc ), false );
    }

    /**
     * Replace all occurrences of the search string with the replacement
     * string, matching case-insensitively.
     *
     * @see http://php.net/str_ireplace
     * @param mixed $search A string or an array of strings to search for.
     * @param mixed $replace A string or an array of strings to replace search values with.
     * @param mixed $subject The string, or array of strings, to perform the search and replace on.
     * @param int $count If passed, this is set to the number of replacements performed by this call.
     * @param string|null $use_enc The encoding to be used. If null, the internal encoding will be used.
     * @return mixed The subject with replaced values.
     */
    public static function str_ireplace( $search, $replace, $subject, &$count = 0, $use_enc = null )
    {
        if ( !self::using_mbstring() ) {
            return str_ireplace( $search, $replace, $subject, $count );
        }

        // like the native function, report only the replacements made by this call
        $count = 0;
        return self::replace_all( $search, $replace, $subject, $count, self::resolve_encoding( $use_enc ), true );
    }

    /**
     * Shared multibyte implementation behind str_replace() and str_ireplace().
     *
     * @param mixed $search A string or an array of strings to search for.
     * @param mixed $replace A string or an array of strings to replace search values with.
     * @param mixed $subject The string, or array of strings, to work on.
     * @param int $count Running total; incremented once per replacement made.
     * @param string $enc The encoding to be used.
     * @param bool $ignore_case True to match case-insensitively.
     * @return mixed The subject with replaced values.
     */
    private static function replace_all( $search, $replace, $subject, &$count, $enc, $ignore_case )
    {
        if ( is_array( $search ) ) {
            if ( is_array( $replace ) ) {
                // make sure there is a replacement for every needle - pad with empty strings
                $replace = array_pad( $replace, count( $search ), '' );
            }
            else {
                // a single replacement is used for every needle
                $replace = array_fill( 0, count( $search ), $replace );
            }
        }
        else {
            $search = array( $search );
        }

        if ( !is_array( $replace ) ) {
            $replace = array( $replace );
        }

        // if subject is an array, recursively handle each element of it
        if ( is_array( $subject ) ) {
            foreach ( $subject as $key => $value ) {
                $subject[ $key ] = self::replace_all( $search, $replace, $value, $count, $enc, $ignore_case );
            }
            return $subject;
        }

        // pair needles and replacements by order, whatever their keys were
        $search = array_values( $search );
        $replace = array_values( $replace );

        foreach ( $search as $index => $needle ) {

            $needle_length = self::strlen( $needle, $enc );

            // an empty needle never matches (and would otherwise loop forever on PHP 8)
            if ( !$needle_length ) {
                continue;
            }

            $replacement = $replace[ $index ];

            // collect every match position up front, so a replacement that
            // contains the needle cannot be matched again
            // NOTE(review): with $ignore_case the positions come from mb_stripos();
            // characters whose case conversion changes their length may need
            // separate handling - confirm if such input is expected.
            $positions = array();
            $offset = 0;
            while ( true ) {
                $found = $ignore_case
                    ? self::stripos( $subject, $needle, $offset, $enc )
                    : self::strpos( $subject, $needle, $offset, $enc );

                if ( $found === false ) {
                    break;
                }

                $positions[] = $found;

                // skip over this match
                $offset = $found + $needle_length;
            }

            if ( count( $positions ) === 0 ) {
                continue;
            }

            // rebuild the subject left to right: untouched text, replacement, untouched text, ...
            $rebuilt = '';
            $cursor = 0;
            foreach ( $positions as $position ) {
                $rebuilt .= self::substr( $subject, $cursor, $position - $cursor, $enc ) . $replacement;
                $cursor = $position + $needle_length;
                $count++;
            }

            $subject = $rebuilt . self::substr( $subject, $cursor, null, $enc );

        }

        return $subject;
    }

    /**
     * Uppercase the first character of each word in a string.
     *
     * From php.net/ucwords:
     * The definition of a word is any string of characters that is immediately after a whitespace
     * (These are: space, form-feed, newline, carriage return, horizontal tab, and vertical tab).
     *
     * @see http://php.net/ucwords
     * @param string $str The input string.
     * @param string|null $use_enc The encoding to be used. If null, the internal encoding will be used.
     * @return string The modified string.
     */
    public static function ucwords( $str, $use_enc = null )
    {
        if ( !self::using_mbstring() ) {
            return ucwords( $str );
        }

        $enc = self::resolve_encoding( $use_enc );

        $delimiters = array(
            "\x20", // space
            "\x0C", // form-feed
            "\x0A", // newline
            "\x0D", // carriage return
            "\x09", // horizontal tab
            "\x0B", // vertical tab
        );

        // split on each delimiter in turn, capitalize every piece, and join again
        foreach ( $delimiters as $delimiter ) {
            $pieces = explode( $delimiter, $str );

            foreach ( $pieces as $index => $piece ) {
                $pieces[ $index ] = self::ucfirst( $piece, $enc );
            }

            $str = implode( $delimiter, $pieces );
        }

        return $str;
    }
}
- ?>