/lib/pkp/classes/core/String.inc.php
PHP | 937 lines | 619 code | 82 blank | 236 comment | 83 complexity | 8f6d50b982ce52ccbdb969fea681cdc4 MD5 | raw file
Possible License(s): GPL-2.0
- <?php
- /**
- * @file classes/core/String.inc.php
- *
- * Copyright (c) 2000-2012 John Willinsky
- * Distributed under the GNU GPL v2. For full terms see the file docs/COPYING.
- *
- * @class String
- * @ingroup core
- *
- * @brief String manipulation wrapper class.
- *
- */
- // $Id$
- /*
- * Perl-compatible regular expression (PCRE) constants:
- * These are defined application-wide for consistency
- */
- /*
- * RFC-2396 URIs
- *
- * Thanks to the PEAR Validation package (Tomas V.V.Cox <cox@idecnet.com>,
- * Pierre-Alain Joye <pajoye@php.net>, Amir Mohammad Saied <amir@php.net>)
- *
- * Originally published under the "New BSD License"
- * http://www.opensource.org/licenses/bsd-license.php
- */
// RFC-2396 URI pattern, assembled from one fragment per URI component.
define('PCRE_URI', implode('', array(
	'(?:([a-z][-+.a-z0-9]*):)?', // Scheme
	'(?://',
	'(?:((?:%[0-9a-f]{2}|[-a-z0-9_.!~*\'();:\&=+$,])*)@)?', // User
	'(?:((?:[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\.)*[a-z](?:[a-z0-9]+)?\.?)', // Hostname
	'|([0-9]{1,3}(?:\.[0-9]{1,3}){3}))', // IP Address
	'(?::([0-9]*))?)', // Port
	'((?:/(?:%[0-9a-f]{2}|[-a-z0-9_.!~*\'():@\&=+$,;])*)*/?)?', // Path
	'(?:\?([^#]*))?', // Query String
	'(?:\#((?:%[0-9a-f]{2}|[-a-z0-9_.!~*\'();/?:@\&=+$,])*))?' // Fragment
)));

// RFC-2822 email address pattern.
define('PCRE_EMAIL_ADDRESS', implode('', array(
	// One or more atom characters.
	'[-a-z0-9!#\$%&\'\*\+\/=\?\^_\`\{\|\}~]', '+',
	// Followed by zero or more dot separated sets of one or more atom characters.
	'(\.', '[-a-z0-9!#\$%&\'\*\+\/=\?\^_\`\{\|\}~]', '+)*',
	// Followed by an "at" character.
	'@',
	// Followed by one or more dot-terminated domain labels.
	'(', '([a-z0-9]([-a-z0-9]*[a-z0-9]+)?)', '{1,63}\.)+',
	// Followed by the final domain label.
	'([a-z0-9]([-a-z0-9]*[a-z0-9]+)?)', '{2,63}'
)));

// Camel case flavours: leading capital (class names) or leading lower case (method names).
define('CAMEL_CASE_HEAD_UP', 0x01);
define('CAMEL_CASE_HEAD_DOWN', 0x02);

// Default tag whitelist, used when the "allowed_html" security setting is not configured.
define('DEFAULT_ALLOWED_HTML', '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd> <b> <i> <u> <img src|alt> <sup> <sub> <br> <p>');
- class String {
/**
 * Perform initialization required for the string wrapper library.
 *
 * Defines the ENABLE_MBSTRING, PCRE_UTF8 and USE_HTML_PURIFIER constants
 * that the other methods of this class test for. Each constant is only
 * defined if it does not exist yet, so calling init() more than once does
 * not raise "constant already defined" notices.
 */
function init() {
	$clientCharset = strtolower(Config::getVar('i18n', 'client_charset'));

	// Check if mbstring is installed (requires PHP >= 4.3.0)
	if (String::hasMBString()) {
		// mbstring routines are available
		if (!defined('ENABLE_MBSTRING')) {
			define('ENABLE_MBSTRING', true);
		}

		// Set up required ini settings for mbstring
		// FIXME Do any other mbstring settings need to be set?
		mb_internal_encoding($clientCharset);
		// Code point 63 is the question mark. It is passed as an integer
		// because string arguments are reserved for mbstring's keywords.
		mb_substitute_character(63);
	}

	// Define modifier to be used in regexp_* routines
	// FIXME Should non-UTF-8 encodings be supported with mbstring?
	if (!defined('PCRE_UTF8')) {
		if ($clientCharset == 'utf-8' && String::hasPCREUTF8()) {
			define('PCRE_UTF8', 'u');
		} else {
			define('PCRE_UTF8', '');
		}
	}

	if (checkPhpVersion('5.0.5') && !defined('USE_HTML_PURIFIER')) {
		define('USE_HTML_PURIFIER', 1);
	}
}
/**
 * Check if server has a usable mbstring library.
 * Currently requires PHP >= 4.3.0 (for mb_strtolower, mb_strtoupper,
 * and mb_substr_count).
 *
 * The result is computed once per request and cached in a static variable.
 * @return boolean
 */
function hasMBString() {
	static $hasMBString;
	if (isset($hasMBString)) return $hasMBString;

	// If string overloading is active, it will break many of the
	// native implementations. mbstring.func_overload must be set
	// to 0, 1 or 4 in php.ini (string overloading disabled), so only
	// the MB_OVERLOAD_STRING bit is tested here: mail (1) and regex (4)
	// overloading do not disqualify mbstring.
	$overloadFlags = (int) ini_get('mbstring.func_overload');
	if (defined('MB_OVERLOAD_STRING') && ($overloadFlags & MB_OVERLOAD_STRING)) {
		$hasMBString = false;
		return $hasMBString;
	}

	$hasMBString = extension_loaded('mbstring');
	if ($hasMBString) {
		// Every one of these functions must be present.
		$requiredFunctions = array(
			'mb_strlen', 'mb_strpos', 'mb_strrpos', 'mb_substr',
			'mb_strtolower', 'mb_strtoupper', 'mb_substr_count',
			'mb_send_mail'
		);
		foreach ($requiredFunctions as $requiredFunction) {
			if (!function_exists($requiredFunction)) {
				$hasMBString = false;
				break;
			}
		}
	}
	return $hasMBString;
}
/**
 * Check if server supports the PCRE_UTF8 ("u") pattern modifier.
 * @return boolean true if an empty pattern using the "u" modifier matches
 */
function hasPCREUTF8() {
	// The PCRE_UTF8 modifier is only supported on PHP >= 4.1.0 (*nix) or PHP >= 4.2.3 (win32).
	// Probe for it by running a trivial match; errors are suppressed on
	// purpose because a failure simply means "not supported".
	$probeResult = @preg_match('//u', '');
	return $probeResult ? true : false;
}
- //
- // Wrappers for basic string manipulation routines.
- // See the phputf8 documentation for usage.
- //
/**
 * Length of a string, delegated to utf8_strlen().
 * The mbstring-backed implementation is loaded when ENABLE_MBSTRING is
 * defined, the native one otherwise.
 * @see http://ca.php.net/manual/en/function.strlen.php
 * @param $string string
 */
function strlen($string) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once 'utils/unicode.php';
		require_once 'native/core.php';
	} else {
		require_once 'mbstring/core.php';
	}
	$length = utf8_strlen($string);
	return $length;
}
/**
 * Position of the first occurrence of a substring, delegated to
 * utf8_strpos().
 * @see http://ca.php.net/manual/en/function.strpos.php
 * @param $haystack string
 * @param $needle string
 * @param $offset int optional, defaults to 0
 */
function strpos($haystack, $needle, $offset = 0) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once 'utils/unicode.php';
		require_once 'native/core.php';
	} else {
		require_once 'mbstring/core.php';
	}
	$position = utf8_strpos($haystack, $needle, $offset);
	return $position;
}
/**
 * Position of the last occurrence of a substring, delegated to
 * utf8_strrpos().
 * @see http://ca.php.net/manual/en/function.strrpos.php
 * @param $haystack string
 * @param $needle string
 * @param $offset int optional; when omitted the third argument is not
 *  passed at all, so the loaded utf8_strrpos() applies its own default
 */
function strrpos($haystack, $needle, $offset = null) {
	if (defined('ENABLE_MBSTRING')) {
		require_once 'mbstring/core.php';
	} else {
		require_once 'utils/unicode.php';
		require_once 'native/core.php';
	}

	// This method used to forward an undefined $offset variable. Only
	// forward an offset the caller actually supplied.
	if ($offset === null) {
		return utf8_strrpos($haystack, $needle);
	}
	return utf8_strrpos($haystack, $needle, $offset);
}
/**
 * Extract part of a string, delegated to utf8_substr().
 * @see http://ca.php.net/manual/en/function.substr.php
 * @param $string string
 * @param $start int
 * @param $length int|boolean optional, defaults to false
 */
function substr($string, $start, $length = false) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once 'utils/unicode.php';
		require_once 'native/core.php';
	} else {
		require_once 'mbstring/core.php';
	}
	$part = utf8_substr($string, $start, $length);
	return $part;
}
/**
 * Replace part of a string, counting positions with this class's own
 * strlen()/substr() wrappers rather than the byte-based native functions.
 *
 * The previous implementation declared a global mb_substr_replace()
 * helper on the first call but never invoked it, fell back to the
 * byte-based native substr_replace(), and returned NULL on every call
 * after the first. The arithmetic below is the one from that helper.
 *
 * @see http://ca.php.net/manual/en/function.substr_replace.php
 * Thanks to poster at http://ca.php.net/manual/en/function.substr-replace.php#90146
 * @param $string string
 * @param $replacement string
 * @param $start int a negative value counts from the end of the string
 * @param $length int optional; null replaces up to the end of the string,
 *  a negative value leaves that many trailing characters untouched
 * @return string
 */
function substr_replace($string, $replacement, $start, $length = null) {
	$stringLength = String::strlen($string);

	// Normalize $start into the range 0..$stringLength.
	if ($start < 0) {
		$start = max(0, $stringLength + $start);
	} else if ($start > $stringLength) {
		$start = $stringLength;
	}

	// Normalize $length into the range 0..($stringLength - $start).
	if (is_null($length) || $length > $stringLength) {
		$length = $stringLength;
	} else if ($length < 0) {
		$length = max(0, $stringLength - $start + $length);
	}
	if (($start + $length) > $stringLength) {
		$length = $stringLength - $start;
	}

	$head = ($start > 0) ? String::substr($string, 0, $start) : '';
	$tailLength = $stringLength - $start - $length;
	$tail = ($tailLength > 0) ? String::substr($string, $start + $length, $tailLength) : '';
	return $head . $replacement . $tail;
}
/**
 * Lower-case a string, delegated to utf8_strtolower().
 * @see http://ca.php.net/manual/en/function.strtolower.php
 * @param $string string
 */
function strtolower($string) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once 'utils/unicode.php';
		require_once 'native/core.php';
	} else {
		require_once 'mbstring/core.php';
	}
	$lowered = utf8_strtolower($string);
	return $lowered;
}
/**
 * Upper-case a string, delegated to utf8_strtoupper().
 * @see http://ca.php.net/manual/en/function.strtoupper.php
 * @param $string string
 */
function strtoupper($string) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once 'utils/unicode.php';
		require_once 'native/core.php';
	} else {
		require_once 'mbstring/core.php';
	}
	$raised = utf8_strtoupper($string);
	return $raised;
}
/**
 * Upper-case the first character of a string, delegated to utf8_ucfirst().
 * @see http://ca.php.net/manual/en/function.ucfirst.php
 * @param $string string
 */
function ucfirst($string) {
	if (!defined('ENABLE_MBSTRING')) {
		require_once 'utils/unicode.php';
		require_once 'native/core.php';
	} else {
		require_once 'mbstring/core.php';
	}
	// Loaded after the core implementation in either case.
	require_once 'ucfirst.php';
	return utf8_ucfirst($string);
}
/**
 * Count occurrences of a substring, using mb_substr_count() when
 * ENABLE_MBSTRING is defined and the native substr_count() otherwise.
 * @see http://ca.php.net/manual/en/function.substr_count.php
 * @param $haystack string
 * @param $needle string
 */
function substr_count($haystack, $needle) {
	return defined('ENABLE_MBSTRING')
		? mb_substr_count($haystack, $needle) // Requires PHP >= 4.3.0
		: substr_count($haystack, $needle);
}
/**
 * Encode a string for use in a MIME header. With ENABLE_MBSTRING defined
 * the string is base64 ("B") encoded via mb_encode_mimeheader() using the
 * internal encoding and MAIL_EOL; otherwise it is returned unchanged.
 * @see http://ca.php.net/manual/en/function.mb-encode-mimeheader.php
 * @param $string string
 * @return string
 */
function encode_mime_header($string) {
	if (!defined('ENABLE_MBSTRING')) {
		return $string;
	}
	return mb_encode_mimeheader($string, mb_internal_encoding(), 'B', MAIL_EOL);
}
/**
 * Send an email through PHP's native mail().
 * @see http://ca.php.net/manual/en/function.mail.php
 * @param $to string
 * @param $subject string
 * @param $message string
 * @param $additional_headers string optional
 * @param $additional_parameters string optional; only forwarded when non-empty
 */
function mail($to, $subject, $message, $additional_headers = '', $additional_parameters = '') {
	// Cannot use mb_send_mail as it base64 encodes the whole body of the email,
	// making it useless for multipart emails
	if (!empty($additional_parameters)) {
		return mail($to, $subject, $message, $additional_headers, $additional_parameters);
	}
	return mail($to, $subject, $message, $additional_headers);
}
- //
- // Wrappers for PCRE-compatible regular expression routines.
- // See the php.net documentation for usage.
- //
/**
 * Escape regular expression special characters in a string.
 * @see http://ca.php.net/manual/en/function.preg-quote.php
 * @param $string string
 * @param $delimiter string pattern delimiter that is escaped as well, "/" by default
 * @return string
 */
function regexp_quote($string, $delimiter = '/') {
	$quoted = preg_quote($string, $delimiter);
	return $quoted;
}
/**
 * Return the entries of an array that match a pattern.
 *
 * preg_grep() works on an array, so in UTF-8 mode each string element is
 * checked and stripped of bad bytes on its own. The whole array used to be
 * handed to the string-only UTF-8 helpers.
 * @see http://ca.php.net/manual/en/function.preg-grep.php
 * @param $pattern string pattern without the UTF-8 modifier, which is appended here
 * @param $input array
 */
function regexp_grep($pattern, $input) {
	if (PCRE_UTF8 && is_array($input)) {
		foreach ($input as $key => $value) {
			if (is_string($value) && !String::utf8_compliant($value)) {
				$input[$key] = String::utf8_bad_strip($value);
			}
		}
	}
	return preg_grep($pattern . PCRE_UTF8, $input);
}
/**
 * Test a subject against a pattern. In UTF-8 mode a non-compliant subject
 * is stripped of bad bytes first.
 * @see http://ca.php.net/manual/en/function.preg-match.php
 * @param $pattern string pattern without the UTF-8 modifier, which is appended here
 * @param $subject string
 */
function regexp_match($pattern, $subject) {
	$utf8Modifier = PCRE_UTF8;
	if ($utf8Modifier) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	return preg_match($pattern . $utf8Modifier, $subject);
}
/**
 * Test a subject against a pattern and hand back the captured groups.
 * In UTF-8 mode a non-compliant subject is stripped of bad bytes first.
 * NOTE: This function was created since PHP < 5.x does not support
 * optional reference parameters.
 * @see http://ca.php.net/manual/en/function.preg-match.php
 * @param $pattern string pattern without the UTF-8 modifier, which is appended here
 * @param $subject string
 * @param $matches array receives the matches (by reference)
 */
function regexp_match_get($pattern, $subject, &$matches) {
	$utf8Modifier = PCRE_UTF8;
	if ($utf8Modifier) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	return preg_match($pattern . $utf8Modifier, $subject, $matches);
}
/**
 * Find all matches of a pattern in a subject. In UTF-8 mode a
 * non-compliant subject is stripped of bad bytes first.
 * @see http://ca.php.net/manual/en/function.preg-match-all.php
 * @param $pattern string pattern without the UTF-8 modifier, which is appended here
 * @param $subject string
 * @param $matches array receives the matches (by reference)
 */
function regexp_match_all($pattern, $subject, &$matches) {
	$utf8Modifier = PCRE_UTF8;
	if ($utf8Modifier) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	return preg_match_all($pattern . $utf8Modifier, $subject, $matches);
}
/**
 * Replace matches of a pattern in a subject. In UTF-8 mode a
 * non-compliant subject is stripped of bad bytes first.
 * @see http://ca.php.net/manual/en/function.preg-replace.php
 * @param $pattern string pattern without the UTF-8 modifier, which is appended here
 * @param $replacement string
 * @param $subject string
 * @param $limit int optional, -1 (no limit) by default
 */
function regexp_replace($pattern, $replacement, $subject, $limit = -1) {
	$utf8Modifier = PCRE_UTF8;
	if ($utf8Modifier) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	return preg_replace($pattern . $utf8Modifier, $replacement, $subject, $limit);
}
/**
 * Replace matches of a pattern with the return value of a callback.
 * In UTF-8 mode a non-compliant subject is stripped of bad bytes first.
 * @see http://ca.php.net/manual/en/function.preg-replace-callback.php
 * @param $pattern string pattern without the UTF-8 modifier, which is appended here
 * @param $callback callback
 * @param $subject string
 * @param $limit int optional, -1 (no limit) by default
 */
function regexp_replace_callback($pattern, $callback, $subject, $limit = -1) {
	$utf8Modifier = PCRE_UTF8;
	if ($utf8Modifier) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	return preg_replace_callback($pattern . $utf8Modifier, $callback, $subject, $limit);
}
/**
 * Split a subject on matches of a pattern. In UTF-8 mode a non-compliant
 * subject is stripped of bad bytes first.
 * @see http://ca.php.net/manual/en/function.preg-split.php
 * @param $pattern string pattern without the UTF-8 modifier, which is appended here
 * @param $subject string
 * @param $limit int optional, -1 (no limit) by default
 */
function regexp_split($pattern, $subject, $limit = -1) {
	$utf8Modifier = PCRE_UTF8;
	if ($utf8Modifier) {
		if (!String::utf8_compliant($subject)) {
			$subject = String::utf8_bad_strip($subject);
		}
	}
	return preg_split($pattern . $utf8Modifier, $subject, $limit);
}
/**
 * Determine the MIME type of a file. Tries, in order: the native
 * mime_content_type(), the fileinfo extension, and finally the external
 * "file" command.
 * @see http://ca.php.net/manual/en/function.mime_content_type.php
 * @param $filename string
 * @return string the MIME type without any trailing ";..." parameters
 */
function mime_content_type($filename) {
	if (function_exists('mime_content_type')) {
		$mimeType = mime_content_type($filename);
		// mime_content_type appears to return a charset
		// (erroneously?) in recent versions of PHP5
		$separatorPos = strpos($mimeType, ';');
		if ($separatorPos !== false) {
			$mimeType = trim(substr($mimeType, 0, $separatorPos));
		}
		return $mimeType;
	}

	if (function_exists('finfo_open')) {
		// The fileinfo handle is kept in the registry so it can be reused.
		$fi =& Registry::get('fileInfo', true, null);
		if ($fi === null) {
			$fi = finfo_open(FILEINFO_MIME, Config::getVar('finfo', 'mime_database_path'));
		}
		if ($fi !== false) {
			return strtok(finfo_file($fi, $filename), ' ;');
		}
	}

	// Fall back on an external "file" tool
	$f = escapeshellarg($filename);
	$mimeType = trim(`file --brief --mime $f`);
	// Make sure we just return the mime type.
	$separatorPos = strpos($mimeType, ';');
	if ($separatorPos !== false) {
		$mimeType = trim(substr($mimeType, 0, $separatorPos));
	}
	return $mimeType;
}
/**
 * Strip unsafe HTML from the input text. Covers XSS attacks like scripts,
 * onclick(...) attributes, javascript: urls, and special characters.
 *
 * When USE_HTML_PURIFIER is defined the work is done by HTMLPurifier.
 * Otherwise a regular-expression based fallback is used. In that fallback
 * the numeric-entity prefixes ("&#0*" for decimal, "&#x0*" for hex) had
 * been replaced by a stray replacement character, which prevented the
 * entity-camouflage alternatives from matching; they are restored here.
 * @param $input string input string
 * @return string
 */
function stripUnsafeHtml($input) {
	// If possible, use the HTML purifier.
	if (defined('USE_HTML_PURIFIER')) {
		require_once('lib/pkp/lib/htmlpurifier/library/HTMLPurifier.path.php');
		require_once('HTMLPurifier.includes.php');
		static $purifier;
		if (!isset($purifier)) {
			$config = HTMLPurifier_Config::createDefault();
			$config->set('Core.Encoding', Config::getVar('i18n', 'client_charset'));
			$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
			// Transform the old allowed_html setting into
			// a form HTMLPurifier can use.
			$config->set('HTML.Allowed', preg_replace(
				'/<(\w+)[ ]?([^>]*)>[ ]?/',
				'${1}[${2}],',
				Config::getVar('security', 'allowed_html', DEFAULT_ALLOWED_HTML)
			));
			$config->set('Cache.SerializerPath', 'cache');
			$purifier = new HTMLPurifier($config);
		}
		return $purifier->purify($input);
	}

	// Fall back on imperfect but PHP4-capable implementation.
	// Parts of this implementation were taken from Horde:
	// see http://cvs.horde.org/co.php/framework/MIME/MIME/Viewer/html.php.
	static $allowedHtml;
	if (!isset($allowedHtml)) {
		$allowedHtml = preg_replace(
			'/<(\w+)( [^>]+)*>/', // Strip out attr specs
			'<${1}> ',
			Config::getVar('security', 'allowed_html', DEFAULT_ALLOWED_HTML)
		);
	}
	$html = strip_tags($input, $allowedHtml);

	// Change space entities to space characters
	$html = preg_replace('/&#(x0*20|0*32);?/i', ' ', $html);

	// Remove non-printable characters
	$html = preg_replace('/&#x?0*([9A-D]|1[0-3]);/i', ' ', $html);
	$html = preg_replace('/&#x?0*[9A-D]([^0-9A-F]|$)/i', ' \\1', $html);
	$html = preg_replace('/&#0*(9|1[0-3])([^0-9]|$)/i', ' \\2', $html);

	// Remove overly long numeric entities
	$html = preg_replace('/&#x?0*[0-9A-F]{6,};?/i', ' ', $html);

	/* Get all attribute="javascript:foo()" tags. This is
	 * essentially the regex /(=|url\()("?)[^>]* script:/ but
	 * expanded to catch camouflage with spaces and entities.
	 * Each letter is listed as: literal, decimal entity (upper case),
	 * hex entity (upper case), decimal entity (lower case), hex entity
	 * (lower case). */
	$preg = '/((&#0*61;?|&#x0*3D;?|=)|'
		. '((u|&#0*85;?|&#x0*55;?|&#0*117;?|&#x0*75;?)\s*'
		. '(r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*'
		. '(l|&#0*76;?|&#x0*4c;?|&#0*108;?|&#x0*6c;?)\s*'
		. '(\()))\s*'
		. '(&#0*34;?|&#x0*22;?|"|&#0*39;?|&#x0*27;?|\')?'
		. '[^>]*\s*'
		. '(s|&#0*83;?|&#x0*53;?|&#0*115;?|&#x0*73;?)\s*'
		. '(c|&#0*67;?|&#x0*43;?|&#0*99;?|&#x0*63;?)\s*'
		. '(r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*'
		. '(i|&#0*73;?|&#x0*49;?|&#0*105;?|&#x0*69;?)\s*'
		. '(p|&#0*80;?|&#x0*50;?|&#0*112;?|&#x0*70;?)\s*'
		. '(t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)\s*'
		. '(:|&#0*58;?|&#x0*3a;?)/i';
	// \1 is the "=" or "url(" prefix, \8 the optional opening quote.
	$html = preg_replace($preg, '\1\8PKPCleaned', $html);

	/* Get all on<foo>="bar()". NEVER allow these. */
	$html = preg_replace('/([\s"\']+'
		. '(o|&#0*79;?|&#x0*4f;?|&#0*111;?|&#x0*6f;?)'
		. '(n|&#0*78;?|&#x0*4e;?|&#0*110;?|&#x0*6e;?)'
		. '\w+)\s*=/i', '\1PKPCleaned=', $html);

	$pattern = array(
		'|<([^>]*)&{.*}([^>]*)>|',
		'|<([^>]*)mocha:([^>]*)>|i',
		'|<([^>]*)binding:([^>]*)>|i'
	);
	// NOTE(review): the first replacement refers to \3 although its pattern
	// only has two groups, so the tag content around "&{...}" is dropped.
	// Kept as is; confirm whether that is intended.
	$replace = array('<&{;}\3>', '<\1PKPCleaned:\2>', '<\1PKPCleaned:\2>');
	$html = preg_replace($pattern, $replace, $html);

	return $html;
}
/**
 * Convert limited HTML into a string: paragraph, list item and line break
 * tags become line breaks or bullets, the remaining tags are stripped and
 * entities are decoded through html2utf().
 * @param $html string
 * @return string
 */
function html2text($html) {
	// Applied one after the other, in this order.
	$substitutions = array(
		'/<[\/]?p>/' => "\n",
		'/<li>/' => '• ',
		'/<\/li>/' => "\n",
		'/<br[ ]?[\/]?>/' => "\n"
	);
	foreach ($substitutions as $tagPattern => $plainText) {
		$html = String::regexp_replace($tagPattern, $plainText, $html);
	}
	return String::html2utf(strip_tags($html));
}
- //
- // Wrappers for UTF-8 validation routines
- // See the phputf8 documentation for usage.
- //
/**
 * Validate a string as UTF-8 by delegating to the utf8_is_valid()
 * function loaded from utils/validation.php.
 * @param $str string input string
 * @return boolean
 */
function utf8_is_valid($str) {
	require_once 'utils/validation.php';
	$isValid = utf8_is_valid($str);
	return $isValid;
}
/**
 * Tests whether a string complies as UTF-8; faster and less strict than
 * utf8_is_valid. Delegates to utf8_compliant() from utils/validation.php;
 * see lib/phputf8/utils/validation.php for more details.
 * @param $str string input string
 * @return boolean
 */
function utf8_compliant($str) {
	require_once 'utils/validation.php';
	$isCompliant = utf8_compliant($str);
	return $isCompliant;
}
/**
 * Locates the first bad byte in a UTF-8 string, returning its byte index
 * in the string. Delegates to utf8_bad_find() from utils/bad.php.
 * @param $str string input string
 * @return mixed whatever utf8_bad_find() returns, passed through unchanged
 */
function utf8_bad_find($str) {
	require_once 'utils/bad.php';
	$badByteIndex = utf8_bad_find($str);
	return $badByteIndex;
}
/**
 * Strips out any bad bytes from a UTF-8 string and returns the rest.
 * Delegates to utf8_bad_strip() from utils/bad.php.
 * @param $str string input string
 * @return string
 */
function utf8_bad_strip($str) {
	require_once 'utils/bad.php';
	$cleaned = utf8_bad_strip($str);
	return $cleaned;
}
/**
 * Replace bad bytes in a UTF-8 string with an alternative (ASCII)
 * character. Delegates to utf8_bad_replace() from utils/bad.php.
 * @param $str string input string
 * @param $replace string optional substitute, "?" by default
 * @return string
 */
function utf8_bad_replace($str, $replace = '?') {
	require_once 'utils/bad.php';
	$repaired = utf8_bad_replace($str, $replace);
	return $repaired;
}
/**
 * Delegate to utf8_strip_ascii_ctrl() from utils/ascii.php, which going by
 * its name removes ASCII control characters from the string.
 * @param $str string input string
 * @return string
 */
function utf8_strip_ascii_ctrl($str) {
	require_once 'utils/ascii.php';
	$stripped = utf8_strip_ascii_ctrl($str);
	return $stripped;
}
/**
 * Normalize a string in an unknown (non-UTF8) encoding into a valid UTF-8
 * sequence. The source encoding is guessed with mbstring when available,
 * otherwise with an iconv length comparison, otherwise ISO-8859-1 is
 * assumed.
 * @param $str string input string
 * @return string
 */
function utf8_normalize($str) {
	import('core.Transcoder');

	// assume ISO-8859-1, PHP default, unless something better is detected
	$detected_encoding = 'ISO-8859-1';

	if (String::hasMBString()) {
		// NB: CP-1252 often segfaults; we've left it out here but it will detect as 'ISO-8859-1'
		$mb_encoding_order = 'UTF-8, UTF-7, ASCII, ISO-8859-1, EUC-JP, SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP';

		// The third (strict) argument is only passed on PHP >= 4.3.8.
		$detected_encoding = checkPhpVersion('4.3.8')
			? mb_detect_encoding($str, $mb_encoding_order, FALSE)
			: mb_detect_encoding($str, $mb_encoding_order);
	} elseif (function_exists('iconv')) {
		// use iconv to detect CP-1252, assuming default ISO-8859-1
		$lengthAsCp1252 = strlen(iconv('CP1252', 'UTF-8', $str));
		$lengthAsLatin1 = strlen(iconv('ISO-8859-1', 'UTF-8', $str));
		if ($lengthAsCp1252 != $lengthAsLatin1) {
			$detected_encoding = 'CP1252';
		}
	}

	// transcode CP-1252/ISO-8859-1 into HTML entities; this works because CP-1252 is mapped onto ISO-8859-1
	if ('CP1252' == $detected_encoding || 'ISO-8859-1' == $detected_encoding) {
		$entityTranscoder = new Transcoder('CP1252', 'HTML-ENTITIES');
		$str = $entityTranscoder->trans($str);
	}

	// transcode from detected encoding to UTF-8
	$utf8Transcoder = new Transcoder($detected_encoding, 'UTF-8');
	return $utf8Transcoder->trans($str);
}
/**
 * US-ASCII transliterations of Unicode text. Delegates to the
 * utf8_to_ascii() function loaded from utf8_to_ascii.php.
 * @param $str string input string
 * @return string
 */
function utf8_to_ascii($str) {
	require_once('utf8_to_ascii.php');
	$transliterated = utf8_to_ascii($str);
	return $transliterated;
}
/**
 * Returns the UTF-8 string corresponding to the unicode value.
 * Does not require any multibyte PHP libraries
 * (from php.net, courtesy - romans@void.lv)
 * @param $num int unicode code point
 * @return string one to four bytes, or an empty string for values of
 *  2097152 (0x200000) and above
 */
function code2utf ($num) {
	// Out of range for a four byte sequence.
	if ($num >= 2097152) return '';

	// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	if ($num >= 65536) {
		return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
	}

	// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
	if ($num >= 2048) {
		return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
	}

	// Two bytes: 110xxxxx 10xxxxxx
	if ($num >= 128) {
		return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
	}

	// Plain ASCII.
	return chr($num);
}
/**
 * Convert UTF-8 encoded characters in a string to escaped HTML entities.
 * This is a helper function for transcoding into HTML or XML for output.
 *
 * Two and three byte sequences are turned into decimal numeric entities;
 * every other byte (ASCII, four byte sequences, stray bytes) is copied
 * through unchanged. A multibyte sequence that is cut off by the end of
 * the string is copied through as is instead of being read past the end.
 * @param $str string input string
 * @return string
 */
function utf2html ($str) {
	$ret = '';
	$max = strlen($str);
	$last = 0; // index of the first byte not yet copied to $ret

	for ($i = 0; $i < $max; $i++) {
		// Square brackets: curly brace string offsets no longer parse on PHP 8.
		$c1 = ord($str[$i]);

		if ($c1 >> 5 == 6) { // 110x xxxx, lead byte of a 2 byte sequence
			if ($i + 1 >= $max) break; // truncated sequence
			$ret .= substr($str, $last, $i - $last); // append all the regular characters we've passed
			$c2 = ord($str[++$i]);
			// 5 payload bits from the lead byte, 6 from the trailing byte
			$ret .= '&#' . ((($c1 & 31) << 6) | ($c2 & 63)) . ';';
			$last = $i + 1;
		} elseif ($c1 >> 4 == 14) { // 1110 xxxx, lead byte of a 3 byte sequence
			if ($i + 2 >= $max) break; // truncated sequence
			$ret .= substr($str, $last, $i - $last); // append all the regular characters we've passed
			$c2 = ord($str[++$i]);
			$c3 = ord($str[++$i]);
			// 4 payload bits from the lead byte, 6 from each trailing byte
			$ret .= '&#' . ((($c1 & 15) << 12) | (($c2 & 63) << 6) | ($c3 & 63)) . ';';
			$last = $i + 1;
		}
	}

	// append the last batch of regular characters
	if ($last < $max) {
		$ret .= substr($str, $last);
	}
	return $ret;
}
/**
 * Convert HTML entities in a string to UTF-8 encoded characters.
 * This is a native alternative to the buggy html_entity_decode() using UTF8.
 *
 * Named entities are first mapped to numeric ones, then hexadecimal and
 * decimal numeric entities are decoded through callbacks. The callbacks
 * replace the former "e" (eval) pattern modifier, which is deprecated and
 * evaluated the captured digits as PHP source, so that a zero-padded
 * decimal entity was read as an octal literal.
 * @param $str string input string
 * @return string
 */
function html2utf($str) {
	// convert named entities to numeric entities
	$str = strtr($str, String::getHTMLEntities());

	// use PCRE-aware replace function to replace numeric entities
	$str = String::regexp_replace_callback('~&#x([0-9a-f]+);~i', array('String', '_hexEntityToUtf'), $str);
	$str = String::regexp_replace_callback('~&#([0-9]+);~', array('String', '_decEntityToUtf'), $str);

	return $str;
}

/**
 * Callback for html2utf(): decode one hexadecimal numeric entity.
 * @param $matches array PCRE match; index 1 holds the hex digits
 * @return string
 */
function _hexEntityToUtf($matches) {
	return String::code2utf(hexdec($matches[1]));
}

/**
 * Callback for html2utf(): decode one decimal numeric entity.
 * @param $matches array PCRE match; index 1 holds the decimal digits
 * @return string
 */
function _decEntityToUtf($matches) {
	// The cast reads the digits as base 10 even with leading zeros.
	return String::code2utf((int) $matches[1]);
}
/**
 * Return an associative array of named->numeric HTML entities
 * (e.g. "&Aacute;" => "&#193;"), suitable for strtr().
 *
 * The hand-written table that used to live here had been corrupted: its
 * keys and values were the same decoded characters and one entry was no
 * longer valid PHP. The table is now derived from PHP's own HTML 4.01
 * translation table and cached for the rest of the request.
 * From php.net: function.get-html-translation-table.php
 * @return array
 */
function getHTMLEntities () {
	static $html_entities;
	if (isset($html_entities)) return $html_entities;

	$html_entities = array();
	$translationTable = get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8');
	foreach ($translationTable as $char => $entity) {
		// Entries that are already numeric (the single quote) need no mapping.
		if (substr($entity, 0, 2) == '&#') continue;

		// Decode the UTF-8 character into its code point: drop the length
		// marker bits of the lead byte, then append 6 bits per trailing byte.
		$byteCount = strlen($char);
		$codePoint = ord($char[0]);
		if ($byteCount > 1) {
			$codePoint &= (0xFF >> ($byteCount + 1));
			for ($i = 1; $i < $byteCount; $i++) {
				$codePoint = ($codePoint << 6) | (ord($char[$i]) & 0x3F);
			}
		}
		$html_entities[$entity] = '&#' . $codePoint . ';';
	}

	// Not part of the HTML 4.01 table.
	$html_entities['&apos;'] = '&#39;';

	// The previous table mapped these three to their ASCII look-alikes
	// ("^", "-", "~") -- presumably on purpose, so that is kept.
	$html_entities['&circ;'] = '&#94;';
	$html_entities['&minus;'] = '&#45;';
	$html_entities['&tilde;'] = '&#126;';

	return $html_entities;
}
/**
 * Wrapper around fputcsv for systems that may or may not support it
 * (i.e. PHP before 5.1.0); see PHP documentation for fputcsv.
 * @param $handle resource open file handle (by reference)
 * @param $fields array values making up one CSV line
 * @param $delimiter string field separator
 * @param $enclosure string quote character
 * @return the result of the native fputcsv() or, in the fallback, of fwrite()
 */
function fputcsv(&$handle, $fields = array(), $delimiter = ',', $enclosure = '"') {
	// From PHP website, thanks to boefje at hotmail dot com
	if (function_exists('fputcsv')) {
		return fputcsv($handle, $fields, $delimiter, $enclosure);
	}

	$escape_char = '\\';
	// A value containing any of these is wrapped in the enclosure.
	$quoteTriggers = array($delimiter, $enclosure, "\n", "\r", "\t", ' ');

	$line = '';
	foreach ($fields as $value) {
		$needsQuoting = false;
		foreach ($quoteTriggers as $trigger) {
			if (strpos($value, $trigger) !== false) {
				$needsQuoting = true;
				break;
			}
		}

		if (!$needsQuoting) {
			$line .= $value . $delimiter;
			continue;
		}

		$quoted = $enclosure;
		$escaped = 0;
		$valueLength = strlen($value);
		for ($pos = 0; $pos < $valueLength; $pos++) {
			$char = $value[$pos];
			if ($char == $escape_char) {
				$escaped = 1;
			} elseif (!$escaped && $char == $enclosure) {
				// Double an enclosure character that is not backslash-escaped.
				$quoted .= $enclosure;
			} else {
				$escaped = 0;
			}
			$quoted .= $char;
		}
		$quoted .= $enclosure;
		$line .= $quoted . $delimiter;
	}

	// Drop the last character (the trailing delimiter) and end the line.
	$line = substr($line, 0, -1);
	$line .= "\n";

	return fwrite($handle, $line);
}
/**
 * Trim punctuation from both ends of a string. Removed characters are:
 * space , . ; : ! ? & ( ) [ ] backslash and slash.
 * @param $string string input string
 * @return string the trimmed string
 */
function trimPunctuation($string) {
	$punctuation = ' ,.;:!?&()[]\\/';
	return trim($string, $punctuation);
}
/**
 * Convert a string to proper title case: every word is lower-cased and
 * then capitalized, except for a fixed list of small words, which stay
 * lower case unless they are the first word.
 *
 * Capitalization goes through this class's own ucfirst() wrapper, like the
 * lower-casing already did, instead of the byte-wise global ucfirst().
 * @param $title string
 * @return string
 */
function titleCase($title) {
	$smallWords = array(
		'of', 'a', 'the', 'and', 'an', 'or', 'nor', 'but', 'is', 'if', 'then',
		'else', 'when', 'at', 'from', 'by', 'on', 'off', 'for', 'in', 'out',
		'over', 'to', 'into', 'with'
	);

	$words = explode(' ', $title);
	foreach ($words as $key => $word) {
		$lowerWord = self::strtolower($word);
		if ($key == 0 or !in_array($lowerWord, $smallWords)) {
			$words[$key] = self::ucfirst($lowerWord);
		} else {
			$words[$key] = $lowerWord;
		}
	}

	return implode(' ', $words);
}
/**
 * Split a string on the first delimiter, out of a list of candidates,
 * that actually occurs in it.
 * @param $delimiters array candidate delimiters, tried in order
 * @param $input string
 * @return array the exploded parts, or the input wrapped in a one-element
 *  array when none of the delimiters occurs
 */
function iterativeExplode($delimiters, $input) {
	foreach ($delimiters as $candidate) {
		// Skip candidates that do not occur in the input.
		if (strstr($input, $candidate) === false) {
			continue;
		}
		return explode($candidate, $input);
	}

	// None of the delimiters works: hand back the original string as an array.
	$unsplit = array($input);
	return $unsplit;
}
/**
 * Transform "handler-class" to "HandlerClass"
 * and "my-op" to "myOp".
 * @param $string input string
 * @param $type which kind of camel case? CAMEL_CASE_HEAD_UP (default)
 *  capitalizes the first letter, CAMEL_CASE_HEAD_DOWN lower-cases it
 * @return string the string in camel case
 */
function camelize($string, $type = CAMEL_CASE_HEAD_UP) {
	assert($type == CAMEL_CASE_HEAD_UP || $type == CAMEL_CASE_HEAD_DOWN);

	// Turn hyphens into word boundaries, capitalize every word, then
	// close the gaps: "handler-class" becomes "HandlerClass".
	$spaced = str_replace('-', ' ', $string);
	$capitalized = ucwords($spaced);
	$camelized = str_replace(' ', '', $capitalized);

	if ($type != CAMEL_CASE_HEAD_DOWN) {
		return $camelized;
	}

	// Transform "MyOp" to "myOp".
	// lcfirst() is PHP>5.3, so use workaround for PHP4 compatibility
	$head = strtolower(substr($camelized, 0, 1));
	$rest = substr($camelized, 1);
	return $head . $rest;
}
/**
 * Transform "HandlerClass" to "handler-class"
 * and "myOp" to "my-op".
 * @param $string string non-empty camel case string
 * @return string the lower-cased, hyphen-separated string
 */
function uncamelize($string) {
	assert(!empty($string));

	// Transform "myOp" to "MyOp" so that every word starts with a capital.
	$headUp = ucfirst($string);

	// Each word is a capital followed by lower-case letters or digits.
	$matches = array();
	String::regexp_match_all('/[A-Z][a-z0-9]*/', $headUp, $matches);

	// The words put back together must account for the whole input.
	assert(isset($matches[0]) && !empty($matches[0]) && strlen(implode('', $matches[0])) == strlen($headUp));

	// Insert hyphens between words and return the string in lowercase.
	$hyphenated = implode('-', $matches[0]);
	return strtolower($hyphenated);
}
- }
- ?>