/ext/mbstring/mbstring.c
C | 4499 lines | 3515 code | 567 blank | 417 comment | 906 complexity | f81abfb47963bc3baaeb982bb86d7726 MD5 | raw file
Possible License(s): BSD-2-Clause, BSD-3-Clause, MPL-2.0-no-copyleft-exception, LGPL-2.1
Large files are truncated, but you can click here to view the full file
- /*
- +----------------------------------------------------------------------+
- | Copyright (c) The PHP Group |
- +----------------------------------------------------------------------+
- | This source file is subject to version 3.01 of the PHP license, |
- | that is bundled with this package in the file LICENSE, and is |
- | available through the world-wide-web at the following url: |
- | http://www.php.net/license/3_01.txt |
- | If you did not receive a copy of the PHP license and are unable to |
- | obtain it through the world-wide-web, please send a note to |
- | license@php.net so we can mail you a copy immediately. |
- +----------------------------------------------------------------------+
- | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
- | Rui Hirokawa <hirokawa@php.net> |
- | Hironori Sato <satoh@jpnnet.com> |
- | Shigeru Kanemoto <sgk@happysize.co.jp> |
- +----------------------------------------------------------------------+
- */
- /* {{{ includes */
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #include "php.h"
- #include "php_ini.h"
- #include "php_variables.h"
- #include "mbstring.h"
- #include "ext/standard/php_string.h"
- #include "ext/standard/php_mail.h"
- #include "ext/standard/exec.h"
- #include "ext/standard/url.h"
- #include "main/php_output.h"
- #include "ext/standard/info.h"
- #include "libmbfl/mbfl/mbfl_allocators.h"
- #include "libmbfl/mbfl/mbfilter_8bit.h"
- #include "libmbfl/mbfl/mbfilter_pass.h"
- #include "libmbfl/mbfl/mbfilter_wchar.h"
- #include "libmbfl/filters/mbfilter_ascii.h"
- #include "libmbfl/filters/mbfilter_base64.h"
- #include "libmbfl/filters/mbfilter_qprint.h"
- #include "libmbfl/filters/mbfilter_ucs4.h"
- #include "libmbfl/filters/mbfilter_utf8.h"
- #include "php_variables.h"
- #include "php_globals.h"
- #include "rfc1867.h"
- #include "php_content_types.h"
- #include "SAPI.h"
- #include "php_unicode.h"
- #include "TSRM.h"
- #include "mb_gpc.h"
- #if HAVE_MBREGEX
- # include "php_mbregex.h"
- # include "php_onig_compat.h"
- # include <oniguruma.h>
- # undef UChar
- #if ONIGURUMA_VERSION_INT < 60800
- typedef void OnigMatchParam;
- #define onig_new_match_param() (NULL)
- #define onig_initialize_match_param(x) (void)(x)
- #define onig_set_match_stack_limit_size_of_match_param(x, y)
- #define onig_set_retry_limit_in_match_of_match_param(x, y)
- #define onig_free_match_param(x)
- #define onig_search_with_param(reg, str, end, start, range, region, option, mp) \
- onig_search(reg, str, end, start, range, region, option)
- #define onig_match_with_param(re, str, end, at, region, option, mp) \
- onig_match(re, str, end, at, region, option)
- #endif
- #else
- # include "ext/pcre/php_pcre.h"
- #endif
- #include "zend_multibyte.h"
- #include "mbstring_arginfo.h"
- /* }}} */
- #if HAVE_MBSTRING
- /* {{{ prototypes */
- ZEND_DECLARE_MODULE_GLOBALS(mbstring)
- static PHP_GINIT_FUNCTION(mbstring);
- static PHP_GSHUTDOWN_FUNCTION(mbstring);
- static void php_mb_populate_current_detect_order_list(void);
- static int php_mb_encoding_translation(void);
- static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
- static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
- static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
- static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
- /* }}} */
- /* {{{ php_mb_default_identify_list */
/* One row of the language -> default encoding list table. */
typedef struct _php_mb_nls_ident_list {
	enum mbfl_no_language lang; /* language this row applies to */
	const enum mbfl_no_encoding *list; /* encoding identifiers for that language */
	size_t list_size; /* number of elements in list */
} php_mb_nls_ident_list;
/* Per-language encoding lists. php_mb_default_identify_list pairs each of
 * these with a language, and php_mb_nls_get_default_detect_order_list()
 * hands the matching list out as the default detect order. */

/* Japanese */
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_jis,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_jp,
	mbfl_no_encoding_sjis
};
/* Simplified Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_cn,
	mbfl_no_encoding_cp936
};
/* Traditional Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_tw,
	mbfl_no_encoding_big5
};
/* Korean */
static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_kr,
	mbfl_no_encoding_uhc
};
/* Russian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8r,
	mbfl_no_encoding_cp1251,
	mbfl_no_encoding_cp866
};
/* Armenian */
static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_armscii8
};
/* Turkish */
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_cp1254,
	mbfl_no_encoding_8859_9
};
/* Ukrainian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8u
};
/* Neutral: also the fallback used when a language has no row of its own. */
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8
};
- static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
- { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
- { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
- { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
- { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
- { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
- { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
- { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
- { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
- { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
- };
- /* }}} */
- /* {{{ zend_module_entry mbstring_module_entry */
/* Module descriptor for the "mbstring" extension: function table, module and
 * request init/shutdown hooks, info hook, version string, and the module
 * globals together with their GINIT/GSHUTDOWN callbacks. */
zend_module_entry mbstring_module_entry = {
	STANDARD_MODULE_HEADER,
	"mbstring",
	ext_functions,
	PHP_MINIT(mbstring),
	PHP_MSHUTDOWN(mbstring),
	PHP_RINIT(mbstring),
	PHP_RSHUTDOWN(mbstring),
	PHP_MINFO(mbstring),
	PHP_MBSTRING_VERSION,
	PHP_MODULE_GLOBALS(mbstring),
	PHP_GINIT(mbstring),
	PHP_GSHUTDOWN(mbstring),
	NULL,
	STANDARD_MODULE_PROPERTIES_EX
};
- /* }}} */
- /* {{{ static sapi_post_entry php_post_entries[] */
/* POST handlers that OnUpdate_mbstring_encoding_translation registers when
 * mbstring.encoding_translation is off (and unregisters when it is on).
 * The table ends with an all-NULL entry. */
static const sapi_post_entry php_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
	{ MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
- /* }}} */
- #ifdef COMPILE_DL_MBSTRING
- #ifdef ZTS
- ZEND_TSRMLS_CACHE_DEFINE()
- #endif
- ZEND_GET_MODULE(mbstring)
- #endif
- /* {{{ allocators */
- static void *_php_mb_allocators_malloc(size_t sz)
- {
- return emalloc(sz);
- }
- static void *_php_mb_allocators_realloc(void *ptr, size_t sz)
- {
- return erealloc(ptr, sz);
- }
- static void *_php_mb_allocators_calloc(size_t nelems, size_t szelem)
- {
- return ecalloc(nelems, szelem);
- }
- static void _php_mb_allocators_free(void *ptr)
- {
- efree(ptr);
- }
- static const mbfl_allocators _php_mb_allocators = {
- _php_mb_allocators_malloc,
- _php_mb_allocators_realloc,
- _php_mb_allocators_calloc,
- _php_mb_allocators_free,
- };
- /* }}} */
- /* {{{ static sapi_post_entry mbstr_post_entries[] */
/* POST handlers that OnUpdate_mbstring_encoding_translation registers when
 * mbstring.encoding_translation is on. Differs from php_post_entries only in
 * using php_mb_post_handler for the default form content type. */
static const sapi_post_entry mbstr_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
	{ MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
- /* }}} */
- static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, uint32_t arg_num) {
- if (encoding_name) {
- const mbfl_encoding *encoding;
- zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
- if (last_encoding_name && (last_encoding_name == encoding_name
- || !strcasecmp(ZSTR_VAL(encoding_name), ZSTR_VAL(last_encoding_name)))) {
- return MBSTRG(last_used_encoding);
- }
- encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
- if (!encoding) {
- zend_argument_value_error(arg_num, "must be a valid encoding, \"%s\" given", ZSTR_VAL(encoding_name));
- return NULL;
- }
- if (last_encoding_name) {
- zend_string_release(last_encoding_name);
- }
- MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name);
- MBSTRG(last_used_encoding) = encoding;
- return encoding;
- } else {
- return MBSTRG(current_internal_encoding);
- }
- }
- static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_name) {
- if (strcmp(encoding_name, "pass") == 0) {
- return &mbfl_encoding_pass;
- }
- return mbfl_name2encoding(encoding_name);
- }
- /* {{{ static int php_mb_parse_encoding_list()
- * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
- * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
- */
- static int php_mb_parse_encoding_list(const char *value, size_t value_length,
- const mbfl_encoding ***return_list, size_t *return_size, int persistent, uint32_t arg_num,
- zend_bool allow_pass_encoding)
- {
- if (value == NULL || value_length == 0) {
- *return_list = NULL;
- *return_size = 0;
- return SUCCESS;
- } else {
- zend_bool included_auto;
- size_t n, size;
- char *p, *p1, *p2, *endp, *tmpstr;
- const mbfl_encoding **entry, **list;
- /* copy the value string for work */
- if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
- tmpstr = (char *)estrndup(value+1, value_length-2);
- value_length -= 2;
- } else {
- tmpstr = (char *)estrndup(value, value_length);
- }
- /* count the number of listed encoding names */
- endp = tmpstr + value_length;
- n = 1;
- p1 = tmpstr;
- while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
- p1 = p2 + 1;
- n++;
- }
- size = n + MBSTRG(default_detect_order_list_size);
- /* make list */
- list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
- entry = list;
- n = 0;
- included_auto = 0;
- p1 = tmpstr;
- do {
- p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
- if (p == NULL) {
- p = endp;
- }
- *p = '\0';
- /* trim spaces */
- while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
- p1++;
- }
- p--;
- while (p > p1 && (*p == ' ' || *p == '\t')) {
- *p = '\0';
- p--;
- }
- /* convert to the encoding number and check encoding */
- if (strcasecmp(p1, "auto") == 0) {
- if (!included_auto) {
- const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
- const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
- size_t i;
- included_auto = 1;
- for (i = 0; i < identify_list_size; i++) {
- *entry++ = mbfl_no2encoding(*src++);
- n++;
- }
- }
- } else {
- const mbfl_encoding *encoding =
- allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
- if (!encoding) {
- /* Called from an INI setting modification */
- if (arg_num == 0) {
- php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
- } else {
- zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
- }
- efree(tmpstr);
- pefree(list, persistent);
- return FAILURE;
- }
- *entry++ = encoding;
- n++;
- }
- p1 = p2 + 1;
- } while (n < size && p2 != NULL);
- *return_list = list;
- *return_size = n;
- efree(tmpstr);
- }
- return SUCCESS;
- }
- /* }}} */
- /* {{{ static int php_mb_parse_encoding_array()
- * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
- * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
- */
- static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***return_list,
- size_t *return_size, uint32_t arg_num)
- {
- /* Allocate enough space to include the default detect order if "auto" is used. */
- size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size);
- const mbfl_encoding **list = ecalloc(size, sizeof(mbfl_encoding*));
- const mbfl_encoding **entry = list;
- zend_bool included_auto = 0;
- size_t n = 0;
- zval *hash_entry;
- ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
- zend_string *encoding_str = zval_try_get_string(hash_entry);
- if (UNEXPECTED(!encoding_str)) {
- efree(list);
- return FAILURE;
- }
- if (strcasecmp(ZSTR_VAL(encoding_str), "auto") == 0) {
- if (!included_auto) {
- const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
- const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
- size_t j;
- included_auto = 1;
- for (j = 0; j < identify_list_size; j++) {
- *entry++ = mbfl_no2encoding(*src++);
- n++;
- }
- }
- } else {
- const mbfl_encoding *encoding = mbfl_name2encoding(ZSTR_VAL(encoding_str));
- if (encoding) {
- *entry++ = encoding;
- n++;
- } else {
- zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", ZSTR_VAL(encoding_str));
- zend_string_release(encoding_str);
- efree(list);
- return FAILURE;
- }
- }
- zend_string_release(encoding_str);
- } ZEND_HASH_FOREACH_END();
- *return_list = list;
- *return_size = n;
- return SUCCESS;
- }
- /* }}} */
- /* {{{ zend_multibyte interface */
- static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
- {
- return (const zend_encoding*)mbfl_name2encoding(encoding_name);
- }
- static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
- {
- return ((const mbfl_encoding *)encoding)->name;
- }
- static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
- {
- const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
- if (encoding->flag & MBFL_ENCTYPE_SBCS) {
- return 1;
- }
- if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
- return 1;
- }
- return 0;
- }
- static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
- {
- mbfl_string string;
- if (!list) {
- list = (const zend_encoding **)MBSTRG(current_detect_order_list);
- list_size = MBSTRG(current_detect_order_list_size);
- }
- mbfl_string_init(&string);
- string.val = (unsigned char *)arg_string;
- string.len = arg_length;
- return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0);
- }
- static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
- {
- mbfl_string string, result;
- mbfl_buffer_converter *convd;
- int status;
- size_t loc;
- /* new encoding */
- /* initialize string */
- string.encoding = (const mbfl_encoding*)encoding_from;
- string.val = (unsigned char*)from;
- string.len = from_length;
- /* initialize converter */
- convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
- if (convd == NULL) {
- return (size_t) -1;
- }
- mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
- /* do it */
- status = mbfl_buffer_converter_feed2(convd, &string, &loc);
- if (status) {
- mbfl_buffer_converter_delete(convd);
- return (size_t)-1;
- }
- mbfl_buffer_converter_flush(convd);
- mbfl_string_init(&result);
- if (!mbfl_buffer_converter_result(convd, &result)) {
- mbfl_buffer_converter_delete(convd);
- return (size_t)-1;
- }
- *to = result.val;
- *to_length = result.len;
- mbfl_buffer_converter_delete(convd);
- return loc;
- }
- static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
- {
- return php_mb_parse_encoding_list(
- encoding_list, encoding_list_len,
- (const mbfl_encoding ***)return_list, return_size,
- persistent, /* arg_num */ 0, /* allow_pass_encoding */ 1);
- }
- static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
- {
- return (const zend_encoding *)MBSTRG(internal_encoding);
- }
- static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
- {
- MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
- return SUCCESS;
- }
/* Callback table, labelled "mbstring", bundling the zend_multibyte hooks
 * defined above: fetcher, name getter, lexer compatibility checker, detector,
 * converter, list parser, and internal encoding getter/setter. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring",
	php_mb_zend_encoding_fetcher,
	php_mb_zend_encoding_name_getter,
	php_mb_zend_encoding_lexer_compatibility_checker,
	php_mb_zend_encoding_detector,
	php_mb_zend_encoding_converter,
	php_mb_zend_encoding_list_parser,
	php_mb_zend_internal_encoding_getter,
	php_mb_zend_internal_encoding_setter
};
- /* }}} */
- static void *_php_mb_compile_regex(const char *pattern);
- static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
- static void _php_mb_free_regex(void *opaque);
- #if HAVE_MBREGEX
- /* {{{ _php_mb_compile_regex */
- static void *_php_mb_compile_regex(const char *pattern)
- {
- php_mb_regex_t *retval;
- OnigErrorInfo err_info;
- int err_code;
- if ((err_code = onig_new(&retval,
- (const OnigUChar *)pattern,
- (const OnigUChar *)pattern + strlen(pattern),
- ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
- ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
- OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str(err_str, err_code, err_info);
- php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
- retval = NULL;
- }
- return retval;
- }
- /* }}} */
- /* {{{ _php_mb_match_regex */
- static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
- {
- OnigMatchParam *mp = onig_new_match_param();
- int err;
- onig_initialize_match_param(mp);
- if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) {
- onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
- }
- if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_retry_limit))) {
- onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int)MBSTRG(regex_retry_limit));
- }
- /* search */
- err = onig_search_with_param((php_mb_regex_t *)opaque, (const OnigUChar *)str,
- (const OnigUChar*)str + str_len, (const OnigUChar *)str,
- (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE, mp);
- onig_free_match_param(mp);
- return err >= 0;
- }
- /* }}} */
- /* {{{ _php_mb_free_regex */
- static void _php_mb_free_regex(void *opaque)
- {
- onig_free((php_mb_regex_t *)opaque);
- }
- /* }}} */
- #else
- /* {{{ _php_mb_compile_regex */
- static void *_php_mb_compile_regex(const char *pattern)
- {
- pcre2_code *retval;
- PCRE2_SIZE err_offset;
- int errnum;
- if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
- PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
- PCRE2_UCHAR err_str[128];
- pcre2_get_error_message(errnum, err_str, sizeof(err_str));
- php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
- }
- return retval;
- }
- /* }}} */
- /* {{{ _php_mb_match_regex */
- static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
- {
- int res;
- pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
- if (NULL == match_data) {
- pcre2_code_free(opaque);
- php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
- return FAILURE;
- }
- res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
- php_pcre_free_match_data(match_data);
- return res;
- }
- /* }}} */
- /* {{{ _php_mb_free_regex */
- static void _php_mb_free_regex(void *opaque)
- {
- pcre2_code_free(opaque);
- }
- /* }}} */
- #endif
- /* {{{ php_mb_nls_get_default_detect_order_list */
- static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
- {
- size_t i;
- *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
- *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
- for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
- if (php_mb_default_identify_list[i].lang == lang) {
- *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
- *plist_size = php_mb_default_identify_list[i].list_size;
- return 1;
- }
- }
- return 0;
- }
- /* }}} */
- static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
- {
- char *result = emalloc(len + 2);
- char *resp = result;
- size_t i;
- for (i = 0; i < len && start[i] != quote; ++i) {
- if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
- *resp++ = start[++i];
- } else {
- size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
- while (j-- > 0 && i < len) {
- *resp++ = start[i++];
- }
- --i;
- }
- }
- *resp = '\0';
- return result;
- }
- static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
- {
- char *pos = *line, quote;
- char *res;
- while (*pos && *pos != stop) {
- if ((quote = *pos) == '"' || quote == '\'') {
- ++pos;
- while (*pos && *pos != quote) {
- if (*pos == '\\' && pos[1] && pos[1] == quote) {
- pos += 2;
- } else {
- ++pos;
- }
- }
- if (*pos) {
- ++pos;
- }
- } else {
- pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
- }
- }
- if (*pos == '\0') {
- res = estrdup(*line);
- *line += strlen(*line);
- return res;
- }
- res = estrndup(*line, pos - *line);
- while (*pos == stop) {
- pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
- }
- *line = pos;
- return res;
- }
- /* }}} */
- static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
- {
- while (*str && isspace(*(unsigned char *)str)) {
- ++str;
- }
- if (!*str) {
- return estrdup("");
- }
- if (*str == '"' || *str == '\'') {
- char quote = *str;
- str++;
- return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote);
- } else {
- char *strend = str;
- while (*strend && !isspace(*(unsigned char *)strend)) {
- ++strend;
- }
- return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0);
- }
- }
- /* }}} */
- static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
- {
- char *s, *s2;
- const size_t filename_len = strlen(filename);
- /* The \ check should technically be needed for win32 systems only where
- * it is a valid path separator. However, IE in all it's wisdom always sends
- * the full path of the file on the user's filesystem, which means that unless
- * the user does basename() they get a bogus file name. Until IE's user base drops
- * to nill or problem is fixed this code must remain enabled for all systems. */
- s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
- s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
- if (s && s2) {
- if (s > s2) {
- return ++s;
- } else {
- return ++s2;
- }
- } else if (s) {
- return ++s;
- } else if (s2) {
- return ++s2;
- } else {
- return filename;
- }
- }
- /* }}} */
- /* {{{ php.ini directive handler */
- /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
- static PHP_INI_MH(OnUpdate_mbstring_language)
- {
- enum mbfl_no_language no_language;
- no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
- if (no_language == mbfl_no_language_invalid) {
- MBSTRG(language) = mbfl_no_language_neutral;
- return FAILURE;
- }
- MBSTRG(language) = no_language;
- php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
- return SUCCESS;
- }
- /* }}} */
- /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
- static PHP_INI_MH(OnUpdate_mbstring_detect_order)
- {
- const mbfl_encoding **list;
- size_t size;
- if (!new_value) {
- if (MBSTRG(detect_order_list)) {
- pefree(MBSTRG(detect_order_list), 1);
- }
- MBSTRG(detect_order_list) = NULL;
- MBSTRG(detect_order_list_size) = 0;
- return SUCCESS;
- }
- if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 0) || size == 0) {
- return FAILURE;
- }
- if (MBSTRG(detect_order_list)) {
- pefree(MBSTRG(detect_order_list), 1);
- }
- MBSTRG(detect_order_list) = list;
- MBSTRG(detect_order_list_size) = size;
- return SUCCESS;
- }
- /* }}} */
- static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
- const mbfl_encoding **list;
- size_t size;
- if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 1) || size == 0) {
- return FAILURE;
- }
- if (MBSTRG(http_input_list)) {
- pefree(MBSTRG(http_input_list), 1);
- }
- MBSTRG(http_input_list) = list;
- MBSTRG(http_input_list_size) = size;
- return SUCCESS;
- }
- /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
- static PHP_INI_MH(OnUpdate_mbstring_http_input)
- {
- if (new_value) {
- php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
- }
- if (!new_value || !ZSTR_VAL(new_value)) {
- const char *encoding = php_get_input_encoding();
- MBSTRG(http_input_set) = 0;
- _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
- return SUCCESS;
- }
- MBSTRG(http_input_set) = 1;
- return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
- }
- /* }}} */
- static int _php_mb_ini_mbstring_http_output_set(const char *new_value) {
- const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(new_value);
- if (!encoding) {
- return FAILURE;
- }
- MBSTRG(http_output_encoding) = encoding;
- MBSTRG(current_http_output_encoding) = encoding;
- return SUCCESS;
- }
- /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
- static PHP_INI_MH(OnUpdate_mbstring_http_output)
- {
- if (new_value) {
- php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
- }
- if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
- MBSTRG(http_output_set) = 0;
- _php_mb_ini_mbstring_http_output_set(php_get_output_encoding());
- return SUCCESS;
- }
- MBSTRG(http_output_set) = 1;
- return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value));
- }
- /* }}} */
- /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
- static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
- {
- const mbfl_encoding *encoding;
- if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
- /* falls back to UTF-8 if an unknown encoding name is given */
- if (new_value) {
- php_error_docref("ref.mbstring", E_WARNING,
- "Unknown encoding \"%s\" in ini setting", new_value);
- }
- encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
- }
- MBSTRG(internal_encoding) = encoding;
- MBSTRG(current_internal_encoding) = encoding;
- #if HAVE_MBREGEX
- {
- const char *enc_name = new_value;
- if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
- /* falls back to UTF-8 if an unknown encoding name is given */
- enc_name = "UTF-8";
- php_mb_regex_set_default_mbctype(enc_name);
- }
- php_mb_regex_set_mbctype(new_value);
- }
- #endif
- return SUCCESS;
- }
- /* }}} */
- /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
- static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
- {
- if (new_value) {
- php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
- }
- if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
- return FAILURE;
- }
- if (new_value && ZSTR_LEN(new_value)) {
- MBSTRG(internal_encoding_set) = 1;
- return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
- } else {
- const char *encoding = php_get_internal_encoding();
- MBSTRG(internal_encoding_set) = 0;
- return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
- }
- }
- /* }}} */
- /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
- static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
- {
- int c;
- char *endptr = NULL;
- if (new_value != NULL) {
- if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
- MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
- } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
- MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
- } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
- MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
- } else {
- MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
- if (ZSTR_LEN(new_value) > 0) {
- c = strtol(ZSTR_VAL(new_value), &endptr, 0);
- if (*endptr == '\0') {
- MBSTRG(filter_illegal_substchar) = c;
- MBSTRG(current_filter_illegal_substchar) = c;
- }
- }
- }
- } else {
- MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
- MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
- MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
- }
- return SUCCESS;
- }
- /* }}} */
- /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
- static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
- {
- if (new_value == NULL) {
- return FAILURE;
- }
- OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
- if (MBSTRG(encoding_translation)) {
- sapi_unregister_post_entry(php_post_entries);
- sapi_register_post_entries(mbstr_post_entries);
- } else {
- sapi_unregister_post_entry(mbstr_post_entries);
- sapi_register_post_entries(php_post_entries);
- }
- return SUCCESS;
- }
- /* }}} */
- /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
- static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
- {
- zend_string *tmp;
- void *re = NULL;
- if (!new_value) {
- new_value = entry->orig_value;
- }
- tmp = php_trim(new_value, NULL, 0, 3);
- if (ZSTR_LEN(tmp) > 0) {
- if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
- zend_string_release_ex(tmp, 0);
- return FAILURE;
- }
- }
- if (MBSTRG(http_output_conv_mimetypes)) {
- _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
- }
- MBSTRG(http_output_conv_mimetypes) = re;
- zend_string_release_ex(tmp, 0);
- return SUCCESS;
- }
- /* }}} */
- /* }}} */
- /* {{{ php.ini directive registration */
- PHP_INI_BEGIN()
- PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
- PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
- PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
- PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
- STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
- PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
- STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
- PHP_INI_SYSTEM | PHP_INI_PERDIR,
- OnUpdate_mbstring_encoding_translation,
- encoding_translation, zend_mbstring_globals, mbstring_globals)
- PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
- "^(text/|application/xhtml\\+xml)",
- PHP_INI_ALL,
- OnUpdate_mbstring_http_output_conv_mimetypes)
- STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
- PHP_INI_ALL,
- OnUpdateBool,
- strict_detection, zend_mbstring_globals, mbstring_globals)
- #if HAVE_MBREGEX
- STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
- STD_PHP_INI_ENTRY("mbstring.regex_retry_limit", "1000000",PHP_INI_ALL, OnUpdateLong, regex_retry_limit, zend_mbstring_globals, mbstring_globals)
- #endif
- PHP_INI_END()
- /* }}} */
- static void mbstring_internal_encoding_changed_hook(void) {
- /* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */
- if (!MBSTRG(internal_encoding_set)) {
- const char *encoding = php_get_internal_encoding();
- _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
- }
- if (!MBSTRG(http_output_set)) {
- const char *encoding = php_get_output_encoding();
- _php_mb_ini_mbstring_http_output_set(encoding);
- }
- if (!MBSTRG(http_input_set)) {
- const char *encoding = php_get_input_encoding();
- _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
- }
- }
- /* {{{ module global initialize handler */
- static PHP_GINIT_FUNCTION(mbstring)
- {
- #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
- ZEND_TSRMLS_CACHE_UPDATE();
- #endif
- mbstring_globals->language = mbfl_no_language_uni;
- mbstring_globals->internal_encoding = NULL;
- mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
- mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
- mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
- mbstring_globals->http_input_identify = NULL;
- mbstring_globals->http_input_identify_get = NULL;
- mbstring_globals->http_input_identify_post = NULL;
- mbstring_globals->http_input_identify_cookie = NULL;
- mbstring_globals->http_input_identify_string = NULL;
- mbstring_globals->http_input_list = NULL;
- mbstring_globals->http_input_list_size = 0;
- mbstring_globals->detect_order_list = NULL;
- mbstring_globals->detect_order_list_size = 0;
- mbstring_globals->current_detect_order_list = NULL;
- mbstring_globals->current_detect_order_list_size = 0;
- mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
- mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
- mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
- mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
- mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
- mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
- mbstring_globals->illegalchars = 0;
- mbstring_globals->encoding_translation = 0;
- mbstring_globals->strict_detection = 0;
- mbstring_globals->outconv = NULL;
- mbstring_globals->http_output_conv_mimetypes = NULL;
- #if HAVE_MBREGEX
- mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
- #endif
- mbstring_globals->last_used_encoding_name = NULL;
- mbstring_globals->last_used_encoding = NULL;
- mbstring_globals->internal_encoding_set = 0;
- mbstring_globals->http_output_set = 0;
- mbstring_globals->http_input_set = 0;
- }
- /* }}} */
- /* {{{ PHP_GSHUTDOWN_FUNCTION */
- static PHP_GSHUTDOWN_FUNCTION(mbstring)
- {
- if (mbstring_globals->http_input_list) {
- free(mbstring_globals->http_input_list);
- }
- if (mbstring_globals->detect_order_list) {
- free(mbstring_globals->detect_order_list);
- }
- if (mbstring_globals->http_output_conv_mimetypes) {
- _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
- }
- #if HAVE_MBREGEX
- php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
- #endif
- }
- /* }}} */
- /* {{{ PHP_MINIT_FUNCTION(mbstring) */
- PHP_MINIT_FUNCTION(mbstring)
- {
- #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
- ZEND_TSRMLS_CACHE_UPDATE();
- #endif
- __mbfl_allocators = (mbfl_allocators*)&_php_mb_allocators;
- REGISTER_INI_ENTRIES();
- /* We assume that we're the only user of the hook. */
- ZEND_ASSERT(php_internal_encoding_changed == NULL);
- php_internal_encoding_changed = mbstring_internal_encoding_changed_hook;
- mbstring_internal_encoding_changed_hook();
- /* This is a global handler. Should not be set in a per-request handler. */
- sapi_register_treat_data(mbstr_treat_data);
- /* Post handlers are stored in the thread-local context. */
- if (MBSTRG(encoding_translation)) {
- sapi_register_post_entries(mbstr_post_entries);
- }
- REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
- REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);
- #if HAVE_MBREGEX
- PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
- #endif
- if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
- return FAILURE;
- }
- php_rfc1867_set_multibyte_callbacks(
- php_mb_encoding_translation,
- php_mb_gpc_get_detect_order,
- php_mb_gpc_set_input_encoding,
- php_mb_rfc1867_getword,
- php_mb_rfc1867_getword_conf,
- php_mb_rfc1867_basename);
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
- PHP_MSHUTDOWN_FUNCTION(mbstring)
- {
- UNREGISTER_INI_ENTRIES();
- zend_multibyte_restore_functions();
- #if HAVE_MBREGEX
- PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
- #endif
- php_internal_encoding_changed = NULL;
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_RINIT_FUNCTION(mbstring) */
- PHP_RINIT_FUNCTION(mbstring)
- {
- MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
- MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
- MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
- MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
- MBSTRG(illegalchars) = 0;
- php_mb_populate_current_detect_order_list();
- #if HAVE_MBREGEX
- PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
- #endif
- zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
- PHP_RSHUTDOWN_FUNCTION(mbstring)
- {
- if (MBSTRG(current_detect_order_list) != NULL) {
- efree(MBSTRG(current_detect_order_list));
- MBSTRG(current_detect_order_list) = NULL;
- MBSTRG(current_detect_order_list_size) = 0;
- }
- if (MBSTRG(outconv) != NULL) {
- MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
- mbfl_buffer_converter_delete(MBSTRG(outconv));
- MBSTRG(outconv) = NULL;
- }
- /* clear http input identification. */
- MBSTRG(http_input_identify) = NULL;
- MBSTRG(http_input_identify_post) = NULL;
- MBSTRG(http_input_identify_get) = NULL;
- MBSTRG(http_input_identify_cookie) = NULL;
- MBSTRG(http_input_identify_string) = NULL;
- if (MBSTRG(last_used_encoding_name)) {
- zend_string_release(MBSTRG(last_used_encoding_name));
- MBSTRG(last_used_encoding_name) = NULL;
- }
- MBSTRG(internal_encoding_set) = 0;
- MBSTRG(http_output_set) = 0;
- MBSTRG(http_input_set) = 0;
- #if HAVE_MBREGEX
- PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
- #endif
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_MINFO_FUNCTION(mbstring) */
- PHP_MINFO_FUNCTION(mbstring)
- {
- php_info_print_table_start();
- php_info_print_table_row(2, "Multibyte Support", "enabled");
- php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
- php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
- {
- char tmp[256];
- snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
- php_info_print_table_row(2, "libmbfl version", tmp);
- }
- php_info_print_table_end();
- php_info_print_table_start();
- php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
- php_info_print_table_end();
- #if HAVE_MBREGEX
- PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
- #endif
- DISPLAY_INI_ENTRIES();
- }
- /* }}} */
- /* {{{ proto string mb_language([string language])
- Sets the current language or Returns the current language as a string */
- PHP_FUNCTION(mb_language)
- {
- zend_string *name = NULL;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
- RETURN_THROWS();
- }
- if (name == NULL) {
- RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
- } else {
- zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
- if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
- zend_argument_value_error(1, "must be a valid language, \"%s\" given", ZSTR_VAL(name));
- zend_string_release_ex(ini_name, 0);
- RETURN_THROWS();
- }
- // TODO Make return void
- RETVAL_TRUE;
- zend_string_release_ex(ini_name, 0);
- }
- }
- /* }}} */
- /* {{{ proto string mb_internal_encoding([string encoding])
- Sets the current internal encoding or Returns the current internal encoding as a string */
- PHP_FUNCTION(mb_internal_encoding)
- {
- const char *name = NULL;
- size_t name_len;
- const mbfl_encoding *encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
- RETURN_THROWS();
- }
- if (name == NULL) {
- ZEND_ASSERT(MBSTRG(current_internal_encoding));
- RETURN_STRING(MBSTRG(current_internal_encoding)->name);
- } else {
- encoding = mbfl_name2encoding(name);
- if (!encoding) {
- zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
- RETURN_THROWS();
- } else {
- MBSTRG(current_internal_encoding) = encoding;
- MBSTRG(internal_encoding_set) = 1;
- /* TODO Return old encoding */
- RETURN_TRUE;
- }
- }
- }
- /* }}} */
- /* {{{ proto mixed mb_http_input([string type])
- Returns the input encoding */
- PHP_FUNCTION(mb_http_input)
- {
- char *typ = NULL;
- size_t typ_len;
- int retname;
- char *list, *temp;
- const mbfl_encoding *result = NULL;
- retname = 1;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
- RETURN_THROWS();
- }
- if (typ == NULL) {
- result = MBSTRG(http_input_identify);
- } else {
- switch (*typ) {
- case 'G':
- case 'g':
- result = MBSTRG(http_input_identify_get);
- break;
- case 'P':
- case 'p':
- result = MBSTRG(http_input_identify_post);
- break;
- case 'C':
- case 'c':
- result = MBSTRG(http_input_identify_cookie);
- break;
- case 'S':
- case 's':
- result = MBSTRG(http_input_identify_string);
- break;
- case 'I':
- case 'i':
- {
- const mbfl_encoding **entry = MBSTRG(http_input_list);
- const size_t n = MBSTRG(http_input_list_size);
- size_t i;
- array_init(return_value);
- for (i = 0; i < n; i++) {
- add_next_index_string(return_value, (*entry)->name);
- entry++;
- }
- retname = 0;
- }
- break;
- case 'L':
- case 'l':
- {
- const mbfl_encoding **entry = MBSTRG(http_input_list);
- const size_t n = MBSTRG(http_input_list_size);
- size_t i;
- list = NULL;
- for (i = 0; i < n; i++) {
- if (list) {
- temp = list;
- spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
- efree(temp);
- if (!list) {
- break;
- }
- } else {
- list = estrdup((*entry)->name);
- }
- entry++;
- }
- }
- if (!list) {
- // TODO should return empty string?
- RETURN_FALSE;
- }
- RETVAL_STRING(list);
- efree(list);
- retname = 0;
- break;
- default:
- // TODO ValueError
- result = MBSTRG(http_input_identify);
- break;
- }
- }
- // FIXME this bloc seems useless except for default switch case
- if (retname) {
- if (result) {
- RETVAL_STRING(result->name);
- } else {
- RETVAL_FALSE;
- }
- }
- }
- /* }}} */
- /* {{{ proto string mb_http_output([string encoding])
- Sets the current output_encoding or returns the current output_encoding as a string */
- PHP_FUNCTION(mb_http_output)
- {
- const char *name = NULL;
- size_t name_len;
- const mbfl_encoding *encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
- RETURN_THROWS();
- }
- if (name == NULL) {
- ZEND_ASSERT(MBSTRG(current_http_output_encoding));
- RETURN_STRING(MBSTRG(current_http_output_encoding)->name);
- } else {
- encoding = php_mb_get_encoding_or_pass(name);
- if (!encoding) {
- zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
- RETURN_THROWS();
- } else {
- MBSTRG(http_output_set) = 1;
- MBSTRG(current_http_output_encoding) = encoding;
- /* TODO Return previous encoding? */
- RETURN_TRUE;
- }
- }
- }
- /* }}} */
- /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
- Sets the current detect_order or Return the current detect_order as a array */
- PHP_FUNCTION(mb_detect_order)
- {
- zend_string *order_str = NULL;
- HashTable *order_ht = NULL;
- ZEND_PARSE_PARAMETERS_START(0, 1)
- Z_PARAM_OPTIONAL
- Z_PARAM_STR_OR_ARRAY_HT(order_str, order_ht)
- ZEND_PARSE_PARAMETERS_END();
- if (!order_str && !order_ht) {
- size_t i;
- size_t n = MBSTRG(current_detect_order_list_size);
- const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
- array_init(return_value);
- for (i = 0; i < n; i++) {
- add_next_index_string(return_value, (*entry)->name);
- entry++;
- }
- } else {
- const mbfl_encoding **list;
- size_t size;
- if (order_ht) {
- if (FAILURE == php_mb_parse_encoding_array(order_ht, &list, &size, 1)) {
- RETURN_THROWS();
- }
- } else {
- if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1, /* allow_pass_encoding */ 0)) {
- RETURN_THROWS();
- }
- }
- if (size == 0) {
- efree(list);
- zend_argument_value_error(1, "must specify at least one encoding");
- RETURN_THROWS();
- }
- if (MBSTRG(current_detect_order_list)) {
- efree(MBSTRG(current_detect_order_list));
- }
- MBSTRG(current_detect_order_list) = list;
- MBSTRG(current_detect_order_list_size) = size;
- RETURN_TRUE;
- }
- }
- /* }}} */
- static inline int php_mb_check_code_point(zend_long cp)
- {
- if (cp < 0 || cp >= 0x110000) {
- /* Out of Unicode range */
- return 0;
- }
- if (cp >= 0xd800 && cp <= 0xdfff) {
- /* Surrogate code-point. These are never valid on their own and we only allow a single
- * substitute character. */
- return 0;
- }
- /* As the we do not know the target encoding of the conversion operation that is going to
- * use the substitution character, we cannot check whether the codepoint is actually mapped
- * in the given encoding at this point. Thus we have to accept everything. */
- return 1;
- }
- /* {{{ proto string|int|true mb_substitute_character([string|int|null substitute_character])
- Sets the current substitute_character or returns the current substitute_character */
- PHP_FUNCTION(mb_substitute_character)
- {
- zend_string *substitute_character = NULL;
- zend_long substitute_codepoint;
- zend_bool substitute_is_null = 1;
- ZEND_PARSE_PARAMETERS_START(0, 1)
- Z_PARAM_OPTIONAL
- Z_PARAM_STR_OR_LONG_OR_NULL(substitute_character, substitute_codepoint, substitute_is_null)
- ZEND_PARSE_PARAMETERS_END();
- if (substitute_is_null) {
- if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
- RETURN_STRING("none");
- }
- if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
- RETURN_STRING("long");
- }
- if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
- RETURN_STRING("entity");
- }
- RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
- }
- if (substitute_character != NULL) {
- if (zend_string_equals_literal_ci(substitute_character, "none")) {
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
- RETURN_TRUE;
- }
- if (zend_string_equals_literal_ci(substitute_character, "long")) {
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
- RETURN_TRUE;
- }
- if (zend_string_equals_literal_ci(substitute_character, "entity")) {
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
- RETURN_TRUE;
- }
- /* Invalid string value */
- zend_argument_value_error(1, "must be 'none', 'long', 'entity' or a valid codepoint");
- RETURN_THROWS();
- }
- /* Integer codepoint passed */
- if (!php_mb_check_code_point(substitute_codepoint)) {
- zend_argument_value_error(1, "is not a valid codepoint");
- RETURN_THROWS();
- }
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
- MBSTRG(current_filter_illegal_substchar) = substitute_codepoint;
- RETURN_TRUE;
- }
- /* }}} */
- /* {{{ proto string mb_preferred_mime_name(string encoding)
- Return the preferred MIME name (charset) as a string */
- PHP_FUNCTION(mb_preferred_mime_name)
- {
- enum mbfl_no_encoding no_encoding;
- char *name = NULL;
- size_t name_len;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
- RETURN_THROWS();
- }
- no_encoding = mbfl_name2no_encoding(name);
- if (no_encoding == mbfl_no_encoding_invalid) {
- zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
- R…
Large files are truncated, but you can click here to view the full file