/hphp/runtime/ext/ext_mb.cpp
C++ | 4191 lines | 3589 code | 415 blank | 187 comment | 982 complexity | 6654a83821b70924cecdcf76a157ee43 MD5 | raw file
Possible License(s): LGPL-2.1, BSD-2-Clause, BSD-3-Clause, MPL-2.0-no-copyleft-exception, MIT, LGPL-2.0, Apache-2.0
Large files are truncated, but you can click here to view the full file
- /*
- +----------------------------------------------------------------------+
- | HipHop for PHP |
- +----------------------------------------------------------------------+
- | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
- | Copyright (c) 1997-2010 The PHP Group |
- +----------------------------------------------------------------------+
- | This source file is subject to version 3.01 of the PHP license, |
- | that is bundled with this package in the file LICENSE, and is |
- | available through the world-wide-web at the following url: |
- | http://www.php.net/license/3_01.txt |
- | If you did not receive a copy of the PHP license and are unable to |
- | obtain it through the world-wide-web, please send a note to |
- | license@php.net so we can mail you a copy immediately. |
- +----------------------------------------------------------------------+
- */
- #include "hphp/runtime/ext/ext_mb.h"
- #include "hphp/runtime/base/string-buffer.h"
- #include "hphp/runtime/base/request-local.h"
- #include "hphp/runtime/ext/php_unicode.h"
- #include "hphp/runtime/ext/unicode_data.h"
- #include "hphp/runtime/ext/ext_process.h"
- #include "hphp/runtime/ext/ext_string.h"
- #include "hphp/runtime/base/zend-url.h"
- #include "hphp/runtime/base/zend-string.h"
- #include "hphp/runtime/base/ini-setting.h"
- extern "C" {
- #include <mbfl/mbfl_convert.h>
- #include <mbfl/mbfilter.h>
- #include <oniguruma.h>
- }
- #define php_mb_re_pattern_buffer re_pattern_buffer
- #define php_mb_regex_t regex_t
- #define php_mb_re_registers re_registers
- extern void mbfl_memory_device_unput(mbfl_memory_device *device);
/*
 * Numeric tags named after request-data sources. Their consumers are not
 * part of this section of the file.
 */
#define PARSE_POST     0
#define PARSE_GET      1
#define PARSE_COOKIE   2
#define PARSE_STRING   3
#define PARSE_ENV      4
#define PARSE_SERVER   5
#define PARSE_SESSION  6
- namespace HPHP {
- static class mbstringExtension : public Extension {
- public:
- mbstringExtension() : Extension("mbstring") {}
- virtual void moduleInit() {
- IniSetting::SetGlobalDefault("mbstring.http_input", "pass");
- IniSetting::SetGlobalDefault("mbstring.http_output", "pass");
- }
- } s_mbstring_extension;
- ///////////////////////////////////////////////////////////////////////////////
- // statics
- #define PHP_MBSTR_STACK_BLOCK_SIZE 32
- typedef struct _php_mb_nls_ident_list {
- mbfl_no_language lang;
- mbfl_no_encoding* list;
- int list_size;
- } php_mb_nls_ident_list;
- static mbfl_no_encoding php_mb_default_identify_list_ja[] = {
- mbfl_no_encoding_ascii,
- mbfl_no_encoding_jis,
- mbfl_no_encoding_utf8,
- mbfl_no_encoding_euc_jp,
- mbfl_no_encoding_sjis
- };
- static mbfl_no_encoding php_mb_default_identify_list_cn[] = {
- mbfl_no_encoding_ascii,
- mbfl_no_encoding_utf8,
- mbfl_no_encoding_euc_cn,
- mbfl_no_encoding_cp936
- };
- static mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
- mbfl_no_encoding_ascii,
- mbfl_no_encoding_utf8,
- mbfl_no_encoding_euc_tw,
- mbfl_no_encoding_big5
- };
- static mbfl_no_encoding php_mb_default_identify_list_kr[] = {
- mbfl_no_encoding_ascii,
- mbfl_no_encoding_utf8,
- mbfl_no_encoding_euc_kr,
- mbfl_no_encoding_uhc
- };
- static mbfl_no_encoding php_mb_default_identify_list_ru[] = {
- mbfl_no_encoding_ascii,
- mbfl_no_encoding_utf8,
- mbfl_no_encoding_koi8r,
- mbfl_no_encoding_cp1251,
- mbfl_no_encoding_cp866
- };
- static mbfl_no_encoding php_mb_default_identify_list_hy[] = {
- mbfl_no_encoding_ascii,
- mbfl_no_encoding_utf8,
- mbfl_no_encoding_armscii8
- };
- static mbfl_no_encoding php_mb_default_identify_list_tr[] = {
- mbfl_no_encoding_ascii,
- mbfl_no_encoding_utf8,
- mbfl_no_encoding_8859_9
- };
- static mbfl_no_encoding php_mb_default_identify_list_neut[] = {
- mbfl_no_encoding_ascii,
- mbfl_no_encoding_utf8
- };
- static php_mb_nls_ident_list php_mb_default_identify_list[] = {
- { mbfl_no_language_japanese, php_mb_default_identify_list_ja,
- sizeof(php_mb_default_identify_list_ja) /
- sizeof(php_mb_default_identify_list_ja[0]) },
- { mbfl_no_language_korean, php_mb_default_identify_list_kr,
- sizeof(php_mb_default_identify_list_kr) /
- sizeof(php_mb_default_identify_list_kr[0]) },
- { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk,
- sizeof(php_mb_default_identify_list_tw_hk) /
- sizeof(php_mb_default_identify_list_tw_hk[0]) },
- { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn,
- sizeof(php_mb_default_identify_list_cn) /
- sizeof(php_mb_default_identify_list_cn[0]) },
- { mbfl_no_language_russian, php_mb_default_identify_list_ru,
- sizeof(php_mb_default_identify_list_ru) /
- sizeof(php_mb_default_identify_list_ru[0]) },
- { mbfl_no_language_armenian, php_mb_default_identify_list_hy,
- sizeof(php_mb_default_identify_list_hy) /
- sizeof(php_mb_default_identify_list_hy[0]) },
- { mbfl_no_language_turkish, php_mb_default_identify_list_tr,
- sizeof(php_mb_default_identify_list_tr) /
- sizeof(php_mb_default_identify_list_tr[0]) },
- { mbfl_no_language_neutral, php_mb_default_identify_list_neut,
- sizeof(php_mb_default_identify_list_neut) /
- sizeof(php_mb_default_identify_list_neut[0]) }
- };
- ///////////////////////////////////////////////////////////////////////////////
- // globals
- typedef std::map<std::string, php_mb_regex_t *> RegexCache;
- class MBGlobals : public RequestEventHandler {
- public:
- mbfl_no_language language;
- mbfl_no_language current_language;
- mbfl_no_encoding internal_encoding;
- mbfl_no_encoding current_internal_encoding;
- mbfl_no_encoding http_output_encoding;
- mbfl_no_encoding current_http_output_encoding;
- mbfl_no_encoding http_input_identify;
- mbfl_no_encoding http_input_identify_get;
- mbfl_no_encoding http_input_identify_post;
- mbfl_no_encoding http_input_identify_cookie;
- mbfl_no_encoding http_input_identify_string;
- mbfl_no_encoding *http_input_list;
- int http_input_list_size;
- mbfl_no_encoding *detect_order_list;
- int detect_order_list_size;
- mbfl_no_encoding *current_detect_order_list;
- int current_detect_order_list_size;
- mbfl_no_encoding *default_detect_order_list;
- int default_detect_order_list_size;
- int filter_illegal_mode;
- int filter_illegal_substchar;
- int current_filter_illegal_mode;
- int current_filter_illegal_substchar;
- bool encoding_translation;
- long strict_detection;
- long illegalchars;
- mbfl_buffer_converter *outconv;
- OnigEncoding default_mbctype;
- OnigEncoding current_mbctype;
- RegexCache ht_rc;
- std::string search_str;
- unsigned int search_pos;
- php_mb_regex_t *search_re;
- OnigRegion *search_regs;
- OnigOptionType regex_default_options;
- OnigSyntaxType *regex_default_syntax;
- MBGlobals() :
- language(mbfl_no_language_uni),
- current_language(mbfl_no_language_uni),
- internal_encoding(mbfl_no_encoding_utf8),
- current_internal_encoding(mbfl_no_encoding_utf8),
- http_output_encoding(mbfl_no_encoding_pass),
- current_http_output_encoding(mbfl_no_encoding_pass),
- http_input_identify(mbfl_no_encoding_invalid),
- http_input_identify_get(mbfl_no_encoding_invalid),
- http_input_identify_post(mbfl_no_encoding_invalid),
- http_input_identify_cookie(mbfl_no_encoding_invalid),
- http_input_identify_string(mbfl_no_encoding_invalid),
- http_input_list(NULL),
- http_input_list_size(0),
- detect_order_list(NULL),
- detect_order_list_size(0),
- current_detect_order_list(NULL),
- current_detect_order_list_size(0),
- default_detect_order_list
- ((mbfl_no_encoding *)php_mb_default_identify_list_neut),
- default_detect_order_list_size
- (sizeof(php_mb_default_identify_list_neut) /
- sizeof(php_mb_default_identify_list_neut[0])),
- filter_illegal_mode(MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR),
- filter_illegal_substchar(0x3f), /* '?' */
- current_filter_illegal_mode(MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR),
- current_filter_illegal_substchar(0x3f), /* '?' */
- encoding_translation(0),
- strict_detection(0),
- illegalchars(0),
- outconv(NULL),
- default_mbctype(ONIG_ENCODING_EUC_JP),
- current_mbctype(ONIG_ENCODING_EUC_JP),
- search_pos(0),
- search_re((php_mb_regex_t*)NULL),
- search_regs((OnigRegion*)NULL),
- regex_default_options(ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE),
- regex_default_syntax(ONIG_SYNTAX_RUBY) {
- }
- virtual void requestInit() {
- current_language = language;
- current_internal_encoding = internal_encoding;
- current_http_output_encoding = http_output_encoding;
- current_filter_illegal_mode = filter_illegal_mode;
- current_filter_illegal_substchar = filter_illegal_substchar;
- if (!encoding_translation) {
- illegalchars = 0;
- }
- mbfl_no_encoding *list=NULL, *entry;
- int n = 0;
- if (detect_order_list) {
- list = detect_order_list;
- n = detect_order_list_size;
- }
- if (n <= 0) {
- list = default_detect_order_list;
- n = default_detect_order_list_size;
- }
- entry = (mbfl_no_encoding *)malloc(n * sizeof(int));
- current_detect_order_list = entry;
- current_detect_order_list_size = n;
- while (n > 0) {
- *entry++ = *list++;
- n--;
- }
- }
- virtual void requestShutdown() {
- if (current_detect_order_list != NULL) {
- free(current_detect_order_list);
- current_detect_order_list = NULL;
- current_detect_order_list_size = 0;
- }
- if (outconv != NULL) {
- illegalchars += mbfl_buffer_illegalchars(outconv);
- mbfl_buffer_converter_delete(outconv);
- outconv = NULL;
- }
- /* clear http input identification. */
- http_input_identify = mbfl_no_encoding_invalid;
- http_input_identify_post = mbfl_no_encoding_invalid;
- http_input_identify_get = mbfl_no_encoding_invalid;
- http_input_identify_cookie = mbfl_no_encoding_invalid;
- http_input_identify_string = mbfl_no_encoding_invalid;
- current_mbctype = default_mbctype;
- search_str.clear();
- search_pos = 0;
- if (search_regs != NULL) {
- onig_region_free(search_regs, 1);
- search_regs = (OnigRegion *)NULL;
- }
- for (RegexCache::const_iterator it = ht_rc.begin(); it != ht_rc.end();
- ++it) {
- onig_free(it->second);
- }
- ht_rc.clear();
- }
- };
- IMPLEMENT_STATIC_REQUEST_LOCAL(MBGlobals, s_mb_globals);
- #define MBSTRG(name) s_mb_globals->name
- ///////////////////////////////////////////////////////////////////////////////
- // unicode functions
/*
 * Single-bit masks: masks32[k] has only bit k set, for k in 0..31.
 */
static unsigned long masks32[32] = {
  1UL <<  0, 1UL <<  1, 1UL <<  2, 1UL <<  3,
  1UL <<  4, 1UL <<  5, 1UL <<  6, 1UL <<  7,
  1UL <<  8, 1UL <<  9, 1UL << 10, 1UL << 11,
  1UL << 12, 1UL << 13, 1UL << 14, 1UL << 15,
  1UL << 16, 1UL << 17, 1UL << 18, 1UL << 19,
  1UL << 20, 1UL << 21, 1UL << 22, 1UL << 23,
  1UL << 24, 1UL << 25, 1UL << 26, 1UL << 27,
  1UL << 28, 1UL << 29, 1UL << 30, 1UL << 31
};
- static int prop_lookup(unsigned long code, unsigned long n) {
- long l, r, m;
- /*
- * There is an extra node on the end of the offsets to allow this routine
- * to work right. If the index is 0xffff, then there are no nodes for the
- * property.
- */
- if ((l = _ucprop_offsets[n]) == 0xffff)
- return 0;
- /*
- * Locate the next offset that is not 0xffff. The sentinel at the end of
- * the array is the max index value.
- */
- for (m = 1; n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++)
- ;
- r = _ucprop_offsets[n + m] - 1;
- while (l <= r) {
- /*
- * Determine a "mid" point and adjust to make sure the mid point is at
- * the beginning of a range pair.
- */
- m = (l + r) >> 1;
- m -= (m & 1);
- if (code > _ucprop_ranges[m + 1])
- l = m + 2;
- else if (code < _ucprop_ranges[m])
- r = m - 2;
- else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
- return 1;
- }
- return 0;
- }
- static int php_unicode_is_prop(unsigned long code, unsigned long mask1,
- unsigned long mask2) {
- unsigned long i;
- if (mask1 == 0 && mask2 == 0)
- return 0;
- for (i = 0; mask1 && i < 32; i++) {
- if ((mask1 & masks32[i]) && prop_lookup(code, i))
- return 1;
- }
- for (i = 32; mask2 && i < _ucprop_size; i++) {
- if ((mask2 & masks32[i & 31]) && prop_lookup(code, i))
- return 1;
- }
- return 0;
- }
- static unsigned long case_lookup(unsigned long code, long l, long r,
- int field) {
- long m;
- /*
- * Do the binary search.
- */
- while (l <= r) {
- /*
- * Determine a "mid" point and adjust to make sure the mid point is at
- * the beginning of a case mapping triple.
- */
- m = (l + r) >> 1;
- m -= (m % 3);
- if (code > _uccase_map[m])
- l = m + 3;
- else if (code < _uccase_map[m])
- r = m - 3;
- else if (code == _uccase_map[m])
- return _uccase_map[m + field];
- }
- return code;
- }
- static unsigned long php_turkish_toupper(unsigned long code, long l, long r,
- int field) {
- if (code == 0x0069L) {
- return 0x0130L;
- }
- return case_lookup(code, l, r, field);
- }
- static unsigned long php_turkish_tolower(unsigned long code, long l, long r,
- int field) {
- if (code == 0x0049L) {
- return 0x0131L;
- }
- return case_lookup(code, l, r, field);
- }
- static unsigned long php_unicode_toupper(unsigned long code,
- enum mbfl_no_encoding enc) {
- int field;
- long l, r;
- if (php_unicode_is_upper(code))
- return code;
- if (php_unicode_is_lower(code)) {
- /*
- * The character is lower case.
- */
- field = 2;
- l = _uccase_len[0];
- r = (l + _uccase_len[1]) - 3;
- if (enc == mbfl_no_encoding_8859_9) {
- return php_turkish_toupper(code, l, r, field);
- }
- } else {
- /*
- * The character is title case.
- */
- field = 1;
- l = _uccase_len[0] + _uccase_len[1];
- r = _uccase_size - 3;
- }
- return case_lookup(code, l, r, field);
- }
- static unsigned long php_unicode_tolower(unsigned long code,
- enum mbfl_no_encoding enc) {
- int field;
- long l, r;
- if (php_unicode_is_lower(code))
- return code;
- if (php_unicode_is_upper(code)) {
- /*
- * The character is upper case.
- */
- field = 1;
- l = 0;
- r = _uccase_len[0] - 3;
- if (enc == mbfl_no_encoding_8859_9) {
- return php_turkish_tolower(code, l, r, field);
- }
- } else {
- /*
- * The character is title case.
- */
- field = 2;
- l = _uccase_len[0] + _uccase_len[1];
- r = _uccase_size - 3;
- }
- return case_lookup(code, l, r, field);
- }
- static unsigned long php_unicode_totitle(unsigned long code,
- enum mbfl_no_encoding enc) {
- int field;
- long l, r;
- if (php_unicode_is_title(code))
- return code;
- /*
- * The offset will always be the same for converting to title case.
- */
- field = 2;
- if (php_unicode_is_upper(code)) {
- /*
- * The character is upper case.
- */
- l = 0;
- r = _uccase_len[0] - 3;
- } else {
- /*
- * The character is lower case.
- */
- l = _uccase_len[0];
- r = (l + _uccase_len[1]) - 3;
- }
- return case_lookup(code, l, r, field);
- }
/*
 * Reads four bytes at `ptr` as a big-endian 32-bit value.
 *
 * Each byte is widened to unsigned int BEFORE shifting. Without that, the
 * byte is promoted to signed int, so a lead byte >= 0x80 shifted by 24
 * overflows and the result sign-extends when converted to unsigned long.
 */
#define BE_ARY_TO_UINT32(ptr) ((unsigned int)( \
  ((unsigned int)((const unsigned char*)(ptr))[0] << 24) | \
  ((unsigned int)((const unsigned char*)(ptr))[1] << 16) | \
  ((unsigned int)((const unsigned char*)(ptr))[2] <<  8) | \
  ((unsigned int)((const unsigned char*)(ptr))[3]      ) ))

/*
 * Stores the low 32 bits of `val` at `ptr` as four big-endian bytes.
 * `val` is evaluated once, then `ptr` once. The brace-block shape of the
 * macro is kept so existing call sites expand the same way.
 */
#define UINT32_TO_BE_ARY(ptr,val) { \
  unsigned int be_val_ = (unsigned int)(val); \
  unsigned char *be_ptr_ = (unsigned char*)(ptr); \
  be_ptr_[0] = (unsigned char)((be_val_ >> 24) & 0xff); \
  be_ptr_[1] = (unsigned char)((be_val_ >> 16) & 0xff); \
  be_ptr_[2] = (unsigned char)((be_val_ >>  8) & 0xff); \
  be_ptr_[3] = (unsigned char)((be_val_      ) & 0xff); \
}
- /**
- * Return 0 if input contains any illegal encoding, otherwise 1.
- * Even if any illegal encoding is detected the result may contain a list
- * of parsed encodings.
- */
- static int php_mb_parse_encoding_list(const char *value, int value_length,
- mbfl_no_encoding **return_list,
- int *return_size, int persistent) {
- int n, l, size, bauto, ret = 1;
- char *p, *p1, *p2, *endp, *tmpstr;
- mbfl_no_encoding no_encoding;
- mbfl_no_encoding *src, *entry, *list;
- list = NULL;
- if (value == NULL || value_length <= 0) {
- if (return_list) {
- *return_list = NULL;
- }
- if (return_size) {
- *return_size = 0;
- }
- return 0;
- } else {
- mbfl_no_encoding *identify_list;
- int identify_list_size;
- identify_list = MBSTRG(default_detect_order_list);
- identify_list_size = MBSTRG(default_detect_order_list_size);
- /* copy the value string for work */
- if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
- tmpstr = (char *)strndup(value+1, value_length-2);
- value_length -= 2;
- }
- else
- tmpstr = (char *)strndup(value, value_length);
- if (tmpstr == NULL) {
- return 0;
- }
- /* count the number of listed encoding names */
- endp = tmpstr + value_length;
- n = 1;
- p1 = tmpstr;
- while ((p2 = (char*)string_memnstr(p1, ",", 1, endp)) != NULL) {
- p1 = p2 + 1;
- n++;
- }
- size = n + identify_list_size;
- /* make list */
- list = (mbfl_no_encoding *)calloc(size, sizeof(int));
- if (list != NULL) {
- entry = list;
- n = 0;
- bauto = 0;
- p1 = tmpstr;
- do {
- p2 = p = (char*)string_memnstr(p1, ",", 1, endp);
- if (p == NULL) {
- p = endp;
- }
- *p = '\0';
- /* trim spaces */
- while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
- p1++;
- }
- p--;
- while (p > p1 && (*p == ' ' || *p == '\t')) {
- *p = '\0';
- p--;
- }
- /* convert to the encoding number and check encoding */
- if (strcasecmp(p1, "auto") == 0) {
- if (!bauto) {
- bauto = 1;
- l = identify_list_size;
- src = identify_list;
- while (l > 0) {
- *entry++ = *src++;
- l--;
- n++;
- }
- }
- } else {
- no_encoding = mbfl_name2no_encoding(p1);
- if (no_encoding != mbfl_no_encoding_invalid) {
- *entry++ = no_encoding;
- n++;
- } else {
- ret = 0;
- }
- }
- p1 = p2 + 1;
- } while (n < size && p2 != NULL);
- if (n > 0) {
- if (return_list) {
- *return_list = list;
- } else {
- free(list);
- }
- } else {
- free(list);
- if (return_list) {
- *return_list = NULL;
- }
- ret = 0;
- }
- if (return_size) {
- *return_size = n;
- }
- } else {
- if (return_list) {
- *return_list = NULL;
- }
- if (return_size) {
- *return_size = 0;
- }
- ret = 0;
- }
- free(tmpstr);
- }
- return ret;
- }
- static char *php_mb_convert_encoding(const char *input, size_t length,
- const char *_to_encoding,
- const char *_from_encodings,
- unsigned int *output_len) {
- mbfl_string string, result, *ret;
- mbfl_no_encoding from_encoding, to_encoding;
- mbfl_buffer_converter *convd;
- int size;
- mbfl_no_encoding *list;
- char *output = NULL;
- if (output_len) {
- *output_len = 0;
- }
- if (!input) {
- return NULL;
- }
- /* new encoding */
- if (_to_encoding && strlen(_to_encoding)) {
- to_encoding = mbfl_name2no_encoding(_to_encoding);
- if (to_encoding == mbfl_no_encoding_invalid) {
- raise_warning("Unknown encoding \"%s\"", _to_encoding);
- return NULL;
- }
- } else {
- to_encoding = MBSTRG(current_internal_encoding);
- }
- /* initialize string */
- mbfl_string_init(&string);
- mbfl_string_init(&result);
- from_encoding = MBSTRG(current_internal_encoding);
- string.no_encoding = from_encoding;
- string.no_language = MBSTRG(current_language);
- string.val = (unsigned char *)input;
- string.len = length;
- /* pre-conversion encoding */
- if (_from_encodings) {
- list = NULL;
- size = 0;
- php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings),
- &list, &size, 0);
- if (size == 1) {
- from_encoding = *list;
- string.no_encoding = from_encoding;
- } else if (size > 1) {
- /* auto detect */
- from_encoding = mbfl_identify_encoding_no(&string, list, size,
- MBSTRG(strict_detection));
- if (from_encoding != mbfl_no_encoding_invalid) {
- string.no_encoding = from_encoding;
- } else {
- raise_warning("Unable to detect character encoding");
- from_encoding = mbfl_no_encoding_pass;
- to_encoding = from_encoding;
- string.no_encoding = from_encoding;
- }
- } else {
- raise_warning("Illegal character encoding specified");
- }
- if (list != NULL) {
- free((void *)list);
- }
- }
- /* initialize converter */
- convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
- if (convd == NULL) {
- raise_warning("Unable to create character encoding converter");
- return NULL;
- }
- mbfl_buffer_converter_illegal_mode
- (convd, MBSTRG(current_filter_illegal_mode));
- mbfl_buffer_converter_illegal_substchar
- (convd, MBSTRG(current_filter_illegal_substchar));
- /* do it */
- ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
- if (ret) {
- if (output_len) {
- *output_len = ret->len;
- }
- output = (char *)ret->val;
- }
- MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
- mbfl_buffer_converter_delete(convd);
- return output;
- }
- static char *php_unicode_convert_case(int case_mode, const char *srcstr,
- size_t srclen, unsigned int *ret_len,
- const char *src_encoding) {
- char *unicode, *newstr;
- unsigned int unicode_len;
- unsigned char *unicode_ptr;
- size_t i;
- enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding);
- unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding,
- &unicode_len);
- if (unicode == NULL)
- return NULL;
- unicode_ptr = (unsigned char *)unicode;
- switch(case_mode) {
- case PHP_UNICODE_CASE_UPPER:
- for (i = 0; i < unicode_len; i+=4) {
- UINT32_TO_BE_ARY(&unicode_ptr[i],
- php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]),
- _src_encoding));
- }
- break;
- case PHP_UNICODE_CASE_LOWER:
- for (i = 0; i < unicode_len; i+=4) {
- UINT32_TO_BE_ARY(&unicode_ptr[i],
- php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]),
- _src_encoding));
- }
- break;
- case PHP_UNICODE_CASE_TITLE:
- {
- int mode = 0;
- for (i = 0; i < unicode_len; i+=4) {
- int res = php_unicode_is_prop
- (BE_ARY_TO_UINT32(&unicode_ptr[i]),
- UC_MN|UC_ME|UC_CF|UC_LM|UC_SK|UC_LU|UC_LL|UC_LT, 0);
- if (mode) {
- if (res) {
- UINT32_TO_BE_ARY
- (&unicode_ptr[i],
- php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]),
- _src_encoding));
- } else {
- mode = 0;
- }
- } else {
- if (res) {
- mode = 1;
- UINT32_TO_BE_ARY
- (&unicode_ptr[i],
- php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]),
- _src_encoding));
- }
- }
- }
- }
- break;
- }
- newstr = php_mb_convert_encoding(unicode, unicode_len, src_encoding,
- "UCS-4BE", ret_len);
- free(unicode);
- return newstr;
- }
- ///////////////////////////////////////////////////////////////////////////////
- // helpers
- /**
- * Return 0 if input contains any illegal encoding, otherwise 1.
- * Even if any illegal encoding is detected the result may contain a list
- * of parsed encodings.
- */
- static int php_mb_parse_encoding_array(CArrRef array,
- mbfl_no_encoding **return_list,
- int *return_size, int persistent) {
- int n, l, size, bauto,ret = 1;
- mbfl_no_encoding no_encoding;
- mbfl_no_encoding *src, *list, *entry;
- list = NULL;
- mbfl_no_encoding *identify_list = MBSTRG(default_detect_order_list);
- int identify_list_size = MBSTRG(default_detect_order_list_size);
- size = array.size() + identify_list_size;
- list = (mbfl_no_encoding *)calloc(size, sizeof(int));
- if (list != NULL) {
- entry = list;
- bauto = 0;
- n = 0;
- for (ArrayIter iter(array); iter; ++iter) {
- String hash_entry = iter.second();
- if (strcasecmp(hash_entry.data(), "auto") == 0) {
- if (!bauto) {
- bauto = 1;
- l = identify_list_size;
- src = identify_list;
- while (l > 0) {
- *entry++ = *src++;
- l--;
- n++;
- }
- }
- } else {
- no_encoding = mbfl_name2no_encoding(hash_entry.data());
- if (no_encoding != mbfl_no_encoding_invalid) {
- *entry++ = no_encoding;
- n++;
- } else {
- ret = 0;
- }
- }
- }
- if (n > 0) {
- if (return_list) {
- *return_list = list;
- } else {
- free(list);
- }
- } else {
- free(list);
- if (return_list) {
- *return_list = NULL;
- }
- ret = 0;
- }
- if (return_size) {
- *return_size = n;
- }
- } else {
- if (return_list) {
- *return_list = NULL;
- }
- if (return_size) {
- *return_size = 0;
- }
- ret = 0;
- }
- return ret;
- }
- static bool php_mb_parse_encoding(CVarRef encoding,
- mbfl_no_encoding **return_list,
- int *return_size, bool persistent) {
- bool ret;
- if (encoding.is(KindOfArray)) {
- ret = php_mb_parse_encoding_array(encoding.toArray(),
- return_list, return_size,
- persistent ? 1 : 0);
- } else {
- String enc = encoding.toString();
- ret = php_mb_parse_encoding_list(enc.data(), enc.size(),
- return_list, return_size,
- persistent ? 1 : 0);
- }
- if (!ret) {
- if (return_list && *return_list) {
- free(*return_list);
- *return_list = NULL;
- }
- return_size = 0;
- }
- return ret;
- }
- static int php_mb_nls_get_default_detect_order_list(mbfl_no_language lang,
- mbfl_no_encoding **plist,
- int* plist_size) {
- size_t i;
- *plist = (mbfl_no_encoding *) php_mb_default_identify_list_neut;
- *plist_size = sizeof(php_mb_default_identify_list_neut) /
- sizeof(php_mb_default_identify_list_neut[0]);
- for (i = 0; i < sizeof(php_mb_default_identify_list) /
- sizeof(php_mb_default_identify_list[0]); i++) {
- if (php_mb_default_identify_list[i].lang == lang) {
- *plist = php_mb_default_identify_list[i].list;
- *plist_size = php_mb_default_identify_list[i].list_size;
- return 1;
- }
- }
- return 0;
- }
- static size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc) {
- if (enc != NULL) {
- if (enc->flag & MBFL_ENCTYPE_MBCS) {
- if (enc->mblen_table != NULL) {
- if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
- }
- } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
- return 2;
- } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
- return 4;
- }
- }
- return 1;
- }
- static int php_mb_stripos(int mode,
- const char *old_haystack, int old_haystack_len,
- const char *old_needle, int old_needle_len,
- long offset, const char *from_encoding) {
- int n;
- mbfl_string haystack, needle;
- n = -1;
- mbfl_string_init(&haystack);
- mbfl_string_init(&needle);
- haystack.no_language = MBSTRG(current_language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
- needle.no_language = MBSTRG(current_language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
- do {
- haystack.val = (unsigned char *)php_unicode_convert_case
- (PHP_UNICODE_CASE_UPPER, old_haystack, (size_t)old_haystack_len,
- &haystack.len, from_encoding);
- if (!haystack.val) {
- break;
- }
- if (haystack.len <= 0) {
- break;
- }
- needle.val = (unsigned char *)php_unicode_convert_case
- (PHP_UNICODE_CASE_UPPER, old_needle, (size_t)old_needle_len,
- &needle.len, from_encoding);
- if (!needle.val) {
- break;
- }
- if (needle.len <= 0) {
- break;
- }
- haystack.no_encoding = needle.no_encoding =
- mbfl_name2no_encoding(from_encoding);
- if (haystack.no_encoding == mbfl_no_encoding_invalid) {
- raise_warning("Unknown encoding \"%s\"", from_encoding);
- break;
- }
- int haystack_char_len = mbfl_strlen(&haystack);
- if (mode) {
- if ((offset > 0 && offset > haystack_char_len) ||
- (offset < 0 && -offset > haystack_char_len)) {
- raise_warning("Offset is greater than the length of haystack string");
- break;
- }
- } else {
- if (offset < 0 || offset > haystack_char_len) {
- raise_warning("Offset not contained in string.");
- break;
- }
- }
- n = mbfl_strpos(&haystack, &needle, offset, mode);
- } while(0);
- if (haystack.val) {
- free(haystack.val);
- }
- if (needle.val) {
- free(needle.val);
- }
- return n;
- }
- ///////////////////////////////////////////////////////////////////////////////
- Array f_mb_list_encodings() {
- Array ret;
- int i = 0;
- const mbfl_encoding **encodings = mbfl_get_supported_encodings();
- const mbfl_encoding *encoding;
- while ((encoding = encodings[i++]) != NULL) {
- ret.append(String(encoding->name, CopyString));
- }
- return ret;
- }
- Variant f_mb_list_encodings_alias_names(const String& name /*= null_string*/) {
- const mbfl_encoding **encodings;
- const mbfl_encoding *encoding;
- mbfl_no_encoding no_encoding;
- int i, j;
- Array ret;
- if (name.isNull()) {
- i = 0;
- encodings = mbfl_get_supported_encodings();
- while ((encoding = encodings[i++]) != NULL) {
- Array row;
- if (encoding->aliases != NULL) {
- j = 0;
- while ((*encoding->aliases)[j] != NULL) {
- row.append(String((*encoding->aliases)[j], CopyString));
- j++;
- }
- }
- ret.set(String(encoding->name, CopyString), row);
- }
- } else {
- no_encoding = mbfl_name2no_encoding(name.data());
- if (no_encoding == mbfl_no_encoding_invalid) {
- raise_warning("Unknown encoding \"%s\"", name.data());
- return false;
- }
- char *name = (char *)mbfl_no_encoding2name(no_encoding);
- if (name != NULL) {
- i = 0;
- encodings = mbfl_get_supported_encodings();
- while ((encoding = encodings[i++]) != NULL) {
- if (strcmp(encoding->name, name) != 0) continue;
- if (encoding->aliases != NULL) {
- j = 0;
- while ((*encoding->aliases)[j] != NULL) {
- ret.append(String((*encoding->aliases)[j], CopyString));
- j++;
- }
- }
- break;
- }
- } else {
- return false;
- }
- }
- return ret;
- }
- Variant f_mb_list_mime_names(const String& name /* = null_string */) {
- const mbfl_encoding **encodings;
- const mbfl_encoding *encoding;
- mbfl_no_encoding no_encoding;
- int i;
- Array ret;
- if (name.isNull()) {
- i = 0;
- encodings = mbfl_get_supported_encodings();
- while ((encoding = encodings[i++]) != NULL) {
- if (encoding->mime_name != NULL) {
- ret.set(String(encoding->name, CopyString),
- String(encoding->mime_name, CopyString));
- } else{
- ret.set(String(encoding->name, CopyString), "");
- }
- }
- } else {
- no_encoding = mbfl_name2no_encoding(name.data());
- if (no_encoding == mbfl_no_encoding_invalid) {
- raise_warning("Unknown encoding \"%s\"", name.data());
- return false;
- }
- char *name = (char *)mbfl_no_encoding2name(no_encoding);
- if (name != NULL) {
- i = 0;
- encodings = mbfl_get_supported_encodings();
- while ((encoding = encodings[i++]) != NULL) {
- if (strcmp(encoding->name, name) != 0) continue;
- if (encoding->mime_name != NULL) {
- return String(encoding->mime_name, CopyString);
- }
- break;
- }
- return "";
- } else {
- return false;
- }
- }
- return ret;
- }
- bool f_mb_check_encoding(const String& var /* = null_string */,
- const String& encoding /* = null_string */) {
- mbfl_buffer_converter *convd;
- mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
- mbfl_string string, result, *ret = NULL;
- long illegalchars = 0;
- if (var.isNull()) {
- return MBSTRG(illegalchars) == 0;
- }
- if (!encoding.isNull()) {
- no_encoding = mbfl_name2no_encoding(encoding.data());
- if (no_encoding == mbfl_no_encoding_invalid ||
- no_encoding == mbfl_no_encoding_pass) {
- raise_warning("Invalid encoding \"%s\"", encoding.data());
- return false;
- }
- }
- convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
- if (convd == NULL) {
- raise_warning("Unable to create converter");
- return false;
- }
- mbfl_buffer_converter_illegal_mode
- (convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
- mbfl_buffer_converter_illegal_substchar
- (convd, 0);
- /* initialize string */
- mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
- mbfl_string_init(&result);
- string.val = (unsigned char *)var.data();
- string.len = var.size();
- ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
- illegalchars = mbfl_buffer_illegalchars(convd);
- mbfl_buffer_converter_delete(convd);
- if (ret != NULL) {
- MBSTRG(illegalchars) += illegalchars;
- if (illegalchars == 0 && string.len == ret->len &&
- memcmp((const char *)string.val, (const char *)ret->val,
- string.len) == 0) {
- mbfl_string_clear(&result);
- return true;
- } else {
- mbfl_string_clear(&result);
- return false;
- }
- } else {
- return false;
- }
- }
- Variant f_mb_convert_case(const String& str, int mode,
- const String& encoding /* = null_string */) {
- const char *enc = NULL;
- if (encoding.empty()) {
- enc = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
- }
- unsigned int ret_len;
- char *newstr = php_unicode_convert_case(mode, str.data(), str.size(),
- &ret_len, enc);
- if (newstr) {
- return String(newstr, ret_len, AttachString);
- }
- return false;
- }
- Variant f_mb_convert_encoding(const String& str, const String& to_encoding,
- CVarRef from_encoding /* = null_variant */) {
- String encoding = from_encoding.toString();
- if (from_encoding.is(KindOfArray)) {
- StringBuffer _from_encodings;
- Array encs = from_encoding.toArray();
- for (ArrayIter iter(encs); iter; ++iter) {
- if (!_from_encodings.empty()) {
- _from_encodings.append(",");
- }
- _from_encodings.append(iter.second().toString());
- }
- encoding = _from_encodings.detach();
- }
- unsigned int size;
- char *ret = php_mb_convert_encoding(str.data(), str.size(),
- to_encoding.data(),
- (!encoding.empty() ?
- encoding.data() : NULL),
- &size);
- if (ret != NULL) {
- return String(ret, size, AttachString);
- }
- return false;
- }
- Variant f_mb_convert_kana(const String& str,
- const String& option /* = null_string */,
- const String& encoding /* = null_string */) {
- mbfl_string string, result, *ret;
- mbfl_string_init(&string);
- string.no_language = MBSTRG(current_language);
- string.no_encoding = MBSTRG(current_internal_encoding);
- string.val = (unsigned char *)str.data();
- string.len = str.size();
- int opt = 0x900;
- if (!option.empty()) {
- const char *p = option.data();
- int n = option.size();
- int i = 0;
- opt = 0;
- while (i < n) {
- i++;
- switch (*p++) {
- case 'A': opt |= 0x1; break;
- case 'a': opt |= 0x10; break;
- case 'R': opt |= 0x2; break;
- case 'r': opt |= 0x20; break;
- case 'N': opt |= 0x4; break;
- case 'n': opt |= 0x40; break;
- case 'S': opt |= 0x8; break;
- case 's': opt |= 0x80; break;
- case 'K': opt |= 0x100; break;
- case 'k': opt |= 0x1000; break;
- case 'H': opt |= 0x200; break;
- case 'h': opt |= 0x2000; break;
- case 'V': opt |= 0x800; break;
- case 'C': opt |= 0x10000; break;
- case 'c': opt |= 0x20000; break;
- case 'M': opt |= 0x100000; break;
- case 'm': opt |= 0x200000; break;
- }
- }
- }
- /* encoding */
- if (!encoding.empty()) {
- string.no_encoding = mbfl_name2no_encoding(encoding.data());
- if (string.no_encoding == mbfl_no_encoding_invalid) {
- raise_warning("Unknown encoding \"%s\"", encoding.data());
- return false;
- }
- }
- ret = mbfl_ja_jp_hantozen(&string, &result, opt);
- if (ret != NULL) {
- return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
- }
- return false;
- }
- static bool php_mbfl_encoding_detect(CVarRef var,
- mbfl_encoding_detector *identd,
- mbfl_string *string) {
- if (var.is(KindOfArray) || var.is(KindOfObject)) {
- Array items = var.toArray();
- for (ArrayIter iter(items); iter; ++iter) {
- if (php_mbfl_encoding_detect(iter.second(), identd, string)) {
- return true;
- }
- }
- } else if (var.isString()) {
- String svar = var.toString();
- string->val = (unsigned char *)svar.data();
- string->len = svar.size();
- if (mbfl_encoding_detector_feed(identd, string)) {
- return true;
- }
- }
- return false;
- }
- static Variant php_mbfl_convert(CVarRef var,
- mbfl_buffer_converter *convd,
- mbfl_string *string,
- mbfl_string *result) {
- if (var.is(KindOfArray)) {
- Array ret;
- Array items = var.toArray();
- for (ArrayIter iter(items); iter; ++iter) {
- ret.set(iter.first(),
- php_mbfl_convert(iter.second(), convd, string, result));
- }
- return ret;
- }
- if (var.is(KindOfObject)) {
- Object obj = var.toObject();
- Array items = var.toArray();
- for (ArrayIter iter(items); iter; ++iter) {
- obj->o_set(iter.first().toString(),
- php_mbfl_convert(iter.second().toString().data(), convd,
- string, result));
- }
- return var; // which still has obj
- }
- if (var.isString()) {
- String svar = var.toString();
- string->val = (unsigned char *)svar.data();
- string->len = svar.size();
- mbfl_string *ret =
- mbfl_buffer_converter_feed_result(convd, string, result);
- return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
- }
- return var;
- }
- Variant f_mb_convert_variables(int _argc, const String& to_encoding,
- CVarRef from_encoding, VRefParam vars,
- CArrRef _argv /* = null_array */) {
- mbfl_string string, result;
- mbfl_no_encoding _from_encoding, _to_encoding;
- mbfl_encoding_detector *identd;
- mbfl_buffer_converter *convd;
- int elistsz;
- mbfl_no_encoding *elist;
- char *name;
- /* new encoding */
- _to_encoding = mbfl_name2no_encoding(to_encoding.data());
- if (_to_encoding == mbfl_no_encoding_invalid) {
- raise_warning("Unknown encoding \"%s\"", to_encoding.data());
- return false;
- }
- /* initialize string */
- mbfl_string_init(&string);
- mbfl_string_init(&result);
- _from_encoding = MBSTRG(current_internal_encoding);
- string.no_encoding = _from_encoding;
- string.no_language = MBSTRG(current_language);
- /* pre-conversion encoding */
- elist = NULL;
- elistsz = 0;
- php_mb_parse_encoding(from_encoding, &elist, &elistsz, false);
- if (elistsz <= 0) {
- _from_encoding = mbfl_no_encoding_pass;
- } else if (elistsz == 1) {
- _from_encoding = *elist;
- } else {
- /* auto detect */
- _from_encoding = mbfl_no_encoding_invalid;
- identd = mbfl_encoding_detector_new(elist, elistsz,
- MBSTRG(strict_detection));
- if (identd != NULL) {
- for (int n = -1; n < _argv.size(); n++) {
- if (php_mbfl_encoding_detect(n < 0 ? (Variant&)vars : _argv[n],
- identd, &string)) {
- break;
- }
- }
- _from_encoding = mbfl_encoding_detector_judge(identd);
- mbfl_encoding_detector_delete(identd);
- }
- if (_from_encoding == mbfl_no_encoding_invalid) {
- raise_warning("Unable to detect encoding");
- _from_encoding = mbfl_no_encoding_pass;
- }
- }
- if (elist != NULL) {
- free((void *)elist);
- }
- /* create converter */
- convd = NULL;
- if (_from_encoding != mbfl_no_encoding_pass) {
- convd = mbfl_buffer_converter_new(_from_encoding, _to_encoding, 0);
- if (convd == NULL) {
- raise_warning("Unable to create converter");
- return false;
- }
- mbfl_buffer_converter_illegal_mode
- (convd, MBSTRG(current_filter_illegal_mode));
- mbfl_buffer_converter_illegal_substchar
- (convd, MBSTRG(current_filter_illegal_substchar));
- }
- /* convert */
- if (convd != NULL) {
- vars = php_mbfl_convert(vars, convd, &string, &result);
- for (int n = 0; n < _argv.size(); n++) {
- const_cast<Array&>(_argv).lval(n) =
- php_mbfl_convert(_argv[n], convd, &string, &result);
- }
- MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
- mbfl_buffer_converter_delete(convd);
- }
- name = (char *)mbfl_no_encoding2name(_from_encoding);
- if (name != NULL) {
- return String(name, CopyString);
- }
- return false;
- }
- Variant f_mb_decode_mimeheader(const String& str) {
- mbfl_string string, result, *ret;
- mbfl_string_init(&string);
- string.no_language = MBSTRG(current_language);
- string.no_encoding = MBSTRG(current_internal_encoding);
- string.val = (unsigned char *)str.data();
- string.len = str.size();
- mbfl_string_init(&result);
- ret = mbfl_mime_header_decode(&string, &result,
- MBSTRG(current_internal_encoding));
- if (ret != NULL) {
- return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
- }
- return false;
- }
- static Variant php_mb_numericentity_exec(const String& str, CVarRef convmap,
- const String& encoding, int type) {
- int mapsize=0;
- mbfl_string string, result, *ret;
- mbfl_no_encoding no_encoding;
- mbfl_string_init(&string);
- string.no_language = MBSTRG(current_language);
- string.no_encoding = MBSTRG(current_internal_encoding);
- string.val = (unsigned char *)str.data();
- string.len = str.size();
- /* encoding */
- if (!encoding.empty()) {
- no_encoding = mbfl_name2no_encoding(encoding.data());
- if (no_encoding == mbfl_no_encoding_invalid) {
- raise_warning("Unknown encoding \"%s\"", encoding.data());
- return false;
- } else {
- string.no_encoding = no_encoding;
- }
- }
- /* conversion map */
- int *iconvmap = NULL;
- if (convmap.is(KindOfArray)) {
- Array convs = convmap.toArray();
- mapsize = convs.size();
- if (mapsize > 0) {
- iconvmap = (int*)malloc(mapsize * sizeof(int));
- int *mapelm = iconvmap;
- for (ArrayIter iter(convs); iter; ++iter) {
- *mapelm++ = iter.second().toInt32();
- }
- }
- }
- if (iconvmap == NULL) {
- return false;
- }
- mapsize /= 4;
- ret = mbfl_html_numeric_entity(&string, &result, iconvmap, mapsize, type);
- free(iconvmap);
- if (ret != NULL) {
- return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
- }
- return false;
- }
- Variant f_mb_decode_numericentity(const String& str, CVarRef convmap,
- const String& encoding /* = null_string */) {
- return php_mb_numericentity_exec(str, convmap, encoding, 1);
- }
- Variant f_mb_detect_encoding(const String& str,
- CVarRef encoding_list /* = null_variant */,
- CVarRef strict /* = null_variant */) {
- mbfl_string string;
- const char *ret;
- mbfl_no_encoding *elist;
- int size;
- mbfl_no_encoding *list = 0;
- /* make encoding list */
- list = NULL;
- size = 0;
- php_mb_parse_encoding(encoding_list, &list, &size, false);
- if (size > 0 && list != NULL) {
- elist = list;
- } else {
- elist = MBSTRG(current_detect_order_list);
- size = MBSTRG(current_detect_order_list_size);
- }
- long nstrict = 0;
- if (!strict.isNull()) {
- nstrict = strict.toInt64();
- } else {
- nstrict = MBSTRG(strict_detection);
- }
- mbfl_string_init(&string);
- string.no_language = MBSTRG(current_language);
- string.val = (unsigned char *)str.data();
- string.len = str.size();
- ret = mbfl_identify_encoding_name(&string, elist, size, nstrict);
- if (list != NULL) {
- free(list);
- }
- if (ret != NULL) {
- return String(ret, CopyString);
- }
- return false;
- }
- Variant f_mb_detect_order(CVarRef encoding_list /* = null_variant */) {
- int n, size;
- mbfl_no_encoding *list, *entry;
- if (encoding_list.isNull()) {
- Array ret;
- entry = MBSTRG(current_detect_order_list);
- n = MBSTRG(current_detect_order_list_size);
- while (n > 0) {
- char *name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
- ret.append(String(name, CopyString));
- }
- entry++;
- n--;
- }
- return ret;
- }
- list = NULL;
- size = 0;
- if (!php_mb_parse_encoding(encoding_list, &list, &size, false) ||
- list == NULL) {
- return false;
- }
- if (MBSTRG(current_detect_order_list)) {
- free(MBSTRG(current_detect_order_list));
- }
- MBSTRG(current_detect_order_list) = list;
- MBSTRG(current_detect_order_list_size) = size;
- return true;
- }
- Variant f_mb_encode_mimeheader(const String& str,
- const String& charset /* = null_string */,
- const String& transfer_encoding/*= null_string*/,
- const String& linefeed /* = "\r\n" */,
- int indent /* = 0 */) {
- mbfl_no_encoding charsetenc, transenc;
- mbfl_string string, result, *ret;
- mbfl_string_init(&string);
- string.no_language = MBSTRG(current_language);
- string.no_encoding = MBSTRG(current_internal_encoding);
- string.val = (unsigned char *)str.data();
- string.len = str.size();
- charsetenc = mbfl_no_encoding_pass;
- transenc = mbfl_no_encoding_base64;
- if (!charset.empty()) {
- charsetenc = mbfl_name2no_encoding(charset.data());
- if (charsetenc == mbfl_no_encoding_invalid) {
- raise_warning("Unknown encoding \"%s\"", charset.data());
- return false;
- }
- } else {
- const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
- if (lang != NULL) {
- charsetenc = lang->mail_charset;
- transenc = lang->mail_header_encoding;
- }
- }
- if (!transfer_encoding.empty()) {
- char ch = *transfer_encoding.data();
- if (ch == 'B' || ch == 'b') {
- transenc = mbfl_no_encoding_base64;
- } else if (ch == 'Q' || ch == 'q') {
- transenc = mbfl_no_encoding_qprint;
- }
- }
- mbfl_string_init(&result);
- ret = mbfl_mime_header_encode(&string, &result, charsetenc, transenc,
- linefeed.data(), indent);
- if (ret != NULL) {
- return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
- }
- return false;
- }
- Variant f_mb_encode_numericentity(const String& str, CVarRef convmap,
- const String& encoding /* = null_string */) {
- return php_mb_numericentity_exec(str, convmap, encoding, 0);
- }
- const StaticString
- s_internal_encoding("internal_encoding"),
- s_http_input("http_input"),
- s_http_output("http_output"),
- s_mail_charset("mail_charset"),
- s_mail_header_encoding("mail_header_encoding"),
- s_mail_body_encoding("mail_body_encoding"),
- s_illegal_chars("illegal_chars"),
- s_encoding_translation("encoding_translation"),
- s_On("On"),
- s_Off("Off"),
- s_language("language"),
- s_detect_order("detect_order"),
- s_substitute_character("substitute_character"),
- s_strict_detection("strict_detection"),
- s_none("none"),
- s_long("long"),
- s_entity("entity");
- Variant f_mb_get_info(const String& type /* = null_string */) {
- const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
- mbfl_no_encoding *entry;
- int n;
- char *name;
- if (type.empty() || strcasecmp(type.data(), "all") == 0) {
- Array ret;
- if ((name = (char *)mbfl_no_encoding2name
- (MBSTRG(current_internal_encoding))) != NULL) {
- ret.set(s_internal_encoding, String(name, CopyString));
- }
- if ((name = (char *)mbfl_no_encoding2name
- (MBSTRG(http_input_identify))) != NULL) {
- ret.set(s_http_input, String(name, CopyString));
- }
- if ((name = (char *)mbfl_no_encoding2name
- (MBSTRG(current_http_output_encoding))) != NULL) {
- ret.set(s_http_output, String(name, CopyString));
- }
- if (lang != NULL) {
- if ((name = (char *)mbfl_no_encoding2name
- (lang->mail_charset)) != NULL) {
- ret.set(s_mail_charset, String(name, CopyString));
- }
- if ((name = (char *)mbfl_no_encoding2name
- (lang->mail_header_encoding)) != NULL) {
- ret.set(s_mail_header_encoding, String(name, CopyString));
- }
- if ((name = (char *)mbfl_no_encoding2name
- (lang->mail_body_encoding)) != NULL) {
- ret.set(s_mail_body_encoding, String(name, CopyString));
- }
- }
- ret.se…
Large files are truncated, but you can click here to view the full file