PageRenderTime 69ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 1ms

/ext/mbstring/mbstring.c

http://github.com/php/php-src
C | 4499 lines | 3515 code | 567 blank | 417 comment | 906 complexity | f81abfb47963bc3baaeb982bb86d7726 MD5 | raw file
Possible License(s): BSD-2-Clause, BSD-3-Clause, MPL-2.0-no-copyleft-exception, LGPL-2.1

Large files are truncated, but you can click here to view the full file

  1. /*
  2. +----------------------------------------------------------------------+
  3. | Copyright (c) The PHP Group |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | http://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  14. | Rui Hirokawa <hirokawa@php.net> |
  15. | Hironori Sato <satoh@jpnnet.com> |
  16. | Shigeru Kanemoto <sgk@happysize.co.jp> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* {{{ includes */
  20. #ifdef HAVE_CONFIG_H
  21. #include "config.h"
  22. #endif
  23. #include "php.h"
  24. #include "php_ini.h"
  25. #include "php_variables.h"
  26. #include "mbstring.h"
  27. #include "ext/standard/php_string.h"
  28. #include "ext/standard/php_mail.h"
  29. #include "ext/standard/exec.h"
  30. #include "ext/standard/url.h"
  31. #include "main/php_output.h"
  32. #include "ext/standard/info.h"
  33. #include "libmbfl/mbfl/mbfl_allocators.h"
  34. #include "libmbfl/mbfl/mbfilter_8bit.h"
  35. #include "libmbfl/mbfl/mbfilter_pass.h"
  36. #include "libmbfl/mbfl/mbfilter_wchar.h"
  37. #include "libmbfl/filters/mbfilter_ascii.h"
  38. #include "libmbfl/filters/mbfilter_base64.h"
  39. #include "libmbfl/filters/mbfilter_qprint.h"
  40. #include "libmbfl/filters/mbfilter_ucs4.h"
  41. #include "libmbfl/filters/mbfilter_utf8.h"
  42. #include "php_variables.h"
  43. #include "php_globals.h"
  44. #include "rfc1867.h"
  45. #include "php_content_types.h"
  46. #include "SAPI.h"
  47. #include "php_unicode.h"
  48. #include "TSRM.h"
  49. #include "mb_gpc.h"
  50. #if HAVE_MBREGEX
  51. # include "php_mbregex.h"
  52. # include "php_onig_compat.h"
  53. # include <oniguruma.h>
  54. # undef UChar
  55. #if ONIGURUMA_VERSION_INT < 60800
  56. typedef void OnigMatchParam;
  57. #define onig_new_match_param() (NULL)
  58. #define onig_initialize_match_param(x) (void)(x)
  59. #define onig_set_match_stack_limit_size_of_match_param(x, y)
  60. #define onig_set_retry_limit_in_match_of_match_param(x, y)
  61. #define onig_free_match_param(x)
  62. #define onig_search_with_param(reg, str, end, start, range, region, option, mp) \
  63. onig_search(reg, str, end, start, range, region, option)
  64. #define onig_match_with_param(re, str, end, at, region, option, mp) \
  65. onig_match(re, str, end, at, region, option)
  66. #endif
  67. #else
  68. # include "ext/pcre/php_pcre.h"
  69. #endif
  70. #include "zend_multibyte.h"
  71. #include "mbstring_arginfo.h"
  72. /* }}} */
  73. #if HAVE_MBSTRING
  74. /* {{{ prototypes */
  75. ZEND_DECLARE_MODULE_GLOBALS(mbstring)
  76. static PHP_GINIT_FUNCTION(mbstring);
  77. static PHP_GSHUTDOWN_FUNCTION(mbstring);
  78. static void php_mb_populate_current_detect_order_list(void);
  79. static int php_mb_encoding_translation(void);
  80. static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
  81. static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
  82. static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
  83. static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
  84. /* }}} */
  85. /* {{{ php_mb_default_identify_list */
  86. typedef struct _php_mb_nls_ident_list {
  87. enum mbfl_no_language lang;
  88. const enum mbfl_no_encoding *list;
  89. size_t list_size;
  90. } php_mb_nls_ident_list;
  91. static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
  92. mbfl_no_encoding_ascii,
  93. mbfl_no_encoding_jis,
  94. mbfl_no_encoding_utf8,
  95. mbfl_no_encoding_euc_jp,
  96. mbfl_no_encoding_sjis
  97. };
  98. static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
  99. mbfl_no_encoding_ascii,
  100. mbfl_no_encoding_utf8,
  101. mbfl_no_encoding_euc_cn,
  102. mbfl_no_encoding_cp936
  103. };
  104. static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
  105. mbfl_no_encoding_ascii,
  106. mbfl_no_encoding_utf8,
  107. mbfl_no_encoding_euc_tw,
  108. mbfl_no_encoding_big5
  109. };
  110. static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
  111. mbfl_no_encoding_ascii,
  112. mbfl_no_encoding_utf8,
  113. mbfl_no_encoding_euc_kr,
  114. mbfl_no_encoding_uhc
  115. };
  116. static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
  117. mbfl_no_encoding_ascii,
  118. mbfl_no_encoding_utf8,
  119. mbfl_no_encoding_koi8r,
  120. mbfl_no_encoding_cp1251,
  121. mbfl_no_encoding_cp866
  122. };
  123. static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
  124. mbfl_no_encoding_ascii,
  125. mbfl_no_encoding_utf8,
  126. mbfl_no_encoding_armscii8
  127. };
  128. static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
  129. mbfl_no_encoding_ascii,
  130. mbfl_no_encoding_utf8,
  131. mbfl_no_encoding_cp1254,
  132. mbfl_no_encoding_8859_9
  133. };
  134. static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
  135. mbfl_no_encoding_ascii,
  136. mbfl_no_encoding_utf8,
  137. mbfl_no_encoding_koi8u
  138. };
  139. static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
  140. mbfl_no_encoding_ascii,
  141. mbfl_no_encoding_utf8
  142. };
  143. static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
  144. { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
  145. { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
  146. { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
  147. { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
  148. { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
  149. { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
  150. { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
  151. { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
  152. { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
  153. };
  154. /* }}} */
  155. /* {{{ zend_module_entry mbstring_module_entry */
  156. zend_module_entry mbstring_module_entry = {
  157. STANDARD_MODULE_HEADER,
  158. "mbstring",
  159. ext_functions,
  160. PHP_MINIT(mbstring),
  161. PHP_MSHUTDOWN(mbstring),
  162. PHP_RINIT(mbstring),
  163. PHP_RSHUTDOWN(mbstring),
  164. PHP_MINFO(mbstring),
  165. PHP_MBSTRING_VERSION,
  166. PHP_MODULE_GLOBALS(mbstring),
  167. PHP_GINIT(mbstring),
  168. PHP_GSHUTDOWN(mbstring),
  169. NULL,
  170. STANDARD_MODULE_PROPERTIES_EX
  171. };
  172. /* }}} */
  173. /* {{{ static sapi_post_entry php_post_entries[] */
  174. static const sapi_post_entry php_post_entries[] = {
  175. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
  176. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  177. { NULL, 0, NULL, NULL }
  178. };
  179. /* }}} */
  /* Emitted only when COMPILE_DL_MBSTRING is defined. */
  #if defined(COMPILE_DL_MBSTRING)
  # if defined(ZTS)
  ZEND_TSRMLS_CACHE_DEFINE()
  # endif /* ZTS */
  ZEND_GET_MODULE(mbstring)
  #endif /* COMPILE_DL_MBSTRING */
  186. /* {{{ allocators */
  187. static void *_php_mb_allocators_malloc(size_t sz)
  188. {
  189. return emalloc(sz);
  190. }
  191. static void *_php_mb_allocators_realloc(void *ptr, size_t sz)
  192. {
  193. return erealloc(ptr, sz);
  194. }
  195. static void *_php_mb_allocators_calloc(size_t nelems, size_t szelem)
  196. {
  197. return ecalloc(nelems, szelem);
  198. }
  199. static void _php_mb_allocators_free(void *ptr)
  200. {
  201. efree(ptr);
  202. }
  203. static const mbfl_allocators _php_mb_allocators = {
  204. _php_mb_allocators_malloc,
  205. _php_mb_allocators_realloc,
  206. _php_mb_allocators_calloc,
  207. _php_mb_allocators_free,
  208. };
  209. /* }}} */
  210. /* {{{ static sapi_post_entry mbstr_post_entries[] */
  211. static const sapi_post_entry mbstr_post_entries[] = {
  212. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
  213. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  214. { NULL, 0, NULL, NULL }
  215. };
  216. /* }}} */
  217. static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, uint32_t arg_num) {
  218. if (encoding_name) {
  219. const mbfl_encoding *encoding;
  220. zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
  221. if (last_encoding_name && (last_encoding_name == encoding_name
  222. || !strcasecmp(ZSTR_VAL(encoding_name), ZSTR_VAL(last_encoding_name)))) {
  223. return MBSTRG(last_used_encoding);
  224. }
  225. encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
  226. if (!encoding) {
  227. zend_argument_value_error(arg_num, "must be a valid encoding, \"%s\" given", ZSTR_VAL(encoding_name));
  228. return NULL;
  229. }
  230. if (last_encoding_name) {
  231. zend_string_release(last_encoding_name);
  232. }
  233. MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name);
  234. MBSTRG(last_used_encoding) = encoding;
  235. return encoding;
  236. } else {
  237. return MBSTRG(current_internal_encoding);
  238. }
  239. }
  240. static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_name) {
  241. if (strcmp(encoding_name, "pass") == 0) {
  242. return &mbfl_encoding_pass;
  243. }
  244. return mbfl_name2encoding(encoding_name);
  245. }
  246. /* {{{ static int php_mb_parse_encoding_list()
  247. * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
  248. * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
  249. */
  250. static int php_mb_parse_encoding_list(const char *value, size_t value_length,
  251. const mbfl_encoding ***return_list, size_t *return_size, int persistent, uint32_t arg_num,
  252. zend_bool allow_pass_encoding)
  253. {
  254. if (value == NULL || value_length == 0) {
  255. *return_list = NULL;
  256. *return_size = 0;
  257. return SUCCESS;
  258. } else {
  259. zend_bool included_auto;
  260. size_t n, size;
  261. char *p, *p1, *p2, *endp, *tmpstr;
  262. const mbfl_encoding **entry, **list;
  263. /* copy the value string for work */
  264. if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
  265. tmpstr = (char *)estrndup(value+1, value_length-2);
  266. value_length -= 2;
  267. } else {
  268. tmpstr = (char *)estrndup(value, value_length);
  269. }
  270. /* count the number of listed encoding names */
  271. endp = tmpstr + value_length;
  272. n = 1;
  273. p1 = tmpstr;
  274. while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
  275. p1 = p2 + 1;
  276. n++;
  277. }
  278. size = n + MBSTRG(default_detect_order_list_size);
  279. /* make list */
  280. list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
  281. entry = list;
  282. n = 0;
  283. included_auto = 0;
  284. p1 = tmpstr;
  285. do {
  286. p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
  287. if (p == NULL) {
  288. p = endp;
  289. }
  290. *p = '\0';
  291. /* trim spaces */
  292. while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
  293. p1++;
  294. }
  295. p--;
  296. while (p > p1 && (*p == ' ' || *p == '\t')) {
  297. *p = '\0';
  298. p--;
  299. }
  300. /* convert to the encoding number and check encoding */
  301. if (strcasecmp(p1, "auto") == 0) {
  302. if (!included_auto) {
  303. const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
  304. const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
  305. size_t i;
  306. included_auto = 1;
  307. for (i = 0; i < identify_list_size; i++) {
  308. *entry++ = mbfl_no2encoding(*src++);
  309. n++;
  310. }
  311. }
  312. } else {
  313. const mbfl_encoding *encoding =
  314. allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
  315. if (!encoding) {
  316. /* Called from an INI setting modification */
  317. if (arg_num == 0) {
  318. php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
  319. } else {
  320. zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
  321. }
  322. efree(tmpstr);
  323. pefree(list, persistent);
  324. return FAILURE;
  325. }
  326. *entry++ = encoding;
  327. n++;
  328. }
  329. p1 = p2 + 1;
  330. } while (n < size && p2 != NULL);
  331. *return_list = list;
  332. *return_size = n;
  333. efree(tmpstr);
  334. }
  335. return SUCCESS;
  336. }
  337. /* }}} */
  338. /* {{{ static int php_mb_parse_encoding_array()
  339. * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
  340. * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
  341. */
  342. static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***return_list,
  343. size_t *return_size, uint32_t arg_num)
  344. {
  345. /* Allocate enough space to include the default detect order if "auto" is used. */
  346. size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size);
  347. const mbfl_encoding **list = ecalloc(size, sizeof(mbfl_encoding*));
  348. const mbfl_encoding **entry = list;
  349. zend_bool included_auto = 0;
  350. size_t n = 0;
  351. zval *hash_entry;
  352. ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
  353. zend_string *encoding_str = zval_try_get_string(hash_entry);
  354. if (UNEXPECTED(!encoding_str)) {
  355. efree(list);
  356. return FAILURE;
  357. }
  358. if (strcasecmp(ZSTR_VAL(encoding_str), "auto") == 0) {
  359. if (!included_auto) {
  360. const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
  361. const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
  362. size_t j;
  363. included_auto = 1;
  364. for (j = 0; j < identify_list_size; j++) {
  365. *entry++ = mbfl_no2encoding(*src++);
  366. n++;
  367. }
  368. }
  369. } else {
  370. const mbfl_encoding *encoding = mbfl_name2encoding(ZSTR_VAL(encoding_str));
  371. if (encoding) {
  372. *entry++ = encoding;
  373. n++;
  374. } else {
  375. zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", ZSTR_VAL(encoding_str));
  376. zend_string_release(encoding_str);
  377. efree(list);
  378. return FAILURE;
  379. }
  380. }
  381. zend_string_release(encoding_str);
  382. } ZEND_HASH_FOREACH_END();
  383. *return_list = list;
  384. *return_size = n;
  385. return SUCCESS;
  386. }
  387. /* }}} */
  388. /* {{{ zend_multibyte interface */
  389. static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
  390. {
  391. return (const zend_encoding*)mbfl_name2encoding(encoding_name);
  392. }
  393. static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
  394. {
  395. return ((const mbfl_encoding *)encoding)->name;
  396. }
  397. static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
  398. {
  399. const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
  400. if (encoding->flag & MBFL_ENCTYPE_SBCS) {
  401. return 1;
  402. }
  403. if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
  404. return 1;
  405. }
  406. return 0;
  407. }
  408. static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
  409. {
  410. mbfl_string string;
  411. if (!list) {
  412. list = (const zend_encoding **)MBSTRG(current_detect_order_list);
  413. list_size = MBSTRG(current_detect_order_list_size);
  414. }
  415. mbfl_string_init(&string);
  416. string.val = (unsigned char *)arg_string;
  417. string.len = arg_length;
  418. return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0);
  419. }
  420. static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
  421. {
  422. mbfl_string string, result;
  423. mbfl_buffer_converter *convd;
  424. int status;
  425. size_t loc;
  426. /* new encoding */
  427. /* initialize string */
  428. string.encoding = (const mbfl_encoding*)encoding_from;
  429. string.val = (unsigned char*)from;
  430. string.len = from_length;
  431. /* initialize converter */
  432. convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
  433. if (convd == NULL) {
  434. return (size_t) -1;
  435. }
  436. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  437. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  438. /* do it */
  439. status = mbfl_buffer_converter_feed2(convd, &string, &loc);
  440. if (status) {
  441. mbfl_buffer_converter_delete(convd);
  442. return (size_t)-1;
  443. }
  444. mbfl_buffer_converter_flush(convd);
  445. mbfl_string_init(&result);
  446. if (!mbfl_buffer_converter_result(convd, &result)) {
  447. mbfl_buffer_converter_delete(convd);
  448. return (size_t)-1;
  449. }
  450. *to = result.val;
  451. *to_length = result.len;
  452. mbfl_buffer_converter_delete(convd);
  453. return loc;
  454. }
  455. static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
  456. {
  457. return php_mb_parse_encoding_list(
  458. encoding_list, encoding_list_len,
  459. (const mbfl_encoding ***)return_list, return_size,
  460. persistent, /* arg_num */ 0, /* allow_pass_encoding */ 1);
  461. }
  462. static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
  463. {
  464. return (const zend_encoding *)MBSTRG(internal_encoding);
  465. }
  466. static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
  467. {
  468. MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
  469. return SUCCESS;
  470. }
  471. static zend_multibyte_functions php_mb_zend_multibyte_functions = {
  472. "mbstring",
  473. php_mb_zend_encoding_fetcher,
  474. php_mb_zend_encoding_name_getter,
  475. php_mb_zend_encoding_lexer_compatibility_checker,
  476. php_mb_zend_encoding_detector,
  477. php_mb_zend_encoding_converter,
  478. php_mb_zend_encoding_list_parser,
  479. php_mb_zend_internal_encoding_getter,
  480. php_mb_zend_internal_encoding_setter
  481. };
  482. /* }}} */
  483. static void *_php_mb_compile_regex(const char *pattern);
  484. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
  485. static void _php_mb_free_regex(void *opaque);
  486. #if HAVE_MBREGEX
  487. /* {{{ _php_mb_compile_regex */
  488. static void *_php_mb_compile_regex(const char *pattern)
  489. {
  490. php_mb_regex_t *retval;
  491. OnigErrorInfo err_info;
  492. int err_code;
  493. if ((err_code = onig_new(&retval,
  494. (const OnigUChar *)pattern,
  495. (const OnigUChar *)pattern + strlen(pattern),
  496. ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
  497. ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
  498. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  499. onig_error_code_to_str(err_str, err_code, err_info);
  500. php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
  501. retval = NULL;
  502. }
  503. return retval;
  504. }
  505. /* }}} */
  506. /* {{{ _php_mb_match_regex */
  507. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
  508. {
  509. OnigMatchParam *mp = onig_new_match_param();
  510. int err;
  511. onig_initialize_match_param(mp);
  512. if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) {
  513. onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
  514. }
  515. if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_retry_limit))) {
  516. onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int)MBSTRG(regex_retry_limit));
  517. }
  518. /* search */
  519. err = onig_search_with_param((php_mb_regex_t *)opaque, (const OnigUChar *)str,
  520. (const OnigUChar*)str + str_len, (const OnigUChar *)str,
  521. (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE, mp);
  522. onig_free_match_param(mp);
  523. return err >= 0;
  524. }
  525. /* }}} */
  526. /* {{{ _php_mb_free_regex */
  527. static void _php_mb_free_regex(void *opaque)
  528. {
  529. onig_free((php_mb_regex_t *)opaque);
  530. }
  531. /* }}} */
  532. #else
  533. /* {{{ _php_mb_compile_regex */
  534. static void *_php_mb_compile_regex(const char *pattern)
  535. {
  536. pcre2_code *retval;
  537. PCRE2_SIZE err_offset;
  538. int errnum;
  539. if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
  540. PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
  541. PCRE2_UCHAR err_str[128];
  542. pcre2_get_error_message(errnum, err_str, sizeof(err_str));
  543. php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
  544. }
  545. return retval;
  546. }
  547. /* }}} */
  548. /* {{{ _php_mb_match_regex */
  549. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
  550. {
  551. int res;
  552. pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
  553. if (NULL == match_data) {
  554. pcre2_code_free(opaque);
  555. php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
  556. return FAILURE;
  557. }
  558. res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
  559. php_pcre_free_match_data(match_data);
  560. return res;
  561. }
  562. /* }}} */
  563. /* {{{ _php_mb_free_regex */
  564. static void _php_mb_free_regex(void *opaque)
  565. {
  566. pcre2_code_free(opaque);
  567. }
  568. /* }}} */
  569. #endif
  570. /* {{{ php_mb_nls_get_default_detect_order_list */
  571. static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
  572. {
  573. size_t i;
  574. *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  575. *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  576. for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
  577. if (php_mb_default_identify_list[i].lang == lang) {
  578. *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
  579. *plist_size = php_mb_default_identify_list[i].list_size;
  580. return 1;
  581. }
  582. }
  583. return 0;
  584. }
  585. /* }}} */
  586. static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
  587. {
  588. char *result = emalloc(len + 2);
  589. char *resp = result;
  590. size_t i;
  591. for (i = 0; i < len && start[i] != quote; ++i) {
  592. if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
  593. *resp++ = start[++i];
  594. } else {
  595. size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
  596. while (j-- > 0 && i < len) {
  597. *resp++ = start[i++];
  598. }
  599. --i;
  600. }
  601. }
  602. *resp = '\0';
  603. return result;
  604. }
  605. static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
  606. {
  607. char *pos = *line, quote;
  608. char *res;
  609. while (*pos && *pos != stop) {
  610. if ((quote = *pos) == '"' || quote == '\'') {
  611. ++pos;
  612. while (*pos && *pos != quote) {
  613. if (*pos == '\\' && pos[1] && pos[1] == quote) {
  614. pos += 2;
  615. } else {
  616. ++pos;
  617. }
  618. }
  619. if (*pos) {
  620. ++pos;
  621. }
  622. } else {
  623. pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
  624. }
  625. }
  626. if (*pos == '\0') {
  627. res = estrdup(*line);
  628. *line += strlen(*line);
  629. return res;
  630. }
  631. res = estrndup(*line, pos - *line);
  632. while (*pos == stop) {
  633. pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
  634. }
  635. *line = pos;
  636. return res;
  637. }
  638. /* }}} */
  639. static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
  640. {
  641. while (*str && isspace(*(unsigned char *)str)) {
  642. ++str;
  643. }
  644. if (!*str) {
  645. return estrdup("");
  646. }
  647. if (*str == '"' || *str == '\'') {
  648. char quote = *str;
  649. str++;
  650. return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote);
  651. } else {
  652. char *strend = str;
  653. while (*strend && !isspace(*(unsigned char *)strend)) {
  654. ++strend;
  655. }
  656. return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0);
  657. }
  658. }
  659. /* }}} */
  660. static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
  661. {
  662. char *s, *s2;
  663. const size_t filename_len = strlen(filename);
  664. /* The \ check should technically be needed for win32 systems only where
  665. * it is a valid path separator. However, IE in all it's wisdom always sends
  666. * the full path of the file on the user's filesystem, which means that unless
  667. * the user does basename() they get a bogus file name. Until IE's user base drops
  668. * to nill or problem is fixed this code must remain enabled for all systems. */
  669. s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
  670. s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
  671. if (s && s2) {
  672. if (s > s2) {
  673. return ++s;
  674. } else {
  675. return ++s2;
  676. }
  677. } else if (s) {
  678. return ++s;
  679. } else if (s2) {
  680. return ++s2;
  681. } else {
  682. return filename;
  683. }
  684. }
  685. /* }}} */
  686. /* {{{ php.ini directive handler */
  687. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
  688. static PHP_INI_MH(OnUpdate_mbstring_language)
  689. {
  690. enum mbfl_no_language no_language;
  691. no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
  692. if (no_language == mbfl_no_language_invalid) {
  693. MBSTRG(language) = mbfl_no_language_neutral;
  694. return FAILURE;
  695. }
  696. MBSTRG(language) = no_language;
  697. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  698. return SUCCESS;
  699. }
  700. /* }}} */
  701. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
  702. static PHP_INI_MH(OnUpdate_mbstring_detect_order)
  703. {
  704. const mbfl_encoding **list;
  705. size_t size;
  706. if (!new_value) {
  707. if (MBSTRG(detect_order_list)) {
  708. pefree(MBSTRG(detect_order_list), 1);
  709. }
  710. MBSTRG(detect_order_list) = NULL;
  711. MBSTRG(detect_order_list_size) = 0;
  712. return SUCCESS;
  713. }
  714. if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 0) || size == 0) {
  715. return FAILURE;
  716. }
  717. if (MBSTRG(detect_order_list)) {
  718. pefree(MBSTRG(detect_order_list), 1);
  719. }
  720. MBSTRG(detect_order_list) = list;
  721. MBSTRG(detect_order_list_size) = size;
  722. return SUCCESS;
  723. }
  724. /* }}} */
  725. static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
  726. const mbfl_encoding **list;
  727. size_t size;
  728. if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 1) || size == 0) {
  729. return FAILURE;
  730. }
  731. if (MBSTRG(http_input_list)) {
  732. pefree(MBSTRG(http_input_list), 1);
  733. }
  734. MBSTRG(http_input_list) = list;
  735. MBSTRG(http_input_list_size) = size;
  736. return SUCCESS;
  737. }
  738. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
  739. static PHP_INI_MH(OnUpdate_mbstring_http_input)
  740. {
  741. if (new_value) {
  742. php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
  743. }
  744. if (!new_value || !ZSTR_VAL(new_value)) {
  745. const char *encoding = php_get_input_encoding();
  746. MBSTRG(http_input_set) = 0;
  747. _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
  748. return SUCCESS;
  749. }
  750. MBSTRG(http_input_set) = 1;
  751. return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
  752. }
  753. /* }}} */
  754. static int _php_mb_ini_mbstring_http_output_set(const char *new_value) {
  755. const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(new_value);
  756. if (!encoding) {
  757. return FAILURE;
  758. }
  759. MBSTRG(http_output_encoding) = encoding;
  760. MBSTRG(current_http_output_encoding) = encoding;
  761. return SUCCESS;
  762. }
  763. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
  764. static PHP_INI_MH(OnUpdate_mbstring_http_output)
  765. {
  766. if (new_value) {
  767. php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
  768. }
  769. if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
  770. MBSTRG(http_output_set) = 0;
  771. _php_mb_ini_mbstring_http_output_set(php_get_output_encoding());
  772. return SUCCESS;
  773. }
  774. MBSTRG(http_output_set) = 1;
  775. return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value));
  776. }
  777. /* }}} */
  778. /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
  779. static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
  780. {
  781. const mbfl_encoding *encoding;
  782. if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
  783. /* falls back to UTF-8 if an unknown encoding name is given */
  784. if (new_value) {
  785. php_error_docref("ref.mbstring", E_WARNING,
  786. "Unknown encoding \"%s\" in ini setting", new_value);
  787. }
  788. encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
  789. }
  790. MBSTRG(internal_encoding) = encoding;
  791. MBSTRG(current_internal_encoding) = encoding;
  792. #if HAVE_MBREGEX
  793. {
  794. const char *enc_name = new_value;
  795. if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
  796. /* falls back to UTF-8 if an unknown encoding name is given */
  797. enc_name = "UTF-8";
  798. php_mb_regex_set_default_mbctype(enc_name);
  799. }
  800. php_mb_regex_set_mbctype(new_value);
  801. }
  802. #endif
  803. return SUCCESS;
  804. }
  805. /* }}} */
  806. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
  807. static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
  808. {
  809. if (new_value) {
  810. php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
  811. }
  812. if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
  813. return FAILURE;
  814. }
  815. if (new_value && ZSTR_LEN(new_value)) {
  816. MBSTRG(internal_encoding_set) = 1;
  817. return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
  818. } else {
  819. const char *encoding = php_get_internal_encoding();
  820. MBSTRG(internal_encoding_set) = 0;
  821. return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
  822. }
  823. }
  824. /* }}} */
  825. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
  826. static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
  827. {
  828. int c;
  829. char *endptr = NULL;
  830. if (new_value != NULL) {
  831. if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
  832. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  833. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  834. } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
  835. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  836. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  837. } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
  838. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  839. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  840. } else {
  841. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  842. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  843. if (ZSTR_LEN(new_value) > 0) {
  844. c = strtol(ZSTR_VAL(new_value), &endptr, 0);
  845. if (*endptr == '\0') {
  846. MBSTRG(filter_illegal_substchar) = c;
  847. MBSTRG(current_filter_illegal_substchar) = c;
  848. }
  849. }
  850. }
  851. } else {
  852. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  853. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  854. MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
  855. MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
  856. }
  857. return SUCCESS;
  858. }
  859. /* }}} */
  860. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
  861. static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
  862. {
  863. if (new_value == NULL) {
  864. return FAILURE;
  865. }
  866. OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
  867. if (MBSTRG(encoding_translation)) {
  868. sapi_unregister_post_entry(php_post_entries);
  869. sapi_register_post_entries(mbstr_post_entries);
  870. } else {
  871. sapi_unregister_post_entry(mbstr_post_entries);
  872. sapi_register_post_entries(php_post_entries);
  873. }
  874. return SUCCESS;
  875. }
  876. /* }}} */
  877. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
  878. static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
  879. {
  880. zend_string *tmp;
  881. void *re = NULL;
  882. if (!new_value) {
  883. new_value = entry->orig_value;
  884. }
  885. tmp = php_trim(new_value, NULL, 0, 3);
  886. if (ZSTR_LEN(tmp) > 0) {
  887. if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
  888. zend_string_release_ex(tmp, 0);
  889. return FAILURE;
  890. }
  891. }
  892. if (MBSTRG(http_output_conv_mimetypes)) {
  893. _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
  894. }
  895. MBSTRG(http_output_conv_mimetypes) = re;
  896. zend_string_release_ex(tmp, 0);
  897. return SUCCESS;
  898. }
  899. /* }}} */
  900. /* }}} */
/* {{{ php.ini directive registration
 * Table of the mbstring.* INI directives: name, default value, the scopes in
 * which the directive may be changed, and the handler invoked on update. */
PHP_INI_BEGIN()
	/* Directives handled entirely by custom callbacks. */
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
	/* Bound to the internal_encoding_name field of the module globals. */
	STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	/* Only changeable at system / per-directory level, unlike the others. */
	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	/* Default is a regex matching "text/" and "application/xhtml+xml" prefixes. */
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)
	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateBool,
		strict_detection, zend_mbstring_globals, mbstring_globals)
#if HAVE_MBREGEX
	/* Regex limits; only registered when mbregex support is compiled in. */
	STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
	STD_PHP_INI_ENTRY("mbstring.regex_retry_limit", "1000000",PHP_INI_ALL, OnUpdateLong, regex_retry_limit, zend_mbstring_globals, mbstring_globals)
#endif
PHP_INI_END()
  926. /* }}} */
  927. static void mbstring_internal_encoding_changed_hook(void) {
  928. /* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */
  929. if (!MBSTRG(internal_encoding_set)) {
  930. const char *encoding = php_get_internal_encoding();
  931. _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
  932. }
  933. if (!MBSTRG(http_output_set)) {
  934. const char *encoding = php_get_output_encoding();
  935. _php_mb_ini_mbstring_http_output_set(encoding);
  936. }
  937. if (!MBSTRG(http_input_set)) {
  938. const char *encoding = php_get_input_encoding();
  939. _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
  940. }
  941. }
  942. /* {{{ module global initialize handler */
  943. static PHP_GINIT_FUNCTION(mbstring)
  944. {
  945. #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
  946. ZEND_TSRMLS_CACHE_UPDATE();
  947. #endif
  948. mbstring_globals->language = mbfl_no_language_uni;
  949. mbstring_globals->internal_encoding = NULL;
  950. mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
  951. mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
  952. mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
  953. mbstring_globals->http_input_identify = NULL;
  954. mbstring_globals->http_input_identify_get = NULL;
  955. mbstring_globals->http_input_identify_post = NULL;
  956. mbstring_globals->http_input_identify_cookie = NULL;
  957. mbstring_globals->http_input_identify_string = NULL;
  958. mbstring_globals->http_input_list = NULL;
  959. mbstring_globals->http_input_list_size = 0;
  960. mbstring_globals->detect_order_list = NULL;
  961. mbstring_globals->detect_order_list_size = 0;
  962. mbstring_globals->current_detect_order_list = NULL;
  963. mbstring_globals->current_detect_order_list_size = 0;
  964. mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  965. mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  966. mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  967. mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
  968. mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  969. mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
  970. mbstring_globals->illegalchars = 0;
  971. mbstring_globals->encoding_translation = 0;
  972. mbstring_globals->strict_detection = 0;
  973. mbstring_globals->outconv = NULL;
  974. mbstring_globals->http_output_conv_mimetypes = NULL;
  975. #if HAVE_MBREGEX
  976. mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
  977. #endif
  978. mbstring_globals->last_used_encoding_name = NULL;
  979. mbstring_globals->last_used_encoding = NULL;
  980. mbstring_globals->internal_encoding_set = 0;
  981. mbstring_globals->http_output_set = 0;
  982. mbstring_globals->http_input_set = 0;
  983. }
  984. /* }}} */
  985. /* {{{ PHP_GSHUTDOWN_FUNCTION */
  986. static PHP_GSHUTDOWN_FUNCTION(mbstring)
  987. {
  988. if (mbstring_globals->http_input_list) {
  989. free(mbstring_globals->http_input_list);
  990. }
  991. if (mbstring_globals->detect_order_list) {
  992. free(mbstring_globals->detect_order_list);
  993. }
  994. if (mbstring_globals->http_output_conv_mimetypes) {
  995. _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
  996. }
  997. #if HAVE_MBREGEX
  998. php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
  999. #endif
  1000. }
  1001. /* }}} */
/* {{{ PHP_MINIT_FUNCTION(mbstring)
 * Module startup: registers the INI entries, installs the encoding-changed
 * hook, registers SAPI handlers and the MB_CASE_* constants, initialises
 * mbregex when compiled in, and installs the multibyte callbacks.
 * Returns FAILURE only when zend_multibyte_set_functions() fails. */
PHP_MINIT_FUNCTION(mbstring)
{
#if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
	ZEND_TSRMLS_CACHE_UPDATE();
#endif
	/* Point libmbfl's allocator table at _php_mb_allocators. */
	__mbfl_allocators = (mbfl_allocators*)&_php_mb_allocators;
	REGISTER_INI_ENTRIES();
	/* We assume that we're the only user of the hook. */
	ZEND_ASSERT(php_internal_encoding_changed == NULL);
	php_internal_encoding_changed = mbstring_internal_encoding_changed_hook;
	/* Run the hook once so encodings not set explicitly get their values now. */
	mbstring_internal_encoding_changed_hook();
	/* This is a global handler. Should not be set in a per-request handler. */
	sapi_register_treat_data(mbstr_treat_data);
	/* Post handlers are stored in the thread-local context. */
	if (MBSTRG(encoding_translation)) {
		sapi_register_post_entries(mbstr_post_entries);
	}
	/* Case-conversion mode constants exposed to userland. */
	REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);
#if HAVE_MBREGEX
	PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
	/* Failing to install the multibyte functions aborts module startup. */
	if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
		return FAILURE;
	}
	/* Hand the RFC 1867 code mbstring's multibyte-aware callbacks. */
	php_rfc1867_set_multibyte_callbacks(
		php_mb_encoding_translation,
		php_mb_gpc_get_detect_order,
		php_mb_gpc_set_input_encoding,
		php_mb_rfc1867_getword,
		php_mb_rfc1867_getword_conf,
		php_mb_rfc1867_basename);
	return SUCCESS;
}
  1043. /* }}} */
/* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring)
 * Module shutdown: unregisters the INI entries, restores the engine's
 * multibyte functions, shuts down mbregex when compiled in, and clears the
 * encoding-changed hook. Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
	UNREGISTER_INI_ENTRIES();
	zend_multibyte_restore_functions();
#if HAVE_MBREGEX
	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
	/* Detach the hook so it no longer points into this module. */
	php_internal_encoding_changed = NULL;
	return SUCCESS;
}
  1055. /* }}} */
/* {{{ PHP_RINIT_FUNCTION(mbstring)
 * Request startup: resets the per-request "current_*" settings to the
 * configured values, clears the illegal-character counter, rebuilds the
 * current detect order list and passes the internal encoding to the engine.
 * Always returns SUCCESS. */
PHP_RINIT_FUNCTION(mbstring)
{
	/* Start the request from the configured values. */
	MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
	MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
	MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
	MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
	MBSTRG(illegalchars) = 0;
	php_mb_populate_current_detect_order_list();
#if HAVE_MBREGEX
	PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
	/* Keep the engine's internal encoding in step with mbstring's. */
	zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
	return SUCCESS;
}
  1071. /* }}} */
  1072. /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
  1073. PHP_RSHUTDOWN_FUNCTION(mbstring)
  1074. {
  1075. if (MBSTRG(current_detect_order_list) != NULL) {
  1076. efree(MBSTRG(current_detect_order_list));
  1077. MBSTRG(current_detect_order_list) = NULL;
  1078. MBSTRG(current_detect_order_list_size) = 0;
  1079. }
  1080. if (MBSTRG(outconv) != NULL) {
  1081. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1082. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1083. MBSTRG(outconv) = NULL;
  1084. }
  1085. /* clear http input identification. */
  1086. MBSTRG(http_input_identify) = NULL;
  1087. MBSTRG(http_input_identify_post) = NULL;
  1088. MBSTRG(http_input_identify_get) = NULL;
  1089. MBSTRG(http_input_identify_cookie) = NULL;
  1090. MBSTRG(http_input_identify_string) = NULL;
  1091. if (MBSTRG(last_used_encoding_name)) {
  1092. zend_string_release(MBSTRG(last_used_encoding_name));
  1093. MBSTRG(last_used_encoding_name) = NULL;
  1094. }
  1095. MBSTRG(internal_encoding_set) = 0;
  1096. MBSTRG(http_output_set) = 0;
  1097. MBSTRG(http_input_set) = 0;
  1098. #if HAVE_MBREGEX
  1099. PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1100. #endif
  1101. return SUCCESS;
  1102. }
  1103. /* }}} */
  1104. /* {{{ PHP_MINFO_FUNCTION(mbstring) */
  1105. PHP_MINFO_FUNCTION(mbstring)
  1106. {
  1107. php_info_print_table_start();
  1108. php_info_print_table_row(2, "Multibyte Support", "enabled");
  1109. php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
  1110. php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
  1111. {
  1112. char tmp[256];
  1113. snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
  1114. php_info_print_table_row(2, "libmbfl version", tmp);
  1115. }
  1116. php_info_print_table_end();
  1117. php_info_print_table_start();
  1118. php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
  1119. php_info_print_table_end();
  1120. #if HAVE_MBREGEX
  1121. PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
  1122. #endif
  1123. DISPLAY_INI_ENTRIES();
  1124. }
  1125. /* }}} */
  1126. /* {{{ proto string mb_language([string language])
  1127. Sets the current language or Returns the current language as a string */
  1128. PHP_FUNCTION(mb_language)
  1129. {
  1130. zend_string *name = NULL;
  1131. if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
  1132. RETURN_THROWS();
  1133. }
  1134. if (name == NULL) {
  1135. RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
  1136. } else {
  1137. zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
  1138. if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
  1139. zend_argument_value_error(1, "must be a valid language, \"%s\" given", ZSTR_VAL(name));
  1140. zend_string_release_ex(ini_name, 0);
  1141. RETURN_THROWS();
  1142. }
  1143. // TODO Make return void
  1144. RETVAL_TRUE;
  1145. zend_string_release_ex(ini_name, 0);
  1146. }
  1147. }
  1148. /* }}} */
  1149. /* {{{ proto string mb_internal_encoding([string encoding])
  1150. Sets the current internal encoding or Returns the current internal encoding as a string */
  1151. PHP_FUNCTION(mb_internal_encoding)
  1152. {
  1153. const char *name = NULL;
  1154. size_t name_len;
  1155. const mbfl_encoding *encoding;
  1156. if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
  1157. RETURN_THROWS();
  1158. }
  1159. if (name == NULL) {
  1160. ZEND_ASSERT(MBSTRG(current_internal_encoding));
  1161. RETURN_STRING(MBSTRG(current_internal_encoding)->name);
  1162. } else {
  1163. encoding = mbfl_name2encoding(name);
  1164. if (!encoding) {
  1165. zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
  1166. RETURN_THROWS();
  1167. } else {
  1168. MBSTRG(current_internal_encoding) = encoding;
  1169. MBSTRG(internal_encoding_set) = 1;
  1170. /* TODO Return old encoding */
  1171. RETURN_TRUE;
  1172. }
  1173. }
  1174. }
  1175. /* }}} */
  1176. /* {{{ proto mixed mb_http_input([string type])
  1177. Returns the input encoding */
  1178. PHP_FUNCTION(mb_http_input)
  1179. {
  1180. char *typ = NULL;
  1181. size_t typ_len;
  1182. int retname;
  1183. char *list, *temp;
  1184. const mbfl_encoding *result = NULL;
  1185. retname = 1;
  1186. if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
  1187. RETURN_THROWS();
  1188. }
  1189. if (typ == NULL) {
  1190. result = MBSTRG(http_input_identify);
  1191. } else {
  1192. switch (*typ) {
  1193. case 'G':
  1194. case 'g':
  1195. result = MBSTRG(http_input_identify_get);
  1196. break;
  1197. case 'P':
  1198. case 'p':
  1199. result = MBSTRG(http_input_identify_post);
  1200. break;
  1201. case 'C':
  1202. case 'c':
  1203. result = MBSTRG(http_input_identify_cookie);
  1204. break;
  1205. case 'S':
  1206. case 's':
  1207. result = MBSTRG(http_input_identify_string);
  1208. break;
  1209. case 'I':
  1210. case 'i':
  1211. {
  1212. const mbfl_encoding **entry = MBSTRG(http_input_list);
  1213. const size_t n = MBSTRG(http_input_list_size);
  1214. size_t i;
  1215. array_init(return_value);
  1216. for (i = 0; i < n; i++) {
  1217. add_next_index_string(return_value, (*entry)->name);
  1218. entry++;
  1219. }
  1220. retname = 0;
  1221. }
  1222. break;
  1223. case 'L':
  1224. case 'l':
  1225. {
  1226. const mbfl_encoding **entry = MBSTRG(http_input_list);
  1227. const size_t n = MBSTRG(http_input_list_size);
  1228. size_t i;
  1229. list = NULL;
  1230. for (i = 0; i < n; i++) {
  1231. if (list) {
  1232. temp = list;
  1233. spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
  1234. efree(temp);
  1235. if (!list) {
  1236. break;
  1237. }
  1238. } else {
  1239. list = estrdup((*entry)->name);
  1240. }
  1241. entry++;
  1242. }
  1243. }
  1244. if (!list) {
  1245. // TODO should return empty string?
  1246. RETURN_FALSE;
  1247. }
  1248. RETVAL_STRING(list);
  1249. efree(list);
  1250. retname = 0;
  1251. break;
  1252. default:
  1253. // TODO ValueError
  1254. result = MBSTRG(http_input_identify);
  1255. break;
  1256. }
  1257. }
  1258. // FIXME this bloc seems useless except for default switch case
  1259. if (retname) {
  1260. if (result) {
  1261. RETVAL_STRING(result->name);
  1262. } else {
  1263. RETVAL_FALSE;
  1264. }
  1265. }
  1266. }
  1267. /* }}} */
  1268. /* {{{ proto string mb_http_output([string encoding])
  1269. Sets the current output_encoding or returns the current output_encoding as a string */
  1270. PHP_FUNCTION(mb_http_output)
  1271. {
  1272. const char *name = NULL;
  1273. size_t name_len;
  1274. const mbfl_encoding *encoding;
  1275. if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
  1276. RETURN_THROWS();
  1277. }
  1278. if (name == NULL) {
  1279. ZEND_ASSERT(MBSTRG(current_http_output_encoding));
  1280. RETURN_STRING(MBSTRG(current_http_output_encoding)->name);
  1281. } else {
  1282. encoding = php_mb_get_encoding_or_pass(name);
  1283. if (!encoding) {
  1284. zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
  1285. RETURN_THROWS();
  1286. } else {
  1287. MBSTRG(http_output_set) = 1;
  1288. MBSTRG(current_http_output_encoding) = encoding;
  1289. /* TODO Return previous encoding? */
  1290. RETURN_TRUE;
  1291. }
  1292. }
  1293. }
  1294. /* }}} */
  1295. /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
  1296. Sets the current detect_order or Return the current detect_order as a array */
  1297. PHP_FUNCTION(mb_detect_order)
  1298. {
  1299. zend_string *order_str = NULL;
  1300. HashTable *order_ht = NULL;
  1301. ZEND_PARSE_PARAMETERS_START(0, 1)
  1302. Z_PARAM_OPTIONAL
  1303. Z_PARAM_STR_OR_ARRAY_HT(order_str, order_ht)
  1304. ZEND_PARSE_PARAMETERS_END();
  1305. if (!order_str && !order_ht) {
  1306. size_t i;
  1307. size_t n = MBSTRG(current_detect_order_list_size);
  1308. const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
  1309. array_init(return_value);
  1310. for (i = 0; i < n; i++) {
  1311. add_next_index_string(return_value, (*entry)->name);
  1312. entry++;
  1313. }
  1314. } else {
  1315. const mbfl_encoding **list;
  1316. size_t size;
  1317. if (order_ht) {
  1318. if (FAILURE == php_mb_parse_encoding_array(order_ht, &list, &size, 1)) {
  1319. RETURN_THROWS();
  1320. }
  1321. } else {
  1322. if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1, /* allow_pass_encoding */ 0)) {
  1323. RETURN_THROWS();
  1324. }
  1325. }
  1326. if (size == 0) {
  1327. efree(list);
  1328. zend_argument_value_error(1, "must specify at least one encoding");
  1329. RETURN_THROWS();
  1330. }
  1331. if (MBSTRG(current_detect_order_list)) {
  1332. efree(MBSTRG(current_detect_order_list));
  1333. }
  1334. MBSTRG(current_detect_order_list) = list;
  1335. MBSTRG(current_detect_order_list_size) = size;
  1336. RETURN_TRUE;
  1337. }
  1338. }
  1339. /* }}} */
  1340. static inline int php_mb_check_code_point(zend_long cp)
  1341. {
  1342. if (cp < 0 || cp >= 0x110000) {
  1343. /* Out of Unicode range */
  1344. return 0;
  1345. }
  1346. if (cp >= 0xd800 && cp <= 0xdfff) {
  1347. /* Surrogate code-point. These are never valid on their own and we only allow a single
  1348. * substitute character. */
  1349. return 0;
  1350. }
  1351. /* As the we do not know the target encoding of the conversion operation that is going to
  1352. * use the substitution character, we cannot check whether the codepoint is actually mapped
  1353. * in the given encoding at this point. Thus we have to accept everything. */
  1354. return 1;
  1355. }
  1356. /* {{{ proto string|int|true mb_substitute_character([string|int|null substitute_character])
  1357. Sets the current substitute_character or returns the current substitute_character */
  1358. PHP_FUNCTION(mb_substitute_character)
  1359. {
  1360. zend_string *substitute_character = NULL;
  1361. zend_long substitute_codepoint;
  1362. zend_bool substitute_is_null = 1;
  1363. ZEND_PARSE_PARAMETERS_START(0, 1)
  1364. Z_PARAM_OPTIONAL
  1365. Z_PARAM_STR_OR_LONG_OR_NULL(substitute_character, substitute_codepoint, substitute_is_null)
  1366. ZEND_PARSE_PARAMETERS_END();
  1367. if (substitute_is_null) {
  1368. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  1369. RETURN_STRING("none");
  1370. }
  1371. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  1372. RETURN_STRING("long");
  1373. }
  1374. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  1375. RETURN_STRING("entity");
  1376. }
  1377. RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
  1378. }
  1379. if (substitute_character != NULL) {
  1380. if (zend_string_equals_literal_ci(substitute_character, "none")) {
  1381. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1382. RETURN_TRUE;
  1383. }
  1384. if (zend_string_equals_literal_ci(substitute_character, "long")) {
  1385. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1386. RETURN_TRUE;
  1387. }
  1388. if (zend_string_equals_literal_ci(substitute_character, "entity")) {
  1389. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1390. RETURN_TRUE;
  1391. }
  1392. /* Invalid string value */
  1393. zend_argument_value_error(1, "must be 'none', 'long', 'entity' or a valid codepoint");
  1394. RETURN_THROWS();
  1395. }
  1396. /* Integer codepoint passed */
  1397. if (!php_mb_check_code_point(substitute_codepoint)) {
  1398. zend_argument_value_error(1, "is not a valid codepoint");
  1399. RETURN_THROWS();
  1400. }
  1401. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1402. MBSTRG(current_filter_illegal_substchar) = substitute_codepoint;
  1403. RETURN_TRUE;
  1404. }
  1405. /* }}} */
  1406. /* {{{ proto string mb_preferred_mime_name(string encoding)
  1407. Return the preferred MIME name (charset) as a string */
  1408. PHP_FUNCTION(mb_preferred_mime_name)
  1409. {
  1410. enum mbfl_no_encoding no_encoding;
  1411. char *name = NULL;
  1412. size_t name_len;
  1413. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
  1414. RETURN_THROWS();
  1415. }
  1416. no_encoding = mbfl_name2no_encoding(name);
  1417. if (no_encoding == mbfl_no_encoding_invalid) {
  1418. zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
  1419. R

Large files are truncated, but you can click here to view the full file