PageRenderTime 90ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 1ms

/ext/mbstring/mbstring.c

http://github.com/php/php-src
C | 4499 lines | 3515 code | 567 blank | 417 comment | 906 complexity | f81abfb47963bc3baaeb982bb86d7726 MD5 | raw file
Possible License(s): BSD-2-Clause, BSD-3-Clause, MPL-2.0-no-copyleft-exception, LGPL-2.1
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Copyright (c) The PHP Group |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | http://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  14. | Rui Hirokawa <hirokawa@php.net> |
  15. | Hironori Sato <satoh@jpnnet.com> |
  16. | Shigeru Kanemoto <sgk@happysize.co.jp> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* {{{ includes */
  20. #ifdef HAVE_CONFIG_H
  21. #include "config.h"
  22. #endif
  23. #include "php.h"
  24. #include "php_ini.h"
  25. #include "php_variables.h"
  26. #include "mbstring.h"
  27. #include "ext/standard/php_string.h"
  28. #include "ext/standard/php_mail.h"
  29. #include "ext/standard/exec.h"
  30. #include "ext/standard/url.h"
  31. #include "main/php_output.h"
  32. #include "ext/standard/info.h"
  33. #include "libmbfl/mbfl/mbfl_allocators.h"
  34. #include "libmbfl/mbfl/mbfilter_8bit.h"
  35. #include "libmbfl/mbfl/mbfilter_pass.h"
  36. #include "libmbfl/mbfl/mbfilter_wchar.h"
  37. #include "libmbfl/filters/mbfilter_ascii.h"
  38. #include "libmbfl/filters/mbfilter_base64.h"
  39. #include "libmbfl/filters/mbfilter_qprint.h"
  40. #include "libmbfl/filters/mbfilter_ucs4.h"
  41. #include "libmbfl/filters/mbfilter_utf8.h"
  42. #include "php_variables.h"
  43. #include "php_globals.h"
  44. #include "rfc1867.h"
  45. #include "php_content_types.h"
  46. #include "SAPI.h"
  47. #include "php_unicode.h"
  48. #include "TSRM.h"
  49. #include "mb_gpc.h"
  50. #if HAVE_MBREGEX
  51. # include "php_mbregex.h"
  52. # include "php_onig_compat.h"
  53. # include <oniguruma.h>
  54. # undef UChar
  55. #if ONIGURUMA_VERSION_INT < 60800
  56. typedef void OnigMatchParam;
  57. #define onig_new_match_param() (NULL)
  58. #define onig_initialize_match_param(x) (void)(x)
  59. #define onig_set_match_stack_limit_size_of_match_param(x, y)
  60. #define onig_set_retry_limit_in_match_of_match_param(x, y)
  61. #define onig_free_match_param(x)
  62. #define onig_search_with_param(reg, str, end, start, range, region, option, mp) \
  63. onig_search(reg, str, end, start, range, region, option)
  64. #define onig_match_with_param(re, str, end, at, region, option, mp) \
  65. onig_match(re, str, end, at, region, option)
  66. #endif
  67. #else
  68. # include "ext/pcre/php_pcre.h"
  69. #endif
  70. #include "zend_multibyte.h"
  71. #include "mbstring_arginfo.h"
  72. /* }}} */
  73. #if HAVE_MBSTRING
  74. /* {{{ prototypes */
  75. ZEND_DECLARE_MODULE_GLOBALS(mbstring)
  76. static PHP_GINIT_FUNCTION(mbstring);
  77. static PHP_GSHUTDOWN_FUNCTION(mbstring);
  78. static void php_mb_populate_current_detect_order_list(void);
  79. static int php_mb_encoding_translation(void);
  80. static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
  81. static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
  82. static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
  83. static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
  84. /* }}} */
  85. /* {{{ php_mb_default_identify_list */
  86. typedef struct _php_mb_nls_ident_list {
  87. enum mbfl_no_language lang;
  88. const enum mbfl_no_encoding *list;
  89. size_t list_size;
  90. } php_mb_nls_ident_list;
  91. static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
  92. mbfl_no_encoding_ascii,
  93. mbfl_no_encoding_jis,
  94. mbfl_no_encoding_utf8,
  95. mbfl_no_encoding_euc_jp,
  96. mbfl_no_encoding_sjis
  97. };
  98. static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
  99. mbfl_no_encoding_ascii,
  100. mbfl_no_encoding_utf8,
  101. mbfl_no_encoding_euc_cn,
  102. mbfl_no_encoding_cp936
  103. };
  104. static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
  105. mbfl_no_encoding_ascii,
  106. mbfl_no_encoding_utf8,
  107. mbfl_no_encoding_euc_tw,
  108. mbfl_no_encoding_big5
  109. };
  110. static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
  111. mbfl_no_encoding_ascii,
  112. mbfl_no_encoding_utf8,
  113. mbfl_no_encoding_euc_kr,
  114. mbfl_no_encoding_uhc
  115. };
  116. static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
  117. mbfl_no_encoding_ascii,
  118. mbfl_no_encoding_utf8,
  119. mbfl_no_encoding_koi8r,
  120. mbfl_no_encoding_cp1251,
  121. mbfl_no_encoding_cp866
  122. };
  123. static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
  124. mbfl_no_encoding_ascii,
  125. mbfl_no_encoding_utf8,
  126. mbfl_no_encoding_armscii8
  127. };
  128. static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
  129. mbfl_no_encoding_ascii,
  130. mbfl_no_encoding_utf8,
  131. mbfl_no_encoding_cp1254,
  132. mbfl_no_encoding_8859_9
  133. };
  134. static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
  135. mbfl_no_encoding_ascii,
  136. mbfl_no_encoding_utf8,
  137. mbfl_no_encoding_koi8u
  138. };
  139. static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
  140. mbfl_no_encoding_ascii,
  141. mbfl_no_encoding_utf8
  142. };
  143. static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
  144. { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
  145. { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
  146. { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
  147. { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
  148. { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
  149. { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
  150. { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
  151. { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
  152. { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
  153. };
  154. /* }}} */
  155. /* {{{ zend_module_entry mbstring_module_entry */
  156. zend_module_entry mbstring_module_entry = {
  157. STANDARD_MODULE_HEADER,
  158. "mbstring",
  159. ext_functions,
  160. PHP_MINIT(mbstring),
  161. PHP_MSHUTDOWN(mbstring),
  162. PHP_RINIT(mbstring),
  163. PHP_RSHUTDOWN(mbstring),
  164. PHP_MINFO(mbstring),
  165. PHP_MBSTRING_VERSION,
  166. PHP_MODULE_GLOBALS(mbstring),
  167. PHP_GINIT(mbstring),
  168. PHP_GSHUTDOWN(mbstring),
  169. NULL,
  170. STANDARD_MODULE_PROPERTIES_EX
  171. };
  172. /* }}} */
  173. /* {{{ static sapi_post_entry php_post_entries[] */
  174. static const sapi_post_entry php_post_entries[] = {
  175. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
  176. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  177. { NULL, 0, NULL, NULL }
  178. };
  179. /* }}} */
  180. #ifdef COMPILE_DL_MBSTRING
  181. #ifdef ZTS
  182. ZEND_TSRMLS_CACHE_DEFINE()
  183. #endif
  184. ZEND_GET_MODULE(mbstring)
  185. #endif
  186. /* {{{ allocators */
  187. static void *_php_mb_allocators_malloc(size_t sz)
  188. {
  189. return emalloc(sz);
  190. }
  191. static void *_php_mb_allocators_realloc(void *ptr, size_t sz)
  192. {
  193. return erealloc(ptr, sz);
  194. }
  195. static void *_php_mb_allocators_calloc(size_t nelems, size_t szelem)
  196. {
  197. return ecalloc(nelems, szelem);
  198. }
  199. static void _php_mb_allocators_free(void *ptr)
  200. {
  201. efree(ptr);
  202. }
  203. static const mbfl_allocators _php_mb_allocators = {
  204. _php_mb_allocators_malloc,
  205. _php_mb_allocators_realloc,
  206. _php_mb_allocators_calloc,
  207. _php_mb_allocators_free,
  208. };
  209. /* }}} */
  210. /* {{{ static sapi_post_entry mbstr_post_entries[] */
  211. static const sapi_post_entry mbstr_post_entries[] = {
  212. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
  213. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  214. { NULL, 0, NULL, NULL }
  215. };
  216. /* }}} */
  217. static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, uint32_t arg_num) {
  218. if (encoding_name) {
  219. const mbfl_encoding *encoding;
  220. zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
  221. if (last_encoding_name && (last_encoding_name == encoding_name
  222. || !strcasecmp(ZSTR_VAL(encoding_name), ZSTR_VAL(last_encoding_name)))) {
  223. return MBSTRG(last_used_encoding);
  224. }
  225. encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
  226. if (!encoding) {
  227. zend_argument_value_error(arg_num, "must be a valid encoding, \"%s\" given", ZSTR_VAL(encoding_name));
  228. return NULL;
  229. }
  230. if (last_encoding_name) {
  231. zend_string_release(last_encoding_name);
  232. }
  233. MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name);
  234. MBSTRG(last_used_encoding) = encoding;
  235. return encoding;
  236. } else {
  237. return MBSTRG(current_internal_encoding);
  238. }
  239. }
  240. static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_name) {
  241. if (strcmp(encoding_name, "pass") == 0) {
  242. return &mbfl_encoding_pass;
  243. }
  244. return mbfl_name2encoding(encoding_name);
  245. }
  246. /* {{{ static int php_mb_parse_encoding_list()
  247. * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
  248. * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
  249. */
  250. static int php_mb_parse_encoding_list(const char *value, size_t value_length,
  251. const mbfl_encoding ***return_list, size_t *return_size, int persistent, uint32_t arg_num,
  252. zend_bool allow_pass_encoding)
  253. {
  254. if (value == NULL || value_length == 0) {
  255. *return_list = NULL;
  256. *return_size = 0;
  257. return SUCCESS;
  258. } else {
  259. zend_bool included_auto;
  260. size_t n, size;
  261. char *p, *p1, *p2, *endp, *tmpstr;
  262. const mbfl_encoding **entry, **list;
  263. /* copy the value string for work */
  264. if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
  265. tmpstr = (char *)estrndup(value+1, value_length-2);
  266. value_length -= 2;
  267. } else {
  268. tmpstr = (char *)estrndup(value, value_length);
  269. }
  270. /* count the number of listed encoding names */
  271. endp = tmpstr + value_length;
  272. n = 1;
  273. p1 = tmpstr;
  274. while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
  275. p1 = p2 + 1;
  276. n++;
  277. }
  278. size = n + MBSTRG(default_detect_order_list_size);
  279. /* make list */
  280. list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
  281. entry = list;
  282. n = 0;
  283. included_auto = 0;
  284. p1 = tmpstr;
  285. do {
  286. p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
  287. if (p == NULL) {
  288. p = endp;
  289. }
  290. *p = '\0';
  291. /* trim spaces */
  292. while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
  293. p1++;
  294. }
  295. p--;
  296. while (p > p1 && (*p == ' ' || *p == '\t')) {
  297. *p = '\0';
  298. p--;
  299. }
  300. /* convert to the encoding number and check encoding */
  301. if (strcasecmp(p1, "auto") == 0) {
  302. if (!included_auto) {
  303. const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
  304. const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
  305. size_t i;
  306. included_auto = 1;
  307. for (i = 0; i < identify_list_size; i++) {
  308. *entry++ = mbfl_no2encoding(*src++);
  309. n++;
  310. }
  311. }
  312. } else {
  313. const mbfl_encoding *encoding =
  314. allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
  315. if (!encoding) {
  316. /* Called from an INI setting modification */
  317. if (arg_num == 0) {
  318. php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
  319. } else {
  320. zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
  321. }
  322. efree(tmpstr);
  323. pefree(list, persistent);
  324. return FAILURE;
  325. }
  326. *entry++ = encoding;
  327. n++;
  328. }
  329. p1 = p2 + 1;
  330. } while (n < size && p2 != NULL);
  331. *return_list = list;
  332. *return_size = n;
  333. efree(tmpstr);
  334. }
  335. return SUCCESS;
  336. }
  337. /* }}} */
  338. /* {{{ static int php_mb_parse_encoding_array()
  339. * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
  340. * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
  341. */
  342. static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***return_list,
  343. size_t *return_size, uint32_t arg_num)
  344. {
  345. /* Allocate enough space to include the default detect order if "auto" is used. */
  346. size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size);
  347. const mbfl_encoding **list = ecalloc(size, sizeof(mbfl_encoding*));
  348. const mbfl_encoding **entry = list;
  349. zend_bool included_auto = 0;
  350. size_t n = 0;
  351. zval *hash_entry;
  352. ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
  353. zend_string *encoding_str = zval_try_get_string(hash_entry);
  354. if (UNEXPECTED(!encoding_str)) {
  355. efree(list);
  356. return FAILURE;
  357. }
  358. if (strcasecmp(ZSTR_VAL(encoding_str), "auto") == 0) {
  359. if (!included_auto) {
  360. const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
  361. const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
  362. size_t j;
  363. included_auto = 1;
  364. for (j = 0; j < identify_list_size; j++) {
  365. *entry++ = mbfl_no2encoding(*src++);
  366. n++;
  367. }
  368. }
  369. } else {
  370. const mbfl_encoding *encoding = mbfl_name2encoding(ZSTR_VAL(encoding_str));
  371. if (encoding) {
  372. *entry++ = encoding;
  373. n++;
  374. } else {
  375. zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", ZSTR_VAL(encoding_str));
  376. zend_string_release(encoding_str);
  377. efree(list);
  378. return FAILURE;
  379. }
  380. }
  381. zend_string_release(encoding_str);
  382. } ZEND_HASH_FOREACH_END();
  383. *return_list = list;
  384. *return_size = n;
  385. return SUCCESS;
  386. }
  387. /* }}} */
  388. /* {{{ zend_multibyte interface */
  389. static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
  390. {
  391. return (const zend_encoding*)mbfl_name2encoding(encoding_name);
  392. }
  393. static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
  394. {
  395. return ((const mbfl_encoding *)encoding)->name;
  396. }
  397. static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
  398. {
  399. const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
  400. if (encoding->flag & MBFL_ENCTYPE_SBCS) {
  401. return 1;
  402. }
  403. if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
  404. return 1;
  405. }
  406. return 0;
  407. }
  408. static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
  409. {
  410. mbfl_string string;
  411. if (!list) {
  412. list = (const zend_encoding **)MBSTRG(current_detect_order_list);
  413. list_size = MBSTRG(current_detect_order_list_size);
  414. }
  415. mbfl_string_init(&string);
  416. string.val = (unsigned char *)arg_string;
  417. string.len = arg_length;
  418. return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0);
  419. }
  420. static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
  421. {
  422. mbfl_string string, result;
  423. mbfl_buffer_converter *convd;
  424. int status;
  425. size_t loc;
  426. /* new encoding */
  427. /* initialize string */
  428. string.encoding = (const mbfl_encoding*)encoding_from;
  429. string.val = (unsigned char*)from;
  430. string.len = from_length;
  431. /* initialize converter */
  432. convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
  433. if (convd == NULL) {
  434. return (size_t) -1;
  435. }
  436. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  437. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  438. /* do it */
  439. status = mbfl_buffer_converter_feed2(convd, &string, &loc);
  440. if (status) {
  441. mbfl_buffer_converter_delete(convd);
  442. return (size_t)-1;
  443. }
  444. mbfl_buffer_converter_flush(convd);
  445. mbfl_string_init(&result);
  446. if (!mbfl_buffer_converter_result(convd, &result)) {
  447. mbfl_buffer_converter_delete(convd);
  448. return (size_t)-1;
  449. }
  450. *to = result.val;
  451. *to_length = result.len;
  452. mbfl_buffer_converter_delete(convd);
  453. return loc;
  454. }
  455. static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
  456. {
  457. return php_mb_parse_encoding_list(
  458. encoding_list, encoding_list_len,
  459. (const mbfl_encoding ***)return_list, return_size,
  460. persistent, /* arg_num */ 0, /* allow_pass_encoding */ 1);
  461. }
  462. static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
  463. {
  464. return (const zend_encoding *)MBSTRG(internal_encoding);
  465. }
  466. static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
  467. {
  468. MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
  469. return SUCCESS;
  470. }
  471. static zend_multibyte_functions php_mb_zend_multibyte_functions = {
  472. "mbstring",
  473. php_mb_zend_encoding_fetcher,
  474. php_mb_zend_encoding_name_getter,
  475. php_mb_zend_encoding_lexer_compatibility_checker,
  476. php_mb_zend_encoding_detector,
  477. php_mb_zend_encoding_converter,
  478. php_mb_zend_encoding_list_parser,
  479. php_mb_zend_internal_encoding_getter,
  480. php_mb_zend_internal_encoding_setter
  481. };
  482. /* }}} */
  483. static void *_php_mb_compile_regex(const char *pattern);
  484. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
  485. static void _php_mb_free_regex(void *opaque);
  486. #if HAVE_MBREGEX
  487. /* {{{ _php_mb_compile_regex */
  488. static void *_php_mb_compile_regex(const char *pattern)
  489. {
  490. php_mb_regex_t *retval;
  491. OnigErrorInfo err_info;
  492. int err_code;
  493. if ((err_code = onig_new(&retval,
  494. (const OnigUChar *)pattern,
  495. (const OnigUChar *)pattern + strlen(pattern),
  496. ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
  497. ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
  498. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  499. onig_error_code_to_str(err_str, err_code, err_info);
  500. php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
  501. retval = NULL;
  502. }
  503. return retval;
  504. }
  505. /* }}} */
  506. /* {{{ _php_mb_match_regex */
  507. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
  508. {
  509. OnigMatchParam *mp = onig_new_match_param();
  510. int err;
  511. onig_initialize_match_param(mp);
  512. if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) {
  513. onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
  514. }
  515. if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_retry_limit))) {
  516. onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int)MBSTRG(regex_retry_limit));
  517. }
  518. /* search */
  519. err = onig_search_with_param((php_mb_regex_t *)opaque, (const OnigUChar *)str,
  520. (const OnigUChar*)str + str_len, (const OnigUChar *)str,
  521. (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE, mp);
  522. onig_free_match_param(mp);
  523. return err >= 0;
  524. }
  525. /* }}} */
  526. /* {{{ _php_mb_free_regex */
  527. static void _php_mb_free_regex(void *opaque)
  528. {
  529. onig_free((php_mb_regex_t *)opaque);
  530. }
  531. /* }}} */
  532. #else
  533. /* {{{ _php_mb_compile_regex */
  534. static void *_php_mb_compile_regex(const char *pattern)
  535. {
  536. pcre2_code *retval;
  537. PCRE2_SIZE err_offset;
  538. int errnum;
  539. if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
  540. PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
  541. PCRE2_UCHAR err_str[128];
  542. pcre2_get_error_message(errnum, err_str, sizeof(err_str));
  543. php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
  544. }
  545. return retval;
  546. }
  547. /* }}} */
  548. /* {{{ _php_mb_match_regex */
  549. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
  550. {
  551. int res;
  552. pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
  553. if (NULL == match_data) {
  554. pcre2_code_free(opaque);
  555. php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
  556. return FAILURE;
  557. }
  558. res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
  559. php_pcre_free_match_data(match_data);
  560. return res;
  561. }
  562. /* }}} */
  563. /* {{{ _php_mb_free_regex */
  564. static void _php_mb_free_regex(void *opaque)
  565. {
  566. pcre2_code_free(opaque);
  567. }
  568. /* }}} */
  569. #endif
  570. /* {{{ php_mb_nls_get_default_detect_order_list */
  571. static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
  572. {
  573. size_t i;
  574. *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  575. *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  576. for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
  577. if (php_mb_default_identify_list[i].lang == lang) {
  578. *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
  579. *plist_size = php_mb_default_identify_list[i].list_size;
  580. return 1;
  581. }
  582. }
  583. return 0;
  584. }
  585. /* }}} */
  586. static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
  587. {
  588. char *result = emalloc(len + 2);
  589. char *resp = result;
  590. size_t i;
  591. for (i = 0; i < len && start[i] != quote; ++i) {
  592. if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
  593. *resp++ = start[++i];
  594. } else {
  595. size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
  596. while (j-- > 0 && i < len) {
  597. *resp++ = start[i++];
  598. }
  599. --i;
  600. }
  601. }
  602. *resp = '\0';
  603. return result;
  604. }
  605. static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
  606. {
  607. char *pos = *line, quote;
  608. char *res;
  609. while (*pos && *pos != stop) {
  610. if ((quote = *pos) == '"' || quote == '\'') {
  611. ++pos;
  612. while (*pos && *pos != quote) {
  613. if (*pos == '\\' && pos[1] && pos[1] == quote) {
  614. pos += 2;
  615. } else {
  616. ++pos;
  617. }
  618. }
  619. if (*pos) {
  620. ++pos;
  621. }
  622. } else {
  623. pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
  624. }
  625. }
  626. if (*pos == '\0') {
  627. res = estrdup(*line);
  628. *line += strlen(*line);
  629. return res;
  630. }
  631. res = estrndup(*line, pos - *line);
  632. while (*pos == stop) {
  633. pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
  634. }
  635. *line = pos;
  636. return res;
  637. }
  638. /* }}} */
  639. static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
  640. {
  641. while (*str && isspace(*(unsigned char *)str)) {
  642. ++str;
  643. }
  644. if (!*str) {
  645. return estrdup("");
  646. }
  647. if (*str == '"' || *str == '\'') {
  648. char quote = *str;
  649. str++;
  650. return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote);
  651. } else {
  652. char *strend = str;
  653. while (*strend && !isspace(*(unsigned char *)strend)) {
  654. ++strend;
  655. }
  656. return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0);
  657. }
  658. }
  659. /* }}} */
  660. static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
  661. {
  662. char *s, *s2;
  663. const size_t filename_len = strlen(filename);
  664. /* The \ check should technically be needed for win32 systems only where
  665. * it is a valid path separator. However, IE in all it's wisdom always sends
  666. * the full path of the file on the user's filesystem, which means that unless
  667. * the user does basename() they get a bogus file name. Until IE's user base drops
  668. * to nill or problem is fixed this code must remain enabled for all systems. */
  669. s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
  670. s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
  671. if (s && s2) {
  672. if (s > s2) {
  673. return ++s;
  674. } else {
  675. return ++s2;
  676. }
  677. } else if (s) {
  678. return ++s;
  679. } else if (s2) {
  680. return ++s2;
  681. } else {
  682. return filename;
  683. }
  684. }
  685. /* }}} */
  686. /* {{{ php.ini directive handler */
  687. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
  688. static PHP_INI_MH(OnUpdate_mbstring_language)
  689. {
  690. enum mbfl_no_language no_language;
  691. no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
  692. if (no_language == mbfl_no_language_invalid) {
  693. MBSTRG(language) = mbfl_no_language_neutral;
  694. return FAILURE;
  695. }
  696. MBSTRG(language) = no_language;
  697. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  698. return SUCCESS;
  699. }
  700. /* }}} */
  701. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
  702. static PHP_INI_MH(OnUpdate_mbstring_detect_order)
  703. {
  704. const mbfl_encoding **list;
  705. size_t size;
  706. if (!new_value) {
  707. if (MBSTRG(detect_order_list)) {
  708. pefree(MBSTRG(detect_order_list), 1);
  709. }
  710. MBSTRG(detect_order_list) = NULL;
  711. MBSTRG(detect_order_list_size) = 0;
  712. return SUCCESS;
  713. }
  714. if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 0) || size == 0) {
  715. return FAILURE;
  716. }
  717. if (MBSTRG(detect_order_list)) {
  718. pefree(MBSTRG(detect_order_list), 1);
  719. }
  720. MBSTRG(detect_order_list) = list;
  721. MBSTRG(detect_order_list_size) = size;
  722. return SUCCESS;
  723. }
  724. /* }}} */
  725. static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
  726. const mbfl_encoding **list;
  727. size_t size;
  728. if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 1) || size == 0) {
  729. return FAILURE;
  730. }
  731. if (MBSTRG(http_input_list)) {
  732. pefree(MBSTRG(http_input_list), 1);
  733. }
  734. MBSTRG(http_input_list) = list;
  735. MBSTRG(http_input_list_size) = size;
  736. return SUCCESS;
  737. }
  738. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
  739. static PHP_INI_MH(OnUpdate_mbstring_http_input)
  740. {
  741. if (new_value) {
  742. php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
  743. }
  744. if (!new_value || !ZSTR_VAL(new_value)) {
  745. const char *encoding = php_get_input_encoding();
  746. MBSTRG(http_input_set) = 0;
  747. _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
  748. return SUCCESS;
  749. }
  750. MBSTRG(http_input_set) = 1;
  751. return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
  752. }
  753. /* }}} */
  754. static int _php_mb_ini_mbstring_http_output_set(const char *new_value) {
  755. const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(new_value);
  756. if (!encoding) {
  757. return FAILURE;
  758. }
  759. MBSTRG(http_output_encoding) = encoding;
  760. MBSTRG(current_http_output_encoding) = encoding;
  761. return SUCCESS;
  762. }
  763. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
  764. static PHP_INI_MH(OnUpdate_mbstring_http_output)
  765. {
  766. if (new_value) {
  767. php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
  768. }
  769. if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
  770. MBSTRG(http_output_set) = 0;
  771. _php_mb_ini_mbstring_http_output_set(php_get_output_encoding());
  772. return SUCCESS;
  773. }
  774. MBSTRG(http_output_set) = 1;
  775. return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value));
  776. }
  777. /* }}} */
  778. /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
  779. static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
  780. {
  781. const mbfl_encoding *encoding;
  782. if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
  783. /* falls back to UTF-8 if an unknown encoding name is given */
  784. if (new_value) {
  785. php_error_docref("ref.mbstring", E_WARNING,
  786. "Unknown encoding \"%s\" in ini setting", new_value);
  787. }
  788. encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
  789. }
  790. MBSTRG(internal_encoding) = encoding;
  791. MBSTRG(current_internal_encoding) = encoding;
  792. #if HAVE_MBREGEX
  793. {
  794. const char *enc_name = new_value;
  795. if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
  796. /* falls back to UTF-8 if an unknown encoding name is given */
  797. enc_name = "UTF-8";
  798. php_mb_regex_set_default_mbctype(enc_name);
  799. }
  800. php_mb_regex_set_mbctype(new_value);
  801. }
  802. #endif
  803. return SUCCESS;
  804. }
  805. /* }}} */
  806. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
  807. static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
  808. {
  809. if (new_value) {
  810. php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
  811. }
  812. if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
  813. return FAILURE;
  814. }
  815. if (new_value && ZSTR_LEN(new_value)) {
  816. MBSTRG(internal_encoding_set) = 1;
  817. return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
  818. } else {
  819. const char *encoding = php_get_internal_encoding();
  820. MBSTRG(internal_encoding_set) = 0;
  821. return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
  822. }
  823. }
  824. /* }}} */
  825. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
  826. static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
  827. {
  828. int c;
  829. char *endptr = NULL;
  830. if (new_value != NULL) {
  831. if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
  832. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  833. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  834. } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
  835. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  836. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  837. } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
  838. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  839. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  840. } else {
  841. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  842. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  843. if (ZSTR_LEN(new_value) > 0) {
  844. c = strtol(ZSTR_VAL(new_value), &endptr, 0);
  845. if (*endptr == '\0') {
  846. MBSTRG(filter_illegal_substchar) = c;
  847. MBSTRG(current_filter_illegal_substchar) = c;
  848. }
  849. }
  850. }
  851. } else {
  852. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  853. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  854. MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
  855. MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
  856. }
  857. return SUCCESS;
  858. }
  859. /* }}} */
  860. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
  861. static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
  862. {
  863. if (new_value == NULL) {
  864. return FAILURE;
  865. }
  866. OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
  867. if (MBSTRG(encoding_translation)) {
  868. sapi_unregister_post_entry(php_post_entries);
  869. sapi_register_post_entries(mbstr_post_entries);
  870. } else {
  871. sapi_unregister_post_entry(mbstr_post_entries);
  872. sapi_register_post_entries(php_post_entries);
  873. }
  874. return SUCCESS;
  875. }
  876. /* }}} */
  877. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
  878. static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
  879. {
  880. zend_string *tmp;
  881. void *re = NULL;
  882. if (!new_value) {
  883. new_value = entry->orig_value;
  884. }
  885. tmp = php_trim(new_value, NULL, 0, 3);
  886. if (ZSTR_LEN(tmp) > 0) {
  887. if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
  888. zend_string_release_ex(tmp, 0);
  889. return FAILURE;
  890. }
  891. }
  892. if (MBSTRG(http_output_conv_mimetypes)) {
  893. _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
  894. }
  895. MBSTRG(http_output_conv_mimetypes) = re;
  896. zend_string_release_ex(tmp, 0);
  897. return SUCCESS;
  898. }
  899. /* }}} */
  900. /* }}} */
  901. /* {{{ php.ini directive registration */
  902. PHP_INI_BEGIN()
  903. PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
  904. PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
  905. PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
  906. PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
  907. STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
  908. PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
  909. STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
  910. PHP_INI_SYSTEM | PHP_INI_PERDIR,
  911. OnUpdate_mbstring_encoding_translation,
  912. encoding_translation, zend_mbstring_globals, mbstring_globals)
  913. PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
  914. "^(text/|application/xhtml\\+xml)",
  915. PHP_INI_ALL,
  916. OnUpdate_mbstring_http_output_conv_mimetypes)
  917. STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
  918. PHP_INI_ALL,
  919. OnUpdateBool,
  920. strict_detection, zend_mbstring_globals, mbstring_globals)
  921. #if HAVE_MBREGEX
  922. STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
  923. STD_PHP_INI_ENTRY("mbstring.regex_retry_limit", "1000000",PHP_INI_ALL, OnUpdateLong, regex_retry_limit, zend_mbstring_globals, mbstring_globals)
  924. #endif
  925. PHP_INI_END()
  926. /* }}} */
  927. static void mbstring_internal_encoding_changed_hook(void) {
  928. /* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */
  929. if (!MBSTRG(internal_encoding_set)) {
  930. const char *encoding = php_get_internal_encoding();
  931. _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
  932. }
  933. if (!MBSTRG(http_output_set)) {
  934. const char *encoding = php_get_output_encoding();
  935. _php_mb_ini_mbstring_http_output_set(encoding);
  936. }
  937. if (!MBSTRG(http_input_set)) {
  938. const char *encoding = php_get_input_encoding();
  939. _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
  940. }
  941. }
  942. /* {{{ module global initialize handler */
  943. static PHP_GINIT_FUNCTION(mbstring)
  944. {
  945. #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
  946. ZEND_TSRMLS_CACHE_UPDATE();
  947. #endif
  948. mbstring_globals->language = mbfl_no_language_uni;
  949. mbstring_globals->internal_encoding = NULL;
  950. mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
  951. mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
  952. mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
  953. mbstring_globals->http_input_identify = NULL;
  954. mbstring_globals->http_input_identify_get = NULL;
  955. mbstring_globals->http_input_identify_post = NULL;
  956. mbstring_globals->http_input_identify_cookie = NULL;
  957. mbstring_globals->http_input_identify_string = NULL;
  958. mbstring_globals->http_input_list = NULL;
  959. mbstring_globals->http_input_list_size = 0;
  960. mbstring_globals->detect_order_list = NULL;
  961. mbstring_globals->detect_order_list_size = 0;
  962. mbstring_globals->current_detect_order_list = NULL;
  963. mbstring_globals->current_detect_order_list_size = 0;
  964. mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  965. mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  966. mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  967. mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
  968. mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  969. mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
  970. mbstring_globals->illegalchars = 0;
  971. mbstring_globals->encoding_translation = 0;
  972. mbstring_globals->strict_detection = 0;
  973. mbstring_globals->outconv = NULL;
  974. mbstring_globals->http_output_conv_mimetypes = NULL;
  975. #if HAVE_MBREGEX
  976. mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
  977. #endif
  978. mbstring_globals->last_used_encoding_name = NULL;
  979. mbstring_globals->last_used_encoding = NULL;
  980. mbstring_globals->internal_encoding_set = 0;
  981. mbstring_globals->http_output_set = 0;
  982. mbstring_globals->http_input_set = 0;
  983. }
  984. /* }}} */
  985. /* {{{ PHP_GSHUTDOWN_FUNCTION */
  986. static PHP_GSHUTDOWN_FUNCTION(mbstring)
  987. {
  988. if (mbstring_globals->http_input_list) {
  989. free(mbstring_globals->http_input_list);
  990. }
  991. if (mbstring_globals->detect_order_list) {
  992. free(mbstring_globals->detect_order_list);
  993. }
  994. if (mbstring_globals->http_output_conv_mimetypes) {
  995. _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
  996. }
  997. #if HAVE_MBREGEX
  998. php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
  999. #endif
  1000. }
  1001. /* }}} */
  1002. /* {{{ PHP_MINIT_FUNCTION(mbstring) */
  1003. PHP_MINIT_FUNCTION(mbstring)
  1004. {
  1005. #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
  1006. ZEND_TSRMLS_CACHE_UPDATE();
  1007. #endif
  1008. __mbfl_allocators = (mbfl_allocators*)&_php_mb_allocators;
  1009. REGISTER_INI_ENTRIES();
  1010. /* We assume that we're the only user of the hook. */
  1011. ZEND_ASSERT(php_internal_encoding_changed == NULL);
  1012. php_internal_encoding_changed = mbstring_internal_encoding_changed_hook;
  1013. mbstring_internal_encoding_changed_hook();
  1014. /* This is a global handler. Should not be set in a per-request handler. */
  1015. sapi_register_treat_data(mbstr_treat_data);
  1016. /* Post handlers are stored in the thread-local context. */
  1017. if (MBSTRG(encoding_translation)) {
  1018. sapi_register_post_entries(mbstr_post_entries);
  1019. }
  1020. REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
  1021. REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
  1022. REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
  1023. REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
  1024. REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
  1025. REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
  1026. REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
  1027. REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);
  1028. #if HAVE_MBREGEX
  1029. PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1030. #endif
  1031. if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
  1032. return FAILURE;
  1033. }
  1034. php_rfc1867_set_multibyte_callbacks(
  1035. php_mb_encoding_translation,
  1036. php_mb_gpc_get_detect_order,
  1037. php_mb_gpc_set_input_encoding,
  1038. php_mb_rfc1867_getword,
  1039. php_mb_rfc1867_getword_conf,
  1040. php_mb_rfc1867_basename);
  1041. return SUCCESS;
  1042. }
  1043. /* }}} */
  1044. /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
  1045. PHP_MSHUTDOWN_FUNCTION(mbstring)
  1046. {
  1047. UNREGISTER_INI_ENTRIES();
  1048. zend_multibyte_restore_functions();
  1049. #if HAVE_MBREGEX
  1050. PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1051. #endif
  1052. php_internal_encoding_changed = NULL;
  1053. return SUCCESS;
  1054. }
  1055. /* }}} */
  1056. /* {{{ PHP_RINIT_FUNCTION(mbstring) */
  1057. PHP_RINIT_FUNCTION(mbstring)
  1058. {
  1059. MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
  1060. MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
  1061. MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
  1062. MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
  1063. MBSTRG(illegalchars) = 0;
  1064. php_mb_populate_current_detect_order_list();
  1065. #if HAVE_MBREGEX
  1066. PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1067. #endif
  1068. zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
  1069. return SUCCESS;
  1070. }
  1071. /* }}} */
  1072. /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
  1073. PHP_RSHUTDOWN_FUNCTION(mbstring)
  1074. {
  1075. if (MBSTRG(current_detect_order_list) != NULL) {
  1076. efree(MBSTRG(current_detect_order_list));
  1077. MBSTRG(current_detect_order_list) = NULL;
  1078. MBSTRG(current_detect_order_list_size) = 0;
  1079. }
  1080. if (MBSTRG(outconv) != NULL) {
  1081. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1082. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1083. MBSTRG(outconv) = NULL;
  1084. }
  1085. /* clear http input identification. */
  1086. MBSTRG(http_input_identify) = NULL;
  1087. MBSTRG(http_input_identify_post) = NULL;
  1088. MBSTRG(http_input_identify_get) = NULL;
  1089. MBSTRG(http_input_identify_cookie) = NULL;
  1090. MBSTRG(http_input_identify_string) = NULL;
  1091. if (MBSTRG(last_used_encoding_name)) {
  1092. zend_string_release(MBSTRG(last_used_encoding_name));
  1093. MBSTRG(last_used_encoding_name) = NULL;
  1094. }
  1095. MBSTRG(internal_encoding_set) = 0;
  1096. MBSTRG(http_output_set) = 0;
  1097. MBSTRG(http_input_set) = 0;
  1098. #if HAVE_MBREGEX
  1099. PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1100. #endif
  1101. return SUCCESS;
  1102. }
  1103. /* }}} */
  1104. /* {{{ PHP_MINFO_FUNCTION(mbstring) */
  1105. PHP_MINFO_FUNCTION(mbstring)
  1106. {
  1107. php_info_print_table_start();
  1108. php_info_print_table_row(2, "Multibyte Support", "enabled");
  1109. php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
  1110. php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
  1111. {
  1112. char tmp[256];
  1113. snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
  1114. php_info_print_table_row(2, "libmbfl version", tmp);
  1115. }
  1116. php_info_print_table_end();
  1117. php_info_print_table_start();
  1118. php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
  1119. php_info_print_table_end();
  1120. #if HAVE_MBREGEX
  1121. PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
  1122. #endif
  1123. DISPLAY_INI_ENTRIES();
  1124. }
  1125. /* }}} */
  1126. /* {{{ proto string mb_language([string language])
  1127. Sets the current language or Returns the current language as a string */
  1128. PHP_FUNCTION(mb_language)
  1129. {
  1130. zend_string *name = NULL;
  1131. if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
  1132. RETURN_THROWS();
  1133. }
  1134. if (name == NULL) {
  1135. RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
  1136. } else {
  1137. zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
  1138. if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
  1139. zend_argument_value_error(1, "must be a valid language, \"%s\" given", ZSTR_VAL(name));
  1140. zend_string_release_ex(ini_name, 0);
  1141. RETURN_THROWS();
  1142. }
  1143. // TODO Make return void
  1144. RETVAL_TRUE;
  1145. zend_string_release_ex(ini_name, 0);
  1146. }
  1147. }
  1148. /* }}} */
  1149. /* {{{ proto string mb_internal_encoding([string encoding])
  1150. Sets the current internal encoding or Returns the current internal encoding as a string */
  1151. PHP_FUNCTION(mb_internal_encoding)
  1152. {
  1153. const char *name = NULL;
  1154. size_t name_len;
  1155. const mbfl_encoding *encoding;
  1156. if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
  1157. RETURN_THROWS();
  1158. }
  1159. if (name == NULL) {
  1160. ZEND_ASSERT(MBSTRG(current_internal_encoding));
  1161. RETURN_STRING(MBSTRG(current_internal_encoding)->name);
  1162. } else {
  1163. encoding = mbfl_name2encoding(name);
  1164. if (!encoding) {
  1165. zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
  1166. RETURN_THROWS();
  1167. } else {
  1168. MBSTRG(current_internal_encoding) = encoding;
  1169. MBSTRG(internal_encoding_set) = 1;
  1170. /* TODO Return old encoding */
  1171. RETURN_TRUE;
  1172. }
  1173. }
  1174. }
  1175. /* }}} */
  1176. /* {{{ proto mixed mb_http_input([string type])
  1177. Returns the input encoding */
  1178. PHP_FUNCTION(mb_http_input)
  1179. {
  1180. char *typ = NULL;
  1181. size_t typ_len;
  1182. int retname;
  1183. char *list, *temp;
  1184. const mbfl_encoding *result = NULL;
  1185. retname = 1;
  1186. if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
  1187. RETURN_THROWS();
  1188. }
  1189. if (typ == NULL) {
  1190. result = MBSTRG(http_input_identify);
  1191. } else {
  1192. switch (*typ) {
  1193. case 'G':
  1194. case 'g':
  1195. result = MBSTRG(http_input_identify_get);
  1196. break;
  1197. case 'P':
  1198. case 'p':
  1199. result = MBSTRG(http_input_identify_post);
  1200. break;
  1201. case 'C':
  1202. case 'c':
  1203. result = MBSTRG(http_input_identify_cookie);
  1204. break;
  1205. case 'S':
  1206. case 's':
  1207. result = MBSTRG(http_input_identify_string);
  1208. break;
  1209. case 'I':
  1210. case 'i':
  1211. {
  1212. const mbfl_encoding **entry = MBSTRG(http_input_list);
  1213. const size_t n = MBSTRG(http_input_list_size);
  1214. size_t i;
  1215. array_init(return_value);
  1216. for (i = 0; i < n; i++) {
  1217. add_next_index_string(return_value, (*entry)->name);
  1218. entry++;
  1219. }
  1220. retname = 0;
  1221. }
  1222. break;
  1223. case 'L':
  1224. case 'l':
  1225. {
  1226. const mbfl_encoding **entry = MBSTRG(http_input_list);
  1227. const size_t n = MBSTRG(http_input_list_size);
  1228. size_t i;
  1229. list = NULL;
  1230. for (i = 0; i < n; i++) {
  1231. if (list) {
  1232. temp = list;
  1233. spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
  1234. efree(temp);
  1235. if (!list) {
  1236. break;
  1237. }
  1238. } else {
  1239. list = estrdup((*entry)->name);
  1240. }
  1241. entry++;
  1242. }
  1243. }
  1244. if (!list) {
  1245. // TODO should return empty string?
  1246. RETURN_FALSE;
  1247. }
  1248. RETVAL_STRING(list);
  1249. efree(list);
  1250. retname = 0;
  1251. break;
  1252. default:
  1253. // TODO ValueError
  1254. result = MBSTRG(http_input_identify);
  1255. break;
  1256. }
  1257. }
  1258. // FIXME this bloc seems useless except for default switch case
  1259. if (retname) {
  1260. if (result) {
  1261. RETVAL_STRING(result->name);
  1262. } else {
  1263. RETVAL_FALSE;
  1264. }
  1265. }
  1266. }
  1267. /* }}} */
  1268. /* {{{ proto string mb_http_output([string encoding])
  1269. Sets the current output_encoding or returns the current output_encoding as a string */
  1270. PHP_FUNCTION(mb_http_output)
  1271. {
  1272. const char *name = NULL;
  1273. size_t name_len;
  1274. const mbfl_encoding *encoding;
  1275. if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
  1276. RETURN_THROWS();
  1277. }
  1278. if (name == NULL) {
  1279. ZEND_ASSERT(MBSTRG(current_http_output_encoding));
  1280. RETURN_STRING(MBSTRG(current_http_output_encoding)->name);
  1281. } else {
  1282. encoding = php_mb_get_encoding_or_pass(name);
  1283. if (!encoding) {
  1284. zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
  1285. RETURN_THROWS();
  1286. } else {
  1287. MBSTRG(http_output_set) = 1;
  1288. MBSTRG(current_http_output_encoding) = encoding;
  1289. /* TODO Return previous encoding? */
  1290. RETURN_TRUE;
  1291. }
  1292. }
  1293. }
  1294. /* }}} */
  1295. /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
  1296. Sets the current detect_order or returns the current detect_order as an array */
  1297. PHP_FUNCTION(mb_detect_order)
  1298. {
  1299. zend_string *order_str = NULL;
  1300. HashTable *order_ht = NULL;
  1301. ZEND_PARSE_PARAMETERS_START(0, 1)
  1302. Z_PARAM_OPTIONAL
  1303. Z_PARAM_STR_OR_ARRAY_HT(order_str, order_ht)
  1304. ZEND_PARSE_PARAMETERS_END();
  1305. if (!order_str && !order_ht) {
  1306. size_t i;
  1307. size_t n = MBSTRG(current_detect_order_list_size);
  1308. const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
  1309. array_init(return_value);
  1310. for (i = 0; i < n; i++) {
  1311. add_next_index_string(return_value, (*entry)->name);
  1312. entry++;
  1313. }
  1314. } else {
  1315. const mbfl_encoding **list;
  1316. size_t size;
  1317. if (order_ht) {
  1318. if (FAILURE == php_mb_parse_encoding_array(order_ht, &list, &size, 1)) {
  1319. RETURN_THROWS();
  1320. }
  1321. } else {
  1322. if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1, /* allow_pass_encoding */ 0)) {
  1323. RETURN_THROWS();
  1324. }
  1325. }
  1326. if (size == 0) {
  1327. efree(list);
  1328. zend_argument_value_error(1, "must specify at least one encoding");
  1329. RETURN_THROWS();
  1330. }
  1331. if (MBSTRG(current_detect_order_list)) {
  1332. efree(MBSTRG(current_detect_order_list));
  1333. }
  1334. MBSTRG(current_detect_order_list) = list;
  1335. MBSTRG(current_detect_order_list_size) = size;
  1336. RETURN_TRUE;
  1337. }
  1338. }
  1339. /* }}} */
  1340. static inline int php_mb_check_code_point(zend_long cp)
  1341. {
  1342. if (cp < 0 || cp >= 0x110000) {
  1343. /* Out of Unicode range */
  1344. return 0;
  1345. }
  1346. if (cp >= 0xd800 && cp <= 0xdfff) {
  1347. /* Surrogate code-point. These are never valid on their own and we only allow a single
  1348. * substitute character. */
  1349. return 0;
  1350. }
  1351. /* As the we do not know the target encoding of the conversion operation that is going to
  1352. * use the substitution character, we cannot check whether the codepoint is actually mapped
  1353. * in the given encoding at this point. Thus we have to accept everything. */
  1354. return 1;
  1355. }
  1356. /* {{{ proto string|int|true mb_substitute_character([string|int|null substitute_character])
  1357. Sets the current substitute_character or returns the current substitute_character */
  1358. PHP_FUNCTION(mb_substitute_character)
  1359. {
  1360. zend_string *substitute_character = NULL;
  1361. zend_long substitute_codepoint;
  1362. zend_bool substitute_is_null = 1;
  1363. ZEND_PARSE_PARAMETERS_START(0, 1)
  1364. Z_PARAM_OPTIONAL
  1365. Z_PARAM_STR_OR_LONG_OR_NULL(substitute_character, substitute_codepoint, substitute_is_null)
  1366. ZEND_PARSE_PARAMETERS_END();
  1367. if (substitute_is_null) {
  1368. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  1369. RETURN_STRING("none");
  1370. }
  1371. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  1372. RETURN_STRING("long");
  1373. }
  1374. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  1375. RETURN_STRING("entity");
  1376. }
  1377. RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
  1378. }
  1379. if (substitute_character != NULL) {
  1380. if (zend_string_equals_literal_ci(substitute_character, "none")) {
  1381. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1382. RETURN_TRUE;
  1383. }
  1384. if (zend_string_equals_literal_ci(substitute_character, "long")) {
  1385. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1386. RETURN_TRUE;
  1387. }
  1388. if (zend_string_equals_literal_ci(substitute_character, "entity")) {
  1389. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1390. RETURN_TRUE;
  1391. }
  1392. /* Invalid string value */
  1393. zend_argument_value_error(1, "must be 'none', 'long', 'entity' or a valid codepoint");
  1394. RETURN_THROWS();
  1395. }
  1396. /* Integer codepoint passed */
  1397. if (!php_mb_check_code_point(substitute_codepoint)) {
  1398. zend_argument_value_error(1, "is not a valid codepoint");
  1399. RETURN_THROWS();
  1400. }
  1401. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1402. MBSTRG(current_filter_illegal_substchar) = substitute_codepoint;
  1403. RETURN_TRUE;
  1404. }
  1405. /* }}} */
  1406. /* {{{ proto string mb_preferred_mime_name(string encoding)
  1407. Return the preferred MIME name (charset) as a string */
  1408. PHP_FUNCTION(mb_preferred_mime_name)
  1409. {
  1410. enum mbfl_no_encoding no_encoding;
  1411. char *name = NULL;
  1412. size_t name_len;
  1413. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
  1414. RETURN_THROWS();
  1415. }
  1416. no_encoding = mbfl_name2no_encoding(name);
  1417. if (no_encoding == mbfl_no_encoding_invalid) {
  1418. zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
  1419. RETURN_THROWS();
  1420. }
  1421. const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
  1422. if (preferred_name == NULL || *preferred_name == '\0') {
  1423. php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
  1424. RETVAL_FALSE;
  1425. } else {
  1426. RETVAL_STRING((char *)preferred_name);
  1427. }
  1428. }
  1429. /* }}} */
  /* Byte-range predicates, each yielding 1 or 0. Judging by the names these
   * classify the first and second byte of a Shift_JIS double-byte sequence -
   * NOTE(review): confirm against the users of these macros.
   *   IS_SJIS1: c in 0x81..0x9f or 0xe0..0xf5
   *   IS_SJIS2: c in 0x40..0x7e or 0x80..0xfc
   * The argument is evaluated more than once; do not pass expressions with
   * side effects. */
  #define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
  #define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
  1432. /* {{{ proto bool mb_parse_str(string encoded_string, array &result)
  1433. Parses GET/POST/COOKIE data and sets global variables */
  1434. PHP_FUNCTION(mb_parse_str)
  1435. {
  1436. zval *track_vars_array;
  1437. char *encstr;
  1438. size_t encstr_len;
  1439. php_mb_encoding_handler_info_t info;
  1440. const mbfl_encoding *detected;
  1441. track_vars_array = NULL;
  1442. if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
  1443. RETURN_THROWS();
  1444. }
  1445. track_vars_array = zend_try_array_init(track_vars_array);
  1446. if (!track_vars_array) {
  1447. RETURN_THROWS();
  1448. }
  1449. encstr = estrndup(encstr, encstr_len);
  1450. info.data_type = PARSE_STRING;
  1451. info.separator = PG(arg_separator).input;
  1452. info.report_errors = 1;
  1453. info.to_encoding = MBSTRG(current_internal_encoding);
  1454. info.to_language = MBSTRG(language);
  1455. info.from_encodings = MBSTRG(http_input_list);
  1456. info.num_from_encodings = MBSTRG(http_input_list_size);
  1457. info.from_language = MBSTRG(language);
  1458. detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
  1459. MBSTRG(http_input_identify) = detected;
  1460. RETVAL_BOOL(detected);
  1461. if (encstr != NULL) efree(encstr);
  1462. }
  1463. /* }}} */
  1464. /* {{{ proto string mb_output_handler(string contents, int status)
  1465. Returns string in output buffer converted to the http_output encoding */
  1466. PHP_FUNCTION(mb_output_handler)
  1467. {
  1468. char *arg_string;
  1469. size_t arg_string_len;
  1470. zend_long arg_status;
  1471. mbfl_string string, result;
  1472. const char *charset;
  1473. char *p;
  1474. const mbfl_encoding *encoding;
  1475. int last_feed;
  1476. size_t len;
  1477. unsigned char send_text_mimetype = 0;
  1478. char *s, *mimetype = NULL;
  1479. if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
  1480. RETURN_THROWS();
  1481. }
  1482. encoding = MBSTRG(current_http_output_encoding);
  1483. /* start phase only */
  1484. if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
  1485. /* delete the converter just in case. */
  1486. if (MBSTRG(outconv)) {
  1487. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1488. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1489. MBSTRG(outconv) = NULL;
  1490. }
  1491. if (encoding == &mbfl_encoding_pass) {
  1492. RETURN_STRINGL(arg_string, arg_string_len);
  1493. }
  1494. /* analyze mime type */
  1495. if (SG(sapi_headers).mimetype &&
  1496. _php_mb_match_regex(
  1497. MBSTRG(http_output_conv_mimetypes),
  1498. SG(sapi_headers).mimetype,
  1499. strlen(SG(sapi_headers).mimetype))) {
  1500. if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
  1501. mimetype = estrdup(SG(sapi_headers).mimetype);
  1502. } else {
  1503. mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
  1504. }
  1505. send_text_mimetype = 1;
  1506. } else if (SG(sapi_headers).send_default_content_type) {
  1507. mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
  1508. }
  1509. /* if content-type is not yet set, set it and activate the converter */
  1510. if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
  1511. charset = encoding->mime_name;
  1512. if (charset) {
  1513. len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
  1514. if (sapi_add_header(p, len, 0) != FAILURE) {
  1515. SG(sapi_headers).send_default_content_type = 0;
  1516. }
  1517. }
  1518. /* activate the converter */
  1519. MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
  1520. if (send_text_mimetype){
  1521. efree(mimetype);
  1522. }
  1523. }
  1524. }
  1525. /* just return if the converter is not activated. */
  1526. if (MBSTRG(outconv) == NULL) {
  1527. RETURN_STRINGL(arg_string, arg_string_len);
  1528. }
  1529. /* flag */
  1530. last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
  1531. /* mode */
  1532. mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
  1533. mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
  1534. /* feed the string */
  1535. mbfl_string_init(&string);
  1536. /* these are not needed. convd has encoding info.
  1537. string.encoding = MBSTRG(current_internal_encoding);
  1538. */
  1539. string.val = (unsigned char *)arg_string;
  1540. string.len = arg_string_len;
  1541. mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
  1542. if (last_feed) {
  1543. mbfl_buffer_converter_flush(MBSTRG(outconv));
  1544. }
  1545. /* get the converter output, and return it */
  1546. mbfl_buffer_converter_result(MBSTRG(outconv), &result);
  1547. // TODO: avoid reallocation ???
  1548. RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */
  1549. efree(result.val);
  1550. /* delete the converter if it is the last feed. */
  1551. if (last_feed) {
  1552. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1553. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1554. MBSTRG(outconv) = NULL;
  1555. }
  1556. }
  1557. /* }}} */
  1558. /* {{{ proto array mb_str_split(string str [, int split_length] [, string encoding])
  1559. Convert a multibyte string to an array. If split_length is specified,
  1560. break the string down into chunks each split_length characters long. */
  1561. /* structure to pass split params to the callback */
  1562. struct mbfl_split_params {
  1563. zval *return_value; /* php function return value structure pointer */
  1564. mbfl_string *result_string; /* string to store result chunk */
  1565. size_t mb_chunk_length; /* actual chunk length in chars */
  1566. size_t split_length; /* split length in chars */
  1567. mbfl_convert_filter *next_filter; /* widechar to encoding converter */
  1568. };
  1569. /* callback function to fill split array */
  1570. static int mbfl_split_output(int c, void *data)
  1571. {
  1572. struct mbfl_split_params *params = (struct mbfl_split_params *)data; /* cast passed data */
  1573. (*params->next_filter->filter_function)(c, params->next_filter); /* decoder filter */
  1574. if(params->split_length == ++params->mb_chunk_length) { /* if current chunk size reached defined chunk size or last char reached */
  1575. mbfl_convert_filter_flush(params->next_filter);/* concatenate separate decoded chars to the solid string */
  1576. mbfl_memory_device *device = (mbfl_memory_device *)params->next_filter->data; /* chars container */
  1577. mbfl_string *chunk = params->result_string;
  1578. mbfl_memory_device_result(device, chunk); /* make chunk */
  1579. add_next_index_stringl(params->return_value, (const char *)chunk->val, chunk->len); /* add chunk to the array */
  1580. efree(chunk->val);
  1581. params->mb_chunk_length = 0; /* reset mb_chunk size */
  1582. }
  1583. return 0;
  1584. }
  1585. /* TODO Document this function on php.net */
  1586. PHP_FUNCTION(mb_str_split)
  1587. {
  1588. zend_string *str, *encoding = NULL;
  1589. size_t mb_len, chunks, chunk_len;
  1590. const char *p, *last; /* pointer for the string cursor and last string char */
  1591. mbfl_string string, result_string;
  1592. const mbfl_encoding *mbfl_encoding;
  1593. zend_long split_length = 1;
  1594. ZEND_PARSE_PARAMETERS_START(1, 3)
  1595. Z_PARAM_STR(str)
  1596. Z_PARAM_OPTIONAL
  1597. Z_PARAM_LONG(split_length)
  1598. Z_PARAM_STR(encoding)
  1599. ZEND_PARSE_PARAMETERS_END();
  1600. if (split_length <= 0) {
  1601. zend_argument_value_error(2, "must be greater than 0");
  1602. RETURN_THROWS();
  1603. }
  1604. /* fill mbfl_string structure */
  1605. string.val = (unsigned char *) ZSTR_VAL(str);
  1606. string.len = ZSTR_LEN(str);
  1607. string.encoding = php_mb_get_encoding(encoding, 3);
  1608. if (!string.encoding) {
  1609. RETURN_THROWS();
  1610. }
  1611. p = ZSTR_VAL(str); /* string cursor pointer */
  1612. last = ZSTR_VAL(str) + ZSTR_LEN(str); /* last string char pointer */
  1613. mbfl_encoding = string.encoding;
  1614. /* first scenario: 1,2,4-bytes fixed width encodings (head part) */
  1615. if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */
  1616. mb_len = string.len;
  1617. chunk_len = (size_t)split_length; /* chunk length in bytes */
  1618. } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { /* 2 bytes */
  1619. mb_len = string.len / 2;
  1620. chunk_len = split_length * 2;
  1621. } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { /* 4 bytes */
  1622. mb_len = string.len / 4;
  1623. chunk_len = split_length * 4;
  1624. } else if (mbfl_encoding->mblen_table != NULL) {
  1625. /* second scenario: variable width encodings with length table */
  1626. char unsigned const *mbtab = mbfl_encoding->mblen_table;
  1627. /* assume that we have 1-bytes characters */
  1628. array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
  1629. while (p < last) { /* split cycle work until the cursor has reached the last byte */
  1630. char const *chunk_p = p; /* chunk first byte pointer */
  1631. chunk_len = 0; /* chunk length in bytes */
  1632. zend_long char_count;
  1633. for (char_count = 0; char_count < split_length && p < last; ++char_count) {
  1634. char unsigned const m = mbtab[*(const unsigned char *)p]; /* single character length table */
  1635. chunk_len += m;
  1636. p += m;
  1637. }
  1638. if (p >= last) chunk_len -= p - last; /* check if chunk is in bounds */
  1639. add_next_index_stringl(return_value, chunk_p, chunk_len);
  1640. }
  1641. return;
  1642. } else {
  1643. /* third scenario: other multibyte encodings */
  1644. mbfl_convert_filter *filter, *decoder;
  1645. /* assume that we have 1-bytes characters */
  1646. array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
  1647. /* decoder filter to decode wchar to encoding */
  1648. mbfl_memory_device device;
  1649. mbfl_memory_device_init(&device, split_length + 1, 0);
  1650. decoder = mbfl_convert_filter_new(
  1651. &mbfl_encoding_wchar,
  1652. string.encoding,
  1653. mbfl_memory_device_output,
  1654. NULL,
  1655. &device);
  1656. /* assert that nothing is wrong with the decoder */
  1657. ZEND_ASSERT(decoder != NULL);
  1658. /* wchar filter */
  1659. mbfl_string_init(&result_string); /* mbfl_string to store chunk in the callback */
  1660. struct mbfl_split_params params = { /* init callback function params structure */
  1661. .return_value = return_value,
  1662. .result_string = &result_string,
  1663. .mb_chunk_length = 0,
  1664. .split_length = (size_t)split_length,
  1665. .next_filter = decoder,
  1666. };
  1667. filter = mbfl_convert_filter_new(
  1668. string.encoding,
  1669. &mbfl_encoding_wchar,
  1670. mbfl_split_output,
  1671. NULL,
  1672. &params);
  1673. /* assert that nothing is wrong with the filter */
  1674. ZEND_ASSERT(filter != NULL);
  1675. while (p < last - 1) { /* cycle each byte except last with callback function */
  1676. (*filter->filter_function)(*p++, filter);
  1677. }
  1678. params.mb_chunk_length = split_length - 1; /* force to finish current chunk */
  1679. (*filter->filter_function)(*p++, filter); /*process last char */
  1680. mbfl_convert_filter_delete(decoder);
  1681. mbfl_convert_filter_delete(filter);
  1682. mbfl_memory_device_clear(&device);
  1683. return;
  1684. }
  1685. /* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
  1686. chunks = (mb_len + split_length - 1) / split_length; /* (round up idiom) */
  1687. array_init_size(return_value, chunks);
  1688. if (chunks != 0) {
  1689. zend_long i;
  1690. for (i = 0; i < chunks - 1; p += chunk_len, ++i) {
  1691. add_next_index_stringl(return_value, p, chunk_len);
  1692. }
  1693. add_next_index_stringl(return_value, p, last - p);
  1694. }
  1695. }
  1696. /* }}} */
  1697. /* {{{ proto int mb_strlen(string str [, string encoding])
  1698. Get character numbers of a string */
  1699. PHP_FUNCTION(mb_strlen)
  1700. {
  1701. size_t n;
  1702. mbfl_string string;
  1703. char *str;
  1704. size_t str_len;
  1705. zend_string *enc_name = NULL;
  1706. ZEND_PARSE_PARAMETERS_START(1, 2)
  1707. Z_PARAM_STRING(str, str_len)
  1708. Z_PARAM_OPTIONAL
  1709. Z_PARAM_STR(enc_name)
  1710. ZEND_PARSE_PARAMETERS_END();
  1711. string.val = (unsigned char *) str;
  1712. string.len = str_len;
  1713. string.encoding = php_mb_get_encoding(enc_name, 2);
  1714. if (!string.encoding) {
  1715. RETURN_THROWS();
  1716. }
  1717. n = mbfl_strlen(&string);
  1718. /* Only way this can fail is if the conversion creation fails
  1719. * this would imply some sort of memory allocation failure which is a bug */
  1720. ZEND_ASSERT(!mbfl_is_error(n));
  1721. RETVAL_LONG(n);
  1722. }
  1723. /* }}} */
  1724. static void handle_strpos_error(size_t error) {
  1725. switch (error) {
  1726. case MBFL_ERROR_NOT_FOUND:
  1727. break;
  1728. case MBFL_ERROR_ENCODING:
  1729. php_error_docref(NULL, E_WARNING, "Conversion error");
  1730. break;
  1731. case MBFL_ERROR_OFFSET:
  1732. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1733. break;
  1734. default:
  1735. zend_value_error("mb_strpos(): Unknown error");
  1736. break;
  1737. }
  1738. }
  1739. /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
  1740. Find position of first occurrence of a string within another */
  1741. PHP_FUNCTION(mb_strpos)
  1742. {
  1743. int reverse = 0;
  1744. zend_long offset = 0;
  1745. mbfl_string haystack, needle;
  1746. zend_string *enc_name = NULL;
  1747. size_t n;
  1748. if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name) == FAILURE) {
  1749. RETURN_THROWS();
  1750. }
  1751. haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
  1752. if (!haystack.encoding) {
  1753. RETURN_THROWS();
  1754. }
  1755. n = mbfl_strpos(&haystack, &needle, offset, reverse);
  1756. if (!mbfl_is_error(n)) {
  1757. RETVAL_LONG(n);
  1758. } else {
  1759. handle_strpos_error(n);
  1760. RETVAL_FALSE;
  1761. }
  1762. }
  1763. /* }}} */
  1764. /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
  1765. Find position of last occurrence of a string within another */
  1766. PHP_FUNCTION(mb_strrpos)
  1767. {
  1768. mbfl_string haystack, needle;
  1769. zend_string *enc_name = NULL;
  1770. zend_long offset = 0, n;
  1771. if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name) == FAILURE) {
  1772. RETURN_THROWS();
  1773. }
  1774. haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
  1775. if (!haystack.encoding) {
  1776. RETURN_THROWS();
  1777. }
  1778. n = mbfl_strpos(&haystack, &needle, offset, 1);
  1779. if (!mbfl_is_error(n)) {
  1780. RETVAL_LONG(n);
  1781. } else {
  1782. handle_strpos_error(n);
  1783. RETVAL_FALSE;
  1784. }
  1785. }
  1786. /* }}} */
  1787. /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
  1788. Finds position of first occurrence of a string within another, case insensitive */
  1789. PHP_FUNCTION(mb_stripos)
  1790. {
  1791. size_t n = (size_t) -1;
  1792. zend_long offset = 0;
  1793. mbfl_string haystack, needle;
  1794. zend_string *from_encoding = NULL;
  1795. const mbfl_encoding *enc;
  1796. if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) {
  1797. RETURN_THROWS();
  1798. }
  1799. enc = php_mb_get_encoding(from_encoding, 4);
  1800. if (!enc) {
  1801. RETURN_THROWS();
  1802. }
  1803. n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
  1804. if (!mbfl_is_error(n)) {
  1805. RETVAL_LONG(n);
  1806. } else {
  1807. handle_strpos_error(n);
  1808. RETVAL_FALSE;
  1809. }
  1810. }
  1811. /* }}} */
  1812. /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
  1813. Finds position of last occurrence of a string within another, case insensitive */
  1814. PHP_FUNCTION(mb_strripos)
  1815. {
  1816. size_t n = (size_t) -1;
  1817. zend_long offset = 0;
  1818. mbfl_string haystack, needle;
  1819. zend_string *from_encoding = NULL;
  1820. const mbfl_encoding *enc;
  1821. if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) {
  1822. RETURN_THROWS();
  1823. }
  1824. enc = php_mb_get_encoding(from_encoding, 4);
  1825. if (!enc) {
  1826. RETURN_THROWS();
  1827. }
  1828. n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
  1829. if (!mbfl_is_error(n)) {
  1830. RETVAL_LONG(n);
  1831. } else {
  1832. handle_strpos_error(n);
  1833. RETVAL_FALSE;
  1834. }
  1835. }
  1836. /* }}} */
  1837. #define MB_STRSTR 1
  1838. #define MB_STRRCHR 2
  1839. #define MB_STRISTR 3
  1840. #define MB_STRRICHR 4
  1841. /* {{{ php_mb_strstr_variants */
  1842. static void php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS, unsigned int variant)
  1843. {
  1844. int reverse_mode = 0;
  1845. size_t n;
  1846. mbfl_string haystack, needle, result, *ret = NULL;
  1847. zend_string *encoding_name = NULL;
  1848. zend_bool part = 0;
  1849. if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS",
  1850. (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len,
  1851. &part, &encoding_name) == FAILURE
  1852. ) {
  1853. RETURN_THROWS();
  1854. }
  1855. haystack.encoding = needle.encoding = php_mb_get_encoding(encoding_name, 4);
  1856. if (!haystack.encoding) {
  1857. RETURN_THROWS();
  1858. }
  1859. if (variant == MB_STRRCHR || variant == MB_STRRICHR) { reverse_mode = 1; }
  1860. if (variant == MB_STRISTR || variant == MB_STRRICHR) {
  1861. n = php_mb_stripos(reverse_mode, (char *)haystack.val, haystack.len, (char *)needle.val,
  1862. needle.len, 0, needle.encoding);
  1863. } else {
  1864. n = mbfl_strpos(&haystack, &needle, 0, reverse_mode);
  1865. }
  1866. if (!mbfl_is_error(n)) {
  1867. if (part) {
  1868. ret = mbfl_substr(&haystack, &result, 0, n);
  1869. ZEND_ASSERT(ret != NULL);
  1870. // TODO: avoid reallocation ???
  1871. RETVAL_STRINGL((char *)ret->val, ret->len);
  1872. efree(ret->val);
  1873. } else {
  1874. ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
  1875. ZEND_ASSERT(ret != NULL);
  1876. // TODO: avoid reallocation ???
  1877. RETVAL_STRINGL((char *)ret->val, ret->len);
  1878. efree(ret->val);
  1879. }
  1880. } else {
  1881. // FIXME use handle_strpos_error(n)
  1882. RETVAL_FALSE;
  1883. }
  1884. }
  1885. /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
  1886. Finds first occurrence of a string within another */
  1887. PHP_FUNCTION(mb_strstr)
  1888. {
  1889. php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRSTR);
  1890. }
  1891. /* }}} */
  1892. /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
  1893. Finds the last occurrence of a character in a string within another */
  1894. PHP_FUNCTION(mb_strrchr)
  1895. {
  1896. php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRCHR);
  1897. }
  1898. /* }}} */
  1899. /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
  1900. Finds first occurrence of a string within another, case insensitive */
  1901. PHP_FUNCTION(mb_stristr)
  1902. {
  1903. php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRISTR);
  1904. }
  1905. /* }}} */
  1906. /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
  1907. Finds the last occurrence of a character in a string within another, case insensitive */
  1908. PHP_FUNCTION(mb_strrichr)
  1909. {
  1910. php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRICHR);
  1911. }
  1912. /* }}} */
  1913. #undef MB_STRSTR
  1914. #undef MB_STRRCHR
  1915. #undef MB_STRISTR
  1916. #undef MB_STRRICHR
  1917. /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
  1918. Count the number of substring occurrences */
  1919. PHP_FUNCTION(mb_substr_count)
  1920. {
  1921. size_t n;
  1922. mbfl_string haystack, needle;
  1923. zend_string *enc_name = NULL;
  1924. if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|S", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name) == FAILURE) {
  1925. RETURN_THROWS();
  1926. }
  1927. if (needle.len == 0) {
  1928. zend_argument_value_error(2, "must not be empty");
  1929. RETURN_THROWS();
  1930. }
  1931. haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 3);
  1932. if (!haystack.encoding) {
  1933. RETURN_THROWS();
  1934. }
  1935. n = mbfl_substr_count(&haystack, &needle);
  1936. /* An error can only occur if needle is empty,
  1937. * an encoding error happens (which should not happen at this stage and is a bug)
  1938. * or the haystack is more than sizeof(size_t) bytes
  1939. * If one of these things occur this is a bug and should be flagged as such */
  1940. ZEND_ASSERT(!mbfl_is_error(n));
  1941. RETVAL_LONG(n);
  1942. }
  1943. /* }}} */
  1944. /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
  1945. Returns part of a string */
  1946. PHP_FUNCTION(mb_substr)
  1947. {
  1948. char *str;
  1949. zend_string *encoding = NULL;
  1950. zend_long from, len;
  1951. size_t mblen, real_from, real_len;
  1952. size_t str_len;
  1953. zend_bool len_is_null = 1;
  1954. mbfl_string string, result, *ret;
  1955. if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S", &str, &str_len, &from, &len, &len_is_null, &encoding) == FAILURE) {
  1956. RETURN_THROWS();
  1957. }
  1958. string.encoding = php_mb_get_encoding(encoding, 4);
  1959. if (!string.encoding) {
  1960. RETURN_THROWS();
  1961. }
  1962. string.val = (unsigned char *)str;
  1963. string.len = str_len;
  1964. /* measures length */
  1965. mblen = 0;
  1966. if (from < 0 || (!len_is_null && len < 0)) {
  1967. mblen = mbfl_strlen(&string);
  1968. }
  1969. /* if "from" position is negative, count start position from the end
  1970. * of the string
  1971. */
  1972. if (from >= 0) {
  1973. real_from = (size_t) from;
  1974. } else if (-from < mblen) {
  1975. real_from = mblen + from;
  1976. } else {
  1977. real_from = 0;
  1978. }
  1979. /* if "length" position is negative, set it to the length
  1980. * needed to stop that many chars from the end of the string
  1981. */
  1982. if (len_is_null) {
  1983. real_len = MBFL_SUBSTR_UNTIL_END;
  1984. } else if (len >= 0) {
  1985. real_len = (size_t) len;
  1986. } else if (real_from < mblen && -len < mblen - real_from) {
  1987. real_len = (mblen - real_from) + len;
  1988. } else {
  1989. real_len = 0;
  1990. }
  1991. ret = mbfl_substr(&string, &result, real_from, real_len);
  1992. ZEND_ASSERT(ret != NULL);
  1993. // TODO: avoid reallocation ???
  1994. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  1995. efree(ret->val);
  1996. }
  1997. /* }}} */
  1998. /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
  1999. Returns part of a string */
  2000. PHP_FUNCTION(mb_strcut)
  2001. {
  2002. zend_string *encoding = NULL;
  2003. zend_long from, len;
  2004. zend_bool len_is_null = 1;
  2005. mbfl_string string, result, *ret;
  2006. if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S", (char **)&string.val, &string.len, &from, &len, &len_is_null, &encoding) == FAILURE) {
  2007. RETURN_THROWS();
  2008. }
  2009. string.encoding = php_mb_get_encoding(encoding, 4);
  2010. if (!string.encoding) {
  2011. RETURN_THROWS();
  2012. }
  2013. if (len_is_null) {
  2014. len = string.len;
  2015. }
  2016. /* if "from" position is negative, count start position from the end
  2017. * of the string
  2018. */
  2019. if (from < 0) {
  2020. from = string.len + from;
  2021. if (from < 0) {
  2022. from = 0;
  2023. }
  2024. }
  2025. /* if "length" position is negative, set it to the length
  2026. * needed to stop that many chars from the end of the string
  2027. */
  2028. if (len < 0) {
  2029. len = (string.len - from) + len;
  2030. if (len < 0) {
  2031. len = 0;
  2032. }
  2033. }
  2034. if (from > string.len) {
  2035. // TODO Out of bounds ValueError
  2036. RETURN_FALSE;
  2037. }
  2038. ret = mbfl_strcut(&string, &result, from, len);
  2039. ZEND_ASSERT(ret != NULL);
  2040. // TODO: avoid reallocation ???
  2041. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  2042. efree(ret->val);
  2043. }
  2044. /* }}} */
  2045. /* {{{ proto int mb_strwidth(string str [, string encoding])
  2046. Gets terminal width of a string */
  2047. PHP_FUNCTION(mb_strwidth)
  2048. {
  2049. size_t n;
  2050. mbfl_string string;
  2051. zend_string *enc_name = NULL;
  2052. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S", (char **)&string.val, &string.len, &enc_name) == FAILURE) {
  2053. RETURN_THROWS();
  2054. }
  2055. string.encoding = php_mb_get_encoding(enc_name, 2);
  2056. if (!string.encoding) {
  2057. RETURN_THROWS();
  2058. }
  2059. n = mbfl_strwidth(&string);
  2060. ZEND_ASSERT(n != (size_t) -1);
  2061. RETVAL_LONG(n);
  2062. }
  2063. /* }}} */
  2064. /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
  2065. Trim the string in terminal width */
  2066. PHP_FUNCTION(mb_strimwidth)
  2067. {
  2068. char *str, *trimmarker = NULL;
  2069. zend_string *encoding = NULL;
  2070. zend_long from, width, swidth = 0;
  2071. size_t str_len, trimmarker_len;
  2072. mbfl_string string, result, marker, *ret;
  2073. if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|sS", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding) == FAILURE) {
  2074. RETURN_THROWS();
  2075. }
  2076. string.encoding = marker.encoding = php_mb_get_encoding(encoding, 5);
  2077. if (!string.encoding) {
  2078. RETURN_THROWS();
  2079. }
  2080. string.val = (unsigned char *)str;
  2081. string.len = str_len;
  2082. marker.val = NULL;
  2083. marker.len = 0;
  2084. if ((from < 0) || (width < 0)) {
  2085. swidth = mbfl_strwidth(&string);
  2086. }
  2087. if (from < 0) {
  2088. from += swidth;
  2089. }
  2090. if (from < 0 || (size_t)from > str_len) {
  2091. zend_argument_value_error(2, "is out of range");
  2092. RETURN_THROWS();
  2093. }
  2094. if (width < 0) {
  2095. width = swidth + width - from;
  2096. }
  2097. if (width < 0) {
  2098. zend_argument_value_error(3, "is out of range");
  2099. RETURN_THROWS();
  2100. }
  2101. if (trimmarker) {
  2102. marker.val = (unsigned char *)trimmarker;
  2103. marker.len = trimmarker_len;
  2104. }
  2105. ret = mbfl_strimwidth(&string, &marker, &result, from, width);
  2106. ZEND_ASSERT(ret != NULL);
  2107. // TODO: avoid reallocation ???
  2108. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  2109. efree(ret->val);
  2110. }
  2111. /* }}} */
  2112. /* See mbfl_no_encoding definition for list of unsupported encodings */
  2113. static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
  2114. {
  2115. return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
  2116. || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
  2117. || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
  2118. || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
  2119. }
  2120. /* See mbfl_no_encoding definition for list of UTF-8 encodings */
  2121. static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
  2122. {
  2123. return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
  2124. }
  2125. MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
  2126. {
  2127. mbfl_string string, result, *ret;
  2128. mbfl_buffer_converter *convd;
  2129. char *output = NULL;
  2130. if (output_len) {
  2131. *output_len = 0;
  2132. }
  2133. /* initialize string */
  2134. string.encoding = from_encoding;
  2135. string.val = (unsigned char *)input;
  2136. string.len = length;
  2137. /* initialize converter */
  2138. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  2139. /* If this assertion fails this means some memory allocation failure which is a bug */
  2140. ZEND_ASSERT(convd != NULL);
  2141. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  2142. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  2143. /* do it */
  2144. mbfl_string_init(&result);
  2145. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2146. if (ret) {
  2147. if (output_len) {
  2148. *output_len = ret->len;
  2149. }
  2150. output = (char *)ret->val;
  2151. }
  2152. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  2153. mbfl_buffer_converter_delete(convd);
  2154. return output;
  2155. }
  2156. /* }}} */
  2157. /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
  2158. MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings, size_t *output_len)
  2159. {
  2160. const mbfl_encoding *from_encoding;
  2161. if (output_len) {
  2162. *output_len = 0;
  2163. }
  2164. /* pre-conversion encoding */
  2165. ZEND_ASSERT(num_from_encodings >= 1);
  2166. if (num_from_encodings == 1) {
  2167. from_encoding = *from_encodings;
  2168. } else {
  2169. /* auto detect */
  2170. mbfl_string string;
  2171. mbfl_string_init(&string);
  2172. string.val = (unsigned char *)input;
  2173. string.len = length;
  2174. from_encoding = mbfl_identify_encoding(
  2175. &string, from_encodings, num_from_encodings, MBSTRG(strict_detection));
  2176. if (!from_encoding) {
  2177. php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
  2178. return NULL;
  2179. }
  2180. }
  2181. return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len);
  2182. }
  2183. /* }}} */
  2184. MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings)
  2185. {
  2186. HashTable *output, *chash;
  2187. zend_long idx;
  2188. zend_string *key;
  2189. zval *entry, entry_tmp;
  2190. size_t ckey_len, cval_len;
  2191. char *ckey, *cval;
  2192. if (!input) {
  2193. return NULL;
  2194. }
  2195. if (GC_IS_RECURSIVE(input)) {
  2196. GC_UNPROTECT_RECURSION(input);
  2197. php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
  2198. return NULL;
  2199. }
  2200. GC_TRY_PROTECT_RECURSION(input);
  2201. output = zend_new_array(zend_hash_num_elements(input));
  2202. ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
  2203. /* convert key */
  2204. if (key) {
  2205. ckey = php_mb_convert_encoding(
  2206. ZSTR_VAL(key), ZSTR_LEN(key),
  2207. to_encoding, from_encodings, num_from_encodings, &ckey_len);
  2208. key = zend_string_init(ckey, ckey_len, 0);
  2209. efree(ckey);
  2210. }
  2211. /* convert value */
  2212. ZEND_ASSERT(entry);
  2213. switch(Z_TYPE_P(entry)) {
  2214. case IS_STRING:
  2215. cval = php_mb_convert_encoding(
  2216. Z_STRVAL_P(entry), Z_STRLEN_P(entry),
  2217. to_encoding, from_encodings, num_from_encodings, &cval_len);
  2218. ZVAL_STRINGL(&entry_tmp, cval, cval_len);
  2219. efree(cval);
  2220. break;
  2221. case IS_NULL:
  2222. case IS_TRUE:
  2223. case IS_FALSE:
  2224. case IS_LONG:
  2225. case IS_DOUBLE:
  2226. ZVAL_COPY(&entry_tmp, entry);
  2227. break;
  2228. case IS_ARRAY:
  2229. chash = php_mb_convert_encoding_recursive(
  2230. Z_ARRVAL_P(entry), to_encoding, from_encodings, num_from_encodings);
  2231. if (chash) {
  2232. ZVAL_ARR(&entry_tmp, chash);
  2233. } else {
  2234. ZVAL_EMPTY_ARRAY(&entry_tmp);
  2235. }
  2236. break;
  2237. case IS_OBJECT:
  2238. default:
  2239. if (key) {
  2240. zend_string_release(key);
  2241. }
  2242. php_error_docref(NULL, E_WARNING, "Object is not supported");
  2243. continue;
  2244. }
  2245. if (key) {
  2246. zend_hash_add(output, key, &entry_tmp);
  2247. zend_string_release(key);
  2248. } else {
  2249. zend_hash_index_add(output, idx, &entry_tmp);
  2250. }
  2251. } ZEND_HASH_FOREACH_END();
  2252. GC_TRY_UNPROTECT_RECURSION(input);
  2253. return output;
  2254. }
  2255. /* }}} */
  2256. /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
  2257. Returns converted string in desired encoding */
  2258. PHP_FUNCTION(mb_convert_encoding)
  2259. {
  2260. zend_string *to_encoding_name;
  2261. zend_string *input_str, *from_encodings_str = NULL;
  2262. HashTable *input_ht, *from_encodings_ht = NULL;
  2263. const mbfl_encoding *to_encoding;
  2264. const mbfl_encoding **from_encodings;
  2265. size_t num_from_encodings;
  2266. zend_bool free_from_encodings;
  2267. ZEND_PARSE_PARAMETERS_START(2, 3)
  2268. Z_PARAM_STR_OR_ARRAY_HT(input_str, input_ht)
  2269. Z_PARAM_STR(to_encoding_name)
  2270. Z_PARAM_OPTIONAL
  2271. Z_PARAM_STR_OR_ARRAY_HT(from_encodings_str, from_encodings_ht)
  2272. ZEND_PARSE_PARAMETERS_END();
  2273. to_encoding = php_mb_get_encoding(to_encoding_name, 2);
  2274. if (!to_encoding) {
  2275. RETURN_THROWS();
  2276. }
  2277. if (from_encodings_ht) {
  2278. if (php_mb_parse_encoding_array(from_encodings_ht, &from_encodings, &num_from_encodings, 3) == FAILURE) {
  2279. RETURN_THROWS();
  2280. }
  2281. free_from_encodings = 1;
  2282. } else if (from_encodings_str) {
  2283. if (php_mb_parse_encoding_list(ZSTR_VAL(from_encodings_str), ZSTR_LEN(from_encodings_str),
  2284. &from_encodings, &num_from_encodings,
  2285. /* persistent */ 0, /* arg_num */ 3, /* allow_pass_encoding */ 0) == FAILURE) {
  2286. RETURN_THROWS();
  2287. }
  2288. free_from_encodings = 1;
  2289. } else {
  2290. from_encodings = &MBSTRG(current_internal_encoding);
  2291. num_from_encodings = 1;
  2292. free_from_encodings = 0;
  2293. }
  2294. if (!num_from_encodings) {
  2295. efree(from_encodings);
  2296. zend_argument_value_error(3, "must specify at least one encoding");
  2297. RETURN_THROWS();
  2298. }
  2299. if (input_str) {
  2300. /* new encoding */
  2301. size_t size;
  2302. char *ret = php_mb_convert_encoding(
  2303. ZSTR_VAL(input_str), ZSTR_LEN(input_str),
  2304. to_encoding, from_encodings, num_from_encodings, &size);
  2305. if (ret != NULL) {
  2306. // TODO: avoid reallocation ???
  2307. RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
  2308. efree(ret);
  2309. } else {
  2310. RETVAL_FALSE;
  2311. }
  2312. } else {
  2313. HashTable *tmp;
  2314. tmp = php_mb_convert_encoding_recursive(
  2315. input_ht, to_encoding, from_encodings, num_from_encodings);
  2316. RETVAL_ARR(tmp);
  2317. }
  2318. if (free_from_encodings) {
  2319. efree(from_encodings);
  2320. }
  2321. }
  2322. /* }}} */
  2323. static char *mbstring_convert_case(
  2324. int case_mode, const char *str, size_t str_len, size_t *ret_len,
  2325. const mbfl_encoding *enc) {
  2326. return php_unicode_convert_case(
  2327. case_mode, str, str_len, ret_len, enc,
  2328. MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar));
  2329. }
  2330. /* {{{ proto string mb_convert_case(string source_string, int mode [, string encoding])
  2331. Returns a case-folded version of source_string */
  2332. PHP_FUNCTION(mb_convert_case)
  2333. {
  2334. zend_string *from_encoding = NULL;
  2335. char *str;
  2336. size_t str_len;
  2337. zend_long case_mode = 0;
  2338. char *newstr;
  2339. size_t ret_len;
  2340. const mbfl_encoding *enc;
  2341. if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|S!", &str, &str_len, &case_mode, &from_encoding) == FAILURE) {
  2342. RETURN_THROWS();
  2343. }
  2344. enc = php_mb_get_encoding(from_encoding, 3);
  2345. if (!enc) {
  2346. RETURN_THROWS();
  2347. }
  2348. if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
  2349. zend_argument_value_error(2, "must be one of MB_CASE_UPPER, MB_CASE_LOWER, MB_CASE_TITLE, MB_CASE_FOLD,"
  2350. " MB_CASE_UPPER_SIMPLE, MB_CASE_LOWER_SIMPLE, MB_CASE_TITLE_SIMPLE, or MB_CASE_FOLD_SIMPLE");
  2351. RETURN_THROWS();
  2352. }
  2353. newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
  2354. /* If newstr is NULL something went wrong in mbfl and this is a bug */
  2355. ZEND_ASSERT(newstr != NULL);
  2356. // TODO: avoid reallocation ???
  2357. RETVAL_STRINGL(newstr, ret_len);
  2358. efree(newstr);
  2359. }
  2360. /* }}} */
  2361. /* {{{ proto string mb_strtoupper(string source_string [, string encoding])
  2362. * Returns a upper cased version of source_string
  2363. */
  2364. PHP_FUNCTION(mb_strtoupper)
  2365. {
  2366. zend_string *from_encoding = NULL;
  2367. char *str;
  2368. size_t str_len;
  2369. char *newstr;
  2370. size_t ret_len;
  2371. const mbfl_encoding *enc;
  2372. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len, &from_encoding) == FAILURE) {
  2373. RETURN_THROWS();
  2374. }
  2375. enc = php_mb_get_encoding(from_encoding, 2);
  2376. if (!enc) {
  2377. RETURN_THROWS();
  2378. }
  2379. newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
  2380. /* If newstr is NULL something went wrong in mbfl and this is a bug */
  2381. ZEND_ASSERT(newstr != NULL);
  2382. // TODO: avoid reallocation ???
  2383. RETVAL_STRINGL(newstr, ret_len);
  2384. efree(newstr);
  2385. }
  2386. /* }}} */
  2387. /* {{{ proto string mb_strtolower(string source_string [, string encoding])
  2388. * Returns a lower cased version of source_string
  2389. */
  2390. PHP_FUNCTION(mb_strtolower)
  2391. {
  2392. zend_string *from_encoding = NULL;
  2393. char *str;
  2394. size_t str_len;
  2395. char *newstr;
  2396. size_t ret_len;
  2397. const mbfl_encoding *enc;
  2398. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len, &from_encoding) == FAILURE) {
  2399. RETURN_THROWS();
  2400. }
  2401. enc = php_mb_get_encoding(from_encoding, 2);
  2402. if (!enc) {
  2403. RETURN_THROWS();
  2404. }
  2405. newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
  2406. /* If newstr is NULL something went wrong in mbfl and this is a bug */
  2407. ZEND_ASSERT(newstr != NULL);
  2408. // TODO: avoid reallocation ???
  2409. RETVAL_STRINGL(newstr, ret_len);
  2410. efree(newstr);
  2411. }
  2412. /* }}} */
  2413. /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
  2414. Encodings of the given string is returned (as a string) */
  2415. PHP_FUNCTION(mb_detect_encoding)
  2416. {
  2417. char *str;
  2418. size_t str_len;
  2419. zend_string *encoding_str = NULL;
  2420. HashTable *encoding_ht = NULL;
  2421. zend_bool strict = 0;
  2422. mbfl_string string;
  2423. const mbfl_encoding *ret;
  2424. const mbfl_encoding **elist;
  2425. size_t size;
  2426. zend_bool free_elist;
  2427. ZEND_PARSE_PARAMETERS_START(1, 3)
  2428. Z_PARAM_STRING(str, str_len)
  2429. Z_PARAM_OPTIONAL
  2430. Z_PARAM_STR_OR_ARRAY_HT_OR_NULL(encoding_str, encoding_ht)
  2431. Z_PARAM_BOOL(strict)
  2432. ZEND_PARSE_PARAMETERS_END();
  2433. /* make encoding list */
  2434. if (encoding_ht) {
  2435. if (FAILURE == php_mb_parse_encoding_array(encoding_ht, &elist, &size, 2)) {
  2436. RETURN_THROWS();
  2437. }
  2438. free_elist = 1;
  2439. } else if (encoding_str) {
  2440. if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0)) {
  2441. RETURN_THROWS();
  2442. }
  2443. free_elist = 1;
  2444. } else {
  2445. elist = MBSTRG(current_detect_order_list);
  2446. size = MBSTRG(current_detect_order_list_size);
  2447. free_elist = 0;
  2448. }
  2449. if (size == 0) {
  2450. efree(elist);
  2451. zend_argument_value_error(2, "must specify at least one encoding");
  2452. RETURN_THROWS();
  2453. }
  2454. if (ZEND_NUM_ARGS() < 3) {
  2455. strict = MBSTRG(strict_detection);
  2456. }
  2457. mbfl_string_init(&string);
  2458. string.val = (unsigned char *)str;
  2459. string.len = str_len;
  2460. ret = mbfl_identify_encoding(&string, elist, size, strict);
  2461. if (free_elist) {
  2462. efree(elist);
  2463. }
  2464. if (ret == NULL) {
  2465. RETURN_FALSE;
  2466. }
  2467. RETVAL_STRING((char *)ret->name);
  2468. }
  2469. /* }}} */
  2470. /* {{{ proto mixed mb_list_encodings()
  2471. Returns an array of all supported entity encodings */
  2472. PHP_FUNCTION(mb_list_encodings)
  2473. {
  2474. const mbfl_encoding **encodings;
  2475. const mbfl_encoding *encoding;
  2476. int i;
  2477. if (zend_parse_parameters_none() == FAILURE) {
  2478. RETURN_THROWS();
  2479. }
  2480. array_init(return_value);
  2481. i = 0;
  2482. encodings = mbfl_get_supported_encodings();
  2483. while ((encoding = encodings[i++]) != NULL) {
  2484. add_next_index_string(return_value, (char *) encoding->name);
  2485. }
  2486. }
  2487. /* }}} */
  2488. /* {{{ proto array mb_encoding_aliases(string encoding)
  2489. Returns an array of the aliases of a given encoding name */
  2490. PHP_FUNCTION(mb_encoding_aliases)
  2491. {
  2492. const mbfl_encoding *encoding;
  2493. zend_string *encoding_name = NULL;
  2494. if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &encoding_name) == FAILURE) {
  2495. RETURN_THROWS();
  2496. }
  2497. encoding = php_mb_get_encoding(encoding_name, 1);
  2498. if (!encoding) {
  2499. RETURN_THROWS();
  2500. }
  2501. array_init(return_value);
  2502. if (encoding->aliases != NULL) {
  2503. const char **alias;
  2504. for (alias = *encoding->aliases; *alias; ++alias) {
  2505. add_next_index_string(return_value, (char *)*alias);
  2506. }
  2507. }
  2508. }
  2509. /* }}} */
  2510. /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
  2511. Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
  2512. PHP_FUNCTION(mb_encode_mimeheader)
  2513. {
  2514. const mbfl_encoding *charset, *transenc;
  2515. mbfl_string string, result, *ret;
  2516. zend_string *charset_name = NULL;
  2517. char *trans_enc_name = NULL;
  2518. size_t trans_enc_name_len;
  2519. char *linefeed = "\r\n";
  2520. size_t linefeed_len;
  2521. zend_long indent = 0;
  2522. string.encoding = MBSTRG(current_internal_encoding);
  2523. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|Sssl", (char **)&string.val, &string.len, &charset_name, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
  2524. RETURN_THROWS();
  2525. }
  2526. charset = &mbfl_encoding_pass;
  2527. transenc = &mbfl_encoding_base64;
  2528. if (charset_name != NULL) {
  2529. charset = php_mb_get_encoding(charset_name, 2);
  2530. if (!charset) {
  2531. RETURN_THROWS();
  2532. }
  2533. } else {
  2534. const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
  2535. if (lang != NULL) {
  2536. charset = mbfl_no2encoding(lang->mail_charset);
  2537. transenc = mbfl_no2encoding(lang->mail_header_encoding);
  2538. }
  2539. }
  2540. if (trans_enc_name != NULL) {
  2541. if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
  2542. transenc = &mbfl_encoding_base64;
  2543. } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
  2544. transenc = &mbfl_encoding_qprint;
  2545. }
  2546. }
  2547. mbfl_string_init(&result);
  2548. ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
  2549. ZEND_ASSERT(ret != NULL);
  2550. // TODO: avoid reallocation ???
  2551. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  2552. efree(ret->val);
  2553. }
  2554. /* }}} */
  2555. /* {{{ proto string mb_decode_mimeheader(string string)
  2556. Decodes the MIME "encoded-word" in the string */
  2557. PHP_FUNCTION(mb_decode_mimeheader)
  2558. {
  2559. mbfl_string string, result, *ret;
  2560. string.encoding = MBSTRG(current_internal_encoding);
  2561. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string.len) == FAILURE) {
  2562. RETURN_THROWS();
  2563. }
  2564. mbfl_string_init(&result);
  2565. ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
  2566. ZEND_ASSERT(ret != NULL);
  2567. // TODO: avoid reallocation ???
  2568. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  2569. efree(ret->val);
  2570. }
  2571. /* }}} */
  2572. /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
  2573. Conversion between full-width character and half-width character (Japanese) */
  2574. PHP_FUNCTION(mb_convert_kana)
  2575. {
  2576. int opt;
  2577. mbfl_string string, result, *ret;
  2578. char *optstr = NULL;
  2579. size_t optstr_len;
  2580. zend_string *encname = NULL;
  2581. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sS", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname) == FAILURE) {
  2582. RETURN_THROWS();
  2583. }
  2584. /* option */
  2585. if (optstr != NULL) {
  2586. char *p = optstr;
  2587. size_t i = 0, n = optstr_len;
  2588. opt = 0;
  2589. while (i < n) {
  2590. i++;
  2591. switch (*p++) {
  2592. case 'A':
  2593. opt |= 0x1;
  2594. break;
  2595. case 'a':
  2596. opt |= 0x10;
  2597. break;
  2598. case 'R':
  2599. opt |= 0x2;
  2600. break;
  2601. case 'r':
  2602. opt |= 0x20;
  2603. break;
  2604. case 'N':
  2605. opt |= 0x4;
  2606. break;
  2607. case 'n':
  2608. opt |= 0x40;
  2609. break;
  2610. case 'S':
  2611. opt |= 0x8;
  2612. break;
  2613. case 's':
  2614. opt |= 0x80;
  2615. break;
  2616. case 'K':
  2617. opt |= 0x100;
  2618. break;
  2619. case 'k':
  2620. opt |= 0x1000;
  2621. break;
  2622. case 'H':
  2623. opt |= 0x200;
  2624. break;
  2625. case 'h':
  2626. opt |= 0x2000;
  2627. break;
  2628. case 'V':
  2629. opt |= 0x800;
  2630. break;
  2631. case 'C':
  2632. opt |= 0x10000;
  2633. break;
  2634. case 'c':
  2635. opt |= 0x20000;
  2636. break;
  2637. case 'M':
  2638. opt |= 0x100000;
  2639. break;
  2640. case 'm':
  2641. opt |= 0x200000;
  2642. break;
  2643. }
  2644. }
  2645. } else {
  2646. opt = 0x900;
  2647. }
  2648. /* encoding */
  2649. string.encoding = php_mb_get_encoding(encname, 3);
  2650. if (!string.encoding) {
  2651. RETURN_THROWS();
  2652. }
  2653. ret = mbfl_ja_jp_hantozen(&string, &result, opt);
  2654. ZEND_ASSERT(ret != NULL);
  2655. // TODO: avoid reallocation ???
  2656. RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
  2657. efree(ret->val);
  2658. }
  2659. /* }}} */
  2660. static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */
  2661. {
  2662. mbfl_string string;
  2663. HashTable *ht;
  2664. zval *entry;
  2665. ZVAL_DEREF(var);
  2666. if (Z_TYPE_P(var) == IS_STRING) {
  2667. string.val = (unsigned char *)Z_STRVAL_P(var);
  2668. string.len = Z_STRLEN_P(var);
  2669. if (mbfl_encoding_detector_feed(identd, &string)) {
  2670. return 1; /* complete detecting */
  2671. }
  2672. } else if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
  2673. if (Z_REFCOUNTED_P(var)) {
  2674. if (Z_IS_RECURSIVE_P(var)) {
  2675. *recursion_error = 1;
  2676. return 0;
  2677. }
  2678. Z_PROTECT_RECURSION_P(var);
  2679. }
  2680. ht = HASH_OF(var);
  2681. if (ht != NULL) {
  2682. ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
  2683. if (mb_recursive_encoder_detector_feed(identd, entry, recursion_error)) {
  2684. if (Z_REFCOUNTED_P(var)) {
  2685. Z_UNPROTECT_RECURSION_P(var);
  2686. }
  2687. return 1;
  2688. } else if (*recursion_error) {
  2689. if (Z_REFCOUNTED_P(var)) {
  2690. Z_UNPROTECT_RECURSION_P(var);
  2691. }
  2692. return 0;
  2693. }
  2694. } ZEND_HASH_FOREACH_END();
  2695. }
  2696. if (Z_REFCOUNTED_P(var)) {
  2697. Z_UNPROTECT_RECURSION_P(var);
  2698. }
  2699. }
  2700. return 0;
  2701. } /* }}} */
  2702. static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */
  2703. {
  2704. mbfl_string string, result, *ret;
  2705. HashTable *ht;
  2706. zval *entry, *orig_var;
  2707. orig_var = var;
  2708. ZVAL_DEREF(var);
  2709. if (Z_TYPE_P(var) == IS_STRING) {
  2710. string.val = (unsigned char *)Z_STRVAL_P(var);
  2711. string.len = Z_STRLEN_P(var);
  2712. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2713. if (ret != NULL) {
  2714. zval_ptr_dtor(orig_var);
  2715. // TODO: avoid reallocation ???
  2716. ZVAL_STRINGL(orig_var, (char *)ret->val, ret->len);
  2717. efree(ret->val);
  2718. }
  2719. } else if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
  2720. if (Z_TYPE_P(var) == IS_ARRAY) {
  2721. SEPARATE_ARRAY(var);
  2722. }
  2723. if (Z_REFCOUNTED_P(var)) {
  2724. if (Z_IS_RECURSIVE_P(var)) {
  2725. return 1;
  2726. }
  2727. Z_PROTECT_RECURSION_P(var);
  2728. }
  2729. ht = HASH_OF(var);
  2730. if (ht != NULL) {
  2731. ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
  2732. if (mb_recursive_convert_variable(convd, entry)) {
  2733. if (Z_REFCOUNTED_P(var)) {
  2734. Z_UNPROTECT_RECURSION_P(var);
  2735. }
  2736. return 1;
  2737. }
  2738. } ZEND_HASH_FOREACH_END();
  2739. }
  2740. if (Z_REFCOUNTED_P(var)) {
  2741. Z_UNPROTECT_RECURSION_P(var);
  2742. }
  2743. }
  2744. return 0;
  2745. } /* }}} */
  2746. /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
  2747. Converts the string resource in variables to desired encoding */
  2748. PHP_FUNCTION(mb_convert_variables)
  2749. {
  2750. zval *args;
  2751. zend_string *to_enc_str;
  2752. zend_string *from_enc_str;
  2753. HashTable *from_enc_ht;
  2754. mbfl_string string, result;
  2755. const mbfl_encoding *from_encoding, *to_encoding;
  2756. mbfl_encoding_detector *identd;
  2757. mbfl_buffer_converter *convd;
  2758. int n, argc;
  2759. size_t elistsz;
  2760. const mbfl_encoding **elist;
  2761. int recursion_error = 0;
  2762. ZEND_PARSE_PARAMETERS_START(3, -1)
  2763. Z_PARAM_STR(to_enc_str)
  2764. Z_PARAM_STR_OR_ARRAY_HT(from_enc_str, from_enc_ht)
  2765. Z_PARAM_VARIADIC('+', args, argc)
  2766. ZEND_PARSE_PARAMETERS_END();
  2767. /* new encoding */
  2768. to_encoding = php_mb_get_encoding(to_enc_str, 1);
  2769. if (!to_encoding) {
  2770. RETURN_THROWS();
  2771. }
  2772. /* initialize string */
  2773. mbfl_string_init(&string);
  2774. mbfl_string_init(&result);
  2775. from_encoding = MBSTRG(current_internal_encoding);
  2776. string.encoding = from_encoding;
  2777. /* pre-conversion encoding */
  2778. if (from_enc_ht) {
  2779. if (php_mb_parse_encoding_array(from_enc_ht, &elist, &elistsz, 2) == FAILURE) {
  2780. RETURN_THROWS();
  2781. }
  2782. } else {
  2783. if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0) == FAILURE) {
  2784. RETURN_THROWS();
  2785. }
  2786. }
  2787. if (elistsz == 0) {
  2788. efree(elist);
  2789. zend_argument_value_error(2, "must specify at least one encoding");
  2790. RETURN_THROWS();
  2791. }
  2792. if (elistsz == 1) {
  2793. from_encoding = *elist;
  2794. } else {
  2795. /* auto detect */
  2796. from_encoding = NULL;
  2797. identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
  2798. if (identd != NULL) {
  2799. n = 0;
  2800. while (n < argc) {
  2801. if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) {
  2802. break;
  2803. }
  2804. n++;
  2805. }
  2806. from_encoding = mbfl_encoding_detector_judge(identd);
  2807. mbfl_encoding_detector_delete(identd);
  2808. if (recursion_error) {
  2809. efree(elist);
  2810. php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
  2811. RETURN_FALSE;
  2812. }
  2813. }
  2814. if (!from_encoding) {
  2815. php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
  2816. efree(elist);
  2817. RETURN_FALSE;
  2818. }
  2819. }
  2820. efree(elist);
  2821. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
  2822. /* If this assertion fails this means some memory allocation failure which is a bug */
  2823. ZEND_ASSERT(convd != NULL);
  2824. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  2825. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  2826. /* convert */
  2827. n = 0;
  2828. while (n < argc) {
  2829. zval *zv = &args[n];
  2830. ZVAL_DEREF(zv);
  2831. recursion_error = mb_recursive_convert_variable(convd, zv);
  2832. if (recursion_error) {
  2833. break;
  2834. }
  2835. n++;
  2836. }
  2837. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  2838. mbfl_buffer_converter_delete(convd);
  2839. if (recursion_error) {
  2840. php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
  2841. RETURN_FALSE;
  2842. }
  2843. RETURN_STRING(from_encoding->name);
  2844. }
  2845. /* }}} */
  2846. /* {{{ HTML numeric entity */
  2847. /* {{{ static void php_mb_numericentity_exec() */
  2848. static void
  2849. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
  2850. {
  2851. char *str = NULL;
  2852. size_t str_len;
  2853. zend_string *encoding = NULL;
  2854. zval *hash_entry;
  2855. HashTable *target_hash;
  2856. int i, *convmap, *mapelm, mapsize=0;
  2857. zend_bool is_hex = 0;
  2858. mbfl_string string, result, *ret;
  2859. if (type == 0) {
  2860. if (zend_parse_parameters(ZEND_NUM_ARGS(), "sh|Sb", &str, &str_len, &target_hash, &encoding, &is_hex) == FAILURE) {
  2861. RETURN_THROWS();
  2862. }
  2863. } else {
  2864. if (zend_parse_parameters(ZEND_NUM_ARGS(), "sh|S", &str, &str_len, &target_hash, &encoding) == FAILURE) {
  2865. RETURN_THROWS();
  2866. }
  2867. }
  2868. string.val = (unsigned char *)str;
  2869. string.len = str_len;
  2870. string.encoding = php_mb_get_encoding(encoding, 3);
  2871. if (!string.encoding) {
  2872. RETURN_THROWS();
  2873. }
  2874. if (type == 0 && is_hex) {
  2875. type = 2; /* output in hex format */
  2876. }
  2877. /* conversion map */
  2878. i = zend_hash_num_elements(target_hash);
  2879. if (i % 4 != 0) {
  2880. zend_argument_value_error(2, "must have a multiple of 4 elements");
  2881. RETURN_THROWS();
  2882. }
  2883. convmap = (int *)safe_emalloc(i, sizeof(int), 0);
  2884. mapelm = convmap;
  2885. mapsize = 0;
  2886. ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
  2887. *mapelm++ = zval_get_long(hash_entry);
  2888. mapsize++;
  2889. } ZEND_HASH_FOREACH_END();
  2890. mapsize /= 4;
  2891. ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
  2892. ZEND_ASSERT(ret != NULL);
  2893. // TODO: avoid reallocation ???
  2894. RETVAL_STRINGL((char *)ret->val, ret->len);
  2895. efree(ret->val);
  2896. efree((void *)convmap);
  2897. }
  2898. /* }}} */
  2899. /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
  2900. Converts specified characters to HTML numeric entities */
  2901. PHP_FUNCTION(mb_encode_numericentity)
  2902. {
  2903. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  2904. }
  2905. /* }}} */
  2906. /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
  2907. Converts HTML numeric entities to character code */
  2908. PHP_FUNCTION(mb_decode_numericentity)
  2909. {
  2910. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  2911. }
  2912. /* }}} */
  2913. /* }}} */
  2914. /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
  2915. * Sends an email message with MIME scheme
  2916. */
  /*
   * SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)
   *
   * For use inside a loop that scans `str` with index `pos`. When str[pos]
   * starts a folded-header separator (CR LF followed by a space or tab), `pos`
   * is advanced past the CR LF and any further run of spaces/tabs, and the
   * enclosing loop is `continue`d. Otherwise nothing happens.
   *
   * This is deliberately a bare `if` and not a do { } while (0) wrapper: the
   * `continue` must target the caller's loop. `pos` must be a modifiable
   * lvalue; `str` and `pos` are evaluated more than once.
   */
  #define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
      if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
          (pos) += 2; \
          while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
              (pos)++; \
          } \
          continue; \
      }

  /*
   * MAIL_ASCIIZ_CHECK_MBSTRING(str, len)
   *
   * Replaces every NUL byte in the first `len` bytes of `str` with a space,
   * in place. Relies on two `char *` variables named `pp` and `ee` being
   * declared in the calling scope. The final line carries no continuation
   * backslash so that the definition cannot swallow the line that follows it.
   */
  #define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) \
      do { \
          pp = (str); \
          ee = pp + (len); \
          while ((pp = memchr(pp, '\0', (ee - pp)))) { \
              *pp = ' '; \
          } \
      } while (0)
  2931. static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
  2932. {
  2933. const char *ps;
  2934. size_t icnt;
  2935. int state = 0;
  2936. int crlf_state = -1;
  2937. char *token = NULL;
  2938. size_t token_pos = 0;
  2939. zend_string *fld_name, *fld_val;
  2940. ps = str;
  2941. icnt = str_len;
  2942. fld_name = fld_val = NULL;
  2943. /*
  2944. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  2945. * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
  2946. * state 0 1 2 3
  2947. *
  2948. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  2949. * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
  2950. * crlf_state -1 0 1 -1
  2951. *
  2952. */
  2953. while (icnt > 0) {
  2954. switch (*ps) {
  2955. case ':':
  2956. if (crlf_state == 1) {
  2957. token_pos++;
  2958. }
  2959. if (state == 0 || state == 1) {
  2960. if(token && token_pos > 0) {
  2961. fld_name = zend_string_init(token, token_pos, 0);
  2962. }
  2963. state = 2;
  2964. } else {
  2965. token_pos++;
  2966. }
  2967. crlf_state = 0;
  2968. break;
  2969. case '\n':
  2970. if (crlf_state == -1) {
  2971. goto out;
  2972. }
  2973. crlf_state = -1;
  2974. break;
  2975. case '\r':
  2976. if (crlf_state == 1) {
  2977. token_pos++;
  2978. } else {
  2979. crlf_state = 1;
  2980. }
  2981. break;
  2982. case ' ': case '\t':
  2983. if (crlf_state == -1) {
  2984. if (state == 3) {
  2985. /* continuing from the previous line */
  2986. state = 4;
  2987. } else {
  2988. /* simply skipping this new line */
  2989. state = 5;
  2990. }
  2991. } else {
  2992. if (crlf_state == 1) {
  2993. token_pos++;
  2994. }
  2995. if (state == 1 || state == 3) {
  2996. token_pos++;
  2997. }
  2998. }
  2999. crlf_state = 0;
  3000. break;
  3001. default:
  3002. switch (state) {
  3003. case 0:
  3004. token = (char*)ps;
  3005. token_pos = 0;
  3006. state = 1;
  3007. break;
  3008. case 2:
  3009. if (crlf_state != -1) {
  3010. token = (char*)ps;
  3011. token_pos = 0;
  3012. state = 3;
  3013. break;
  3014. }
  3015. /* break is missing intentionally */
  3016. case 3:
  3017. if (crlf_state == -1) {
  3018. if(token && token_pos > 0) {
  3019. fld_val = zend_string_init(token, token_pos, 0);
  3020. }
  3021. if (fld_name != NULL && fld_val != NULL) {
  3022. zval val;
  3023. /* FIXME: some locale free implementation is
  3024. * really required here,,, */
  3025. php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
  3026. ZVAL_STR(&val, fld_val);
  3027. zend_hash_update(ht, fld_name, &val);
  3028. zend_string_release_ex(fld_name, 0);
  3029. }
  3030. fld_name = fld_val = NULL;
  3031. token = (char*)ps;
  3032. token_pos = 0;
  3033. state = 1;
  3034. }
  3035. break;
  3036. case 4:
  3037. token_pos++;
  3038. state = 3;
  3039. break;
  3040. }
  3041. if (crlf_state == 1) {
  3042. token_pos++;
  3043. }
  3044. token_pos++;
  3045. crlf_state = 0;
  3046. break;
  3047. }
  3048. ps++, icnt--;
  3049. }
  3050. out:
  3051. if (state == 2) {
  3052. token = "";
  3053. token_pos = 0;
  3054. state = 3;
  3055. }
  3056. if (state == 3) {
  3057. if(token && token_pos > 0) {
  3058. fld_val = zend_string_init(token, token_pos, 0);
  3059. }
  3060. if (fld_name != NULL && fld_val != NULL) {
  3061. zval val;
  3062. /* FIXME: some locale free implementation is
  3063. * really required here,,, */
  3064. php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
  3065. ZVAL_STR(&val, fld_val);
  3066. zend_hash_update(ht, fld_name, &val);
  3067. zend_string_release_ex(fld_name, 0);
  3068. }
  3069. }
  3070. return state;
  3071. }
  3072. PHP_FUNCTION(mb_send_mail)
  3073. {
  3074. char *to;
  3075. size_t to_len;
  3076. char *message;
  3077. size_t message_len;
  3078. char *subject;
  3079. size_t subject_len;
  3080. zval *headers = NULL;
  3081. zend_string *extra_cmd = NULL;
  3082. zend_string *str_headers = NULL, *tmp_headers;
  3083. size_t n, i;
  3084. char *to_r = NULL;
  3085. char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
  3086. struct {
  3087. int cnt_type:1;
  3088. int cnt_trans_enc:1;
  3089. } suppressed_hdrs = { 0, 0 };
  3090. char *message_buf = NULL, *subject_buf = NULL, *p;
  3091. mbfl_string orig_str, conv_str;
  3092. mbfl_string *pstr; /* pointer to mbfl string for return value */
  3093. enum mbfl_no_encoding;
  3094. const mbfl_encoding *tran_cs, /* transfar text charset */
  3095. *head_enc, /* header transfar encoding */
  3096. *body_enc; /* body transfar encoding */
  3097. mbfl_memory_device device; /* automatic allocateable buffer for additional header */
  3098. const mbfl_language *lang;
  3099. int err = 0;
  3100. HashTable ht_headers;
  3101. zval *s;
  3102. extern void mbfl_memory_device_unput(mbfl_memory_device *device);
  3103. char *pp, *ee;
  3104. /* initialize */
  3105. mbfl_memory_device_init(&device, 0, 0);
  3106. mbfl_string_init(&orig_str);
  3107. mbfl_string_init(&conv_str);
  3108. /* character-set, transfer-encoding */
  3109. tran_cs = &mbfl_encoding_utf8;
  3110. head_enc = &mbfl_encoding_base64;
  3111. body_enc = &mbfl_encoding_base64;
  3112. lang = mbfl_no2language(MBSTRG(language));
  3113. if (lang != NULL) {
  3114. tran_cs = mbfl_no2encoding(lang->mail_charset);
  3115. head_enc = mbfl_no2encoding(lang->mail_header_encoding);
  3116. body_enc = mbfl_no2encoding(lang->mail_body_encoding);
  3117. }
  3118. if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|zS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) {
  3119. RETURN_THROWS();
  3120. }
  3121. /* ASCIIZ check */
  3122. MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
  3123. MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
  3124. MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
  3125. if (headers) {
  3126. switch(Z_TYPE_P(headers)) {
  3127. case IS_STRING:
  3128. tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0);
  3129. MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers));
  3130. str_headers = php_trim(tmp_headers, NULL, 0, 2);
  3131. zend_string_release_ex(tmp_headers, 0);
  3132. break;
  3133. case IS_ARRAY:
  3134. str_headers = php_mail_build_headers(Z_ARRVAL_P(headers));
  3135. break;
  3136. default:
  3137. php_error_docref(NULL, E_WARNING, "headers parameter must be string or array");
  3138. RETURN_FALSE;
  3139. }
  3140. }
  3141. if (extra_cmd) {
  3142. MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
  3143. }
  3144. zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
  3145. if (str_headers != NULL) {
  3146. _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
  3147. }
  3148. if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
  3149. char *tmp;
  3150. char *param_name;
  3151. char *charset = NULL;
  3152. ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
  3153. p = strchr(Z_STRVAL_P(s), ';');
  3154. if (p != NULL) {
  3155. /* skipping the padded spaces */
  3156. do {
  3157. ++p;
  3158. } while (*p == ' ' || *p == '\t');
  3159. if (*p != '\0') {
  3160. if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
  3161. if (strcasecmp(param_name, "charset") == 0) {
  3162. const mbfl_encoding *_tran_cs = tran_cs;
  3163. charset = php_strtok_r(NULL, "= \"", &tmp);
  3164. if (charset != NULL) {
  3165. _tran_cs = mbfl_name2encoding(charset);
  3166. }
  3167. if (!_tran_cs) {
  3168. php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
  3169. _tran_cs = &mbfl_encoding_ascii;
  3170. }
  3171. tran_cs = _tran_cs;
  3172. }
  3173. }
  3174. }
  3175. }
  3176. suppressed_hdrs.cnt_type = 1;
  3177. }
  3178. if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
  3179. const mbfl_encoding *_body_enc;
  3180. ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
  3181. _body_enc = mbfl_name2encoding(Z_STRVAL_P(s));
  3182. switch (_body_enc ? _body_enc->no_encoding : mbfl_no_encoding_invalid) {
  3183. case mbfl_no_encoding_base64:
  3184. case mbfl_no_encoding_7bit:
  3185. case mbfl_no_encoding_8bit:
  3186. body_enc = _body_enc;
  3187. break;
  3188. default:
  3189. php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
  3190. body_enc = &mbfl_encoding_8bit;
  3191. break;
  3192. }
  3193. suppressed_hdrs.cnt_trans_enc = 1;
  3194. }
  3195. /* To: */
  3196. if (to_len > 0) {
  3197. to_r = estrndup(to, to_len);
  3198. for (; to_len; to_len--) {
  3199. if (!isspace((unsigned char) to_r[to_len - 1])) {
  3200. break;
  3201. }
  3202. to_r[to_len - 1] = '\0';
  3203. }
  3204. for (i = 0; to_r[i]; i++) {
  3205. if (iscntrl((unsigned char) to_r[i])) {
  3206. /* According to RFC 822, section 3.1.1 long headers may be separated into
  3207. * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
  3208. * To prevent these separators from being replaced with a space, we use the
  3209. * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
  3210. */
  3211. SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
  3212. to_r[i] = ' ';
  3213. }
  3214. }
  3215. } else {
  3216. to_r = to;
  3217. }
  3218. /* Subject: */
  3219. orig_str.val = (unsigned char *)subject;
  3220. orig_str.len = subject_len;
  3221. orig_str.encoding = MBSTRG(current_internal_encoding);
  3222. if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
  3223. || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
  3224. orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3225. }
  3226. pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
  3227. if (pstr != NULL) {
  3228. subject_buf = subject = (char *)pstr->val;
  3229. }
  3230. /* message body */
  3231. orig_str.val = (unsigned char *)message;
  3232. orig_str.len = message_len;
  3233. orig_str.encoding = MBSTRG(current_internal_encoding);
  3234. if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
  3235. || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
  3236. orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3237. }
  3238. pstr = NULL;
  3239. {
  3240. mbfl_string tmpstr;
  3241. if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
  3242. tmpstr.encoding = &mbfl_encoding_8bit;
  3243. pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
  3244. efree(tmpstr.val);
  3245. }
  3246. }
  3247. if (pstr != NULL) {
  3248. message_buf = message = (char *)pstr->val;
  3249. }
  3250. /* other headers */
  3251. #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
  3252. #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
  3253. #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
  3254. #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
  3255. if (str_headers != NULL) {
  3256. p = ZSTR_VAL(str_headers);
  3257. n = ZSTR_LEN(str_headers);
  3258. mbfl_memory_device_strncat(&device, p, n);
  3259. if (n > 0 && p[n - 1] != '\n') {
  3260. mbfl_memory_device_strncat(&device, "\n", 1);
  3261. }
  3262. zend_string_release_ex(str_headers, 0);
  3263. }
  3264. if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
  3265. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
  3266. mbfl_memory_device_strncat(&device, "\n", 1);
  3267. }
  3268. if (!suppressed_hdrs.cnt_type) {
  3269. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
  3270. p = (char *)mbfl_no2preferred_mime_name(tran_cs->no_encoding);
  3271. if (p != NULL) {
  3272. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
  3273. mbfl_memory_device_strcat(&device, p);
  3274. }
  3275. mbfl_memory_device_strncat(&device, "\n", 1);
  3276. }
  3277. if (!suppressed_hdrs.cnt_trans_enc) {
  3278. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
  3279. p = (char *)mbfl_no2preferred_mime_name(body_enc->no_encoding);
  3280. if (p == NULL) {
  3281. p = "7bit";
  3282. }
  3283. mbfl_memory_device_strcat(&device, p);
  3284. mbfl_memory_device_strncat(&device, "\n", 1);
  3285. }
  3286. mbfl_memory_device_unput(&device);
  3287. mbfl_memory_device_output('\0', &device);
  3288. str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
  3289. if (force_extra_parameters) {
  3290. extra_cmd = php_escape_shell_cmd(force_extra_parameters);
  3291. } else if (extra_cmd) {
  3292. extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
  3293. }
  3294. if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
  3295. RETVAL_TRUE;
  3296. } else {
  3297. RETVAL_FALSE;
  3298. }
  3299. if (extra_cmd) {
  3300. zend_string_release_ex(extra_cmd, 0);
  3301. }
  3302. if (to_r != to) {
  3303. efree(to_r);
  3304. }
  3305. if (subject_buf) {
  3306. efree((void *)subject_buf);
  3307. }
  3308. if (message_buf) {
  3309. efree((void *)message_buf);
  3310. }
  3311. mbfl_memory_device_clear(&device);
  3312. zend_hash_destroy(&ht_headers);
  3313. if (str_headers) {
  3314. zend_string_release_ex(str_headers, 0);
  3315. }
  3316. }
  3317. #undef SKIP_LONG_HEADER_SEP_MBSTRING
  3318. #undef MAIL_ASCIIZ_CHECK_MBSTRING
  3319. #undef PHP_MBSTR_MAIL_MIME_HEADER1
  3320. #undef PHP_MBSTR_MAIL_MIME_HEADER2
  3321. #undef PHP_MBSTR_MAIL_MIME_HEADER3
  3322. #undef PHP_MBSTR_MAIL_MIME_HEADER4
  3323. /* }}} */
  3324. /* {{{ proto mixed mb_get_info([string type])
  3325. Returns the current settings of mbstring */
  3326. PHP_FUNCTION(mb_get_info)
  3327. {
  3328. char *typ = NULL;
  3329. size_t typ_len;
  3330. size_t n;
  3331. char *name;
  3332. zval row;
  3333. const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
  3334. const mbfl_encoding **entry;
  3335. if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
  3336. RETURN_THROWS();
  3337. }
  3338. if (!typ || !strcasecmp("all", typ)) {
  3339. array_init(return_value);
  3340. if (MBSTRG(current_internal_encoding)) {
  3341. add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
  3342. }
  3343. if (MBSTRG(http_input_identify)) {
  3344. add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
  3345. }
  3346. if (MBSTRG(current_http_output_encoding)) {
  3347. add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
  3348. }
  3349. if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
  3350. add_assoc_string(return_value, "http_output_conv_mimetypes", name);
  3351. }
  3352. if (lang != NULL) {
  3353. if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
  3354. add_assoc_string(return_value, "mail_charset", name);
  3355. }
  3356. if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
  3357. add_assoc_string(return_value, "mail_header_encoding", name);
  3358. }
  3359. if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
  3360. add_assoc_string(return_value, "mail_body_encoding", name);
  3361. }
  3362. }
  3363. add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
  3364. if (MBSTRG(encoding_translation)) {
  3365. add_assoc_string(return_value, "encoding_translation", "On");
  3366. } else {
  3367. add_assoc_string(return_value, "encoding_translation", "Off");
  3368. }
  3369. if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
  3370. add_assoc_string(return_value, "language", name);
  3371. }
  3372. n = MBSTRG(current_detect_order_list_size);
  3373. entry = MBSTRG(current_detect_order_list);
  3374. if (n > 0) {
  3375. size_t i;
  3376. array_init(&row);
  3377. for (i = 0; i < n; i++) {
  3378. add_next_index_string(&row, (*entry)->name);
  3379. entry++;
  3380. }
  3381. add_assoc_zval(return_value, "detect_order", &row);
  3382. }
  3383. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  3384. add_assoc_string(return_value, "substitute_character", "none");
  3385. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  3386. add_assoc_string(return_value, "substitute_character", "long");
  3387. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  3388. add_assoc_string(return_value, "substitute_character", "entity");
  3389. } else {
  3390. add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
  3391. }
  3392. if (MBSTRG(strict_detection)) {
  3393. add_assoc_string(return_value, "strict_detection", "On");
  3394. } else {
  3395. add_assoc_string(return_value, "strict_detection", "Off");
  3396. }
  3397. } else if (!strcasecmp("internal_encoding", typ)) {
  3398. if (MBSTRG(current_internal_encoding)) {
  3399. RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
  3400. }
  3401. } else if (!strcasecmp("http_input", typ)) {
  3402. if (MBSTRG(http_input_identify)) {
  3403. RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
  3404. }
  3405. } else if (!strcasecmp("http_output", typ)) {
  3406. if (MBSTRG(current_http_output_encoding)) {
  3407. RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
  3408. }
  3409. } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
  3410. if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
  3411. RETVAL_STRING(name);
  3412. }
  3413. } else if (!strcasecmp("mail_charset", typ)) {
  3414. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
  3415. RETVAL_STRING(name);
  3416. }
  3417. } else if (!strcasecmp("mail_header_encoding", typ)) {
  3418. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
  3419. RETVAL_STRING(name);
  3420. }
  3421. } else if (!strcasecmp("mail_body_encoding", typ)) {
  3422. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
  3423. RETVAL_STRING(name);
  3424. }
  3425. } else if (!strcasecmp("illegal_chars", typ)) {
  3426. RETVAL_LONG(MBSTRG(illegalchars));
  3427. } else if (!strcasecmp("encoding_translation", typ)) {
  3428. if (MBSTRG(encoding_translation)) {
  3429. RETVAL_STRING("On");
  3430. } else {
  3431. RETVAL_STRING("Off");
  3432. }
  3433. } else if (!strcasecmp("language", typ)) {
  3434. if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
  3435. RETVAL_STRING(name);
  3436. }
  3437. } else if (!strcasecmp("detect_order", typ)) {
  3438. n = MBSTRG(current_detect_order_list_size);
  3439. entry = MBSTRG(current_detect_order_list);
  3440. if (n > 0) {
  3441. size_t i;
  3442. array_init(return_value);
  3443. for (i = 0; i < n; i++) {
  3444. add_next_index_string(return_value, (*entry)->name);
  3445. entry++;
  3446. }
  3447. }
  3448. } else if (!strcasecmp("substitute_character", typ)) {
  3449. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  3450. RETVAL_STRING("none");
  3451. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  3452. RETVAL_STRING("long");
  3453. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  3454. RETVAL_STRING("entity");
  3455. } else {
  3456. RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
  3457. }
  3458. } else if (!strcasecmp("strict_detection", typ)) {
  3459. if (MBSTRG(strict_detection)) {
  3460. RETVAL_STRING("On");
  3461. } else {
  3462. RETVAL_STRING("Off");
  3463. }
  3464. } else {
  3465. // TODO Convert to ValueError
  3466. RETURN_FALSE;
  3467. }
  3468. }
  3469. /* }}} */
  3470. static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
  3471. {
  3472. mbfl_buffer_converter *convd;
  3473. convd = mbfl_buffer_converter_new(encoding, encoding, 0);
  3474. if (convd == NULL) {
  3475. return NULL;
  3476. }
  3477. mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
  3478. mbfl_buffer_converter_illegal_substchar(convd, 0);
  3479. return convd;
  3480. }
  3481. static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
  3482. mbfl_string string, result, *ret = NULL;
  3483. size_t illegalchars = 0;
  3484. /* initialize string */
  3485. mbfl_string_init_set(&string, encoding);
  3486. mbfl_string_init(&result);
  3487. string.val = (unsigned char *) input;
  3488. string.len = length;
  3489. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3490. illegalchars = mbfl_buffer_illegalchars(convd);
  3491. if (ret != NULL) {
  3492. if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
  3493. mbfl_string_clear(&result);
  3494. return 1;
  3495. }
  3496. mbfl_string_clear(&result);
  3497. }
  3498. return 0;
  3499. }
  3500. MBSTRING_API int php_mb_check_encoding(
  3501. const char *input, size_t length, const mbfl_encoding *encoding)
  3502. {
  3503. mbfl_buffer_converter *convd;
  3504. convd = php_mb_init_convd(encoding);
  3505. /* If this assertion fails this means some memory allocation failure which is a bug */
  3506. ZEND_ASSERT(convd != NULL);
  3507. if (php_mb_check_encoding_impl(convd, input, length, encoding)) {
  3508. mbfl_buffer_converter_delete(convd);
  3509. return 1;
  3510. }
  3511. mbfl_buffer_converter_delete(convd);
  3512. return 0;
  3513. }
  3514. static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding *encoding)
  3515. {
  3516. mbfl_buffer_converter *convd;
  3517. zend_long idx;
  3518. zend_string *key;
  3519. zval *entry;
  3520. int valid = 1;
  3521. (void)(idx);
  3522. convd = php_mb_init_convd(encoding);
  3523. /* If this assertion fails this means some memory allocation failure which is a bug */
  3524. ZEND_ASSERT(convd != NULL);
  3525. if (GC_IS_RECURSIVE(vars)) {
  3526. mbfl_buffer_converter_delete(convd);
  3527. php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
  3528. return 0;
  3529. }
  3530. GC_TRY_PROTECT_RECURSION(vars);
  3531. ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
  3532. ZVAL_DEREF(entry);
  3533. if (key) {
  3534. if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
  3535. valid = 0;
  3536. break;
  3537. }
  3538. }
  3539. switch (Z_TYPE_P(entry)) {
  3540. case IS_STRING:
  3541. if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
  3542. valid = 0;
  3543. break;
  3544. }
  3545. break;
  3546. case IS_ARRAY:
  3547. if (!php_mb_check_encoding_recursive(Z_ARRVAL_P(entry), encoding)) {
  3548. valid = 0;
  3549. break;
  3550. }
  3551. break;
  3552. case IS_LONG:
  3553. case IS_DOUBLE:
  3554. case IS_NULL:
  3555. case IS_TRUE:
  3556. case IS_FALSE:
  3557. break;
  3558. default:
  3559. /* Other types are error. */
  3560. valid = 0;
  3561. break;
  3562. }
  3563. } ZEND_HASH_FOREACH_END();
  3564. GC_TRY_UNPROTECT_RECURSION(vars);
  3565. mbfl_buffer_converter_delete(convd);
  3566. return valid;
  3567. }
  3568. /* {{{ proto bool mb_check_encoding([mixed var[, string encoding]])
  3569. Check if the string is valid for the specified encoding */
  3570. PHP_FUNCTION(mb_check_encoding)
  3571. {
  3572. zend_string *input_str = NULL, *enc = NULL;
  3573. HashTable *input_ht = NULL;
  3574. const mbfl_encoding *encoding;
  3575. ZEND_PARSE_PARAMETERS_START(0, 2)
  3576. Z_PARAM_OPTIONAL
  3577. Z_PARAM_STR_OR_ARRAY_HT(input_str, input_ht)
  3578. Z_PARAM_STR(enc)
  3579. ZEND_PARSE_PARAMETERS_END();
  3580. encoding = php_mb_get_encoding(enc, 2);
  3581. if (!encoding) {
  3582. RETURN_THROWS();
  3583. }
  3584. if (input_ht) {
  3585. if (!php_mb_check_encoding_recursive(input_ht, encoding)) {
  3586. RETURN_FALSE;
  3587. }
  3588. RETURN_TRUE;
  3589. } else if (input_str) {
  3590. if (!php_mb_check_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str), encoding)) {
  3591. RETURN_FALSE;
  3592. }
  3593. RETURN_TRUE;
  3594. } else {
  3595. /* FIXME: Actually check all inputs, except $_FILES file content. */
  3596. if (MBSTRG(illegalchars) == 0) {
  3597. RETURN_TRUE;
  3598. }
  3599. RETURN_FALSE;
  3600. }
  3601. }
  3602. /* }}} */
  3603. static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name,
  3604. const uint32_t enc_name_arg_num)
  3605. {
  3606. const mbfl_encoding *enc;
  3607. enum mbfl_no_encoding no_enc;
  3608. ZEND_ASSERT(str_len > 0);
  3609. enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
  3610. if (!enc) {
  3611. return -2;
  3612. }
  3613. no_enc = enc->no_encoding;
  3614. if (php_mb_is_unsupported_no_encoding(no_enc)) {
  3615. zend_value_error("mb_ord() does not support the \"%s\" encoding", enc->name);
  3616. return -2;
  3617. }
  3618. {
  3619. mbfl_wchar_device dev;
  3620. mbfl_convert_filter *filter;
  3621. zend_long cp;
  3622. mbfl_wchar_device_init(&dev);
  3623. filter = mbfl_convert_filter_new(enc, &mbfl_encoding_wchar, mbfl_wchar_device_output, 0, &dev);
  3624. /* If this assertion fails this means some memory allocation failure which is a bug */
  3625. ZEND_ASSERT(filter != NULL);
  3626. mbfl_convert_filter_feed_string(filter, (const unsigned char *) str, str_len);
  3627. mbfl_convert_filter_flush(filter);
  3628. if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] >= MBFL_WCSGROUP_UCS4MAX) {
  3629. mbfl_convert_filter_delete(filter);
  3630. mbfl_wchar_device_clear(&dev);
  3631. return -1;
  3632. }
  3633. cp = dev.buffer[0];
  3634. mbfl_convert_filter_delete(filter);
  3635. mbfl_wchar_device_clear(&dev);
  3636. return cp;
  3637. }
  3638. }
  3639. /* {{{ proto int|false mb_ord([string str[, string encoding]]) */
  3640. PHP_FUNCTION(mb_ord)
  3641. {
  3642. char *str;
  3643. size_t str_len;
  3644. zend_string *enc = NULL;
  3645. zend_long cp;
  3646. ZEND_PARSE_PARAMETERS_START(1, 2)
  3647. Z_PARAM_STRING(str, str_len)
  3648. Z_PARAM_OPTIONAL
  3649. Z_PARAM_STR(enc)
  3650. ZEND_PARSE_PARAMETERS_END();
  3651. if (str_len == 0) {
  3652. zend_argument_value_error(1, "must not be empty");
  3653. RETURN_THROWS();
  3654. }
  3655. cp = php_mb_ord(str, str_len, enc, 2);
  3656. if (0 > cp) {
  3657. if (cp == -2) {
  3658. RETURN_THROWS();
  3659. }
  3660. RETURN_FALSE;
  3661. }
  3662. RETURN_LONG(cp);
  3663. }
  3664. /* }}} */
  3665. static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name, uint32_t enc_name_arg_num)
  3666. {
  3667. const mbfl_encoding *enc;
  3668. enum mbfl_no_encoding no_enc;
  3669. zend_string *ret;
  3670. char* buf;
  3671. size_t buf_len;
  3672. enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
  3673. if (!enc) {
  3674. return NULL;
  3675. }
  3676. no_enc = enc->no_encoding;
  3677. if (php_mb_is_unsupported_no_encoding(no_enc)) {
  3678. zend_value_error("mb_chr() does not support the \"%s\" encoding", enc->name);
  3679. return NULL;
  3680. }
  3681. if (cp < 0 || cp > 0x10ffff) {
  3682. return NULL;
  3683. }
  3684. if (php_mb_is_no_encoding_utf8(no_enc)) {
  3685. if (cp > 0xd7ff && 0xe000 > cp) {
  3686. return NULL;
  3687. }
  3688. if (cp < 0x80) {
  3689. ret = ZSTR_CHAR(cp);
  3690. } else if (cp < 0x800) {
  3691. ret = zend_string_alloc(2, 0);
  3692. ZSTR_VAL(ret)[0] = 0xc0 | (cp >> 6);
  3693. ZSTR_VAL(ret)[1] = 0x80 | (cp & 0x3f);
  3694. ZSTR_VAL(ret)[2] = 0;
  3695. } else if (cp < 0x10000) {
  3696. ret = zend_string_alloc(3, 0);
  3697. ZSTR_VAL(ret)[0] = 0xe0 | (cp >> 12);
  3698. ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 6) & 0x3f);
  3699. ZSTR_VAL(ret)[2] = 0x80 | (cp & 0x3f);
  3700. ZSTR_VAL(ret)[3] = 0;
  3701. } else {
  3702. ret = zend_string_alloc(4, 0);
  3703. ZSTR_VAL(ret)[0] = 0xf0 | (cp >> 18);
  3704. ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 12) & 0x3f);
  3705. ZSTR_VAL(ret)[2] = 0x80 | ((cp >> 6) & 0x3f);
  3706. ZSTR_VAL(ret)[3] = 0x80 | (cp & 0x3f);
  3707. ZSTR_VAL(ret)[4] = 0;
  3708. }
  3709. return ret;
  3710. }
  3711. buf_len = 4;
  3712. buf = (char *) emalloc(buf_len + 1);
  3713. buf[0] = (cp >> 24) & 0xff;
  3714. buf[1] = (cp >> 16) & 0xff;
  3715. buf[2] = (cp >> 8) & 0xff;
  3716. buf[3] = cp & 0xff;
  3717. buf[4] = 0;
  3718. {
  3719. char *ret_str;
  3720. size_t ret_len;
  3721. long orig_illegalchars = MBSTRG(illegalchars);
  3722. MBSTRG(illegalchars) = 0;
  3723. ret_str = php_mb_convert_encoding_ex(buf, buf_len, enc, &mbfl_encoding_ucs4be, &ret_len);
  3724. if (MBSTRG(illegalchars) != 0) {
  3725. efree(buf);
  3726. efree(ret_str);
  3727. MBSTRG(illegalchars) = orig_illegalchars;
  3728. return NULL;
  3729. }
  3730. ret = zend_string_init(ret_str, ret_len, 0);
  3731. efree(ret_str);
  3732. MBSTRG(illegalchars) = orig_illegalchars;
  3733. }
  3734. efree(buf);
  3735. return ret;
  3736. }
  3737. /* {{{ proto string|false mb_chr([int cp[, string encoding]]) */
  3738. PHP_FUNCTION(mb_chr)
  3739. {
  3740. zend_long cp;
  3741. zend_string *enc = NULL;
  3742. zend_string* ret;
  3743. ZEND_PARSE_PARAMETERS_START(1, 2)
  3744. Z_PARAM_LONG(cp)
  3745. Z_PARAM_OPTIONAL
  3746. Z_PARAM_STR(enc)
  3747. ZEND_PARSE_PARAMETERS_END();
  3748. ret = php_mb_chr(cp, enc, 2);
  3749. if (ret == NULL) {
  3750. RETURN_FALSE;
  3751. }
  3752. RETURN_STR(ret);
  3753. }
  3754. /* }}} */
  3755. /* {{{ proto string mb_scrub([string str[, string encoding]]) */
  3756. PHP_FUNCTION(mb_scrub)
  3757. {
  3758. const mbfl_encoding *enc;
  3759. char* str;
  3760. size_t str_len;
  3761. zend_string *enc_name = NULL;
  3762. char *ret;
  3763. size_t ret_len;
  3764. ZEND_PARSE_PARAMETERS_START(1, 2)
  3765. Z_PARAM_STRING(str, str_len)
  3766. Z_PARAM_OPTIONAL
  3767. Z_PARAM_STR(enc_name)
  3768. ZEND_PARSE_PARAMETERS_END();
  3769. enc = php_mb_get_encoding(enc_name, 2);
  3770. if (!enc) {
  3771. RETURN_THROWS();
  3772. }
  3773. ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len);
  3774. RETVAL_STRINGL(ret, ret_len);
  3775. efree(ret);
  3776. }
  3777. /* }}} */
  3778. /* {{{ php_mb_populate_current_detect_order_list */
  3779. static void php_mb_populate_current_detect_order_list(void)
  3780. {
  3781. const mbfl_encoding **entry = 0;
  3782. size_t nentries;
  3783. if (MBSTRG(current_detect_order_list)) {
  3784. return;
  3785. }
  3786. if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
  3787. nentries = MBSTRG(detect_order_list_size);
  3788. entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
  3789. memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
  3790. } else {
  3791. const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
  3792. size_t i;
  3793. nentries = MBSTRG(default_detect_order_list_size);
  3794. entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
  3795. for (i = 0; i < nentries; i++) {
  3796. entry[i] = mbfl_no2encoding(src[i]);
  3797. }
  3798. }
  3799. MBSTRG(current_detect_order_list) = entry;
  3800. MBSTRG(current_detect_order_list_size) = nentries;
  3801. }
  3802. /* }}} */
  3803. /* {{{ static int php_mb_encoding_translation() */
  3804. static int php_mb_encoding_translation(void)
  3805. {
  3806. return MBSTRG(encoding_translation);
  3807. }
  3808. /* }}} */
  3809. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
  3810. MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
  3811. {
  3812. if (enc != NULL) {
  3813. if (enc->flag & MBFL_ENCTYPE_MBCS) {
  3814. if (enc->mblen_table != NULL) {
  3815. if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
  3816. }
  3817. } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
  3818. return 2;
  3819. } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
  3820. return 4;
  3821. }
  3822. }
  3823. return 1;
  3824. }
  3825. /* }}} */
  3826. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
  3827. MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
  3828. {
  3829. return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
  3830. }
  3831. /* }}} */
  3832. /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
  3833. MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
  3834. {
  3835. register const char *p = s;
  3836. char *last=NULL;
  3837. if (nbytes == (size_t)-1) {
  3838. size_t nb = 0;
  3839. while (*p != '\0') {
  3840. if (nb == 0) {
  3841. if ((unsigned char)*p == (unsigned char)c) {
  3842. last = (char *)p;
  3843. }
  3844. nb = php_mb_mbchar_bytes_ex(p, enc);
  3845. if (nb == 0) {
  3846. return NULL; /* something is going wrong! */
  3847. }
  3848. }
  3849. --nb;
  3850. ++p;
  3851. }
  3852. } else {
  3853. register size_t bcnt = nbytes;
  3854. register size_t nbytes_char;
  3855. while (bcnt > 0) {
  3856. if ((unsigned char)*p == (unsigned char)c) {
  3857. last = (char *)p;
  3858. }
  3859. nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
  3860. if (bcnt < nbytes_char) {
  3861. return NULL;
  3862. }
  3863. p += nbytes_char;
  3864. bcnt -= nbytes_char;
  3865. }
  3866. }
  3867. return last;
  3868. }
  3869. /* }}} */
  3870. /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
  3871. MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
  3872. {
  3873. return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
  3874. }
  3875. /* }}} */
  3876. /* {{{ MBSTRING_API int php_mb_stripos()
  3877. */
  3878. MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, const mbfl_encoding *enc)
  3879. {
  3880. size_t n = (size_t) -1;
  3881. mbfl_string haystack, needle;
  3882. mbfl_string_init(&haystack);
  3883. mbfl_string_init(&needle);
  3884. haystack.encoding = enc;
  3885. needle.encoding = enc;
  3886. do {
  3887. /* We're using simple case-folding here, because we'd have to deal with remapping of
  3888. * offsets otherwise. */
  3889. size_t len = 0;
  3890. haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
  3891. haystack.len = len;
  3892. if (!haystack.val) {
  3893. break;
  3894. }
  3895. if (haystack.len == 0) {
  3896. break;
  3897. }
  3898. needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
  3899. needle.len = len;
  3900. if (!needle.val) {
  3901. break;
  3902. }
  3903. n = mbfl_strpos(&haystack, &needle, offset, mode);
  3904. } while(0);
  3905. if (haystack.val) {
  3906. efree(haystack.val);
  3907. }
  3908. if (needle.val) {
  3909. efree(needle.val);
  3910. }
  3911. return n;
  3912. }
  3913. /* }}} */
  3914. static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
  3915. {
  3916. *list = (const zend_encoding **)MBSTRG(http_input_list);
  3917. *list_size = MBSTRG(http_input_list_size);
  3918. }
  3919. /* }}} */
  3920. static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
  3921. {
  3922. MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
  3923. }
  3924. /* }}} */
  3925. #endif /* HAVE_MBSTRING */