PageRenderTime 82ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/ext/mbstring/mbstring.c

http://github.com/infusion/PHP
C | 4891 lines | 3935 code | 550 blank | 406 comment | 1115 complexity | 29f0a79516e817df9626e2903bdb7a4c MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-2.1, BSD-3-Clause
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2011 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  16. | Rui Hirokawa <hirokawa@php.net> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* $Id: mbstring.c 306939 2011-01-01 02:19:59Z felipe $ */
  20. /*
  21. * PHP 4 Multibyte String module "mbstring"
  22. *
  23. * History:
  24. * 2000.5.19 Release php-4.0RC2_jstring-1.0
  25. * 2001.4.1 Release php4_jstring-1.0.91
  26. * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
  27. * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
  28. */
  29. /*
  30. * PHP3 Internationalization support program.
  31. *
  32. * Copyright (c) 1999,2000 by the PHP3 internationalization team.
  33. * All rights reserved.
  34. *
  35. * See README_PHP3-i18n-ja for more detail.
  36. *
  37. * Authors:
  38. * Hironori Sato <satoh@jpnnet.com>
  39. * Shigeru Kanemoto <sgk@happysize.co.jp>
  40. * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
  41. * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
  42. */
  43. /* {{{ includes */
  44. #ifdef HAVE_CONFIG_H
  45. #include "config.h"
  46. #endif
  47. #include "php.h"
  48. #include "php_ini.h"
  49. #include "php_variables.h"
  50. #include "mbstring.h"
  51. #include "ext/standard/php_string.h"
  52. #include "ext/standard/php_mail.h"
  53. #include "ext/standard/exec.h"
  54. #include "ext/standard/php_smart_str.h"
  55. #include "ext/standard/url.h"
  56. #include "main/php_output.h"
  57. #include "ext/standard/info.h"
  58. #include "libmbfl/mbfl/mbfl_allocators.h"
  59. #include "php_variables.h"
  60. #include "php_globals.h"
  61. #include "rfc1867.h"
  62. #include "php_content_types.h"
  63. #include "SAPI.h"
  64. #include "php_unicode.h"
  65. #include "TSRM.h"
  66. #include "mb_gpc.h"
  67. #if HAVE_MBREGEX
  68. #include "php_mbregex.h"
  69. #endif
  70. #ifdef ZEND_MULTIBYTE
  71. #include "zend_multibyte.h"
  72. #endif /* ZEND_MULTIBYTE */
  73. #if HAVE_ONIG
  74. #include "php_onig_compat.h"
  75. #include <oniguruma.h>
  76. #undef UChar
  77. #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
  78. #include "ext/pcre/php_pcre.h"
  79. #endif
  80. /* }}} */
  81. #if HAVE_MBSTRING
  82. /* {{{ prototypes */
  83. ZEND_DECLARE_MODULE_GLOBALS(mbstring)
  84. static PHP_GINIT_FUNCTION(mbstring);
  85. static PHP_GSHUTDOWN_FUNCTION(mbstring);
  86. #ifdef ZEND_MULTIBYTE
  87. static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
  88. static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
  89. static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC);
  90. static int php_mb_set_zend_encoding(TSRMLS_D);
  91. #endif
  92. /* }}} */
  93. /* {{{ php_mb_default_identify_list */
  94. typedef struct _php_mb_nls_ident_list {
  95. enum mbfl_no_language lang;
  96. const enum mbfl_no_encoding* list;
  97. int list_size;
  98. } php_mb_nls_ident_list;
  99. static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
  100. mbfl_no_encoding_ascii,
  101. mbfl_no_encoding_jis,
  102. mbfl_no_encoding_utf8,
  103. mbfl_no_encoding_euc_jp,
  104. mbfl_no_encoding_sjis
  105. };
  106. static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
  107. mbfl_no_encoding_ascii,
  108. mbfl_no_encoding_utf8,
  109. mbfl_no_encoding_euc_cn,
  110. mbfl_no_encoding_cp936
  111. };
  112. static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
  113. mbfl_no_encoding_ascii,
  114. mbfl_no_encoding_utf8,
  115. mbfl_no_encoding_euc_tw,
  116. mbfl_no_encoding_big5
  117. };
  118. static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
  119. mbfl_no_encoding_ascii,
  120. mbfl_no_encoding_utf8,
  121. mbfl_no_encoding_euc_kr,
  122. mbfl_no_encoding_uhc
  123. };
  124. static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
  125. mbfl_no_encoding_ascii,
  126. mbfl_no_encoding_utf8,
  127. mbfl_no_encoding_koi8r,
  128. mbfl_no_encoding_cp1251,
  129. mbfl_no_encoding_cp866
  130. };
  131. static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
  132. mbfl_no_encoding_ascii,
  133. mbfl_no_encoding_utf8,
  134. mbfl_no_encoding_armscii8
  135. };
  136. static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
  137. mbfl_no_encoding_ascii,
  138. mbfl_no_encoding_utf8,
  139. mbfl_no_encoding_cp1254,
  140. mbfl_no_encoding_8859_9
  141. };
  142. static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
  143. mbfl_no_encoding_ascii,
  144. mbfl_no_encoding_utf8,
  145. mbfl_no_encoding_koi8u
  146. };
  147. static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
  148. mbfl_no_encoding_ascii,
  149. mbfl_no_encoding_utf8
  150. };
  151. static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
  152. { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
  153. { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
  154. { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
  155. { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
  156. { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
  157. { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
  158. { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
  159. { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
  160. { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
  161. };
  162. /* }}} */
  163. /* {{{ mb_overload_def mb_ovld[] */
  164. static const struct mb_overload_def mb_ovld[] = {
  165. {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
  166. {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
  167. {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
  168. {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
  169. {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
  170. {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
  171. {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
  172. {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
  173. {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
  174. {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
  175. {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
  176. {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
  177. {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
  178. #if HAVE_MBREGEX
  179. {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
  180. {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
  181. {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
  182. {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
  183. {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
  184. #endif
  185. {0, NULL, NULL, NULL}
  186. };
  187. /* }}} */
  188. /* {{{ arginfo */
  189. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
  190. ZEND_ARG_INFO(0, language)
  191. ZEND_END_ARG_INFO()
  192. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
  193. ZEND_ARG_INFO(0, encoding)
  194. ZEND_END_ARG_INFO()
  195. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
  196. ZEND_ARG_INFO(0, type)
  197. ZEND_END_ARG_INFO()
  198. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
  199. ZEND_ARG_INFO(0, encoding)
  200. ZEND_END_ARG_INFO()
  201. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
  202. ZEND_ARG_INFO(0, encoding)
  203. ZEND_END_ARG_INFO()
  204. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
  205. ZEND_ARG_INFO(0, substchar)
  206. ZEND_END_ARG_INFO()
  207. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
  208. ZEND_ARG_INFO(0, encoding)
  209. ZEND_END_ARG_INFO()
  210. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
  211. ZEND_ARG_INFO(0, encoded_string)
  212. ZEND_ARG_INFO(1, result)
  213. ZEND_END_ARG_INFO()
  214. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
  215. ZEND_ARG_INFO(0, contents)
  216. ZEND_ARG_INFO(0, status)
  217. ZEND_END_ARG_INFO()
  218. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
  219. ZEND_ARG_INFO(0, str)
  220. ZEND_ARG_INFO(0, encoding)
  221. ZEND_END_ARG_INFO()
  222. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
  223. ZEND_ARG_INFO(0, haystack)
  224. ZEND_ARG_INFO(0, needle)
  225. ZEND_ARG_INFO(0, offset)
  226. ZEND_ARG_INFO(0, encoding)
  227. ZEND_END_ARG_INFO()
  228. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
  229. ZEND_ARG_INFO(0, haystack)
  230. ZEND_ARG_INFO(0, needle)
  231. ZEND_ARG_INFO(0, offset)
  232. ZEND_ARG_INFO(0, encoding)
  233. ZEND_END_ARG_INFO()
  234. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
  235. ZEND_ARG_INFO(0, haystack)
  236. ZEND_ARG_INFO(0, needle)
  237. ZEND_ARG_INFO(0, offset)
  238. ZEND_ARG_INFO(0, encoding)
  239. ZEND_END_ARG_INFO()
  240. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
  241. ZEND_ARG_INFO(0, haystack)
  242. ZEND_ARG_INFO(0, needle)
  243. ZEND_ARG_INFO(0, offset)
  244. ZEND_ARG_INFO(0, encoding)
  245. ZEND_END_ARG_INFO()
  246. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
  247. ZEND_ARG_INFO(0, haystack)
  248. ZEND_ARG_INFO(0, needle)
  249. ZEND_ARG_INFO(0, part)
  250. ZEND_ARG_INFO(0, encoding)
  251. ZEND_END_ARG_INFO()
  252. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
  253. ZEND_ARG_INFO(0, haystack)
  254. ZEND_ARG_INFO(0, needle)
  255. ZEND_ARG_INFO(0, part)
  256. ZEND_ARG_INFO(0, encoding)
  257. ZEND_END_ARG_INFO()
  258. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
  259. ZEND_ARG_INFO(0, haystack)
  260. ZEND_ARG_INFO(0, needle)
  261. ZEND_ARG_INFO(0, part)
  262. ZEND_ARG_INFO(0, encoding)
  263. ZEND_END_ARG_INFO()
  264. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
  265. ZEND_ARG_INFO(0, haystack)
  266. ZEND_ARG_INFO(0, needle)
  267. ZEND_ARG_INFO(0, part)
  268. ZEND_ARG_INFO(0, encoding)
  269. ZEND_END_ARG_INFO()
  270. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
  271. ZEND_ARG_INFO(0, haystack)
  272. ZEND_ARG_INFO(0, needle)
  273. ZEND_ARG_INFO(0, encoding)
  274. ZEND_END_ARG_INFO()
  275. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
  276. ZEND_ARG_INFO(0, str)
  277. ZEND_ARG_INFO(0, start)
  278. ZEND_ARG_INFO(0, length)
  279. ZEND_ARG_INFO(0, encoding)
  280. ZEND_END_ARG_INFO()
  281. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
  282. ZEND_ARG_INFO(0, str)
  283. ZEND_ARG_INFO(0, start)
  284. ZEND_ARG_INFO(0, length)
  285. ZEND_ARG_INFO(0, encoding)
  286. ZEND_END_ARG_INFO()
  287. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
  288. ZEND_ARG_INFO(0, str)
  289. ZEND_ARG_INFO(0, encoding)
  290. ZEND_END_ARG_INFO()
  291. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
  292. ZEND_ARG_INFO(0, str)
  293. ZEND_ARG_INFO(0, start)
  294. ZEND_ARG_INFO(0, width)
  295. ZEND_ARG_INFO(0, trimmarker)
  296. ZEND_ARG_INFO(0, encoding)
  297. ZEND_END_ARG_INFO()
  298. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
  299. ZEND_ARG_INFO(0, str)
  300. ZEND_ARG_INFO(0, to)
  301. ZEND_ARG_INFO(0, from)
  302. ZEND_END_ARG_INFO()
  303. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
  304. ZEND_ARG_INFO(0, sourcestring)
  305. ZEND_ARG_INFO(0, mode)
  306. ZEND_ARG_INFO(0, encoding)
  307. ZEND_END_ARG_INFO()
  308. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
  309. ZEND_ARG_INFO(0, sourcestring)
  310. ZEND_ARG_INFO(0, encoding)
  311. ZEND_END_ARG_INFO()
  312. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
  313. ZEND_ARG_INFO(0, sourcestring)
  314. ZEND_ARG_INFO(0, encoding)
  315. ZEND_END_ARG_INFO()
  316. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
  317. ZEND_ARG_INFO(0, str)
  318. ZEND_ARG_INFO(0, encoding_list)
  319. ZEND_ARG_INFO(0, strict)
  320. ZEND_END_ARG_INFO()
  321. ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
  322. ZEND_END_ARG_INFO()
  323. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
  324. ZEND_ARG_INFO(0, encoding)
  325. ZEND_END_ARG_INFO()
  326. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
  327. ZEND_ARG_INFO(0, str)
  328. ZEND_ARG_INFO(0, charset)
  329. ZEND_ARG_INFO(0, transfer)
  330. ZEND_ARG_INFO(0, linefeed)
  331. ZEND_ARG_INFO(0, indent)
  332. ZEND_END_ARG_INFO()
  333. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
  334. ZEND_ARG_INFO(0, string)
  335. ZEND_END_ARG_INFO()
  336. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
  337. ZEND_ARG_INFO(0, str)
  338. ZEND_ARG_INFO(0, option)
  339. ZEND_ARG_INFO(0, encoding)
  340. ZEND_END_ARG_INFO()
  341. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3)
  342. ZEND_ARG_INFO(0, to)
  343. ZEND_ARG_INFO(0, from)
  344. ZEND_ARG_INFO(1, ...)
  345. ZEND_END_ARG_INFO()
  346. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
  347. ZEND_ARG_INFO(0, string)
  348. ZEND_ARG_INFO(0, convmap)
  349. ZEND_ARG_INFO(0, encoding)
  350. ZEND_END_ARG_INFO()
  351. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
  352. ZEND_ARG_INFO(0, string)
  353. ZEND_ARG_INFO(0, convmap)
  354. ZEND_ARG_INFO(0, encoding)
  355. ZEND_END_ARG_INFO()
  356. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
  357. ZEND_ARG_INFO(0, to)
  358. ZEND_ARG_INFO(0, subject)
  359. ZEND_ARG_INFO(0, message)
  360. ZEND_ARG_INFO(0, additional_headers)
  361. ZEND_ARG_INFO(0, additional_parameters)
  362. ZEND_END_ARG_INFO()
  363. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
  364. ZEND_ARG_INFO(0, type)
  365. ZEND_END_ARG_INFO()
  366. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
  367. ZEND_ARG_INFO(0, var)
  368. ZEND_ARG_INFO(0, encoding)
  369. ZEND_END_ARG_INFO()
  370. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
  371. ZEND_ARG_INFO(0, encoding)
  372. ZEND_END_ARG_INFO()
  373. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
  374. ZEND_ARG_INFO(0, pattern)
  375. ZEND_ARG_INFO(0, string)
  376. ZEND_ARG_INFO(1, registers)
  377. ZEND_END_ARG_INFO()
  378. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
  379. ZEND_ARG_INFO(0, pattern)
  380. ZEND_ARG_INFO(0, string)
  381. ZEND_ARG_INFO(1, registers)
  382. ZEND_END_ARG_INFO()
  383. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
  384. ZEND_ARG_INFO(0, pattern)
  385. ZEND_ARG_INFO(0, replacement)
  386. ZEND_ARG_INFO(0, string)
  387. ZEND_ARG_INFO(0, option)
  388. ZEND_END_ARG_INFO()
  389. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
  390. ZEND_ARG_INFO(0, pattern)
  391. ZEND_ARG_INFO(0, replacement)
  392. ZEND_ARG_INFO(0, string)
  393. ZEND_END_ARG_INFO()
  394. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
  395. ZEND_ARG_INFO(0, pattern)
  396. ZEND_ARG_INFO(0, string)
  397. ZEND_ARG_INFO(0, limit)
  398. ZEND_END_ARG_INFO()
  399. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
  400. ZEND_ARG_INFO(0, pattern)
  401. ZEND_ARG_INFO(0, string)
  402. ZEND_ARG_INFO(0, option)
  403. ZEND_END_ARG_INFO()
  404. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
  405. ZEND_ARG_INFO(0, pattern)
  406. ZEND_ARG_INFO(0, option)
  407. ZEND_END_ARG_INFO()
  408. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
  409. ZEND_ARG_INFO(0, pattern)
  410. ZEND_ARG_INFO(0, option)
  411. ZEND_END_ARG_INFO()
  412. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
  413. ZEND_ARG_INFO(0, pattern)
  414. ZEND_ARG_INFO(0, option)
  415. ZEND_END_ARG_INFO()
  416. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
  417. ZEND_ARG_INFO(0, string)
  418. ZEND_ARG_INFO(0, pattern)
  419. ZEND_ARG_INFO(0, option)
  420. ZEND_END_ARG_INFO()
  421. ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
  422. ZEND_END_ARG_INFO()
  423. ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
  424. ZEND_END_ARG_INFO()
  425. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
  426. ZEND_ARG_INFO(0, position)
  427. ZEND_END_ARG_INFO()
  428. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
  429. ZEND_ARG_INFO(0, options)
  430. ZEND_END_ARG_INFO()
  431. /* }}} */
  432. /* {{{ zend_function_entry mbstring_functions[] */
  433. const zend_function_entry mbstring_functions[] = {
  434. PHP_FE(mb_convert_case, arginfo_mb_convert_case)
  435. PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
  436. PHP_FE(mb_strtolower, arginfo_mb_strtolower)
  437. PHP_FE(mb_language, arginfo_mb_language)
  438. PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
  439. PHP_FE(mb_http_input, arginfo_mb_http_input)
  440. PHP_FE(mb_http_output, arginfo_mb_http_output)
  441. PHP_FE(mb_detect_order, arginfo_mb_detect_order)
  442. PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
  443. PHP_FE(mb_parse_str, arginfo_mb_parse_str)
  444. PHP_FE(mb_output_handler, arginfo_mb_output_handler)
  445. PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
  446. PHP_FE(mb_strlen, arginfo_mb_strlen)
  447. PHP_FE(mb_strpos, arginfo_mb_strpos)
  448. PHP_FE(mb_strrpos, arginfo_mb_strrpos)
  449. PHP_FE(mb_stripos, arginfo_mb_stripos)
  450. PHP_FE(mb_strripos, arginfo_mb_strripos)
  451. PHP_FE(mb_strstr, arginfo_mb_strstr)
  452. PHP_FE(mb_strrchr, arginfo_mb_strrchr)
  453. PHP_FE(mb_stristr, arginfo_mb_stristr)
  454. PHP_FE(mb_strrichr, arginfo_mb_strrichr)
  455. PHP_FE(mb_substr_count, arginfo_mb_substr_count)
  456. PHP_FE(mb_substr, arginfo_mb_substr)
  457. PHP_FE(mb_strcut, arginfo_mb_strcut)
  458. PHP_FE(mb_strwidth, arginfo_mb_strwidth)
  459. PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
  460. PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
  461. PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
  462. PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
  463. PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
  464. PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
  465. PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
  466. PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
  467. PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
  468. PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
  469. PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
  470. PHP_FE(mb_send_mail, arginfo_mb_send_mail)
  471. PHP_FE(mb_get_info, arginfo_mb_get_info)
  472. PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
  473. #if HAVE_MBREGEX
  474. PHP_MBREGEX_FUNCTION_ENTRIES
  475. #endif
  476. { NULL, NULL, NULL }
  477. };
  478. /* }}} */
  479. /* {{{ zend_module_entry mbstring_module_entry */
  480. zend_module_entry mbstring_module_entry = {
  481. STANDARD_MODULE_HEADER,
  482. "mbstring",
  483. mbstring_functions,
  484. PHP_MINIT(mbstring),
  485. PHP_MSHUTDOWN(mbstring),
  486. PHP_RINIT(mbstring),
  487. PHP_RSHUTDOWN(mbstring),
  488. PHP_MINFO(mbstring),
  489. NO_VERSION_YET,
  490. PHP_MODULE_GLOBALS(mbstring),
  491. PHP_GINIT(mbstring),
  492. PHP_GSHUTDOWN(mbstring),
  493. NULL,
  494. STANDARD_MODULE_PROPERTIES_EX
  495. };
  496. /* }}} */
  497. /* {{{ static sapi_post_entry php_post_entries[] */
  498. static sapi_post_entry php_post_entries[] = {
  499. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
  500. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  501. { NULL, 0, NULL, NULL }
  502. };
  503. /* }}} */
  504. #ifdef COMPILE_DL_MBSTRING
  505. ZEND_GET_MODULE(mbstring)
  506. #endif
  507. /* {{{ allocators */
  /*
   * Thin adapters that forward allocation requests to the Zend memory
   * macros. The first four use emalloc/erealloc/ecalloc/efree; the "p"
   * variants call the pe* macros with the persistent flag set to 1.
   */

  /* Allocates sz bytes with emalloc(). */
  static void *_php_mb_allocators_malloc(unsigned int sz)
  {
      void *block = emalloc(sz);

      return block;
  }

  /* Resizes ptr to sz bytes with erealloc(). */
  static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
  {
      void *block = erealloc(ptr, sz);

      return block;
  }

  /* Allocates an array of nelems elements of szelem bytes with ecalloc(). */
  static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
  {
      void *block = ecalloc(nelems, szelem);

      return block;
  }

  /* Releases ptr with efree(). */
  static void _php_mb_allocators_free(void *ptr)
  {
      efree(ptr);
  }

  /* Allocates sz bytes with pemalloc(..., 1). */
  static void *_php_mb_allocators_pmalloc(unsigned int sz)
  {
      void *block = pemalloc(sz, 1);

      return block;
  }

  /* Resizes ptr to sz bytes with perealloc(..., 1). */
  static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
  {
      void *block = perealloc(ptr, sz, 1);

      return block;
  }

  /* Releases ptr with pefree(..., 1). */
  static void _php_mb_allocators_pfree(void *ptr)
  {
      pefree(ptr, 1);
  }
  536. static mbfl_allocators _php_mb_allocators = {
  537. _php_mb_allocators_malloc,
  538. _php_mb_allocators_realloc,
  539. _php_mb_allocators_calloc,
  540. _php_mb_allocators_free,
  541. _php_mb_allocators_pmalloc,
  542. _php_mb_allocators_prealloc,
  543. _php_mb_allocators_pfree
  544. };
  545. /* }}} */
  546. /* {{{ static sapi_post_entry mbstr_post_entries[] */
  547. static sapi_post_entry mbstr_post_entries[] = {
  548. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
  549. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  550. { NULL, 0, NULL, NULL }
  551. };
  552. /* }}} */
  553. /* {{{ static int php_mb_parse_encoding_list()
  554. * Return 0 if input contains any illegal encoding, otherwise 1.
  555. * Even if any illegal encoding is detected the result may contain a list
  556. * of parsed encodings.
  557. */
  558. static int
  559. php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  560. {
  561. int n, l, size, bauto, ret = 1;
  562. char *p, *p1, *p2, *endp, *tmpstr;
  563. enum mbfl_no_encoding no_encoding;
  564. enum mbfl_no_encoding *src, *entry, *list;
  565. list = NULL;
  566. if (value == NULL || value_length <= 0) {
  567. if (return_list) {
  568. *return_list = NULL;
  569. }
  570. if (return_size) {
  571. *return_size = 0;
  572. }
  573. return 0;
  574. } else {
  575. enum mbfl_no_encoding *identify_list;
  576. int identify_list_size;
  577. identify_list = MBSTRG(default_detect_order_list);
  578. identify_list_size = MBSTRG(default_detect_order_list_size);
  579. /* copy the value string for work */
  580. if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
  581. tmpstr = (char *)estrndup(value+1, value_length-2);
  582. value_length -= 2;
  583. }
  584. else
  585. tmpstr = (char *)estrndup(value, value_length);
  586. if (tmpstr == NULL) {
  587. return 0;
  588. }
  589. /* count the number of listed encoding names */
  590. endp = tmpstr + value_length;
  591. n = 1;
  592. p1 = tmpstr;
  593. while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
  594. p1 = p2 + 1;
  595. n++;
  596. }
  597. size = n + identify_list_size;
  598. /* make list */
  599. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  600. if (list != NULL) {
  601. entry = list;
  602. n = 0;
  603. bauto = 0;
  604. p1 = tmpstr;
  605. do {
  606. p2 = p = php_memnstr(p1, ",", 1, endp);
  607. if (p == NULL) {
  608. p = endp;
  609. }
  610. *p = '\0';
  611. /* trim spaces */
  612. while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
  613. p1++;
  614. }
  615. p--;
  616. while (p > p1 && (*p == ' ' || *p == '\t')) {
  617. *p = '\0';
  618. p--;
  619. }
  620. /* convert to the encoding number and check encoding */
  621. if (strcasecmp(p1, "auto") == 0) {
  622. if (!bauto) {
  623. bauto = 1;
  624. l = identify_list_size;
  625. src = identify_list;
  626. while (l > 0) {
  627. *entry++ = *src++;
  628. l--;
  629. n++;
  630. }
  631. }
  632. } else {
  633. no_encoding = mbfl_name2no_encoding(p1);
  634. if (no_encoding != mbfl_no_encoding_invalid) {
  635. *entry++ = no_encoding;
  636. n++;
  637. } else {
  638. ret = 0;
  639. }
  640. }
  641. p1 = p2 + 1;
  642. } while (n < size && p2 != NULL);
  643. if (n > 0) {
  644. if (return_list) {
  645. *return_list = list;
  646. } else {
  647. pefree(list, persistent);
  648. }
  649. } else {
  650. pefree(list, persistent);
  651. if (return_list) {
  652. *return_list = NULL;
  653. }
  654. ret = 0;
  655. }
  656. if (return_size) {
  657. *return_size = n;
  658. }
  659. } else {
  660. if (return_list) {
  661. *return_list = NULL;
  662. }
  663. if (return_size) {
  664. *return_size = 0;
  665. }
  666. ret = 0;
  667. }
  668. efree(tmpstr);
  669. }
  670. return ret;
  671. }
  672. /* }}} */
  673. /* {{{ MBSTRING_API php_mb_check_encoding_list */
  674. MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
  675. return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
  676. }
  677. /* }}} */
  678. /* {{{ static int php_mb_parse_encoding_array()
  679. * Return 0 if input contains any illegal encoding, otherwise 1.
  680. * Even if any illegal encoding is detected the result may contain a list
  681. * of parsed encodings.
  682. */
  683. static int
  684. php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  685. {
  686. zval **hash_entry;
  687. HashTable *target_hash;
  688. int i, n, l, size, bauto,ret = 1;
  689. enum mbfl_no_encoding no_encoding;
  690. enum mbfl_no_encoding *src, *list, *entry;
  691. list = NULL;
  692. if (Z_TYPE_P(array) == IS_ARRAY) {
  693. enum mbfl_no_encoding *identify_list;
  694. int identify_list_size;
  695. identify_list = MBSTRG(default_detect_order_list);
  696. identify_list_size = MBSTRG(default_detect_order_list_size);
  697. target_hash = Z_ARRVAL_P(array);
  698. zend_hash_internal_pointer_reset(target_hash);
  699. i = zend_hash_num_elements(target_hash);
  700. size = i + identify_list_size;
  701. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  702. if (list != NULL) {
  703. entry = list;
  704. bauto = 0;
  705. n = 0;
  706. while (i > 0) {
  707. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  708. break;
  709. }
  710. convert_to_string_ex(hash_entry);
  711. if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
  712. if (!bauto) {
  713. bauto = 1;
  714. l = identify_list_size;
  715. src = identify_list;
  716. while (l > 0) {
  717. *entry++ = *src++;
  718. l--;
  719. n++;
  720. }
  721. }
  722. } else {
  723. no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
  724. if (no_encoding != mbfl_no_encoding_invalid) {
  725. *entry++ = no_encoding;
  726. n++;
  727. } else {
  728. ret = 0;
  729. }
  730. }
  731. zend_hash_move_forward(target_hash);
  732. i--;
  733. }
  734. if (n > 0) {
  735. if (return_list) {
  736. *return_list = list;
  737. } else {
  738. pefree(list, persistent);
  739. }
  740. } else {
  741. pefree(list, persistent);
  742. if (return_list) {
  743. *return_list = NULL;
  744. }
  745. ret = 0;
  746. }
  747. if (return_size) {
  748. *return_size = n;
  749. }
  750. } else {
  751. if (return_list) {
  752. *return_list = NULL;
  753. }
  754. if (return_size) {
  755. *return_size = 0;
  756. }
  757. ret = 0;
  758. }
  759. }
  760. return ret;
  761. }
  762. /* }}} */
  763. static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
  764. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
  765. static void _php_mb_free_regex(void *opaque);
  766. #if HAVE_ONIG
  767. /* {{{ _php_mb_compile_regex */
  768. static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
  769. {
  770. php_mb_regex_t *retval;
  771. OnigErrorInfo err_info;
  772. int err_code;
  773. if ((err_code = onig_new(&retval,
  774. (const OnigUChar *)pattern,
  775. (const OnigUChar *)pattern + strlen(pattern),
  776. ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
  777. ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
  778. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  779. onig_error_code_to_str(err_str, err_code, err_info);
  780. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
  781. retval = NULL;
  782. }
  783. return retval;
  784. }
  785. /* }}} */
  786. /* {{{ _php_mb_match_regex */
  787. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
  788. {
  789. return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
  790. (const OnigUChar*)str + str_len, (const OnigUChar *)str,
  791. (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
  792. }
  793. /* }}} */
  794. /* {{{ _php_mb_free_regex */
  795. static void _php_mb_free_regex(void *opaque)
  796. {
  797. onig_free((php_mb_regex_t *)opaque);
  798. }
  799. /* }}} */
  800. #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
  801. /* {{{ _php_mb_compile_regex */
  802. static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
  803. {
  804. pcre *retval;
  805. const char *err_str;
  806. int err_offset;
  807. if (!(retval = pcre_compile(pattern,
  808. PCRE_CASELESS, &err_str, &err_offset, NULL))) {
  809. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
  810. }
  811. return retval;
  812. }
  813. /* }}} */
  814. /* {{{ _php_mb_match_regex */
  815. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
  816. {
  817. return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
  818. 0, NULL, 0) >= 0;
  819. }
  820. /* }}} */
  821. /* {{{ _php_mb_free_regex */
  /* Releases the compiled PCRE regex held in opaque via pcre_free(). */
  static void _php_mb_free_regex(void *opaque)
  {
      void *compiled = opaque;

      pcre_free(compiled);
  }
  826. /* }}} */
  827. #endif
  828. /* {{{ php_mb_nls_get_default_detect_order_list */
  829. static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
  830. {
  831. size_t i;
  832. *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  833. *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  834. for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
  835. if (php_mb_default_identify_list[i].lang == lang) {
  836. *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
  837. *plist_size = php_mb_default_identify_list[i].list_size;
  838. return 1;
  839. }
  840. }
  841. return 0;
  842. }
  843. /* }}} */
  844. /* {{{ php.ini directive handler */
  845. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
  846. static PHP_INI_MH(OnUpdate_mbstring_language)
  847. {
  848. enum mbfl_no_language no_language;
  849. no_language = mbfl_name2no_language(new_value);
  850. if (no_language == mbfl_no_language_invalid) {
  851. MBSTRG(language) = mbfl_no_language_neutral;
  852. return FAILURE;
  853. }
  854. MBSTRG(language) = no_language;
  855. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  856. return SUCCESS;
  857. }
  858. /* }}} */
  859. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
  860. static PHP_INI_MH(OnUpdate_mbstring_detect_order)
  861. {
  862. enum mbfl_no_encoding *list;
  863. int size;
  864. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  865. if (MBSTRG(detect_order_list)) {
  866. free(MBSTRG(detect_order_list));
  867. }
  868. MBSTRG(detect_order_list) = list;
  869. MBSTRG(detect_order_list_size) = size;
  870. } else {
  871. if (MBSTRG(detect_order_list)) {
  872. free(MBSTRG(detect_order_list));
  873. MBSTRG(detect_order_list) = NULL;
  874. }
  875. return FAILURE;
  876. }
  877. return SUCCESS;
  878. }
  879. /* }}} */
  880. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
  881. static PHP_INI_MH(OnUpdate_mbstring_http_input)
  882. {
  883. enum mbfl_no_encoding *list;
  884. int size;
  885. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  886. if (MBSTRG(http_input_list)) {
  887. free(MBSTRG(http_input_list));
  888. }
  889. MBSTRG(http_input_list) = list;
  890. MBSTRG(http_input_list_size) = size;
  891. } else {
  892. if (MBSTRG(http_input_list)) {
  893. free(MBSTRG(http_input_list));
  894. MBSTRG(http_input_list) = NULL;
  895. }
  896. MBSTRG(http_input_list_size) = 0;
  897. return FAILURE;
  898. }
  899. return SUCCESS;
  900. }
  901. /* }}} */
  902. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
  903. static PHP_INI_MH(OnUpdate_mbstring_http_output)
  904. {
  905. enum mbfl_no_encoding no_encoding;
  906. no_encoding = mbfl_name2no_encoding(new_value);
  907. if (no_encoding != mbfl_no_encoding_invalid) {
  908. MBSTRG(http_output_encoding) = no_encoding;
  909. MBSTRG(current_http_output_encoding) = no_encoding;
  910. } else {
  911. MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
  912. MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
  913. if (new_value != NULL && new_value_length > 0) {
  914. return FAILURE;
  915. }
  916. }
  917. return SUCCESS;
  918. }
  919. /* }}} */
  920. /* {{{ _php_mb_ini_mbstring_internal_encoding_set */
  921. int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
  922. {
  923. enum mbfl_no_encoding no_encoding;
  924. const char *enc_name = NULL;
  925. uint enc_name_len = 0;
  926. no_encoding = new_value ? mbfl_name2no_encoding(new_value):
  927. mbfl_no_encoding_invalid;
  928. if (no_encoding != mbfl_no_encoding_invalid) {
  929. enc_name = new_value;
  930. enc_name_len = new_value_length;
  931. } else {
  932. switch (MBSTRG(language)) {
  933. case mbfl_no_language_uni:
  934. enc_name = "UTF-8";
  935. enc_name_len = sizeof("UTF-8") - 1;
  936. break;
  937. case mbfl_no_language_japanese:
  938. enc_name = "EUC-JP";
  939. enc_name_len = sizeof("EUC-JP") - 1;
  940. break;
  941. case mbfl_no_language_korean:
  942. enc_name = "EUC-KR";
  943. enc_name_len = sizeof("EUC-KR") - 1;
  944. break;
  945. case mbfl_no_language_simplified_chinese:
  946. enc_name = "EUC-CN";
  947. enc_name_len = sizeof("EUC-CN") - 1;
  948. break;
  949. case mbfl_no_language_traditional_chinese:
  950. enc_name = "EUC-TW";
  951. enc_name_len = sizeof("EUC-TW") - 1;
  952. break;
  953. case mbfl_no_language_russian:
  954. enc_name = "KOI8-R";
  955. enc_name_len = sizeof("KOI8-R") - 1;
  956. break;
  957. case mbfl_no_language_german:
  958. enc_name = "ISO-8859-15";
  959. enc_name_len = sizeof("ISO-8859-15") - 1;
  960. break;
  961. case mbfl_no_language_armenian:
  962. enc_name = "ArmSCII-8";
  963. enc_name_len = sizeof("ArmSCII-8") - 1;
  964. break;
  965. case mbfl_no_language_turkish:
  966. enc_name = "ISO-8859-9";
  967. enc_name_len = sizeof("ISO-8859-9") - 1;
  968. break;
  969. default:
  970. enc_name = "ISO-8859-1";
  971. enc_name_len = sizeof("ISO-8859-1") - 1;
  972. break;
  973. }
  974. no_encoding = mbfl_name2no_encoding(enc_name);
  975. }
  976. MBSTRG(internal_encoding) = no_encoding;
  977. MBSTRG(current_internal_encoding) = no_encoding;
  978. #if HAVE_MBREGEX
  979. {
  980. const char *enc_name = new_value;
  981. if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
  982. /* falls back to EUC-JP if an unknown encoding name is given */
  983. enc_name = "EUC-JP";
  984. php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
  985. }
  986. php_mb_regex_set_mbctype(new_value TSRMLS_CC);
  987. }
  988. #endif
  989. return SUCCESS;
  990. }
  991. /* }}} */
  992. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
  993. static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
  994. {
  995. if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
  996. || stage == PHP_INI_STAGE_RUNTIME) {
  997. return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
  998. } else {
  999. /* the corresponding mbstring globals needs to be set according to the
  1000. * ini value in the later stage because it never falls back to the
  1001. * default value if 1. no value for mbstring.internal_encoding is given,
  1002. * 2. mbstring.language directive is processed in per-dir or runtime
  1003. * context and 3. call to the handler for mbstring.language is done
  1004. * after mbstring.internal_encoding is handled. */
  1005. return SUCCESS;
  1006. }
  1007. }
  1008. /* }}} */
  1009. #ifdef ZEND_MULTIBYTE
  1010. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
  1011. static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
  1012. {
  1013. int *list, size;
  1014. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  1015. if (MBSTRG(script_encoding_list) != NULL) {
  1016. free(MBSTRG(script_encoding_list));
  1017. }
  1018. MBSTRG(script_encoding_list) = list;
  1019. MBSTRG(script_encoding_list_size) = size;
  1020. } else {
  1021. if (MBSTRG(script_encoding_list) != NULL) {
  1022. free(MBSTRG(script_encoding_list));
  1023. }
  1024. MBSTRG(script_encoding_list) = NULL;
  1025. MBSTRG(script_encoding_list_size) = 0;
  1026. return FAILURE;
  1027. }
  1028. return SUCCESS;
  1029. }
  1030. /* }}} */
  1031. #endif /* ZEND_MULTIBYTE */
  1032. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
  1033. static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
  1034. {
  1035. int c;
  1036. char *endptr = NULL;
  1037. if (new_value != NULL) {
  1038. if (strcasecmp("none", new_value) == 0) {
  1039. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1040. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1041. } else if (strcasecmp("long", new_value) == 0) {
  1042. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1043. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1044. } else if (strcasecmp("entity", new_value) == 0) {
  1045. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1046. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1047. } else {
  1048. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1049. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1050. if (new_value_length >0) {
  1051. c = strtol(new_value, &endptr, 0);
  1052. if (*endptr == '\0') {
  1053. MBSTRG(filter_illegal_substchar) = c;
  1054. MBSTRG(current_filter_illegal_substchar) = c;
  1055. }
  1056. }
  1057. }
  1058. } else {
  1059. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1060. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1061. MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
  1062. MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
  1063. }
  1064. return SUCCESS;
  1065. }
  1066. /* }}} */
  1067. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
  1068. static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
  1069. {
  1070. if (new_value == NULL) {
  1071. return FAILURE;
  1072. }
  1073. OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
  1074. if (MBSTRG(encoding_translation)) {
  1075. sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
  1076. sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
  1077. } else {
  1078. sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
  1079. sapi_register_post_entries(php_post_entries TSRMLS_CC);
  1080. }
  1081. return SUCCESS;
  1082. }
  1083. /* }}} */
  1084. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
  1085. static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
  1086. {
  1087. zval tmp;
  1088. void *re = NULL;
  1089. if (!new_value) {
  1090. new_value = entry->orig_value;
  1091. new_value_length = entry->orig_value_length;
  1092. }
  1093. php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
  1094. if (Z_STRLEN(tmp) > 0) {
  1095. if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
  1096. zval_dtor(&tmp);
  1097. return FAILURE;
  1098. }
  1099. }
  1100. if (MBSTRG(http_output_conv_mimetypes)) {
  1101. _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
  1102. }
  1103. MBSTRG(http_output_conv_mimetypes) = re;
  1104. zval_dtor(&tmp);
  1105. return SUCCESS;
  1106. }
  1107. /* }}} */
  1108. /* }}} */
  1109. /* {{{ php.ini directive registration */
  /* INI directive table for mbstring. Each entry lists the directive name,
   * its default value string (NULL for none), the PHP_INI_* access mask and
   * the update handler; the STD_* forms additionally name the field of
   * zend_mbstring_globals that is bound to the directive. */
  1110. PHP_INI_BEGIN()
  1111. PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
  1112. PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
  1113. PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
  1114. PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
  1115. PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
  /* script_encoding is only registered in ZEND_MULTIBYTE builds. */
  1116. #ifdef ZEND_MULTIBYTE
  1117. PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
  1118. #endif /* ZEND_MULTIBYTE */
  1119. PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
  /* func_overload is the only directive registered with PHP_INI_SYSTEM alone. */
  1120. STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
  1121. PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
  /* encoding_translation uses a custom handler that also swaps the POST entries. */
  1122. STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
  1123. PHP_INI_SYSTEM | PHP_INI_PERDIR,
  1124. OnUpdate_mbstring_encoding_translation,
  1125. encoding_translation, zend_mbstring_globals, mbstring_globals)
  /* Default pattern: MIME types starting with "text/" or "application/xhtml+xml". */
  1126. PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
  1127. "^(text/|application/xhtml\\+xml)",
  1128. PHP_INI_ALL,
  1129. OnUpdate_mbstring_http_output_conv_mimetypes)
  /* NOTE(review): declared as a boolean entry but updated through OnUpdateLong;
   * presumably strict_detection is a long in zend_mbstring_globals - confirm. */
  1130. STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
  1131. PHP_INI_ALL,
  1132. OnUpdateLong,
  1133. strict_detection, zend_mbstring_globals, mbstring_globals)
  1134. PHP_INI_END()
  1135. /* }}} */
  1136. /* {{{ module global initialize handler */
  1137. static PHP_GINIT_FUNCTION(mbstring)
  1138. {
  1139. mbstring_globals->language = mbfl_no_language_uni;
  1140. mbstring_globals->internal_encoding = mbfl_no_encoding_invalid;
  1141. mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
  1142. #ifdef ZEND_MULTIBYTE
  1143. mbstring_globals->script_encoding_list = NULL;
  1144. mbstring_globals->script_encoding_list_size = 0;
  1145. #endif /* ZEND_MULTIBYTE */
  1146. mbstring_globals->http_output_encoding = mbfl_no_encoding_pass;
  1147. mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass;
  1148. mbstring_globals->http_input_identify = mbfl_no_encoding_invalid;
  1149. mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid;
  1150. mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid;
  1151. mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid;
  1152. mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid;
  1153. mbstring_globals->http_input_list = NULL;
  1154. mbstring_globals->http_input_list_size = 0;
  1155. mbstring_globals->detect_order_list = NULL;
  1156. mbstring_globals->detect_order_list_size = 0;
  1157. mbstring_globals->current_detect_order_list = NULL;
  1158. mbstring_globals->current_detect_order_list_size = 0;
  1159. mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  1160. mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  1161. mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1162. mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
  1163. mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1164. mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
  1165. mbstring_globals->illegalchars = 0;
  1166. mbstring_globals->func_overload = 0;
  1167. mbstring_globals->encoding_translation = 0;
  1168. mbstring_globals->strict_detection = 0;
  1169. mbstring_globals->outconv = NULL;
  1170. mbstring_globals->http_output_conv_mimetypes = NULL;
  1171. #if HAVE_MBREGEX
  1172. mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
  1173. #endif
  1174. }
  1175. /* }}} */
  1176. /* {{{ PHP_GSHUTDOWN_FUNCTION */
  1177. static PHP_GSHUTDOWN_FUNCTION(mbstring)
  1178. {
  1179. if (mbstring_globals->http_input_list) {
  1180. free(mbstring_globals->http_input_list);
  1181. }
  1182. #ifdef ZEND_MULTIBYTE
  1183. if (mbstring_globals->script_encoding_list) {
  1184. free(mbstring_globals->script_encoding_list);
  1185. }
  1186. #endif /* ZEND_MULTIBYTE */
  1187. if (mbstring_globals->detect_order_list) {
  1188. free(mbstring_globals->detect_order_list);
  1189. }
  1190. if (mbstring_globals->http_output_conv_mimetypes) {
  1191. _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
  1192. }
  1193. #if HAVE_MBREGEX
  1194. php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
  1195. #endif
  1196. }
  1197. /* }}} */
  1198. /* {{{ PHP_MINIT_FUNCTION(mbstring) */
  /* Module startup. Points __mbfl_allocators at _php_mb_allocators, registers
   * the INI entries, installs mbstr_treat_data as the SAPI treat-data handler,
   * registers mbstring's POST entries when encoding_translation is set,
   * defines the MB_OVERLOAD_* and MB_CASE_* constants and, when built with
   * mbregex, runs its MINIT. Always returns SUCCESS. */
  1199. PHP_MINIT_FUNCTION(mbstring)
  1200. {
  1201. __mbfl_allocators = &_php_mb_allocators;
  /* INI entries are registered before encoding_translation is read below. */
  1202. REGISTER_INI_ENTRIES();
  1203. /* This is a global handler. Should not be set in a per-request handler. */
  1204. sapi_register_treat_data(mbstr_treat_data);
  1205. /* Post handlers are stored in the thread-local context. */
  1206. if (MBSTRG(encoding_translation)) {
  1207. sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
  1208. }
  /* Bit flags accepted by mbstring.func_overload. */
  1209. REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
  1210. REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
  1211. REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
  /* Case-conversion modes, mapped onto the PHP_UNICODE_CASE_* values. */
  1212. REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
  1213. REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
  1214. REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
  1215. #if HAVE_MBREGEX
  1216. PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1217. #endif
  1218. return SUCCESS;
  1219. }
  1220. /* }}} */
  1221. /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
  /* Module shutdown: unregisters the INI entries and, when built with
   * mbregex, runs its MSHUTDOWN. Always returns SUCCESS. */
  1222. PHP_MSHUTDOWN_FUNCTION(mbstring)
  1223. {
  1224. UNREGISTER_INI_ENTRIES();
  1225. #if HAVE_MBREGEX
  1226. PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1227. #endif
  1228. return SUCCESS;
  1229. }
  1230. /* }}} */
  1231. /* {{{ PHP_RINIT_FUNCTION(mbstring) */
  1232. PHP_RINIT_FUNCTION(mbstring)
  1233. {
  1234. int n;
  1235. enum mbfl_no_encoding *list=NULL, *entry;
  1236. zend_function *func, *orig;
  1237. const struct mb_overload_def *p;
  1238. MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
  1239. MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
  1240. MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
  1241. MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
  1242. MBSTRG(illegalchars) = 0;
  1243. n = 0;
  1244. if (MBSTRG(detect_order_list)) {
  1245. list = MBSTRG(detect_order_list);
  1246. n = MBSTRG(detect_order_list_size);
  1247. }
  1248. if (n <= 0) {
  1249. list = MBSTRG(default_detect_order_list);
  1250. n = MBSTRG(default_detect_order_list_size);
  1251. }
  1252. entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
  1253. MBSTRG(current_detect_order_list) = entry;
  1254. MBSTRG(current_detect_order_list_size) = n;
  1255. while (n > 0) {
  1256. *entry++ = *list++;
  1257. n--;
  1258. }
  1259. /* override original function. */
  1260. if (MBSTRG(func_overload)){
  1261. p = &(mb_ovld[0]);
  1262. while (p->type > 0) {
  1263. if ((MBSTRG(func_overload) & p->type) == p->type &&
  1264. zend_hash_find(EG(function_table), p->save_func,
  1265. strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
  1266. zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
  1267. if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
  1268. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
  1269. return FAILURE;
  1270. } else {
  1271. zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
  1272. if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
  1273. NULL) == FAILURE) {
  1274. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
  1275. return FAILURE;
  1276. }
  1277. }
  1278. }
  1279. p++;
  1280. }
  1281. }
  1282. #if HAVE_MBREGEX
  1283. PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1284. #endif
  1285. #ifdef ZEND_MULTIBYTE
  1286. zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
  1287. php_mb_set_zend_encoding(TSRMLS_C);
  1288. #endif /* ZEND_MULTIBYTE */
  1289. return SUCCESS;
  1290. }
  1291. /* }}} */
  1292. /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
  1293. PHP_RSHUTDOWN_FUNCTION(mbstring)
  1294. {
  1295. const struct mb_overload_def *p;
  1296. zend_function *orig;
  1297. if (MBSTRG(current_detect_order_list) != NULL) {
  1298. efree(MBSTRG(current_detect_order_list));
  1299. MBSTRG(current_detect_order_list) = NULL;
  1300. MBSTRG(current_detect_order_list_size) = 0;
  1301. }
  1302. if (MBSTRG(outconv) != NULL) {
  1303. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1304. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1305. MBSTRG(outconv) = NULL;
  1306. }
  1307. /* clear http input identification. */
  1308. MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
  1309. MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
  1310. MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
  1311. MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
  1312. MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
  1313. /* clear overloaded function. */
  1314. if (MBSTRG(func_overload)){
  1315. p = &(mb_ovld[0]);
  1316. while (p->type > 0) {
  1317. if ((MBSTRG(func_overload) & p->type) == p->type &&
  1318. zend_hash_find(EG(function_table), p->save_func,
  1319. strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
  1320. zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
  1321. zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
  1322. }
  1323. p++;
  1324. }
  1325. }
  1326. #if HAVE_MBREGEX
  1327. PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1328. #endif
  1329. return SUCCESS;
  1330. }
  1331. /* }}} */
  1332. /* {{{ PHP_MINFO_FUNCTION(mbstring) */
  /* phpinfo() section for mbstring: a status table, a licence notice for the
   * bundled converter, the mbregex section when compiled in, and finally the
   * module's INI entries. */
  1333. PHP_MINFO_FUNCTION(mbstring)
  1334. {
  /* Status table; the last row reflects the current encoding_translation flag. */
  1335. php_info_print_table_start();
  1336. php_info_print_table_row(2, "Multibyte Support", "enabled");
  1337. php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
  1338. php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
  1339. php_info_print_table_end();
  /* Single-cell table carrying the LGPL notice. */
  1340. php_info_print_table_start();
  1341. php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
  1342. php_info_print_table_end();
  1343. #if HAVE_MBREGEX
  1344. PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
  1345. #endif
  1346. DISPLAY_INI_ENTRIES();
  1347. }
  1348. /* }}} */
  1349. /* {{{ proto string mb_language([string language])
  1350. Sets the current language or Returns the current language as a string */
  1351. PHP_FUNCTION(mb_language)
  1352. {
  1353. char *name = NULL;
  1354. int name_len = 0;
  1355. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  1356. return;
  1357. }
  1358. if (name == NULL) {
  1359. RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
  1360. } else {
  1361. if (FAILURE == zend_alter_ini_entry(
  1362. "mbstring.language", sizeof("mbstring.language"),
  1363. name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
  1364. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
  1365. RETVAL_FALSE;
  1366. } else {
  1367. RETVAL_TRUE;
  1368. }
  1369. }
  1370. }
  1371. /* }}} */
  1372. /* {{{ proto string mb_internal_encoding([string encoding])
  1373. Sets the current internal encoding or Returns the current internal encoding as a string */
  1374. PHP_FUNCTION(mb_internal_encoding)
  1375. {
  1376. char *name = NULL;
  1377. int name_len;
  1378. enum mbfl_no_encoding no_encoding;
  1379. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  1380. RETURN_FALSE;
  1381. }
  1382. if (name == NULL) {
  1383. name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  1384. if (name != NULL) {
  1385. RETURN_STRING(name, 1);
  1386. } else {
  1387. RETURN_FALSE;
  1388. }
  1389. } else {
  1390. no_encoding = mbfl_name2no_encoding(name);
  1391. if (no_encoding == mbfl_no_encoding_invalid) {
  1392. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1393. RETURN_FALSE;
  1394. } else {
  1395. MBSTRG(current_internal_encoding) = no_encoding;
  1396. #ifdef ZEND_MULTIBYTE
  1397. /* TODO: make independent from mbstring.encoding_translation? */
  1398. if (MBSTRG(encoding_translation)) {
  1399. zend_multibyte_set_internal_encoding(name TSRMLS_CC);
  1400. }
  1401. #endif /* ZEND_MULTIBYTE */
  1402. RETURN_TRUE;
  1403. }
  1404. }
  1405. }
  1406. /* }}} */
  1407. /* {{{ proto mixed mb_http_input([string type])
  1408. Returns the input encoding */
  1409. PHP_FUNCTION(mb_http_input)
  1410. {
  1411. char *typ = NULL;
  1412. int typ_len;
  1413. int retname, n;
  1414. char *name, *list, *temp;
  1415. enum mbfl_no_encoding *entry;
  1416. enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
  1417. retname = 1;
  1418. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  1419. RETURN_FALSE;
  1420. }
  1421. if (typ == NULL) {
  1422. result = MBSTRG(http_input_identify);
  1423. } else {
  1424. switch (*typ) {
  1425. case 'G':
  1426. case 'g':
  1427. result = MBSTRG(http_input_identify_get);
  1428. break;
  1429. case 'P':
  1430. case 'p':
  1431. result = MBSTRG(http_input_identify_post);
  1432. break;
  1433. case 'C':
  1434. case 'c':
  1435. result = MBSTRG(http_input_identify_cookie);
  1436. break;
  1437. case 'S':
  1438. case 's':
  1439. result = MBSTRG(http_input_identify_string);
  1440. break;
  1441. case 'I':
  1442. case 'i':
  1443. array_init(return_value);
  1444. entry = MBSTRG(http_input_list);
  1445. n = MBSTRG(http_input_list_size);
  1446. while (n > 0) {
  1447. name = (char *)mbfl_no_encoding2name(*entry);
  1448. if (name) {
  1449. add_next_index_string(return_value, name, 1);
  1450. }
  1451. entry++;
  1452. n--;
  1453. }
  1454. retname = 0;
  1455. break;
  1456. case 'L':
  1457. case 'l':
  1458. entry = MBSTRG(http_input_list);
  1459. n = MBSTRG(http_input_list_size);
  1460. list = NULL;
  1461. while (n > 0) {
  1462. name = (char *)mbfl_no_encoding2name(*entry);
  1463. if (name) {
  1464. if (list) {
  1465. temp = list;
  1466. spprintf(&list, 0, "%s,%s", temp, name);
  1467. efree(temp);
  1468. if (!list) {
  1469. break;
  1470. }
  1471. } else {
  1472. list = estrdup(name);
  1473. }
  1474. }
  1475. entry++;
  1476. n--;
  1477. }
  1478. if (!list) {
  1479. RETURN_FALSE;
  1480. }
  1481. RETVAL_STRING(list, 0);
  1482. retname = 0;
  1483. break;
  1484. default:
  1485. result = MBSTRG(http_input_identify);
  1486. break;
  1487. }
  1488. }
  1489. if (retname) {
  1490. if (result != mbfl_no_encoding_invalid &&
  1491. (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
  1492. RETVAL_STRING(name, 1);
  1493. } else {
  1494. RETVAL_FALSE;
  1495. }
  1496. }
  1497. }
  1498. /* }}} */
  1499. /* {{{ proto string mb_http_output([string encoding])
  1500. Sets the current output_encoding or returns the current output_encoding as a string */
  1501. PHP_FUNCTION(mb_http_output)
  1502. {
  1503. char *name = NULL;
  1504. int name_len;
  1505. enum mbfl_no_encoding no_encoding;
  1506. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
  1507. RETURN_FALSE;
  1508. }
  1509. if (name == NULL) {
  1510. name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
  1511. if (name != NULL) {
  1512. RETURN_STRING(name, 1);
  1513. } else {
  1514. RETURN_FALSE;
  1515. }
  1516. } else {
  1517. no_encoding = mbfl_name2no_encoding(name);
  1518. if (no_encoding == mbfl_no_encoding_invalid) {
  1519. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1520. RETURN_FALSE;
  1521. } else {
  1522. MBSTRG(current_http_output_encoding) = no_encoding;
  1523. RETURN_TRUE;
  1524. }
  1525. }
  1526. }
  1527. /* }}} */
  1528. /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
  1529. Sets the current detect_order or returns the current detect_order as an array */
  1530. PHP_FUNCTION(mb_detect_order)
  1531. {
  1532. zval **arg1 = NULL;
  1533. int n, size;
  1534. enum mbfl_no_encoding *list, *entry;
  1535. char *name;
  1536. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
  1537. return;
  1538. }
  1539. if (!arg1) {
  1540. array_init(return_value);
  1541. entry = MBSTRG(current_detect_order_list);
  1542. n = MBSTRG(current_detect_order_list_size);
  1543. while (n > 0) {
  1544. name = (char *)mbfl_no_encoding2name(*entry);
  1545. if (name) {
  1546. add_next_index_string(return_value, name, 1);
  1547. }
  1548. entry++;
  1549. n--;
  1550. }
  1551. } else {
  1552. list = NULL;
  1553. size = 0;
  1554. switch (Z_TYPE_PP(arg1)) {
  1555. case IS_ARRAY:
  1556. if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
  1557. if (list) {
  1558. efree(list);
  1559. }
  1560. RETURN_FALSE;
  1561. }
  1562. break;
  1563. default:
  1564. convert_to_string_ex(arg1);
  1565. if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
  1566. if (list) {
  1567. efree(list);
  1568. }
  1569. RETURN_FALSE;
  1570. }
  1571. break;
  1572. }
  1573. if (list == NULL) {
  1574. RETURN_FALSE;
  1575. }
  1576. if (MBSTRG(current_detect_order_list)) {
  1577. efree(MBSTRG(current_detect_order_list));
  1578. }
  1579. MBSTRG(current_detect_order_list) = list;
  1580. MBSTRG(current_detect_order_list_size) = size;
  1581. RETURN_TRUE;
  1582. }
  1583. }
  1584. /* }}} */
  1585. /* {{{ proto mixed mb_substitute_character([mixed substchar])
  1586. Sets the current substitute_character or returns the current substitute_character */
  1587. PHP_FUNCTION(mb_substitute_character)
  1588. {
  1589. zval **arg1 = NULL;
  1590. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
  1591. return;
  1592. }
  1593. if (!arg1) {
  1594. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  1595. RETURN_STRING("none", 1);
  1596. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  1597. RETURN_STRING("long", 1);
  1598. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  1599. RETURN_STRING("entity", 1);
  1600. } else {
  1601. RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
  1602. }
  1603. } else {
  1604. RETVAL_TRUE;
  1605. switch (Z_TYPE_PP(arg1)) {
  1606. case IS_STRING:
  1607. if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
  1608. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1609. } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
  1610. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1611. } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
  1612. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1613. } else {
  1614. convert_to_long_ex(arg1);
  1615. if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
  1616. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1617. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1618. } else {
  1619. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1620. RETURN_FALSE;
  1621. }
  1622. }
  1623. break;
  1624. default:
  1625. convert_to_long_ex(arg1);
  1626. if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
  1627. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1628. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1629. } else {
  1630. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1631. RETURN_FALSE;
  1632. }
  1633. break;
  1634. }
  1635. }
  1636. }
  1637. /* }}} */
  1638. /* {{{ proto string mb_preferred_mime_name(string encoding)
  1639. Return the preferred MIME name (charset) as a string */
  1640. PHP_FUNCTION(mb_preferred_mime_name)
  1641. {
  1642. enum mbfl_no_encoding no_encoding;
  1643. char *name = NULL;
  1644. int name_len;
  1645. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
  1646. return;
  1647. } else {
  1648. no_encoding = mbfl_name2no_encoding(name);
  1649. if (no_encoding == mbfl_no_encoding_invalid) {
  1650. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1651. RETVAL_FALSE;
  1652. } else {
  1653. const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
  1654. if (preferred_name == NULL || *preferred_name == '\0') {
  1655. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
  1656. RETVAL_FALSE;
  1657. } else {
  1658. RETVAL_STRING((char *)preferred_name, 1);
  1659. }
  1660. }
  1661. }
  1662. }
  1663. /* }}} */
  1664. #define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0)
  1665. #define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0)
  1666. /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
  1667. Parses GET/POST/COOKIE data and sets global variables */
  1668. PHP_FUNCTION(mb_parse_str)
  1669. {
  1670. zval *track_vars_array = NULL;
  1671. char *encstr = NULL;
  1672. int encstr_len;
  1673. php_mb_encoding_handler_info_t info;
  1674. enum mbfl_no_encoding detected;
  1675. track_vars_array = NULL;
  1676. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
  1677. return;
  1678. }
  1679. /* Clear out the array */
  1680. if (track_vars_array != NULL) {
  1681. zval_dtor(track_vars_array);
  1682. array_init(track_vars_array);
  1683. }
  1684. encstr = estrndup(encstr, encstr_len);
  1685. info.data_type = PARSE_STRING;
  1686. info.separator = PG(arg_separator).input;
  1687. info.report_errors = 1;
  1688. info.to_encoding = MBSTRG(current_internal_encoding);
  1689. info.to_language = MBSTRG(language);
  1690. info.from_encodings = MBSTRG(http_input_list);
  1691. info.num_from_encodings = MBSTRG(http_input_list_size);
  1692. info.from_language = MBSTRG(language);
  1693. detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
  1694. MBSTRG(http_input_identify) = detected;
  1695. RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
  1696. if (encstr != NULL) efree(encstr);
  1697. }
  1698. /* }}} */
  1699. /* {{{ proto string mb_output_handler(string contents, int status)
  1700. Returns string in output buffer converted to the http_output encoding */
  1701. PHP_FUNCTION(mb_output_handler)
  1702. {
  1703. char *arg_string;
  1704. int arg_string_len;
  1705. long arg_status;
  1706. mbfl_string string, result;
  1707. const char *charset;
  1708. char *p;
  1709. enum mbfl_no_encoding encoding;
  1710. int last_feed, len;
  1711. unsigned char send_text_mimetype = 0;
  1712. char *s, *mimetype = NULL;
  1713. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
  1714. return;
  1715. }
  1716. encoding = MBSTRG(current_http_output_encoding);
  1717. /* start phase only */
  1718. if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
  1719. /* delete the converter just in case. */
  1720. if (MBSTRG(outconv)) {
  1721. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1722. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1723. MBSTRG(outconv) = NULL;
  1724. }
  1725. if (encoding == mbfl_no_encoding_pass) {
  1726. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1727. }
  1728. /* analyze mime type */
  1729. if (SG(sapi_headers).mimetype &&
  1730. _php_mb_match_regex(
  1731. MBSTRG(http_output_conv_mimetypes),
  1732. SG(sapi_headers).mimetype,
  1733. strlen(SG(sapi_headers).mimetype))) {
  1734. if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
  1735. mimetype = estrdup(SG(sapi_headers).mimetype);
  1736. } else {
  1737. mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
  1738. }
  1739. send_text_mimetype = 1;
  1740. } else if (SG(sapi_headers).send_default_content_type) {
  1741. mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
  1742. }
  1743. /* if content-type is not yet set, set it and activate the converter */
  1744. if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
  1745. charset = mbfl_no2preferred_mime_name(encoding);
  1746. if (charset) {
  1747. len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
  1748. if (sapi_add_header(p, len, 0) != FAILURE) {
  1749. SG(sapi_headers).send_default_content_type = 0;
  1750. }
  1751. }
  1752. /* activate the converter */
  1753. MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
  1754. if (send_text_mimetype){
  1755. efree(mimetype);
  1756. }
  1757. }
  1758. }
  1759. /* just return if the converter is not activated. */
  1760. if (MBSTRG(outconv) == NULL) {
  1761. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1762. }
  1763. /* flag */
  1764. last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
  1765. /* mode */
  1766. mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
  1767. mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
  1768. /* feed the string */
  1769. mbfl_string_init(&string);
  1770. string.no_language = MBSTRG(language);
  1771. string.no_encoding = MBSTRG(current_internal_encoding);
  1772. string.val = (unsigned char *)arg_string;
  1773. string.len = arg_string_len;
  1774. mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
  1775. if (last_feed) {
  1776. mbfl_buffer_converter_flush(MBSTRG(outconv));
  1777. }
  1778. /* get the converter output, and return it */
  1779. mbfl_buffer_converter_result(MBSTRG(outconv), &result);
  1780. RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */
  1781. /* delete the converter if it is the last feed. */
  1782. if (last_feed) {
  1783. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1784. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1785. MBSTRG(outconv) = NULL;
  1786. }
  1787. }
  1788. /* }}} */
  1789. /* {{{ proto int mb_strlen(string str [, string encoding])
  1790. Get character numbers of a string */
  1791. PHP_FUNCTION(mb_strlen)
  1792. {
  1793. int n;
  1794. mbfl_string string;
  1795. char *enc_name = NULL;
  1796. int enc_name_len;
  1797. mbfl_string_init(&string);
  1798. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  1799. RETURN_FALSE;
  1800. }
  1801. string.no_language = MBSTRG(language);
  1802. if (enc_name == NULL) {
  1803. string.no_encoding = MBSTRG(current_internal_encoding);
  1804. } else {
  1805. string.no_encoding = mbfl_name2no_encoding(enc_name);
  1806. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1807. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1808. RETURN_FALSE;
  1809. }
  1810. }
  1811. n = mbfl_strlen(&string);
  1812. if (n >= 0) {
  1813. RETVAL_LONG(n);
  1814. } else {
  1815. RETVAL_FALSE;
  1816. }
  1817. }
  1818. /* }}} */
  1819. /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
  1820. Find position of first occurrence of a string within another */
  1821. PHP_FUNCTION(mb_strpos)
  1822. {
  1823. int n, reverse = 0;
  1824. long offset;
  1825. mbfl_string haystack, needle;
  1826. char *enc_name = NULL;
  1827. int enc_name_len;
  1828. mbfl_string_init(&haystack);
  1829. mbfl_string_init(&needle);
  1830. haystack.no_language = MBSTRG(language);
  1831. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1832. needle.no_language = MBSTRG(language);
  1833. needle.no_encoding = MBSTRG(current_internal_encoding);
  1834. offset = 0;
  1835. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
  1836. RETURN_FALSE;
  1837. }
  1838. if (enc_name != NULL) {
  1839. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1840. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1841. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1842. RETURN_FALSE;
  1843. }
  1844. }
  1845. if (offset < 0 || offset > mbfl_strlen(&haystack)) {
  1846. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
  1847. RETURN_FALSE;
  1848. }
  1849. if (needle.len == 0) {
  1850. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  1851. RETURN_FALSE;
  1852. }
  1853. n = mbfl_strpos(&haystack, &needle, offset, reverse);
  1854. if (n >= 0) {
  1855. RETVAL_LONG(n);
  1856. } else {
  1857. switch (-n) {
  1858. case 1:
  1859. break;
  1860. case 2:
  1861. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
  1862. break;
  1863. case 4:
  1864. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
  1865. break;
  1866. case 8:
  1867. php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
  1868. break;
  1869. default:
  1870. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
  1871. break;
  1872. }
  1873. RETVAL_FALSE;
  1874. }
  1875. }
  1876. /* }}} */
  1877. /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
  1878. Find position of last occurrence of a string within another */
  1879. PHP_FUNCTION(mb_strrpos)
  1880. {
  1881. int n;
  1882. mbfl_string haystack, needle;
  1883. char *enc_name = NULL;
  1884. int enc_name_len;
  1885. zval **zoffset = NULL;
  1886. long offset = 0, str_flg;
  1887. char *enc_name2 = NULL;
  1888. int enc_name_len2;
  1889. mbfl_string_init(&haystack);
  1890. mbfl_string_init(&needle);
  1891. haystack.no_language = MBSTRG(language);
  1892. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1893. needle.no_language = MBSTRG(language);
  1894. needle.no_encoding = MBSTRG(current_internal_encoding);
  1895. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
  1896. RETURN_FALSE;
  1897. }
  1898. if (zoffset) {
  1899. if (Z_TYPE_PP(zoffset) == IS_STRING) {
  1900. enc_name2 = Z_STRVAL_PP(zoffset);
  1901. enc_name_len2 = Z_STRLEN_PP(zoffset);
  1902. str_flg = 1;
  1903. if (enc_name2 != NULL) {
  1904. switch (*enc_name2) {
  1905. case '0':
  1906. case '1':
  1907. case '2':
  1908. case '3':
  1909. case '4':
  1910. case '5':
  1911. case '6':
  1912. case '7':
  1913. case '8':
  1914. case '9':
  1915. case ' ':
  1916. case '-':
  1917. case '.':
  1918. break;
  1919. default :
  1920. str_flg = 0;
  1921. break;
  1922. }
  1923. }
  1924. if (str_flg) {
  1925. convert_to_long_ex(zoffset);
  1926. offset = Z_LVAL_PP(zoffset);
  1927. } else {
  1928. enc_name = enc_name2;
  1929. enc_name_len = enc_name_len2;
  1930. }
  1931. } else {
  1932. convert_to_long_ex(zoffset);
  1933. offset = Z_LVAL_PP(zoffset);
  1934. }
  1935. }
  1936. if (enc_name != NULL) {
  1937. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1938. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1939. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1940. RETURN_FALSE;
  1941. }
  1942. }
  1943. if (haystack.len <= 0) {
  1944. RETURN_FALSE;
  1945. }
  1946. if (needle.len <= 0) {
  1947. RETURN_FALSE;
  1948. }
  1949. {
  1950. int haystack_char_len = mbfl_strlen(&haystack);
  1951. if ((offset > 0 && offset > haystack_char_len) ||
  1952. (offset < 0 && -offset > haystack_char_len)) {
  1953. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
  1954. RETURN_FALSE;
  1955. }
  1956. }
  1957. n = mbfl_strpos(&haystack, &needle, offset, 1);
  1958. if (n >= 0) {
  1959. RETVAL_LONG(n);
  1960. } else {
  1961. RETVAL_FALSE;
  1962. }
  1963. }
  1964. /* }}} */
  1965. /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
  1966. Finds position of first occurrence of a string within another, case insensitive */
  1967. PHP_FUNCTION(mb_stripos)
  1968. {
  1969. int n;
  1970. long offset;
  1971. mbfl_string haystack, needle;
  1972. char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1973. int from_encoding_len;
  1974. n = -1;
  1975. offset = 0;
  1976. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
  1977. RETURN_FALSE;
  1978. }
  1979. if (needle.len == 0) {
  1980. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  1981. RETURN_FALSE;
  1982. }
  1983. n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
  1984. if (n >= 0) {
  1985. RETVAL_LONG(n);
  1986. } else {
  1987. RETVAL_FALSE;
  1988. }
  1989. }
  1990. /* }}} */
  1991. /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
  1992. Finds position of last occurrence of a string within another, case insensitive */
  1993. PHP_FUNCTION(mb_strripos)
  1994. {
  1995. int n;
  1996. long offset;
  1997. mbfl_string haystack, needle;
  1998. const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1999. int from_encoding_len;
  2000. n = -1;
  2001. offset = 0;
  2002. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
  2003. RETURN_FALSE;
  2004. }
  2005. n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
  2006. if (n >= 0) {
  2007. RETVAL_LONG(n);
  2008. } else {
  2009. RETVAL_FALSE;
  2010. }
  2011. }
  2012. /* }}} */
  2013. /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
  2014. Finds first occurrence of a string within another */
  2015. PHP_FUNCTION(mb_strstr)
  2016. {
  2017. int n, len, mblen;
  2018. mbfl_string haystack, needle, result, *ret = NULL;
  2019. char *enc_name = NULL;
  2020. int enc_name_len;
  2021. zend_bool part = 0;
  2022. mbfl_string_init(&haystack);
  2023. mbfl_string_init(&needle);
  2024. haystack.no_language = MBSTRG(language);
  2025. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2026. needle.no_language = MBSTRG(language);
  2027. needle.no_encoding = MBSTRG(current_internal_encoding);
  2028. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
  2029. RETURN_FALSE;
  2030. }
  2031. if (enc_name != NULL) {
  2032. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  2033. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2034. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2035. RETURN_FALSE;
  2036. }
  2037. }
  2038. if (needle.len <= 0) {
  2039. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  2040. RETURN_FALSE;
  2041. }
  2042. n = mbfl_strpos(&haystack, &needle, 0, 0);
  2043. if (n >= 0) {
  2044. mblen = mbfl_strlen(&haystack);
  2045. if (part) {
  2046. ret = mbfl_substr(&haystack, &result, 0, n);
  2047. if (ret != NULL) {
  2048. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2049. } else {
  2050. RETVAL_FALSE;
  2051. }
  2052. } else {
  2053. len = (mblen - n);
  2054. ret = mbfl_substr(&haystack, &result, n, len);
  2055. if (ret != NULL) {
  2056. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2057. } else {
  2058. RETVAL_FALSE;
  2059. }
  2060. }
  2061. } else {
  2062. RETVAL_FALSE;
  2063. }
  2064. }
  2065. /* }}} */
  2066. /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
  2067. Finds the last occurrence of a character in a string within another */
  2068. PHP_FUNCTION(mb_strrchr)
  2069. {
  2070. int n, len, mblen;
  2071. mbfl_string haystack, needle, result, *ret = NULL;
  2072. char *enc_name = NULL;
  2073. int enc_name_len;
  2074. zend_bool part = 0;
  2075. mbfl_string_init(&haystack);
  2076. mbfl_string_init(&needle);
  2077. haystack.no_language = MBSTRG(language);
  2078. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2079. needle.no_language = MBSTRG(language);
  2080. needle.no_encoding = MBSTRG(current_internal_encoding);
  2081. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
  2082. RETURN_FALSE;
  2083. }
  2084. if (enc_name != NULL) {
  2085. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  2086. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2087. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2088. RETURN_FALSE;
  2089. }
  2090. }
  2091. if (haystack.len <= 0) {
  2092. RETURN_FALSE;
  2093. }
  2094. if (needle.len <= 0) {
  2095. RETURN_FALSE;
  2096. }
  2097. n = mbfl_strpos(&haystack, &needle, 0, 1);
  2098. if (n >= 0) {
  2099. mblen = mbfl_strlen(&haystack);
  2100. if (part) {
  2101. ret = mbfl_substr(&haystack, &result, 0, n);
  2102. if (ret != NULL) {
  2103. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2104. } else {
  2105. RETVAL_FALSE;
  2106. }
  2107. } else {
  2108. len = (mblen - n);
  2109. ret = mbfl_substr(&haystack, &result, n, len);
  2110. if (ret != NULL) {
  2111. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2112. } else {
  2113. RETVAL_FALSE;
  2114. }
  2115. }
  2116. } else {
  2117. RETVAL_FALSE;
  2118. }
  2119. }
  2120. /* }}} */
  2121. /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
  2122. Finds first occurrence of a string within another, case insensitive */
  2123. PHP_FUNCTION(mb_stristr)
  2124. {
  2125. zend_bool part = 0;
  2126. unsigned int from_encoding_len, len, mblen;
  2127. int n;
  2128. mbfl_string haystack, needle, result, *ret = NULL;
  2129. const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2130. mbfl_string_init(&haystack);
  2131. mbfl_string_init(&needle);
  2132. haystack.no_language = MBSTRG(language);
  2133. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2134. needle.no_language = MBSTRG(language);
  2135. needle.no_encoding = MBSTRG(current_internal_encoding);
  2136. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
  2137. RETURN_FALSE;
  2138. }
  2139. if (!needle.len) {
  2140. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  2141. RETURN_FALSE;
  2142. }
  2143. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
  2144. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2145. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
  2146. RETURN_FALSE;
  2147. }
  2148. n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
  2149. if (n <0) {
  2150. RETURN_FALSE;
  2151. }
  2152. mblen = mbfl_strlen(&haystack);
  2153. if (part) {
  2154. ret = mbfl_substr(&haystack, &result, 0, n);
  2155. if (ret != NULL) {
  2156. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2157. } else {
  2158. RETVAL_FALSE;
  2159. }
  2160. } else {
  2161. len = (mblen - n);
  2162. ret = mbfl_substr(&haystack, &result, n, len);
  2163. if (ret != NULL) {
  2164. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2165. } else {
  2166. RETVAL_FALSE;
  2167. }
  2168. }
  2169. }
  2170. /* }}} */
  2171. /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
  2172. Finds the last occurrence of a character in a string within another, case insensitive */
  2173. PHP_FUNCTION(mb_strrichr)
  2174. {
  2175. zend_bool part = 0;
  2176. int n, from_encoding_len, len, mblen;
  2177. mbfl_string haystack, needle, result, *ret = NULL;
  2178. char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2179. mbfl_string_init(&haystack);
  2180. mbfl_string_init(&needle);
  2181. haystack.no_language = MBSTRG(language);
  2182. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2183. needle.no_language = MBSTRG(language);
  2184. needle.no_encoding = MBSTRG(current_internal_encoding);
  2185. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
  2186. RETURN_FALSE;
  2187. }
  2188. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
  2189. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2190. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
  2191. RETURN_FALSE;
  2192. }
  2193. n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
  2194. if (n <0) {
  2195. RETURN_FALSE;
  2196. }
  2197. mblen = mbfl_strlen(&haystack);
  2198. if (part) {
  2199. ret = mbfl_substr(&haystack, &result, 0, n);
  2200. if (ret != NULL) {
  2201. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2202. } else {
  2203. RETVAL_FALSE;
  2204. }
  2205. } else {
  2206. len = (mblen - n);
  2207. ret = mbfl_substr(&haystack, &result, n, len);
  2208. if (ret != NULL) {
  2209. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2210. } else {
  2211. RETVAL_FALSE;
  2212. }
  2213. }
  2214. }
  2215. /* }}} */
  2216. /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
  2217. Count the number of substring occurrences */
  2218. PHP_FUNCTION(mb_substr_count)
  2219. {
  2220. int n;
  2221. mbfl_string haystack, needle;
  2222. char *enc_name = NULL;
  2223. int enc_name_len;
  2224. mbfl_string_init(&haystack);
  2225. mbfl_string_init(&needle);
  2226. haystack.no_language = MBSTRG(language);
  2227. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2228. needle.no_language = MBSTRG(language);
  2229. needle.no_encoding = MBSTRG(current_internal_encoding);
  2230. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
  2231. return;
  2232. }
  2233. if (enc_name != NULL) {
  2234. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  2235. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2236. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2237. RETURN_FALSE;
  2238. }
  2239. }
  2240. if (needle.len <= 0) {
  2241. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
  2242. RETURN_FALSE;
  2243. }
  2244. n = mbfl_substr_count(&haystack, &needle);
  2245. if (n >= 0) {
  2246. RETVAL_LONG(n);
  2247. } else {
  2248. RETVAL_FALSE;
  2249. }
  2250. }
  2251. /* }}} */
  2252. /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
  2253. Returns part of a string */
  2254. PHP_FUNCTION(mb_substr)
  2255. {
  2256. size_t argc = ZEND_NUM_ARGS();
  2257. char *str, *encoding;
  2258. long from, len;
  2259. int mblen, str_len, encoding_len;
  2260. mbfl_string string, result, *ret;
  2261. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", &str, &str_len, &from, &len, &encoding, &encoding_len) == FAILURE) {
  2262. return;
  2263. }
  2264. mbfl_string_init(&string);
  2265. string.no_language = MBSTRG(language);
  2266. string.no_encoding = MBSTRG(current_internal_encoding);
  2267. if (argc == 4) {
  2268. string.no_encoding = mbfl_name2no_encoding(encoding);
  2269. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2270. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  2271. RETURN_FALSE;
  2272. }
  2273. }
  2274. string.val = (unsigned char *)str;
  2275. string.len = str_len;
  2276. if (argc < 3) {
  2277. len = str_len;
  2278. }
  2279. /* measures length */
  2280. mblen = 0;
  2281. if (from < 0 || len < 0) {
  2282. mblen = mbfl_strlen(&string);
  2283. }
  2284. /* if "from" position is negative, count start position from the end
  2285. * of the string
  2286. */
  2287. if (from < 0) {
  2288. from = mblen + from;
  2289. if (from < 0) {
  2290. from = 0;
  2291. }
  2292. }
  2293. /* if "length" position is negative, set it to the length
  2294. * needed to stop that many chars from the end of the string
  2295. */
  2296. if (len < 0) {
  2297. len = (mblen - from) + len;
  2298. if (len < 0) {
  2299. len = 0;
  2300. }
  2301. }
  2302. if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
  2303. && (from >= mbfl_strlen(&string))) {
  2304. RETURN_FALSE;
  2305. }
  2306. ret = mbfl_substr(&string, &result, from, len);
  2307. if (NULL == ret) {
  2308. RETURN_FALSE;
  2309. }
  2310. RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2311. }
  2312. /* }}} */
  2313. /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
  2314. Returns part of a string */
  2315. PHP_FUNCTION(mb_strcut)
  2316. {
  2317. size_t argc = ZEND_NUM_ARGS();
  2318. char *encoding;
  2319. long from, len;
  2320. int encoding_len;
  2321. mbfl_string string, result, *ret;
  2322. mbfl_string_init(&string);
  2323. string.no_language = MBSTRG(language);
  2324. string.no_encoding = MBSTRG(current_internal_encoding);
  2325. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) {
  2326. return;
  2327. }
  2328. if (argc == 4) {
  2329. string.no_encoding = mbfl_name2no_encoding(encoding);
  2330. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2331. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  2332. RETURN_FALSE;
  2333. }
  2334. }
  2335. if (argc < 3) {
  2336. len = string.len;
  2337. }
  2338. /* if "from" position is negative, count start position from the end
  2339. * of the string
  2340. */
  2341. if (from < 0) {
  2342. from = string.len + from;
  2343. if (from < 0) {
  2344. from = 0;
  2345. }
  2346. }
  2347. /* if "length" position is negative, set it to the length
  2348. * needed to stop that many chars from the end of the string
  2349. */
  2350. if (len < 0) {
  2351. len = (string.len - from) + len;
  2352. if (len < 0) {
  2353. len = 0;
  2354. }
  2355. }
  2356. if ((unsigned int)from > string.len) {
  2357. RETURN_FALSE;
  2358. }
  2359. ret = mbfl_strcut(&string, &result, from, len);
  2360. if (ret == NULL) {
  2361. RETURN_FALSE;
  2362. }
  2363. RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2364. }
  2365. /* }}} */
  2366. /* {{{ proto int mb_strwidth(string str [, string encoding])
  2367. Gets terminal width of a string */
  2368. PHP_FUNCTION(mb_strwidth)
  2369. {
  2370. int n;
  2371. mbfl_string string;
  2372. char *enc_name = NULL;
  2373. int enc_name_len;
  2374. mbfl_string_init(&string);
  2375. string.no_language = MBSTRG(language);
  2376. string.no_encoding = MBSTRG(current_internal_encoding);
  2377. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  2378. return;
  2379. }
  2380. if (enc_name != NULL) {
  2381. string.no_encoding = mbfl_name2no_encoding(enc_name);
  2382. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2383. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2384. RETURN_FALSE;
  2385. }
  2386. }
  2387. n = mbfl_strwidth(&string);
  2388. if (n >= 0) {
  2389. RETVAL_LONG(n);
  2390. } else {
  2391. RETVAL_FALSE;
  2392. }
  2393. }
  2394. /* }}} */
  2395. /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
  2396. Trim the string in terminal width */
  2397. PHP_FUNCTION(mb_strimwidth)
  2398. {
  2399. char *str, *trimmarker, *encoding;
  2400. long from, width;
  2401. int str_len, trimmarker_len, encoding_len;
  2402. mbfl_string string, result, marker, *ret;
  2403. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
  2404. return;
  2405. }
  2406. mbfl_string_init(&string);
  2407. mbfl_string_init(&marker);
  2408. string.no_language = MBSTRG(language);
  2409. string.no_encoding = MBSTRG(current_internal_encoding);
  2410. marker.no_language = MBSTRG(language);
  2411. marker.no_encoding = MBSTRG(current_internal_encoding);
  2412. marker.val = NULL;
  2413. marker.len = 0;
  2414. if (ZEND_NUM_ARGS() == 5) {
  2415. string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
  2416. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2417. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  2418. RETURN_FALSE;
  2419. }
  2420. }
  2421. string.val = (unsigned char *)str;
  2422. string.len = str_len;
  2423. if (from < 0 || from > str_len) {
  2424. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
  2425. RETURN_FALSE;
  2426. }
  2427. if (width < 0) {
  2428. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
  2429. RETURN_FALSE;
  2430. }
  2431. if (ZEND_NUM_ARGS() >= 4) {
  2432. marker.val = (unsigned char *)trimmarker;
  2433. marker.len = trimmarker_len;
  2434. }
  2435. ret = mbfl_strimwidth(&string, &marker, &result, from, width);
  2436. if (ret == NULL) {
  2437. RETURN_FALSE;
  2438. }
  2439. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2440. }
  2441. /* }}} */
  2442. /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
  2443. MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
  2444. {
  2445. mbfl_string string, result, *ret;
  2446. enum mbfl_no_encoding from_encoding, to_encoding;
  2447. mbfl_buffer_converter *convd;
  2448. int size, *list;
  2449. char *output=NULL;
  2450. if (output_len) {
  2451. *output_len = 0;
  2452. }
  2453. if (!input) {
  2454. return NULL;
  2455. }
  2456. /* new encoding */
  2457. if (_to_encoding && strlen(_to_encoding)) {
  2458. to_encoding = mbfl_name2no_encoding(_to_encoding);
  2459. if (to_encoding == mbfl_no_encoding_invalid) {
  2460. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
  2461. return NULL;
  2462. }
  2463. } else {
  2464. to_encoding = MBSTRG(current_internal_encoding);
  2465. }
  2466. /* initialize string */
  2467. mbfl_string_init(&string);
  2468. mbfl_string_init(&result);
  2469. from_encoding = MBSTRG(current_internal_encoding);
  2470. string.no_encoding = from_encoding;
  2471. string.no_language = MBSTRG(language);
  2472. string.val = (unsigned char *)input;
  2473. string.len = length;
  2474. /* pre-conversion encoding */
  2475. if (_from_encodings) {
  2476. list = NULL;
  2477. size = 0;
  2478. php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
  2479. if (size == 1) {
  2480. from_encoding = *list;
  2481. string.no_encoding = from_encoding;
  2482. } else if (size > 1) {
  2483. /* auto detect */
  2484. from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection));
  2485. if (from_encoding != mbfl_no_encoding_invalid) {
  2486. string.no_encoding = from_encoding;
  2487. } else {
  2488. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
  2489. from_encoding = mbfl_no_encoding_pass;
  2490. to_encoding = from_encoding;
  2491. string.no_encoding = from_encoding;
  2492. }
  2493. } else {
  2494. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
  2495. }
  2496. if (list != NULL) {
  2497. efree((void *)list);
  2498. }
  2499. }
  2500. /* initialize converter */
  2501. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  2502. if (convd == NULL) {
  2503. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
  2504. return NULL;
  2505. }
  2506. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  2507. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  2508. /* do it */
  2509. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2510. if (ret) {
  2511. if (output_len) {
  2512. *output_len = ret->len;
  2513. }
  2514. output = (char *)ret->val;
  2515. }
  2516. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  2517. mbfl_buffer_converter_delete(convd);
  2518. return output;
  2519. }
  2520. /* }}} */
  2521. /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
  2522. Returns converted string in desired encoding */
  2523. PHP_FUNCTION(mb_convert_encoding)
  2524. {
  2525. char *arg_str, *arg_new;
  2526. int str_len, new_len;
  2527. zval *arg_old;
  2528. int i;
  2529. size_t size, l, n;
  2530. char *_from_encodings = NULL, *ret, *s_free = NULL;
  2531. zval **hash_entry;
  2532. HashTable *target_hash;
  2533. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
  2534. return;
  2535. }
  2536. if (ZEND_NUM_ARGS() == 3) {
  2537. switch (Z_TYPE_P(arg_old)) {
  2538. case IS_ARRAY:
  2539. target_hash = Z_ARRVAL_P(arg_old);
  2540. zend_hash_internal_pointer_reset(target_hash);
  2541. i = zend_hash_num_elements(target_hash);
  2542. _from_encodings = NULL;
  2543. while (i > 0) {
  2544. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  2545. break;
  2546. }
  2547. convert_to_string_ex(hash_entry);
  2548. if ( _from_encodings) {
  2549. l = strlen(_from_encodings);
  2550. n = strlen(Z_STRVAL_PP(hash_entry));
  2551. _from_encodings = erealloc(_from_encodings, l+n+2);
  2552. strcpy(_from_encodings+l, ",");
  2553. strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
  2554. } else {
  2555. _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
  2556. }
  2557. zend_hash_move_forward(target_hash);
  2558. i--;
  2559. }
  2560. if (_from_encodings != NULL && !strlen(_from_encodings)) {
  2561. efree(_from_encodings);
  2562. _from_encodings = NULL;
  2563. }
  2564. s_free = _from_encodings;
  2565. break;
  2566. default:
  2567. convert_to_string(arg_old);
  2568. _from_encodings = Z_STRVAL_P(arg_old);
  2569. break;
  2570. }
  2571. }
  2572. /* new encoding */
  2573. ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
  2574. if (ret != NULL) {
  2575. RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */
  2576. } else {
  2577. RETVAL_FALSE;
  2578. }
  2579. if ( s_free) {
  2580. efree(s_free);
  2581. }
  2582. }
  2583. /* }}} */
  2584. /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
  2585. Returns a case-folded version of sourcestring */
  2586. PHP_FUNCTION(mb_convert_case)
  2587. {
  2588. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2589. int str_len, from_encoding_len;
  2590. long case_mode = 0;
  2591. char *newstr;
  2592. size_t ret_len;
  2593. RETVAL_FALSE;
  2594. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
  2595. &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
  2596. RETURN_FALSE;
  2597. newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  2598. if (newstr) {
  2599. RETVAL_STRINGL(newstr, ret_len, 0);
  2600. }
  2601. }
  2602. /* }}} */
  2603. /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
  2604. * Returns a uppercased version of sourcestring
  2605. */
  2606. PHP_FUNCTION(mb_strtoupper)
  2607. {
  2608. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2609. int str_len, from_encoding_len;
  2610. char *newstr;
  2611. size_t ret_len;
  2612. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  2613. &from_encoding, &from_encoding_len) == FAILURE) {
  2614. return;
  2615. }
  2616. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  2617. if (newstr) {
  2618. RETURN_STRINGL(newstr, ret_len, 0);
  2619. }
  2620. RETURN_FALSE;
  2621. }
  2622. /* }}} */
  2623. /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
  2624. * Returns a lowercased version of sourcestring
  2625. */
  2626. PHP_FUNCTION(mb_strtolower)
  2627. {
  2628. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2629. int str_len, from_encoding_len;
  2630. char *newstr;
  2631. size_t ret_len;
  2632. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  2633. &from_encoding, &from_encoding_len) == FAILURE) {
  2634. return;
  2635. }
  2636. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  2637. if (newstr) {
  2638. RETURN_STRINGL(newstr, ret_len, 0);
  2639. }
  2640. RETURN_FALSE;
  2641. }
  2642. /* }}} */
  2643. /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
  2644. Encodings of the given string is returned (as a string) */
  2645. PHP_FUNCTION(mb_detect_encoding)
  2646. {
  2647. char *str;
  2648. int str_len;
  2649. zend_bool strict=0;
  2650. zval *encoding_list;
  2651. mbfl_string string;
  2652. const char *ret;
  2653. enum mbfl_no_encoding *elist;
  2654. int size, *list;
  2655. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
  2656. return;
  2657. }
  2658. /* make encoding list */
  2659. list = NULL;
  2660. size = 0;
  2661. if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_P(encoding_list)) {
  2662. switch (Z_TYPE_P(encoding_list)) {
  2663. case IS_ARRAY:
  2664. if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
  2665. if (list) {
  2666. efree(list);
  2667. list = NULL;
  2668. size = 0;
  2669. }
  2670. }
  2671. break;
  2672. default:
  2673. convert_to_string(encoding_list);
  2674. if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
  2675. if (list) {
  2676. efree(list);
  2677. list = NULL;
  2678. size = 0;
  2679. }
  2680. }
  2681. break;
  2682. }
  2683. if (size <= 0) {
  2684. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
  2685. }
  2686. }
  2687. if (ZEND_NUM_ARGS() < 3) {
  2688. strict = (zend_bool)MBSTRG(strict_detection);
  2689. }
  2690. if (size > 0 && list != NULL) {
  2691. elist = list;
  2692. } else {
  2693. elist = MBSTRG(current_detect_order_list);
  2694. size = MBSTRG(current_detect_order_list_size);
  2695. }
  2696. mbfl_string_init(&string);
  2697. string.no_language = MBSTRG(language);
  2698. string.val = (unsigned char *)str;
  2699. string.len = str_len;
  2700. ret = mbfl_identify_encoding_name(&string, elist, size, strict);
  2701. if (list != NULL) {
  2702. efree((void *)list);
  2703. }
  2704. if (ret == NULL) {
  2705. RETURN_FALSE;
  2706. }
  2707. RETVAL_STRING((char *)ret, 1);
  2708. }
  2709. /* }}} */
  2710. /* {{{ proto mixed mb_list_encodings()
  2711. Returns an array of all supported entity encodings */
  2712. PHP_FUNCTION(mb_list_encodings)
  2713. {
  2714. const mbfl_encoding **encodings;
  2715. const mbfl_encoding *encoding;
  2716. int i;
  2717. array_init(return_value);
  2718. i = 0;
  2719. encodings = mbfl_get_supported_encodings();
  2720. while ((encoding = encodings[i++]) != NULL) {
  2721. add_next_index_string(return_value, (char *) encoding->name, 1);
  2722. }
  2723. }
  2724. /* }}} */
  2725. /* {{{ proto array mb_encoding_aliases(string encoding)
  2726. Returns an array of the aliases of a given encoding name */
  2727. PHP_FUNCTION(mb_encoding_aliases)
  2728. {
  2729. const mbfl_encoding *encoding;
  2730. char *name = NULL;
  2731. int name_len;
  2732. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
  2733. RETURN_FALSE;
  2734. }
  2735. encoding = mbfl_name2encoding(name);
  2736. if (!encoding) {
  2737. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  2738. RETURN_FALSE;
  2739. }
  2740. array_init(return_value);
  2741. if (encoding->aliases != NULL) {
  2742. const char **alias;
  2743. for (alias = *encoding->aliases; *alias; ++alias) {
  2744. add_next_index_string(return_value, (char *)*alias, 1);
  2745. }
  2746. }
  2747. }
  2748. /* }}} */
  2749. /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
  2750. Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
  2751. PHP_FUNCTION(mb_encode_mimeheader)
  2752. {
  2753. enum mbfl_no_encoding charset, transenc;
  2754. mbfl_string string, result, *ret;
  2755. char *charset_name = NULL;
  2756. int charset_name_len;
  2757. char *trans_enc_name = NULL;
  2758. int trans_enc_name_len;
  2759. char *linefeed = "\r\n";
  2760. int linefeed_len;
  2761. long indent = 0;
  2762. mbfl_string_init(&string);
  2763. string.no_language = MBSTRG(language);
  2764. string.no_encoding = MBSTRG(current_internal_encoding);
  2765. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
  2766. return;
  2767. }
  2768. charset = mbfl_no_encoding_pass;
  2769. transenc = mbfl_no_encoding_base64;
  2770. if (charset_name != NULL) {
  2771. charset = mbfl_name2no_encoding(charset_name);
  2772. if (charset == mbfl_no_encoding_invalid) {
  2773. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
  2774. RETURN_FALSE;
  2775. }
  2776. } else {
  2777. const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
  2778. if (lang != NULL) {
  2779. charset = lang->mail_charset;
  2780. transenc = lang->mail_header_encoding;
  2781. }
  2782. }
  2783. if (trans_enc_name != NULL) {
  2784. if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
  2785. transenc = mbfl_no_encoding_base64;
  2786. } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
  2787. transenc = mbfl_no_encoding_qprint;
  2788. }
  2789. }
  2790. mbfl_string_init(&result);
  2791. ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
  2792. if (ret != NULL) {
  2793. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2794. } else {
  2795. RETVAL_FALSE;
  2796. }
  2797. }
  2798. /* }}} */
  2799. /* {{{ proto string mb_decode_mimeheader(string string)
  2800. Decodes the MIME "encoded-word" in the string */
  2801. PHP_FUNCTION(mb_decode_mimeheader)
  2802. {
  2803. mbfl_string string, result, *ret;
  2804. mbfl_string_init(&string);
  2805. string.no_language = MBSTRG(language);
  2806. string.no_encoding = MBSTRG(current_internal_encoding);
  2807. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
  2808. return;
  2809. }
  2810. mbfl_string_init(&result);
  2811. ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
  2812. if (ret != NULL) {
  2813. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2814. } else {
  2815. RETVAL_FALSE;
  2816. }
  2817. }
  2818. /* }}} */
  2819. /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
  2820. Conversion between full-width character and half-width character (Japanese) */
  2821. PHP_FUNCTION(mb_convert_kana)
  2822. {
  2823. int opt, i;
  2824. mbfl_string string, result, *ret;
  2825. char *optstr = NULL;
  2826. int optstr_len;
  2827. char *encname = NULL;
  2828. int encname_len;
  2829. mbfl_string_init(&string);
  2830. string.no_language = MBSTRG(language);
  2831. string.no_encoding = MBSTRG(current_internal_encoding);
  2832. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
  2833. return;
  2834. }
  2835. /* option */
  2836. if (optstr != NULL) {
  2837. char *p = optstr;
  2838. int n = optstr_len;
  2839. i = 0;
  2840. opt = 0;
  2841. while (i < n) {
  2842. i++;
  2843. switch (*p++) {
  2844. case 'A':
  2845. opt |= 0x1;
  2846. break;
  2847. case 'a':
  2848. opt |= 0x10;
  2849. break;
  2850. case 'R':
  2851. opt |= 0x2;
  2852. break;
  2853. case 'r':
  2854. opt |= 0x20;
  2855. break;
  2856. case 'N':
  2857. opt |= 0x4;
  2858. break;
  2859. case 'n':
  2860. opt |= 0x40;
  2861. break;
  2862. case 'S':
  2863. opt |= 0x8;
  2864. break;
  2865. case 's':
  2866. opt |= 0x80;
  2867. break;
  2868. case 'K':
  2869. opt |= 0x100;
  2870. break;
  2871. case 'k':
  2872. opt |= 0x1000;
  2873. break;
  2874. case 'H':
  2875. opt |= 0x200;
  2876. break;
  2877. case 'h':
  2878. opt |= 0x2000;
  2879. break;
  2880. case 'V':
  2881. opt |= 0x800;
  2882. break;
  2883. case 'C':
  2884. opt |= 0x10000;
  2885. break;
  2886. case 'c':
  2887. opt |= 0x20000;
  2888. break;
  2889. case 'M':
  2890. opt |= 0x100000;
  2891. break;
  2892. case 'm':
  2893. opt |= 0x200000;
  2894. break;
  2895. }
  2896. }
  2897. } else {
  2898. opt = 0x900;
  2899. }
  2900. /* encoding */
  2901. if (encname != NULL) {
  2902. string.no_encoding = mbfl_name2no_encoding(encname);
  2903. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2904. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
  2905. RETURN_FALSE;
  2906. }
  2907. }
  2908. ret = mbfl_ja_jp_hantozen(&string, &result, opt);
  2909. if (ret != NULL) {
  2910. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2911. } else {
  2912. RETVAL_FALSE;
  2913. }
  2914. }
  2915. /* }}} */
  2916. #define PHP_MBSTR_STACK_BLOCK_SIZE 32
  2917. /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
  2918. Converts the string resource in variables to desired encoding */
  2919. PHP_FUNCTION(mb_convert_variables)
  2920. {
  2921. zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
  2922. HashTable *target_hash;
  2923. mbfl_string string, result, *ret;
  2924. enum mbfl_no_encoding from_encoding, to_encoding;
  2925. mbfl_encoding_detector *identd;
  2926. mbfl_buffer_converter *convd;
  2927. int n, to_enc_len, argc, stack_level, stack_max, elistsz;
  2928. enum mbfl_no_encoding *elist;
  2929. char *name, *to_enc;
  2930. void *ptmp;
  2931. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
  2932. return;
  2933. }
  2934. /* new encoding */
  2935. to_encoding = mbfl_name2no_encoding(to_enc);
  2936. if (to_encoding == mbfl_no_encoding_invalid) {
  2937. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
  2938. efree(args);
  2939. RETURN_FALSE;
  2940. }
  2941. /* initialize string */
  2942. mbfl_string_init(&string);
  2943. mbfl_string_init(&result);
  2944. from_encoding = MBSTRG(current_internal_encoding);
  2945. string.no_encoding = from_encoding;
  2946. string.no_language = MBSTRG(language);
  2947. /* pre-conversion encoding */
  2948. elist = NULL;
  2949. elistsz = 0;
  2950. switch (Z_TYPE_PP(zfrom_enc)) {
  2951. case IS_ARRAY:
  2952. php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
  2953. break;
  2954. default:
  2955. convert_to_string_ex(zfrom_enc);
  2956. php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
  2957. break;
  2958. }
  2959. if (elistsz <= 0) {
  2960. from_encoding = mbfl_no_encoding_pass;
  2961. } else if (elistsz == 1) {
  2962. from_encoding = *elist;
  2963. } else {
  2964. /* auto detect */
  2965. from_encoding = mbfl_no_encoding_invalid;
  2966. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  2967. stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
  2968. stack_level = 0;
  2969. identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
  2970. if (identd != NULL) {
  2971. n = 0;
  2972. while (n < argc || stack_level > 0) {
  2973. if (stack_level <= 0) {
  2974. var = args[n++];
  2975. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2976. target_hash = HASH_OF(*var);
  2977. if (target_hash != NULL) {
  2978. zend_hash_internal_pointer_reset(target_hash);
  2979. }
  2980. }
  2981. } else {
  2982. stack_level--;
  2983. var = stack[stack_level];
  2984. }
  2985. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2986. target_hash = HASH_OF(*var);
  2987. if (target_hash != NULL) {
  2988. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  2989. zend_hash_move_forward(target_hash);
  2990. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  2991. if (stack_level >= stack_max) {
  2992. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  2993. ptmp = erealloc(stack, sizeof(zval **)*stack_max);
  2994. stack = (zval ***)ptmp;
  2995. }
  2996. stack[stack_level] = var;
  2997. stack_level++;
  2998. var = hash_entry;
  2999. target_hash = HASH_OF(*var);
  3000. if (target_hash != NULL) {
  3001. zend_hash_internal_pointer_reset(target_hash);
  3002. continue;
  3003. }
  3004. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  3005. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  3006. string.len = Z_STRLEN_PP(hash_entry);
  3007. if (mbfl_encoding_detector_feed(identd, &string)) {
  3008. goto detect_end; /* complete detecting */
  3009. }
  3010. }
  3011. }
  3012. }
  3013. } else if (Z_TYPE_PP(var) == IS_STRING) {
  3014. string.val = (unsigned char *)Z_STRVAL_PP(var);
  3015. string.len = Z_STRLEN_PP(var);
  3016. if (mbfl_encoding_detector_feed(identd, &string)) {
  3017. goto detect_end; /* complete detecting */
  3018. }
  3019. }
  3020. }
  3021. detect_end:
  3022. from_encoding = mbfl_encoding_detector_judge(identd);
  3023. mbfl_encoding_detector_delete(identd);
  3024. }
  3025. efree(stack);
  3026. if (from_encoding == mbfl_no_encoding_invalid) {
  3027. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
  3028. from_encoding = mbfl_no_encoding_pass;
  3029. }
  3030. }
  3031. if (elist != NULL) {
  3032. efree((void *)elist);
  3033. }
  3034. /* create converter */
  3035. convd = NULL;
  3036. if (from_encoding != mbfl_no_encoding_pass) {
  3037. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
  3038. if (convd == NULL) {
  3039. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
  3040. RETURN_FALSE;
  3041. }
  3042. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  3043. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  3044. }
  3045. /* convert */
  3046. if (convd != NULL) {
  3047. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  3048. stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
  3049. stack_level = 0;
  3050. n = 0;
  3051. while (n < argc || stack_level > 0) {
  3052. if (stack_level <= 0) {
  3053. var = args[n++];
  3054. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  3055. target_hash = HASH_OF(*var);
  3056. if (target_hash != NULL) {
  3057. zend_hash_internal_pointer_reset(target_hash);
  3058. }
  3059. }
  3060. } else {
  3061. stack_level--;
  3062. var = stack[stack_level];
  3063. }
  3064. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  3065. target_hash = HASH_OF(*var);
  3066. if (target_hash != NULL) {
  3067. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  3068. zend_hash_move_forward(target_hash);
  3069. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  3070. if (stack_level >= stack_max) {
  3071. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  3072. ptmp = erealloc(stack, sizeof(zval **)*stack_max);
  3073. stack = (zval ***)ptmp;
  3074. }
  3075. stack[stack_level] = var;
  3076. stack_level++;
  3077. var = hash_entry;
  3078. SEPARATE_ZVAL(hash_entry);
  3079. target_hash = HASH_OF(*var);
  3080. if (target_hash != NULL) {
  3081. zend_hash_internal_pointer_reset(target_hash);
  3082. continue;
  3083. }
  3084. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  3085. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  3086. string.len = Z_STRLEN_PP(hash_entry);
  3087. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3088. if (ret != NULL) {
  3089. if (Z_REFCOUNT_PP(hash_entry) > 1) {
  3090. Z_DELREF_PP(hash_entry);
  3091. MAKE_STD_ZVAL(*hash_entry);
  3092. } else {
  3093. zval_dtor(*hash_entry);
  3094. }
  3095. ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
  3096. }
  3097. }
  3098. }
  3099. }
  3100. } else if (Z_TYPE_PP(var) == IS_STRING) {
  3101. string.val = (unsigned char *)Z_STRVAL_PP(var);
  3102. string.len = Z_STRLEN_PP(var);
  3103. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3104. if (ret != NULL) {
  3105. zval_dtor(*var);
  3106. ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
  3107. }
  3108. }
  3109. }
  3110. efree(stack);
  3111. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  3112. mbfl_buffer_converter_delete(convd);
  3113. }
  3114. efree(args);
  3115. name = (char *)mbfl_no_encoding2name(from_encoding);
  3116. if (name != NULL) {
  3117. RETURN_STRING(name, 1);
  3118. } else {
  3119. RETURN_FALSE;
  3120. }
  3121. }
  3122. /* }}} */
  3123. /* {{{ HTML numeric entity */
  3124. /* {{{ static void php_mb_numericentity_exec() */
  3125. static void
  3126. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
  3127. {
  3128. char *str, *encoding;
  3129. int str_len, encoding_len;
  3130. zval *zconvmap, **hash_entry;
  3131. HashTable *target_hash;
  3132. size_t argc = ZEND_NUM_ARGS();
  3133. int i, *convmap, *mapelm, mapsize=0;
  3134. mbfl_string string, result, *ret;
  3135. enum mbfl_no_encoding no_encoding;
  3136. if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
  3137. return;
  3138. }
  3139. mbfl_string_init(&string);
  3140. string.no_language = MBSTRG(language);
  3141. string.no_encoding = MBSTRG(current_internal_encoding);
  3142. string.val = (unsigned char *)str;
  3143. string.len = str_len;
  3144. /* encoding */
  3145. if (argc == 3) {
  3146. no_encoding = mbfl_name2no_encoding(encoding);
  3147. if (no_encoding == mbfl_no_encoding_invalid) {
  3148. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  3149. RETURN_FALSE;
  3150. } else {
  3151. string.no_encoding = no_encoding;
  3152. }
  3153. }
  3154. /* conversion map */
  3155. convmap = NULL;
  3156. if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
  3157. target_hash = Z_ARRVAL_P(zconvmap);
  3158. zend_hash_internal_pointer_reset(target_hash);
  3159. i = zend_hash_num_elements(target_hash);
  3160. if (i > 0) {
  3161. convmap = (int *)safe_emalloc(i, sizeof(int), 0);
  3162. mapelm = convmap;
  3163. mapsize = 0;
  3164. while (i > 0) {
  3165. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  3166. break;
  3167. }
  3168. convert_to_long_ex(hash_entry);
  3169. *mapelm++ = Z_LVAL_PP(hash_entry);
  3170. mapsize++;
  3171. i--;
  3172. zend_hash_move_forward(target_hash);
  3173. }
  3174. }
  3175. }
  3176. if (convmap == NULL) {
  3177. RETURN_FALSE;
  3178. }
  3179. mapsize /= 4;
  3180. ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
  3181. if (ret != NULL) {
  3182. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  3183. } else {
  3184. RETVAL_FALSE;
  3185. }
  3186. efree((void *)convmap);
  3187. }
  3188. /* }}} */
  3189. /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
  3190. Converts specified characters to HTML numeric entities */
  3191. PHP_FUNCTION(mb_encode_numericentity)
  3192. {
  3193. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  3194. }
  3195. /* }}} */
  3196. /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
  3197. Converts HTML numeric entities to character code */
  3198. PHP_FUNCTION(mb_decode_numericentity)
  3199. {
  3200. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  3201. }
  3202. /* }}} */
  3203. /* }}} */
  3204. /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
  3205. * Sends an email message with MIME scheme
  3206. */
  /*
   * If str[pos] starts a folded header line ("\r\n" followed by a space or
   * tab), advance pos to the last character of the folding whitespace and
   * `continue` the enclosing loop.
   *
   * Must be used directly inside a loop body. It is deliberately NOT wrapped
   * in do { } while (0), because the `continue` has to bind to the caller's
   * loop. `pos` must be a modifiable lvalue; both arguments are evaluated
   * several times.
   */
  #define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
      if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
          (pos) += 2; \
          while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
              (pos)++; \
          } \
          continue; \
      }
  /*
   * Replace every NUL byte within the first `len` bytes of `str` with a space.
   *
   * Uses the caller's `char *pp, *ee` scratch variables, which must be in
   * scope; `pp` is NULL afterwards. The body is wrapped in do { } while (0)
   * so the macro ends without a line continuation and behaves as a single
   * statement.
   */
  #define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) do { \
      pp = (str); \
      ee = pp + (len); \
      while ((pp = memchr(pp, '\0', (ee - pp)))) { \
          *pp = ' '; \
      } \
  } while (0)
  /*
   * Extend the caller's local smart_str `token` by one character.
   * While token.a is 0 only the length is bumped and `ch` itself is not
   * stored; otherwise `ch` is appended with smart_str_appendc().
   */
  #define APPEND_ONE_CHAR(ch) do { \
      if (token.a == 0) { \
          token.len++; \
      } else { \
          smart_str_appendc(&token, ch); \
      } \
  } while (0)
  /*
   * If `str` has no allocation of its own (a == 0), copy its `len` bytes into
   * a fresh emalloc()ed buffer. The capacity `a` becomes the first power of
   * two (starting at 1) that is not smaller than `len`; the buffer holds
   * a + 1 bytes. Does nothing when a != 0.
   */
  #define SEPARATE_SMART_STR(str) do { \
      if ((str)->a == 0) { \
          char *owned_copy; \
          for ((str)->a = 1; (str)->a < (str)->len; (str)->a <<= 1) { \
          } \
          owned_copy = emalloc((str)->a + 1); \
          memcpy(owned_copy, (str)->c, (str)->len); \
          (str)->c = owned_copy; \
      } \
  } while (0)
  3240. static void my_smart_str_dtor(smart_str *s)
  3241. {
  3242. if (s->a > 0) {
  3243. smart_str_free(s);
  3244. }
  3245. }
  3246. static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
  3247. {
  3248. const char *ps;
  3249. size_t icnt;
  3250. int state = 0;
  3251. int crlf_state = -1;
  3252. smart_str token = { 0, 0, 0 };
  3253. smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
  3254. ps = str;
  3255. icnt = str_len;
  3256. /*
  3257. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  3258. * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
  3259. * state 0 1 2 3
  3260. *
  3261. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  3262. * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
  3263. * crlf_state -1 0 1 -1
  3264. *
  3265. */
  3266. while (icnt > 0) {
  3267. switch (*ps) {
  3268. case ':':
  3269. if (crlf_state == 1) {
  3270. APPEND_ONE_CHAR('\r');
  3271. }
  3272. if (state == 0 || state == 1) {
  3273. fld_name = token;
  3274. state = 2;
  3275. } else {
  3276. APPEND_ONE_CHAR(*ps);
  3277. }
  3278. crlf_state = 0;
  3279. break;
  3280. case '\n':
  3281. if (crlf_state == -1) {
  3282. goto out;
  3283. }
  3284. crlf_state = -1;
  3285. break;
  3286. case '\r':
  3287. if (crlf_state == 1) {
  3288. APPEND_ONE_CHAR('\r');
  3289. } else {
  3290. crlf_state = 1;
  3291. }
  3292. break;
  3293. case ' ': case '\t':
  3294. if (crlf_state == -1) {
  3295. if (state == 3) {
  3296. /* continuing from the previous line */
  3297. SEPARATE_SMART_STR(&token);
  3298. state = 4;
  3299. } else {
  3300. /* simply skipping this new line */
  3301. state = 5;
  3302. }
  3303. } else {
  3304. if (crlf_state == 1) {
  3305. APPEND_ONE_CHAR('\r');
  3306. }
  3307. if (state == 1 || state == 3) {
  3308. APPEND_ONE_CHAR(*ps);
  3309. }
  3310. }
  3311. crlf_state = 0;
  3312. break;
  3313. default:
  3314. switch (state) {
  3315. case 0:
  3316. token.c = (char *)ps;
  3317. token.len = 0;
  3318. token.a = 0;
  3319. state = 1;
  3320. break;
  3321. case 2:
  3322. if (crlf_state != -1) {
  3323. token.c = (char *)ps;
  3324. token.len = 0;
  3325. token.a = 0;
  3326. state = 3;
  3327. break;
  3328. }
  3329. /* break is missing intentionally */
  3330. case 3:
  3331. if (crlf_state == -1) {
  3332. fld_val = token;
  3333. if (fld_name.c != NULL && fld_val.c != NULL) {
  3334. char *dummy;
  3335. /* FIXME: some locale free implementation is
  3336. * really required here,,, */
  3337. SEPARATE_SMART_STR(&fld_name);
  3338. php_strtoupper(fld_name.c, fld_name.len);
  3339. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  3340. my_smart_str_dtor(&fld_name);
  3341. }
  3342. memset(&fld_name, 0, sizeof(smart_str));
  3343. memset(&fld_val, 0, sizeof(smart_str));
  3344. token.c = (char *)ps;
  3345. token.len = 0;
  3346. token.a = 0;
  3347. state = 1;
  3348. }
  3349. break;
  3350. case 4:
  3351. APPEND_ONE_CHAR(' ');
  3352. state = 3;
  3353. break;
  3354. }
  3355. if (crlf_state == 1) {
  3356. APPEND_ONE_CHAR('\r');
  3357. }
  3358. APPEND_ONE_CHAR(*ps);
  3359. crlf_state = 0;
  3360. break;
  3361. }
  3362. ps++, icnt--;
  3363. }
  3364. out:
  3365. if (state == 2) {
  3366. token.c = "";
  3367. token.len = 0;
  3368. token.a = 0;
  3369. state = 3;
  3370. }
  3371. if (state == 3) {
  3372. fld_val = token;
  3373. if (fld_name.c != NULL && fld_val.c != NULL) {
  3374. void *dummy;
  3375. /* FIXME: some locale free implementation is
  3376. * really required here,,, */
  3377. SEPARATE_SMART_STR(&fld_name);
  3378. php_strtoupper(fld_name.c, fld_name.len);
  3379. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  3380. my_smart_str_dtor(&fld_name);
  3381. }
  3382. }
  3383. return state;
  3384. }
  3385. PHP_FUNCTION(mb_send_mail)
  3386. {
  3387. int n;
  3388. char *to = NULL;
  3389. int to_len;
  3390. char *message = NULL;
  3391. int message_len;
  3392. char *headers = NULL;
  3393. int headers_len;
  3394. char *subject = NULL;
  3395. int subject_len;
  3396. char *extra_cmd = NULL;
  3397. int extra_cmd_len;
  3398. int i;
  3399. char *to_r = NULL;
  3400. char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
  3401. struct {
  3402. int cnt_type:1;
  3403. int cnt_trans_enc:1;
  3404. } suppressed_hdrs = { 0, 0 };
  3405. char *message_buf = NULL, *subject_buf = NULL, *p;
  3406. mbfl_string orig_str, conv_str;
  3407. mbfl_string *pstr; /* pointer to mbfl string for return value */
  3408. enum mbfl_no_encoding
  3409. tran_cs, /* transfar text charset */
  3410. head_enc, /* header transfar encoding */
  3411. body_enc; /* body transfar encoding */
  3412. mbfl_memory_device device; /* automatic allocateable buffer for additional header */
  3413. const mbfl_language *lang;
  3414. int err = 0;
  3415. HashTable ht_headers;
  3416. smart_str *s;
  3417. extern void mbfl_memory_device_unput(mbfl_memory_device *device);
  3418. char *pp, *ee;
  3419. /* initialize */
  3420. mbfl_memory_device_init(&device, 0, 0);
  3421. mbfl_string_init(&orig_str);
  3422. mbfl_string_init(&conv_str);
  3423. /* character-set, transfer-encoding */
  3424. tran_cs = mbfl_no_encoding_utf8;
  3425. head_enc = mbfl_no_encoding_base64;
  3426. body_enc = mbfl_no_encoding_base64;
  3427. lang = mbfl_no2language(MBSTRG(language));
  3428. if (lang != NULL) {
  3429. tran_cs = lang->mail_charset;
  3430. head_enc = lang->mail_header_encoding;
  3431. body_enc = lang->mail_body_encoding;
  3432. }
  3433. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
  3434. return;
  3435. }
  3436. /* ASCIIZ check */
  3437. MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
  3438. MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
  3439. MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
  3440. if (headers) {
  3441. MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
  3442. }
  3443. if (extra_cmd) {
  3444. MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
  3445. }
  3446. zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
  3447. if (headers != NULL) {
  3448. _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
  3449. }
  3450. if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
  3451. char *tmp;
  3452. char *param_name;
  3453. char *charset = NULL;
  3454. SEPARATE_SMART_STR(s);
  3455. smart_str_0(s);
  3456. p = strchr(s->c, ';');
  3457. if (p != NULL) {
  3458. /* skipping the padded spaces */
  3459. do {
  3460. ++p;
  3461. } while (*p == ' ' || *p == '\t');
  3462. if (*p != '\0') {
  3463. if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
  3464. if (strcasecmp(param_name, "charset") == 0) {
  3465. enum mbfl_no_encoding _tran_cs = tran_cs;
  3466. charset = php_strtok_r(NULL, "= \"", &tmp);
  3467. if (charset != NULL) {
  3468. _tran_cs = mbfl_name2no_encoding(charset);
  3469. }
  3470. if (_tran_cs == mbfl_no_encoding_invalid) {
  3471. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
  3472. _tran_cs = mbfl_no_encoding_ascii;
  3473. }
  3474. tran_cs = _tran_cs;
  3475. }
  3476. }
  3477. }
  3478. }
  3479. suppressed_hdrs.cnt_type = 1;
  3480. }
  3481. if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
  3482. enum mbfl_no_encoding _body_enc;
  3483. SEPARATE_SMART_STR(s);
  3484. smart_str_0(s);
  3485. _body_enc = mbfl_name2no_encoding(s->c);
  3486. switch (_body_enc) {
  3487. case mbfl_no_encoding_base64:
  3488. case mbfl_no_encoding_7bit:
  3489. case mbfl_no_encoding_8bit:
  3490. body_enc = _body_enc;
  3491. break;
  3492. default:
  3493. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
  3494. body_enc = mbfl_no_encoding_8bit;
  3495. break;
  3496. }
  3497. suppressed_hdrs.cnt_trans_enc = 1;
  3498. }
  3499. /* To: */
  3500. if (to != NULL) {
  3501. if (to_len > 0) {
  3502. to_r = estrndup(to, to_len);
  3503. for (; to_len; to_len--) {
  3504. if (!isspace((unsigned char) to_r[to_len - 1])) {
  3505. break;
  3506. }
  3507. to_r[to_len - 1] = '\0';
  3508. }
  3509. for (i = 0; to_r[i]; i++) {
  3510. if (iscntrl((unsigned char) to_r[i])) {
  3511. /* According to RFC 822, section 3.1.1 long headers may be separated into
  3512. * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
  3513. * To prevent these separators from being replaced with a space, we use the
  3514. * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
  3515. */
  3516. SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
  3517. to_r[i] = ' ';
  3518. }
  3519. }
  3520. } else {
  3521. to_r = to;
  3522. }
  3523. } else {
  3524. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
  3525. err = 1;
  3526. }
  3527. /* Subject: */
  3528. if (subject != NULL && subject_len >= 0) {
  3529. orig_str.no_language = MBSTRG(language);
  3530. orig_str.val = (unsigned char *)subject;
  3531. orig_str.len = subject_len;
  3532. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  3533. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  3534. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  3535. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3536. }
  3537. pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
  3538. if (pstr != NULL) {
  3539. subject_buf = subject = (char *)pstr->val;
  3540. }
  3541. } else {
  3542. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
  3543. err = 1;
  3544. }
  3545. /* message body */
  3546. if (message != NULL) {
  3547. orig_str.no_language = MBSTRG(language);
  3548. orig_str.val = (unsigned char *)message;
  3549. orig_str.len = (unsigned int)message_len;
  3550. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  3551. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  3552. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  3553. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3554. }
  3555. pstr = NULL;
  3556. {
  3557. mbfl_string tmpstr;
  3558. if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
  3559. tmpstr.no_encoding=mbfl_no_encoding_8bit;
  3560. pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
  3561. efree(tmpstr.val);
  3562. }
  3563. }
  3564. if (pstr != NULL) {
  3565. message_buf = message = (char *)pstr->val;
  3566. }
  3567. } else {
  3568. /* this is not really an error, so it is allowed. */
  3569. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
  3570. message = NULL;
  3571. }
  3572. /* other headers */
  3573. #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
  3574. #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
  3575. #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
  3576. #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
  3577. if (headers != NULL) {
  3578. p = headers;
  3579. n = headers_len;
  3580. mbfl_memory_device_strncat(&device, p, n);
  3581. if (n > 0 && p[n - 1] != '\n') {
  3582. mbfl_memory_device_strncat(&device, "\n", 1);
  3583. }
  3584. }
  3585. if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
  3586. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
  3587. mbfl_memory_device_strncat(&device, "\n", 1);
  3588. }
  3589. if (!suppressed_hdrs.cnt_type) {
  3590. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
  3591. p = (char *)mbfl_no2preferred_mime_name(tran_cs);
  3592. if (p != NULL) {
  3593. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
  3594. mbfl_memory_device_strcat(&device, p);
  3595. }
  3596. mbfl_memory_device_strncat(&device, "\n", 1);
  3597. }
  3598. if (!suppressed_hdrs.cnt_trans_enc) {
  3599. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
  3600. p = (char *)mbfl_no2preferred_mime_name(body_enc);
  3601. if (p == NULL) {
  3602. p = "7bit";
  3603. }
  3604. mbfl_memory_device_strcat(&device, p);
  3605. mbfl_memory_device_strncat(&device, "\n", 1);
  3606. }
  3607. mbfl_memory_device_unput(&device);
  3608. mbfl_memory_device_output('\0', &device);
  3609. headers = (char *)device.buffer;
  3610. if (force_extra_parameters) {
  3611. extra_cmd = php_escape_shell_cmd(force_extra_parameters);
  3612. } else if (extra_cmd) {
  3613. extra_cmd = php_escape_shell_cmd(extra_cmd);
  3614. }
  3615. if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
  3616. RETVAL_TRUE;
  3617. } else {
  3618. RETVAL_FALSE;
  3619. }
  3620. if (extra_cmd) {
  3621. efree(extra_cmd);
  3622. }
  3623. if (to_r != to) {
  3624. efree(to_r);
  3625. }
  3626. if (subject_buf) {
  3627. efree((void *)subject_buf);
  3628. }
  3629. if (message_buf) {
  3630. efree((void *)message_buf);
  3631. }
  3632. mbfl_memory_device_clear(&device);
  3633. zend_hash_destroy(&ht_headers);
  3634. }
  /* Drop the helper macros used by the mail header parser and mb_send_mail(). */
  #undef APPEND_ONE_CHAR
  #undef SEPARATE_SMART_STR
  #undef MAIL_ASCIIZ_CHECK_MBSTRING
  #undef SKIP_LONG_HEADER_SEP_MBSTRING
  /* Literal header fragments defined inside mb_send_mail(). */
  #undef PHP_MBSTR_MAIL_MIME_HEADER4
  #undef PHP_MBSTR_MAIL_MIME_HEADER3
  #undef PHP_MBSTR_MAIL_MIME_HEADER2
  #undef PHP_MBSTR_MAIL_MIME_HEADER1
  3643. /* }}} */
  3644. /* {{{ proto mixed mb_get_info([string type])
  3645. Returns the current settings of mbstring */
  3646. PHP_FUNCTION(mb_get_info)
  3647. {
  3648. char *typ = NULL;
  3649. int typ_len, n;
  3650. char *name;
  3651. const struct mb_overload_def *over_func;
  3652. zval *row1, *row2;
  3653. const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
  3654. enum mbfl_no_encoding *entry;
  3655. #ifdef ZEND_MULTIBYTE
  3656. zval *row3;
  3657. #endif /* ZEND_MULTIBYTE */
  3658. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  3659. RETURN_FALSE;
  3660. }
  3661. if (!typ || !strcasecmp("all", typ)) {
  3662. array_init(return_value);
  3663. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  3664. add_assoc_string(return_value, "internal_encoding", name, 1);
  3665. }
  3666. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  3667. add_assoc_string(return_value, "http_input", name, 1);
  3668. }
  3669. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  3670. add_assoc_string(return_value, "http_output", name, 1);
  3671. }
  3672. if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
  3673. add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
  3674. }
  3675. add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
  3676. if (MBSTRG(func_overload)){
  3677. over_func = &(mb_ovld[0]);
  3678. MAKE_STD_ZVAL(row1);
  3679. array_init(row1);
  3680. while (over_func->type > 0) {
  3681. if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
  3682. add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
  3683. }
  3684. over_func++;
  3685. }
  3686. add_assoc_zval(return_value, "func_overload_list", row1);
  3687. } else {
  3688. add_assoc_string(return_value, "func_overload_list", "no overload", 1);
  3689. }
  3690. if (lang != NULL) {
  3691. if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
  3692. add_assoc_string(return_value, "mail_charset", name, 1);
  3693. }
  3694. if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
  3695. add_assoc_string(return_value, "mail_header_encoding", name, 1);
  3696. }
  3697. if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
  3698. add_assoc_string(return_value, "mail_body_encoding", name, 1);
  3699. }
  3700. }
  3701. add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
  3702. if (MBSTRG(encoding_translation)) {
  3703. add_assoc_string(return_value, "encoding_translation", "On", 1);
  3704. } else {
  3705. add_assoc_string(return_value, "encoding_translation", "Off", 1);
  3706. }
  3707. if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
  3708. add_assoc_string(return_value, "language", name, 1);
  3709. }
  3710. n = MBSTRG(current_detect_order_list_size);
  3711. entry = MBSTRG(current_detect_order_list);
  3712. if(n > 0) {
  3713. MAKE_STD_ZVAL(row2);
  3714. array_init(row2);
  3715. while (n > 0) {
  3716. if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
  3717. add_next_index_string(row2, name, 1);
  3718. }
  3719. entry++;
  3720. n--;
  3721. }
  3722. add_assoc_zval(return_value, "detect_order", row2);
  3723. }
  3724. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  3725. add_assoc_string(return_value, "substitute_character", "none", 1);
  3726. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  3727. add_assoc_string(return_value, "substitute_character", "long", 1);
  3728. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  3729. add_assoc_string(return_value, "substitute_character", "entity", 1);
  3730. } else {
  3731. add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
  3732. }
  3733. if (MBSTRG(strict_detection)) {
  3734. add_assoc_string(return_value, "strict_detection", "On", 1);
  3735. } else {
  3736. add_assoc_string(return_value, "strict_detection", "Off", 1);
  3737. }
  3738. #ifdef ZEND_MULTIBYTE
  3739. entry = MBSTRG(script_encoding_list);
  3740. n = MBSTRG(script_encoding_list_size);
  3741. if(n > 0) {
  3742. MAKE_STD_ZVAL(row3);
  3743. array_init(row3);
  3744. while (n > 0) {
  3745. if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
  3746. add_next_index_string(row3, name, 1);
  3747. }
  3748. entry++;
  3749. n--;
  3750. }
  3751. add_assoc_zval(return_value, "script_encoding", row3);
  3752. }
  3753. #endif /* ZEND_MULTIBYTE */
  3754. } else if (!strcasecmp("internal_encoding", typ)) {
  3755. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  3756. RETVAL_STRING(name, 1);
  3757. }
  3758. } else if (!strcasecmp("http_input", typ)) {
  3759. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  3760. RETVAL_STRING(name, 1);
  3761. }
  3762. } else if (!strcasecmp("http_output", typ)) {
  3763. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  3764. RETVAL_STRING(name, 1);
  3765. }
  3766. } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
  3767. if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
  3768. RETVAL_STRING(name, 1);
  3769. }
  3770. } else if (!strcasecmp("func_overload", typ)) {
  3771. RETVAL_LONG(MBSTRG(func_overload));
  3772. } else if (!strcasecmp("func_overload_list", typ)) {
  3773. if (MBSTRG(func_overload)){
  3774. over_func = &(mb_ovld[0]);
  3775. array_init(return_value);
  3776. while (over_func->type > 0) {
  3777. if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
  3778. add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
  3779. }
  3780. over_func++;
  3781. }
  3782. } else {
  3783. RETVAL_STRING("no overload", 1);
  3784. }
  3785. } else if (!strcasecmp("mail_charset", typ)) {
  3786. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
  3787. RETVAL_STRING(name, 1);
  3788. }
  3789. } else if (!strcasecmp("mail_header_encoding", typ)) {
  3790. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
  3791. RETVAL_STRING(name, 1);
  3792. }
  3793. } else if (!strcasecmp("mail_body_encoding", typ)) {
  3794. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
  3795. RETVAL_STRING(name, 1);
  3796. }
  3797. } else if (!strcasecmp("illegal_chars", typ)) {
  3798. RETVAL_LONG(MBSTRG(illegalchars));
  3799. } else if (!strcasecmp("encoding_translation", typ)) {
  3800. if (MBSTRG(encoding_translation)) {
  3801. RETVAL_STRING("On", 1);
  3802. } else {
  3803. RETVAL_STRING("Off", 1);
  3804. }
  3805. } else if (!strcasecmp("language", typ)) {
  3806. if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
  3807. RETVAL_STRING(name, 1);
  3808. }
  3809. } else if (!strcasecmp("detect_order", typ)) {
  3810. n = MBSTRG(current_detect_order_list_size);
  3811. entry = MBSTRG(current_detect_order_list);
  3812. if(n > 0) {
  3813. array_init(return_value);
  3814. while (n > 0) {
  3815. name = (char *)mbfl_no_encoding2name(*entry);
  3816. if (name) {
  3817. add_next_index_string(return_value, name, 1);
  3818. }
  3819. entry++;
  3820. n--;
  3821. }
  3822. }
  3823. } else if (!strcasecmp("substitute_character", typ)) {
  3824. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  3825. RETVAL_STRING("none", 1);
  3826. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  3827. RETVAL_STRING("long", 1);
  3828. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  3829. RETVAL_STRING("entity", 1);
  3830. } else {
  3831. RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
  3832. }
  3833. } else if (!strcasecmp("strict_detection", typ)) {
  3834. if (MBSTRG(strict_detection)) {
  3835. RETVAL_STRING("On", 1);
  3836. } else {
  3837. RETVAL_STRING("Off", 1);
  3838. }
  3839. } else {
  3840. #ifdef ZEND_MULTIBYTE
  3841. if (!strcasecmp("script_encoding", typ)) {
  3842. entry = MBSTRG(script_encoding_list);
  3843. n = MBSTRG(script_encoding_list_size);
  3844. if(n > 0) {
  3845. array_init(return_value);
  3846. while (n > 0) {
  3847. name = (char *)mbfl_no_encoding2name(*entry);
  3848. if (name) {
  3849. add_next_index_string(return_value, name, 1);
  3850. }
  3851. entry++;
  3852. n--;
  3853. }
  3854. }
  3855. return;
  3856. }
  3857. #endif /* ZEND_MULTIBYTE */
  3858. RETURN_FALSE;
  3859. }
  3860. }
  3861. /* }}} */
  3862. /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
  3863. Check if the string is valid for the specified encoding */
  3864. PHP_FUNCTION(mb_check_encoding)
  3865. {
  3866. char *var = NULL;
  3867. int var_len;
  3868. char *enc = NULL;
  3869. int enc_len;
  3870. mbfl_buffer_converter *convd;
  3871. enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
  3872. mbfl_string string, result, *ret = NULL;
  3873. long illegalchars = 0;
  3874. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
  3875. RETURN_FALSE;
  3876. }
  3877. if (var == NULL) {
  3878. RETURN_BOOL(MBSTRG(illegalchars) == 0);
  3879. }
  3880. if (enc != NULL) {
  3881. no_encoding = mbfl_name2no_encoding(enc);
  3882. if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) {
  3883. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
  3884. RETURN_FALSE;
  3885. }
  3886. }
  3887. convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
  3888. if (convd == NULL) {
  3889. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
  3890. RETURN_FALSE;
  3891. }
  3892. mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
  3893. mbfl_buffer_converter_illegal_substchar(convd, 0);
  3894. /* initialize string */
  3895. mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
  3896. mbfl_string_init(&result);
  3897. string.val = (unsigned char *)var;
  3898. string.len = var_len;
  3899. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3900. illegalchars = mbfl_buffer_illegalchars(convd);
  3901. mbfl_buffer_converter_delete(convd);
  3902. RETVAL_FALSE;
  3903. if (ret != NULL) {
  3904. if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
  3905. RETVAL_TRUE;
  3906. }
  3907. mbfl_string_clear(&result);
  3908. }
  3909. }
  3910. /* }}} */
  3911. /* {{{ MBSTRING_API int php_mb_encoding_translation() */
  3912. MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
  3913. {
  3914. return MBSTRG(encoding_translation);
  3915. }
  3916. /* }}} */
  3917. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
  3918. MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
  3919. {
  3920. if (enc != NULL) {
  3921. if (enc->flag & MBFL_ENCTYPE_MBCS) {
  3922. if (enc->mblen_table != NULL) {
  3923. if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
  3924. }
  3925. } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
  3926. return 2;
  3927. } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
  3928. return 4;
  3929. }
  3930. }
  3931. return 1;
  3932. }
  3933. /* }}} */
  3934. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
  3935. MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
  3936. {
  3937. return php_mb_mbchar_bytes_ex(s,
  3938. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3939. }
  3940. /* }}} */
  3941. /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
  3942. MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
  3943. {
  3944. register const char *p = s;
  3945. char *last=NULL;
  3946. if (nbytes == (size_t)-1) {
  3947. size_t nb = 0;
  3948. while (*p != '\0') {
  3949. if (nb == 0) {
  3950. if ((unsigned char)*p == (unsigned char)c) {
  3951. last = (char *)p;
  3952. }
  3953. nb = php_mb_mbchar_bytes_ex(p, enc);
  3954. if (nb == 0) {
  3955. return NULL; /* something is going wrong! */
  3956. }
  3957. }
  3958. --nb;
  3959. ++p;
  3960. }
  3961. } else {
  3962. register size_t bcnt = nbytes;
  3963. register size_t nbytes_char;
  3964. while (bcnt > 0) {
  3965. if ((unsigned char)*p == (unsigned char)c) {
  3966. last = (char *)p;
  3967. }
  3968. nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
  3969. if (bcnt < nbytes_char) {
  3970. return NULL;
  3971. }
  3972. p += nbytes_char;
  3973. bcnt -= nbytes_char;
  3974. }
  3975. }
  3976. return last;
  3977. }
  3978. /* }}} */
  3979. /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
  3980. MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
  3981. {
  3982. return php_mb_safe_strrchr_ex(s, c, nbytes,
  3983. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3984. }
  3985. /* }}} */
  3986. /* {{{ MBSTRING_API char *php_mb_strrchr() */
  3987. MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
  3988. {
  3989. return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);
  3990. }
  3991. /* }}} */
  3992. /* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
  3993. MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
  3994. {
  3995. if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
  3996. return php_mb_mbchar_bytes_ex(s,
  3997. mbfl_no2encoding(MBSTRG(http_input_identify)));
  3998. } else {
  3999. return php_mb_mbchar_bytes_ex(s,
  4000. mbfl_no2encoding(MBSTRG(internal_encoding)));
  4001. }
  4002. }
  4003. /* }}} */
  4004. /* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
  4005. MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
  4006. {
  4007. int i;
  4008. mbfl_string string, result, *ret = NULL;
  4009. enum mbfl_no_encoding from_encoding, to_encoding;
  4010. mbfl_buffer_converter *convd;
  4011. if (encoding_to) {
  4012. /* new encoding */
  4013. to_encoding = mbfl_name2no_encoding(encoding_to);
  4014. if (to_encoding == mbfl_no_encoding_invalid) {
  4015. return -1;
  4016. }
  4017. } else {
  4018. to_encoding = MBSTRG(current_internal_encoding);
  4019. }
  4020. if (encoding_from) {
  4021. /* old encoding */
  4022. from_encoding = mbfl_name2no_encoding(encoding_from);
  4023. if (from_encoding == mbfl_no_encoding_invalid) {
  4024. return -1;
  4025. }
  4026. } else {
  4027. from_encoding = MBSTRG(http_input_identify);
  4028. }
  4029. if (from_encoding == mbfl_no_encoding_pass) {
  4030. return 0;
  4031. }
  4032. /* initialize string */
  4033. mbfl_string_init(&string);
  4034. mbfl_string_init(&result);
  4035. string.no_encoding = from_encoding;
  4036. string.no_language = MBSTRG(language);
  4037. for (i=0; i<num; i++){
  4038. string.val = (unsigned char *)str[i];
  4039. string.len = len[i];
  4040. /* initialize converter */
  4041. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  4042. if (convd == NULL) {
  4043. return -1;
  4044. }
  4045. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  4046. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  4047. /* do it */
  4048. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  4049. if (ret != NULL) {
  4050. efree(str[i]);
  4051. str[i] = (char *)ret->val;
  4052. len[i] = (int)ret->len;
  4053. }
  4054. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  4055. mbfl_buffer_converter_delete(convd);
  4056. }
  4057. return ret ? 0 : -1;
  4058. }
  4059. /* }}} */
  4060. /* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
  4061. */
  4062. MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
  4063. {
  4064. mbfl_string string;
  4065. enum mbfl_no_encoding *elist;
  4066. enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
  4067. mbfl_encoding_detector *identd = NULL;
  4068. int size;
  4069. enum mbfl_no_encoding *list;
  4070. if (MBSTRG(http_input_list_size) == 1 &&
  4071. MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
  4072. MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
  4073. return SUCCESS;
  4074. }
  4075. if (MBSTRG(http_input_list_size) == 1 &&
  4076. MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
  4077. mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
  4078. MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
  4079. return SUCCESS;
  4080. }
  4081. if (arg_list && strlen(arg_list)>0) {
  4082. /* make encoding list */
  4083. list = NULL;
  4084. size = 0;
  4085. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  4086. if (size > 0 && list != NULL) {
  4087. elist = list;
  4088. } else {
  4089. elist = MBSTRG(current_detect_order_list);
  4090. size = MBSTRG(current_detect_order_list_size);
  4091. if (size <= 0){
  4092. elist = MBSTRG(default_detect_order_list);
  4093. size = MBSTRG(default_detect_order_list_size);
  4094. }
  4095. }
  4096. } else {
  4097. elist = MBSTRG(current_detect_order_list);
  4098. size = MBSTRG(current_detect_order_list_size);
  4099. if (size <= 0){
  4100. elist = MBSTRG(default_detect_order_list);
  4101. size = MBSTRG(default_detect_order_list_size);
  4102. }
  4103. }
  4104. mbfl_string_init(&string);
  4105. string.no_language = MBSTRG(language);
  4106. identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection));
  4107. if (identd) {
  4108. int n = 0;
  4109. while(n < num){
  4110. string.val = (unsigned char *)arg_string[n];
  4111. string.len = arg_length[n];
  4112. if (mbfl_encoding_detector_feed(identd, &string)) {
  4113. break;
  4114. }
  4115. n++;
  4116. }
  4117. encoding = mbfl_encoding_detector_judge(identd);
  4118. mbfl_encoding_detector_delete(identd);
  4119. }
  4120. if (encoding != mbfl_no_encoding_invalid) {
  4121. MBSTRG(http_input_identify) = encoding;
  4122. return SUCCESS;
  4123. } else {
  4124. return FAILURE;
  4125. }
  4126. }
  4127. /* }}} */
  4128. /* {{{ MBSTRING_API int php_mb_stripos()
  4129. */
  4130. MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
  4131. {
  4132. int n;
  4133. mbfl_string haystack, needle;
  4134. n = -1;
  4135. mbfl_string_init(&haystack);
  4136. mbfl_string_init(&needle);
  4137. haystack.no_language = MBSTRG(language);
  4138. haystack.no_encoding = MBSTRG(current_internal_encoding);
  4139. needle.no_language = MBSTRG(language);
  4140. needle.no_encoding = MBSTRG(current_internal_encoding);
  4141. do {
  4142. size_t len = 0;
  4143. haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
  4144. haystack.len = len;
  4145. if (!haystack.val) {
  4146. break;
  4147. }
  4148. if (haystack.len <= 0) {
  4149. break;
  4150. }
  4151. needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
  4152. needle.len = len;
  4153. if (!needle.val) {
  4154. break;
  4155. }
  4156. if (needle.len <= 0) {
  4157. break;
  4158. }
  4159. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
  4160. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  4161. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
  4162. break;
  4163. }
  4164. {
  4165. int haystack_char_len = mbfl_strlen(&haystack);
  4166. if (mode) {
  4167. if ((offset > 0 && offset > haystack_char_len) ||
  4168. (offset < 0 && -offset > haystack_char_len)) {
  4169. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
  4170. break;
  4171. }
  4172. } else {
  4173. if (offset < 0 || offset > haystack_char_len) {
  4174. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
  4175. break;
  4176. }
  4177. }
  4178. }
  4179. n = mbfl_strpos(&haystack, &needle, offset, mode);
  4180. } while(0);
  4181. if (haystack.val) {
  4182. efree(haystack.val);
  4183. }
  4184. if (needle.val) {
  4185. efree(needle.val);
  4186. }
  4187. return n;
  4188. }
  4189. /* }}} */
  4190. #ifdef ZEND_MULTIBYTE
  4191. /* {{{ php_mb_set_zend_encoding() */
  4192. static int php_mb_set_zend_encoding(TSRMLS_D)
  4193. {
  4194. /* 'd better use mbfl_memory_device? */
  4195. char *name, *list = NULL;
  4196. int n, *entry, list_size = 0;
  4197. zend_encoding_detector encoding_detector;
  4198. zend_encoding_converter encoding_converter;
  4199. zend_encoding_oddlen encoding_oddlen;
  4200. /* notify script encoding to Zend Engine */
  4201. entry = MBSTRG(script_encoding_list);
  4202. n = MBSTRG(script_encoding_list_size);
  4203. while (n > 0) {
  4204. name = (char *)mbfl_no_encoding2name(*entry);
  4205. if (name) {
  4206. list_size += strlen(name) + 1;
  4207. if (!list) {
  4208. list = (char*)emalloc(list_size);
  4209. *list = '\0';
  4210. } else {
  4211. list = (char*)erealloc(list, list_size);
  4212. strcat(list, ",");
  4213. }
  4214. strcat(list, name);
  4215. }
  4216. entry++;
  4217. n--;
  4218. }
  4219. zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
  4220. if (list) {
  4221. efree(list);
  4222. }
  4223. encoding_detector = php_mb_encoding_detector;
  4224. encoding_converter = php_mb_encoding_converter;
  4225. encoding_oddlen = php_mb_oddlen;
  4226. /* TODO: make independent from mbstring.encoding_translation? */
  4227. if (MBSTRG(encoding_translation)) {
  4228. /* notify internal encoding to Zend Engine */
  4229. name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  4230. zend_multibyte_set_internal_encoding(name TSRMLS_CC);
  4231. }
  4232. zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
  4233. return 0;
  4234. }
  4235. /* }}} */
  4236. /* {{{ char *php_mb_encoding_detector()
  4237. * Interface for Zend Engine
  4238. */
  4239. static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
  4240. {
  4241. mbfl_string string;
  4242. const char *ret;
  4243. enum mbfl_no_encoding *elist;
  4244. int size, *list;
  4245. /* make encoding list */
  4246. list = NULL;
  4247. size = 0;
  4248. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  4249. if (size <= 0) {
  4250. return NULL;
  4251. }
  4252. if (size > 0 && list != NULL) {
  4253. elist = list;
  4254. } else {
  4255. elist = MBSTRG(current_detect_order_list);
  4256. size = MBSTRG(current_detect_order_list_size);
  4257. }
  4258. mbfl_string_init(&string);
  4259. string.no_language = MBSTRG(language);
  4260. string.val = (unsigned char *)arg_string;
  4261. string.len = arg_length;
  4262. ret = mbfl_identify_encoding_name(&string, elist, size, 0);
  4263. if (list != NULL) {
  4264. efree((void *)list);
  4265. }
  4266. if (ret != NULL) {
  4267. return estrdup(ret);
  4268. } else {
  4269. return NULL;
  4270. }
  4271. }
  4272. /* }}} */
  4273. /* {{{ int php_mb_encoding_converter() */
  4274. static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC)
  4275. {
  4276. mbfl_string string, result, *ret;
  4277. enum mbfl_no_encoding from_encoding, to_encoding;
  4278. mbfl_buffer_converter *convd;
  4279. /* new encoding */
  4280. to_encoding = mbfl_name2no_encoding(encoding_to);
  4281. if (to_encoding == mbfl_no_encoding_invalid) {
  4282. return -1;
  4283. }
  4284. /* old encoding */
  4285. from_encoding = mbfl_name2no_encoding(encoding_from);
  4286. if (from_encoding == mbfl_no_encoding_invalid) {
  4287. return -1;
  4288. }
  4289. /* initialize string */
  4290. mbfl_string_init(&string);
  4291. mbfl_string_init(&result);
  4292. string.no_encoding = from_encoding;
  4293. string.no_language = MBSTRG(language);
  4294. string.val = (unsigned char*)from;
  4295. string.len = from_length;
  4296. /* initialize converter */
  4297. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  4298. if (convd == NULL) {
  4299. return -1;
  4300. }
  4301. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  4302. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  4303. /* do it */
  4304. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  4305. if (ret != NULL) {
  4306. *to = ret->val;
  4307. *to_length = ret->len;
  4308. }
  4309. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  4310. mbfl_buffer_converter_delete(convd);
  4311. return ret ? 0 : -1;
  4312. }
  4313. /* }}} */
  4314. /* {{{ int php_mb_oddlen()
  4315. * returns number of odd (e.g. appears only first byte of multibyte
  4316. * character) chars
  4317. */
  4318. static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
  4319. {
  4320. mbfl_string mb_string;
  4321. mbfl_string_init(&mb_string);
  4322. mb_string.no_language = MBSTRG(language);
  4323. mb_string.no_encoding = mbfl_name2no_encoding(encoding);
  4324. mb_string.val = (unsigned char *)string;
  4325. mb_string.len = length;
  4326. if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
  4327. return 0;
  4328. }
  4329. return mbfl_oddlen(&mb_string);
  4330. }
  4331. /* }}} */
  4332. #endif /* ZEND_MULTIBYTE */
  4333. #endif /* HAVE_MBSTRING */
  4334. /*
  4335. * Local variables:
  4336. * tab-width: 4
  4337. * c-basic-offset: 4
  4338. * End:
  4339. * vim600: fdm=marker
  4340. * vim: noet sw=4 ts=4
  4341. */