PageRenderTime 69ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 1ms

/php5/ext/mbstring/mbstring.c

http://github.com/vpj/PHP-Extension-API
C | 4894 lines | 3937 code | 551 blank | 406 comment | 1119 complexity | 5d03371e366ee3b8905913514bf2535b MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-2.1, BSD-3-Clause
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2009 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  16. | Rui Hirokawa <hirokawa@php.net> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* $Id: mbstring.c,v 1.224.2.22.2.25.2.53 2009/04/20 16:07:59 jani Exp $ */
  20. /*
  21. * PHP 4 Multibyte String module "mbstring"
  22. *
  23. * History:
  24. * 2000.5.19 Release php-4.0RC2_jstring-1.0
  25. * 2001.4.1 Release php4_jstring-1.0.91
  26. * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
  27. * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
  28. */
  29. /*
  30. * PHP3 Internationalization support program.
  31. *
  32. * Copyright (c) 1999,2000 by the PHP3 internationalization team.
  33. * All rights reserved.
  34. *
  35. * See README_PHP3-i18n-ja for more detail.
  36. *
  37. * Authors:
  38. * Hironori Sato <satoh@jpnnet.com>
  39. * Shigeru Kanemoto <sgk@happysize.co.jp>
  40. * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
  41. * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
  42. */
  43. /* {{{ includes */
  44. #ifdef HAVE_CONFIG_H
  45. #include "config.h"
  46. #endif
  47. #include "php.h"
  48. #include "php_ini.h"
  49. #include "php_variables.h"
  50. #include "mbstring.h"
  51. #include "ext/standard/php_string.h"
  52. #include "ext/standard/php_mail.h"
  53. #include "ext/standard/exec.h"
  54. #include "ext/standard/php_smart_str.h"
  55. #include "ext/standard/url.h"
  56. #include "main/php_output.h"
  57. #include "ext/standard/info.h"
  58. #include "libmbfl/mbfl/mbfl_allocators.h"
  59. #include "php_variables.h"
  60. #include "php_globals.h"
  61. #include "rfc1867.h"
  62. #include "php_content_types.h"
  63. #include "SAPI.h"
  64. #include "php_unicode.h"
  65. #include "TSRM.h"
  66. #include "mb_gpc.h"
  67. #if HAVE_MBREGEX
  68. #include "php_mbregex.h"
  69. #endif
  70. #ifdef ZEND_MULTIBYTE
  71. #include "zend_multibyte.h"
  72. #endif /* ZEND_MULTIBYTE */
  73. #if HAVE_ONIG
  74. #include "php_onig_compat.h"
  75. #include <oniguruma.h>
  76. #undef UChar
  77. #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
  78. #include "ext/pcre/php_pcre.h"
  79. #endif
  80. /* }}} */
  81. #if HAVE_MBSTRING
  82. /* {{{ prototypes */
  83. ZEND_DECLARE_MODULE_GLOBALS(mbstring)
  84. static PHP_GINIT_FUNCTION(mbstring);
  85. static PHP_GSHUTDOWN_FUNCTION(mbstring);
  86. /* }}} */
  87. /* {{{ php_mb_default_identify_list */
  88. typedef struct _php_mb_nls_ident_list {
  89. enum mbfl_no_language lang;
  90. const enum mbfl_no_encoding* list;
  91. int list_size;
  92. } php_mb_nls_ident_list;
  93. static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
  94. mbfl_no_encoding_ascii,
  95. mbfl_no_encoding_jis,
  96. mbfl_no_encoding_utf8,
  97. mbfl_no_encoding_euc_jp,
  98. mbfl_no_encoding_sjis
  99. };
  100. static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
  101. mbfl_no_encoding_ascii,
  102. mbfl_no_encoding_utf8,
  103. mbfl_no_encoding_euc_cn,
  104. mbfl_no_encoding_cp936
  105. };
  106. static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
  107. mbfl_no_encoding_ascii,
  108. mbfl_no_encoding_utf8,
  109. mbfl_no_encoding_euc_tw,
  110. mbfl_no_encoding_big5
  111. };
  112. static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
  113. mbfl_no_encoding_ascii,
  114. mbfl_no_encoding_utf8,
  115. mbfl_no_encoding_euc_kr,
  116. mbfl_no_encoding_uhc
  117. };
  118. static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
  119. mbfl_no_encoding_ascii,
  120. mbfl_no_encoding_utf8,
  121. mbfl_no_encoding_koi8r,
  122. mbfl_no_encoding_cp1251,
  123. mbfl_no_encoding_cp866
  124. };
  125. static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
  126. mbfl_no_encoding_ascii,
  127. mbfl_no_encoding_utf8,
  128. mbfl_no_encoding_armscii8
  129. };
  130. static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
  131. mbfl_no_encoding_ascii,
  132. mbfl_no_encoding_utf8,
  133. mbfl_no_encoding_cp1254,
  134. mbfl_no_encoding_8859_9
  135. };
  136. static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
  137. mbfl_no_encoding_ascii,
  138. mbfl_no_encoding_utf8,
  139. mbfl_no_encoding_koi8u
  140. };
  141. static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
  142. mbfl_no_encoding_ascii,
  143. mbfl_no_encoding_utf8
  144. };
  145. static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
  146. { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
  147. { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
  148. { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
  149. { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
  150. { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
  151. { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
  152. { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
  153. { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
  154. { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
  155. };
  156. /* }}} */
  157. /* {{{ mb_overload_def mb_ovld[] */
  158. static const struct mb_overload_def mb_ovld[] = {
  159. {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
  160. {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
  161. {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
  162. {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
  163. {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
  164. {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_stripos"},
  165. {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
  166. {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
  167. {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
  168. {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
  169. {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
  170. {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
  171. {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
  172. #if HAVE_MBREGEX
  173. {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
  174. {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
  175. {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
  176. {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
  177. {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
  178. #endif
  179. {0, NULL, NULL, NULL}
  180. };
  181. /* }}} */
  182. /* {{{ arginfo */
  183. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
  184. ZEND_ARG_INFO(0, language)
  185. ZEND_END_ARG_INFO()
  186. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
  187. ZEND_ARG_INFO(0, encoding)
  188. ZEND_END_ARG_INFO()
  189. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
  190. ZEND_ARG_INFO(0, type)
  191. ZEND_END_ARG_INFO()
  192. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
  193. ZEND_ARG_INFO(0, encoding)
  194. ZEND_END_ARG_INFO()
  195. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
  196. ZEND_ARG_INFO(0, encoding)
  197. ZEND_END_ARG_INFO()
  198. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
  199. ZEND_ARG_INFO(0, substchar)
  200. ZEND_END_ARG_INFO()
  201. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
  202. ZEND_ARG_INFO(0, encoding)
  203. ZEND_END_ARG_INFO()
  204. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
  205. ZEND_ARG_INFO(0, encoded_string)
  206. ZEND_ARG_INFO(1, result)
  207. ZEND_END_ARG_INFO()
  208. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
  209. ZEND_ARG_INFO(0, contents)
  210. ZEND_ARG_INFO(0, status)
  211. ZEND_END_ARG_INFO()
  212. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
  213. ZEND_ARG_INFO(0, str)
  214. ZEND_ARG_INFO(0, encoding)
  215. ZEND_END_ARG_INFO()
  216. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
  217. ZEND_ARG_INFO(0, haystack)
  218. ZEND_ARG_INFO(0, needle)
  219. ZEND_ARG_INFO(0, offset)
  220. ZEND_ARG_INFO(0, encoding)
  221. ZEND_END_ARG_INFO()
  222. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
  223. ZEND_ARG_INFO(0, haystack)
  224. ZEND_ARG_INFO(0, needle)
  225. ZEND_ARG_INFO(0, offset)
  226. ZEND_ARG_INFO(0, encoding)
  227. ZEND_END_ARG_INFO()
  228. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
  229. ZEND_ARG_INFO(0, haystack)
  230. ZEND_ARG_INFO(0, needle)
  231. ZEND_ARG_INFO(0, offset)
  232. ZEND_ARG_INFO(0, encoding)
  233. ZEND_END_ARG_INFO()
  234. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
  235. ZEND_ARG_INFO(0, haystack)
  236. ZEND_ARG_INFO(0, needle)
  237. ZEND_ARG_INFO(0, offset)
  238. ZEND_ARG_INFO(0, encoding)
  239. ZEND_END_ARG_INFO()
  240. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
  241. ZEND_ARG_INFO(0, haystack)
  242. ZEND_ARG_INFO(0, needle)
  243. ZEND_ARG_INFO(0, part)
  244. ZEND_ARG_INFO(0, encoding)
  245. ZEND_END_ARG_INFO()
  246. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
  247. ZEND_ARG_INFO(0, haystack)
  248. ZEND_ARG_INFO(0, needle)
  249. ZEND_ARG_INFO(0, part)
  250. ZEND_ARG_INFO(0, encoding)
  251. ZEND_END_ARG_INFO()
  252. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
  253. ZEND_ARG_INFO(0, haystack)
  254. ZEND_ARG_INFO(0, needle)
  255. ZEND_ARG_INFO(0, part)
  256. ZEND_ARG_INFO(0, encoding)
  257. ZEND_END_ARG_INFO()
  258. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
  259. ZEND_ARG_INFO(0, haystack)
  260. ZEND_ARG_INFO(0, needle)
  261. ZEND_ARG_INFO(0, part)
  262. ZEND_ARG_INFO(0, encoding)
  263. ZEND_END_ARG_INFO()
  264. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
  265. ZEND_ARG_INFO(0, haystack)
  266. ZEND_ARG_INFO(0, needle)
  267. ZEND_ARG_INFO(0, encoding)
  268. ZEND_END_ARG_INFO()
  269. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
  270. ZEND_ARG_INFO(0, str)
  271. ZEND_ARG_INFO(0, start)
  272. ZEND_ARG_INFO(0, length)
  273. ZEND_ARG_INFO(0, encoding)
  274. ZEND_END_ARG_INFO()
  275. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
  276. ZEND_ARG_INFO(0, str)
  277. ZEND_ARG_INFO(0, start)
  278. ZEND_ARG_INFO(0, length)
  279. ZEND_ARG_INFO(0, encoding)
  280. ZEND_END_ARG_INFO()
  281. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
  282. ZEND_ARG_INFO(0, str)
  283. ZEND_ARG_INFO(0, encoding)
  284. ZEND_END_ARG_INFO()
  285. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
  286. ZEND_ARG_INFO(0, str)
  287. ZEND_ARG_INFO(0, start)
  288. ZEND_ARG_INFO(0, width)
  289. ZEND_ARG_INFO(0, trimmarker)
  290. ZEND_ARG_INFO(0, encoding)
  291. ZEND_END_ARG_INFO()
  292. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
  293. ZEND_ARG_INFO(0, str)
  294. ZEND_ARG_INFO(0, to)
  295. ZEND_ARG_INFO(0, from)
  296. ZEND_END_ARG_INFO()
  297. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
  298. ZEND_ARG_INFO(0, sourcestring)
  299. ZEND_ARG_INFO(0, mode)
  300. ZEND_ARG_INFO(0, encoding)
  301. ZEND_END_ARG_INFO()
  302. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
  303. ZEND_ARG_INFO(0, sourcestring)
  304. ZEND_ARG_INFO(0, encoding)
  305. ZEND_END_ARG_INFO()
  306. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
  307. ZEND_ARG_INFO(0, sourcestring)
  308. ZEND_ARG_INFO(0, encoding)
  309. ZEND_END_ARG_INFO()
  310. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
  311. ZEND_ARG_INFO(0, str)
  312. ZEND_ARG_INFO(0, encoding_list)
  313. ZEND_ARG_INFO(0, strict)
  314. ZEND_END_ARG_INFO()
  315. ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
  316. ZEND_END_ARG_INFO()
  317. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
  318. ZEND_ARG_INFO(0, encoding)
  319. ZEND_END_ARG_INFO()
  320. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
  321. ZEND_ARG_INFO(0, str)
  322. ZEND_ARG_INFO(0, charset)
  323. ZEND_ARG_INFO(0, transfer)
  324. ZEND_ARG_INFO(0, linefeed)
  325. ZEND_ARG_INFO(0, indent)
  326. ZEND_END_ARG_INFO()
  327. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
  328. ZEND_ARG_INFO(0, string)
  329. ZEND_END_ARG_INFO()
  330. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
  331. ZEND_ARG_INFO(0, str)
  332. ZEND_ARG_INFO(0, option)
  333. ZEND_ARG_INFO(0, encoding)
  334. ZEND_END_ARG_INFO()
  335. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3)
  336. ZEND_ARG_INFO(0, to)
  337. ZEND_ARG_INFO(0, from)
  338. ZEND_ARG_INFO(1, ...)
  339. ZEND_END_ARG_INFO()
  340. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
  341. ZEND_ARG_INFO(0, string)
  342. ZEND_ARG_INFO(0, convmap)
  343. ZEND_ARG_INFO(0, encoding)
  344. ZEND_END_ARG_INFO()
  345. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
  346. ZEND_ARG_INFO(0, string)
  347. ZEND_ARG_INFO(0, convmap)
  348. ZEND_ARG_INFO(0, encoding)
  349. ZEND_END_ARG_INFO()
  350. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
  351. ZEND_ARG_INFO(0, to)
  352. ZEND_ARG_INFO(0, subject)
  353. ZEND_ARG_INFO(0, message)
  354. ZEND_ARG_INFO(0, additional_headers)
  355. ZEND_ARG_INFO(0, additional_parameters)
  356. ZEND_END_ARG_INFO()
  357. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
  358. ZEND_ARG_INFO(0, type)
  359. ZEND_END_ARG_INFO()
  360. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
  361. ZEND_ARG_INFO(0, var)
  362. ZEND_ARG_INFO(0, encoding)
  363. ZEND_END_ARG_INFO()
  364. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
  365. ZEND_ARG_INFO(0, encoding)
  366. ZEND_END_ARG_INFO()
  367. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
  368. ZEND_ARG_INFO(0, pattern)
  369. ZEND_ARG_INFO(0, string)
  370. ZEND_ARG_INFO(1, registers)
  371. ZEND_END_ARG_INFO()
  372. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
  373. ZEND_ARG_INFO(0, pattern)
  374. ZEND_ARG_INFO(0, string)
  375. ZEND_ARG_INFO(1, registers)
  376. ZEND_END_ARG_INFO()
  377. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
  378. ZEND_ARG_INFO(0, pattern)
  379. ZEND_ARG_INFO(0, replacement)
  380. ZEND_ARG_INFO(0, string)
  381. ZEND_ARG_INFO(0, option)
  382. ZEND_END_ARG_INFO()
  383. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
  384. ZEND_ARG_INFO(0, pattern)
  385. ZEND_ARG_INFO(0, replacement)
  386. ZEND_ARG_INFO(0, string)
  387. ZEND_END_ARG_INFO()
  388. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
  389. ZEND_ARG_INFO(0, pattern)
  390. ZEND_ARG_INFO(0, string)
  391. ZEND_ARG_INFO(0, limit)
  392. ZEND_END_ARG_INFO()
  393. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
  394. ZEND_ARG_INFO(0, pattern)
  395. ZEND_ARG_INFO(0, string)
  396. ZEND_ARG_INFO(0, option)
  397. ZEND_END_ARG_INFO()
  398. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
  399. ZEND_ARG_INFO(0, pattern)
  400. ZEND_ARG_INFO(0, option)
  401. ZEND_END_ARG_INFO()
  402. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
  403. ZEND_ARG_INFO(0, pattern)
  404. ZEND_ARG_INFO(0, option)
  405. ZEND_END_ARG_INFO()
  406. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
  407. ZEND_ARG_INFO(0, pattern)
  408. ZEND_ARG_INFO(0, option)
  409. ZEND_END_ARG_INFO()
  410. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
  411. ZEND_ARG_INFO(0, string)
  412. ZEND_ARG_INFO(0, pattern)
  413. ZEND_ARG_INFO(0, option)
  414. ZEND_END_ARG_INFO()
  415. ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
  416. ZEND_END_ARG_INFO()
  417. ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
  418. ZEND_END_ARG_INFO()
  419. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
  420. ZEND_ARG_INFO(0, position)
  421. ZEND_END_ARG_INFO()
  422. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
  423. ZEND_ARG_INFO(0, options)
  424. ZEND_END_ARG_INFO()
  425. /* }}} */
  426. /* {{{ zend_function_entry mbstring_functions[] */
  427. const zend_function_entry mbstring_functions[] = {
  428. PHP_FE(mb_convert_case, arginfo_mb_convert_case)
  429. PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
  430. PHP_FE(mb_strtolower, arginfo_mb_strtolower)
  431. PHP_FE(mb_language, arginfo_mb_language)
  432. PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
  433. PHP_FE(mb_http_input, arginfo_mb_http_input)
  434. PHP_FE(mb_http_output, arginfo_mb_http_output)
  435. PHP_FE(mb_detect_order, arginfo_mb_detect_order)
  436. PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
  437. PHP_FE(mb_parse_str, arginfo_mb_parse_str)
  438. PHP_FE(mb_output_handler, arginfo_mb_output_handler)
  439. PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
  440. PHP_FE(mb_strlen, arginfo_mb_strlen)
  441. PHP_FE(mb_strpos, arginfo_mb_strpos)
  442. PHP_FE(mb_strrpos, arginfo_mb_strrpos)
  443. PHP_FE(mb_stripos, arginfo_mb_stripos)
  444. PHP_FE(mb_strripos, arginfo_mb_strripos)
  445. PHP_FE(mb_strstr, arginfo_mb_strstr)
  446. PHP_FE(mb_strrchr, arginfo_mb_strrchr)
  447. PHP_FE(mb_stristr, arginfo_mb_stristr)
  448. PHP_FE(mb_strrichr, arginfo_mb_strrichr)
  449. PHP_FE(mb_substr_count, arginfo_mb_substr_count)
  450. PHP_FE(mb_substr, arginfo_mb_substr)
  451. PHP_FE(mb_strcut, arginfo_mb_strcut)
  452. PHP_FE(mb_strwidth, arginfo_mb_strwidth)
  453. PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
  454. PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
  455. PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
  456. PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
  457. PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
  458. PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
  459. PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
  460. PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
  461. PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
  462. PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
  463. PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
  464. PHP_FE(mb_send_mail, arginfo_mb_send_mail)
  465. PHP_FE(mb_get_info, arginfo_mb_get_info)
  466. PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
  467. #if HAVE_MBREGEX
  468. PHP_MBREGEX_FUNCTION_ENTRIES
  469. #endif
  470. { NULL, NULL, NULL }
  471. };
  472. /* }}} */
  473. /* {{{ zend_module_entry mbstring_module_entry */
  474. zend_module_entry mbstring_module_entry = {
  475. STANDARD_MODULE_HEADER,
  476. "mbstring",
  477. mbstring_functions,
  478. PHP_MINIT(mbstring),
  479. PHP_MSHUTDOWN(mbstring),
  480. PHP_RINIT(mbstring),
  481. PHP_RSHUTDOWN(mbstring),
  482. PHP_MINFO(mbstring),
  483. NO_VERSION_YET,
  484. PHP_MODULE_GLOBALS(mbstring),
  485. PHP_GINIT(mbstring),
  486. PHP_GSHUTDOWN(mbstring),
  487. NULL,
  488. STANDARD_MODULE_PROPERTIES_EX
  489. };
  490. /* }}} */
  491. /* {{{ static sapi_post_entry php_post_entries[] */
  492. static sapi_post_entry php_post_entries[] = {
  493. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
  494. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  495. { NULL, 0, NULL, NULL }
  496. };
  497. /* }}} */
  498. #ifdef COMPILE_DL_MBSTRING
  499. ZEND_GET_MODULE(mbstring)
  500. #endif
  501. /* {{{ allocators */
  502. static void *_php_mb_allocators_malloc(unsigned int sz)
  503. {
  504. return emalloc(sz);
  505. }
  506. static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
  507. {
  508. return erealloc(ptr, sz);
  509. }
  510. static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
  511. {
  512. return ecalloc(nelems, szelem);
  513. }
  514. static void _php_mb_allocators_free(void *ptr)
  515. {
  516. efree(ptr);
  517. }
  518. static void *_php_mb_allocators_pmalloc(unsigned int sz)
  519. {
  520. return pemalloc(sz, 1);
  521. }
  522. static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
  523. {
  524. return perealloc(ptr, sz, 1);
  525. }
  526. static void _php_mb_allocators_pfree(void *ptr)
  527. {
  528. pefree(ptr, 1);
  529. }
  530. static mbfl_allocators _php_mb_allocators = {
  531. _php_mb_allocators_malloc,
  532. _php_mb_allocators_realloc,
  533. _php_mb_allocators_calloc,
  534. _php_mb_allocators_free,
  535. _php_mb_allocators_pmalloc,
  536. _php_mb_allocators_prealloc,
  537. _php_mb_allocators_pfree
  538. };
  539. /* }}} */
  540. /* {{{ static sapi_post_entry mbstr_post_entries[] */
  541. static sapi_post_entry mbstr_post_entries[] = {
  542. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
  543. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  544. { NULL, 0, NULL, NULL }
  545. };
  546. /* }}} */
  547. /* {{{ static int php_mb_parse_encoding_list()
  548. * Return 0 if input contains any illegal encoding, otherwise 1.
  549. * Even if any illegal encoding is detected the result may contain a list
  550. * of parsed encodings.
  551. */
  552. static int
  553. php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  554. {
  555. int n, l, size, bauto, ret = 1;
  556. char *p, *p1, *p2, *endp, *tmpstr;
  557. enum mbfl_no_encoding no_encoding;
  558. enum mbfl_no_encoding *src, *entry, *list;
  559. list = NULL;
  560. if (value == NULL || value_length <= 0) {
  561. if (return_list) {
  562. *return_list = NULL;
  563. }
  564. if (return_size) {
  565. *return_size = 0;
  566. }
  567. return 0;
  568. } else {
  569. enum mbfl_no_encoding *identify_list;
  570. int identify_list_size;
  571. identify_list = MBSTRG(default_detect_order_list);
  572. identify_list_size = MBSTRG(default_detect_order_list_size);
  573. /* copy the value string for work */
  574. if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
  575. tmpstr = (char *)estrndup(value+1, value_length-2);
  576. value_length -= 2;
  577. }
  578. else
  579. tmpstr = (char *)estrndup(value, value_length);
  580. if (tmpstr == NULL) {
  581. return 0;
  582. }
  583. /* count the number of listed encoding names */
  584. endp = tmpstr + value_length;
  585. n = 1;
  586. p1 = tmpstr;
  587. while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
  588. p1 = p2 + 1;
  589. n++;
  590. }
  591. size = n + identify_list_size;
  592. /* make list */
  593. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  594. if (list != NULL) {
  595. entry = list;
  596. n = 0;
  597. bauto = 0;
  598. p1 = tmpstr;
  599. do {
  600. p2 = p = php_memnstr(p1, ",", 1, endp);
  601. if (p == NULL) {
  602. p = endp;
  603. }
  604. *p = '\0';
  605. /* trim spaces */
  606. while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
  607. p1++;
  608. }
  609. p--;
  610. while (p > p1 && (*p == ' ' || *p == '\t')) {
  611. *p = '\0';
  612. p--;
  613. }
  614. /* convert to the encoding number and check encoding */
  615. if (strcasecmp(p1, "auto") == 0) {
  616. if (!bauto) {
  617. bauto = 1;
  618. l = identify_list_size;
  619. src = identify_list;
  620. while (l > 0) {
  621. *entry++ = *src++;
  622. l--;
  623. n++;
  624. }
  625. }
  626. } else {
  627. no_encoding = mbfl_name2no_encoding(p1);
  628. if (no_encoding != mbfl_no_encoding_invalid) {
  629. *entry++ = no_encoding;
  630. n++;
  631. } else {
  632. ret = 0;
  633. }
  634. }
  635. p1 = p2 + 1;
  636. } while (n < size && p2 != NULL);
  637. if (n > 0) {
  638. if (return_list) {
  639. *return_list = list;
  640. } else {
  641. pefree(list, persistent);
  642. }
  643. } else {
  644. pefree(list, persistent);
  645. if (return_list) {
  646. *return_list = NULL;
  647. }
  648. ret = 0;
  649. }
  650. if (return_size) {
  651. *return_size = n;
  652. }
  653. } else {
  654. if (return_list) {
  655. *return_list = NULL;
  656. }
  657. if (return_size) {
  658. *return_size = 0;
  659. }
  660. ret = 0;
  661. }
  662. efree(tmpstr);
  663. }
  664. return ret;
  665. }
  666. /* }}} */
  667. /* {{{ MBSTRING_API php_mb_check_encoding_list */
  668. MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
  669. return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
  670. }
  671. /* }}} */
  672. /* {{{ static int php_mb_parse_encoding_array()
  673. * Return 0 if input contains any illegal encoding, otherwise 1.
  674. * Even if any illegal encoding is detected the result may contain a list
  675. * of parsed encodings.
  676. */
  677. static int
  678. php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  679. {
  680. zval **hash_entry;
  681. HashTable *target_hash;
  682. int i, n, l, size, bauto,ret = 1;
  683. enum mbfl_no_encoding no_encoding;
  684. enum mbfl_no_encoding *src, *list, *entry;
  685. list = NULL;
  686. if (Z_TYPE_P(array) == IS_ARRAY) {
  687. enum mbfl_no_encoding *identify_list;
  688. int identify_list_size;
  689. identify_list = MBSTRG(default_detect_order_list);
  690. identify_list_size = MBSTRG(default_detect_order_list_size);
  691. target_hash = Z_ARRVAL_P(array);
  692. zend_hash_internal_pointer_reset(target_hash);
  693. i = zend_hash_num_elements(target_hash);
  694. size = i + identify_list_size;
  695. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  696. if (list != NULL) {
  697. entry = list;
  698. bauto = 0;
  699. n = 0;
  700. while (i > 0) {
  701. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  702. break;
  703. }
  704. convert_to_string_ex(hash_entry);
  705. if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
  706. if (!bauto) {
  707. bauto = 1;
  708. l = identify_list_size;
  709. src = identify_list;
  710. while (l > 0) {
  711. *entry++ = *src++;
  712. l--;
  713. n++;
  714. }
  715. }
  716. } else {
  717. no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
  718. if (no_encoding != mbfl_no_encoding_invalid) {
  719. *entry++ = no_encoding;
  720. n++;
  721. } else {
  722. ret = 0;
  723. }
  724. }
  725. zend_hash_move_forward(target_hash);
  726. i--;
  727. }
  728. if (n > 0) {
  729. if (return_list) {
  730. *return_list = list;
  731. } else {
  732. pefree(list, persistent);
  733. }
  734. } else {
  735. pefree(list, persistent);
  736. if (return_list) {
  737. *return_list = NULL;
  738. }
  739. ret = 0;
  740. }
  741. if (return_size) {
  742. *return_size = n;
  743. }
  744. } else {
  745. if (return_list) {
  746. *return_list = NULL;
  747. }
  748. if (return_size) {
  749. *return_size = 0;
  750. }
  751. ret = 0;
  752. }
  753. }
  754. return ret;
  755. }
  756. /* }}} */
  757. static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
  758. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
  759. static void _php_mb_free_regex(void *opaque);
  760. #if HAVE_ONIG
  761. /* {{{ _php_mb_compile_regex */
  762. void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
  763. {
  764. php_mb_regex_t *retval;
  765. OnigErrorInfo err_info;
  766. int err_code;
  767. if ((err_code = onig_new(&retval,
  768. (const OnigUChar *)pattern,
  769. (const OnigUChar *)pattern + strlen(pattern),
  770. ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
  771. ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
  772. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  773. onig_error_code_to_str(err_str, err_code, err_info);
  774. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
  775. retval = NULL;
  776. }
  777. return retval;
  778. }
  779. /* }}} */
  780. /* {{{ _php_mb_match_regex */
  781. int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
  782. {
  783. return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
  784. (const OnigUChar*)str + str_len, (const OnigUChar *)str,
  785. (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
  786. }
  787. /* }}} */
  788. /* {{{ _php_mb_free_regex */
  789. void _php_mb_free_regex(void *opaque)
  790. {
  791. onig_free((php_mb_regex_t *)opaque);
  792. }
  793. /* }}} */
  794. #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
  795. /* {{{ _php_mb_compile_regex */
  796. void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
  797. {
  798. pcre *retval;
  799. const char *err_str;
  800. int err_offset;
  801. if (!(retval = pcre_compile(pattern,
  802. PCRE_CASELESS, &err_str, &err_offset, NULL))) {
  803. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
  804. }
  805. return retval;
  806. }
  807. /* }}} */
  808. /* {{{ _php_mb_match_regex */
  809. int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
  810. {
  811. return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
  812. 0, NULL, 0) >= 0;
  813. }
  814. /* }}} */
  815. /* {{{ _php_mb_free_regex */
  /* Release a regex previously returned by _php_mb_compile_regex(). */
  void _php_mb_free_regex(void *opaque)
  {
      void *compiled = opaque;

      pcre_free(compiled);
  }
  820. /* }}} */
  821. #endif
  822. /* {{{ php_mb_nls_get_default_detect_order_list */
  823. static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
  824. {
  825. size_t i;
  826. *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  827. *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  828. for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
  829. if (php_mb_default_identify_list[i].lang == lang) {
  830. *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
  831. *plist_size = php_mb_default_identify_list[i].list_size;
  832. return 1;
  833. }
  834. }
  835. return 0;
  836. }
  837. /* }}} */
  838. /* {{{ php.ini directive handler */
  839. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
  840. static PHP_INI_MH(OnUpdate_mbstring_language)
  841. {
  842. enum mbfl_no_language no_language;
  843. no_language = mbfl_name2no_language(new_value);
  844. if (no_language == mbfl_no_language_invalid) {
  845. MBSTRG(language) = mbfl_no_language_neutral;
  846. return FAILURE;
  847. }
  848. MBSTRG(language) = no_language;
  849. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  850. return SUCCESS;
  851. }
  852. /* }}} */
  853. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
  854. static PHP_INI_MH(OnUpdate_mbstring_detect_order)
  855. {
  856. enum mbfl_no_encoding *list;
  857. int size;
  858. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  859. if (MBSTRG(detect_order_list)) {
  860. free(MBSTRG(detect_order_list));
  861. }
  862. MBSTRG(detect_order_list) = list;
  863. MBSTRG(detect_order_list_size) = size;
  864. } else {
  865. if (MBSTRG(detect_order_list)) {
  866. free(MBSTRG(detect_order_list));
  867. MBSTRG(detect_order_list) = NULL;
  868. }
  869. return FAILURE;
  870. }
  871. return SUCCESS;
  872. }
  873. /* }}} */
  874. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
  875. static PHP_INI_MH(OnUpdate_mbstring_http_input)
  876. {
  877. enum mbfl_no_encoding *list;
  878. int size;
  879. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  880. if (MBSTRG(http_input_list)) {
  881. free(MBSTRG(http_input_list));
  882. }
  883. MBSTRG(http_input_list) = list;
  884. MBSTRG(http_input_list_size) = size;
  885. } else {
  886. if (MBSTRG(http_input_list)) {
  887. free(MBSTRG(http_input_list));
  888. MBSTRG(http_input_list) = NULL;
  889. }
  890. MBSTRG(http_input_list_size) = 0;
  891. return FAILURE;
  892. }
  893. return SUCCESS;
  894. }
  895. /* }}} */
  896. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
  897. static PHP_INI_MH(OnUpdate_mbstring_http_output)
  898. {
  899. enum mbfl_no_encoding no_encoding;
  900. no_encoding = mbfl_name2no_encoding(new_value);
  901. if (no_encoding != mbfl_no_encoding_invalid) {
  902. MBSTRG(http_output_encoding) = no_encoding;
  903. MBSTRG(current_http_output_encoding) = no_encoding;
  904. } else {
  905. MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
  906. MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
  907. if (new_value != NULL && new_value_length > 0) {
  908. return FAILURE;
  909. }
  910. }
  911. return SUCCESS;
  912. }
  913. /* }}} */
  914. /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
  915. int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
  916. {
  917. enum mbfl_no_encoding no_encoding;
  918. const char *enc_name = NULL;
  919. uint enc_name_len = 0;
  920. no_encoding = new_value ? mbfl_name2no_encoding(new_value):
  921. mbfl_no_encoding_invalid;
  922. if (no_encoding != mbfl_no_encoding_invalid) {
  923. enc_name = new_value;
  924. enc_name_len = new_value_length;
  925. } else {
  926. switch (MBSTRG(language)) {
  927. case mbfl_no_language_uni:
  928. enc_name = "UTF-8";
  929. enc_name_len = sizeof("UTF-8") - 1;
  930. break;
  931. case mbfl_no_language_japanese:
  932. enc_name = "EUC-JP";
  933. enc_name_len = sizeof("EUC-JP") - 1;
  934. break;
  935. case mbfl_no_language_korean:
  936. enc_name = "EUC-KR";
  937. enc_name_len = sizeof("EUC-KR") - 1;
  938. break;
  939. case mbfl_no_language_simplified_chinese:
  940. enc_name = "EUC-CN";
  941. enc_name_len = sizeof("EUC-CN") - 1;
  942. break;
  943. case mbfl_no_language_traditional_chinese:
  944. enc_name = "EUC-TW";
  945. enc_name_len = sizeof("EUC-TW") - 1;
  946. break;
  947. case mbfl_no_language_russian:
  948. enc_name = "KOI8-R";
  949. enc_name_len = sizeof("KOI8-R") - 1;
  950. break;
  951. case mbfl_no_language_german:
  952. enc_name = "ISO-8859-15";
  953. enc_name_len = sizeof("ISO-8859-15") - 1;
  954. break;
  955. case mbfl_no_language_armenian:
  956. enc_name = "ArmSCII-8";
  957. enc_name_len = sizeof("ArmSCII-8") - 1;
  958. break;
  959. case mbfl_no_language_turkish:
  960. enc_name = "ISO-8859-9";
  961. enc_name_len = sizeof("ISO-8859-9") - 1;
  962. break;
  963. default:
  964. enc_name = "ISO-8859-1";
  965. enc_name_len = sizeof("ISO-8859-1") - 1;
  966. break;
  967. }
  968. no_encoding = mbfl_name2no_encoding(enc_name);
  969. }
  970. MBSTRG(internal_encoding) = no_encoding;
  971. MBSTRG(current_internal_encoding) = no_encoding;
  972. #if HAVE_MBREGEX
  973. {
  974. const char *enc_name = new_value;
  975. if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
  976. /* falls back to EUC-JP if an unknown encoding name is given */
  977. enc_name = "EUC-JP";
  978. php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
  979. }
  980. php_mb_regex_set_mbctype(new_value TSRMLS_CC);
  981. }
  982. #endif
  983. return SUCCESS;
  984. }
  985. /* }}} */
  986. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
  987. static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
  988. {
  989. if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
  990. || stage == PHP_INI_STAGE_RUNTIME) {
  991. return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
  992. } else {
  993. /* the corresponding mbstring globals needs to be set according to the
  994. * ini value in the later stage because it never falls back to the
  995. * default value if 1. no value for mbstring.internal_encoding is given,
  996. * 2. mbstring.language directive is processed in per-dir or runtime
  997. * context and 3. call to the handler for mbstring.language is done
  998. * after mbstring.internal_encoding is handled. */
  999. return SUCCESS;
  1000. }
  1001. }
  1002. /* }}} */
  1003. #ifdef ZEND_MULTIBYTE
  1004. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
  1005. static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
  1006. {
  1007. int *list, size;
  1008. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  1009. if (MBSTRG(script_encoding_list) != NULL) {
  1010. free(MBSTRG(script_encoding_list));
  1011. }
  1012. MBSTRG(script_encoding_list) = list;
  1013. MBSTRG(script_encoding_list_size) = size;
  1014. } else {
  1015. if (MBSTRG(script_encoding_list) != NULL) {
  1016. free(MBSTRG(script_encoding_list));
  1017. }
  1018. MBSTRG(script_encoding_list) = NULL;
  1019. MBSTRG(script_encoding_list_size) = 0;
  1020. return FAILURE;
  1021. }
  1022. return SUCCESS;
  1023. }
  1024. /* }}} */
  1025. #endif /* ZEND_MULTIBYTE */
  1026. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
  1027. static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
  1028. {
  1029. int c;
  1030. char *endptr = NULL;
  1031. if (new_value != NULL) {
  1032. if (strcasecmp("none", new_value) == 0) {
  1033. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1034. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1035. } else if (strcasecmp("long", new_value) == 0) {
  1036. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1037. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1038. } else if (strcasecmp("entity", new_value) == 0) {
  1039. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1040. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1041. } else {
  1042. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1043. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1044. if (new_value_length >0) {
  1045. c = strtol(new_value, &endptr, 0);
  1046. if (*endptr == '\0') {
  1047. MBSTRG(filter_illegal_substchar) = c;
  1048. MBSTRG(current_filter_illegal_substchar) = c;
  1049. }
  1050. }
  1051. }
  1052. } else {
  1053. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1054. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1055. MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
  1056. MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
  1057. }
  1058. return SUCCESS;
  1059. }
  1060. /* }}} */
  1061. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
  1062. static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
  1063. {
  1064. if (new_value == NULL) {
  1065. return FAILURE;
  1066. }
  1067. OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
  1068. if (MBSTRG(encoding_translation)) {
  1069. sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
  1070. sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
  1071. } else {
  1072. sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
  1073. sapi_register_post_entries(php_post_entries TSRMLS_CC);
  1074. }
  1075. return SUCCESS;
  1076. }
  1077. /* }}} */
  1078. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
  1079. static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
  1080. {
  1081. zval tmp;
  1082. void *re = NULL;
  1083. if (!new_value) {
  1084. new_value = entry->orig_value;
  1085. new_value_length = entry->orig_value_length;
  1086. }
  1087. php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
  1088. if (Z_STRLEN(tmp) > 0) {
  1089. if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
  1090. zval_dtor(&tmp);
  1091. return FAILURE;
  1092. }
  1093. }
  1094. if (MBSTRG(http_output_conv_mimetypes)) {
  1095. _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
  1096. }
  1097. MBSTRG(http_output_conv_mimetypes) = re;
  1098. zval_dtor(&tmp);
  1099. return SUCCESS;
  1100. }
  1101. /* }}} */
  1102. /* }}} */
  1103. /* {{{ php.ini directive registration */
/* Registration table for the mbstring.* INI directives: directive name,
 * default value, the contexts in which it may be changed, and the update
 * handler invoked when it changes. */
PHP_INI_BEGIN()
/* language / encoding selection -- custom handlers defined above */
PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
#ifdef ZEND_MULTIBYTE
PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
#endif /* ZEND_MULTIBYTE */
PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
/* stored directly into the func_overload field of the module globals */
STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
PHP_INI_SYSTEM | PHP_INI_PERDIR,
OnUpdate_mbstring_encoding_translation,
encoding_translation, zend_mbstring_globals, mbstring_globals)
/* default pattern: MIME types starting with text/ or application/xhtml+xml */
PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
"^(text/|application/xhtml\\+xml)",
PHP_INI_ALL,
OnUpdate_mbstring_http_output_conv_mimetypes)
/* NOTE(review): declared as a boolean directive but updated through
 * OnUpdateLong -- confirm that the strict_detection field is long-sized. */
STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
PHP_INI_ALL,
OnUpdateLong,
strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
  1129. /* }}} */
  1130. /* {{{ module global initialize handler */
  1131. static PHP_GINIT_FUNCTION(mbstring)
  1132. {
  1133. mbstring_globals->language = mbfl_no_language_uni;
  1134. mbstring_globals->internal_encoding = mbfl_no_encoding_invalid;
  1135. mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
  1136. #ifdef ZEND_MULTIBYTE
  1137. mbstring_globals->script_encoding_list = NULL;
  1138. mbstring_globals->script_encoding_list_size = 0;
  1139. #endif /* ZEND_MULTIBYTE */
  1140. mbstring_globals->http_output_encoding = mbfl_no_encoding_pass;
  1141. mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass;
  1142. mbstring_globals->http_input_identify = mbfl_no_encoding_invalid;
  1143. mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid;
  1144. mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid;
  1145. mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid;
  1146. mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid;
  1147. mbstring_globals->http_input_list = NULL;
  1148. mbstring_globals->http_input_list_size = 0;
  1149. mbstring_globals->detect_order_list = NULL;
  1150. mbstring_globals->detect_order_list_size = 0;
  1151. mbstring_globals->current_detect_order_list = NULL;
  1152. mbstring_globals->current_detect_order_list_size = 0;
  1153. mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  1154. mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  1155. mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1156. mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
  1157. mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1158. mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
  1159. mbstring_globals->illegalchars = 0;
  1160. mbstring_globals->func_overload = 0;
  1161. mbstring_globals->encoding_translation = 0;
  1162. mbstring_globals->strict_detection = 0;
  1163. mbstring_globals->outconv = NULL;
  1164. mbstring_globals->http_output_conv_mimetypes = NULL;
  1165. #if HAVE_MBREGEX
  1166. mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
  1167. #endif
  1168. }
  1169. /* }}} */
  1170. /* {{{ PHP_GSHUTDOWN_FUNCTION */
  1171. static PHP_GSHUTDOWN_FUNCTION(mbstring)
  1172. {
  1173. if (mbstring_globals->http_input_list) {
  1174. free(mbstring_globals->http_input_list);
  1175. }
  1176. #ifdef ZEND_MULTIBYTE
  1177. if (mbstring_globals->script_encoding_list) {
  1178. free(mbstring_globals->script_encoding_list);
  1179. }
  1180. #endif /* ZEND_MULTIBYTE */
  1181. if (mbstring_globals->detect_order_list) {
  1182. free(mbstring_globals->detect_order_list);
  1183. }
  1184. if (mbstring_globals->http_output_conv_mimetypes) {
  1185. _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
  1186. }
  1187. #if HAVE_MBREGEX
  1188. php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
  1189. #endif
  1190. }
  1191. /* }}} */
  1192. /* {{{ PHP_MINIT_FUNCTION(mbstring) */
/* Module startup: installs the libmbfl allocator table, registers the INI
 * entries, the treat-data handler, the POST handlers (only when
 * encoding_translation is on) and the MB_* constants, then starts the
 * mbregex sub-module when it is compiled in. Returns SUCCESS. */
PHP_MINIT_FUNCTION(mbstring)
{
__mbfl_allocators = &_php_mb_allocators;
/* must run before MBSTRG(encoding_translation) is read below */
REGISTER_INI_ENTRIES();
/* This is a global handler. Should not be set in a per-request handler. */
sapi_register_treat_data(mbstr_treat_data);
/* Post handlers are stored in the thread-local context. */
if (MBSTRG(encoding_translation)) {
sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
}
/* bit flags for mbstring.func_overload */
REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
/* case-conversion mode constants */
REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
#if HAVE_MBREGEX
PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
return SUCCESS;
}
  1214. /* }}} */
  1215. /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
/* Module shutdown: unregisters the INI entries and shuts down the mbregex
 * sub-module when it is compiled in. Returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
UNREGISTER_INI_ENTRIES();
#if HAVE_MBREGEX
PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
return SUCCESS;
}
  1224. /* }}} */
  1225. /* {{{ PHP_RINIT_FUNCTION(mbstring) */
  1226. PHP_RINIT_FUNCTION(mbstring)
  1227. {
  1228. int n;
  1229. enum mbfl_no_encoding *list=NULL, *entry;
  1230. zend_function *func, *orig;
  1231. const struct mb_overload_def *p;
  1232. MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
  1233. MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
  1234. MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
  1235. MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
  1236. MBSTRG(illegalchars) = 0;
  1237. n = 0;
  1238. if (MBSTRG(detect_order_list)) {
  1239. list = MBSTRG(detect_order_list);
  1240. n = MBSTRG(detect_order_list_size);
  1241. }
  1242. if (n <= 0) {
  1243. list = MBSTRG(default_detect_order_list);
  1244. n = MBSTRG(default_detect_order_list_size);
  1245. }
  1246. entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
  1247. MBSTRG(current_detect_order_list) = entry;
  1248. MBSTRG(current_detect_order_list_size) = n;
  1249. while (n > 0) {
  1250. *entry++ = *list++;
  1251. n--;
  1252. }
  1253. /* override original function. */
  1254. if (MBSTRG(func_overload)){
  1255. p = &(mb_ovld[0]);
  1256. while (p->type > 0) {
  1257. if ((MBSTRG(func_overload) & p->type) == p->type &&
  1258. zend_hash_find(EG(function_table), p->save_func,
  1259. strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
  1260. zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
  1261. if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
  1262. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
  1263. return FAILURE;
  1264. } else {
  1265. zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
  1266. if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
  1267. NULL) == FAILURE) {
  1268. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
  1269. return FAILURE;
  1270. }
  1271. }
  1272. }
  1273. p++;
  1274. }
  1275. }
  1276. #if HAVE_MBREGEX
  1277. PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1278. #endif
  1279. #ifdef ZEND_MULTIBYTE
  1280. zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
  1281. php_mb_set_zend_encoding(TSRMLS_C);
  1282. #endif /* ZEND_MULTIBYTE */
  1283. return SUCCESS;
  1284. }
  1285. /* }}} */
  1286. /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
  1287. PHP_RSHUTDOWN_FUNCTION(mbstring)
  1288. {
  1289. const struct mb_overload_def *p;
  1290. zend_function *orig;
  1291. if (MBSTRG(current_detect_order_list) != NULL) {
  1292. efree(MBSTRG(current_detect_order_list));
  1293. MBSTRG(current_detect_order_list) = NULL;
  1294. MBSTRG(current_detect_order_list_size) = 0;
  1295. }
  1296. if (MBSTRG(outconv) != NULL) {
  1297. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1298. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1299. MBSTRG(outconv) = NULL;
  1300. }
  1301. /* clear http input identification. */
  1302. MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
  1303. MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
  1304. MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
  1305. MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
  1306. MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
  1307. /* clear overloaded function. */
  1308. if (MBSTRG(func_overload)){
  1309. p = &(mb_ovld[0]);
  1310. while (p->type > 0) {
  1311. if ((MBSTRG(func_overload) & p->type) == p->type &&
  1312. zend_hash_find(EG(function_table), p->save_func,
  1313. strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
  1314. zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
  1315. zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
  1316. }
  1317. p++;
  1318. }
  1319. }
  1320. #if HAVE_MBREGEX
  1321. PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1322. #endif
  1323. return SUCCESS;
  1324. }
  1325. /* }}} */
  1326. /* {{{ PHP_MINFO_FUNCTION(mbstring) */
  1327. PHP_MINFO_FUNCTION(mbstring)
  1328. {
  1329. php_info_print_table_start();
  1330. php_info_print_table_row(2, "Multibyte Support", "enabled");
  1331. php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
  1332. php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
  1333. php_info_print_table_end();
  1334. php_info_print_table_start();
  1335. php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
  1336. php_info_print_table_end();
  1337. #if HAVE_MBREGEX
  1338. PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
  1339. #endif
  1340. DISPLAY_INI_ENTRIES();
  1341. }
  1342. /* }}} */
  1343. /* {{{ proto string mb_language([string language])
  1344. Sets the current language or Returns the current language as a string */
  1345. PHP_FUNCTION(mb_language)
  1346. {
  1347. char *name = NULL;
  1348. int name_len = 0;
  1349. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  1350. return;
  1351. }
  1352. if (name == NULL) {
  1353. RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
  1354. } else {
  1355. if (FAILURE == zend_alter_ini_entry(
  1356. "mbstring.language", sizeof("mbstring.language"),
  1357. name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
  1358. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
  1359. RETVAL_FALSE;
  1360. } else {
  1361. RETVAL_TRUE;
  1362. }
  1363. }
  1364. }
  1365. /* }}} */
  1366. /* {{{ proto string mb_internal_encoding([string encoding])
  1367. Sets the current internal encoding or Returns the current internal encoding as a string */
  1368. PHP_FUNCTION(mb_internal_encoding)
  1369. {
  1370. char *name = NULL;
  1371. int name_len;
  1372. enum mbfl_no_encoding no_encoding;
  1373. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  1374. RETURN_FALSE;
  1375. }
  1376. if (name == NULL) {
  1377. name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  1378. if (name != NULL) {
  1379. RETURN_STRING(name, 1);
  1380. } else {
  1381. RETURN_FALSE;
  1382. }
  1383. } else {
  1384. no_encoding = mbfl_name2no_encoding(name);
  1385. if (no_encoding == mbfl_no_encoding_invalid) {
  1386. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1387. RETURN_FALSE;
  1388. } else {
  1389. MBSTRG(current_internal_encoding) = no_encoding;
  1390. #ifdef ZEND_MULTIBYTE
  1391. /* TODO: make independent from mbstring.encoding_translation? */
  1392. if (MBSTRG(encoding_translation)) {
  1393. zend_multibyte_set_internal_encoding(name TSRMLS_CC);
  1394. }
  1395. #endif /* ZEND_MULTIBYTE */
  1396. RETURN_TRUE;
  1397. }
  1398. }
  1399. }
  1400. /* }}} */
  1401. /* {{{ proto mixed mb_http_input([string type])
  1402. Returns the input encoding */
  1403. PHP_FUNCTION(mb_http_input)
  1404. {
  1405. char *typ = NULL;
  1406. int typ_len;
  1407. int retname, n;
  1408. char *name, *list, *temp;
  1409. enum mbfl_no_encoding *entry;
  1410. enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
  1411. retname = 1;
  1412. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  1413. RETURN_FALSE;
  1414. }
  1415. if (typ == NULL) {
  1416. result = MBSTRG(http_input_identify);
  1417. } else {
  1418. switch (*typ) {
  1419. case 'G':
  1420. case 'g':
  1421. result = MBSTRG(http_input_identify_get);
  1422. break;
  1423. case 'P':
  1424. case 'p':
  1425. result = MBSTRG(http_input_identify_post);
  1426. break;
  1427. case 'C':
  1428. case 'c':
  1429. result = MBSTRG(http_input_identify_cookie);
  1430. break;
  1431. case 'S':
  1432. case 's':
  1433. result = MBSTRG(http_input_identify_string);
  1434. break;
  1435. case 'I':
  1436. case 'i':
  1437. array_init(return_value);
  1438. entry = MBSTRG(http_input_list);
  1439. n = MBSTRG(http_input_list_size);
  1440. while (n > 0) {
  1441. name = (char *)mbfl_no_encoding2name(*entry);
  1442. if (name) {
  1443. add_next_index_string(return_value, name, 1);
  1444. }
  1445. entry++;
  1446. n--;
  1447. }
  1448. retname = 0;
  1449. break;
  1450. case 'L':
  1451. case 'l':
  1452. entry = MBSTRG(http_input_list);
  1453. n = MBSTRG(http_input_list_size);
  1454. list = NULL;
  1455. while (n > 0) {
  1456. name = (char *)mbfl_no_encoding2name(*entry);
  1457. if (name) {
  1458. if (list) {
  1459. temp = list;
  1460. spprintf(&list, 0, "%s,%s", temp, name);
  1461. efree(temp);
  1462. if (!list) {
  1463. break;
  1464. }
  1465. } else {
  1466. list = estrdup(name);
  1467. }
  1468. }
  1469. entry++;
  1470. n--;
  1471. }
  1472. if (!list) {
  1473. RETURN_FALSE;
  1474. }
  1475. RETVAL_STRING(list, 0);
  1476. retname = 0;
  1477. break;
  1478. default:
  1479. result = MBSTRG(http_input_identify);
  1480. break;
  1481. }
  1482. }
  1483. if (retname) {
  1484. if (result != mbfl_no_encoding_invalid &&
  1485. (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
  1486. RETVAL_STRING(name, 1);
  1487. } else {
  1488. RETVAL_FALSE;
  1489. }
  1490. }
  1491. }
  1492. /* }}} */
  1493. /* {{{ proto string mb_http_output([string encoding])
  1494. Sets the current output_encoding or returns the current output_encoding as a string */
  1495. PHP_FUNCTION(mb_http_output)
  1496. {
  1497. char *name = NULL;
  1498. int name_len;
  1499. enum mbfl_no_encoding no_encoding;
  1500. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
  1501. RETURN_FALSE;
  1502. }
  1503. if (name == NULL) {
  1504. name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
  1505. if (name != NULL) {
  1506. RETURN_STRING(name, 1);
  1507. } else {
  1508. RETURN_FALSE;
  1509. }
  1510. } else {
  1511. no_encoding = mbfl_name2no_encoding(name);
  1512. if (no_encoding == mbfl_no_encoding_invalid) {
  1513. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1514. RETURN_FALSE;
  1515. } else {
  1516. MBSTRG(current_http_output_encoding) = no_encoding;
  1517. RETURN_TRUE;
  1518. }
  1519. }
  1520. }
  1521. /* }}} */
  1522. /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
Sets the current detect_order or returns the current detect_order as an array */
  1524. PHP_FUNCTION(mb_detect_order)
  1525. {
  1526. zval **arg1 = NULL;
  1527. int n, size;
  1528. enum mbfl_no_encoding *list, *entry;
  1529. char *name;
  1530. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
  1531. return;
  1532. }
  1533. if (!arg1) {
  1534. array_init(return_value);
  1535. entry = MBSTRG(current_detect_order_list);
  1536. n = MBSTRG(current_detect_order_list_size);
  1537. while (n > 0) {
  1538. name = (char *)mbfl_no_encoding2name(*entry);
  1539. if (name) {
  1540. add_next_index_string(return_value, name, 1);
  1541. }
  1542. entry++;
  1543. n--;
  1544. }
  1545. } else {
  1546. list = NULL;
  1547. size = 0;
  1548. switch (Z_TYPE_PP(arg1)) {
  1549. case IS_ARRAY:
  1550. if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
  1551. if (list) {
  1552. efree(list);
  1553. }
  1554. RETURN_FALSE;
  1555. }
  1556. break;
  1557. default:
  1558. convert_to_string_ex(arg1);
  1559. if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
  1560. if (list) {
  1561. efree(list);
  1562. }
  1563. RETURN_FALSE;
  1564. }
  1565. break;
  1566. }
  1567. if (list == NULL) {
  1568. RETURN_FALSE;
  1569. }
  1570. if (MBSTRG(current_detect_order_list)) {
  1571. efree(MBSTRG(current_detect_order_list));
  1572. }
  1573. MBSTRG(current_detect_order_list) = list;
  1574. MBSTRG(current_detect_order_list_size) = size;
  1575. RETURN_TRUE;
  1576. }
  1577. }
  1578. /* }}} */
  1579. /* {{{ proto mixed mb_substitute_character([mixed substchar])
  1580. Sets the current substitute_character or returns the current substitute_character */
  1581. PHP_FUNCTION(mb_substitute_character)
  1582. {
  1583. zval **arg1 = NULL;
  1584. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
  1585. return;
  1586. }
  1587. if (!arg1) {
  1588. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  1589. RETURN_STRING("none", 1);
  1590. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  1591. RETURN_STRING("long", 1);
  1592. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  1593. RETURN_STRING("entity", 1);
  1594. } else {
  1595. RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
  1596. }
  1597. } else {
  1598. RETVAL_TRUE;
  1599. switch (Z_TYPE_PP(arg1)) {
  1600. case IS_STRING:
  1601. if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
  1602. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1603. } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
  1604. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1605. } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
  1606. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1607. } else {
  1608. convert_to_long_ex(arg1);
  1609. if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
  1610. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1611. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1612. } else {
  1613. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1614. RETURN_FALSE;
  1615. }
  1616. }
  1617. break;
  1618. default:
  1619. convert_to_long_ex(arg1);
  1620. if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
  1621. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1622. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1623. } else {
  1624. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1625. RETURN_FALSE;
  1626. }
  1627. break;
  1628. }
  1629. }
  1630. }
  1631. /* }}} */
  1632. /* {{{ proto string mb_preferred_mime_name(string encoding)
  1633. Return the preferred MIME name (charset) as a string */
  1634. PHP_FUNCTION(mb_preferred_mime_name)
  1635. {
  1636. enum mbfl_no_encoding no_encoding;
  1637. char *name = NULL;
  1638. int name_len;
  1639. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
  1640. return;
  1641. } else {
  1642. no_encoding = mbfl_name2no_encoding(name);
  1643. if (no_encoding == mbfl_no_encoding_invalid) {
  1644. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1645. RETVAL_FALSE;
  1646. } else {
  1647. const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
  1648. if (preferred_name == NULL || *preferred_name == '\0') {
  1649. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
  1650. RETVAL_FALSE;
  1651. } else {
  1652. RETVAL_STRING((char *)preferred_name, 1);
  1653. }
  1654. }
  1655. }
  1656. }
  1657. /* }}} */
  /* Byte-range predicates: each evaluates to 1 when c lies in one of the
     listed ranges and to 0 otherwise. The argument is evaluated more than
     once, so do not pass expressions with side effects.
     NOTE(review): presumably the Shift_JIS first-byte (IS_SJIS1) and
     second-byte (IS_SJIS2) ranges -- confirm against the encoding tables. */
  #define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
  #define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
  1660. /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
  1661. Parses GET/POST/COOKIE data and sets global variables */
  1662. PHP_FUNCTION(mb_parse_str)
  1663. {
  1664. zval *track_vars_array = NULL;
  1665. char *encstr = NULL;
  1666. int encstr_len;
  1667. php_mb_encoding_handler_info_t info;
  1668. enum mbfl_no_encoding detected;
  1669. track_vars_array = NULL;
  1670. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
  1671. return;
  1672. }
  1673. /* Clear out the array */
  1674. if (track_vars_array != NULL) {
  1675. zval_dtor(track_vars_array);
  1676. array_init(track_vars_array);
  1677. }
  1678. encstr = estrndup(encstr, encstr_len);
  1679. info.data_type = PARSE_STRING;
  1680. info.separator = PG(arg_separator).input;
  1681. info.force_register_globals = (track_vars_array == NULL);
  1682. info.report_errors = 1;
  1683. info.to_encoding = MBSTRG(current_internal_encoding);
  1684. info.to_language = MBSTRG(language);
  1685. info.from_encodings = MBSTRG(http_input_list);
  1686. info.num_from_encodings = MBSTRG(http_input_list_size);
  1687. info.from_language = MBSTRG(language);
  1688. detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
  1689. MBSTRG(http_input_identify) = detected;
  1690. RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
  1691. if (encstr != NULL) efree(encstr);
  1692. }
  1693. /* }}} */
  1694. /* {{{ proto string mb_output_handler(string contents, int status)
  1695. Returns string in output buffer converted to the http_output encoding */
  1696. PHP_FUNCTION(mb_output_handler)
  1697. {
  1698. char *arg_string;
  1699. int arg_string_len;
  1700. long arg_status;
  1701. mbfl_string string, result;
  1702. const char *charset;
  1703. char *p;
  1704. enum mbfl_no_encoding encoding;
  1705. int last_feed, len;
  1706. unsigned char send_text_mimetype = 0;
  1707. char *s, *mimetype = NULL;
  1708. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
  1709. return;
  1710. }
  1711. encoding = MBSTRG(current_http_output_encoding);
  1712. /* start phase only */
  1713. if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
  1714. /* delete the converter just in case. */
  1715. if (MBSTRG(outconv)) {
  1716. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1717. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1718. MBSTRG(outconv) = NULL;
  1719. }
  1720. if (encoding == mbfl_no_encoding_pass) {
  1721. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1722. }
  1723. /* analyze mime type */
  1724. if (SG(sapi_headers).mimetype &&
  1725. _php_mb_match_regex(
  1726. MBSTRG(http_output_conv_mimetypes),
  1727. SG(sapi_headers).mimetype,
  1728. strlen(SG(sapi_headers).mimetype))) {
  1729. if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
  1730. mimetype = estrdup(SG(sapi_headers).mimetype);
  1731. } else {
  1732. mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
  1733. }
  1734. send_text_mimetype = 1;
  1735. } else if (SG(sapi_headers).send_default_content_type) {
  1736. mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
  1737. }
  1738. /* if content-type is not yet set, set it and activate the converter */
  1739. if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
  1740. charset = mbfl_no2preferred_mime_name(encoding);
  1741. if (charset) {
  1742. len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
  1743. if (sapi_add_header(p, len, 0) != FAILURE) {
  1744. SG(sapi_headers).send_default_content_type = 0;
  1745. }
  1746. }
  1747. /* activate the converter */
  1748. MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
  1749. if (send_text_mimetype){
  1750. efree(mimetype);
  1751. }
  1752. }
  1753. }
  1754. /* just return if the converter is not activated. */
  1755. if (MBSTRG(outconv) == NULL) {
  1756. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1757. }
  1758. /* flag */
  1759. last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
  1760. /* mode */
  1761. mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
  1762. mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
  1763. /* feed the string */
  1764. mbfl_string_init(&string);
  1765. string.no_language = MBSTRG(language);
  1766. string.no_encoding = MBSTRG(current_internal_encoding);
  1767. string.val = (unsigned char *)arg_string;
  1768. string.len = arg_string_len;
  1769. mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
  1770. if (last_feed) {
  1771. mbfl_buffer_converter_flush(MBSTRG(outconv));
  1772. }
  1773. /* get the converter output, and return it */
  1774. mbfl_buffer_converter_result(MBSTRG(outconv), &result);
  1775. RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */
  1776. /* delete the converter if it is the last feed. */
  1777. if (last_feed) {
  1778. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1779. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1780. MBSTRG(outconv) = NULL;
  1781. }
  1782. }
  1783. /* }}} */
  1784. /* {{{ proto int mb_strlen(string str [, string encoding])
  1785. Get character numbers of a string */
  1786. PHP_FUNCTION(mb_strlen)
  1787. {
  1788. int n;
  1789. mbfl_string string;
  1790. char *enc_name = NULL;
  1791. int enc_name_len;
  1792. mbfl_string_init(&string);
  1793. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  1794. RETURN_FALSE;
  1795. }
  1796. string.no_language = MBSTRG(language);
  1797. if (enc_name == NULL) {
  1798. string.no_encoding = MBSTRG(current_internal_encoding);
  1799. } else {
  1800. string.no_encoding = mbfl_name2no_encoding(enc_name);
  1801. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1802. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1803. RETURN_FALSE;
  1804. }
  1805. }
  1806. n = mbfl_strlen(&string);
  1807. if (n >= 0) {
  1808. RETVAL_LONG(n);
  1809. } else {
  1810. RETVAL_FALSE;
  1811. }
  1812. }
  1813. /* }}} */
  1814. /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
  1815. Find position of first occurrence of a string within another */
  1816. PHP_FUNCTION(mb_strpos)
  1817. {
  1818. int n, reverse = 0;
  1819. long offset;
  1820. mbfl_string haystack, needle;
  1821. char *enc_name = NULL;
  1822. int enc_name_len;
  1823. mbfl_string_init(&haystack);
  1824. mbfl_string_init(&needle);
  1825. haystack.no_language = MBSTRG(language);
  1826. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1827. needle.no_language = MBSTRG(language);
  1828. needle.no_encoding = MBSTRG(current_internal_encoding);
  1829. offset = 0;
  1830. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
  1831. RETURN_FALSE;
  1832. }
  1833. if (enc_name != NULL) {
  1834. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1835. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1836. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1837. RETURN_FALSE;
  1838. }
  1839. }
  1840. if (offset < 0 || offset > mbfl_strlen(&haystack)) {
  1841. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
  1842. RETURN_FALSE;
  1843. }
  1844. if (needle.len == 0) {
  1845. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  1846. RETURN_FALSE;
  1847. }
  1848. n = mbfl_strpos(&haystack, &needle, offset, reverse);
  1849. if (n >= 0) {
  1850. RETVAL_LONG(n);
  1851. } else {
  1852. switch (-n) {
  1853. case 1:
  1854. break;
  1855. case 2:
  1856. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
  1857. break;
  1858. case 4:
  1859. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
  1860. break;
  1861. case 8:
  1862. php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
  1863. break;
  1864. default:
  1865. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
  1866. break;
  1867. }
  1868. RETVAL_FALSE;
  1869. }
  1870. }
  1871. /* }}} */
  1872. /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
  1873. Find position of last occurrence of a string within another */
  1874. PHP_FUNCTION(mb_strrpos)
  1875. {
  1876. int n;
  1877. mbfl_string haystack, needle;
  1878. char *enc_name = NULL;
  1879. int enc_name_len;
  1880. zval **zoffset = NULL;
  1881. long offset = 0, str_flg;
  1882. char *enc_name2 = NULL;
  1883. int enc_name_len2;
  1884. mbfl_string_init(&haystack);
  1885. mbfl_string_init(&needle);
  1886. haystack.no_language = MBSTRG(language);
  1887. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1888. needle.no_language = MBSTRG(language);
  1889. needle.no_encoding = MBSTRG(current_internal_encoding);
  1890. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
  1891. RETURN_FALSE;
  1892. }
  1893. if (zoffset) {
  1894. if (Z_TYPE_PP(zoffset) == IS_STRING) {
  1895. enc_name2 = Z_STRVAL_PP(zoffset);
  1896. enc_name_len2 = Z_STRLEN_PP(zoffset);
  1897. str_flg = 1;
  1898. if (enc_name2 != NULL) {
  1899. switch (*enc_name2) {
  1900. case '0':
  1901. case '1':
  1902. case '2':
  1903. case '3':
  1904. case '4':
  1905. case '5':
  1906. case '6':
  1907. case '7':
  1908. case '8':
  1909. case '9':
  1910. case ' ':
  1911. case '-':
  1912. case '.':
  1913. break;
  1914. default :
  1915. str_flg = 0;
  1916. break;
  1917. }
  1918. }
  1919. if (str_flg) {
  1920. convert_to_long_ex(zoffset);
  1921. offset = Z_LVAL_PP(zoffset);
  1922. } else {
  1923. enc_name = enc_name2;
  1924. enc_name_len = enc_name_len2;
  1925. }
  1926. } else {
  1927. convert_to_long_ex(zoffset);
  1928. offset = Z_LVAL_PP(zoffset);
  1929. }
  1930. }
  1931. if (enc_name != NULL) {
  1932. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1933. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1934. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1935. RETURN_FALSE;
  1936. }
  1937. }
  1938. if (haystack.len <= 0) {
  1939. RETURN_FALSE;
  1940. }
  1941. if (needle.len <= 0) {
  1942. RETURN_FALSE;
  1943. }
  1944. {
  1945. int haystack_char_len = mbfl_strlen(&haystack);
  1946. if ((offset > 0 && offset > haystack_char_len) ||
  1947. (offset < 0 && -offset > haystack_char_len)) {
  1948. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
  1949. RETURN_FALSE;
  1950. }
  1951. }
  1952. n = mbfl_strpos(&haystack, &needle, offset, 1);
  1953. if (n >= 0) {
  1954. RETVAL_LONG(n);
  1955. } else {
  1956. RETVAL_FALSE;
  1957. }
  1958. }
  1959. /* }}} */
  1960. /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
  1961. Finds position of first occurrence of a string within another, case insensitive */
  1962. PHP_FUNCTION(mb_stripos)
  1963. {
  1964. int n;
  1965. long offset;
  1966. mbfl_string haystack, needle;
  1967. char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1968. int from_encoding_len;
  1969. n = -1;
  1970. offset = 0;
  1971. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
  1972. RETURN_FALSE;
  1973. }
  1974. if (needle.len == 0) {
  1975. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  1976. RETURN_FALSE;
  1977. }
  1978. n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
  1979. if (n >= 0) {
  1980. RETVAL_LONG(n);
  1981. } else {
  1982. RETVAL_FALSE;
  1983. }
  1984. }
  1985. /* }}} */
  1986. /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
  1987. Finds position of last occurrence of a string within another, case insensitive */
  1988. PHP_FUNCTION(mb_strripos)
  1989. {
  1990. int n;
  1991. long offset;
  1992. mbfl_string haystack, needle;
  1993. const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1994. int from_encoding_len;
  1995. n = -1;
  1996. offset = 0;
  1997. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
  1998. RETURN_FALSE;
  1999. }
  2000. n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
  2001. if (n >= 0) {
  2002. RETVAL_LONG(n);
  2003. } else {
  2004. RETVAL_FALSE;
  2005. }
  2006. }
  2007. /* }}} */
  2008. /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
  2009. Finds first occurrence of a string within another */
  2010. PHP_FUNCTION(mb_strstr)
  2011. {
  2012. int n, len, mblen;
  2013. mbfl_string haystack, needle, result, *ret = NULL;
  2014. char *enc_name = NULL;
  2015. int enc_name_len;
  2016. zend_bool part = 0;
  2017. mbfl_string_init(&haystack);
  2018. mbfl_string_init(&needle);
  2019. haystack.no_language = MBSTRG(language);
  2020. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2021. needle.no_language = MBSTRG(language);
  2022. needle.no_encoding = MBSTRG(current_internal_encoding);
  2023. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
  2024. RETURN_FALSE;
  2025. }
  2026. if (enc_name != NULL) {
  2027. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  2028. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2029. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2030. RETURN_FALSE;
  2031. }
  2032. }
  2033. if (needle.len <= 0) {
  2034. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  2035. RETURN_FALSE;
  2036. }
  2037. n = mbfl_strpos(&haystack, &needle, 0, 0);
  2038. if (n >= 0) {
  2039. mblen = mbfl_strlen(&haystack);
  2040. if (part) {
  2041. ret = mbfl_substr(&haystack, &result, 0, n);
  2042. if (ret != NULL) {
  2043. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2044. } else {
  2045. RETVAL_FALSE;
  2046. }
  2047. } else {
  2048. len = (mblen - n);
  2049. ret = mbfl_substr(&haystack, &result, n, len);
  2050. if (ret != NULL) {
  2051. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2052. } else {
  2053. RETVAL_FALSE;
  2054. }
  2055. }
  2056. } else {
  2057. RETVAL_FALSE;
  2058. }
  2059. }
  2060. /* }}} */
  2061. /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
  2062. Finds the last occurrence of a character in a string within another */
  2063. PHP_FUNCTION(mb_strrchr)
  2064. {
  2065. int n, len, mblen;
  2066. mbfl_string haystack, needle, result, *ret = NULL;
  2067. char *enc_name = NULL;
  2068. int enc_name_len;
  2069. zend_bool part = 0;
  2070. mbfl_string_init(&haystack);
  2071. mbfl_string_init(&needle);
  2072. haystack.no_language = MBSTRG(language);
  2073. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2074. needle.no_language = MBSTRG(language);
  2075. needle.no_encoding = MBSTRG(current_internal_encoding);
  2076. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
  2077. RETURN_FALSE;
  2078. }
  2079. if (enc_name != NULL) {
  2080. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  2081. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2082. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2083. RETURN_FALSE;
  2084. }
  2085. }
  2086. if (haystack.len <= 0) {
  2087. RETURN_FALSE;
  2088. }
  2089. if (needle.len <= 0) {
  2090. RETURN_FALSE;
  2091. }
  2092. n = mbfl_strpos(&haystack, &needle, 0, 1);
  2093. if (n >= 0) {
  2094. mblen = mbfl_strlen(&haystack);
  2095. if (part) {
  2096. ret = mbfl_substr(&haystack, &result, 0, n);
  2097. if (ret != NULL) {
  2098. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2099. } else {
  2100. RETVAL_FALSE;
  2101. }
  2102. } else {
  2103. len = (mblen - n);
  2104. ret = mbfl_substr(&haystack, &result, n, len);
  2105. if (ret != NULL) {
  2106. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2107. } else {
  2108. RETVAL_FALSE;
  2109. }
  2110. }
  2111. } else {
  2112. RETVAL_FALSE;
  2113. }
  2114. }
  2115. /* }}} */
  2116. /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
  2117. Finds first occurrence of a string within another, case insensitive */
  2118. PHP_FUNCTION(mb_stristr)
  2119. {
  2120. zend_bool part = 0;
  2121. unsigned int from_encoding_len, len, mblen;
  2122. int n;
  2123. mbfl_string haystack, needle, result, *ret = NULL;
  2124. const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2125. mbfl_string_init(&haystack);
  2126. mbfl_string_init(&needle);
  2127. haystack.no_language = MBSTRG(language);
  2128. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2129. needle.no_language = MBSTRG(language);
  2130. needle.no_encoding = MBSTRG(current_internal_encoding);
  2131. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
  2132. RETURN_FALSE;
  2133. }
  2134. if (!needle.len) {
  2135. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  2136. RETURN_FALSE;
  2137. }
  2138. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
  2139. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2140. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
  2141. RETURN_FALSE;
  2142. }
  2143. n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
  2144. if (n <0) {
  2145. RETURN_FALSE;
  2146. }
  2147. mblen = mbfl_strlen(&haystack);
  2148. if (part) {
  2149. ret = mbfl_substr(&haystack, &result, 0, n);
  2150. if (ret != NULL) {
  2151. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2152. } else {
  2153. RETVAL_FALSE;
  2154. }
  2155. } else {
  2156. len = (mblen - n);
  2157. ret = mbfl_substr(&haystack, &result, n, len);
  2158. if (ret != NULL) {
  2159. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2160. } else {
  2161. RETVAL_FALSE;
  2162. }
  2163. }
  2164. }
  2165. /* }}} */
  2166. /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
  2167. Finds the last occurrence of a character in a string within another, case insensitive */
  2168. PHP_FUNCTION(mb_strrichr)
  2169. {
  2170. zend_bool part = 0;
  2171. int n, from_encoding_len, len, mblen;
  2172. mbfl_string haystack, needle, result, *ret = NULL;
  2173. char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2174. mbfl_string_init(&haystack);
  2175. mbfl_string_init(&needle);
  2176. haystack.no_language = MBSTRG(language);
  2177. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2178. needle.no_language = MBSTRG(language);
  2179. needle.no_encoding = MBSTRG(current_internal_encoding);
  2180. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
  2181. RETURN_FALSE;
  2182. }
  2183. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
  2184. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2185. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
  2186. RETURN_FALSE;
  2187. }
  2188. n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
  2189. if (n <0) {
  2190. RETURN_FALSE;
  2191. }
  2192. mblen = mbfl_strlen(&haystack);
  2193. if (part) {
  2194. ret = mbfl_substr(&haystack, &result, 0, n);
  2195. if (ret != NULL) {
  2196. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2197. } else {
  2198. RETVAL_FALSE;
  2199. }
  2200. } else {
  2201. len = (mblen - n);
  2202. ret = mbfl_substr(&haystack, &result, n, len);
  2203. if (ret != NULL) {
  2204. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2205. } else {
  2206. RETVAL_FALSE;
  2207. }
  2208. }
  2209. }
  2210. /* }}} */
  2211. /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
  2212. Count the number of substring occurrences */
  2213. PHP_FUNCTION(mb_substr_count)
  2214. {
  2215. int n;
  2216. mbfl_string haystack, needle;
  2217. char *enc_name = NULL;
  2218. int enc_name_len;
  2219. mbfl_string_init(&haystack);
  2220. mbfl_string_init(&needle);
  2221. haystack.no_language = MBSTRG(language);
  2222. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2223. needle.no_language = MBSTRG(language);
  2224. needle.no_encoding = MBSTRG(current_internal_encoding);
  2225. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
  2226. return;
  2227. }
  2228. if (enc_name != NULL) {
  2229. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  2230. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2231. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2232. RETURN_FALSE;
  2233. }
  2234. }
  2235. if (needle.len <= 0) {
  2236. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
  2237. RETURN_FALSE;
  2238. }
  2239. n = mbfl_substr_count(&haystack, &needle);
  2240. if (n >= 0) {
  2241. RETVAL_LONG(n);
  2242. } else {
  2243. RETVAL_FALSE;
  2244. }
  2245. }
  2246. /* }}} */
  2247. /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
  2248. Returns part of a string */
  2249. PHP_FUNCTION(mb_substr)
  2250. {
  2251. size_t argc = ZEND_NUM_ARGS();
  2252. char *str, *encoding;
  2253. long from, len;
  2254. int mblen, str_len, encoding_len;
  2255. mbfl_string string, result, *ret;
  2256. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", &str, &str_len, &from, &len, &encoding, &encoding_len) == FAILURE) {
  2257. return;
  2258. }
  2259. mbfl_string_init(&string);
  2260. string.no_language = MBSTRG(language);
  2261. string.no_encoding = MBSTRG(current_internal_encoding);
  2262. if (argc == 4) {
  2263. string.no_encoding = mbfl_name2no_encoding(encoding);
  2264. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2265. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  2266. RETURN_FALSE;
  2267. }
  2268. }
  2269. string.val = (unsigned char *)str;
  2270. string.len = str_len;
  2271. if (argc < 3) {
  2272. len = str_len;
  2273. }
  2274. /* measures length */
  2275. mblen = 0;
  2276. if (from < 0 || len < 0) {
  2277. mblen = mbfl_strlen(&string);
  2278. }
  2279. /* if "from" position is negative, count start position from the end
  2280. * of the string
  2281. */
  2282. if (from < 0) {
  2283. from = mblen + from;
  2284. if (from < 0) {
  2285. from = 0;
  2286. }
  2287. }
  2288. /* if "length" position is negative, set it to the length
  2289. * needed to stop that many chars from the end of the string
  2290. */
  2291. if (len < 0) {
  2292. len = (mblen - from) + len;
  2293. if (len < 0) {
  2294. len = 0;
  2295. }
  2296. }
  2297. if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
  2298. && (from >= mbfl_strlen(&string))) {
  2299. RETURN_FALSE;
  2300. }
  2301. ret = mbfl_substr(&string, &result, from, len);
  2302. if (NULL == ret) {
  2303. RETURN_FALSE;
  2304. }
  2305. RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2306. }
  2307. /* }}} */
  2308. /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
  2309. Returns part of a string */
  2310. PHP_FUNCTION(mb_strcut)
  2311. {
  2312. size_t argc = ZEND_NUM_ARGS();
  2313. char *encoding;
  2314. long from, len;
  2315. int encoding_len;
  2316. mbfl_string string, result, *ret;
  2317. mbfl_string_init(&string);
  2318. string.no_language = MBSTRG(language);
  2319. string.no_encoding = MBSTRG(current_internal_encoding);
  2320. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) {
  2321. return;
  2322. }
  2323. if (argc == 4) {
  2324. string.no_encoding = mbfl_name2no_encoding(encoding);
  2325. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2326. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  2327. RETURN_FALSE;
  2328. }
  2329. }
  2330. if (argc < 3) {
  2331. len = string.len;
  2332. }
  2333. /* if "from" position is negative, count start position from the end
  2334. * of the string
  2335. */
  2336. if (from < 0) {
  2337. from = string.len + from;
  2338. if (from < 0) {
  2339. from = 0;
  2340. }
  2341. }
  2342. /* if "length" position is negative, set it to the length
  2343. * needed to stop that many chars from the end of the string
  2344. */
  2345. if (len < 0) {
  2346. len = (string.len - from) + len;
  2347. if (len < 0) {
  2348. len = 0;
  2349. }
  2350. }
  2351. if ((unsigned int)from > string.len) {
  2352. RETURN_FALSE;
  2353. }
  2354. if (((unsigned int)from + (unsigned int)len) > string.len) {
  2355. len = string.len - from;
  2356. }
  2357. ret = mbfl_strcut(&string, &result, from, len);
  2358. if (ret == NULL) {
  2359. RETURN_FALSE;
  2360. }
  2361. RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2362. }
  2363. /* }}} */
  2364. /* {{{ proto int mb_strwidth(string str [, string encoding])
  2365. Gets terminal width of a string */
  2366. PHP_FUNCTION(mb_strwidth)
  2367. {
  2368. int n;
  2369. mbfl_string string;
  2370. char *enc_name = NULL;
  2371. int enc_name_len;
  2372. mbfl_string_init(&string);
  2373. string.no_language = MBSTRG(language);
  2374. string.no_encoding = MBSTRG(current_internal_encoding);
  2375. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  2376. return;
  2377. }
  2378. if (enc_name != NULL) {
  2379. string.no_encoding = mbfl_name2no_encoding(enc_name);
  2380. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2381. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2382. RETURN_FALSE;
  2383. }
  2384. }
  2385. n = mbfl_strwidth(&string);
  2386. if (n >= 0) {
  2387. RETVAL_LONG(n);
  2388. } else {
  2389. RETVAL_FALSE;
  2390. }
  2391. }
  2392. /* }}} */
  2393. /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
  2394. Trim the string in terminal width */
  2395. PHP_FUNCTION(mb_strimwidth)
  2396. {
  2397. char *str, *trimmarker, *encoding;
  2398. long from, width;
  2399. int str_len, trimmarker_len, encoding_len;
  2400. mbfl_string string, result, marker, *ret;
  2401. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
  2402. return;
  2403. }
  2404. mbfl_string_init(&string);
  2405. mbfl_string_init(&marker);
  2406. string.no_language = MBSTRG(language);
  2407. string.no_encoding = MBSTRG(current_internal_encoding);
  2408. marker.no_language = MBSTRG(language);
  2409. marker.no_encoding = MBSTRG(current_internal_encoding);
  2410. marker.val = NULL;
  2411. marker.len = 0;
  2412. if (ZEND_NUM_ARGS() == 5) {
  2413. string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
  2414. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2415. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  2416. RETURN_FALSE;
  2417. }
  2418. }
  2419. string.val = (unsigned char *)str;
  2420. string.len = str_len;
  2421. if (from < 0 || from > str_len) {
  2422. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
  2423. RETURN_FALSE;
  2424. }
  2425. if (width < 0) {
  2426. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
  2427. RETURN_FALSE;
  2428. }
  2429. if (ZEND_NUM_ARGS() >= 4) {
  2430. marker.val = (unsigned char *)trimmarker;
  2431. marker.len = trimmarker_len;
  2432. }
  2433. ret = mbfl_strimwidth(&string, &marker, &result, from, width);
  2434. if (ret == NULL) {
  2435. RETURN_FALSE;
  2436. }
  2437. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2438. }
  2439. /* }}} */
  2440. /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
  2441. MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
  2442. {
  2443. mbfl_string string, result, *ret;
  2444. enum mbfl_no_encoding from_encoding, to_encoding;
  2445. mbfl_buffer_converter *convd;
  2446. int size, *list;
  2447. char *output=NULL;
  2448. if (output_len) {
  2449. *output_len = 0;
  2450. }
  2451. if (!input) {
  2452. return NULL;
  2453. }
  2454. /* new encoding */
  2455. if (_to_encoding && strlen(_to_encoding)) {
  2456. to_encoding = mbfl_name2no_encoding(_to_encoding);
  2457. if (to_encoding == mbfl_no_encoding_invalid) {
  2458. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
  2459. return NULL;
  2460. }
  2461. } else {
  2462. to_encoding = MBSTRG(current_internal_encoding);
  2463. }
  2464. /* initialize string */
  2465. mbfl_string_init(&string);
  2466. mbfl_string_init(&result);
  2467. from_encoding = MBSTRG(current_internal_encoding);
  2468. string.no_encoding = from_encoding;
  2469. string.no_language = MBSTRG(language);
  2470. string.val = (unsigned char *)input;
  2471. string.len = length;
  2472. /* pre-conversion encoding */
  2473. if (_from_encodings) {
  2474. list = NULL;
  2475. size = 0;
  2476. php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
  2477. if (size == 1) {
  2478. from_encoding = *list;
  2479. string.no_encoding = from_encoding;
  2480. } else if (size > 1) {
  2481. /* auto detect */
  2482. from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection));
  2483. if (from_encoding != mbfl_no_encoding_invalid) {
  2484. string.no_encoding = from_encoding;
  2485. } else {
  2486. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
  2487. from_encoding = mbfl_no_encoding_pass;
  2488. to_encoding = from_encoding;
  2489. string.no_encoding = from_encoding;
  2490. }
  2491. } else {
  2492. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
  2493. }
  2494. if (list != NULL) {
  2495. efree((void *)list);
  2496. }
  2497. }
  2498. /* initialize converter */
  2499. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  2500. if (convd == NULL) {
  2501. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
  2502. return NULL;
  2503. }
  2504. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  2505. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  2506. /* do it */
  2507. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2508. if (ret) {
  2509. if (output_len) {
  2510. *output_len = ret->len;
  2511. }
  2512. output = (char *)ret->val;
  2513. }
  2514. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  2515. mbfl_buffer_converter_delete(convd);
  2516. return output;
  2517. }
  2518. /* }}} */
  2519. /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
  2520. Returns converted string in desired encoding */
  2521. PHP_FUNCTION(mb_convert_encoding)
  2522. {
  2523. char *arg_str, *arg_new;
  2524. int str_len, new_len;
  2525. zval *arg_old;
  2526. int i;
  2527. size_t size, l, n;
  2528. char *_from_encodings = NULL, *ret, *s_free = NULL;
  2529. zval **hash_entry;
  2530. HashTable *target_hash;
  2531. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
  2532. return;
  2533. }
  2534. if (ZEND_NUM_ARGS() == 3) {
  2535. switch (Z_TYPE_P(arg_old)) {
  2536. case IS_ARRAY:
  2537. target_hash = Z_ARRVAL_P(arg_old);
  2538. zend_hash_internal_pointer_reset(target_hash);
  2539. i = zend_hash_num_elements(target_hash);
  2540. _from_encodings = NULL;
  2541. while (i > 0) {
  2542. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  2543. break;
  2544. }
  2545. convert_to_string_ex(hash_entry);
  2546. if ( _from_encodings) {
  2547. l = strlen(_from_encodings);
  2548. n = strlen(Z_STRVAL_PP(hash_entry));
  2549. _from_encodings = erealloc(_from_encodings, l+n+2);
  2550. strcpy(_from_encodings+l, ",");
  2551. strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
  2552. } else {
  2553. _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
  2554. }
  2555. zend_hash_move_forward(target_hash);
  2556. i--;
  2557. }
  2558. if (_from_encodings != NULL && !strlen(_from_encodings)) {
  2559. efree(_from_encodings);
  2560. _from_encodings = NULL;
  2561. }
  2562. s_free = _from_encodings;
  2563. break;
  2564. default:
  2565. convert_to_string(arg_old);
  2566. _from_encodings = Z_STRVAL_P(arg_old);
  2567. break;
  2568. }
  2569. }
  2570. /* new encoding */
  2571. ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
  2572. if (ret != NULL) {
  2573. RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */
  2574. } else {
  2575. RETVAL_FALSE;
  2576. }
  2577. if ( s_free) {
  2578. efree(s_free);
  2579. }
  2580. }
  2581. /* }}} */
  2582. /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
  2583. Returns a case-folded version of sourcestring */
  2584. PHP_FUNCTION(mb_convert_case)
  2585. {
  2586. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2587. int str_len, from_encoding_len;
  2588. long case_mode = 0;
  2589. char *newstr;
  2590. size_t ret_len;
  2591. RETVAL_FALSE;
  2592. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
  2593. &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
  2594. RETURN_FALSE;
  2595. newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  2596. if (newstr) {
  2597. RETVAL_STRINGL(newstr, ret_len, 0);
  2598. }
  2599. }
  2600. /* }}} */
  2601. /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
  2602. * Returns a uppercased version of sourcestring
  2603. */
  2604. PHP_FUNCTION(mb_strtoupper)
  2605. {
  2606. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2607. int str_len, from_encoding_len;
  2608. char *newstr;
  2609. size_t ret_len;
  2610. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  2611. &from_encoding, &from_encoding_len) == FAILURE) {
  2612. return;
  2613. }
  2614. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  2615. if (newstr) {
  2616. RETURN_STRINGL(newstr, ret_len, 0);
  2617. }
  2618. RETURN_FALSE;
  2619. }
  2620. /* }}} */
  2621. /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
  2622. * Returns a lowercased version of sourcestring
  2623. */
  2624. PHP_FUNCTION(mb_strtolower)
  2625. {
  2626. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2627. int str_len, from_encoding_len;
  2628. char *newstr;
  2629. size_t ret_len;
  2630. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  2631. &from_encoding, &from_encoding_len) == FAILURE) {
  2632. return;
  2633. }
  2634. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  2635. if (newstr) {
  2636. RETURN_STRINGL(newstr, ret_len, 0);
  2637. }
  2638. RETURN_FALSE;
  2639. }
  2640. /* }}} */
  2641. /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
  2642. Encodings of the given string is returned (as a string) */
  2643. PHP_FUNCTION(mb_detect_encoding)
  2644. {
  2645. char *str;
  2646. int str_len;
  2647. zend_bool strict=0;
  2648. zval *encoding_list;
  2649. mbfl_string string;
  2650. const char *ret;
  2651. enum mbfl_no_encoding *elist;
  2652. int size, *list;
  2653. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
  2654. return;
  2655. }
  2656. /* make encoding list */
  2657. list = NULL;
  2658. size = 0;
  2659. if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_P(encoding_list)) {
  2660. switch (Z_TYPE_P(encoding_list)) {
  2661. case IS_ARRAY:
  2662. if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
  2663. if (list) {
  2664. efree(list);
  2665. list = NULL;
  2666. size = 0;
  2667. }
  2668. }
  2669. break;
  2670. default:
  2671. convert_to_string(encoding_list);
  2672. if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
  2673. if (list) {
  2674. efree(list);
  2675. list = NULL;
  2676. size = 0;
  2677. }
  2678. }
  2679. break;
  2680. }
  2681. if (size <= 0) {
  2682. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
  2683. }
  2684. }
  2685. if (ZEND_NUM_ARGS() < 3) {
  2686. strict = (zend_bool)MBSTRG(strict_detection);
  2687. }
  2688. if (size > 0 && list != NULL) {
  2689. elist = list;
  2690. } else {
  2691. elist = MBSTRG(current_detect_order_list);
  2692. size = MBSTRG(current_detect_order_list_size);
  2693. }
  2694. mbfl_string_init(&string);
  2695. string.no_language = MBSTRG(language);
  2696. string.val = (unsigned char *)str;
  2697. string.len = str_len;
  2698. ret = mbfl_identify_encoding_name(&string, elist, size, strict);
  2699. if (list != NULL) {
  2700. efree((void *)list);
  2701. }
  2702. if (ret == NULL) {
  2703. RETURN_FALSE;
  2704. }
  2705. RETVAL_STRING((char *)ret, 1);
  2706. }
  2707. /* }}} */
  2708. /* {{{ proto mixed mb_list_encodings()
  2709. Returns an array of all supported entity encodings */
  2710. PHP_FUNCTION(mb_list_encodings)
  2711. {
  2712. const mbfl_encoding **encodings;
  2713. const mbfl_encoding *encoding;
  2714. int i;
  2715. array_init(return_value);
  2716. i = 0;
  2717. encodings = mbfl_get_supported_encodings();
  2718. while ((encoding = encodings[i++]) != NULL) {
  2719. add_next_index_string(return_value, (char *) encoding->name, 1);
  2720. }
  2721. }
  2722. /* }}} */
  2723. /* {{{ proto array mb_encoding_aliases(string encoding)
  2724. Returns an array of the aliases of a given encoding name */
  2725. PHP_FUNCTION(mb_encoding_aliases)
  2726. {
  2727. const mbfl_encoding *encoding;
  2728. char *name = NULL;
  2729. int name_len;
  2730. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
  2731. RETURN_FALSE;
  2732. }
  2733. encoding = mbfl_name2encoding(name);
  2734. if (!encoding) {
  2735. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  2736. RETURN_FALSE;
  2737. }
  2738. array_init(return_value);
  2739. if (encoding->aliases != NULL) {
  2740. const char **alias;
  2741. for (alias = *encoding->aliases; *alias; ++alias) {
  2742. add_next_index_string(return_value, (char *)*alias, 1);
  2743. }
  2744. }
  2745. }
  2746. /* }}} */
  2747. /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
  2748. Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
  2749. PHP_FUNCTION(mb_encode_mimeheader)
  2750. {
  2751. enum mbfl_no_encoding charset, transenc;
  2752. mbfl_string string, result, *ret;
  2753. char *charset_name = NULL;
  2754. int charset_name_len;
  2755. char *trans_enc_name = NULL;
  2756. int trans_enc_name_len;
  2757. char *linefeed = "\r\n";
  2758. int linefeed_len;
  2759. long indent = 0;
  2760. mbfl_string_init(&string);
  2761. string.no_language = MBSTRG(language);
  2762. string.no_encoding = MBSTRG(current_internal_encoding);
  2763. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
  2764. return;
  2765. }
  2766. charset = mbfl_no_encoding_pass;
  2767. transenc = mbfl_no_encoding_base64;
  2768. if (charset_name != NULL) {
  2769. charset = mbfl_name2no_encoding(charset_name);
  2770. if (charset == mbfl_no_encoding_invalid) {
  2771. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
  2772. RETURN_FALSE;
  2773. }
  2774. } else {
  2775. const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
  2776. if (lang != NULL) {
  2777. charset = lang->mail_charset;
  2778. transenc = lang->mail_header_encoding;
  2779. }
  2780. }
  2781. if (trans_enc_name != NULL) {
  2782. if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
  2783. transenc = mbfl_no_encoding_base64;
  2784. } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
  2785. transenc = mbfl_no_encoding_qprint;
  2786. }
  2787. }
  2788. mbfl_string_init(&result);
  2789. ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
  2790. if (ret != NULL) {
  2791. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2792. } else {
  2793. RETVAL_FALSE;
  2794. }
  2795. }
  2796. /* }}} */
  2797. /* {{{ proto string mb_decode_mimeheader(string string)
  2798. Decodes the MIME "encoded-word" in the string */
  2799. PHP_FUNCTION(mb_decode_mimeheader)
  2800. {
  2801. mbfl_string string, result, *ret;
  2802. mbfl_string_init(&string);
  2803. string.no_language = MBSTRG(language);
  2804. string.no_encoding = MBSTRG(current_internal_encoding);
  2805. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
  2806. return;
  2807. }
  2808. mbfl_string_init(&result);
  2809. ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
  2810. if (ret != NULL) {
  2811. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2812. } else {
  2813. RETVAL_FALSE;
  2814. }
  2815. }
  2816. /* }}} */
  2817. /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
  2818. Conversion between full-width character and half-width character (Japanese) */
  2819. PHP_FUNCTION(mb_convert_kana)
  2820. {
  2821. int opt, i;
  2822. mbfl_string string, result, *ret;
  2823. char *optstr = NULL;
  2824. int optstr_len;
  2825. char *encname = NULL;
  2826. int encname_len;
  2827. mbfl_string_init(&string);
  2828. string.no_language = MBSTRG(language);
  2829. string.no_encoding = MBSTRG(current_internal_encoding);
  2830. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
  2831. return;
  2832. }
  2833. /* option */
  2834. if (optstr != NULL) {
  2835. char *p = optstr;
  2836. int n = optstr_len;
  2837. i = 0;
  2838. opt = 0;
  2839. while (i < n) {
  2840. i++;
  2841. switch (*p++) {
  2842. case 'A':
  2843. opt |= 0x1;
  2844. break;
  2845. case 'a':
  2846. opt |= 0x10;
  2847. break;
  2848. case 'R':
  2849. opt |= 0x2;
  2850. break;
  2851. case 'r':
  2852. opt |= 0x20;
  2853. break;
  2854. case 'N':
  2855. opt |= 0x4;
  2856. break;
  2857. case 'n':
  2858. opt |= 0x40;
  2859. break;
  2860. case 'S':
  2861. opt |= 0x8;
  2862. break;
  2863. case 's':
  2864. opt |= 0x80;
  2865. break;
  2866. case 'K':
  2867. opt |= 0x100;
  2868. break;
  2869. case 'k':
  2870. opt |= 0x1000;
  2871. break;
  2872. case 'H':
  2873. opt |= 0x200;
  2874. break;
  2875. case 'h':
  2876. opt |= 0x2000;
  2877. break;
  2878. case 'V':
  2879. opt |= 0x800;
  2880. break;
  2881. case 'C':
  2882. opt |= 0x10000;
  2883. break;
  2884. case 'c':
  2885. opt |= 0x20000;
  2886. break;
  2887. case 'M':
  2888. opt |= 0x100000;
  2889. break;
  2890. case 'm':
  2891. opt |= 0x200000;
  2892. break;
  2893. }
  2894. }
  2895. } else {
  2896. opt = 0x900;
  2897. }
  2898. /* encoding */
  2899. if (encname != NULL) {
  2900. string.no_encoding = mbfl_name2no_encoding(encname);
  2901. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2902. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
  2903. RETURN_FALSE;
  2904. }
  2905. }
  2906. ret = mbfl_ja_jp_hantozen(&string, &result, opt);
  2907. if (ret != NULL) {
  2908. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2909. } else {
  2910. RETVAL_FALSE;
  2911. }
  2912. }
  2913. /* }}} */
  2914. #define PHP_MBSTR_STACK_BLOCK_SIZE 32
  2915. /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
  2916. Converts the string resource in variables to desired encoding */
  2917. PHP_FUNCTION(mb_convert_variables)
  2918. {
  2919. zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
  2920. HashTable *target_hash;
  2921. mbfl_string string, result, *ret;
  2922. enum mbfl_no_encoding from_encoding, to_encoding;
  2923. mbfl_encoding_detector *identd;
  2924. mbfl_buffer_converter *convd;
  2925. int n, to_enc_len, argc, stack_level, stack_max, elistsz;
  2926. enum mbfl_no_encoding *elist;
  2927. char *name, *to_enc;
  2928. void *ptmp;
  2929. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
  2930. return;
  2931. }
  2932. /* new encoding */
  2933. to_encoding = mbfl_name2no_encoding(to_enc);
  2934. if (to_encoding == mbfl_no_encoding_invalid) {
  2935. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
  2936. efree(args);
  2937. RETURN_FALSE;
  2938. }
  2939. /* initialize string */
  2940. mbfl_string_init(&string);
  2941. mbfl_string_init(&result);
  2942. from_encoding = MBSTRG(current_internal_encoding);
  2943. string.no_encoding = from_encoding;
  2944. string.no_language = MBSTRG(language);
  2945. /* pre-conversion encoding */
  2946. elist = NULL;
  2947. elistsz = 0;
  2948. switch (Z_TYPE_PP(zfrom_enc)) {
  2949. case IS_ARRAY:
  2950. php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
  2951. break;
  2952. default:
  2953. convert_to_string_ex(zfrom_enc);
  2954. php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
  2955. break;
  2956. }
  2957. if (elistsz <= 0) {
  2958. from_encoding = mbfl_no_encoding_pass;
  2959. } else if (elistsz == 1) {
  2960. from_encoding = *elist;
  2961. } else {
  2962. /* auto detect */
  2963. from_encoding = mbfl_no_encoding_invalid;
  2964. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  2965. stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
  2966. stack_level = 0;
  2967. identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
  2968. if (identd != NULL) {
  2969. n = 0;
  2970. while (n < argc || stack_level > 0) {
  2971. if (stack_level <= 0) {
  2972. var = args[n++];
  2973. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2974. target_hash = HASH_OF(*var);
  2975. if (target_hash != NULL) {
  2976. zend_hash_internal_pointer_reset(target_hash);
  2977. }
  2978. }
  2979. } else {
  2980. stack_level--;
  2981. var = stack[stack_level];
  2982. }
  2983. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2984. target_hash = HASH_OF(*var);
  2985. if (target_hash != NULL) {
  2986. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  2987. zend_hash_move_forward(target_hash);
  2988. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  2989. if (stack_level >= stack_max) {
  2990. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  2991. ptmp = erealloc(stack, sizeof(zval **)*stack_max);
  2992. stack = (zval ***)ptmp;
  2993. }
  2994. stack[stack_level] = var;
  2995. stack_level++;
  2996. var = hash_entry;
  2997. target_hash = HASH_OF(*var);
  2998. if (target_hash != NULL) {
  2999. zend_hash_internal_pointer_reset(target_hash);
  3000. continue;
  3001. }
  3002. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  3003. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  3004. string.len = Z_STRLEN_PP(hash_entry);
  3005. if (mbfl_encoding_detector_feed(identd, &string)) {
  3006. goto detect_end; /* complete detecting */
  3007. }
  3008. }
  3009. }
  3010. }
  3011. } else if (Z_TYPE_PP(var) == IS_STRING) {
  3012. string.val = (unsigned char *)Z_STRVAL_PP(var);
  3013. string.len = Z_STRLEN_PP(var);
  3014. if (mbfl_encoding_detector_feed(identd, &string)) {
  3015. goto detect_end; /* complete detecting */
  3016. }
  3017. }
  3018. }
  3019. detect_end:
  3020. from_encoding = mbfl_encoding_detector_judge(identd);
  3021. mbfl_encoding_detector_delete(identd);
  3022. }
  3023. efree(stack);
  3024. if (from_encoding == mbfl_no_encoding_invalid) {
  3025. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
  3026. from_encoding = mbfl_no_encoding_pass;
  3027. }
  3028. }
  3029. if (elist != NULL) {
  3030. efree((void *)elist);
  3031. }
  3032. /* create converter */
  3033. convd = NULL;
  3034. if (from_encoding != mbfl_no_encoding_pass) {
  3035. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
  3036. if (convd == NULL) {
  3037. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
  3038. RETURN_FALSE;
  3039. }
  3040. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  3041. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  3042. }
  3043. /* convert */
  3044. if (convd != NULL) {
  3045. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  3046. stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
  3047. stack_level = 0;
  3048. n = 0;
  3049. while (n < argc || stack_level > 0) {
  3050. if (stack_level <= 0) {
  3051. var = args[n++];
  3052. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  3053. target_hash = HASH_OF(*var);
  3054. if (target_hash != NULL) {
  3055. zend_hash_internal_pointer_reset(target_hash);
  3056. }
  3057. }
  3058. } else {
  3059. stack_level--;
  3060. var = stack[stack_level];
  3061. }
  3062. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  3063. target_hash = HASH_OF(*var);
  3064. if (target_hash != NULL) {
  3065. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  3066. zend_hash_move_forward(target_hash);
  3067. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  3068. if (stack_level >= stack_max) {
  3069. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  3070. ptmp = erealloc(stack, sizeof(zval **)*stack_max);
  3071. stack = (zval ***)ptmp;
  3072. }
  3073. stack[stack_level] = var;
  3074. stack_level++;
  3075. var = hash_entry;
  3076. SEPARATE_ZVAL(hash_entry);
  3077. target_hash = HASH_OF(*var);
  3078. if (target_hash != NULL) {
  3079. zend_hash_internal_pointer_reset(target_hash);
  3080. continue;
  3081. }
  3082. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  3083. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  3084. string.len = Z_STRLEN_PP(hash_entry);
  3085. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3086. if (ret != NULL) {
  3087. if (Z_REFCOUNT_PP(hash_entry) > 1) {
  3088. Z_DELREF_PP(hash_entry);
  3089. MAKE_STD_ZVAL(*hash_entry);
  3090. } else {
  3091. zval_dtor(*hash_entry);
  3092. }
  3093. ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
  3094. }
  3095. }
  3096. }
  3097. }
  3098. } else if (Z_TYPE_PP(var) == IS_STRING) {
  3099. string.val = (unsigned char *)Z_STRVAL_PP(var);
  3100. string.len = Z_STRLEN_PP(var);
  3101. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3102. if (ret != NULL) {
  3103. zval_dtor(*var);
  3104. ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
  3105. }
  3106. }
  3107. }
  3108. efree(stack);
  3109. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  3110. mbfl_buffer_converter_delete(convd);
  3111. }
  3112. efree(args);
  3113. name = (char *)mbfl_no_encoding2name(from_encoding);
  3114. if (name != NULL) {
  3115. RETURN_STRING(name, 1);
  3116. } else {
  3117. RETURN_FALSE;
  3118. }
  3119. }
  3120. /* }}} */
  3121. /* {{{ HTML numeric entity */
  3122. /* {{{ static void php_mb_numericentity_exec() */
  3123. static void
  3124. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
  3125. {
  3126. char *str, *encoding;
  3127. int str_len, encoding_len;
  3128. zval *zconvmap, **hash_entry;
  3129. HashTable *target_hash;
  3130. size_t argc = ZEND_NUM_ARGS();
  3131. int i, *convmap, *mapelm, mapsize=0;
  3132. mbfl_string string, result, *ret;
  3133. enum mbfl_no_encoding no_encoding;
  3134. if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
  3135. return;
  3136. }
  3137. mbfl_string_init(&string);
  3138. string.no_language = MBSTRG(language);
  3139. string.no_encoding = MBSTRG(current_internal_encoding);
  3140. string.val = (unsigned char *)str;
  3141. string.len = str_len;
  3142. /* encoding */
  3143. if (argc == 3) {
  3144. no_encoding = mbfl_name2no_encoding(encoding);
  3145. if (no_encoding == mbfl_no_encoding_invalid) {
  3146. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  3147. RETURN_FALSE;
  3148. } else {
  3149. string.no_encoding = no_encoding;
  3150. }
  3151. }
  3152. /* conversion map */
  3153. convmap = NULL;
  3154. if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
  3155. target_hash = Z_ARRVAL_P(zconvmap);
  3156. zend_hash_internal_pointer_reset(target_hash);
  3157. i = zend_hash_num_elements(target_hash);
  3158. if (i > 0) {
  3159. convmap = (int *)safe_emalloc(i, sizeof(int), 0);
  3160. mapelm = convmap;
  3161. mapsize = 0;
  3162. while (i > 0) {
  3163. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  3164. break;
  3165. }
  3166. convert_to_long_ex(hash_entry);
  3167. *mapelm++ = Z_LVAL_PP(hash_entry);
  3168. mapsize++;
  3169. i--;
  3170. zend_hash_move_forward(target_hash);
  3171. }
  3172. }
  3173. }
  3174. if (convmap == NULL) {
  3175. RETURN_FALSE;
  3176. }
  3177. mapsize /= 4;
  3178. ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
  3179. if (ret != NULL) {
  3180. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  3181. } else {
  3182. RETVAL_FALSE;
  3183. }
  3184. efree((void *)convmap);
  3185. }
  3186. /* }}} */
  3187. /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
  3188. Converts specified characters to HTML numeric entities */
  3189. PHP_FUNCTION(mb_encode_numericentity)
  3190. {
  3191. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  3192. }
  3193. /* }}} */
  3194. /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
  3195. Converts HTML numeric entities to character code */
  3196. PHP_FUNCTION(mb_decode_numericentity)
  3197. {
  3198. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  3199. }
  3200. /* }}} */
  3201. /* }}} */
  3202. /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
  3203. * Sends an email message with MIME scheme
  3204. */
  /*
   * Skips a folded header separator (CRLF followed by blanks) at str[pos].
   * Expands to an if statement containing `continue`, so it may only be used
   * directly inside a loop body and cannot be wrapped in do { } while (0).
   */
  #define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
  	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
  		(pos) += 2; \
  		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
  			(pos)++; \
  		} \
  		continue; \
  	}

  /*
   * Replaces every NUL byte in the first `len` bytes of `str` with a space.
   * Uses the `char *pp, *ee` variables of the enclosing scope as scratch.
   * The body must not end in a line continuation: that would make the macro
   * swallow whatever line follows it.
   */
  #define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) do { \
  	pp = (str); \
  	ee = pp + (len); \
  	while ((pp = memchr(pp, '\0', (ee - pp)))) { \
  		*pp = ' '; \
  	} \
  } while (0)

  /*
   * Appends one character to the smart_str `token` of the enclosing scope.
   * While token.a is 0 the token only refers to the input buffer, so it is
   * enough to extend its length.
   */
  #define APPEND_ONE_CHAR(ch) do { \
  	if (token.a > 0) { \
  		smart_str_appendc(&token, ch); \
  	} else {\
  		token.len++; \
  	} \
  } while (0)

  /*
   * Turns a smart_str that only refers to foreign memory (a == 0) into one
   * that owns a copy: the capacity becomes the smallest power of two that is
   * not below len, and len bytes are copied into the new buffer.
   */
  #define SEPARATE_SMART_STR(str) do {\
  	if ((str)->a == 0) { \
  		char *tmp_ptr; \
  		(str)->a = 1; \
  		while ((str)->a < (str)->len) { \
  			(str)->a <<= 1; \
  		} \
  		tmp_ptr = emalloc((str)->a + 1); \
  		memcpy(tmp_ptr, (str)->c, (str)->len); \
  		(str)->c = tmp_ptr; \
  	} \
  } while (0)
  3238. static void my_smart_str_dtor(smart_str *s)
  3239. {
  3240. if (s->a > 0) {
  3241. smart_str_free(s);
  3242. }
  3243. }
  3244. static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
  3245. {
  3246. const char *ps;
  3247. size_t icnt;
  3248. int state = 0;
  3249. int crlf_state = -1;
  3250. smart_str token = { 0, 0, 0 };
  3251. smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
  3252. ps = str;
  3253. icnt = str_len;
  3254. /*
  3255. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  3256. * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
  3257. * state 0 1 2 3
  3258. *
  3259. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  3260. * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
  3261. * crlf_state -1 0 1 -1
  3262. *
  3263. */
  3264. while (icnt > 0) {
  3265. switch (*ps) {
  3266. case ':':
  3267. if (crlf_state == 1) {
  3268. APPEND_ONE_CHAR('\r');
  3269. }
  3270. if (state == 0 || state == 1) {
  3271. fld_name = token;
  3272. state = 2;
  3273. } else {
  3274. APPEND_ONE_CHAR(*ps);
  3275. }
  3276. crlf_state = 0;
  3277. break;
  3278. case '\n':
  3279. if (crlf_state == -1) {
  3280. goto out;
  3281. }
  3282. crlf_state = -1;
  3283. break;
  3284. case '\r':
  3285. if (crlf_state == 1) {
  3286. APPEND_ONE_CHAR('\r');
  3287. } else {
  3288. crlf_state = 1;
  3289. }
  3290. break;
  3291. case ' ': case '\t':
  3292. if (crlf_state == -1) {
  3293. if (state == 3) {
  3294. /* continuing from the previous line */
  3295. SEPARATE_SMART_STR(&token);
  3296. state = 4;
  3297. } else {
  3298. /* simply skipping this new line */
  3299. state = 5;
  3300. }
  3301. } else {
  3302. if (crlf_state == 1) {
  3303. APPEND_ONE_CHAR('\r');
  3304. }
  3305. if (state == 1 || state == 3) {
  3306. APPEND_ONE_CHAR(*ps);
  3307. }
  3308. }
  3309. crlf_state = 0;
  3310. break;
  3311. default:
  3312. switch (state) {
  3313. case 0:
  3314. token.c = (char *)ps;
  3315. token.len = 0;
  3316. token.a = 0;
  3317. state = 1;
  3318. break;
  3319. case 2:
  3320. if (crlf_state != -1) {
  3321. token.c = (char *)ps;
  3322. token.len = 0;
  3323. token.a = 0;
  3324. state = 3;
  3325. break;
  3326. }
  3327. /* break is missing intentionally */
  3328. case 3:
  3329. if (crlf_state == -1) {
  3330. fld_val = token;
  3331. if (fld_name.c != NULL && fld_val.c != NULL) {
  3332. char *dummy;
  3333. /* FIXME: some locale free implementation is
  3334. * really required here,,, */
  3335. SEPARATE_SMART_STR(&fld_name);
  3336. php_strtoupper(fld_name.c, fld_name.len);
  3337. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  3338. my_smart_str_dtor(&fld_name);
  3339. }
  3340. memset(&fld_name, 0, sizeof(smart_str));
  3341. memset(&fld_val, 0, sizeof(smart_str));
  3342. token.c = (char *)ps;
  3343. token.len = 0;
  3344. token.a = 0;
  3345. state = 1;
  3346. }
  3347. break;
  3348. case 4:
  3349. APPEND_ONE_CHAR(' ');
  3350. state = 3;
  3351. break;
  3352. }
  3353. if (crlf_state == 1) {
  3354. APPEND_ONE_CHAR('\r');
  3355. }
  3356. APPEND_ONE_CHAR(*ps);
  3357. crlf_state = 0;
  3358. break;
  3359. }
  3360. ps++, icnt--;
  3361. }
  3362. out:
  3363. if (state == 2) {
  3364. token.c = "";
  3365. token.len = 0;
  3366. token.a = 0;
  3367. state = 3;
  3368. }
  3369. if (state == 3) {
  3370. fld_val = token;
  3371. if (fld_name.c != NULL && fld_val.c != NULL) {
  3372. void *dummy;
  3373. /* FIXME: some locale free implementation is
  3374. * really required here,,, */
  3375. SEPARATE_SMART_STR(&fld_name);
  3376. php_strtoupper(fld_name.c, fld_name.len);
  3377. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  3378. my_smart_str_dtor(&fld_name);
  3379. }
  3380. }
  3381. return state;
  3382. }
  3383. PHP_FUNCTION(mb_send_mail)
  3384. {
  3385. int n;
  3386. char *to = NULL;
  3387. int to_len;
  3388. char *message = NULL;
  3389. int message_len;
  3390. char *headers = NULL;
  3391. int headers_len;
  3392. char *subject = NULL;
  3393. int subject_len;
  3394. char *extra_cmd = NULL;
  3395. int extra_cmd_len;
  3396. int i;
  3397. char *to_r = NULL;
  3398. char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
  3399. struct {
  3400. int cnt_type:1;
  3401. int cnt_trans_enc:1;
  3402. } suppressed_hdrs = { 0, 0 };
  3403. char *message_buf = NULL, *subject_buf = NULL, *p;
  3404. mbfl_string orig_str, conv_str;
  3405. mbfl_string *pstr; /* pointer to mbfl string for return value */
  3406. enum mbfl_no_encoding
  3407. tran_cs, /* transfar text charset */
  3408. head_enc, /* header transfar encoding */
  3409. body_enc; /* body transfar encoding */
  3410. mbfl_memory_device device; /* automatic allocateable buffer for additional header */
  3411. const mbfl_language *lang;
  3412. int err = 0;
  3413. HashTable ht_headers;
  3414. smart_str *s;
  3415. extern void mbfl_memory_device_unput(mbfl_memory_device *device);
  3416. char *pp, *ee;
  3417. if (PG(safe_mode) && (ZEND_NUM_ARGS() == 5)) {
  3418. php_error_docref(NULL TSRMLS_CC, E_WARNING, "SAFE MODE Restriction in effect. The fifth parameter is disabled in SAFE MODE.");
  3419. RETURN_FALSE;
  3420. }
  3421. /* initialize */
  3422. mbfl_memory_device_init(&device, 0, 0);
  3423. mbfl_string_init(&orig_str);
  3424. mbfl_string_init(&conv_str);
  3425. /* character-set, transfer-encoding */
  3426. tran_cs = mbfl_no_encoding_utf8;
  3427. head_enc = mbfl_no_encoding_base64;
  3428. body_enc = mbfl_no_encoding_base64;
  3429. lang = mbfl_no2language(MBSTRG(language));
  3430. if (lang != NULL) {
  3431. tran_cs = lang->mail_charset;
  3432. head_enc = lang->mail_header_encoding;
  3433. body_enc = lang->mail_body_encoding;
  3434. }
  3435. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
  3436. return;
  3437. }
  3438. /* ASCIIZ check */
  3439. MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
  3440. MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
  3441. MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
  3442. if (headers) {
  3443. MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
  3444. }
  3445. if (extra_cmd) {
  3446. MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
  3447. }
  3448. zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
  3449. if (headers != NULL) {
  3450. _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
  3451. }
  3452. if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
  3453. char *tmp;
  3454. char *param_name;
  3455. char *charset = NULL;
  3456. SEPARATE_SMART_STR(s);
  3457. smart_str_0(s);
  3458. p = strchr(s->c, ';');
  3459. if (p != NULL) {
  3460. /* skipping the padded spaces */
  3461. do {
  3462. ++p;
  3463. } while (*p == ' ' || *p == '\t');
  3464. if (*p != '\0') {
  3465. if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
  3466. if (strcasecmp(param_name, "charset") == 0) {
  3467. enum mbfl_no_encoding _tran_cs = tran_cs;
  3468. charset = php_strtok_r(NULL, "= \"", &tmp);
  3469. if (charset != NULL) {
  3470. _tran_cs = mbfl_name2no_encoding(charset);
  3471. }
  3472. if (_tran_cs == mbfl_no_encoding_invalid) {
  3473. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
  3474. _tran_cs = mbfl_no_encoding_ascii;
  3475. }
  3476. tran_cs = _tran_cs;
  3477. }
  3478. }
  3479. }
  3480. }
  3481. suppressed_hdrs.cnt_type = 1;
  3482. }
  3483. if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
  3484. enum mbfl_no_encoding _body_enc;
  3485. SEPARATE_SMART_STR(s);
  3486. smart_str_0(s);
  3487. _body_enc = mbfl_name2no_encoding(s->c);
  3488. switch (_body_enc) {
  3489. case mbfl_no_encoding_base64:
  3490. case mbfl_no_encoding_7bit:
  3491. case mbfl_no_encoding_8bit:
  3492. body_enc = _body_enc;
  3493. break;
  3494. default:
  3495. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
  3496. body_enc = mbfl_no_encoding_8bit;
  3497. break;
  3498. }
  3499. suppressed_hdrs.cnt_trans_enc = 1;
  3500. }
  3501. /* To: */
  3502. if (to != NULL) {
  3503. if (to_len > 0) {
  3504. to_r = estrndup(to, to_len);
  3505. for (; to_len; to_len--) {
  3506. if (!isspace((unsigned char) to_r[to_len - 1])) {
  3507. break;
  3508. }
  3509. to_r[to_len - 1] = '\0';
  3510. }
  3511. for (i = 0; to_r[i]; i++) {
  3512. if (iscntrl((unsigned char) to_r[i])) {
  3513. /* According to RFC 822, section 3.1.1 long headers may be separated into
  3514. * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
  3515. * To prevent these separators from being replaced with a space, we use the
  3516. * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
  3517. */
  3518. SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
  3519. to_r[i] = ' ';
  3520. }
  3521. }
  3522. } else {
  3523. to_r = to;
  3524. }
  3525. } else {
  3526. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
  3527. err = 1;
  3528. }
  3529. /* Subject: */
  3530. if (subject != NULL && subject_len >= 0) {
  3531. orig_str.no_language = MBSTRG(language);
  3532. orig_str.val = (unsigned char *)subject;
  3533. orig_str.len = subject_len;
  3534. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  3535. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  3536. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  3537. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3538. }
  3539. pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
  3540. if (pstr != NULL) {
  3541. subject_buf = subject = (char *)pstr->val;
  3542. }
  3543. } else {
  3544. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
  3545. err = 1;
  3546. }
  3547. /* message body */
  3548. if (message != NULL) {
  3549. orig_str.no_language = MBSTRG(language);
  3550. orig_str.val = (unsigned char *)message;
  3551. orig_str.len = (unsigned int)message_len;
  3552. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  3553. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  3554. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  3555. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3556. }
  3557. pstr = NULL;
  3558. {
  3559. mbfl_string tmpstr;
  3560. if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
  3561. tmpstr.no_encoding=mbfl_no_encoding_8bit;
  3562. pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
  3563. efree(tmpstr.val);
  3564. }
  3565. }
  3566. if (pstr != NULL) {
  3567. message_buf = message = (char *)pstr->val;
  3568. }
  3569. } else {
  3570. /* this is not really an error, so it is allowed. */
  3571. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
  3572. message = NULL;
  3573. }
  3574. /* other headers */
  3575. #define PHP_MBSTR_MAIL_MIME_HEADER1 "Mime-Version: 1.0"
  3576. #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
  3577. #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
  3578. #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
  3579. if (headers != NULL) {
  3580. p = headers;
  3581. n = headers_len;
  3582. mbfl_memory_device_strncat(&device, p, n);
  3583. if (n > 0 && p[n - 1] != '\n') {
  3584. mbfl_memory_device_strncat(&device, "\n", 1);
  3585. }
  3586. }
  3587. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
  3588. mbfl_memory_device_strncat(&device, "\n", 1);
  3589. if (!suppressed_hdrs.cnt_type) {
  3590. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
  3591. p = (char *)mbfl_no2preferred_mime_name(tran_cs);
  3592. if (p != NULL) {
  3593. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
  3594. mbfl_memory_device_strcat(&device, p);
  3595. }
  3596. mbfl_memory_device_strncat(&device, "\n", 1);
  3597. }
  3598. if (!suppressed_hdrs.cnt_trans_enc) {
  3599. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
  3600. p = (char *)mbfl_no2preferred_mime_name(body_enc);
  3601. if (p == NULL) {
  3602. p = "7bit";
  3603. }
  3604. mbfl_memory_device_strcat(&device, p);
  3605. mbfl_memory_device_strncat(&device, "\n", 1);
  3606. }
  3607. mbfl_memory_device_unput(&device);
  3608. mbfl_memory_device_output('\0', &device);
  3609. headers = (char *)device.buffer;
  3610. if (force_extra_parameters) {
  3611. extra_cmd = php_escape_shell_cmd(force_extra_parameters);
  3612. } else if (extra_cmd) {
  3613. extra_cmd = php_escape_shell_cmd(extra_cmd);
  3614. }
  3615. if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
  3616. RETVAL_TRUE;
  3617. } else {
  3618. RETVAL_FALSE;
  3619. }
  3620. if (extra_cmd) {
  3621. efree(extra_cmd);
  3622. }
  3623. if (to_r != to) {
  3624. efree(to_r);
  3625. }
  3626. if (subject_buf) {
  3627. efree((void *)subject_buf);
  3628. }
  3629. if (message_buf) {
  3630. efree((void *)message_buf);
  3631. }
  3632. mbfl_memory_device_clear(&device);
  3633. zend_hash_destroy(&ht_headers);
  3634. }
  3635. #undef SKIP_LONG_HEADER_SEP_MBSTRING
  3636. #undef MAIL_ASCIIZ_CHECK_MBSTRING
  3637. #undef APPEND_ONE_CHAR
  3638. #undef SEPARATE_SMART_STR
  3639. #undef PHP_MBSTR_MAIL_MIME_HEADER1
  3640. #undef PHP_MBSTR_MAIL_MIME_HEADER2
  3641. #undef PHP_MBSTR_MAIL_MIME_HEADER3
  3642. #undef PHP_MBSTR_MAIL_MIME_HEADER4
  3643. /* }}} */
  3644. /* {{{ proto mixed mb_get_info([string type])
  3645. Returns the current settings of mbstring */
  3646. PHP_FUNCTION(mb_get_info)
  3647. {
  3648. char *typ = NULL;
  3649. int typ_len, n;
  3650. char *name;
  3651. const struct mb_overload_def *over_func;
  3652. zval *row1, *row2;
  3653. const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
  3654. enum mbfl_no_encoding *entry;
  3655. #ifdef ZEND_MULTIBYTE
  3656. zval *row3;
  3657. #endif /* ZEND_MULTIBYTE */
  3658. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  3659. RETURN_FALSE;
  3660. }
  3661. if (!typ || !strcasecmp("all", typ)) {
  3662. array_init(return_value);
  3663. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  3664. add_assoc_string(return_value, "internal_encoding", name, 1);
  3665. }
  3666. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  3667. add_assoc_string(return_value, "http_input", name, 1);
  3668. }
  3669. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  3670. add_assoc_string(return_value, "http_output", name, 1);
  3671. }
  3672. if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
  3673. add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
  3674. }
  3675. add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
  3676. if (MBSTRG(func_overload)){
  3677. over_func = &(mb_ovld[0]);
  3678. MAKE_STD_ZVAL(row1);
  3679. array_init(row1);
  3680. while (over_func->type > 0) {
  3681. if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
  3682. add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
  3683. }
  3684. over_func++;
  3685. }
  3686. add_assoc_zval(return_value, "func_overload_list", row1);
  3687. } else {
  3688. add_assoc_string(return_value, "func_overload_list", "no overload", 1);
  3689. }
  3690. if (lang != NULL) {
  3691. if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
  3692. add_assoc_string(return_value, "mail_charset", name, 1);
  3693. }
  3694. if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
  3695. add_assoc_string(return_value, "mail_header_encoding", name, 1);
  3696. }
  3697. if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
  3698. add_assoc_string(return_value, "mail_body_encoding", name, 1);
  3699. }
  3700. }
  3701. add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
  3702. if (MBSTRG(encoding_translation)) {
  3703. add_assoc_string(return_value, "encoding_translation", "On", 1);
  3704. } else {
  3705. add_assoc_string(return_value, "encoding_translation", "Off", 1);
  3706. }
  3707. if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
  3708. add_assoc_string(return_value, "language", name, 1);
  3709. }
  3710. n = MBSTRG(current_detect_order_list_size);
  3711. entry = MBSTRG(current_detect_order_list);
  3712. if(n > 0) {
  3713. MAKE_STD_ZVAL(row2);
  3714. array_init(row2);
  3715. while (n > 0) {
  3716. if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
  3717. add_next_index_string(row2, name, 1);
  3718. }
  3719. entry++;
  3720. n--;
  3721. }
  3722. add_assoc_zval(return_value, "detect_order", row2);
  3723. }
  3724. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  3725. add_assoc_string(return_value, "substitute_character", "none", 1);
  3726. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  3727. add_assoc_string(return_value, "substitute_character", "long", 1);
  3728. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  3729. add_assoc_string(return_value, "substitute_character", "entity", 1);
  3730. } else {
  3731. add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
  3732. }
  3733. if (MBSTRG(strict_detection)) {
  3734. add_assoc_string(return_value, "strict_detection", "On", 1);
  3735. } else {
  3736. add_assoc_string(return_value, "strict_detection", "Off", 1);
  3737. }
  3738. #ifdef ZEND_MULTIBYTE
  3739. entry = MBSTRG(script_encoding_list);
  3740. n = MBSTRG(script_encoding_list_size);
  3741. if(n > 0) {
  3742. MAKE_STD_ZVAL(row3);
  3743. array_init(row3);
  3744. while (n > 0) {
  3745. if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
  3746. add_next_index_string(row3, name, 1);
  3747. }
  3748. entry++;
  3749. n--;
  3750. }
  3751. add_assoc_zval(return_value, "script_encoding", row3);
  3752. }
  3753. #endif /* ZEND_MULTIBYTE */
  3754. } else if (!strcasecmp("internal_encoding", typ)) {
  3755. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  3756. RETVAL_STRING(name, 1);
  3757. }
  3758. } else if (!strcasecmp("http_input", typ)) {
  3759. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  3760. RETVAL_STRING(name, 1);
  3761. }
  3762. } else if (!strcasecmp("http_output", typ)) {
  3763. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  3764. RETVAL_STRING(name, 1);
  3765. }
  3766. } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
  3767. if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
  3768. RETVAL_STRING(name, 1);
  3769. }
  3770. } else if (!strcasecmp("func_overload", typ)) {
  3771. RETVAL_LONG(MBSTRG(func_overload));
  3772. } else if (!strcasecmp("func_overload_list", typ)) {
  3773. if (MBSTRG(func_overload)){
  3774. over_func = &(mb_ovld[0]);
  3775. array_init(return_value);
  3776. while (over_func->type > 0) {
  3777. if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
  3778. add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
  3779. }
  3780. over_func++;
  3781. }
  3782. } else {
  3783. RETVAL_STRING("no overload", 1);
  3784. }
  3785. } else if (!strcasecmp("mail_charset", typ)) {
  3786. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
  3787. RETVAL_STRING(name, 1);
  3788. }
  3789. } else if (!strcasecmp("mail_header_encoding", typ)) {
  3790. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
  3791. RETVAL_STRING(name, 1);
  3792. }
  3793. } else if (!strcasecmp("mail_body_encoding", typ)) {
  3794. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
  3795. RETVAL_STRING(name, 1);
  3796. }
  3797. } else if (!strcasecmp("illegal_chars", typ)) {
  3798. RETVAL_LONG(MBSTRG(illegalchars));
  3799. } else if (!strcasecmp("encoding_translation", typ)) {
  3800. if (MBSTRG(encoding_translation)) {
  3801. RETVAL_STRING("On", 1);
  3802. } else {
  3803. RETVAL_STRING("Off", 1);
  3804. }
  3805. } else if (!strcasecmp("language", typ)) {
  3806. if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
  3807. RETVAL_STRING(name, 1);
  3808. }
  3809. } else if (!strcasecmp("detect_order", typ)) {
  3810. n = MBSTRG(current_detect_order_list_size);
  3811. entry = MBSTRG(current_detect_order_list);
  3812. if(n > 0) {
  3813. array_init(return_value);
  3814. while (n > 0) {
  3815. name = (char *)mbfl_no_encoding2name(*entry);
  3816. if (name) {
  3817. add_next_index_string(return_value, name, 1);
  3818. }
  3819. entry++;
  3820. n--;
  3821. }
  3822. }
  3823. } else if (!strcasecmp("substitute_character", typ)) {
  3824. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  3825. RETVAL_STRING("none", 1);
  3826. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  3827. RETVAL_STRING("long", 1);
  3828. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  3829. RETVAL_STRING("entity", 1);
  3830. } else {
  3831. RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
  3832. }
  3833. } else if (!strcasecmp("strict_detection", typ)) {
  3834. if (MBSTRG(strict_detection)) {
  3835. RETVAL_STRING("On", 1);
  3836. } else {
  3837. RETVAL_STRING("Off", 1);
  3838. }
  3839. } else {
  3840. #ifdef ZEND_MULTIBYTE
  3841. if (!strcasecmp("script_encoding", typ)) {
  3842. entry = MBSTRG(script_encoding_list);
  3843. n = MBSTRG(script_encoding_list_size);
  3844. if(n > 0) {
  3845. array_init(return_value);
  3846. while (n > 0) {
  3847. name = (char *)mbfl_no_encoding2name(*entry);
  3848. if (name) {
  3849. add_next_index_string(return_value, name, 1);
  3850. }
  3851. entry++;
  3852. n--;
  3853. }
  3854. }
  3855. return;
  3856. }
  3857. #endif /* ZEND_MULTIBYTE */
  3858. RETURN_FALSE;
  3859. }
  3860. }
  3861. /* }}} */
  3862. /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
  3863. Check if the string is valid for the specified encoding */
  3864. PHP_FUNCTION(mb_check_encoding)
  3865. {
  3866. char *var = NULL;
  3867. int var_len;
  3868. char *enc = NULL;
  3869. int enc_len;
  3870. mbfl_buffer_converter *convd;
  3871. enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
  3872. mbfl_string string, result, *ret = NULL;
  3873. long illegalchars = 0;
  3874. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
  3875. RETURN_FALSE;
  3876. }
  3877. if (var == NULL) {
  3878. RETURN_BOOL(MBSTRG(illegalchars) == 0);
  3879. }
  3880. if (enc != NULL) {
  3881. no_encoding = mbfl_name2no_encoding(enc);
  3882. if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) {
  3883. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
  3884. RETURN_FALSE;
  3885. }
  3886. }
  3887. convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
  3888. if (convd == NULL) {
  3889. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
  3890. RETURN_FALSE;
  3891. }
  3892. mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
  3893. mbfl_buffer_converter_illegal_substchar(convd, 0);
  3894. /* initialize string */
  3895. mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
  3896. mbfl_string_init(&result);
  3897. string.val = (unsigned char *)var;
  3898. string.len = var_len;
  3899. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3900. illegalchars = mbfl_buffer_illegalchars(convd);
  3901. mbfl_buffer_converter_delete(convd);
  3902. RETVAL_FALSE;
  3903. if (ret != NULL) {
  3904. if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
  3905. RETVAL_TRUE;
  3906. }
  3907. mbfl_string_clear(&result);
  3908. }
  3909. }
  3910. /* }}} */
  3911. /* {{{ MBSTRING_API int php_mb_encoding_translation() */
  3912. MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
  3913. {
  3914. return MBSTRG(encoding_translation);
  3915. }
  3916. /* }}} */
  3917. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
  3918. MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
  3919. {
  3920. if (enc != NULL) {
  3921. if (enc->flag & MBFL_ENCTYPE_MBCS) {
  3922. if (enc->mblen_table != NULL) {
  3923. if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
  3924. }
  3925. } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
  3926. return 2;
  3927. } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
  3928. return 4;
  3929. }
  3930. }
  3931. return 1;
  3932. }
  3933. /* }}} */
  3934. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
  3935. MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
  3936. {
  3937. return php_mb_mbchar_bytes_ex(s,
  3938. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3939. }
  3940. /* }}} */
  3941. /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
  3942. MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
  3943. {
  3944. register const char *p = s;
  3945. char *last=NULL;
  3946. if (nbytes == (size_t)-1) {
  3947. size_t nb = 0;
  3948. while (*p != '\0') {
  3949. if (nb == 0) {
  3950. if ((unsigned char)*p == (unsigned char)c) {
  3951. last = (char *)p;
  3952. }
  3953. nb = php_mb_mbchar_bytes_ex(p, enc);
  3954. if (nb == 0) {
  3955. return NULL; /* something is going wrong! */
  3956. }
  3957. }
  3958. --nb;
  3959. ++p;
  3960. }
  3961. } else {
  3962. register size_t bcnt = nbytes;
  3963. register size_t nbytes_char;
  3964. while (bcnt > 0) {
  3965. if ((unsigned char)*p == (unsigned char)c) {
  3966. last = (char *)p;
  3967. }
  3968. nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
  3969. if (bcnt < nbytes_char) {
  3970. return NULL;
  3971. }
  3972. p += nbytes_char;
  3973. bcnt -= nbytes_char;
  3974. }
  3975. }
  3976. return last;
  3977. }
  3978. /* }}} */
  3979. /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
  3980. MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
  3981. {
  3982. return php_mb_safe_strrchr_ex(s, c, nbytes,
  3983. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3984. }
  3985. /* }}} */
  3986. /* {{{ MBSTRING_API char *php_mb_strrchr() */
  3987. MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
  3988. {
  3989. return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);
  3990. }
  3991. /* }}} */
  3992. /* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
  3993. MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
  3994. {
  3995. if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
  3996. return php_mb_mbchar_bytes_ex(s,
  3997. mbfl_no2encoding(MBSTRG(http_input_identify)));
  3998. } else {
  3999. return php_mb_mbchar_bytes_ex(s,
  4000. mbfl_no2encoding(MBSTRG(internal_encoding)));
  4001. }
  4002. }
  4003. /* }}} */
  4004. /* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
  4005. MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
  4006. {
  4007. int i;
  4008. mbfl_string string, result, *ret = NULL;
  4009. enum mbfl_no_encoding from_encoding, to_encoding;
  4010. mbfl_buffer_converter *convd;
  4011. if (encoding_to) {
  4012. /* new encoding */
  4013. to_encoding = mbfl_name2no_encoding(encoding_to);
  4014. if (to_encoding == mbfl_no_encoding_invalid) {
  4015. return -1;
  4016. }
  4017. } else {
  4018. to_encoding = MBSTRG(current_internal_encoding);
  4019. }
  4020. if (encoding_from) {
  4021. /* old encoding */
  4022. from_encoding = mbfl_name2no_encoding(encoding_from);
  4023. if (from_encoding == mbfl_no_encoding_invalid) {
  4024. return -1;
  4025. }
  4026. } else {
  4027. from_encoding = MBSTRG(http_input_identify);
  4028. }
  4029. if (from_encoding == mbfl_no_encoding_pass) {
  4030. return 0;
  4031. }
  4032. /* initialize string */
  4033. mbfl_string_init(&string);
  4034. mbfl_string_init(&result);
  4035. string.no_encoding = from_encoding;
  4036. string.no_language = MBSTRG(language);
  4037. for (i=0; i<num; i++){
  4038. string.val = (unsigned char *)str[i];
  4039. string.len = len[i];
  4040. /* initialize converter */
  4041. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  4042. if (convd == NULL) {
  4043. return -1;
  4044. }
  4045. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  4046. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  4047. /* do it */
  4048. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  4049. if (ret != NULL) {
  4050. efree(str[i]);
  4051. str[i] = (char *)ret->val;
  4052. len[i] = (int)ret->len;
  4053. }
  4054. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  4055. mbfl_buffer_converter_delete(convd);
  4056. }
  4057. return ret ? 0 : -1;
  4058. }
  4059. /* }}} */
  4060. /* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
  4061. */
  4062. MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
  4063. {
  4064. mbfl_string string;
  4065. enum mbfl_no_encoding *elist;
  4066. enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
  4067. mbfl_encoding_detector *identd = NULL;
  4068. int size;
  4069. enum mbfl_no_encoding *list;
  4070. if (MBSTRG(http_input_list_size) == 1 &&
  4071. MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
  4072. MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
  4073. return SUCCESS;
  4074. }
  4075. if (MBSTRG(http_input_list_size) == 1 &&
  4076. MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
  4077. mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
  4078. MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
  4079. return SUCCESS;
  4080. }
  4081. if (arg_list && strlen(arg_list)>0) {
  4082. /* make encoding list */
  4083. list = NULL;
  4084. size = 0;
  4085. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  4086. if (size > 0 && list != NULL) {
  4087. elist = list;
  4088. } else {
  4089. elist = MBSTRG(current_detect_order_list);
  4090. size = MBSTRG(current_detect_order_list_size);
  4091. if (size <= 0){
  4092. elist = MBSTRG(default_detect_order_list);
  4093. size = MBSTRG(default_detect_order_list_size);
  4094. }
  4095. }
  4096. } else {
  4097. elist = MBSTRG(current_detect_order_list);
  4098. size = MBSTRG(current_detect_order_list_size);
  4099. if (size <= 0){
  4100. elist = MBSTRG(default_detect_order_list);
  4101. size = MBSTRG(default_detect_order_list_size);
  4102. }
  4103. }
  4104. mbfl_string_init(&string);
  4105. string.no_language = MBSTRG(language);
  4106. identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection));
  4107. if (identd) {
  4108. int n = 0;
  4109. while(n < num){
  4110. string.val = (unsigned char *)arg_string[n];
  4111. string.len = arg_length[n];
  4112. if (mbfl_encoding_detector_feed(identd, &string)) {
  4113. break;
  4114. }
  4115. n++;
  4116. }
  4117. encoding = mbfl_encoding_detector_judge(identd);
  4118. mbfl_encoding_detector_delete(identd);
  4119. }
  4120. if (encoding != mbfl_no_encoding_invalid) {
  4121. MBSTRG(http_input_identify) = encoding;
  4122. return SUCCESS;
  4123. } else {
  4124. return FAILURE;
  4125. }
  4126. }
  4127. /* }}} */
  4128. /* {{{ MBSTRING_API int php_mb_stripos()
  4129. */
  4130. MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
  4131. {
  4132. int n;
  4133. mbfl_string haystack, needle;
  4134. n = -1;
  4135. mbfl_string_init(&haystack);
  4136. mbfl_string_init(&needle);
  4137. haystack.no_language = MBSTRG(language);
  4138. haystack.no_encoding = MBSTRG(current_internal_encoding);
  4139. needle.no_language = MBSTRG(language);
  4140. needle.no_encoding = MBSTRG(current_internal_encoding);
  4141. do {
  4142. size_t len = 0;
  4143. haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
  4144. haystack.len = len;
  4145. if (!haystack.val) {
  4146. break;
  4147. }
  4148. if (haystack.len <= 0) {
  4149. break;
  4150. }
  4151. needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
  4152. needle.len = len;
  4153. if (!needle.val) {
  4154. break;
  4155. }
  4156. if (needle.len <= 0) {
  4157. break;
  4158. }
  4159. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
  4160. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  4161. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
  4162. break;
  4163. }
  4164. {
  4165. int haystack_char_len = mbfl_strlen(&haystack);
  4166. if (mode) {
  4167. if ((offset > 0 && offset > haystack_char_len) ||
  4168. (offset < 0 && -offset > haystack_char_len)) {
  4169. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
  4170. break;
  4171. }
  4172. } else {
  4173. if (offset < 0 || offset > haystack_char_len) {
  4174. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
  4175. break;
  4176. }
  4177. }
  4178. }
  4179. n = mbfl_strpos(&haystack, &needle, offset, mode);
  4180. } while(0);
  4181. if (haystack.val) {
  4182. efree(haystack.val);
  4183. }
  4184. if (needle.val) {
  4185. efree(needle.val);
  4186. }
  4187. return n;
  4188. }
  4189. /* }}} */
  4190. #ifdef ZEND_MULTIBYTE
  4191. /* {{{ php_mb_set_zend_encoding() */
  4192. static int php_mb_set_zend_encoding(TSRMLS_D)
  4193. {
  4194. /* 'd better use mbfl_memory_device? */
  4195. char *name, *list = NULL;
  4196. int n, *entry, list_size = 0;
  4197. zend_encoding_detector encoding_detector;
  4198. zend_encoding_converter encoding_converter;
  4199. zend_encoding_oddlen encoding_oddlen;
  4200. /* notify script encoding to Zend Engine */
  4201. entry = MBSTRG(script_encoding_list);
  4202. n = MBSTRG(script_encoding_list_size);
  4203. while (n > 0) {
  4204. name = (char *)mbfl_no_encoding2name(*entry);
  4205. if (name) {
  4206. list_size += strlen(name) + 1;
  4207. if (!list) {
  4208. list = (char*)emalloc(list_size);
  4209. *list = '\0';
  4210. } else {
  4211. list = (char*)erealloc(list, list_size);
  4212. strcat(list, ",");
  4213. }
  4214. strcat(list, name);
  4215. }
  4216. entry++;
  4217. n--;
  4218. }
  4219. zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
  4220. if (list) {
  4221. efree(list);
  4222. }
  4223. encoding_detector = php_mb_encoding_detector;
  4224. encoding_converter = php_mb_encoding_converter;
  4225. encoding_oddlen = php_mb_oddlen;
  4226. /* TODO: make independent from mbstring.encoding_translation? */
  4227. if (MBSTRG(encoding_translation)) {
  4228. /* notify internal encoding to Zend Engine */
  4229. name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  4230. zend_multibyte_set_internal_encoding(name TSRMLS_CC);
  4231. }
  4232. zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
  4233. return 0;
  4234. }
  4235. /* }}} */
  4236. /* {{{ char *php_mb_encoding_detector()
  4237. * Interface for Zend Engine
  4238. */
  4239. static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
  4240. {
  4241. mbfl_string string;
  4242. const char *ret;
  4243. enum mbfl_no_encoding *elist;
  4244. int size, *list;
  4245. /* make encoding list */
  4246. list = NULL;
  4247. size = 0;
  4248. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  4249. if (size <= 0) {
  4250. return NULL;
  4251. }
  4252. if (size > 0 && list != NULL) {
  4253. elist = list;
  4254. } else {
  4255. elist = MBSTRG(current_detect_order_list);
  4256. size = MBSTRG(current_detect_order_list_size);
  4257. }
  4258. mbfl_string_init(&string);
  4259. string.no_language = MBSTRG(language);
  4260. string.val = (unsigned char *)arg_string;
  4261. string.len = arg_length;
  4262. ret = mbfl_identify_encoding_name(&string, elist, size, 0);
  4263. if (list != NULL) {
  4264. efree((void *)list);
  4265. }
  4266. if (ret != NULL) {
  4267. return estrdup(ret);
  4268. } else {
  4269. return NULL;
  4270. }
  4271. }
  4272. /* }}} */
  4273. /* {{{ int php_mb_encoding_converter() */
  4274. static int php_mb_encoding_converter(unsigned char **to, size_t *to_length,
  4275. const unsigned char *from, size_t from_length,
  4276. const char *encoding_to, const char *encoding_from TSRMLS_DC)
  4277. {
  4278. mbfl_string string, result, *ret;
  4279. enum mbfl_no_encoding from_encoding, to_encoding;
  4280. mbfl_buffer_converter *convd;
  4281. /* new encoding */
  4282. to_encoding = mbfl_name2no_encoding(encoding_to);
  4283. if (to_encoding == mbfl_no_encoding_invalid) {
  4284. return -1;
  4285. }
  4286. /* old encoding */
  4287. from_encoding = mbfl_name2no_encoding(encoding_from);
  4288. if (from_encoding == mbfl_no_encoding_invalid) {
  4289. return -1;
  4290. }
  4291. /* initialize string */
  4292. mbfl_string_init(&string);
  4293. mbfl_string_init(&result);
  4294. string.no_encoding = from_encoding;
  4295. string.no_language = MBSTRG(language);
  4296. string.val = (unsigned char*)from;
  4297. string.len = from_length;
  4298. /* initialize converter */
  4299. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  4300. if (convd == NULL) {
  4301. return -1;
  4302. }
  4303. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  4304. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  4305. /* do it */
  4306. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  4307. if (ret != NULL) {
  4308. *to = ret->val;
  4309. *to_length = ret->len;
  4310. }
  4311. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  4312. mbfl_buffer_converter_delete(convd);
  4313. return ret ? 0 : -1;
  4314. }
  4315. /* }}} */
  4316. /* {{{ int php_mb_oddlen()
  4317. * returns number of odd (e.g. appears only first byte of multibyte
  4318. * character) chars
  4319. */
  4320. static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
  4321. {
  4322. mbfl_string mb_string;
  4323. mbfl_string_init(&mb_string);
  4324. mb_string.no_language = MBSTRG(language);
  4325. mb_string.no_encoding = mbfl_name2no_encoding(encoding);
  4326. mb_string.val = (unsigned char *)string;
  4327. mb_string.len = length;
  4328. if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
  4329. return 0;
  4330. }
  4331. return mbfl_oddlen(&mb_string);
  4332. }
  4333. /* }}} */
  4334. #endif /* ZEND_MULTIBYTE */
  4335. #endif /* HAVE_MBSTRING */
  4336. /*
  4337. * Local variables:
  4338. * tab-width: 4
  4339. * c-basic-offset: 4
  4340. * End:
  4341. * vim600: fdm=marker
  4342. * vim: noet sw=4 ts=4
  4343. */