PageRenderTime 73ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/hphp/runtime/ext/ext_mb.cpp

http://github.com/facebook/hiphop-php
C++ | 4191 lines | 3589 code | 415 blank | 187 comment | 982 complexity | 6654a83821b70924cecdcf76a157ee43 MD5 | raw file
Possible License(s): LGPL-2.1, BSD-2-Clause, BSD-3-Clause, MPL-2.0-no-copyleft-exception, MIT, LGPL-2.0, Apache-2.0
  1. /*
  2. +----------------------------------------------------------------------+
  3. | HipHop for PHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
  6. | Copyright (c) 1997-2010 The PHP Group |
  7. +----------------------------------------------------------------------+
  8. | This source file is subject to version 3.01 of the PHP license, |
  9. | that is bundled with this package in the file LICENSE, and is |
  10. | available through the world-wide-web at the following url: |
  11. | http://www.php.net/license/3_01.txt |
  12. | If you did not receive a copy of the PHP license and are unable to |
  13. | obtain it through the world-wide-web, please send a note to |
  14. | license@php.net so we can mail you a copy immediately. |
  15. +----------------------------------------------------------------------+
  16. */
  17. #include "hphp/runtime/ext/ext_mb.h"
  18. #include "hphp/runtime/base/string-buffer.h"
  19. #include "hphp/runtime/base/request-local.h"
  20. #include "hphp/runtime/ext/php_unicode.h"
  21. #include "hphp/runtime/ext/unicode_data.h"
  22. #include "hphp/runtime/ext/ext_process.h"
  23. #include "hphp/runtime/ext/ext_string.h"
  24. #include "hphp/runtime/base/zend-url.h"
  25. #include "hphp/runtime/base/zend-string.h"
  26. #include "hphp/runtime/base/ini-setting.h"
  27. extern "C" {
  28. #include <mbfl/mbfl_convert.h>
  29. #include <mbfl/mbfilter.h>
  30. #include <oniguruma.h>
  31. }
  32. #define php_mb_re_pattern_buffer re_pattern_buffer
  33. #define php_mb_regex_t regex_t
  34. #define php_mb_re_registers re_registers
  35. extern void mbfl_memory_device_unput(mbfl_memory_device *device);
  /* Numeric tags for the PARSE_* kinds; the values run from 0 through 6. */
  #define PARSE_POST     0
  #define PARSE_GET      1
  #define PARSE_COOKIE   2
  #define PARSE_STRING   3
  #define PARSE_ENV      4
  #define PARSE_SERVER   5
  #define PARSE_SESSION  6
  43. namespace HPHP {
  44. static class mbstringExtension : public Extension {
  45. public:
  46. mbstringExtension() : Extension("mbstring") {}
  47. virtual void moduleInit() {
  48. IniSetting::SetGlobalDefault("mbstring.http_input", "pass");
  49. IniSetting::SetGlobalDefault("mbstring.http_output", "pass");
  50. }
  51. } s_mbstring_extension;
  52. ///////////////////////////////////////////////////////////////////////////////
  53. // statics
  54. #define PHP_MBSTR_STACK_BLOCK_SIZE 32
  55. typedef struct _php_mb_nls_ident_list {
  56. mbfl_no_language lang;
  57. mbfl_no_encoding* list;
  58. int list_size;
  59. } php_mb_nls_ident_list;
  60. static mbfl_no_encoding php_mb_default_identify_list_ja[] = {
  61. mbfl_no_encoding_ascii,
  62. mbfl_no_encoding_jis,
  63. mbfl_no_encoding_utf8,
  64. mbfl_no_encoding_euc_jp,
  65. mbfl_no_encoding_sjis
  66. };
  67. static mbfl_no_encoding php_mb_default_identify_list_cn[] = {
  68. mbfl_no_encoding_ascii,
  69. mbfl_no_encoding_utf8,
  70. mbfl_no_encoding_euc_cn,
  71. mbfl_no_encoding_cp936
  72. };
  73. static mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
  74. mbfl_no_encoding_ascii,
  75. mbfl_no_encoding_utf8,
  76. mbfl_no_encoding_euc_tw,
  77. mbfl_no_encoding_big5
  78. };
  79. static mbfl_no_encoding php_mb_default_identify_list_kr[] = {
  80. mbfl_no_encoding_ascii,
  81. mbfl_no_encoding_utf8,
  82. mbfl_no_encoding_euc_kr,
  83. mbfl_no_encoding_uhc
  84. };
  85. static mbfl_no_encoding php_mb_default_identify_list_ru[] = {
  86. mbfl_no_encoding_ascii,
  87. mbfl_no_encoding_utf8,
  88. mbfl_no_encoding_koi8r,
  89. mbfl_no_encoding_cp1251,
  90. mbfl_no_encoding_cp866
  91. };
  92. static mbfl_no_encoding php_mb_default_identify_list_hy[] = {
  93. mbfl_no_encoding_ascii,
  94. mbfl_no_encoding_utf8,
  95. mbfl_no_encoding_armscii8
  96. };
  97. static mbfl_no_encoding php_mb_default_identify_list_tr[] = {
  98. mbfl_no_encoding_ascii,
  99. mbfl_no_encoding_utf8,
  100. mbfl_no_encoding_8859_9
  101. };
  102. static mbfl_no_encoding php_mb_default_identify_list_neut[] = {
  103. mbfl_no_encoding_ascii,
  104. mbfl_no_encoding_utf8
  105. };
  106. static php_mb_nls_ident_list php_mb_default_identify_list[] = {
  107. { mbfl_no_language_japanese, php_mb_default_identify_list_ja,
  108. sizeof(php_mb_default_identify_list_ja) /
  109. sizeof(php_mb_default_identify_list_ja[0]) },
  110. { mbfl_no_language_korean, php_mb_default_identify_list_kr,
  111. sizeof(php_mb_default_identify_list_kr) /
  112. sizeof(php_mb_default_identify_list_kr[0]) },
  113. { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk,
  114. sizeof(php_mb_default_identify_list_tw_hk) /
  115. sizeof(php_mb_default_identify_list_tw_hk[0]) },
  116. { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn,
  117. sizeof(php_mb_default_identify_list_cn) /
  118. sizeof(php_mb_default_identify_list_cn[0]) },
  119. { mbfl_no_language_russian, php_mb_default_identify_list_ru,
  120. sizeof(php_mb_default_identify_list_ru) /
  121. sizeof(php_mb_default_identify_list_ru[0]) },
  122. { mbfl_no_language_armenian, php_mb_default_identify_list_hy,
  123. sizeof(php_mb_default_identify_list_hy) /
  124. sizeof(php_mb_default_identify_list_hy[0]) },
  125. { mbfl_no_language_turkish, php_mb_default_identify_list_tr,
  126. sizeof(php_mb_default_identify_list_tr) /
  127. sizeof(php_mb_default_identify_list_tr[0]) },
  128. { mbfl_no_language_neutral, php_mb_default_identify_list_neut,
  129. sizeof(php_mb_default_identify_list_neut) /
  130. sizeof(php_mb_default_identify_list_neut[0]) }
  131. };
  132. ///////////////////////////////////////////////////////////////////////////////
  133. // globals
  134. typedef std::map<std::string, php_mb_regex_t *> RegexCache;
  135. class MBGlobals : public RequestEventHandler {
  136. public:
  137. mbfl_no_language language;
  138. mbfl_no_language current_language;
  139. mbfl_no_encoding internal_encoding;
  140. mbfl_no_encoding current_internal_encoding;
  141. mbfl_no_encoding http_output_encoding;
  142. mbfl_no_encoding current_http_output_encoding;
  143. mbfl_no_encoding http_input_identify;
  144. mbfl_no_encoding http_input_identify_get;
  145. mbfl_no_encoding http_input_identify_post;
  146. mbfl_no_encoding http_input_identify_cookie;
  147. mbfl_no_encoding http_input_identify_string;
  148. mbfl_no_encoding *http_input_list;
  149. int http_input_list_size;
  150. mbfl_no_encoding *detect_order_list;
  151. int detect_order_list_size;
  152. mbfl_no_encoding *current_detect_order_list;
  153. int current_detect_order_list_size;
  154. mbfl_no_encoding *default_detect_order_list;
  155. int default_detect_order_list_size;
  156. int filter_illegal_mode;
  157. int filter_illegal_substchar;
  158. int current_filter_illegal_mode;
  159. int current_filter_illegal_substchar;
  160. bool encoding_translation;
  161. long strict_detection;
  162. long illegalchars;
  163. mbfl_buffer_converter *outconv;
  164. OnigEncoding default_mbctype;
  165. OnigEncoding current_mbctype;
  166. RegexCache ht_rc;
  167. std::string search_str;
  168. unsigned int search_pos;
  169. php_mb_regex_t *search_re;
  170. OnigRegion *search_regs;
  171. OnigOptionType regex_default_options;
  172. OnigSyntaxType *regex_default_syntax;
  173. MBGlobals() :
  174. language(mbfl_no_language_uni),
  175. current_language(mbfl_no_language_uni),
  176. internal_encoding(mbfl_no_encoding_utf8),
  177. current_internal_encoding(mbfl_no_encoding_utf8),
  178. http_output_encoding(mbfl_no_encoding_pass),
  179. current_http_output_encoding(mbfl_no_encoding_pass),
  180. http_input_identify(mbfl_no_encoding_invalid),
  181. http_input_identify_get(mbfl_no_encoding_invalid),
  182. http_input_identify_post(mbfl_no_encoding_invalid),
  183. http_input_identify_cookie(mbfl_no_encoding_invalid),
  184. http_input_identify_string(mbfl_no_encoding_invalid),
  185. http_input_list(NULL),
  186. http_input_list_size(0),
  187. detect_order_list(NULL),
  188. detect_order_list_size(0),
  189. current_detect_order_list(NULL),
  190. current_detect_order_list_size(0),
  191. default_detect_order_list
  192. ((mbfl_no_encoding *)php_mb_default_identify_list_neut),
  193. default_detect_order_list_size
  194. (sizeof(php_mb_default_identify_list_neut) /
  195. sizeof(php_mb_default_identify_list_neut[0])),
  196. filter_illegal_mode(MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR),
  197. filter_illegal_substchar(0x3f), /* '?' */
  198. current_filter_illegal_mode(MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR),
  199. current_filter_illegal_substchar(0x3f), /* '?' */
  200. encoding_translation(0),
  201. strict_detection(0),
  202. illegalchars(0),
  203. outconv(NULL),
  204. default_mbctype(ONIG_ENCODING_EUC_JP),
  205. current_mbctype(ONIG_ENCODING_EUC_JP),
  206. search_pos(0),
  207. search_re((php_mb_regex_t*)NULL),
  208. search_regs((OnigRegion*)NULL),
  209. regex_default_options(ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE),
  210. regex_default_syntax(ONIG_SYNTAX_RUBY) {
  211. }
  212. virtual void requestInit() {
  213. current_language = language;
  214. current_internal_encoding = internal_encoding;
  215. current_http_output_encoding = http_output_encoding;
  216. current_filter_illegal_mode = filter_illegal_mode;
  217. current_filter_illegal_substchar = filter_illegal_substchar;
  218. if (!encoding_translation) {
  219. illegalchars = 0;
  220. }
  221. mbfl_no_encoding *list=NULL, *entry;
  222. int n = 0;
  223. if (detect_order_list) {
  224. list = detect_order_list;
  225. n = detect_order_list_size;
  226. }
  227. if (n <= 0) {
  228. list = default_detect_order_list;
  229. n = default_detect_order_list_size;
  230. }
  231. entry = (mbfl_no_encoding *)malloc(n * sizeof(int));
  232. current_detect_order_list = entry;
  233. current_detect_order_list_size = n;
  234. while (n > 0) {
  235. *entry++ = *list++;
  236. n--;
  237. }
  238. }
  239. virtual void requestShutdown() {
  240. if (current_detect_order_list != NULL) {
  241. free(current_detect_order_list);
  242. current_detect_order_list = NULL;
  243. current_detect_order_list_size = 0;
  244. }
  245. if (outconv != NULL) {
  246. illegalchars += mbfl_buffer_illegalchars(outconv);
  247. mbfl_buffer_converter_delete(outconv);
  248. outconv = NULL;
  249. }
  250. /* clear http input identification. */
  251. http_input_identify = mbfl_no_encoding_invalid;
  252. http_input_identify_post = mbfl_no_encoding_invalid;
  253. http_input_identify_get = mbfl_no_encoding_invalid;
  254. http_input_identify_cookie = mbfl_no_encoding_invalid;
  255. http_input_identify_string = mbfl_no_encoding_invalid;
  256. current_mbctype = default_mbctype;
  257. search_str.clear();
  258. search_pos = 0;
  259. if (search_regs != NULL) {
  260. onig_region_free(search_regs, 1);
  261. search_regs = (OnigRegion *)NULL;
  262. }
  263. for (RegexCache::const_iterator it = ht_rc.begin(); it != ht_rc.end();
  264. ++it) {
  265. onig_free(it->second);
  266. }
  267. ht_rc.clear();
  268. }
  269. };
  270. IMPLEMENT_STATIC_REQUEST_LOCAL(MBGlobals, s_mb_globals);
  271. #define MBSTRG(name) s_mb_globals->name
  272. ///////////////////////////////////////////////////////////////////////////////
  273. // unicode functions
  /*
   * Lookup table of single-bit masks: entry i has only bit i set, for
   * i = 0 .. 31.
   */
  static unsigned long masks32[32] = {
    1UL <<  0, 1UL <<  1, 1UL <<  2, 1UL <<  3,
    1UL <<  4, 1UL <<  5, 1UL <<  6, 1UL <<  7,
    1UL <<  8, 1UL <<  9, 1UL << 10, 1UL << 11,
    1UL << 12, 1UL << 13, 1UL << 14, 1UL << 15,
    1UL << 16, 1UL << 17, 1UL << 18, 1UL << 19,
    1UL << 20, 1UL << 21, 1UL << 22, 1UL << 23,
    1UL << 24, 1UL << 25, 1UL << 26, 1UL << 27,
    1UL << 28, 1UL << 29, 1UL << 30, 1UL << 31
  };
  285. static int prop_lookup(unsigned long code, unsigned long n) {
  286. long l, r, m;
  287. /*
  288. * There is an extra node on the end of the offsets to allow this routine
  289. * to work right. If the index is 0xffff, then there are no nodes for the
  290. * property.
  291. */
  292. if ((l = _ucprop_offsets[n]) == 0xffff)
  293. return 0;
  294. /*
  295. * Locate the next offset that is not 0xffff. The sentinel at the end of
  296. * the array is the max index value.
  297. */
  298. for (m = 1; n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++)
  299. ;
  300. r = _ucprop_offsets[n + m] - 1;
  301. while (l <= r) {
  302. /*
  303. * Determine a "mid" point and adjust to make sure the mid point is at
  304. * the beginning of a range pair.
  305. */
  306. m = (l + r) >> 1;
  307. m -= (m & 1);
  308. if (code > _ucprop_ranges[m + 1])
  309. l = m + 2;
  310. else if (code < _ucprop_ranges[m])
  311. r = m - 2;
  312. else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
  313. return 1;
  314. }
  315. return 0;
  316. }
  317. static int php_unicode_is_prop(unsigned long code, unsigned long mask1,
  318. unsigned long mask2) {
  319. unsigned long i;
  320. if (mask1 == 0 && mask2 == 0)
  321. return 0;
  322. for (i = 0; mask1 && i < 32; i++) {
  323. if ((mask1 & masks32[i]) && prop_lookup(code, i))
  324. return 1;
  325. }
  326. for (i = 32; mask2 && i < _ucprop_size; i++) {
  327. if ((mask2 & masks32[i & 31]) && prop_lookup(code, i))
  328. return 1;
  329. }
  330. return 0;
  331. }
  332. static unsigned long case_lookup(unsigned long code, long l, long r,
  333. int field) {
  334. long m;
  335. /*
  336. * Do the binary search.
  337. */
  338. while (l <= r) {
  339. /*
  340. * Determine a "mid" point and adjust to make sure the mid point is at
  341. * the beginning of a case mapping triple.
  342. */
  343. m = (l + r) >> 1;
  344. m -= (m % 3);
  345. if (code > _uccase_map[m])
  346. l = m + 3;
  347. else if (code < _uccase_map[m])
  348. r = m - 3;
  349. else if (code == _uccase_map[m])
  350. return _uccase_map[m + field];
  351. }
  352. return code;
  353. }
  354. static unsigned long php_turkish_toupper(unsigned long code, long l, long r,
  355. int field) {
  356. if (code == 0x0069L) {
  357. return 0x0130L;
  358. }
  359. return case_lookup(code, l, r, field);
  360. }
  361. static unsigned long php_turkish_tolower(unsigned long code, long l, long r,
  362. int field) {
  363. if (code == 0x0049L) {
  364. return 0x0131L;
  365. }
  366. return case_lookup(code, l, r, field);
  367. }
  368. static unsigned long php_unicode_toupper(unsigned long code,
  369. enum mbfl_no_encoding enc) {
  370. int field;
  371. long l, r;
  372. if (php_unicode_is_upper(code))
  373. return code;
  374. if (php_unicode_is_lower(code)) {
  375. /*
  376. * The character is lower case.
  377. */
  378. field = 2;
  379. l = _uccase_len[0];
  380. r = (l + _uccase_len[1]) - 3;
  381. if (enc == mbfl_no_encoding_8859_9) {
  382. return php_turkish_toupper(code, l, r, field);
  383. }
  384. } else {
  385. /*
  386. * The character is title case.
  387. */
  388. field = 1;
  389. l = _uccase_len[0] + _uccase_len[1];
  390. r = _uccase_size - 3;
  391. }
  392. return case_lookup(code, l, r, field);
  393. }
  394. static unsigned long php_unicode_tolower(unsigned long code,
  395. enum mbfl_no_encoding enc) {
  396. int field;
  397. long l, r;
  398. if (php_unicode_is_lower(code))
  399. return code;
  400. if (php_unicode_is_upper(code)) {
  401. /*
  402. * The character is upper case.
  403. */
  404. field = 1;
  405. l = 0;
  406. r = _uccase_len[0] - 3;
  407. if (enc == mbfl_no_encoding_8859_9) {
  408. return php_turkish_tolower(code, l, r, field);
  409. }
  410. } else {
  411. /*
  412. * The character is title case.
  413. */
  414. field = 2;
  415. l = _uccase_len[0] + _uccase_len[1];
  416. r = _uccase_size - 3;
  417. }
  418. return case_lookup(code, l, r, field);
  419. }
  420. static unsigned long php_unicode_totitle(unsigned long code,
  421. enum mbfl_no_encoding enc) {
  422. int field;
  423. long l, r;
  424. if (php_unicode_is_title(code))
  425. return code;
  426. /*
  427. * The offset will always be the same for converting to title case.
  428. */
  429. field = 2;
  430. if (php_unicode_is_upper(code)) {
  431. /*
  432. * The character is upper case.
  433. */
  434. l = 0;
  435. r = _uccase_len[0] - 3;
  436. } else {
  437. /*
  438. * The character is lower case.
  439. */
  440. l = _uccase_len[0];
  441. r = (l + _uccase_len[1]) - 3;
  442. }
  443. return case_lookup(code, l, r, field);
  444. }
  /*
   * Reads four bytes at `ptr` as a big-endian 32-bit value.
   *
   * Each byte is widened to unsigned int BEFORE shifting.  Shifting the
   * promoted (signed) int left by 24 overflows for bytes >= 0x80, and the
   * resulting negative int would sign-extend when passed to a function that
   * takes unsigned long.  The macro now yields an unsigned int.
   */
  #define BE_ARY_TO_UINT32(ptr) (\
    ((unsigned int)((unsigned char*)(ptr))[0]<<24) |\
    ((unsigned int)((unsigned char*)(ptr))[1]<<16) |\
    ((unsigned int)((unsigned char*)(ptr))[2]<< 8) |\
    ((unsigned int)((unsigned char*)(ptr))[3]    ) )

  /*
   * Stores the low 32 bits of `val` at `ptr` as four big-endian bytes.
   * `val` is evaluated exactly once; it is parenthesised so that any
   * expression can be passed.
   */
  #define UINT32_TO_BE_ARY(ptr,val) { \
    unsigned int v = (val); \
    ((unsigned char*)(ptr))[0] = (v>>24) & 0xff,\
    ((unsigned char*)(ptr))[1] = (v>>16) & 0xff,\
    ((unsigned char*)(ptr))[2] = (v>> 8) & 0xff,\
    ((unsigned char*)(ptr))[3] = (v    ) & 0xff;\
  }
  457. /**
  458. * Return 0 if input contains any illegal encoding, otherwise 1.
  459. * Even if any illegal encoding is detected the result may contain a list
  460. * of parsed encodings.
  461. */
  462. static int php_mb_parse_encoding_list(const char *value, int value_length,
  463. mbfl_no_encoding **return_list,
  464. int *return_size, int persistent) {
  465. int n, l, size, bauto, ret = 1;
  466. char *p, *p1, *p2, *endp, *tmpstr;
  467. mbfl_no_encoding no_encoding;
  468. mbfl_no_encoding *src, *entry, *list;
  469. list = NULL;
  470. if (value == NULL || value_length <= 0) {
  471. if (return_list) {
  472. *return_list = NULL;
  473. }
  474. if (return_size) {
  475. *return_size = 0;
  476. }
  477. return 0;
  478. } else {
  479. mbfl_no_encoding *identify_list;
  480. int identify_list_size;
  481. identify_list = MBSTRG(default_detect_order_list);
  482. identify_list_size = MBSTRG(default_detect_order_list_size);
  483. /* copy the value string for work */
  484. if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
  485. tmpstr = (char *)strndup(value+1, value_length-2);
  486. value_length -= 2;
  487. }
  488. else
  489. tmpstr = (char *)strndup(value, value_length);
  490. if (tmpstr == NULL) {
  491. return 0;
  492. }
  493. /* count the number of listed encoding names */
  494. endp = tmpstr + value_length;
  495. n = 1;
  496. p1 = tmpstr;
  497. while ((p2 = (char*)string_memnstr(p1, ",", 1, endp)) != NULL) {
  498. p1 = p2 + 1;
  499. n++;
  500. }
  501. size = n + identify_list_size;
  502. /* make list */
  503. list = (mbfl_no_encoding *)calloc(size, sizeof(int));
  504. if (list != NULL) {
  505. entry = list;
  506. n = 0;
  507. bauto = 0;
  508. p1 = tmpstr;
  509. do {
  510. p2 = p = (char*)string_memnstr(p1, ",", 1, endp);
  511. if (p == NULL) {
  512. p = endp;
  513. }
  514. *p = '\0';
  515. /* trim spaces */
  516. while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
  517. p1++;
  518. }
  519. p--;
  520. while (p > p1 && (*p == ' ' || *p == '\t')) {
  521. *p = '\0';
  522. p--;
  523. }
  524. /* convert to the encoding number and check encoding */
  525. if (strcasecmp(p1, "auto") == 0) {
  526. if (!bauto) {
  527. bauto = 1;
  528. l = identify_list_size;
  529. src = identify_list;
  530. while (l > 0) {
  531. *entry++ = *src++;
  532. l--;
  533. n++;
  534. }
  535. }
  536. } else {
  537. no_encoding = mbfl_name2no_encoding(p1);
  538. if (no_encoding != mbfl_no_encoding_invalid) {
  539. *entry++ = no_encoding;
  540. n++;
  541. } else {
  542. ret = 0;
  543. }
  544. }
  545. p1 = p2 + 1;
  546. } while (n < size && p2 != NULL);
  547. if (n > 0) {
  548. if (return_list) {
  549. *return_list = list;
  550. } else {
  551. free(list);
  552. }
  553. } else {
  554. free(list);
  555. if (return_list) {
  556. *return_list = NULL;
  557. }
  558. ret = 0;
  559. }
  560. if (return_size) {
  561. *return_size = n;
  562. }
  563. } else {
  564. if (return_list) {
  565. *return_list = NULL;
  566. }
  567. if (return_size) {
  568. *return_size = 0;
  569. }
  570. ret = 0;
  571. }
  572. free(tmpstr);
  573. }
  574. return ret;
  575. }
  576. static char *php_mb_convert_encoding(const char *input, size_t length,
  577. const char *_to_encoding,
  578. const char *_from_encodings,
  579. unsigned int *output_len) {
  580. mbfl_string string, result, *ret;
  581. mbfl_no_encoding from_encoding, to_encoding;
  582. mbfl_buffer_converter *convd;
  583. int size;
  584. mbfl_no_encoding *list;
  585. char *output = NULL;
  586. if (output_len) {
  587. *output_len = 0;
  588. }
  589. if (!input) {
  590. return NULL;
  591. }
  592. /* new encoding */
  593. if (_to_encoding && strlen(_to_encoding)) {
  594. to_encoding = mbfl_name2no_encoding(_to_encoding);
  595. if (to_encoding == mbfl_no_encoding_invalid) {
  596. raise_warning("Unknown encoding \"%s\"", _to_encoding);
  597. return NULL;
  598. }
  599. } else {
  600. to_encoding = MBSTRG(current_internal_encoding);
  601. }
  602. /* initialize string */
  603. mbfl_string_init(&string);
  604. mbfl_string_init(&result);
  605. from_encoding = MBSTRG(current_internal_encoding);
  606. string.no_encoding = from_encoding;
  607. string.no_language = MBSTRG(current_language);
  608. string.val = (unsigned char *)input;
  609. string.len = length;
  610. /* pre-conversion encoding */
  611. if (_from_encodings) {
  612. list = NULL;
  613. size = 0;
  614. php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings),
  615. &list, &size, 0);
  616. if (size == 1) {
  617. from_encoding = *list;
  618. string.no_encoding = from_encoding;
  619. } else if (size > 1) {
  620. /* auto detect */
  621. from_encoding = mbfl_identify_encoding_no(&string, list, size,
  622. MBSTRG(strict_detection));
  623. if (from_encoding != mbfl_no_encoding_invalid) {
  624. string.no_encoding = from_encoding;
  625. } else {
  626. raise_warning("Unable to detect character encoding");
  627. from_encoding = mbfl_no_encoding_pass;
  628. to_encoding = from_encoding;
  629. string.no_encoding = from_encoding;
  630. }
  631. } else {
  632. raise_warning("Illegal character encoding specified");
  633. }
  634. if (list != NULL) {
  635. free((void *)list);
  636. }
  637. }
  638. /* initialize converter */
  639. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  640. if (convd == NULL) {
  641. raise_warning("Unable to create character encoding converter");
  642. return NULL;
  643. }
  644. mbfl_buffer_converter_illegal_mode
  645. (convd, MBSTRG(current_filter_illegal_mode));
  646. mbfl_buffer_converter_illegal_substchar
  647. (convd, MBSTRG(current_filter_illegal_substchar));
  648. /* do it */
  649. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  650. if (ret) {
  651. if (output_len) {
  652. *output_len = ret->len;
  653. }
  654. output = (char *)ret->val;
  655. }
  656. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  657. mbfl_buffer_converter_delete(convd);
  658. return output;
  659. }
  660. static char *php_unicode_convert_case(int case_mode, const char *srcstr,
  661. size_t srclen, unsigned int *ret_len,
  662. const char *src_encoding) {
  663. char *unicode, *newstr;
  664. unsigned int unicode_len;
  665. unsigned char *unicode_ptr;
  666. size_t i;
  667. enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding);
  668. unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding,
  669. &unicode_len);
  670. if (unicode == NULL)
  671. return NULL;
  672. unicode_ptr = (unsigned char *)unicode;
  673. switch(case_mode) {
  674. case PHP_UNICODE_CASE_UPPER:
  675. for (i = 0; i < unicode_len; i+=4) {
  676. UINT32_TO_BE_ARY(&unicode_ptr[i],
  677. php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]),
  678. _src_encoding));
  679. }
  680. break;
  681. case PHP_UNICODE_CASE_LOWER:
  682. for (i = 0; i < unicode_len; i+=4) {
  683. UINT32_TO_BE_ARY(&unicode_ptr[i],
  684. php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]),
  685. _src_encoding));
  686. }
  687. break;
  688. case PHP_UNICODE_CASE_TITLE:
  689. {
  690. int mode = 0;
  691. for (i = 0; i < unicode_len; i+=4) {
  692. int res = php_unicode_is_prop
  693. (BE_ARY_TO_UINT32(&unicode_ptr[i]),
  694. UC_MN|UC_ME|UC_CF|UC_LM|UC_SK|UC_LU|UC_LL|UC_LT, 0);
  695. if (mode) {
  696. if (res) {
  697. UINT32_TO_BE_ARY
  698. (&unicode_ptr[i],
  699. php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]),
  700. _src_encoding));
  701. } else {
  702. mode = 0;
  703. }
  704. } else {
  705. if (res) {
  706. mode = 1;
  707. UINT32_TO_BE_ARY
  708. (&unicode_ptr[i],
  709. php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]),
  710. _src_encoding));
  711. }
  712. }
  713. }
  714. }
  715. break;
  716. }
  717. newstr = php_mb_convert_encoding(unicode, unicode_len, src_encoding,
  718. "UCS-4BE", ret_len);
  719. free(unicode);
  720. return newstr;
  721. }
  722. ///////////////////////////////////////////////////////////////////////////////
  723. // helpers
  724. /**
  725. * Return 0 if input contains any illegal encoding, otherwise 1.
  726. * Even if any illegal encoding is detected the result may contain a list
  727. * of parsed encodings.
  728. */
  729. static int php_mb_parse_encoding_array(CArrRef array,
  730. mbfl_no_encoding **return_list,
  731. int *return_size, int persistent) {
  732. int n, l, size, bauto,ret = 1;
  733. mbfl_no_encoding no_encoding;
  734. mbfl_no_encoding *src, *list, *entry;
  735. list = NULL;
  736. mbfl_no_encoding *identify_list = MBSTRG(default_detect_order_list);
  737. int identify_list_size = MBSTRG(default_detect_order_list_size);
  738. size = array.size() + identify_list_size;
  739. list = (mbfl_no_encoding *)calloc(size, sizeof(int));
  740. if (list != NULL) {
  741. entry = list;
  742. bauto = 0;
  743. n = 0;
  744. for (ArrayIter iter(array); iter; ++iter) {
  745. String hash_entry = iter.second();
  746. if (strcasecmp(hash_entry.data(), "auto") == 0) {
  747. if (!bauto) {
  748. bauto = 1;
  749. l = identify_list_size;
  750. src = identify_list;
  751. while (l > 0) {
  752. *entry++ = *src++;
  753. l--;
  754. n++;
  755. }
  756. }
  757. } else {
  758. no_encoding = mbfl_name2no_encoding(hash_entry.data());
  759. if (no_encoding != mbfl_no_encoding_invalid) {
  760. *entry++ = no_encoding;
  761. n++;
  762. } else {
  763. ret = 0;
  764. }
  765. }
  766. }
  767. if (n > 0) {
  768. if (return_list) {
  769. *return_list = list;
  770. } else {
  771. free(list);
  772. }
  773. } else {
  774. free(list);
  775. if (return_list) {
  776. *return_list = NULL;
  777. }
  778. ret = 0;
  779. }
  780. if (return_size) {
  781. *return_size = n;
  782. }
  783. } else {
  784. if (return_list) {
  785. *return_list = NULL;
  786. }
  787. if (return_size) {
  788. *return_size = 0;
  789. }
  790. ret = 0;
  791. }
  792. return ret;
  793. }
  794. static bool php_mb_parse_encoding(CVarRef encoding,
  795. mbfl_no_encoding **return_list,
  796. int *return_size, bool persistent) {
  797. bool ret;
  798. if (encoding.is(KindOfArray)) {
  799. ret = php_mb_parse_encoding_array(encoding.toArray(),
  800. return_list, return_size,
  801. persistent ? 1 : 0);
  802. } else {
  803. String enc = encoding.toString();
  804. ret = php_mb_parse_encoding_list(enc.data(), enc.size(),
  805. return_list, return_size,
  806. persistent ? 1 : 0);
  807. }
  808. if (!ret) {
  809. if (return_list && *return_list) {
  810. free(*return_list);
  811. *return_list = NULL;
  812. }
  813. return_size = 0;
  814. }
  815. return ret;
  816. }
  817. static int php_mb_nls_get_default_detect_order_list(mbfl_no_language lang,
  818. mbfl_no_encoding **plist,
  819. int* plist_size) {
  820. size_t i;
  821. *plist = (mbfl_no_encoding *) php_mb_default_identify_list_neut;
  822. *plist_size = sizeof(php_mb_default_identify_list_neut) /
  823. sizeof(php_mb_default_identify_list_neut[0]);
  824. for (i = 0; i < sizeof(php_mb_default_identify_list) /
  825. sizeof(php_mb_default_identify_list[0]); i++) {
  826. if (php_mb_default_identify_list[i].lang == lang) {
  827. *plist = php_mb_default_identify_list[i].list;
  828. *plist_size = php_mb_default_identify_list[i].list_size;
  829. return 1;
  830. }
  831. }
  832. return 0;
  833. }
  834. static size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc) {
  835. if (enc != NULL) {
  836. if (enc->flag & MBFL_ENCTYPE_MBCS) {
  837. if (enc->mblen_table != NULL) {
  838. if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
  839. }
  840. } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
  841. return 2;
  842. } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
  843. return 4;
  844. }
  845. }
  846. return 1;
  847. }
  848. static int php_mb_stripos(int mode,
  849. const char *old_haystack, int old_haystack_len,
  850. const char *old_needle, int old_needle_len,
  851. long offset, const char *from_encoding) {
  852. int n;
  853. mbfl_string haystack, needle;
  854. n = -1;
  855. mbfl_string_init(&haystack);
  856. mbfl_string_init(&needle);
  857. haystack.no_language = MBSTRG(current_language);
  858. haystack.no_encoding = MBSTRG(current_internal_encoding);
  859. needle.no_language = MBSTRG(current_language);
  860. needle.no_encoding = MBSTRG(current_internal_encoding);
  861. do {
  862. haystack.val = (unsigned char *)php_unicode_convert_case
  863. (PHP_UNICODE_CASE_UPPER, old_haystack, (size_t)old_haystack_len,
  864. &haystack.len, from_encoding);
  865. if (!haystack.val) {
  866. break;
  867. }
  868. if (haystack.len <= 0) {
  869. break;
  870. }
  871. needle.val = (unsigned char *)php_unicode_convert_case
  872. (PHP_UNICODE_CASE_UPPER, old_needle, (size_t)old_needle_len,
  873. &needle.len, from_encoding);
  874. if (!needle.val) {
  875. break;
  876. }
  877. if (needle.len <= 0) {
  878. break;
  879. }
  880. haystack.no_encoding = needle.no_encoding =
  881. mbfl_name2no_encoding(from_encoding);
  882. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  883. raise_warning("Unknown encoding \"%s\"", from_encoding);
  884. break;
  885. }
  886. int haystack_char_len = mbfl_strlen(&haystack);
  887. if (mode) {
  888. if ((offset > 0 && offset > haystack_char_len) ||
  889. (offset < 0 && -offset > haystack_char_len)) {
  890. raise_warning("Offset is greater than the length of haystack string");
  891. break;
  892. }
  893. } else {
  894. if (offset < 0 || offset > haystack_char_len) {
  895. raise_warning("Offset not contained in string.");
  896. break;
  897. }
  898. }
  899. n = mbfl_strpos(&haystack, &needle, offset, mode);
  900. } while(0);
  901. if (haystack.val) {
  902. free(haystack.val);
  903. }
  904. if (needle.val) {
  905. free(needle.val);
  906. }
  907. return n;
  908. }
  909. ///////////////////////////////////////////////////////////////////////////////
  910. Array f_mb_list_encodings() {
  911. Array ret;
  912. int i = 0;
  913. const mbfl_encoding **encodings = mbfl_get_supported_encodings();
  914. const mbfl_encoding *encoding;
  915. while ((encoding = encodings[i++]) != NULL) {
  916. ret.append(String(encoding->name, CopyString));
  917. }
  918. return ret;
  919. }
  920. Variant f_mb_list_encodings_alias_names(const String& name /*= null_string*/) {
  921. const mbfl_encoding **encodings;
  922. const mbfl_encoding *encoding;
  923. mbfl_no_encoding no_encoding;
  924. int i, j;
  925. Array ret;
  926. if (name.isNull()) {
  927. i = 0;
  928. encodings = mbfl_get_supported_encodings();
  929. while ((encoding = encodings[i++]) != NULL) {
  930. Array row;
  931. if (encoding->aliases != NULL) {
  932. j = 0;
  933. while ((*encoding->aliases)[j] != NULL) {
  934. row.append(String((*encoding->aliases)[j], CopyString));
  935. j++;
  936. }
  937. }
  938. ret.set(String(encoding->name, CopyString), row);
  939. }
  940. } else {
  941. no_encoding = mbfl_name2no_encoding(name.data());
  942. if (no_encoding == mbfl_no_encoding_invalid) {
  943. raise_warning("Unknown encoding \"%s\"", name.data());
  944. return false;
  945. }
  946. char *name = (char *)mbfl_no_encoding2name(no_encoding);
  947. if (name != NULL) {
  948. i = 0;
  949. encodings = mbfl_get_supported_encodings();
  950. while ((encoding = encodings[i++]) != NULL) {
  951. if (strcmp(encoding->name, name) != 0) continue;
  952. if (encoding->aliases != NULL) {
  953. j = 0;
  954. while ((*encoding->aliases)[j] != NULL) {
  955. ret.append(String((*encoding->aliases)[j], CopyString));
  956. j++;
  957. }
  958. }
  959. break;
  960. }
  961. } else {
  962. return false;
  963. }
  964. }
  965. return ret;
  966. }
  967. Variant f_mb_list_mime_names(const String& name /* = null_string */) {
  968. const mbfl_encoding **encodings;
  969. const mbfl_encoding *encoding;
  970. mbfl_no_encoding no_encoding;
  971. int i;
  972. Array ret;
  973. if (name.isNull()) {
  974. i = 0;
  975. encodings = mbfl_get_supported_encodings();
  976. while ((encoding = encodings[i++]) != NULL) {
  977. if (encoding->mime_name != NULL) {
  978. ret.set(String(encoding->name, CopyString),
  979. String(encoding->mime_name, CopyString));
  980. } else{
  981. ret.set(String(encoding->name, CopyString), "");
  982. }
  983. }
  984. } else {
  985. no_encoding = mbfl_name2no_encoding(name.data());
  986. if (no_encoding == mbfl_no_encoding_invalid) {
  987. raise_warning("Unknown encoding \"%s\"", name.data());
  988. return false;
  989. }
  990. char *name = (char *)mbfl_no_encoding2name(no_encoding);
  991. if (name != NULL) {
  992. i = 0;
  993. encodings = mbfl_get_supported_encodings();
  994. while ((encoding = encodings[i++]) != NULL) {
  995. if (strcmp(encoding->name, name) != 0) continue;
  996. if (encoding->mime_name != NULL) {
  997. return String(encoding->mime_name, CopyString);
  998. }
  999. break;
  1000. }
  1001. return "";
  1002. } else {
  1003. return false;
  1004. }
  1005. }
  1006. return ret;
  1007. }
  1008. bool f_mb_check_encoding(const String& var /* = null_string */,
  1009. const String& encoding /* = null_string */) {
  1010. mbfl_buffer_converter *convd;
  1011. mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
  1012. mbfl_string string, result, *ret = NULL;
  1013. long illegalchars = 0;
  1014. if (var.isNull()) {
  1015. return MBSTRG(illegalchars) == 0;
  1016. }
  1017. if (!encoding.isNull()) {
  1018. no_encoding = mbfl_name2no_encoding(encoding.data());
  1019. if (no_encoding == mbfl_no_encoding_invalid ||
  1020. no_encoding == mbfl_no_encoding_pass) {
  1021. raise_warning("Invalid encoding \"%s\"", encoding.data());
  1022. return false;
  1023. }
  1024. }
  1025. convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
  1026. if (convd == NULL) {
  1027. raise_warning("Unable to create converter");
  1028. return false;
  1029. }
  1030. mbfl_buffer_converter_illegal_mode
  1031. (convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
  1032. mbfl_buffer_converter_illegal_substchar
  1033. (convd, 0);
  1034. /* initialize string */
  1035. mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
  1036. mbfl_string_init(&result);
  1037. string.val = (unsigned char *)var.data();
  1038. string.len = var.size();
  1039. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  1040. illegalchars = mbfl_buffer_illegalchars(convd);
  1041. mbfl_buffer_converter_delete(convd);
  1042. if (ret != NULL) {
  1043. MBSTRG(illegalchars) += illegalchars;
  1044. if (illegalchars == 0 && string.len == ret->len &&
  1045. memcmp((const char *)string.val, (const char *)ret->val,
  1046. string.len) == 0) {
  1047. mbfl_string_clear(&result);
  1048. return true;
  1049. } else {
  1050. mbfl_string_clear(&result);
  1051. return false;
  1052. }
  1053. } else {
  1054. return false;
  1055. }
  1056. }
  1057. Variant f_mb_convert_case(const String& str, int mode,
  1058. const String& encoding /* = null_string */) {
  1059. const char *enc = NULL;
  1060. if (encoding.empty()) {
  1061. enc = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1062. }
  1063. unsigned int ret_len;
  1064. char *newstr = php_unicode_convert_case(mode, str.data(), str.size(),
  1065. &ret_len, enc);
  1066. if (newstr) {
  1067. return String(newstr, ret_len, AttachString);
  1068. }
  1069. return false;
  1070. }
  1071. Variant f_mb_convert_encoding(const String& str, const String& to_encoding,
  1072. CVarRef from_encoding /* = null_variant */) {
  1073. String encoding = from_encoding.toString();
  1074. if (from_encoding.is(KindOfArray)) {
  1075. StringBuffer _from_encodings;
  1076. Array encs = from_encoding.toArray();
  1077. for (ArrayIter iter(encs); iter; ++iter) {
  1078. if (!_from_encodings.empty()) {
  1079. _from_encodings.append(",");
  1080. }
  1081. _from_encodings.append(iter.second().toString());
  1082. }
  1083. encoding = _from_encodings.detach();
  1084. }
  1085. unsigned int size;
  1086. char *ret = php_mb_convert_encoding(str.data(), str.size(),
  1087. to_encoding.data(),
  1088. (!encoding.empty() ?
  1089. encoding.data() : NULL),
  1090. &size);
  1091. if (ret != NULL) {
  1092. return String(ret, size, AttachString);
  1093. }
  1094. return false;
  1095. }
  1096. Variant f_mb_convert_kana(const String& str,
  1097. const String& option /* = null_string */,
  1098. const String& encoding /* = null_string */) {
  1099. mbfl_string string, result, *ret;
  1100. mbfl_string_init(&string);
  1101. string.no_language = MBSTRG(current_language);
  1102. string.no_encoding = MBSTRG(current_internal_encoding);
  1103. string.val = (unsigned char *)str.data();
  1104. string.len = str.size();
  1105. int opt = 0x900;
  1106. if (!option.empty()) {
  1107. const char *p = option.data();
  1108. int n = option.size();
  1109. int i = 0;
  1110. opt = 0;
  1111. while (i < n) {
  1112. i++;
  1113. switch (*p++) {
  1114. case 'A': opt |= 0x1; break;
  1115. case 'a': opt |= 0x10; break;
  1116. case 'R': opt |= 0x2; break;
  1117. case 'r': opt |= 0x20; break;
  1118. case 'N': opt |= 0x4; break;
  1119. case 'n': opt |= 0x40; break;
  1120. case 'S': opt |= 0x8; break;
  1121. case 's': opt |= 0x80; break;
  1122. case 'K': opt |= 0x100; break;
  1123. case 'k': opt |= 0x1000; break;
  1124. case 'H': opt |= 0x200; break;
  1125. case 'h': opt |= 0x2000; break;
  1126. case 'V': opt |= 0x800; break;
  1127. case 'C': opt |= 0x10000; break;
  1128. case 'c': opt |= 0x20000; break;
  1129. case 'M': opt |= 0x100000; break;
  1130. case 'm': opt |= 0x200000; break;
  1131. }
  1132. }
  1133. }
  1134. /* encoding */
  1135. if (!encoding.empty()) {
  1136. string.no_encoding = mbfl_name2no_encoding(encoding.data());
  1137. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1138. raise_warning("Unknown encoding \"%s\"", encoding.data());
  1139. return false;
  1140. }
  1141. }
  1142. ret = mbfl_ja_jp_hantozen(&string, &result, opt);
  1143. if (ret != NULL) {
  1144. return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  1145. }
  1146. return false;
  1147. }
  1148. static bool php_mbfl_encoding_detect(CVarRef var,
  1149. mbfl_encoding_detector *identd,
  1150. mbfl_string *string) {
  1151. if (var.is(KindOfArray) || var.is(KindOfObject)) {
  1152. Array items = var.toArray();
  1153. for (ArrayIter iter(items); iter; ++iter) {
  1154. if (php_mbfl_encoding_detect(iter.second(), identd, string)) {
  1155. return true;
  1156. }
  1157. }
  1158. } else if (var.isString()) {
  1159. String svar = var.toString();
  1160. string->val = (unsigned char *)svar.data();
  1161. string->len = svar.size();
  1162. if (mbfl_encoding_detector_feed(identd, string)) {
  1163. return true;
  1164. }
  1165. }
  1166. return false;
  1167. }
  1168. static Variant php_mbfl_convert(CVarRef var,
  1169. mbfl_buffer_converter *convd,
  1170. mbfl_string *string,
  1171. mbfl_string *result) {
  1172. if (var.is(KindOfArray)) {
  1173. Array ret;
  1174. Array items = var.toArray();
  1175. for (ArrayIter iter(items); iter; ++iter) {
  1176. ret.set(iter.first(),
  1177. php_mbfl_convert(iter.second(), convd, string, result));
  1178. }
  1179. return ret;
  1180. }
  1181. if (var.is(KindOfObject)) {
  1182. Object obj = var.toObject();
  1183. Array items = var.toArray();
  1184. for (ArrayIter iter(items); iter; ++iter) {
  1185. obj->o_set(iter.first().toString(),
  1186. php_mbfl_convert(iter.second().toString().data(), convd,
  1187. string, result));
  1188. }
  1189. return var; // which still has obj
  1190. }
  1191. if (var.isString()) {
  1192. String svar = var.toString();
  1193. string->val = (unsigned char *)svar.data();
  1194. string->len = svar.size();
  1195. mbfl_string *ret =
  1196. mbfl_buffer_converter_feed_result(convd, string, result);
  1197. return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  1198. }
  1199. return var;
  1200. }
  1201. Variant f_mb_convert_variables(int _argc, const String& to_encoding,
  1202. CVarRef from_encoding, VRefParam vars,
  1203. CArrRef _argv /* = null_array */) {
  1204. mbfl_string string, result;
  1205. mbfl_no_encoding _from_encoding, _to_encoding;
  1206. mbfl_encoding_detector *identd;
  1207. mbfl_buffer_converter *convd;
  1208. int elistsz;
  1209. mbfl_no_encoding *elist;
  1210. char *name;
  1211. /* new encoding */
  1212. _to_encoding = mbfl_name2no_encoding(to_encoding.data());
  1213. if (_to_encoding == mbfl_no_encoding_invalid) {
  1214. raise_warning("Unknown encoding \"%s\"", to_encoding.data());
  1215. return false;
  1216. }
  1217. /* initialize string */
  1218. mbfl_string_init(&string);
  1219. mbfl_string_init(&result);
  1220. _from_encoding = MBSTRG(current_internal_encoding);
  1221. string.no_encoding = _from_encoding;
  1222. string.no_language = MBSTRG(current_language);
  1223. /* pre-conversion encoding */
  1224. elist = NULL;
  1225. elistsz = 0;
  1226. php_mb_parse_encoding(from_encoding, &elist, &elistsz, false);
  1227. if (elistsz <= 0) {
  1228. _from_encoding = mbfl_no_encoding_pass;
  1229. } else if (elistsz == 1) {
  1230. _from_encoding = *elist;
  1231. } else {
  1232. /* auto detect */
  1233. _from_encoding = mbfl_no_encoding_invalid;
  1234. identd = mbfl_encoding_detector_new(elist, elistsz,
  1235. MBSTRG(strict_detection));
  1236. if (identd != NULL) {
  1237. for (int n = -1; n < _argv.size(); n++) {
  1238. if (php_mbfl_encoding_detect(n < 0 ? (Variant&)vars : _argv[n],
  1239. identd, &string)) {
  1240. break;
  1241. }
  1242. }
  1243. _from_encoding = mbfl_encoding_detector_judge(identd);
  1244. mbfl_encoding_detector_delete(identd);
  1245. }
  1246. if (_from_encoding == mbfl_no_encoding_invalid) {
  1247. raise_warning("Unable to detect encoding");
  1248. _from_encoding = mbfl_no_encoding_pass;
  1249. }
  1250. }
  1251. if (elist != NULL) {
  1252. free((void *)elist);
  1253. }
  1254. /* create converter */
  1255. convd = NULL;
  1256. if (_from_encoding != mbfl_no_encoding_pass) {
  1257. convd = mbfl_buffer_converter_new(_from_encoding, _to_encoding, 0);
  1258. if (convd == NULL) {
  1259. raise_warning("Unable to create converter");
  1260. return false;
  1261. }
  1262. mbfl_buffer_converter_illegal_mode
  1263. (convd, MBSTRG(current_filter_illegal_mode));
  1264. mbfl_buffer_converter_illegal_substchar
  1265. (convd, MBSTRG(current_filter_illegal_substchar));
  1266. }
  1267. /* convert */
  1268. if (convd != NULL) {
  1269. vars = php_mbfl_convert(vars, convd, &string, &result);
  1270. for (int n = 0; n < _argv.size(); n++) {
  1271. const_cast<Array&>(_argv).lval(n) =
  1272. php_mbfl_convert(_argv[n], convd, &string, &result);
  1273. }
  1274. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  1275. mbfl_buffer_converter_delete(convd);
  1276. }
  1277. name = (char *)mbfl_no_encoding2name(_from_encoding);
  1278. if (name != NULL) {
  1279. return String(name, CopyString);
  1280. }
  1281. return false;
  1282. }
  1283. Variant f_mb_decode_mimeheader(const String& str) {
  1284. mbfl_string string, result, *ret;
  1285. mbfl_string_init(&string);
  1286. string.no_language = MBSTRG(current_language);
  1287. string.no_encoding = MBSTRG(current_internal_encoding);
  1288. string.val = (unsigned char *)str.data();
  1289. string.len = str.size();
  1290. mbfl_string_init(&result);
  1291. ret = mbfl_mime_header_decode(&string, &result,
  1292. MBSTRG(current_internal_encoding));
  1293. if (ret != NULL) {
  1294. return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  1295. }
  1296. return false;
  1297. }
  1298. static Variant php_mb_numericentity_exec(const String& str, CVarRef convmap,
  1299. const String& encoding, int type) {
  1300. int mapsize=0;
  1301. mbfl_string string, result, *ret;
  1302. mbfl_no_encoding no_encoding;
  1303. mbfl_string_init(&string);
  1304. string.no_language = MBSTRG(current_language);
  1305. string.no_encoding = MBSTRG(current_internal_encoding);
  1306. string.val = (unsigned char *)str.data();
  1307. string.len = str.size();
  1308. /* encoding */
  1309. if (!encoding.empty()) {
  1310. no_encoding = mbfl_name2no_encoding(encoding.data());
  1311. if (no_encoding == mbfl_no_encoding_invalid) {
  1312. raise_warning("Unknown encoding \"%s\"", encoding.data());
  1313. return false;
  1314. } else {
  1315. string.no_encoding = no_encoding;
  1316. }
  1317. }
  1318. /* conversion map */
  1319. int *iconvmap = NULL;
  1320. if (convmap.is(KindOfArray)) {
  1321. Array convs = convmap.toArray();
  1322. mapsize = convs.size();
  1323. if (mapsize > 0) {
  1324. iconvmap = (int*)malloc(mapsize * sizeof(int));
  1325. int *mapelm = iconvmap;
  1326. for (ArrayIter iter(convs); iter; ++iter) {
  1327. *mapelm++ = iter.second().toInt32();
  1328. }
  1329. }
  1330. }
  1331. if (iconvmap == NULL) {
  1332. return false;
  1333. }
  1334. mapsize /= 4;
  1335. ret = mbfl_html_numeric_entity(&string, &result, iconvmap, mapsize, type);
  1336. free(iconvmap);
  1337. if (ret != NULL) {
  1338. return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  1339. }
  1340. return false;
  1341. }
  1342. Variant f_mb_decode_numericentity(const String& str, CVarRef convmap,
  1343. const String& encoding /* = null_string */) {
  1344. return php_mb_numericentity_exec(str, convmap, encoding, 1);
  1345. }
  1346. Variant f_mb_detect_encoding(const String& str,
  1347. CVarRef encoding_list /* = null_variant */,
  1348. CVarRef strict /* = null_variant */) {
  1349. mbfl_string string;
  1350. const char *ret;
  1351. mbfl_no_encoding *elist;
  1352. int size;
  1353. mbfl_no_encoding *list = 0;
  1354. /* make encoding list */
  1355. list = NULL;
  1356. size = 0;
  1357. php_mb_parse_encoding(encoding_list, &list, &size, false);
  1358. if (size > 0 && list != NULL) {
  1359. elist = list;
  1360. } else {
  1361. elist = MBSTRG(current_detect_order_list);
  1362. size = MBSTRG(current_detect_order_list_size);
  1363. }
  1364. long nstrict = 0;
  1365. if (!strict.isNull()) {
  1366. nstrict = strict.toInt64();
  1367. } else {
  1368. nstrict = MBSTRG(strict_detection);
  1369. }
  1370. mbfl_string_init(&string);
  1371. string.no_language = MBSTRG(current_language);
  1372. string.val = (unsigned char *)str.data();
  1373. string.len = str.size();
  1374. ret = mbfl_identify_encoding_name(&string, elist, size, nstrict);
  1375. if (list != NULL) {
  1376. free(list);
  1377. }
  1378. if (ret != NULL) {
  1379. return String(ret, CopyString);
  1380. }
  1381. return false;
  1382. }
  1383. Variant f_mb_detect_order(CVarRef encoding_list /* = null_variant */) {
  1384. int n, size;
  1385. mbfl_no_encoding *list, *entry;
  1386. if (encoding_list.isNull()) {
  1387. Array ret;
  1388. entry = MBSTRG(current_detect_order_list);
  1389. n = MBSTRG(current_detect_order_list_size);
  1390. while (n > 0) {
  1391. char *name = (char *)mbfl_no_encoding2name(*entry);
  1392. if (name) {
  1393. ret.append(String(name, CopyString));
  1394. }
  1395. entry++;
  1396. n--;
  1397. }
  1398. return ret;
  1399. }
  1400. list = NULL;
  1401. size = 0;
  1402. if (!php_mb_parse_encoding(encoding_list, &list, &size, false) ||
  1403. list == NULL) {
  1404. return false;
  1405. }
  1406. if (MBSTRG(current_detect_order_list)) {
  1407. free(MBSTRG(current_detect_order_list));
  1408. }
  1409. MBSTRG(current_detect_order_list) = list;
  1410. MBSTRG(current_detect_order_list_size) = size;
  1411. return true;
  1412. }
  1413. Variant f_mb_encode_mimeheader(const String& str,
  1414. const String& charset /* = null_string */,
  1415. const String& transfer_encoding/*= null_string*/,
  1416. const String& linefeed /* = "\r\n" */,
  1417. int indent /* = 0 */) {
  1418. mbfl_no_encoding charsetenc, transenc;
  1419. mbfl_string string, result, *ret;
  1420. mbfl_string_init(&string);
  1421. string.no_language = MBSTRG(current_language);
  1422. string.no_encoding = MBSTRG(current_internal_encoding);
  1423. string.val = (unsigned char *)str.data();
  1424. string.len = str.size();
  1425. charsetenc = mbfl_no_encoding_pass;
  1426. transenc = mbfl_no_encoding_base64;
  1427. if (!charset.empty()) {
  1428. charsetenc = mbfl_name2no_encoding(charset.data());
  1429. if (charsetenc == mbfl_no_encoding_invalid) {
  1430. raise_warning("Unknown encoding \"%s\"", charset.data());
  1431. return false;
  1432. }
  1433. } else {
  1434. const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
  1435. if (lang != NULL) {
  1436. charsetenc = lang->mail_charset;
  1437. transenc = lang->mail_header_encoding;
  1438. }
  1439. }
  1440. if (!transfer_encoding.empty()) {
  1441. char ch = *transfer_encoding.data();
  1442. if (ch == 'B' || ch == 'b') {
  1443. transenc = mbfl_no_encoding_base64;
  1444. } else if (ch == 'Q' || ch == 'q') {
  1445. transenc = mbfl_no_encoding_qprint;
  1446. }
  1447. }
  1448. mbfl_string_init(&result);
  1449. ret = mbfl_mime_header_encode(&string, &result, charsetenc, transenc,
  1450. linefeed.data(), indent);
  1451. if (ret != NULL) {
  1452. return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  1453. }
  1454. return false;
  1455. }
  1456. Variant f_mb_encode_numericentity(const String& str, CVarRef convmap,
  1457. const String& encoding /* = null_string */) {
  1458. return php_mb_numericentity_exec(str, convmap, encoding, 0);
  1459. }
  1460. const StaticString
  1461. s_internal_encoding("internal_encoding"),
  1462. s_http_input("http_input"),
  1463. s_http_output("http_output"),
  1464. s_mail_charset("mail_charset"),
  1465. s_mail_header_encoding("mail_header_encoding"),
  1466. s_mail_body_encoding("mail_body_encoding"),
  1467. s_illegal_chars("illegal_chars"),
  1468. s_encoding_translation("encoding_translation"),
  1469. s_On("On"),
  1470. s_Off("Off"),
  1471. s_language("language"),
  1472. s_detect_order("detect_order"),
  1473. s_substitute_character("substitute_character"),
  1474. s_strict_detection("strict_detection"),
  1475. s_none("none"),
  1476. s_long("long"),
  1477. s_entity("entity");
  1478. Variant f_mb_get_info(const String& type /* = null_string */) {
  1479. const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
  1480. mbfl_no_encoding *entry;
  1481. int n;
  1482. char *name;
  1483. if (type.empty() || strcasecmp(type.data(), "all") == 0) {
  1484. Array ret;
  1485. if ((name = (char *)mbfl_no_encoding2name
  1486. (MBSTRG(current_internal_encoding))) != NULL) {
  1487. ret.set(s_internal_encoding, String(name, CopyString));
  1488. }
  1489. if ((name = (char *)mbfl_no_encoding2name
  1490. (MBSTRG(http_input_identify))) != NULL) {
  1491. ret.set(s_http_input, String(name, CopyString));
  1492. }
  1493. if ((name = (char *)mbfl_no_encoding2name
  1494. (MBSTRG(current_http_output_encoding))) != NULL) {
  1495. ret.set(s_http_output, String(name, CopyString));
  1496. }
  1497. if (lang != NULL) {
  1498. if ((name = (char *)mbfl_no_encoding2name
  1499. (lang->mail_charset)) != NULL) {
  1500. ret.set(s_mail_charset, String(name, CopyString));
  1501. }
  1502. if ((name = (char *)mbfl_no_encoding2name
  1503. (lang->mail_header_encoding)) != NULL) {
  1504. ret.set(s_mail_header_encoding, String(name, CopyString));
  1505. }
  1506. if ((name = (char *)mbfl_no_encoding2name
  1507. (lang->mail_body_encoding)) != NULL) {
  1508. ret.set(s_mail_body_encoding, String(name, CopyString));
  1509. }
  1510. }
  1511. ret.set(s_illegal_chars, MBSTRG(illegalchars));
  1512. ret.set(s_encoding_translation,
  1513. MBSTRG(encoding_translation) ? s_On : s_Off);
  1514. if ((name = (char *)mbfl_no_language2name
  1515. (MBSTRG(current_language))) != NULL) {
  1516. ret.set(s_language, String(name, CopyString));
  1517. }
  1518. n = MBSTRG(current_detect_order_list_size);
  1519. entry = MBSTRG(current_detect_order_list);
  1520. if (n > 0) {
  1521. Array row;
  1522. while (n > 0) {
  1523. if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
  1524. row.append(String(name, CopyString));
  1525. }
  1526. entry++;
  1527. n--;
  1528. }
  1529. ret.set(s_detect_order, row);
  1530. }
  1531. switch (MBSTRG(current_filter_illegal_mode)) {
  1532. case MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE:
  1533. ret.set(s_substitute_character, s_none);
  1534. break;
  1535. case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
  1536. ret.set(s_substitute_character, s_long);
  1537. break;
  1538. case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
  1539. ret.set(s_substitute_character, s_entity);
  1540. break;
  1541. default:
  1542. ret.set(s_substitute_character,
  1543. MBSTRG(current_filter_illegal_substchar));
  1544. }
  1545. ret.set(s_strict_detection, MBSTRG(strict_detection) ? s_On : s_Off);
  1546. return ret;
  1547. } else if (strcasecmp(type.data(), "internal_encoding") == 0) {
  1548. if ((name = (char *)mbfl_no_encoding2name
  1549. (MBSTRG(current_internal_encoding))) != NULL) {
  1550. return String(name, CopyString);
  1551. }
  1552. } else if (strcasecmp(type.data(), "http_input") == 0) {
  1553. if ((name = (char *)mbfl_no_encoding2name
  1554. (MBSTRG(http_input_identify))) != NULL) {
  1555. return String(name, CopyString);
  1556. }
  1557. } else if (strcasecmp(type.data(), "http_output") == 0) {
  1558. if ((name = (char *)mbfl_no_encoding2name
  1559. (MBSTRG(current_http_output_encoding))) != NULL) {
  1560. return String(name, CopyString);
  1561. }
  1562. } else if (strcasecmp(type.data(), "mail_charset") == 0) {
  1563. if (lang != NULL &&
  1564. (name = (char *)mbfl_no_encoding2name
  1565. (lang->mail_charset)) != NULL) {
  1566. return String(name, CopyString);
  1567. }
  1568. } else if (strcasecmp(type.data(), "mail_header_encoding") == 0) {
  1569. if (lang != NULL &&
  1570. (name = (char *)mbfl_no_encoding2name
  1571. (lang->mail_header_encoding)) != NULL) {
  1572. return String(name, CopyString);
  1573. }
  1574. } else if (strcasecmp(type.data(), "mail_body_encoding") == 0) {
  1575. if (lang != NULL &&
  1576. (name = (char *)mbfl_no_encoding2name
  1577. (lang->mail_body_encoding)) != NULL) {
  1578. return String(name, CopyString);
  1579. }
  1580. } else if (strcasecmp(type.data(), "illegal_chars") == 0) {
  1581. return MBSTRG(illegalchars);
  1582. } else if (strcasecmp(type.data(), "encoding_translation") == 0) {
  1583. return MBSTRG(encoding_translation) ? "On" : "Off";
  1584. } else if (strcasecmp(type.data(), "language") == 0) {
  1585. if ((name = (char *)mbfl_no_language2name
  1586. (MBSTRG(current_language))) != NULL) {
  1587. return String(name, CopyString);
  1588. }
  1589. } else if (strcasecmp(type.data(), "detect_order") == 0) {
  1590. n = MBSTRG(current_detect_order_list_size);
  1591. entry = MBSTRG(current_detect_order_list);
  1592. if (n > 0) {
  1593. Array ret;
  1594. while (n > 0) {
  1595. name = (char *)mbfl_no_encoding2name(*entry);
  1596. if (name) {
  1597. ret.append(String(name, CopyString));
  1598. }
  1599. entry++;
  1600. n--;
  1601. }
  1602. }
  1603. } else if (strcasecmp(type.data(), "substitute_character") == 0) {
  1604. if (MBSTRG(current_filter_illegal_mode) ==
  1605. MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  1606. return s_none;
  1607. } else if (MBSTRG(current_filter_illegal_mode) ==
  1608. MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  1609. return s_long;
  1610. } else if (MBSTRG(current_filter_illegal_mode) ==
  1611. MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  1612. return s_entity;
  1613. } else {
  1614. return MBSTRG(current_filter_illegal_substchar);
  1615. }
  1616. } else if (strcasecmp(type.data(), "strict_detection") == 0) {
  1617. return MBSTRG(strict_detection) ? s_On : s_Off;
  1618. }
  1619. return false;
  1620. }
  1621. Variant f_mb_http_input(const String& type /* = null_string */) {
  1622. int n;
  1623. char *name;
  1624. mbfl_no_encoding *entry;
  1625. mbfl_no_encoding result = mbfl_no_encoding_invalid;
  1626. if (type.empty()) {
  1627. result = MBSTRG(http_input_identify);
  1628. } else {
  1629. switch (*type.data()) {
  1630. case 'G': case 'g': result = MBSTRG(http_input_identify_get); break;
  1631. case 'P': case 'p': result = MBSTRG(http_input_identify_post); break;
  1632. case 'C': case 'c': result = MBSTRG(http_input_identify_cookie); break;
  1633. case 'S': case 's': result = MBSTRG(http_input_identify_string); break;
  1634. case 'I': case 'i':
  1635. {
  1636. Array ret;
  1637. entry = MBSTRG(http_input_list);
  1638. n = MBSTRG(http_input_list_size);
  1639. while (n > 0) {
  1640. name = (char *)mbfl_no_encoding2name(*entry);
  1641. if (name) {
  1642. ret.append(String(name, CopyString));
  1643. }
  1644. entry++;
  1645. n--;
  1646. }
  1647. return ret;
  1648. }
  1649. case 'L': case 'l':
  1650. {
  1651. entry = MBSTRG(http_input_list);
  1652. n = MBSTRG(http_input_list_size);
  1653. StringBuffer list;
  1654. while (n > 0) {
  1655. name = (char *)mbfl_no_encoding2name(*entry);
  1656. if (name) {
  1657. if (list.empty()) {
  1658. list.append(name);
  1659. } else {
  1660. list.append(',');
  1661. list.append(name);
  1662. }
  1663. }
  1664. entry++;
  1665. n--;
  1666. }
  1667. if (list.empty()) {
  1668. return false;
  1669. }
  1670. return list.detach();
  1671. }
  1672. default:
  1673. result = MBSTRG(http_input_identify);
  1674. break;
  1675. }
  1676. }
  1677. if (result != mbfl_no_encoding_invalid &&
  1678. (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
  1679. return String(name, CopyString);
  1680. }
  1681. return false;
  1682. }
  1683. Variant f_mb_http_output(const String& encoding /* = null_string */) {
  1684. if (encoding.empty()) {
  1685. char *name = (char *)mbfl_no_encoding2name
  1686. (MBSTRG(current_http_output_encoding));
  1687. if (name != NULL) {
  1688. return String(name, CopyString);
  1689. }
  1690. return false;
  1691. }
  1692. mbfl_no_encoding no_encoding = mbfl_name2no_encoding(encoding.data());
  1693. if (no_encoding == mbfl_no_encoding_invalid) {
  1694. raise_warning("Unknown encoding \"%s\"", encoding.data());
  1695. return false;
  1696. }
  1697. MBSTRG(current_http_output_encoding) = no_encoding;
  1698. return true;
  1699. }
  1700. Variant f_mb_internal_encoding(const String& encoding /* = null_string */) {
  1701. if (encoding.empty()) {
  1702. char *name = (char *)mbfl_no_encoding2name
  1703. (MBSTRG(current_internal_encoding));
  1704. if (name != NULL) {
  1705. return String(name, CopyString);
  1706. }
  1707. return false;
  1708. }
  1709. mbfl_no_encoding no_encoding = mbfl_name2no_encoding(encoding.data());
  1710. if (no_encoding == mbfl_no_encoding_invalid) {
  1711. raise_warning("Unknown encoding \"%s\"", encoding.data());
  1712. return false;
  1713. }
  1714. MBSTRG(current_internal_encoding) = no_encoding;
  1715. return true;
  1716. }
  1717. Variant f_mb_language(const String& language /* = null_string */) {
  1718. if (language.empty()) {
  1719. return String(mbfl_no_language2name(MBSTRG(current_language)), CopyString);
  1720. }
  1721. mbfl_no_language no_language = mbfl_name2no_language(language.data());
  1722. if (no_language == mbfl_no_language_invalid) {
  1723. raise_warning("Unknown language \"%s\"", language.data());
  1724. return false;
  1725. }
  1726. php_mb_nls_get_default_detect_order_list
  1727. (no_language, &MBSTRG(default_detect_order_list),
  1728. &MBSTRG(default_detect_order_list_size));
  1729. MBSTRG(current_language) = no_language;
  1730. return true;
  1731. }
  1732. String f_mb_output_handler(const String& contents, int status) {
  1733. mbfl_string string, result;
  1734. int last_feed;
  1735. mbfl_no_encoding encoding = MBSTRG(current_http_output_encoding);
  1736. /* start phase only */
  1737. if (status & PHP_OUTPUT_HANDLER_START) {
  1738. /* delete the converter just in case. */
  1739. if (MBSTRG(outconv)) {
  1740. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1741. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1742. MBSTRG(outconv) = NULL;
  1743. }
  1744. if (encoding == mbfl_no_encoding_pass) {
  1745. return contents;
  1746. }
  1747. /* analyze mime type */
  1748. String mimetype = g_context->getMimeType();
  1749. if (!mimetype.empty()) {
  1750. const char *charset = mbfl_no2preferred_mime_name(encoding);
  1751. if (charset) {
  1752. g_context->setContentType(mimetype, charset);
  1753. }
  1754. /* activate the converter */
  1755. MBSTRG(outconv) = mbfl_buffer_converter_new
  1756. (MBSTRG(current_internal_encoding), encoding, 0);
  1757. }
  1758. }
  1759. /* just return if the converter is not activated. */
  1760. if (MBSTRG(outconv) == NULL) {
  1761. return contents;
  1762. }
  1763. /* flag */
  1764. last_feed = ((status & PHP_OUTPUT_HANDLER_END) != 0);
  1765. /* mode */
  1766. mbfl_buffer_converter_illegal_mode
  1767. (MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
  1768. mbfl_buffer_converter_illegal_substchar
  1769. (MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
  1770. /* feed the string */
  1771. mbfl_string_init(&string);
  1772. string.no_language = MBSTRG(current_language);
  1773. string.no_encoding = MBSTRG(current_internal_encoding);
  1774. string.val = (unsigned char *)contents.data();
  1775. string.len = contents.size();
  1776. mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
  1777. if (last_feed) {
  1778. mbfl_buffer_converter_flush(MBSTRG(outconv));
  1779. }
  1780. /* get the converter output, and return it */
  1781. mbfl_buffer_converter_result(MBSTRG(outconv), &result);
  1782. /* delete the converter if it is the last feed. */
  1783. if (last_feed) {
  1784. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1785. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1786. MBSTRG(outconv) = NULL;
  1787. }
  1788. return String(reinterpret_cast<char*>(result.val), result.len, AttachString);
  1789. }
  1790. typedef struct _php_mb_encoding_handler_info_t {
  1791. int data_type;
  1792. const char *separator;
  1793. unsigned int force_register_globals: 1;
  1794. unsigned int report_errors: 1;
  1795. enum mbfl_no_language to_language;
  1796. enum mbfl_no_encoding to_encoding;
  1797. enum mbfl_no_language from_language;
  1798. int num_from_encodings;
  1799. const enum mbfl_no_encoding *from_encodings;
  1800. } php_mb_encoding_handler_info_t;
  1801. static mbfl_no_encoding _php_mb_encoding_handler_ex
  1802. (const php_mb_encoding_handler_info_t *info, Variant &arg, char *res) {
  1803. char *var, *val;
  1804. const char *s1, *s2;
  1805. char *strtok_buf = NULL, **val_list = NULL;
  1806. int n, num, *len_list = NULL;
  1807. unsigned int val_len;
  1808. mbfl_string string, resvar, resval;
  1809. enum mbfl_no_encoding from_encoding = mbfl_no_encoding_invalid;
  1810. mbfl_encoding_detector *identd = NULL;
  1811. mbfl_buffer_converter *convd = NULL;
  1812. mbfl_string_init_set(&string, info->to_language, info->to_encoding);
  1813. mbfl_string_init_set(&resvar, info->to_language, info->to_encoding);
  1814. mbfl_string_init_set(&resval, info->to_language, info->to_encoding);
  1815. if (!res || *res == '\0') {
  1816. goto out;
  1817. }
  1818. /* count the variables(separators) contained in the "res".
  1819. * separator may contain multiple separator chars.
  1820. */
  1821. num = 1;
  1822. for (s1=res; *s1 != '\0'; s1++) {
  1823. for (s2=info->separator; *s2 != '\0'; s2++) {
  1824. if (*s1 == *s2) {
  1825. num++;
  1826. }
  1827. }
  1828. }
  1829. num *= 2; /* need space for variable name and value */
  1830. val_list = (char **)calloc(num, sizeof(char *));
  1831. len_list = (int *)calloc(num, sizeof(int));
  1832. /* split and decode the query */
  1833. n = 0;
  1834. strtok_buf = NULL;
  1835. var = strtok_r(res, info->separator, &strtok_buf);
  1836. while (var) {
  1837. val = strchr(var, '=');
  1838. if (val) { /* have a value */
  1839. len_list[n] = url_decode_ex(var, val-var);
  1840. val_list[n] = var;
  1841. n++;
  1842. *val++ = '\0';
  1843. val_list[n] = val;
  1844. len_list[n] = url_decode_ex(val, strlen(val));
  1845. } else {
  1846. len_list[n] = url_decode_ex(var, strlen(var));
  1847. val_list[n] = var;
  1848. n++;
  1849. val_list[n] = const_cast<char*>("");
  1850. len_list[n] = 0;
  1851. }
  1852. n++;
  1853. var = strtok_r(NULL, info->separator, &strtok_buf);
  1854. }
  1855. num = n; /* make sure to process initilized vars only */
  1856. /* initialize converter */
  1857. if (info->num_from_encodings <= 0) {
  1858. from_encoding = mbfl_no_encoding_pass;
  1859. } else if (info->num_from_encodings == 1) {
  1860. from_encoding = info->from_encodings[0];
  1861. } else {
  1862. /* auto detect */
  1863. from_encoding = mbfl_no_encoding_invalid;
  1864. identd = mbfl_encoding_detector_new
  1865. ((enum mbfl_no_encoding *)info->from_encodings,
  1866. info->num_from_encodings, MBSTRG(strict_detection));
  1867. if (identd) {
  1868. n = 0;
  1869. while (n < num) {
  1870. string.val = (unsigned char *)val_list[n];
  1871. string.len = len_list[n];
  1872. if (mbfl_encoding_detector_feed(identd, &string)) {
  1873. break;
  1874. }
  1875. n++;
  1876. }
  1877. from_encoding = mbfl_encoding_detector_judge(identd);
  1878. mbfl_encoding_detector_delete(identd);
  1879. }
  1880. if (from_encoding == mbfl_no_encoding_invalid) {
  1881. if (info->report_errors) {
  1882. raise_warning("Unable to detect encoding");
  1883. }
  1884. from_encoding = mbfl_no_encoding_pass;
  1885. }
  1886. }
  1887. convd = NULL;
  1888. if (from_encoding != mbfl_no_encoding_pass) {
  1889. convd = mbfl_buffer_converter_new(from_encoding, info->to_encoding, 0);
  1890. if (convd != NULL) {
  1891. mbfl_buffer_converter_illegal_mode
  1892. (convd, MBSTRG(current_filter_illegal_mode));
  1893. mbfl_buffer_converter_illegal_substchar
  1894. (convd, MBSTRG(current_filter_illegal_substchar));
  1895. } else {
  1896. if (info->report_errors) {
  1897. raise_warning("Unable to create converter");
  1898. }
  1899. goto out;
  1900. }
  1901. }
  1902. /* convert encoding */
  1903. string.no_encoding = from_encoding;
  1904. n = 0;
  1905. while (n < num) {
  1906. string.val = (unsigned char *)val_list[n];
  1907. string.len = len_list[n];
  1908. if (convd != NULL &&
  1909. mbfl_buffer_converter_feed_result(convd, &string, &resvar) != NULL) {
  1910. var = (char *)resvar.val;
  1911. } else {
  1912. var = val_list[n];
  1913. }
  1914. n++;
  1915. string.val = (unsigned char *)val_list[n];
  1916. string.len = len_list[n];
  1917. if (convd != NULL &&
  1918. mbfl_buffer_converter_feed_result(convd, &string, &resval) != NULL) {
  1919. val = (char *)resval.val;
  1920. val_len = resval.len;
  1921. } else {
  1922. val = val_list[n];
  1923. val_len = len_list[n];
  1924. }
  1925. n++;
  1926. arg.set(String(var, CopyString), String(val, val_len, CopyString));
  1927. if (convd != NULL){
  1928. mbfl_string_clear(&resvar);
  1929. mbfl_string_clear(&resval);
  1930. }
  1931. }
  1932. out:
  1933. if (convd != NULL) {
  1934. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  1935. mbfl_buffer_converter_delete(convd);
  1936. }
  1937. if (val_list != NULL) {
  1938. free((void *)val_list);
  1939. }
  1940. if (len_list != NULL) {
  1941. free((void *)len_list);
  1942. }
  1943. return from_encoding;
  1944. }
  1945. bool f_mb_parse_str(const String& encoded_string,
  1946. VRefParam result /* = null */) {
  1947. php_mb_encoding_handler_info_t info;
  1948. info.data_type = PARSE_STRING;
  1949. info.separator = ";&";
  1950. info.force_register_globals = false;
  1951. info.report_errors = 1;
  1952. info.to_encoding = MBSTRG(current_internal_encoding);
  1953. info.to_language = MBSTRG(current_language);
  1954. info.from_encodings = MBSTRG(http_input_list);
  1955. info.num_from_encodings = MBSTRG(http_input_list_size);
  1956. info.from_language = MBSTRG(current_language);
  1957. char *encstr = strndup(encoded_string.data(), encoded_string.size());
  1958. mbfl_no_encoding detected =
  1959. _php_mb_encoding_handler_ex(&info, result, encstr);
  1960. free(encstr);
  1961. MBSTRG(http_input_identify) = detected;
  1962. return detected != mbfl_no_encoding_invalid;
  1963. }
  1964. Variant f_mb_preferred_mime_name(const String& encoding) {
  1965. mbfl_no_encoding no_encoding = mbfl_name2no_encoding(encoding.data());
  1966. if (no_encoding == mbfl_no_encoding_invalid) {
  1967. raise_warning("Unknown encoding \"%s\"", encoding.data());
  1968. return false;
  1969. }
  1970. const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
  1971. if (preferred_name == NULL || *preferred_name == '\0') {
  1972. raise_warning("No MIME preferred name corresponding to \"%s\"",
  1973. encoding.data());
  1974. return false;
  1975. }
  1976. return String(preferred_name, CopyString);
  1977. }
  1978. static Variant php_mb_substr(const String& str, int from, int len,
  1979. const String& encoding, bool substr) {
  1980. mbfl_string string;
  1981. mbfl_string_init(&string);
  1982. string.no_language = MBSTRG(current_language);
  1983. string.no_encoding = MBSTRG(current_internal_encoding);
  1984. string.val = (unsigned char *)str.data();
  1985. string.len = str.size();
  1986. if (!encoding.empty()) {
  1987. string.no_encoding = mbfl_name2no_encoding(encoding.data());
  1988. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1989. raise_warning("Unknown encoding \"%s\"", encoding.data());
  1990. return false;
  1991. }
  1992. }
  1993. int size;
  1994. if (substr) {
  1995. size = mbfl_strlen(&string);
  1996. } else {
  1997. size = str.size();
  1998. }
  1999. if (len == 0x7FFFFFFF) {
  2000. len = size;
  2001. }
  2002. /* if "from" position is negative, count start position from the end
  2003. * of the string
  2004. */
  2005. if (from < 0) {
  2006. from = size + from;
  2007. if (from < 0) {
  2008. from = 0;
  2009. }
  2010. }
  2011. /* if "length" position is negative, set it to the length
  2012. * needed to stop that many chars from the end of the string
  2013. */
  2014. if (len < 0) {
  2015. len = (size - from) + len;
  2016. if (len < 0) {
  2017. len = 0;
  2018. }
  2019. }
  2020. if (from > size) {
  2021. if (!substr) {
  2022. return false;
  2023. }
  2024. from = size;
  2025. }
  2026. mbfl_string result;
  2027. mbfl_string *ret;
  2028. if (substr) {
  2029. ret = mbfl_substr(&string, &result, from, len);
  2030. } else {
  2031. ret = mbfl_strcut(&string, &result, from, len);
  2032. }
  2033. if (ret != NULL) {
  2034. return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  2035. }
  2036. return false;
  2037. }
  2038. Variant f_mb_substr(const String& str, int start, int length /* = 0x7FFFFFFF */,
  2039. const String& encoding /* = null_string */) {
  2040. return php_mb_substr(str, start, length, encoding, true);
  2041. }
  2042. Variant f_mb_strcut(const String& str, int start, int length /* = 0x7FFFFFFF */,
  2043. const String& encoding /* = null_string */) {
  2044. return php_mb_substr(str, start, length, encoding, false);
  2045. }
  2046. Variant f_mb_strimwidth(const String& str, int start, int width,
  2047. const String& trimmarker /* = null_string */,
  2048. const String& encoding /* = null_string */) {
  2049. mbfl_string string, result, marker, *ret;
  2050. mbfl_string_init(&string);
  2051. mbfl_string_init(&marker);
  2052. string.no_language = MBSTRG(current_language);
  2053. string.no_encoding = MBSTRG(current_internal_encoding);
  2054. marker.no_language = MBSTRG(current_language);
  2055. marker.no_encoding = MBSTRG(current_internal_encoding);
  2056. marker.val = NULL;
  2057. marker.len = 0;
  2058. if (!encoding.empty()) {
  2059. string.no_encoding = marker.no_encoding =
  2060. mbfl_name2no_encoding(encoding.data());
  2061. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2062. raise_warning("Unknown encoding \"%s\"", encoding.data());
  2063. return false;
  2064. }
  2065. }
  2066. string.val = (unsigned char *)str.data();
  2067. string.len = str.size();
  2068. if (start < 0 || start > str.size()) {
  2069. raise_warning("Start position is out of reange");
  2070. return false;
  2071. }
  2072. if (width < 0) {
  2073. raise_warning("Width is negative value");
  2074. return false;
  2075. }
  2076. marker.val = (unsigned char *)trimmarker.data();
  2077. marker.len = trimmarker.size();
  2078. ret = mbfl_strimwidth(&string, &marker, &result, start, width);
  2079. if (ret != NULL) {
  2080. return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  2081. }
  2082. return false;
  2083. }
  2084. Variant f_mb_stripos(const String& haystack, const String& needle,
  2085. int offset /* = 0 */,
  2086. const String& encoding /* = null_string */) {
  2087. const char *from_encoding;
  2088. if (encoding.empty()) {
  2089. from_encoding =
  2090. mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2091. } else {
  2092. from_encoding = encoding.data();
  2093. }
  2094. if (needle.empty()) {
  2095. raise_warning("Empty delimiter");
  2096. return false;
  2097. }
  2098. int n = php_mb_stripos(0, haystack.data(), haystack.size(),
  2099. needle.data(), needle.size(), offset, from_encoding);
  2100. if (n >= 0) {
  2101. return n;
  2102. }
  2103. return false;
  2104. }
  2105. Variant f_mb_strripos(const String& haystack, const String& needle,
  2106. int offset /* = 0 */,
  2107. const String& encoding /* = null_string */) {
  2108. const char *from_encoding;
  2109. if (encoding.empty()) {
  2110. from_encoding =
  2111. mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2112. } else {
  2113. from_encoding = encoding.data();
  2114. }
  2115. int n = php_mb_stripos(1, haystack.data(), haystack.size(),
  2116. needle.data(), needle.size(), offset, from_encoding);
  2117. if (n >= 0) {
  2118. return n;
  2119. }
  2120. return false;
  2121. }
  2122. Variant f_mb_stristr(const String& haystack, const String& needle,
  2123. bool part /* = false */,
  2124. const String& encoding /* = null_string */) {
  2125. mbfl_string mbs_haystack;
  2126. mbfl_string_init(&mbs_haystack);
  2127. mbs_haystack.no_language = MBSTRG(current_language);
  2128. mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
  2129. mbs_haystack.val = (unsigned char *)haystack.data();
  2130. mbs_haystack.len = haystack.size();
  2131. mbfl_string mbs_needle;
  2132. mbfl_string_init(&mbs_needle);
  2133. mbs_needle.no_language = MBSTRG(current_language);
  2134. mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
  2135. mbs_needle.val = (unsigned char *)needle.data();
  2136. mbs_needle.len = needle.size();
  2137. if (!mbs_needle.len) {
  2138. raise_warning("Empty delimiter.");
  2139. return false;
  2140. }
  2141. const char *from_encoding;
  2142. if (encoding.empty()) {
  2143. from_encoding =
  2144. mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2145. } else {
  2146. from_encoding = encoding.data();
  2147. }
  2148. mbs_haystack.no_encoding = mbs_needle.no_encoding =
  2149. mbfl_name2no_encoding(from_encoding);
  2150. if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
  2151. raise_warning("Unknown encoding \"%s\"", from_encoding);
  2152. return false;
  2153. }
  2154. int n = php_mb_stripos(0, (const char*)mbs_haystack.val, mbs_haystack.len,
  2155. (const char *)mbs_needle.val, mbs_needle.len,
  2156. 0, from_encoding);
  2157. if (n < 0) {
  2158. return false;
  2159. }
  2160. int mblen = mbfl_strlen(&mbs_haystack);
  2161. mbfl_string result, *ret = NULL;
  2162. if (part) {
  2163. ret = mbfl_substr(&mbs_haystack, &result, 0, n);
  2164. } else {
  2165. int len = (mblen - n);
  2166. ret = mbfl_substr(&mbs_haystack, &result, n, len);
  2167. }
  2168. if (ret != NULL) {
  2169. return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  2170. }
  2171. return false;
  2172. }
  2173. Variant f_mb_strlen(const String& str,
  2174. const String& encoding /* = null_string */) {
  2175. mbfl_string string;
  2176. mbfl_string_init(&string);
  2177. string.val = (unsigned char *)str.data();
  2178. string.len = str.size();
  2179. string.no_language = MBSTRG(current_language);
  2180. if (encoding.empty()) {
  2181. string.no_encoding = MBSTRG(current_internal_encoding);
  2182. } else {
  2183. string.no_encoding = mbfl_name2no_encoding(encoding.data());
  2184. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2185. raise_warning("Unknown encoding \"%s\"", encoding.data());
  2186. return false;
  2187. }
  2188. }
  2189. int n = mbfl_strlen(&string);
  2190. if (n >= 0) {
  2191. return n;
  2192. }
  2193. return false;
  2194. }
  2195. Variant f_mb_strpos(const String& haystack, const String& needle,
  2196. int offset /* = 0 */,
  2197. const String& encoding /* = null_string */) {
  2198. mbfl_string mbs_haystack;
  2199. mbfl_string_init(&mbs_haystack);
  2200. mbs_haystack.no_language = MBSTRG(current_language);
  2201. mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
  2202. mbs_haystack.val = (unsigned char *)haystack.data();
  2203. mbs_haystack.len = haystack.size();
  2204. mbfl_string mbs_needle;
  2205. mbfl_string_init(&mbs_needle);
  2206. mbs_needle.no_language = MBSTRG(current_language);
  2207. mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
  2208. mbs_needle.val = (unsigned char *)needle.data();
  2209. mbs_needle.len = needle.size();
  2210. if (!encoding.empty()) {
  2211. mbs_haystack.no_encoding = mbs_needle.no_encoding =
  2212. mbfl_name2no_encoding(encoding.data());
  2213. if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
  2214. raise_warning("Unknown encoding \"%s\"", encoding.data());
  2215. return false;
  2216. }
  2217. }
  2218. if (offset < 0 || offset > mbfl_strlen(&mbs_haystack)) {
  2219. raise_warning("Offset not contained in string.");
  2220. return false;
  2221. }
  2222. if (mbs_needle.len == 0) {
  2223. raise_warning("Empty delimiter.");
  2224. return false;
  2225. }
  2226. int reverse = 0;
  2227. int n = mbfl_strpos(&mbs_haystack, &mbs_needle, offset, reverse);
  2228. if (n >= 0) {
  2229. return n;
  2230. }
  2231. switch (-n) {
  2232. case 1:
  2233. break;
  2234. case 2:
  2235. raise_warning("Needle has not positive length.");
  2236. break;
  2237. case 4:
  2238. raise_warning("Unknown encoding or conversion error.");
  2239. break;
  2240. case 8:
  2241. raise_warning("Argument is empty.");
  2242. break;
  2243. default:
  2244. raise_warning("Unknown error in mb_strpos.");
  2245. break;
  2246. }
  2247. return false;
  2248. }
  2249. Variant f_mb_strrpos(const String& haystack, const String& needle,
  2250. CVarRef offset /* = 0LL */,
  2251. const String& encoding /* = null_string */) {
  2252. mbfl_string mbs_haystack;
  2253. mbfl_string_init(&mbs_haystack);
  2254. mbs_haystack.no_language = MBSTRG(current_language);
  2255. mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
  2256. mbs_haystack.val = (unsigned char *)haystack.data();
  2257. mbs_haystack.len = haystack.size();
  2258. mbfl_string mbs_needle;
  2259. mbfl_string_init(&mbs_needle);
  2260. mbs_needle.no_language = MBSTRG(current_language);
  2261. mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
  2262. mbs_needle.val = (unsigned char *)needle.data();
  2263. mbs_needle.len = needle.size();
  2264. const char *enc_name = encoding.data();
  2265. long noffset = 0;
  2266. String soffset = offset.toString();
  2267. if (offset.isString()) {
  2268. enc_name = soffset.data();
  2269. int str_flg = 1;
  2270. if (enc_name != NULL) {
  2271. switch (*enc_name) {
  2272. case '0': case '1': case '2': case '3': case '4':
  2273. case '5': case '6': case '7': case '8': case '9':
  2274. case ' ': case '-': case '.':
  2275. break;
  2276. default :
  2277. str_flg = 0;
  2278. break;
  2279. }
  2280. }
  2281. if (str_flg) {
  2282. noffset = offset.toInt32();
  2283. enc_name = encoding.data();
  2284. }
  2285. } else {
  2286. noffset = offset.toInt32();
  2287. }
  2288. if (!enc_name && !*enc_name) {
  2289. mbs_haystack.no_encoding = mbs_needle.no_encoding =
  2290. mbfl_name2no_encoding(enc_name);
  2291. if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
  2292. raise_warning("Unknown encoding \"%s\"", enc_name);
  2293. return false;
  2294. }
  2295. }
  2296. if (mbs_haystack.len <= 0) {
  2297. return false;
  2298. }
  2299. if (mbs_needle.len <= 0) {
  2300. return false;
  2301. }
  2302. if ((noffset > 0 && noffset > mbfl_strlen(&mbs_haystack)) ||
  2303. (noffset < 0 && -noffset > mbfl_strlen(&mbs_haystack))) {
  2304. raise_notice("Offset is greater than the length of haystack string");
  2305. return false;
  2306. }
  2307. int n = mbfl_strpos(&mbs_haystack, &mbs_needle, noffset, 1);
  2308. if (n >= 0) {
  2309. return n;
  2310. }
  2311. return false;
  2312. }
  2313. Variant f_mb_strrchr(const String& haystack, const String& needle,
  2314. bool part /* = false */,
  2315. const String& encoding /* = null_string */) {
  2316. mbfl_string mbs_haystack;
  2317. mbfl_string_init(&mbs_haystack);
  2318. mbs_haystack.no_language = MBSTRG(current_language);
  2319. mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
  2320. mbs_haystack.val = (unsigned char *)haystack.data();
  2321. mbs_haystack.len = haystack.size();
  2322. mbfl_string mbs_needle;
  2323. mbfl_string_init(&mbs_needle);
  2324. mbs_needle.no_language = MBSTRG(current_language);
  2325. mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
  2326. mbs_needle.val = (unsigned char *)needle.data();
  2327. mbs_needle.len = needle.size();
  2328. if (!encoding.empty()) {
  2329. mbs_haystack.no_encoding = mbs_needle.no_encoding =
  2330. mbfl_name2no_encoding(encoding.data());
  2331. if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
  2332. raise_warning("Unknown encoding \"%s\"", encoding.data());
  2333. return false;
  2334. }
  2335. }
  2336. if (mbs_haystack.len <= 0) {
  2337. return false;
  2338. }
  2339. if (mbs_needle.len <= 0) {
  2340. return false;
  2341. }
  2342. mbfl_string result, *ret = NULL;
  2343. int n = mbfl_strpos(&mbs_haystack, &mbs_needle, 0, 1);
  2344. if (n >= 0) {
  2345. int mblen = mbfl_strlen(&mbs_haystack);
  2346. if (part) {
  2347. ret = mbfl_substr(&mbs_haystack, &result, 0, n);
  2348. } else {
  2349. int len = (mblen - n);
  2350. ret = mbfl_substr(&mbs_haystack, &result, n, len);
  2351. }
  2352. }
  2353. if (ret != NULL) {
  2354. return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  2355. }
  2356. return false;
  2357. }
  2358. Variant f_mb_strrichr(const String& haystack, const String& needle,
  2359. bool part /* = false */,
  2360. const String& encoding /* = null_string */) {
  2361. mbfl_string mbs_haystack;
  2362. mbfl_string_init(&mbs_haystack);
  2363. mbs_haystack.no_language = MBSTRG(current_language);
  2364. mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
  2365. mbs_haystack.val = (unsigned char *)haystack.data();
  2366. mbs_haystack.len = haystack.size();
  2367. mbfl_string mbs_needle;
  2368. mbfl_string_init(&mbs_needle);
  2369. mbs_needle.no_language = MBSTRG(current_language);
  2370. mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
  2371. mbs_needle.val = (unsigned char *)needle.data();
  2372. mbs_needle.len = needle.size();
  2373. const char *from_encoding;
  2374. if (encoding.empty()) {
  2375. from_encoding =
  2376. mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2377. } else {
  2378. from_encoding = encoding.data();
  2379. }
  2380. mbs_haystack.no_encoding = mbs_needle.no_encoding =
  2381. mbfl_name2no_encoding(from_encoding);
  2382. if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
  2383. raise_warning("Unknown encoding \"%s\"", from_encoding);
  2384. return false;
  2385. }
  2386. int n = php_mb_stripos(1, (const char*)mbs_haystack.val, mbs_haystack.len,
  2387. (const char*)mbs_needle.val, mbs_needle.len,
  2388. 0, from_encoding);
  2389. if (n < 0) {
  2390. return false;
  2391. }
  2392. mbfl_string result, *ret = NULL;
  2393. int mblen = mbfl_strlen(&mbs_haystack);
  2394. if (part) {
  2395. ret = mbfl_substr(&mbs_haystack, &result, 0, n);
  2396. } else {
  2397. int len = (mblen - n);
  2398. ret = mbfl_substr(&mbs_haystack, &result, n, len);
  2399. }
  2400. if (ret != NULL) {
  2401. return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  2402. }
  2403. return false;
  2404. }
  2405. Variant f_mb_strstr(const String& haystack, const String& needle,
  2406. bool part /* = false */,
  2407. const String& encoding /* = null_string */) {
  2408. mbfl_string mbs_haystack;
  2409. mbfl_string_init(&mbs_haystack);
  2410. mbs_haystack.no_language = MBSTRG(current_language);
  2411. mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
  2412. mbs_haystack.val = (unsigned char *)haystack.data();
  2413. mbs_haystack.len = haystack.size();
  2414. mbfl_string mbs_needle;
  2415. mbfl_string_init(&mbs_needle);
  2416. mbs_needle.no_language = MBSTRG(current_language);
  2417. mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
  2418. mbs_needle.val = (unsigned char *)needle.data();
  2419. mbs_needle.len = needle.size();
  2420. if (!encoding.empty()) {
  2421. mbs_haystack.no_encoding = mbs_needle.no_encoding =
  2422. mbfl_name2no_encoding(encoding.data());
  2423. if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
  2424. raise_warning("Unknown encoding \"%s\"", encoding.data());
  2425. return false;
  2426. }
  2427. }
  2428. if (mbs_needle.len <= 0) {
  2429. raise_warning("Empty delimiter.");
  2430. return false;
  2431. }
  2432. mbfl_string result, *ret = NULL;
  2433. int n = mbfl_strpos(&mbs_haystack, &mbs_needle, 0, 0);
  2434. if (n >= 0) {
  2435. int mblen = mbfl_strlen(&mbs_haystack);
  2436. if (part) {
  2437. ret = mbfl_substr(&mbs_haystack, &result, 0, n);
  2438. } else {
  2439. int len = (mblen - n);
  2440. ret = mbfl_substr(&mbs_haystack, &result, n, len);
  2441. }
  2442. }
  2443. if (ret != NULL) {
  2444. return String(reinterpret_cast<char*>(ret->val), ret->len, AttachString);
  2445. }
  2446. return false;
  2447. }
  2448. Variant f_mb_strtolower(const String& str,
  2449. const String& encoding /* = null_string */) {
  2450. const char *from_encoding;
  2451. if (encoding.empty()) {
  2452. from_encoding =
  2453. mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2454. } else {
  2455. from_encoding = encoding.data();
  2456. }
  2457. unsigned int ret_len;
  2458. char *newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER,
  2459. str.data(), str.size(),
  2460. &ret_len, from_encoding);
  2461. if (newstr) {
  2462. return String(newstr, ret_len, AttachString);
  2463. }
  2464. return false;
  2465. }
  2466. Variant f_mb_strtoupper(const String& str,
  2467. const String& encoding /* = null_string */) {
  2468. const char *from_encoding;
  2469. if (encoding.empty()) {
  2470. from_encoding =
  2471. mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2472. } else {
  2473. from_encoding = encoding.data();
  2474. }
  2475. unsigned int ret_len;
  2476. char *newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER,
  2477. str.data(), str.size(),
  2478. &ret_len, from_encoding);
  2479. if (newstr) {
  2480. return String(newstr, ret_len, AttachString);
  2481. }
  2482. return false;
  2483. }
  2484. Variant f_mb_strwidth(const String& str,
  2485. const String& encoding /* = null_string */) {
  2486. mbfl_string string;
  2487. mbfl_string_init(&string);
  2488. string.no_language = MBSTRG(current_language);
  2489. string.no_encoding = MBSTRG(current_internal_encoding);
  2490. string.val = (unsigned char *)str.data();
  2491. string.len = str.size();
  2492. if (!encoding.empty()) {
  2493. string.no_encoding = mbfl_name2no_encoding(encoding.data());
  2494. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2495. raise_warning("Unknown encoding \"%s\"", encoding.data());
  2496. return false;
  2497. }
  2498. }
  2499. int n = mbfl_strwidth(&string);
  2500. if (n >= 0) {
  2501. return n;
  2502. }
  2503. return false;
  2504. }
  2505. Variant f_mb_substitute_character(CVarRef substrchar /* = null_variant */) {
  2506. if (substrchar.isNull()) {
  2507. switch (MBSTRG(current_filter_illegal_mode)) {
  2508. case MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE:
  2509. return "none";
  2510. case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
  2511. return "long";
  2512. case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
  2513. return "entity";
  2514. default:
  2515. return MBSTRG(current_filter_illegal_substchar);
  2516. }
  2517. }
  2518. if (substrchar.isString()) {
  2519. String s = substrchar.toString();
  2520. if (strcasecmp("none", s.data()) == 0) {
  2521. MBSTRG(current_filter_illegal_mode) =
  2522. MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  2523. return true;
  2524. }
  2525. if (strcasecmp("long", s.data()) == 0) {
  2526. MBSTRG(current_filter_illegal_mode) =
  2527. MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  2528. return true;
  2529. }
  2530. if (strcasecmp("entity", s.data()) == 0) {
  2531. MBSTRG(current_filter_illegal_mode) =
  2532. MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  2533. return true;
  2534. }
  2535. }
  2536. int64_t n = substrchar.toInt64();
  2537. if (n < 0xffff && n > 0) {
  2538. MBSTRG(current_filter_illegal_mode) =
  2539. MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  2540. MBSTRG(current_filter_illegal_substchar) = n;
  2541. } else {
  2542. raise_warning("Unknown character.");
  2543. return false;
  2544. }
  2545. return true;
  2546. }
  2547. Variant f_mb_substr_count(const String& haystack, const String& needle,
  2548. const String& encoding /* = null_string */) {
  2549. mbfl_string mbs_haystack;
  2550. mbfl_string_init(&mbs_haystack);
  2551. mbs_haystack.no_language = MBSTRG(current_language);
  2552. mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
  2553. mbs_haystack.val = (unsigned char *)haystack.data();
  2554. mbs_haystack.len = haystack.size();
  2555. mbfl_string mbs_needle;
  2556. mbfl_string_init(&mbs_needle);
  2557. mbs_needle.no_language = MBSTRG(current_language);
  2558. mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
  2559. mbs_needle.val = (unsigned char *)needle.data();
  2560. mbs_needle.len = needle.size();
  2561. if (!encoding.empty()) {
  2562. mbs_haystack.no_encoding = mbs_needle.no_encoding =
  2563. mbfl_name2no_encoding(encoding.data());
  2564. if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
  2565. raise_warning("Unknown encoding \"%s\"", encoding.data());
  2566. return false;
  2567. }
  2568. }
  2569. if (mbs_needle.len <= 0) {
  2570. raise_warning("Empty substring.");
  2571. return false;
  2572. }
  2573. int n = mbfl_substr_count(&mbs_haystack, &mbs_needle);
  2574. if (n >= 0) {
  2575. return n;
  2576. }
  2577. return false;
  2578. }
  2579. ///////////////////////////////////////////////////////////////////////////////
  2580. // regex helpers
  2581. typedef struct _php_mb_regex_enc_name_map_t {
  2582. const char *names;
  2583. OnigEncoding code;
  2584. } php_mb_regex_enc_name_map_t;
  2585. static php_mb_regex_enc_name_map_t enc_name_map[] ={
  2586. {
  2587. "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
  2588. ONIG_ENCODING_EUC_JP
  2589. },
  2590. {
  2591. "UTF-8\0UTF8\0",
  2592. ONIG_ENCODING_UTF8
  2593. },
  2594. {
  2595. "UTF-16\0UTF-16BE\0",
  2596. ONIG_ENCODING_UTF16_BE
  2597. },
  2598. {
  2599. "UTF-16LE\0",
  2600. ONIG_ENCODING_UTF16_LE
  2601. },
  2602. {
  2603. "UCS-4\0UTF-32\0UTF-32BE\0",
  2604. ONIG_ENCODING_UTF32_BE
  2605. },
  2606. {
  2607. "UCS-4LE\0UTF-32LE\0",
  2608. ONIG_ENCODING_UTF32_LE
  2609. },
  2610. {
  2611. "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
  2612. ONIG_ENCODING_SJIS
  2613. },
  2614. {
  2615. "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
  2616. ONIG_ENCODING_BIG5
  2617. },
  2618. {
  2619. "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
  2620. ONIG_ENCODING_EUC_CN
  2621. },
  2622. {
  2623. "EUC-TW\0EUCTW\0EUC_TW\0",
  2624. ONIG_ENCODING_EUC_TW
  2625. },
  2626. {
  2627. "EUC-KR\0EUCKR\0EUC_KR\0",
  2628. ONIG_ENCODING_EUC_KR
  2629. },
  2630. {
  2631. "KOI8R\0KOI8-R\0KOI-8R\0",
  2632. ONIG_ENCODING_KOI8_R
  2633. },
  2634. {
  2635. "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
  2636. ONIG_ENCODING_ISO_8859_1
  2637. },
  2638. {
  2639. "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
  2640. ONIG_ENCODING_ISO_8859_2
  2641. },
  2642. {
  2643. "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
  2644. ONIG_ENCODING_ISO_8859_3
  2645. },
  2646. {
  2647. "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
  2648. ONIG_ENCODING_ISO_8859_4
  2649. },
  2650. {
  2651. "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
  2652. ONIG_ENCODING_ISO_8859_5
  2653. },
  2654. {
  2655. "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
  2656. ONIG_ENCODING_ISO_8859_6
  2657. },
  2658. {
  2659. "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
  2660. ONIG_ENCODING_ISO_8859_7
  2661. },
  2662. {
  2663. "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
  2664. ONIG_ENCODING_ISO_8859_8
  2665. },
  2666. {
  2667. "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
  2668. ONIG_ENCODING_ISO_8859_9
  2669. },
  2670. {
  2671. "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
  2672. ONIG_ENCODING_ISO_8859_10
  2673. },
  2674. {
  2675. "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
  2676. ONIG_ENCODING_ISO_8859_11
  2677. },
  2678. {
  2679. "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
  2680. ONIG_ENCODING_ISO_8859_13
  2681. },
  2682. {
  2683. "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
  2684. ONIG_ENCODING_ISO_8859_14
  2685. },
  2686. {
  2687. "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
  2688. ONIG_ENCODING_ISO_8859_15
  2689. },
  2690. {
  2691. "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
  2692. ONIG_ENCODING_ISO_8859_16
  2693. },
  2694. {
  2695. "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
  2696. ONIG_ENCODING_ASCII
  2697. },
  2698. { NULL, ONIG_ENCODING_UNDEF }
  2699. };
  2700. static OnigEncoding php_mb_regex_name2mbctype(const char *pname) {
  2701. const char *p;
  2702. php_mb_regex_enc_name_map_t *mapping;
  2703. if (pname == NULL) {
  2704. return ONIG_ENCODING_UNDEF;
  2705. }
  2706. for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  2707. for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
  2708. if (strcasecmp(p, pname) == 0) {
  2709. return mapping->code;
  2710. }
  2711. }
  2712. }
  2713. return ONIG_ENCODING_UNDEF;
  2714. }
  2715. static const char *php_mb_regex_mbctype2name(OnigEncoding mbctype) {
  2716. php_mb_regex_enc_name_map_t *mapping;
  2717. for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  2718. if (mapping->code == mbctype) {
  2719. return mapping->names;
  2720. }
  2721. }
  2722. return NULL;
  2723. }
  2724. /*
  2725. * regex cache
  2726. */
  2727. static php_mb_regex_t *php_mbregex_compile_pattern(const String& pattern,
  2728. OnigOptionType options,
  2729. OnigEncoding enc,
  2730. OnigSyntaxType *syntax) {
  2731. int err_code = 0;
  2732. OnigErrorInfo err_info;
  2733. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  2734. php_mb_regex_t *rc = NULL;
  2735. std::string spattern = std::string(pattern.data(), pattern.size());
  2736. RegexCache &cache = MBSTRG(ht_rc);
  2737. RegexCache::const_iterator it =
  2738. cache.find(spattern);
  2739. if (it != cache.end()) {
  2740. rc = it->second;
  2741. }
  2742. if (!rc || rc->options != options || rc->enc != enc ||
  2743. rc->syntax != syntax) {
  2744. if (rc) {
  2745. onig_free(rc);
  2746. rc = NULL;
  2747. }
  2748. if ((err_code = onig_new(&rc, (OnigUChar *)pattern.data(),
  2749. (OnigUChar *)(pattern.data() + pattern.size()),
  2750. options,enc, syntax, &err_info)) != ONIG_NORMAL) {
  2751. onig_error_code_to_str(err_str, err_code, err_info);
  2752. raise_warning("mbregex compile err: %s", err_str);
  2753. return NULL;
  2754. }
  2755. MBSTRG(ht_rc)[spattern] = rc;
  2756. }
  2757. return rc;
  2758. }
  2759. static size_t _php_mb_regex_get_option_string(char *str, size_t len,
  2760. OnigOptionType option,
  2761. OnigSyntaxType *syntax) {
  2762. size_t len_left = len;
  2763. size_t len_req = 0;
  2764. char *p = str;
  2765. char c;
  2766. if ((option & ONIG_OPTION_IGNORECASE) != 0) {
  2767. if (len_left > 0) {
  2768. --len_left;
  2769. *(p++) = 'i';
  2770. }
  2771. ++len_req;
  2772. }
  2773. if ((option & ONIG_OPTION_EXTEND) != 0) {
  2774. if (len_left > 0) {
  2775. --len_left;
  2776. *(p++) = 'x';
  2777. }
  2778. ++len_req;
  2779. }
  2780. if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
  2781. (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
  2782. if (len_left > 0) {
  2783. --len_left;
  2784. *(p++) = 'p';
  2785. }
  2786. ++len_req;
  2787. } else {
  2788. if ((option & ONIG_OPTION_MULTILINE) != 0) {
  2789. if (len_left > 0) {
  2790. --len_left;
  2791. *(p++) = 'm';
  2792. }
  2793. ++len_req;
  2794. }
  2795. if ((option & ONIG_OPTION_SINGLELINE) != 0) {
  2796. if (len_left > 0) {
  2797. --len_left;
  2798. *(p++) = 's';
  2799. }
  2800. ++len_req;
  2801. }
  2802. }
  2803. if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
  2804. if (len_left > 0) {
  2805. --len_left;
  2806. *(p++) = 'l';
  2807. }
  2808. ++len_req;
  2809. }
  2810. if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
  2811. if (len_left > 0) {
  2812. --len_left;
  2813. *(p++) = 'n';
  2814. }
  2815. ++len_req;
  2816. }
  2817. c = 0;
  2818. if (syntax == ONIG_SYNTAX_JAVA) {
  2819. c = 'j';
  2820. } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
  2821. c = 'u';
  2822. } else if (syntax == ONIG_SYNTAX_GREP) {
  2823. c = 'g';
  2824. } else if (syntax == ONIG_SYNTAX_EMACS) {
  2825. c = 'c';
  2826. } else if (syntax == ONIG_SYNTAX_RUBY) {
  2827. c = 'r';
  2828. } else if (syntax == ONIG_SYNTAX_PERL) {
  2829. c = 'z';
  2830. } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
  2831. c = 'b';
  2832. } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
  2833. c = 'd';
  2834. }
  2835. if (c != 0) {
  2836. if (len_left > 0) {
  2837. --len_left;
  2838. *(p++) = c;
  2839. }
  2840. ++len_req;
  2841. }
  2842. if (len_left > 0) {
  2843. --len_left;
  2844. *(p++) = '\0';
  2845. }
  2846. ++len_req;
  2847. if (len < len_req) {
  2848. return len_req;
  2849. }
  2850. return 0;
  2851. }
  2852. static void _php_mb_regex_init_options(const char *parg, int narg,
  2853. OnigOptionType *option,
  2854. OnigSyntaxType **syntax, int *eval) {
  2855. int n;
  2856. char c;
  2857. int optm = 0;
  2858. *syntax = ONIG_SYNTAX_RUBY;
  2859. if (parg != NULL) {
  2860. n = 0;
  2861. while (n < narg) {
  2862. c = parg[n++];
  2863. switch (c) {
  2864. case 'i': optm |= ONIG_OPTION_IGNORECASE; break;
  2865. case 'x': optm |= ONIG_OPTION_EXTEND; break;
  2866. case 'm': optm |= ONIG_OPTION_MULTILINE; break;
  2867. case 's': optm |= ONIG_OPTION_SINGLELINE; break;
  2868. case 'p': optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; break;
  2869. case 'l': optm |= ONIG_OPTION_FIND_LONGEST; break;
  2870. case 'n': optm |= ONIG_OPTION_FIND_NOT_EMPTY; break;
  2871. case 'j': *syntax = ONIG_SYNTAX_JAVA; break;
  2872. case 'u': *syntax = ONIG_SYNTAX_GNU_REGEX; break;
  2873. case 'g': *syntax = ONIG_SYNTAX_GREP; break;
  2874. case 'c': *syntax = ONIG_SYNTAX_EMACS; break;
  2875. case 'r': *syntax = ONIG_SYNTAX_RUBY; break;
  2876. case 'z': *syntax = ONIG_SYNTAX_PERL; break;
  2877. case 'b': *syntax = ONIG_SYNTAX_POSIX_BASIC; break;
  2878. case 'd': *syntax = ONIG_SYNTAX_POSIX_EXTENDED; break;
  2879. case 'e':
  2880. if (eval != NULL) *eval = 1;
  2881. break;
  2882. default:
  2883. break;
  2884. }
  2885. }
  2886. if (option != NULL) *option|=optm;
  2887. }
  2888. }
  2889. ///////////////////////////////////////////////////////////////////////////////
  2890. // regex functions
  2891. bool f_mb_ereg_match(const String& pattern, const String& str,
  2892. const String& option /* = null_string */) {
  2893. OnigSyntaxType *syntax;
  2894. OnigOptionType noption = 0;
  2895. if (!option.empty()) {
  2896. _php_mb_regex_init_options(option.data(), option.size(), &noption,
  2897. &syntax, NULL);
  2898. } else {
  2899. noption |= MBSTRG(regex_default_options);
  2900. syntax = MBSTRG(regex_default_syntax);
  2901. }
  2902. php_mb_regex_t *re;
  2903. if ((re = php_mbregex_compile_pattern
  2904. (pattern, noption, MBSTRG(current_mbctype), syntax)) == NULL) {
  2905. return false;
  2906. }
  2907. /* match */
  2908. int err = onig_match(re, (OnigUChar *)str.data(),
  2909. (OnigUChar *)(str.data() + str.size()),
  2910. (OnigUChar *)str.data(), NULL, 0);
  2911. return err >= 0;
  2912. }
  2913. static Variant _php_mb_regex_ereg_replace_exec(CVarRef pattern,
  2914. const String& replacement,
  2915. const String& str,
  2916. const String& option,
  2917. OnigOptionType options) {
  2918. const char *p;
  2919. php_mb_regex_t *re;
  2920. OnigSyntaxType *syntax;
  2921. OnigRegion *regs = NULL;
  2922. StringBuffer out_buf;
  2923. int i, err, eval, n;
  2924. OnigUChar *pos;
  2925. OnigUChar *string_lim;
  2926. char pat_buf[2];
  2927. const mbfl_encoding *enc;
  2928. {
  2929. const char *current_enc_name;
  2930. current_enc_name = php_mb_regex_mbctype2name(MBSTRG(current_mbctype));
  2931. if (current_enc_name == NULL ||
  2932. (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
  2933. raise_warning("Unknown error");
  2934. return false;
  2935. }
  2936. }
  2937. eval = 0;
  2938. {
  2939. if (!option.empty()) {
  2940. _php_mb_regex_init_options(option.data(), option.size(),
  2941. &options, &syntax, &eval);
  2942. } else {
  2943. options |= MBSTRG(regex_default_options);
  2944. syntax = MBSTRG(regex_default_syntax);
  2945. }
  2946. }
  2947. String spattern;
  2948. if (pattern.isString()) {
  2949. spattern = pattern.toString();
  2950. } else {
  2951. /* FIXME: this code is not multibyte aware! */
  2952. pat_buf[0] = pattern.toByte();
  2953. pat_buf[1] = '\0';
  2954. spattern = String(pat_buf, 1, CopyString);
  2955. }
  2956. /* create regex pattern buffer */
  2957. re = php_mbregex_compile_pattern(spattern, options,
  2958. MBSTRG(current_mbctype), syntax);
  2959. if (re == NULL) {
  2960. return false;
  2961. }
  2962. if (eval) {
  2963. throw NotSupportedException("ereg_replace", "dynamic coding");
  2964. }
  2965. /* do the actual work */
  2966. err = 0;
  2967. pos = (OnigUChar*)str.data();
  2968. string_lim = (OnigUChar*)(str.data() + str.size());
  2969. regs = onig_region_new();
  2970. while (err >= 0) {
  2971. err = onig_search(re, (OnigUChar *)str.data(), (OnigUChar *)string_lim,
  2972. pos, (OnigUChar *)string_lim, regs, 0);
  2973. if (err <= -2) {
  2974. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  2975. onig_error_code_to_str(err_str, err);
  2976. raise_warning("mbregex search failure: %s", err_str);
  2977. break;
  2978. }
  2979. if (err >= 0) {
  2980. #if moriyoshi_0
  2981. if (regs->beg[0] == regs->end[0]) {
  2982. raise_warning("Empty regular expression");
  2983. break;
  2984. }
  2985. #endif
  2986. /* copy the part of the string before the match */
  2987. out_buf.append((const char *)pos,
  2988. (OnigUChar *)(str.data() + regs->beg[0]) - pos);
  2989. /* copy replacement and backrefs */
  2990. i = 0;
  2991. p = replacement.data();
  2992. while (i < replacement.size()) {
  2993. int fwd = (int)php_mb_mbchar_bytes_ex(p, enc);
  2994. n = -1;
  2995. if ((replacement.size() - i) >= 2 && fwd == 1 &&
  2996. p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
  2997. n = p[1] - '0';
  2998. }
  2999. if (n >= 0 && n < regs->num_regs) {
  3000. if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] &&
  3001. regs->end[n] <= str.size()) {
  3002. out_buf.append(str.data() + regs->beg[n],
  3003. regs->end[n] - regs->beg[n]);
  3004. }
  3005. p += 2;
  3006. i += 2;
  3007. } else {
  3008. out_buf.append(p, fwd);
  3009. p += fwd;
  3010. i += fwd;
  3011. }
  3012. }
  3013. n = regs->end[0];
  3014. if ((pos - (OnigUChar *)str.data()) < n) {
  3015. pos = (OnigUChar *)(str.data() + n);
  3016. } else {
  3017. if (pos < string_lim) {
  3018. out_buf.append((const char *)pos, 1);
  3019. }
  3020. pos++;
  3021. }
  3022. } else { /* nomatch */
  3023. /* stick that last bit of string on our output */
  3024. if (string_lim - pos > 0) {
  3025. out_buf.append((const char *)pos, string_lim - pos);
  3026. }
  3027. }
  3028. onig_region_free(regs, 0);
  3029. }
  3030. if (regs != NULL) {
  3031. onig_region_free(regs, 1);
  3032. }
  3033. if (err <= -2) {
  3034. return false;
  3035. }
  3036. return out_buf.detach();
  3037. }
  3038. Variant f_mb_ereg_replace(CVarRef pattern, const String& replacement,
  3039. const String& str,
  3040. const String& option /* = null_string */) {
  3041. return _php_mb_regex_ereg_replace_exec(pattern, replacement,
  3042. str, option, 0);
  3043. }
  3044. Variant f_mb_eregi_replace(CVarRef pattern, const String& replacement,
  3045. const String& str,
  3046. const String& option /* = null_string */) {
  3047. return _php_mb_regex_ereg_replace_exec(pattern, replacement,
  3048. str, option, ONIG_OPTION_IGNORECASE);
  3049. }
  3050. int64_t f_mb_ereg_search_getpos() {
  3051. return MBSTRG(search_pos);
  3052. }
  3053. bool f_mb_ereg_search_setpos(int position) {
  3054. if (position < 0 || position >= (int)MBSTRG(search_str).size()) {
  3055. raise_warning("Position is out of range");
  3056. MBSTRG(search_pos) = 0;
  3057. return false;
  3058. }
  3059. MBSTRG(search_pos) = position;
  3060. return true;
  3061. }
  3062. Variant f_mb_ereg_search_getregs() {
  3063. OnigRegion *search_regs = MBSTRG(search_regs);
  3064. if (search_regs && !MBSTRG(search_str).empty()) {
  3065. Array ret;
  3066. OnigUChar *str = (OnigUChar *)MBSTRG(search_str).data();
  3067. int len = MBSTRG(search_str).size();
  3068. int n = search_regs->num_regs;
  3069. for (int i = 0; i < n; i++) {
  3070. int beg = search_regs->beg[i];
  3071. int end = search_regs->end[i];
  3072. if (beg >= 0 && beg <= end && end <= len) {
  3073. ret.append(String((const char *)(str + beg), end - beg, CopyString));
  3074. } else {
  3075. ret.append(false);
  3076. }
  3077. }
  3078. return ret;
  3079. }
  3080. return false;
  3081. }
  3082. bool f_mb_ereg_search_init(const String& str,
  3083. const String& pattern /* = null_string */,
  3084. const String& option /* = null_string */) {
  3085. OnigOptionType noption = MBSTRG(regex_default_options);
  3086. OnigSyntaxType *syntax = MBSTRG(regex_default_syntax);
  3087. if (!option.empty()) {
  3088. noption = 0;
  3089. _php_mb_regex_init_options(option.data(), option.size(),
  3090. &noption, &syntax, NULL);
  3091. }
  3092. if (!pattern.empty()) {
  3093. if ((MBSTRG(search_re) = php_mbregex_compile_pattern
  3094. (pattern, noption, MBSTRG(current_mbctype), syntax)) == NULL) {
  3095. return false;
  3096. }
  3097. }
  3098. MBSTRG(search_str) = std::string(str.data(), str.size());
  3099. MBSTRG(search_pos) = 0;
  3100. if (MBSTRG(search_regs) != NULL) {
  3101. onig_region_free(MBSTRG(search_regs), 1);
  3102. MBSTRG(search_regs) = (OnigRegion *)NULL;
  3103. }
  3104. return true;
  3105. }
  3106. /* regex search */
  3107. static Variant _php_mb_regex_ereg_search_exec(const String& pattern,
  3108. const String& option,
  3109. int mode) {
  3110. int n, i, err, pos, len, beg, end;
  3111. OnigUChar *str;
  3112. OnigSyntaxType *syntax = NULL;
  3113. OnigOptionType noption;
  3114. noption = MBSTRG(regex_default_options);
  3115. if (!option.empty()) {
  3116. noption = 0;
  3117. _php_mb_regex_init_options(option.data(), option.size(),
  3118. &noption, &syntax, NULL);
  3119. }
  3120. if (!pattern.empty()) {
  3121. if ((MBSTRG(search_re) = php_mbregex_compile_pattern
  3122. (pattern, noption, MBSTRG(current_mbctype), syntax)) == NULL) {
  3123. return false;
  3124. }
  3125. }
  3126. pos = MBSTRG(search_pos);
  3127. str = NULL;
  3128. len = 0;
  3129. if (!MBSTRG(search_str).empty()) {
  3130. str = (OnigUChar *)MBSTRG(search_str).data();
  3131. len = MBSTRG(search_str).size();
  3132. }
  3133. if (MBSTRG(search_re) == NULL) {
  3134. raise_warning("No regex given");
  3135. return false;
  3136. }
  3137. if (str == NULL) {
  3138. raise_warning("No string given");
  3139. return false;
  3140. }
  3141. if (MBSTRG(search_regs)) {
  3142. onig_region_free(MBSTRG(search_regs), 1);
  3143. }
  3144. MBSTRG(search_regs) = onig_region_new();
  3145. err = onig_search(MBSTRG(search_re), str, str + len, str + pos, str + len,
  3146. MBSTRG(search_regs), 0);
  3147. Variant ret;
  3148. if (err == ONIG_MISMATCH) {
  3149. MBSTRG(search_pos) = len;
  3150. ret = false;
  3151. } else if (err <= -2) {
  3152. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  3153. onig_error_code_to_str(err_str, err);
  3154. raise_warning("mbregex search failure in mbregex_search(): %s", err_str);
  3155. ret = false;
  3156. } else {
  3157. if (MBSTRG(search_regs)->beg[0] == MBSTRG(search_regs)->end[0]) {
  3158. raise_warning("Empty regular expression");
  3159. }
  3160. switch (mode) {
  3161. case 1:
  3162. {
  3163. beg = MBSTRG(search_regs)->beg[0];
  3164. end = MBSTRG(search_regs)->end[0];
  3165. ret.append(beg);
  3166. ret.append(end - beg);
  3167. }
  3168. break;
  3169. case 2:
  3170. n = MBSTRG(search_regs)->num_regs;
  3171. for (i = 0; i < n; i++) {
  3172. beg = MBSTRG(search_regs)->beg[i];
  3173. end = MBSTRG(search_regs)->end[i];
  3174. if (beg >= 0 && beg <= end && end <= len) {
  3175. ret.append(String((const char *)(str + beg), end - beg, CopyString));
  3176. } else {
  3177. ret.append(false);
  3178. }
  3179. }
  3180. break;
  3181. default:
  3182. ret = true;
  3183. break;
  3184. }
  3185. end = MBSTRG(search_regs)->end[0];
  3186. if (pos < end) {
  3187. MBSTRG(search_pos) = end;
  3188. } else {
  3189. MBSTRG(search_pos) = pos + 1;
  3190. }
  3191. }
  3192. if (err < 0) {
  3193. onig_region_free(MBSTRG(search_regs), 1);
  3194. MBSTRG(search_regs) = (OnigRegion *)NULL;
  3195. }
  3196. return ret;
  3197. }
  3198. Variant f_mb_ereg_search(const String& pattern /* = null_string */,
  3199. const String& option /* = null_string */) {
  3200. return _php_mb_regex_ereg_search_exec(pattern, option, 0);
  3201. }
  3202. Variant f_mb_ereg_search_pos(const String& pattern /* = null_string */,
  3203. const String& option /* = null_string */) {
  3204. return _php_mb_regex_ereg_search_exec(pattern, option, 1);
  3205. }
  3206. Variant f_mb_ereg_search_regs(const String& pattern /* = null_string */,
  3207. const String& option /* = null_string */) {
  3208. return _php_mb_regex_ereg_search_exec(pattern, option, 2);
  3209. }
  3210. static Variant _php_mb_regex_ereg_exec(CVarRef pattern, const String& str,
  3211. Variant &regs, int icase) {
  3212. php_mb_regex_t *re;
  3213. OnigRegion *regions = NULL;
  3214. int i, match_len, beg, end;
  3215. OnigOptionType options;
  3216. options = MBSTRG(regex_default_options);
  3217. if (icase) {
  3218. options |= ONIG_OPTION_IGNORECASE;
  3219. }
  3220. /* compile the regular expression from the supplied regex */
  3221. String spattern;
  3222. if (!pattern.isString()) {
  3223. /* we convert numbers to integers and treat them as a string */
  3224. if (pattern.is(KindOfDouble)) {
  3225. spattern = String(pattern.toInt64()); /* get rid of decimal places */
  3226. } else {
  3227. spattern = pattern.toString();
  3228. }
  3229. } else {
  3230. spattern = pattern.toString();
  3231. }
  3232. re = php_mbregex_compile_pattern(spattern, options, MBSTRG(current_mbctype),
  3233. MBSTRG(regex_default_syntax));
  3234. if (re == NULL) {
  3235. return false;
  3236. }
  3237. regions = onig_region_new();
  3238. /* actually execute the regular expression */
  3239. if (onig_search(re, (OnigUChar *)str.data(),
  3240. (OnigUChar *)(str.data() + str.size()),
  3241. (OnigUChar *)str.data(),
  3242. (OnigUChar *)(str.data() + str.size()),
  3243. regions, 0) < 0) {
  3244. onig_region_free(regions, 1);
  3245. return false;
  3246. }
  3247. const char *s = str.data();
  3248. int string_len = str.size();
  3249. match_len = regions->end[0] - regions->beg[0];
  3250. regs = Array::Create();
  3251. for (i = 0; i < regions->num_regs; i++) {
  3252. beg = regions->beg[i];
  3253. end = regions->end[i];
  3254. if (beg >= 0 && beg < end && end <= string_len) {
  3255. regs.append(String(s + beg, end - beg, CopyString));
  3256. } else {
  3257. regs.append(false);
  3258. }
  3259. }
  3260. if (match_len == 0) {
  3261. match_len = 1;
  3262. }
  3263. if (regions != NULL) {
  3264. onig_region_free(regions, 1);
  3265. }
  3266. return match_len;
  3267. }
  3268. Variant f_mb_ereg(CVarRef pattern, const String& str,
  3269. VRefParam regs /* = null */) {
  3270. return _php_mb_regex_ereg_exec(pattern, str, regs, 0);
  3271. }
  3272. Variant f_mb_eregi(CVarRef pattern, const String& str,
  3273. VRefParam regs /* = null */) {
  3274. return _php_mb_regex_ereg_exec(pattern, str, regs, 1);
  3275. }
  3276. Variant f_mb_regex_encoding(const String& encoding /* = null_string */) {
  3277. if (encoding.empty()) {
  3278. const char *retval = php_mb_regex_mbctype2name(MBSTRG(current_mbctype));
  3279. if (retval != NULL) {
  3280. return String(retval, CopyString);
  3281. }
  3282. return false;
  3283. }
  3284. OnigEncoding mbctype = php_mb_regex_name2mbctype(encoding.data());
  3285. if (mbctype == ONIG_ENCODING_UNDEF) {
  3286. raise_warning("Unknown encoding \"%s\"", encoding.data());
  3287. return false;
  3288. }
  3289. MBSTRG(current_mbctype) = mbctype;
  3290. return true;
  3291. }
  3292. static void php_mb_regex_set_options(OnigOptionType options,
  3293. OnigSyntaxType *syntax,
  3294. OnigOptionType *prev_options,
  3295. OnigSyntaxType **prev_syntax) {
  3296. if (prev_options != NULL) {
  3297. *prev_options = MBSTRG(regex_default_options);
  3298. }
  3299. if (prev_syntax != NULL) {
  3300. *prev_syntax = MBSTRG(regex_default_syntax);
  3301. }
  3302. MBSTRG(regex_default_options) = options;
  3303. MBSTRG(regex_default_syntax) = syntax;
  3304. }
  3305. String f_mb_regex_set_options(const String& options /* = null_string */) {
  3306. OnigOptionType opt;
  3307. OnigSyntaxType *syntax;
  3308. char buf[16];
  3309. if (!options.empty()) {
  3310. opt = 0;
  3311. syntax = NULL;
  3312. _php_mb_regex_init_options(options.data(), options.size(),
  3313. &opt, &syntax, NULL);
  3314. php_mb_regex_set_options(opt, syntax, NULL, NULL);
  3315. } else {
  3316. opt = MBSTRG(regex_default_options);
  3317. syntax = MBSTRG(regex_default_syntax);
  3318. }
  3319. _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
  3320. return String(buf, CopyString);
  3321. }
  3322. Variant f_mb_split(const String& pattern, const String& str,
  3323. int count /* = -1 */) {
  3324. php_mb_regex_t *re;
  3325. OnigRegion *regs = NULL;
  3326. int n, err;
  3327. if (count == 0) {
  3328. count = 1;
  3329. }
  3330. /* create regex pattern buffer */
  3331. if ((re = php_mbregex_compile_pattern(pattern,
  3332. MBSTRG(regex_default_options),
  3333. MBSTRG(current_mbctype),
  3334. MBSTRG(regex_default_syntax)))
  3335. == NULL) {
  3336. return false;
  3337. }
  3338. Array ret;
  3339. OnigUChar *pos0 = (OnigUChar *)str.data();
  3340. OnigUChar *pos_end = (OnigUChar *)(str.data() + str.size());
  3341. OnigUChar *pos = pos0;
  3342. err = 0;
  3343. regs = onig_region_new();
  3344. /* churn through str, generating array entries as we go */
  3345. while ((--count != 0) &&
  3346. (err = onig_search(re, pos0, pos_end, pos, pos_end, regs, 0)) >= 0) {
  3347. if (regs->beg[0] == regs->end[0]) {
  3348. raise_warning("Empty regular expression");
  3349. break;
  3350. }
  3351. /* add it to the array */
  3352. if (regs->beg[0] < str.size() && regs->beg[0] >= (pos - pos0)) {
  3353. ret.append(String((const char *)pos,
  3354. ((OnigUChar *)(str.data() + regs->beg[0]) - pos),
  3355. CopyString));
  3356. } else {
  3357. err = -2;
  3358. break;
  3359. }
  3360. /* point at our new starting point */
  3361. n = regs->end[0];
  3362. if ((pos - pos0) < n) {
  3363. pos = pos0 + n;
  3364. }
  3365. if (count < 0) {
  3366. count = 0;
  3367. }
  3368. onig_region_free(regs, 0);
  3369. }
  3370. onig_region_free(regs, 1);
  3371. /* see if we encountered an error */
  3372. if (err <= -2) {
  3373. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  3374. onig_error_code_to_str(err_str, err);
  3375. raise_warning("mbregex search failure in mbsplit(): %s", err_str);
  3376. return false;
  3377. }
  3378. /* otherwise we just have one last element to add to the array */
  3379. n = pos_end - pos;
  3380. if (n > 0) {
  3381. ret.append(String((const char *)pos, n, CopyString));
  3382. } else {
  3383. ret.append("");
  3384. }
  3385. return ret;
  3386. }
  3387. ///////////////////////////////////////////////////////////////////////////////
  /*
   * Skip a folded-header separator: CRLF followed by at least one space or
   * tab. When `str[pos]` starts such a separator, `pos` is moved to the last
   * whitespace character of the run and the enclosing loop is continued, so
   * the separator is left untouched by whatever follows the macro.
   *
   * Must be used as a statement directly inside a loop body (it expands to
   * an `if` containing `continue`), on a NUL-terminated `str`; `pos` must be
   * a modifiable lvalue. Both arguments are parenthesized in the expansion
   * so that expression arguments such as `base + 1` expand correctly.
   */
  #define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
    if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && \
        ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
      (pos) += 2; \
      while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
        (pos)++; \
      } \
      continue; \
    }
  3397. static int _php_mbstr_parse_mail_headers(Array &ht, const char *str,
  3398. size_t str_len) {
  3399. const char *ps;
  3400. size_t icnt;
  3401. int state = 0;
  3402. int crlf_state = -1;
  3403. StringBuffer token;
  3404. String fld_name, fld_val;
  3405. ps = str;
  3406. icnt = str_len;
  3407. /*
  3408. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  3409. * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
  3410. * state 0 1 2 3
  3411. *
  3412. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  3413. * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
  3414. * crlf_state -1 0 1 -1
  3415. *
  3416. */
  3417. while (icnt > 0) {
  3418. switch (*ps) {
  3419. case ':':
  3420. if (crlf_state == 1) {
  3421. token.append('\r');
  3422. }
  3423. if (state == 0 || state == 1) {
  3424. fld_name = token.detach();
  3425. state = 2;
  3426. } else {
  3427. token.append(*ps);
  3428. }
  3429. crlf_state = 0;
  3430. break;
  3431. case '\n':
  3432. if (crlf_state == -1) {
  3433. goto out;
  3434. }
  3435. crlf_state = -1;
  3436. break;
  3437. case '\r':
  3438. if (crlf_state == 1) {
  3439. token.append('\r');
  3440. } else {
  3441. crlf_state = 1;
  3442. }
  3443. break;
  3444. case ' ': case '\t':
  3445. if (crlf_state == -1) {
  3446. if (state == 3) {
  3447. /* continuing from the previous line */
  3448. state = 4;
  3449. } else {
  3450. /* simply skipping this new line */
  3451. state = 5;
  3452. }
  3453. } else {
  3454. if (crlf_state == 1) {
  3455. token.append('\r');
  3456. }
  3457. if (state == 1 || state == 3) {
  3458. token.append(*ps);
  3459. }
  3460. }
  3461. crlf_state = 0;
  3462. break;
  3463. default:
  3464. switch (state) {
  3465. case 0:
  3466. token.clear();
  3467. state = 1;
  3468. break;
  3469. case 2:
  3470. if (crlf_state != -1) {
  3471. token.clear();
  3472. state = 3;
  3473. break;
  3474. }
  3475. /* break is missing intentionally */
  3476. case 3:
  3477. if (crlf_state == -1) {
  3478. fld_val = token.detach();
  3479. if (!fld_name.empty() && !fld_val.empty()) {
  3480. /* FIXME: some locale free implementation is
  3481. * really required here,,, */
  3482. ht.set(f_strtoupper(fld_name), fld_val);
  3483. }
  3484. state = 1;
  3485. }
  3486. break;
  3487. case 4:
  3488. token.append(' ');
  3489. state = 3;
  3490. break;
  3491. }
  3492. if (crlf_state == 1) {
  3493. token.append('\r');
  3494. }
  3495. token.append(*ps);
  3496. crlf_state = 0;
  3497. break;
  3498. }
  3499. ps++, icnt--;
  3500. }
  3501. out:
  3502. if (state == 2) {
  3503. token.clear();
  3504. state = 3;
  3505. }
  3506. if (state == 3) {
  3507. fld_val = token.detach();
  3508. if (!fld_name.empty() && !fld_val.empty()) {
  3509. /* FIXME: some locale free implementation is
  3510. * really required here,,, */
  3511. ht.set(f_strtoupper(fld_name), fld_val);
  3512. }
  3513. }
  3514. return state;
  3515. }
  /**
   * Deliver a message by piping it to "/usr/sbin/sendmail -t -i".
   *
   * The text written to the pipe is: a "To:" line, a "Subject:" line, the
   * extra headers (when given), an empty line and the message body.
   *
   * @param to         recipient, written verbatim into the "To:" header.
   * @param subject    subject, written verbatim into the "Subject:" header.
   * @param message    message body.
   * @param headers    additional header lines, or NULL for none.
   * @param extra_cmd  text appended to the sendmail command line, or NULL.
   *                   NOTE(review): it is passed to the shell as is --
   *                   presumably the caller escapes it; confirm.
   * @return 1 when the mailer reports success, 0 otherwise.
   */
  static int php_mail(const char *to, const char *subject, const char *message,
                      const char *headers, const char *extra_cmd) {
    const char *sendmail_path = "/usr/sbin/sendmail -t -i";

    String command_line = sendmail_path;
    if (extra_cmd != NULL) {
      command_line += " ";
      command_line += extra_cmd;
    }

    /* popen() does not report a failure of its internal fork()/exec of the
     * shell, so errno is explicitly cleared first to make sure a stale value
     * is not mistaken for a new error below. */
    errno = 0;
    FILE *mailer = popen(command_line.data(), "w");
    if (mailer == NULL) {
      raise_warning("Could not execute mail delivery program '%s'",
                    sendmail_path);
      return 0;
    }
    if (errno == EACCES) {
      raise_warning("Permission denied: unable to execute shell to run "
                    "mail delivery binary '%s'", sendmail_path);
      pclose(mailer);
      return 0;
    }

    fprintf(mailer, "To: %s\nSubject: %s\n", to, subject);
    if (headers != NULL) {
      fprintf(mailer, "%s\n", headers);
    }
    fprintf(mailer, "\n%s\n", message);

    /* NOTE(review): pclose() returns a wait status, while the values below
     * are plain exit codes -- confirm whether the comparison is intended. */
    const int mailer_status = pclose(mailer);
    bool delivered;
  #if defined(EX_TEMPFAIL)
    delivered = (mailer_status == EX_OK) || (mailer_status == EX_TEMPFAIL);
  #elif defined(EX_OK)
    delivered = (mailer_status == EX_OK);
  #else
    delivered = (mailer_status == 0);
  #endif
    return delivered ? 1 : 0;
  }
  3556. bool f_mb_send_mail(const String& to, const String& subject, const String& message,
  3557. const String& headers /* = null_string */,
  3558. const String& extra_cmd /* = null_string */) {
  3559. /* initialize */
  3560. /* automatic allocateable buffer for additional header */
  3561. mbfl_memory_device device;
  3562. mbfl_memory_device_init(&device, 0, 0);
  3563. mbfl_string orig_str, conv_str;
  3564. mbfl_string_init(&orig_str);
  3565. mbfl_string_init(&conv_str);
  3566. /* character-set, transfer-encoding */
  3567. mbfl_no_encoding
  3568. tran_cs, /* transfar text charset */
  3569. head_enc, /* header transfar encoding */
  3570. body_enc; /* body transfar encoding */
  3571. tran_cs = mbfl_no_encoding_utf8;
  3572. head_enc = mbfl_no_encoding_base64;
  3573. body_enc = mbfl_no_encoding_base64;
  3574. const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
  3575. if (lang != NULL) {
  3576. tran_cs = lang->mail_charset;
  3577. head_enc = lang->mail_header_encoding;
  3578. body_enc = lang->mail_body_encoding;
  3579. }
  3580. Array ht_headers;
  3581. if (!headers.empty()) {
  3582. _php_mbstr_parse_mail_headers(ht_headers, headers.data(), headers.size());
  3583. }
  3584. struct {
  3585. int cnt_type:1;
  3586. int cnt_trans_enc:1;
  3587. } suppressed_hdrs = { 0, 0 };
  3588. static const StaticString s_CONTENT_TYPE("CONTENT-TYPE");
  3589. String s = ht_headers[s_CONTENT_TYPE].toString();
  3590. if (!s.isNull()) {
  3591. char *tmp;
  3592. char *param_name;
  3593. char *charset = NULL;
  3594. char *p = const_cast<char*>(strchr(s.data(), ';'));
  3595. if (p != NULL) {
  3596. /* skipping the padded spaces */
  3597. do {
  3598. ++p;
  3599. } while (*p == ' ' || *p == '\t');
  3600. if (*p != '\0') {
  3601. if ((param_name = strtok_r(p, "= ", &tmp)) != NULL) {
  3602. if (strcasecmp(param_name, "charset") == 0) {
  3603. mbfl_no_encoding _tran_cs = tran_cs;
  3604. charset = strtok_r(NULL, "= ", &tmp);
  3605. if (charset != NULL) {
  3606. _tran_cs = mbfl_name2no_encoding(charset);
  3607. }
  3608. if (_tran_cs == mbfl_no_encoding_invalid) {
  3609. raise_warning("Unsupported charset \"%s\" - "
  3610. "will be regarded as ascii", charset);
  3611. _tran_cs = mbfl_no_encoding_ascii;
  3612. }
  3613. tran_cs = _tran_cs;
  3614. }
  3615. }
  3616. }
  3617. }
  3618. suppressed_hdrs.cnt_type = 1;
  3619. }
  3620. static const StaticString
  3621. s_CONTENT_TRANSFER_ENCODING("CONTENT-TRANSFER-ENCODING");
  3622. s = ht_headers[s_CONTENT_TRANSFER_ENCODING];
  3623. if (!s.isNull()) {
  3624. mbfl_no_encoding _body_enc = mbfl_name2no_encoding(s.data());
  3625. switch (_body_enc) {
  3626. case mbfl_no_encoding_base64:
  3627. case mbfl_no_encoding_7bit:
  3628. case mbfl_no_encoding_8bit:
  3629. body_enc = _body_enc;
  3630. break;
  3631. default:
  3632. raise_warning("Unsupported transfer encoding \"%s\" - "
  3633. "will be regarded as 8bit", s.data());
  3634. body_enc = mbfl_no_encoding_8bit;
  3635. break;
  3636. }
  3637. suppressed_hdrs.cnt_trans_enc = 1;
  3638. }
  3639. /* To: */
  3640. char *to_r = NULL;
  3641. int err = 0;
  3642. if (!to.empty()) {
  3643. int to_len = to.size();
  3644. if (to_len > 0) {
  3645. to_r = strndup(to.data(), to_len);
  3646. for (; to_len; to_len--) {
  3647. if (!isspace((unsigned char)to_r[to_len - 1])) {
  3648. break;
  3649. }
  3650. to_r[to_len - 1] = '\0';
  3651. }
  3652. for (int i = 0; to_r[i]; i++) {
  3653. if (iscntrl((unsigned char)to_r[i])) {
  3654. /**
  3655. * According to RFC 822, section 3.1.1 long headers may be
  3656. * separated into parts using CRLF followed at least one
  3657. * linear-white-space character ('\t' or ' ').
  3658. * To prevent these separators from being replaced with a space,
  3659. * we use the SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
  3660. */
  3661. SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
  3662. to_r[i] = ' ';
  3663. }
  3664. }
  3665. } else {
  3666. to_r = (char*)to.data();
  3667. }
  3668. } else {
  3669. raise_warning("Missing To: field");
  3670. err = 1;
  3671. }
  3672. /* Subject: */
  3673. String encoded_subject;
  3674. if (!subject.isNull()) {
  3675. orig_str.no_language = MBSTRG(current_language);
  3676. orig_str.val = (unsigned char *)subject.data();
  3677. orig_str.len = subject.size();
  3678. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  3679. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  3680. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  3681. orig_str.no_encoding = mbfl_identify_encoding_no
  3682. (&orig_str, MBSTRG(current_detect_order_list),
  3683. MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3684. }
  3685. mbfl_string *pstr = mbfl_mime_header_encode
  3686. (&orig_str, &conv_str, tran_cs, head_enc,
  3687. "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
  3688. if (pstr != NULL) {
  3689. encoded_subject = String(reinterpret_cast<char*>(pstr->val),
  3690. pstr->len,
  3691. AttachString);
  3692. }
  3693. } else {
  3694. raise_warning("Missing Subject: field");
  3695. err = 1;
  3696. }
  3697. /* message body */
  3698. String encoded_message;
  3699. if (!message.empty()) {
  3700. orig_str.no_language = MBSTRG(current_language);
  3701. orig_str.val = (unsigned char*)message.data();
  3702. orig_str.len = message.size();
  3703. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  3704. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  3705. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  3706. orig_str.no_encoding = mbfl_identify_encoding_no
  3707. (&orig_str, MBSTRG(current_detect_order_list),
  3708. MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3709. }
  3710. mbfl_string *pstr = NULL;
  3711. {
  3712. mbfl_string tmpstr;
  3713. if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
  3714. tmpstr.no_encoding = mbfl_no_encoding_8bit;
  3715. pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
  3716. free(tmpstr.val);
  3717. }
  3718. }
  3719. if (pstr != NULL) {
  3720. encoded_message = String(reinterpret_cast<char*>(pstr->val),
  3721. pstr->len,
  3722. AttachString);
  3723. }
  3724. } else {
  3725. /* this is not really an error, so it is allowed. */
  3726. raise_warning("Empty message body");
  3727. }
  3728. /* other headers */
  3729. #define PHP_MBSTR_MAIL_MIME_HEADER1 "Mime-Version: 1.0"
  3730. #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
  3731. #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
  3732. #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
  3733. if (!headers.empty()) {
  3734. const char *p = headers.data();
  3735. int n = headers.size();
  3736. mbfl_memory_device_strncat(&device, p, n);
  3737. if (n > 0 && p[n - 1] != '\n') {
  3738. mbfl_memory_device_strncat(&device, "\n", 1);
  3739. }
  3740. }
  3741. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1,
  3742. sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
  3743. mbfl_memory_device_strncat(&device, "\n", 1);
  3744. if (!suppressed_hdrs.cnt_type) {
  3745. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2,
  3746. sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
  3747. char *p = (char *)mbfl_no2preferred_mime_name(tran_cs);
  3748. if (p != NULL) {
  3749. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3,
  3750. sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
  3751. mbfl_memory_device_strcat(&device, p);
  3752. }
  3753. mbfl_memory_device_strncat(&device, "\n", 1);
  3754. }
  3755. if (!suppressed_hdrs.cnt_trans_enc) {
  3756. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4,
  3757. sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
  3758. const char *p = (char *)mbfl_no2preferred_mime_name(body_enc);
  3759. if (p == NULL) {
  3760. p = "7bit";
  3761. }
  3762. mbfl_memory_device_strcat(&device, p);
  3763. mbfl_memory_device_strncat(&device, "\n", 1);
  3764. }
  3765. mbfl_memory_device_unput(&device);
  3766. mbfl_memory_device_output('\0', &device);
  3767. char *all_headers = (char *)device.buffer;
  3768. String cmd = f_escapeshellcmd(extra_cmd);
  3769. bool ret = (!err && php_mail(to_r, encoded_subject.data(),
  3770. encoded_message.data(),
  3771. all_headers, cmd.data()));
  3772. mbfl_memory_device_clear(&device);
  3773. return ret;
  3774. }
  3775. ///////////////////////////////////////////////////////////////////////////////
  3776. }