PageRenderTime 60ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/ext/mbstring/php_mbregex.c

http://github.com/infusion/PHP
C | 1401 lines | 1100 code | 148 blank | 153 comment | 273 complexity | 2a8aeea61d9642ea3fbe0bf868014155 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-2.1, BSD-3-Clause
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2011 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  16. +----------------------------------------------------------------------+
  17. */
  18. /* $Id: php_mbregex.c 306939 2011-01-01 02:19:59Z felipe $ */
  19. #ifdef HAVE_CONFIG_H
  20. #include "config.h"
  21. #endif
  22. #include "php.h"
  23. #include "php_ini.h"
  24. #if HAVE_MBREGEX
  25. #include "ext/standard/php_smart_str.h"
  26. #include "ext/standard/info.h"
  27. #include "php_mbregex.h"
  28. #include "mbstring.h"
  29. #include "php_onig_compat.h" /* must come prior to the oniguruma header */
  30. #include <oniguruma.h>
  31. #undef UChar
  32. ZEND_EXTERN_MODULE_GLOBALS(mbstring)
  33. struct _zend_mb_regex_globals {
  34. OnigEncoding default_mbctype;
  35. OnigEncoding current_mbctype;
  36. HashTable ht_rc;
  37. zval *search_str;
  38. zval *search_str_val;
  39. unsigned int search_pos;
  40. php_mb_regex_t *search_re;
  41. OnigRegion *search_regs;
  42. OnigOptionType regex_default_options;
  43. OnigSyntaxType *regex_default_syntax;
  44. };
  45. #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
  46. /* {{{ static void php_mb_regex_free_cache() */
  47. static void php_mb_regex_free_cache(php_mb_regex_t **pre)
  48. {
  49. onig_free(*pre);
  50. }
  51. /* }}} */
  52. /* {{{ _php_mb_regex_globals_ctor */
  53. static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
  54. {
  55. pglobals->default_mbctype = ONIG_ENCODING_EUC_JP;
  56. pglobals->current_mbctype = ONIG_ENCODING_EUC_JP;
  57. zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
  58. pglobals->search_str = (zval*) NULL;
  59. pglobals->search_re = (php_mb_regex_t*)NULL;
  60. pglobals->search_pos = 0;
  61. pglobals->search_regs = (OnigRegion*)NULL;
  62. pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
  63. pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
  64. return SUCCESS;
  65. }
  66. /* }}} */
  67. /* {{{ _php_mb_regex_globals_dtor */
  68. static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC)
  69. {
  70. zend_hash_destroy(&pglobals->ht_rc);
  71. }
  72. /* }}} */
  73. /* {{{ php_mb_regex_globals_alloc */
  74. zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
  75. {
  76. zend_mb_regex_globals *pglobals = pemalloc(
  77. sizeof(zend_mb_regex_globals), 1);
  78. if (!pglobals) {
  79. return NULL;
  80. }
  81. if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
  82. pefree(pglobals, 1);
  83. return NULL;
  84. }
  85. return pglobals;
  86. }
  87. /* }}} */
  88. /* {{{ php_mb_regex_globals_free */
  89. void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
  90. {
  91. if (!pglobals) {
  92. return;
  93. }
  94. _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
  95. pefree(pglobals, 1);
  96. }
  97. /* }}} */
  98. /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
  99. PHP_MINIT_FUNCTION(mb_regex)
  100. {
  101. onig_init();
  102. return SUCCESS;
  103. }
  104. /* }}} */
  105. /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
  106. PHP_MSHUTDOWN_FUNCTION(mb_regex)
  107. {
  108. onig_end();
  109. return SUCCESS;
  110. }
  111. /* }}} */
  112. /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
  113. PHP_RINIT_FUNCTION(mb_regex)
  114. {
  115. return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
  116. }
  117. /* }}} */
  118. /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
  119. PHP_RSHUTDOWN_FUNCTION(mb_regex)
  120. {
  121. MBREX(current_mbctype) = MBREX(default_mbctype);
  122. if (MBREX(search_str) != NULL) {
  123. zval_ptr_dtor(&MBREX(search_str));
  124. MBREX(search_str) = (zval *)NULL;
  125. }
  126. MBREX(search_pos) = 0;
  127. if (MBREX(search_regs) != NULL) {
  128. onig_region_free(MBREX(search_regs), 1);
  129. MBREX(search_regs) = (OnigRegion *)NULL;
  130. }
  131. zend_hash_clean(&MBREX(ht_rc));
  132. return SUCCESS;
  133. }
  134. /* }}} */
  135. /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
  136. PHP_MINFO_FUNCTION(mb_regex)
  137. {
  138. char buf[32];
  139. php_info_print_table_start();
  140. php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
  141. snprintf(buf, sizeof(buf), "%d.%d.%d",
  142. ONIGURUMA_VERSION_MAJOR,
  143. ONIGURUMA_VERSION_MINOR,
  144. ONIGURUMA_VERSION_TEENY);
  145. #ifdef PHP_ONIG_BUNDLED
  146. #ifdef USE_COMBINATION_EXPLOSION_CHECK
  147. php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
  148. #else /* USE_COMBINATION_EXPLOSION_CHECK */
  149. php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
  150. #endif /* USE_COMBINATION_EXPLOSION_CHECK */
  151. #endif /* PHP_BUNDLED_ONIG */
  152. php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
  153. php_info_print_table_end();
  154. }
  155. /* }}} */
  156. /*
  157. * encoding name resolver
  158. */
  159. /* {{{ encoding name map */
  160. typedef struct _php_mb_regex_enc_name_map_t {
  161. const char *names;
  162. OnigEncoding code;
  163. } php_mb_regex_enc_name_map_t;
  164. php_mb_regex_enc_name_map_t enc_name_map[] = {
  165. #ifdef ONIG_ENCODING_EUC_JP
  166. {
  167. "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
  168. ONIG_ENCODING_EUC_JP
  169. },
  170. #endif
  171. #ifdef ONIG_ENCODING_UTF8
  172. {
  173. "UTF-8\0UTF8\0",
  174. ONIG_ENCODING_UTF8
  175. },
  176. #endif
  177. #ifdef ONIG_ENCODING_UTF16_BE
  178. {
  179. "UTF-16\0UTF-16BE\0",
  180. ONIG_ENCODING_UTF16_BE
  181. },
  182. #endif
  183. #ifdef ONIG_ENCODING_UTF16_LE
  184. {
  185. "UTF-16LE\0",
  186. ONIG_ENCODING_UTF16_LE
  187. },
  188. #endif
  189. #ifdef ONIG_ENCODING_UTF32_BE
  190. {
  191. "UCS-4\0UTF-32\0UTF-32BE\0",
  192. ONIG_ENCODING_UTF32_BE
  193. },
  194. #endif
  195. #ifdef ONIG_ENCODING_UTF32_LE
  196. {
  197. "UCS-4LE\0UTF-32LE\0",
  198. ONIG_ENCODING_UTF32_LE
  199. },
  200. #endif
  201. #ifdef ONIG_ENCODING_SJIS
  202. {
  203. "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
  204. ONIG_ENCODING_SJIS
  205. },
  206. #endif
  207. #ifdef ONIG_ENCODING_BIG5
  208. {
  209. "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
  210. ONIG_ENCODING_BIG5
  211. },
  212. #endif
  213. #ifdef ONIG_ENCODING_EUC_CN
  214. {
  215. "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
  216. ONIG_ENCODING_EUC_CN
  217. },
  218. #endif
  219. #ifdef ONIG_ENCODING_EUC_TW
  220. {
  221. "EUC-TW\0EUCTW\0EUC_TW\0",
  222. ONIG_ENCODING_EUC_TW
  223. },
  224. #endif
  225. #ifdef ONIG_ENCODING_EUC_KR
  226. {
  227. "EUC-KR\0EUCKR\0EUC_KR\0",
  228. ONIG_ENCODING_EUC_KR
  229. },
  230. #endif
  231. #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
  232. {
  233. "KOI8\0KOI-8\0",
  234. ONIG_ENCODING_KOI8
  235. },
  236. #endif
  237. #ifdef ONIG_ENCODING_KOI8_R
  238. {
  239. "KOI8R\0KOI8-R\0KOI-8R\0",
  240. ONIG_ENCODING_KOI8_R
  241. },
  242. #endif
  243. #ifdef ONIG_ENCODING_ISO_8859_1
  244. {
  245. "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
  246. ONIG_ENCODING_ISO_8859_1
  247. },
  248. #endif
  249. #ifdef ONIG_ENCODING_ISO_8859_2
  250. {
  251. "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
  252. ONIG_ENCODING_ISO_8859_2
  253. },
  254. #endif
  255. #ifdef ONIG_ENCODING_ISO_8859_3
  256. {
  257. "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
  258. ONIG_ENCODING_ISO_8859_3
  259. },
  260. #endif
  261. #ifdef ONIG_ENCODING_ISO_8859_4
  262. {
  263. "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
  264. ONIG_ENCODING_ISO_8859_4
  265. },
  266. #endif
  267. #ifdef ONIG_ENCODING_ISO_8859_5
  268. {
  269. "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
  270. ONIG_ENCODING_ISO_8859_5
  271. },
  272. #endif
  273. #ifdef ONIG_ENCODING_ISO_8859_6
  274. {
  275. "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
  276. ONIG_ENCODING_ISO_8859_6
  277. },
  278. #endif
  279. #ifdef ONIG_ENCODING_ISO_8859_7
  280. {
  281. "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
  282. ONIG_ENCODING_ISO_8859_7
  283. },
  284. #endif
  285. #ifdef ONIG_ENCODING_ISO_8859_8
  286. {
  287. "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
  288. ONIG_ENCODING_ISO_8859_8
  289. },
  290. #endif
  291. #ifdef ONIG_ENCODING_ISO_8859_9
  292. {
  293. "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
  294. ONIG_ENCODING_ISO_8859_9
  295. },
  296. #endif
  297. #ifdef ONIG_ENCODING_ISO_8859_10
  298. {
  299. "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
  300. ONIG_ENCODING_ISO_8859_10
  301. },
  302. #endif
  303. #ifdef ONIG_ENCODING_ISO_8859_11
  304. {
  305. "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
  306. ONIG_ENCODING_ISO_8859_11
  307. },
  308. #endif
  309. #ifdef ONIG_ENCODING_ISO_8859_13
  310. {
  311. "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
  312. ONIG_ENCODING_ISO_8859_13
  313. },
  314. #endif
  315. #ifdef ONIG_ENCODING_ISO_8859_14
  316. {
  317. "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
  318. ONIG_ENCODING_ISO_8859_14
  319. },
  320. #endif
  321. #ifdef ONIG_ENCODING_ISO_8859_15
  322. {
  323. "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
  324. ONIG_ENCODING_ISO_8859_15
  325. },
  326. #endif
  327. #ifdef ONIG_ENCODING_ISO_8859_16
  328. {
  329. "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
  330. ONIG_ENCODING_ISO_8859_16
  331. },
  332. #endif
  333. #ifdef ONIG_ENCODING_ASCII
  334. {
  335. "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
  336. ONIG_ENCODING_ASCII
  337. },
  338. #endif
  339. { NULL, ONIG_ENCODING_UNDEF }
  340. };
  341. /* }}} */
  342. /* {{{ php_mb_regex_name2mbctype */
  343. static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
  344. {
  345. const char *p;
  346. php_mb_regex_enc_name_map_t *mapping;
  347. if (pname == NULL) {
  348. return ONIG_ENCODING_UNDEF;
  349. }
  350. for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  351. for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
  352. if (strcasecmp(p, pname) == 0) {
  353. return mapping->code;
  354. }
  355. }
  356. }
  357. return ONIG_ENCODING_UNDEF;
  358. }
  359. /* }}} */
  360. /* {{{ php_mb_regex_mbctype2name */
  361. static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
  362. {
  363. php_mb_regex_enc_name_map_t *mapping;
  364. for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  365. if (mapping->code == mbctype) {
  366. return mapping->names;
  367. }
  368. }
  369. return NULL;
  370. }
  371. /* }}} */
  372. /* {{{ php_mb_regex_set_mbctype */
  373. int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
  374. {
  375. OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
  376. if (mbctype == ONIG_ENCODING_UNDEF) {
  377. return FAILURE;
  378. }
  379. MBREX(current_mbctype) = mbctype;
  380. return SUCCESS;
  381. }
  382. /* }}} */
  383. /* {{{ php_mb_regex_set_default_mbctype */
  384. int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
  385. {
  386. OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
  387. if (mbctype == ONIG_ENCODING_UNDEF) {
  388. return FAILURE;
  389. }
  390. MBREX(default_mbctype) = mbctype;
  391. return SUCCESS;
  392. }
  393. /* }}} */
  394. /* {{{ php_mb_regex_get_mbctype */
  395. const char *php_mb_regex_get_mbctype(TSRMLS_D)
  396. {
  397. return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  398. }
  399. /* }}} */
  400. /* {{{ php_mb_regex_get_default_mbctype */
  401. const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
  402. {
  403. return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
  404. }
  405. /* }}} */
  406. /*
  407. * regex cache
  408. */
  409. /* {{{ php_mbregex_compile_pattern */
  410. static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
  411. {
  412. int err_code = 0;
  413. int found = 0;
  414. php_mb_regex_t *retval = NULL, **rc = NULL;
  415. OnigErrorInfo err_info;
  416. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  417. found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
  418. if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
  419. if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
  420. onig_error_code_to_str(err_str, err_code, err_info);
  421. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
  422. retval = NULL;
  423. goto out;
  424. }
  425. zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
  426. } else if (found == SUCCESS) {
  427. retval = *rc;
  428. }
  429. out:
  430. return retval;
  431. }
  432. /* }}} */
  433. /* {{{ _php_mb_regex_get_option_string */
  434. static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
  435. {
  436. size_t len_left = len;
  437. size_t len_req = 0;
  438. char *p = str;
  439. char c;
  440. if ((option & ONIG_OPTION_IGNORECASE) != 0) {
  441. if (len_left > 0) {
  442. --len_left;
  443. *(p++) = 'i';
  444. }
  445. ++len_req;
  446. }
  447. if ((option & ONIG_OPTION_EXTEND) != 0) {
  448. if (len_left > 0) {
  449. --len_left;
  450. *(p++) = 'x';
  451. }
  452. ++len_req;
  453. }
  454. if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
  455. (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
  456. if (len_left > 0) {
  457. --len_left;
  458. *(p++) = 'p';
  459. }
  460. ++len_req;
  461. } else {
  462. if ((option & ONIG_OPTION_MULTILINE) != 0) {
  463. if (len_left > 0) {
  464. --len_left;
  465. *(p++) = 'm';
  466. }
  467. ++len_req;
  468. }
  469. if ((option & ONIG_OPTION_SINGLELINE) != 0) {
  470. if (len_left > 0) {
  471. --len_left;
  472. *(p++) = 's';
  473. }
  474. ++len_req;
  475. }
  476. }
  477. if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
  478. if (len_left > 0) {
  479. --len_left;
  480. *(p++) = 'l';
  481. }
  482. ++len_req;
  483. }
  484. if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
  485. if (len_left > 0) {
  486. --len_left;
  487. *(p++) = 'n';
  488. }
  489. ++len_req;
  490. }
  491. c = 0;
  492. if (syntax == ONIG_SYNTAX_JAVA) {
  493. c = 'j';
  494. } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
  495. c = 'u';
  496. } else if (syntax == ONIG_SYNTAX_GREP) {
  497. c = 'g';
  498. } else if (syntax == ONIG_SYNTAX_EMACS) {
  499. c = 'c';
  500. } else if (syntax == ONIG_SYNTAX_RUBY) {
  501. c = 'r';
  502. } else if (syntax == ONIG_SYNTAX_PERL) {
  503. c = 'z';
  504. } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
  505. c = 'b';
  506. } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
  507. c = 'd';
  508. }
  509. if (c != 0) {
  510. if (len_left > 0) {
  511. --len_left;
  512. *(p++) = c;
  513. }
  514. ++len_req;
  515. }
  516. if (len_left > 0) {
  517. --len_left;
  518. *(p++) = '\0';
  519. }
  520. ++len_req;
  521. if (len < len_req) {
  522. return len_req;
  523. }
  524. return 0;
  525. }
  526. /* }}} */
  527. /* {{{ _php_mb_regex_init_options */
  528. static void
  529. _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
  530. {
  531. int n;
  532. char c;
  533. int optm = 0;
  534. *syntax = ONIG_SYNTAX_RUBY;
  535. if (parg != NULL) {
  536. n = 0;
  537. while(n < narg) {
  538. c = parg[n++];
  539. switch (c) {
  540. case 'i':
  541. optm |= ONIG_OPTION_IGNORECASE;
  542. break;
  543. case 'x':
  544. optm |= ONIG_OPTION_EXTEND;
  545. break;
  546. case 'm':
  547. optm |= ONIG_OPTION_MULTILINE;
  548. break;
  549. case 's':
  550. optm |= ONIG_OPTION_SINGLELINE;
  551. break;
  552. case 'p':
  553. optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
  554. break;
  555. case 'l':
  556. optm |= ONIG_OPTION_FIND_LONGEST;
  557. break;
  558. case 'n':
  559. optm |= ONIG_OPTION_FIND_NOT_EMPTY;
  560. break;
  561. case 'j':
  562. *syntax = ONIG_SYNTAX_JAVA;
  563. break;
  564. case 'u':
  565. *syntax = ONIG_SYNTAX_GNU_REGEX;
  566. break;
  567. case 'g':
  568. *syntax = ONIG_SYNTAX_GREP;
  569. break;
  570. case 'c':
  571. *syntax = ONIG_SYNTAX_EMACS;
  572. break;
  573. case 'r':
  574. *syntax = ONIG_SYNTAX_RUBY;
  575. break;
  576. case 'z':
  577. *syntax = ONIG_SYNTAX_PERL;
  578. break;
  579. case 'b':
  580. *syntax = ONIG_SYNTAX_POSIX_BASIC;
  581. break;
  582. case 'd':
  583. *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
  584. break;
  585. case 'e':
  586. if (eval != NULL) *eval = 1;
  587. break;
  588. default:
  589. break;
  590. }
  591. }
  592. if (option != NULL) *option|=optm;
  593. }
  594. }
  595. /* }}} */
/*
 * php functions
 */
  599. /* {{{ proto string mb_regex_encoding([string encoding])
  600. Returns the current encoding for regex as a string. */
  601. PHP_FUNCTION(mb_regex_encoding)
  602. {
  603. size_t argc = ZEND_NUM_ARGS();
  604. char *encoding;
  605. int encoding_len;
  606. OnigEncoding mbctype;
  607. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
  608. return;
  609. }
  610. if (argc == 0) {
  611. const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  612. if (retval == NULL) {
  613. RETURN_FALSE;
  614. }
  615. RETURN_STRING((char *)retval, 1);
  616. } else if (argc == 1) {
  617. mbctype = _php_mb_regex_name2mbctype(encoding);
  618. if (mbctype == ONIG_ENCODING_UNDEF) {
  619. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  620. RETURN_FALSE;
  621. }
  622. MBREX(current_mbctype) = mbctype;
  623. RETURN_TRUE;
  624. }
  625. }
  626. /* }}} */
  627. /* {{{ _php_mb_regex_ereg_exec */
  628. static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
  629. {
  630. zval **arg_pattern, *array;
  631. char *string;
  632. int string_len;
  633. php_mb_regex_t *re;
  634. OnigRegion *regs = NULL;
  635. int i, match_len, beg, end;
  636. OnigOptionType options;
  637. char *str;
  638. array = NULL;
  639. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
  640. RETURN_FALSE;
  641. }
  642. options = MBREX(regex_default_options);
  643. if (icase) {
  644. options |= ONIG_OPTION_IGNORECASE;
  645. }
  646. /* compile the regular expression from the supplied regex */
  647. if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
  648. /* we convert numbers to integers and treat them as a string */
  649. if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
  650. convert_to_long_ex(arg_pattern); /* get rid of decimal places */
  651. }
  652. convert_to_string_ex(arg_pattern);
  653. /* don't bother doing an extended regex with just a number */
  654. }
  655. if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
  656. php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
  657. RETVAL_FALSE;
  658. goto out;
  659. }
  660. re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
  661. if (re == NULL) {
  662. RETVAL_FALSE;
  663. goto out;
  664. }
  665. regs = onig_region_new();
  666. /* actually execute the regular expression */
  667. if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
  668. RETVAL_FALSE;
  669. goto out;
  670. }
  671. match_len = 1;
  672. str = string;
  673. if (array != NULL) {
  674. match_len = regs->end[0] - regs->beg[0];
  675. zval_dtor(array);
  676. array_init(array);
  677. for (i = 0; i < regs->num_regs; i++) {
  678. beg = regs->beg[i];
  679. end = regs->end[i];
  680. if (beg >= 0 && beg < end && end <= string_len) {
  681. add_index_stringl(array, i, (char *)&str[beg], end - beg, 1);
  682. } else {
  683. add_index_bool(array, i, 0);
  684. }
  685. }
  686. }
  687. if (match_len == 0) {
  688. match_len = 1;
  689. }
  690. RETVAL_LONG(match_len);
  691. out:
  692. if (regs != NULL) {
  693. onig_region_free(regs, 1);
  694. }
  695. }
  696. /* }}} */
  697. /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
  698. Regular expression match for multibyte string */
  699. PHP_FUNCTION(mb_ereg)
  700. {
  701. _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  702. }
  703. /* }}} */
  704. /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
  705. Case-insensitive regular expression match for multibyte string */
  706. PHP_FUNCTION(mb_eregi)
  707. {
  708. _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  709. }
  710. /* }}} */
  711. /* {{{ _php_mb_regex_ereg_replace_exec */
  712. static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options)
  713. {
  714. zval **arg_pattern_zval;
  715. char *arg_pattern;
  716. int arg_pattern_len;
  717. char *replace;
  718. int replace_len;
  719. char *string;
  720. int string_len;
  721. char *p;
  722. php_mb_regex_t *re;
  723. OnigSyntaxType *syntax;
  724. OnigRegion *regs = NULL;
  725. smart_str out_buf = { 0 };
  726. smart_str eval_buf = { 0 };
  727. smart_str *pbuf;
  728. int i, err, eval, n;
  729. OnigUChar *pos;
  730. OnigUChar *string_lim;
  731. char *description = NULL;
  732. char pat_buf[2];
  733. const mbfl_encoding *enc;
  734. {
  735. const char *current_enc_name;
  736. current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  737. if (current_enc_name == NULL ||
  738. (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
  739. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
  740. RETURN_FALSE;
  741. }
  742. }
  743. eval = 0;
  744. {
  745. char *option_str = NULL;
  746. int option_str_len = 0;
  747. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
  748. &arg_pattern_zval,
  749. &replace, &replace_len,
  750. &string, &string_len,
  751. &option_str, &option_str_len) == FAILURE) {
  752. RETURN_FALSE;
  753. }
  754. if (option_str != NULL) {
  755. _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
  756. } else {
  757. options |= MBREX(regex_default_options);
  758. syntax = MBREX(regex_default_syntax);
  759. }
  760. }
  761. if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
  762. arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
  763. arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
  764. } else {
  765. /* FIXME: this code is not multibyte aware! */
  766. convert_to_long_ex(arg_pattern_zval);
  767. pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval);
  768. pat_buf[1] = '\0';
  769. arg_pattern = pat_buf;
  770. arg_pattern_len = 1;
  771. }
  772. /* create regex pattern buffer */
  773. re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
  774. if (re == NULL) {
  775. RETURN_FALSE;
  776. }
  777. if (eval) {
  778. pbuf = &eval_buf;
  779. description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
  780. } else {
  781. pbuf = &out_buf;
  782. description = NULL;
  783. }
  784. /* do the actual work */
  785. err = 0;
  786. pos = (OnigUChar *)string;
  787. string_lim = (OnigUChar*)(string + string_len);
  788. regs = onig_region_new();
  789. while (err >= 0) {
  790. err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
  791. if (err <= -2) {
  792. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  793. onig_error_code_to_str(err_str, err);
  794. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
  795. break;
  796. }
  797. if (err >= 0) {
  798. #if moriyoshi_0
  799. if (regs->beg[0] == regs->end[0]) {
  800. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
  801. break;
  802. }
  803. #endif
  804. /* copy the part of the string before the match */
  805. smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
  806. /* copy replacement and backrefs */
  807. i = 0;
  808. p = replace;
  809. while (i < replace_len) {
  810. int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
  811. n = -1;
  812. if ((replace_len - i) >= 2 && fwd == 1 &&
  813. p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
  814. n = p[1] - '0';
  815. }
  816. if (n >= 0 && n < regs->num_regs) {
  817. if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
  818. smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
  819. }
  820. p += 2;
  821. i += 2;
  822. } else {
  823. smart_str_appendl(pbuf, p, fwd);
  824. p += fwd;
  825. i += fwd;
  826. }
  827. }
  828. if (eval) {
  829. zval v;
  830. /* null terminate buffer */
  831. smart_str_0(&eval_buf);
  832. /* do eval */
  833. if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
  834. efree(description);
  835. php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
  836. /* zend_error() does not return in this case */
  837. }
  838. /* result of eval */
  839. convert_to_string(&v);
  840. smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
  841. /* Clean up */
  842. eval_buf.len = 0;
  843. zval_dtor(&v);
  844. }
  845. n = regs->end[0];
  846. if ((pos - (OnigUChar *)string) < n) {
  847. pos = (OnigUChar *)string + n;
  848. } else {
  849. if (pos < string_lim) {
  850. smart_str_appendl(&out_buf, pos, 1);
  851. }
  852. pos++;
  853. }
  854. } else { /* nomatch */
  855. /* stick that last bit of string on our output */
  856. if (string_lim - pos > 0) {
  857. smart_str_appendl(&out_buf, pos, string_lim - pos);
  858. }
  859. }
  860. onig_region_free(regs, 0);
  861. }
  862. if (description) {
  863. efree(description);
  864. }
  865. if (regs != NULL) {
  866. onig_region_free(regs, 1);
  867. }
  868. smart_str_free(&eval_buf);
  869. if (err <= -2) {
  870. smart_str_free(&out_buf);
  871. RETVAL_FALSE;
  872. } else {
  873. smart_str_appendc(&out_buf, '\0');
  874. RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
  875. }
  876. }
  877. /* }}} */
  878. /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
  879. Replace regular expression for multibyte string */
  880. PHP_FUNCTION(mb_ereg_replace)
  881. {
  882. _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  883. }
  884. /* }}} */
  885. /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
  886. Case insensitive replace regular expression for multibyte string */
  887. PHP_FUNCTION(mb_eregi_replace)
  888. {
  889. _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE);
  890. }
  891. /* }}} */
  892. /* {{{ proto array mb_split(string pattern, string string [, int limit])
  893. split multibyte string into array by regular expression */
  894. PHP_FUNCTION(mb_split)
  895. {
  896. char *arg_pattern;
  897. int arg_pattern_len;
  898. php_mb_regex_t *re;
  899. OnigRegion *regs = NULL;
  900. char *string;
  901. OnigUChar *pos;
  902. int string_len;
  903. int n, err;
  904. long count = -1;
  905. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
  906. RETURN_FALSE;
  907. }
  908. if (count == 0) {
  909. count = 1;
  910. }
  911. /* create regex pattern buffer */
  912. if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
  913. RETURN_FALSE;
  914. }
  915. array_init(return_value);
  916. pos = (OnigUChar *)string;
  917. err = 0;
  918. regs = onig_region_new();
  919. /* churn through str, generating array entries as we go */
  920. while ((--count != 0) &&
  921. (err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0)) >= 0) {
  922. if (regs->beg[0] == regs->end[0]) {
  923. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
  924. break;
  925. }
  926. /* add it to the array */
  927. if (regs->beg[0] < string_len && regs->beg[0] >= (pos - (OnigUChar *)string)) {
  928. add_next_index_stringl(return_value, (char *)pos, ((OnigUChar *)(string + regs->beg[0]) - pos), 1);
  929. } else {
  930. err = -2;
  931. break;
  932. }
  933. /* point at our new starting point */
  934. n = regs->end[0];
  935. if ((pos - (OnigUChar *)string) < n) {
  936. pos = (OnigUChar *)string + n;
  937. }
  938. if (count < 0) {
  939. count = 0;
  940. }
  941. onig_region_free(regs, 0);
  942. }
  943. onig_region_free(regs, 1);
  944. /* see if we encountered an error */
  945. if (err <= -2) {
  946. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  947. onig_error_code_to_str(err_str, err);
  948. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
  949. zval_dtor(return_value);
  950. RETURN_FALSE;
  951. }
  952. /* otherwise we just have one last element to add to the array */
  953. n = ((OnigUChar *)(string + string_len) - pos);
  954. if (n > 0) {
  955. add_next_index_stringl(return_value, (char *)pos, n, 1);
  956. } else {
  957. add_next_index_stringl(return_value, "", 0, 1);
  958. }
  959. }
  960. /* }}} */
  961. /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
  962. Regular expression match for multibyte string */
  963. PHP_FUNCTION(mb_ereg_match)
  964. {
  965. char *arg_pattern;
  966. int arg_pattern_len;
  967. char *string;
  968. int string_len;
  969. php_mb_regex_t *re;
  970. OnigSyntaxType *syntax;
  971. OnigOptionType option = 0;
  972. int err;
  973. {
  974. char *option_str = NULL;
  975. int option_str_len = 0;
  976. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
  977. &arg_pattern, &arg_pattern_len, &string, &string_len,
  978. &option_str, &option_str_len)==FAILURE) {
  979. RETURN_FALSE;
  980. }
  981. if (option_str != NULL) {
  982. _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
  983. } else {
  984. option |= MBREX(regex_default_options);
  985. syntax = MBREX(regex_default_syntax);
  986. }
  987. }
  988. if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
  989. RETURN_FALSE;
  990. }
  991. /* match */
  992. err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
  993. if (err >= 0) {
  994. RETVAL_TRUE;
  995. } else {
  996. RETVAL_FALSE;
  997. }
  998. }
  999. /* }}} */
  1000. /* regex search */
  1001. /* {{{ _php_mb_regex_ereg_search_exec */
  1002. static void
  1003. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
  1004. {
  1005. size_t argc = ZEND_NUM_ARGS();
  1006. char *arg_pattern, *arg_options;
  1007. int arg_pattern_len, arg_options_len;
  1008. int n, i, err, pos, len, beg, end;
  1009. OnigOptionType option;
  1010. OnigUChar *str;
  1011. OnigSyntaxType *syntax;
  1012. if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
  1013. return;
  1014. }
  1015. option = MBREX(regex_default_options);
  1016. if (argc == 2) {
  1017. option = 0;
  1018. _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
  1019. }
  1020. if (argc > 0) {
  1021. /* create regex pattern buffer */
  1022. if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
  1023. RETURN_FALSE;
  1024. }
  1025. }
  1026. pos = MBREX(search_pos);
  1027. str = NULL;
  1028. len = 0;
  1029. if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
  1030. str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
  1031. len = Z_STRLEN_P(MBREX(search_str));
  1032. }
  1033. if (MBREX(search_re) == NULL) {
  1034. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
  1035. RETURN_FALSE;
  1036. }
  1037. if (str == NULL) {
  1038. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
  1039. RETURN_FALSE;
  1040. }
  1041. if (MBREX(search_regs)) {
  1042. onig_region_free(MBREX(search_regs), 1);
  1043. }
  1044. MBREX(search_regs) = onig_region_new();
  1045. err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
  1046. if (err == ONIG_MISMATCH) {
  1047. MBREX(search_pos) = len;
  1048. RETVAL_FALSE;
  1049. } else if (err <= -2) {
  1050. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  1051. onig_error_code_to_str(err_str, err);
  1052. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
  1053. RETVAL_FALSE;
  1054. } else {
  1055. if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
  1056. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
  1057. }
  1058. switch (mode) {
  1059. case 1:
  1060. array_init(return_value);
  1061. beg = MBREX(search_regs)->beg[0];
  1062. end = MBREX(search_regs)->end[0];
  1063. add_next_index_long(return_value, beg);
  1064. add_next_index_long(return_value, end - beg);
  1065. break;
  1066. case 2:
  1067. array_init(return_value);
  1068. n = MBREX(search_regs)->num_regs;
  1069. for (i = 0; i < n; i++) {
  1070. beg = MBREX(search_regs)->beg[i];
  1071. end = MBREX(search_regs)->end[i];
  1072. if (beg >= 0 && beg <= end && end <= len) {
  1073. add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
  1074. } else {
  1075. add_index_bool(return_value, i, 0);
  1076. }
  1077. }
  1078. break;
  1079. default:
  1080. RETVAL_TRUE;
  1081. break;
  1082. }
  1083. end = MBREX(search_regs)->end[0];
  1084. if (pos < end) {
  1085. MBREX(search_pos) = end;
  1086. } else {
  1087. MBREX(search_pos) = pos + 1;
  1088. }
  1089. }
  1090. if (err < 0) {
  1091. onig_region_free(MBREX(search_regs), 1);
  1092. MBREX(search_regs) = (OnigRegion *)NULL;
  1093. }
  1094. }
  1095. /* }}} */
  1096. /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
  1097. Regular expression search for multibyte string */
  1098. PHP_FUNCTION(mb_ereg_search)
  1099. {
  1100. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  1101. }
  1102. /* }}} */
  1103. /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
  1104. Regular expression search for multibyte string */
  1105. PHP_FUNCTION(mb_ereg_search_pos)
  1106. {
  1107. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  1108. }
  1109. /* }}} */
  1110. /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
  1111. Regular expression search for multibyte string */
  1112. PHP_FUNCTION(mb_ereg_search_regs)
  1113. {
  1114. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
  1115. }
  1116. /* }}} */
  1117. /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
  1118. Initialize string and regular expression for search. */
  1119. PHP_FUNCTION(mb_ereg_search_init)
  1120. {
  1121. size_t argc = ZEND_NUM_ARGS();
  1122. zval *arg_str;
  1123. char *arg_pattern, *arg_options;
  1124. int arg_pattern_len, arg_options_len;
  1125. OnigSyntaxType *syntax = NULL;
  1126. OnigOptionType option;
  1127. if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
  1128. return;
  1129. }
  1130. option = MBREX(regex_default_options);
  1131. syntax = MBREX(regex_default_syntax);
  1132. if (argc == 3) {
  1133. option = 0;
  1134. _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
  1135. }
  1136. if (argc > 1) {
  1137. /* create regex pattern buffer */
  1138. if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
  1139. RETURN_FALSE;
  1140. }
  1141. }
  1142. if (MBREX(search_str) != NULL) {
  1143. zval_ptr_dtor(&MBREX(search_str));
  1144. MBREX(search_str) = (zval *)NULL;
  1145. }
  1146. MBREX(search_str) = arg_str;
  1147. Z_ADDREF_P(MBREX(search_str));
  1148. SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
  1149. MBREX(search_pos) = 0;
  1150. if (MBREX(search_regs) != NULL) {
  1151. onig_region_free(MBREX(search_regs), 1);
  1152. MBREX(search_regs) = (OnigRegion *) NULL;
  1153. }
  1154. RETURN_TRUE;
  1155. }
  1156. /* }}} */
  1157. /* {{{ proto array mb_ereg_search_getregs(void)
  1158. Get matched substring of the last time */
  1159. PHP_FUNCTION(mb_ereg_search_getregs)
  1160. {
  1161. int n, i, len, beg, end;
  1162. OnigUChar *str;
  1163. if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
  1164. array_init(return_value);
  1165. str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
  1166. len = Z_STRLEN_P(MBREX(search_str));
  1167. n = MBREX(search_regs)->num_regs;
  1168. for (i = 0; i < n; i++) {
  1169. beg = MBREX(search_regs)->beg[i];
  1170. end = MBREX(search_regs)->end[i];
  1171. if (beg >= 0 && beg <= end && end <= len) {
  1172. add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
  1173. } else {
  1174. add_index_bool(return_value, i, 0);
  1175. }
  1176. }
  1177. } else {
  1178. RETVAL_FALSE;
  1179. }
  1180. }
  1181. /* }}} */
  1182. /* {{{ proto int mb_ereg_search_getpos(void)
  1183. Get search start position */
  1184. PHP_FUNCTION(mb_ereg_search_getpos)
  1185. {
  1186. RETVAL_LONG(MBREX(search_pos));
  1187. }
  1188. /* }}} */
  1189. /* {{{ proto bool mb_ereg_search_setpos(int position)
  1190. Set search start position */
  1191. PHP_FUNCTION(mb_ereg_search_setpos)
  1192. {
  1193. long position;
  1194. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
  1195. return;
  1196. }
  1197. if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
  1198. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
  1199. MBREX(search_pos) = 0;
  1200. RETURN_FALSE;
  1201. }
  1202. MBREX(search_pos) = position;
  1203. RETURN_TRUE;
  1204. }
  1205. /* }}} */
  1206. /* {{{ php_mb_regex_set_options */
  1207. static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
  1208. {
  1209. if (prev_options != NULL) {
  1210. *prev_options = MBREX(regex_default_options);
  1211. }
  1212. if (prev_syntax != NULL) {
  1213. *prev_syntax = MBREX(regex_default_syntax);
  1214. }
  1215. MBREX(regex_default_options) = options;
  1216. MBREX(regex_default_syntax) = syntax;
  1217. }
  1218. /* }}} */
  1219. /* {{{ proto string mb_regex_set_options([string options])
  1220. Set or get the default options for mbregex functions */
  1221. PHP_FUNCTION(mb_regex_set_options)
  1222. {
  1223. OnigOptionType opt;
  1224. OnigSyntaxType *syntax;
  1225. char *string = NULL;
  1226. int string_len;
  1227. char buf[16];
  1228. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
  1229. &string, &string_len) == FAILURE) {
  1230. RETURN_FALSE;
  1231. }
  1232. if (string != NULL) {
  1233. opt = 0;
  1234. syntax = NULL;
  1235. _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
  1236. _php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
  1237. } else {
  1238. opt = MBREX(regex_default_options);
  1239. syntax = MBREX(regex_default_syntax);
  1240. }
  1241. _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
  1242. RETVAL_STRING(buf, 1);
  1243. }
  1244. /* }}} */
  1245. #endif /* HAVE_MBREGEX */
  1246. /*
  1247. * Local variables:
  1248. * tab-width: 4
  1249. * c-basic-offset: 4
  1250. * End:
  1251. * vim600: fdm=marker
  1252. * vim: noet sw=4 ts=4
  1253. */