PageRenderTime 47ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/ext/ereg/ereg.c

http://github.com/infusion/PHP
C | 768 lines | 538 code | 102 blank | 128 comment | 137 complexity | 27a1b87df0e1a775fb7fbd8df810a588 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-2.1, BSD-3-Clause
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2011 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Authors: Rasmus Lerdorf <rasmus@php.net> |
  16. | Jim Winstead <jimw@php.net> |
  17. | Jaakko Hyvätti <jaakko@hyvatti.iki.fi> |
  18. +----------------------------------------------------------------------+
  19. */
  20. /* $Id: ereg.c 306939 2011-01-01 02:19:59Z felipe $ */
  21. #include <stdio.h>
  22. #include <ctype.h>
  23. #include "php.h"
  24. #include "ext/standard/php_string.h"
  25. #include "php_ereg.h"
  26. #include "ext/standard/info.h"
  27. /* {{{ arginfo */
  28. ZEND_BEGIN_ARG_INFO_EX(arginfo_ereg, 0, 0, 2)
  29. ZEND_ARG_INFO(0, pattern)
  30. ZEND_ARG_INFO(0, string)
  31. ZEND_ARG_INFO(1, registers) /* ARRAY_INFO(1, registers, 1) */
  32. ZEND_END_ARG_INFO()
  33. ZEND_BEGIN_ARG_INFO(arginfo_ereg_replace, 0)
  34. ZEND_ARG_INFO(0, pattern)
  35. ZEND_ARG_INFO(0, replacement)
  36. ZEND_ARG_INFO(0, string)
  37. ZEND_END_ARG_INFO()
  38. ZEND_BEGIN_ARG_INFO_EX(arginfo_split, 0, 0, 2)
  39. ZEND_ARG_INFO(0, pattern)
  40. ZEND_ARG_INFO(0, string)
  41. ZEND_ARG_INFO(0, limit)
  42. ZEND_END_ARG_INFO()
  43. ZEND_BEGIN_ARG_INFO(arginfo_sql_regcase, 0)
  44. ZEND_ARG_INFO(0, string)
  45. ZEND_END_ARG_INFO()
  46. /* }}} */
  47. /* {{{ Function table: one entry per function this extension exports, pairing the function name with its arginfo table */
  48. const zend_function_entry ereg_functions[] = {
  49. PHP_DEP_FE(ereg, arginfo_ereg) /* NOTE(review): PHP_DEP_FE presumably registers the function as deprecated - confirm against the Zend API headers */
  50. PHP_DEP_FE(ereg_replace, arginfo_ereg_replace)
  51. PHP_DEP_FE(eregi, arginfo_ereg) /* same argument layout as ereg */
  52. PHP_DEP_FE(eregi_replace, arginfo_ereg_replace) /* same argument layout as ereg_replace */
  53. PHP_DEP_FE(split, arginfo_split)
  54. PHP_DEP_FE(spliti, arginfo_split) /* same argument layout as split */
  55. PHP_DEP_FE(sql_regcase, arginfo_sql_regcase)
  56. {NULL, NULL, NULL} /* all-NULL entry closing the list */
  57. };
  58. /* }}} */
  59. /* {{{ reg_cache: one entry of the compiled-pattern cache stored in EREG(ht_rc), keyed by the pattern text */
  60. typedef struct {
  61. regex_t preg; /* compiled expression; copied bitwise into and out of the cache by _php_regcomp() */
  62. int cflags; /* regcomp() flags the expression was compiled with; a cached entry is only reused when these match */
  63. unsigned long lastuse; /* value of EREG(lru_counter) stamped when the entry was stored; sort key for eviction */
  64. } reg_cache;
  65. static int reg_magic = 0; /* re_magic captured from the first successful compile; stays 0 until then */
  66. #define EREG_CACHE_SIZE 4096 /* number of cached patterns at which _php_regcomp() starts evicting */
  67. /* }}} */
  68. ZEND_DECLARE_MODULE_GLOBALS(ereg) /* declares the globals reached through EREG(): the ht_rc cache table and lru_counter */
  69. /* {{{ Module entry: descriptor tying the extension name, its function table and its lifecycle callbacks together */
  70. zend_module_entry ereg_module_entry = {
  71. STANDARD_MODULE_HEADER,
  72. "ereg", /* extension name */
  73. ereg_functions, /* exported functions */
  74. PHP_MINIT(ereg), /* module startup callback */
  75. PHP_MSHUTDOWN(ereg), /* module shutdown callback */
  76. NULL, /* unused callback slot - presumably per-request startup; confirm against zend_module_entry */
  77. NULL, /* unused callback slot - presumably per-request shutdown; confirm against zend_module_entry */
  78. PHP_MINFO(ereg), /* info-table callback */
  79. NO_VERSION_YET,
  80. STANDARD_MODULE_PROPERTIES
  81. };
  82. /* }}} */
  83. /* {{{ ereg_lru_cmp */
  84. static int ereg_lru_cmp(const void *a, const void *b TSRMLS_DC)
  85. {
  86. Bucket *f = *((Bucket **) a);
  87. Bucket *s = *((Bucket **) b);
  88. if (((reg_cache *)f->pData)->lastuse <
  89. ((reg_cache *)s->pData)->lastuse) {
  90. return -1;
  91. } else if (((reg_cache *)f->pData)->lastuse ==
  92. ((reg_cache *)s->pData)->lastuse) {
  93. return 0;
  94. } else {
  95. return 1;
  96. }
  97. }
  98. /* }}} */
  99. /* {{{ static ereg_clean_cache */
  100. static int ereg_clean_cache(void *data, void *arg TSRMLS_DC)
  101. {
  102. int *num_clean = (int *)arg;
  103. if (*num_clean > 0) {
  104. (*num_clean)--;
  105. return ZEND_HASH_APPLY_REMOVE;
  106. } else {
  107. return ZEND_HASH_APPLY_STOP;
  108. }
  109. }
  110. /* }}} */
  111. /* {{{ _php_regcomp
  112. */
  113. static int _php_regcomp(regex_t *preg, const char *pattern, int cflags)
  114. {
  115. int r = 0;
  116. int patlen = strlen(pattern);
  117. reg_cache *rc = NULL;
  118. TSRMLS_FETCH();
  119. if (zend_hash_num_elements(&EREG(ht_rc)) >= EREG_CACHE_SIZE) {
  120. /* easier than dealing with overflow as it happens */
  121. if (EREG(lru_counter) >= (1 << 31) || zend_hash_sort(&EREG(ht_rc), zend_qsort, ereg_lru_cmp, 0 TSRMLS_CC) == FAILURE) {
  122. zend_hash_clean(&EREG(ht_rc));
  123. EREG(lru_counter) = 0;
  124. } else {
  125. int num_clean = EREG_CACHE_SIZE / 4;
  126. zend_hash_apply_with_argument(&EREG(ht_rc), ereg_clean_cache, &num_clean TSRMLS_CC);
  127. }
  128. }
  129. if(zend_hash_find(&EREG(ht_rc), (char *) pattern, patlen+1, (void **) &rc) == SUCCESS
  130. && rc->cflags == cflags) {
  131. #ifdef HAVE_REGEX_T_RE_MAGIC
  132. /*
  133. * We use a saved magic number to see whether cache is corrupted, and if it
  134. * is, we flush it and compile the pattern from scratch.
  135. */
  136. if (rc->preg.re_magic != reg_magic) {
  137. zend_hash_clean(&EREG(ht_rc));
  138. EREG(lru_counter) = 0;
  139. } else {
  140. memcpy(preg, &rc->preg, sizeof(*preg));
  141. return r;
  142. }
  143. }
  144. r = regcomp(preg, pattern, cflags);
  145. if(!r) {
  146. reg_cache rcp;
  147. rcp.cflags = cflags;
  148. rcp.lastuse = ++(EREG(lru_counter));
  149. memcpy(&rcp.preg, preg, sizeof(*preg));
  150. /*
  151. * Since we don't have access to the actual MAGIC1 definition in the private
  152. * header file, we save the magic value immediately after compilation. Hopefully,
  153. * it's good.
  154. */
  155. if (!reg_magic) reg_magic = preg->re_magic;
  156. zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
  157. (void *) &rcp, sizeof(rcp), NULL);
  158. }
  159. #else
  160. memcpy(preg, &rc->preg, sizeof(*preg));
  161. } else {
  162. r = regcomp(preg, pattern, cflags);
  163. if(!r) {
  164. reg_cache rcp;
  165. rcp.cflags = cflags;
  166. rcp.lastuse = ++(EREG(lru_counter));
  167. memcpy(&rcp.preg, preg, sizeof(*preg));
  168. zend_hash_update(&EREG(ht_rc), (char *) pattern, patlen+1,
  169. (void *) &rcp, sizeof(rcp), NULL);
  170. }
  171. }
  172. #endif
  173. return r;
  174. }
  175. /* }}} */
  176. static void _free_ereg_cache(reg_cache *rc)
  177. {
  178. regfree(&rc->preg);
  179. }
  /* From here on the rest of the file goes through the pattern cache:
   *  - regcomp() is routed to _php_regcomp(), which hands out bitwise copies
   *    of cached expressions;
   *  - regfree() becomes a no-op, because those copies share their storage
   *    with the cache entry and are released by the cache's own destructor.
   * The no-op expands to an expression rather than to a bare ';' so that
   * "regfree(x);" stays a single statement (safe in an unbraced if/else). */
  #undef regfree
  #define regfree(a) ((void) 0)
  #undef regcomp
  #define regcomp(a, b, c) _php_regcomp(a, b, c)
  184. static void php_ereg_init_globals(zend_ereg_globals *ereg_globals TSRMLS_DC)
  185. {
  186. zend_hash_init(&ereg_globals->ht_rc, 0, NULL, (void (*)(void *)) _free_ereg_cache, 1);
  187. ereg_globals->lru_counter = 0;
  188. }
  189. static void php_ereg_destroy_globals(zend_ereg_globals *ereg_globals TSRMLS_DC) /* destructor for the module globals */
  190. {
  191. zend_hash_destroy(&ereg_globals->ht_rc); /* tears down the pattern cache table; the table was created with _free_ereg_cache as its element destructor */
  192. }
  193. PHP_MINIT_FUNCTION(ereg) /* module startup: sets up the module globals, always reports SUCCESS */
  194. {
  195. ZEND_INIT_MODULE_GLOBALS(ereg, php_ereg_init_globals, php_ereg_destroy_globals); /* hands over the constructor/destructor pair for the globals */
  196. return SUCCESS;
  197. }
  198. PHP_MSHUTDOWN_FUNCTION(ereg) /* module shutdown: always reports SUCCESS */
  199. {
  200. #ifndef ZTS /* only builds without ZTS destroy the globals by hand */
  201. php_ereg_destroy_globals(&ereg_globals TSRMLS_CC); /* NOTE(review): ZTS builds presumably run the destructor registered in MINIT instead - confirm */
  202. #endif
  203. return SUCCESS;
  204. }
  /* Info-table callback: prints a one-row table saying whether the bundled or
   * the system regex library is in use, decided at compile time by HSREGEX. */
  PHP_MINFO_FUNCTION(ereg)
  {
  #if HSREGEX
      const char *regex_library = "Bundled library enabled";
  #else
      const char *regex_library = "System library enabled";
  #endif

      php_info_print_table_start();
      php_info_print_table_row(2, "Regex Library", regex_library);
      php_info_print_table_end();
  }
  215. /* {{{ php_ereg_eprint
  216. * php_ereg_eprint - convert error number to name
  217. */
  218. static void php_ereg_eprint(int err, regex_t *re) {
  219. char *buf = NULL, *message = NULL;
  220. size_t len;
  221. size_t buf_len;
  222. #ifdef REG_ITOA
  223. /* get the length of the message */
  224. buf_len = regerror(REG_ITOA | err, re, NULL, 0);
  225. if (buf_len) {
  226. buf = (char *)safe_emalloc(buf_len, sizeof(char), 0);
  227. if (!buf) return; /* fail silently */
  228. /* finally, get the error message */
  229. regerror(REG_ITOA | err, re, buf, buf_len);
  230. }
  231. #else
  232. buf_len = 0;
  233. #endif
  234. len = regerror(err, re, NULL, 0);
  235. if (len) {
  236. TSRMLS_FETCH();
  237. message = (char *)safe_emalloc((buf_len + len + 2), sizeof(char), 0);
  238. if (!message) {
  239. return; /* fail silently */
  240. }
  241. if (buf_len) {
  242. snprintf(message, buf_len, "%s: ", buf);
  243. buf_len += 1; /* so pointer math below works */
  244. }
  245. /* drop the message into place */
  246. regerror(err, re, message + buf_len, len);
  247. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", message);
  248. }
  249. STR_FREE(buf);
  250. STR_FREE(message);
  251. }
  252. /* }}} */
  253. /* {{{ php_ereg
  254. */
  255. static void php_ereg(INTERNAL_FUNCTION_PARAMETERS, int icase)
  256. {
  257. zval **regex, /* Regular expression */
  258. **array = NULL; /* Optional register array */
  259. char *findin; /* String to apply expression to */
  260. int findin_len;
  261. regex_t re;
  262. regmatch_t *subs;
  263. int err, match_len, string_len;
  264. uint i;
  265. int copts = 0;
  266. off_t start, end;
  267. char *buf = NULL;
  268. char *string = NULL;
  269. int argc = ZEND_NUM_ARGS();
  270. if (zend_parse_parameters(argc TSRMLS_CC, "Zs|Z", &regex, &findin, &findin_len, &array) == FAILURE) {
  271. return;
  272. }
  273. if (icase) {
  274. copts |= REG_ICASE;
  275. }
  276. if (argc == 2) {
  277. copts |= REG_NOSUB;
  278. }
  279. /* compile the regular expression from the supplied regex */
  280. if (Z_TYPE_PP(regex) == IS_STRING) {
  281. err = regcomp(&re, Z_STRVAL_PP(regex), REG_EXTENDED | copts);
  282. } else {
  283. /* we convert numbers to integers and treat them as a string */
  284. if (Z_TYPE_PP(regex) == IS_DOUBLE) {
  285. convert_to_long_ex(regex); /* get rid of decimal places */
  286. }
  287. convert_to_string_ex(regex);
  288. /* don't bother doing an extended regex with just a number */
  289. err = regcomp(&re, Z_STRVAL_PP(regex), copts);
  290. }
  291. if (err) {
  292. php_ereg_eprint(err, &re);
  293. RETURN_FALSE;
  294. }
  295. /* make a copy of the string we're looking in */
  296. string = estrndup(findin, findin_len);
  297. /* allocate storage for (sub-)expression-matches */
  298. subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
  299. /* actually execute the regular expression */
  300. err = regexec(&re, string, re.re_nsub+1, subs, 0);
  301. if (err && err != REG_NOMATCH) {
  302. php_ereg_eprint(err, &re);
  303. regfree(&re);
  304. efree(subs);
  305. RETURN_FALSE;
  306. }
  307. match_len = 1;
  308. if (array && err != REG_NOMATCH) {
  309. match_len = (int) (subs[0].rm_eo - subs[0].rm_so);
  310. string_len = findin_len + 1;
  311. buf = emalloc(string_len);
  312. zval_dtor(*array); /* start with clean array */
  313. array_init(*array);
  314. for (i = 0; i <= re.re_nsub; i++) {
  315. start = subs[i].rm_so;
  316. end = subs[i].rm_eo;
  317. if (start != -1 && end > 0 && start < string_len && end < string_len && start < end) {
  318. add_index_stringl(*array, i, string+start, end-start, 1);
  319. } else {
  320. add_index_bool(*array, i, 0);
  321. }
  322. }
  323. efree(buf);
  324. }
  325. efree(subs);
  326. efree(string);
  327. if (err == REG_NOMATCH) {
  328. RETVAL_FALSE;
  329. } else {
  330. if (match_len == 0)
  331. match_len = 1;
  332. RETVAL_LONG(match_len);
  333. }
  334. regfree(&re);
  335. }
  336. /* }}} */
  337. /* {{{ proto int ereg(string pattern, string string [, array registers])
  338. Regular expression match; thin wrapper that forwards to php_ereg() */
  339. PHP_FUNCTION(ereg)
  340. {
  341. php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); /* icase = 0: REG_ICASE is not added */
  342. }
  343. /* }}} */
  344. /* {{{ proto int eregi(string pattern, string string [, array registers])
  345. Case-insensitive regular expression match; thin wrapper that forwards to php_ereg() */
  346. PHP_FUNCTION(eregi)
  347. {
  348. php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); /* icase = 1: pattern is compiled with REG_ICASE */
  349. }
  350. /* }}} */
  351. /* {{{ php_ereg_replace
  352. * this is the meat and potatoes of regex replacement! */
  353. PHPAPI char *php_ereg_replace(const char *pattern, const char *replace, const char *string, int icase, int extended)
  354. {
  355. regex_t re;
  356. regmatch_t *subs;
  357. char *buf, /* buf is where we build the replaced string */
  358. *nbuf, /* nbuf is used when we grow the buffer */
  359. *walkbuf; /* used to walk buf when replacing backrefs */
  360. const char *walk; /* used to walk replacement string for backrefs */
  361. int buf_len;
  362. int pos, tmp, string_len, new_l;
  363. int err, copts = 0;
  364. string_len = strlen(string);
  365. if (icase) {
  366. copts = REG_ICASE;
  367. }
  368. if (extended) {
  369. copts |= REG_EXTENDED;
  370. }
  371. err = regcomp(&re, pattern, copts);
  372. if (err) {
  373. php_ereg_eprint(err, &re);
  374. return ((char *) -1);
  375. }
  376. /* allocate storage for (sub-)expression-matches */
  377. subs = (regmatch_t *)ecalloc(sizeof(regmatch_t),re.re_nsub+1);
  378. /* start with a buffer that is twice the size of the stringo
  379. we're doing replacements in */
  380. buf_len = 2 * string_len + 1;
  381. buf = safe_emalloc(buf_len, sizeof(char), 0);
  382. err = pos = 0;
  383. buf[0] = '\0';
  384. while (!err) {
  385. err = regexec(&re, &string[pos], re.re_nsub+1, subs, (pos ? REG_NOTBOL : 0));
  386. if (err && err != REG_NOMATCH) {
  387. php_ereg_eprint(err, &re);
  388. efree(subs);
  389. efree(buf);
  390. regfree(&re);
  391. return ((char *) -1);
  392. }
  393. if (!err) {
  394. /* backref replacement is done in two passes:
  395. 1) find out how long the string will be, and allocate buf
  396. 2) copy the part before match, replacement and backrefs to buf
  397. Jaakko Hyvätti <Jaakko.Hyvatti@iki.fi>
  398. */
  399. new_l = strlen(buf) + subs[0].rm_so; /* part before the match */
  400. walk = replace;
  401. while (*walk) {
  402. if ('\\' == *walk && isdigit((unsigned char)walk[1]) && ((unsigned char)walk[1]) - '0' <= (int)re.re_nsub) {
  403. if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1) {
  404. new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
  405. }
  406. walk += 2;
  407. } else {
  408. new_l++;
  409. walk++;
  410. }
  411. }
  412. if (new_l + 1 > buf_len) {
  413. buf_len = 1 + buf_len + 2 * new_l;
  414. nbuf = emalloc(buf_len);
  415. strcpy(nbuf, buf);
  416. efree(buf);
  417. buf = nbuf;
  418. }
  419. tmp = strlen(buf);
  420. /* copy the part of the string before the match */
  421. strncat(buf, &string[pos], subs[0].rm_so);
  422. /* copy replacement and backrefs */
  423. walkbuf = &buf[tmp + subs[0].rm_so];
  424. walk = replace;
  425. while (*walk) {
  426. if ('\\' == *walk && isdigit(walk[1]) && walk[1] - '0' <= (int)re.re_nsub) {
  427. if (subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1
  428. /* this next case shouldn't happen. it does. */
  429. && subs[walk[1] - '0'].rm_so <= subs[walk[1] - '0'].rm_eo) {
  430. tmp = subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
  431. memcpy (walkbuf, &string[pos + subs[walk[1] - '0'].rm_so], tmp);
  432. walkbuf += tmp;
  433. }
  434. walk += 2;
  435. } else {
  436. *walkbuf++ = *walk++;
  437. }
  438. }
  439. *walkbuf = '\0';
  440. /* and get ready to keep looking for replacements */
  441. if (subs[0].rm_so == subs[0].rm_eo) {
  442. if (subs[0].rm_so + pos >= string_len) {
  443. break;
  444. }
  445. new_l = strlen (buf) + 1;
  446. if (new_l + 1 > buf_len) {
  447. buf_len = 1 + buf_len + 2 * new_l;
  448. nbuf = safe_emalloc(buf_len, sizeof(char), 0);
  449. strcpy(nbuf, buf);
  450. efree(buf);
  451. buf = nbuf;
  452. }
  453. pos += subs[0].rm_eo + 1;
  454. buf [new_l-1] = string [pos-1];
  455. buf [new_l] = '\0';
  456. } else {
  457. pos += subs[0].rm_eo;
  458. }
  459. } else { /* REG_NOMATCH */
  460. new_l = strlen(buf) + strlen(&string[pos]);
  461. if (new_l + 1 > buf_len) {
  462. buf_len = new_l + 1; /* now we know exactly how long it is */
  463. nbuf = safe_emalloc(buf_len, sizeof(char), 0);
  464. strcpy(nbuf, buf);
  465. efree(buf);
  466. buf = nbuf;
  467. }
  468. /* stick that last bit of string on our output */
  469. strlcat(buf, &string[pos], buf_len);
  470. }
  471. }
  472. /* don't want to leak memory .. */
  473. efree(subs);
  474. regfree(&re);
  475. /* whew. */
  476. return (buf);
  477. }
  478. /* }}} */
  479. /* {{{ php_do_ereg_replace
  480. */
  481. static void php_do_ereg_replace(INTERNAL_FUNCTION_PARAMETERS, int icase)
  482. {
  483. zval **arg_pattern,
  484. **arg_replace;
  485. char *pattern, *arg_string;
  486. char *string;
  487. char *replace;
  488. char *ret;
  489. int arg_string_len;
  490. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZs", &arg_pattern, &arg_replace, &arg_string, &arg_string_len) == FAILURE) {
  491. return;
  492. }
  493. if (Z_TYPE_PP(arg_pattern) == IS_STRING) {
  494. if (Z_STRVAL_PP(arg_pattern) && Z_STRLEN_PP(arg_pattern)) {
  495. pattern = estrndup(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern));
  496. } else {
  497. pattern = STR_EMPTY_ALLOC();
  498. }
  499. } else {
  500. convert_to_long_ex(arg_pattern);
  501. pattern = emalloc(2);
  502. pattern[0] = (char) Z_LVAL_PP(arg_pattern);
  503. pattern[1] = '\0';
  504. }
  505. if (Z_TYPE_PP(arg_replace) == IS_STRING) {
  506. if (Z_STRVAL_PP(arg_replace) && Z_STRLEN_PP(arg_replace)) {
  507. replace = estrndup(Z_STRVAL_PP(arg_replace), Z_STRLEN_PP(arg_replace));
  508. } else {
  509. replace = STR_EMPTY_ALLOC();
  510. }
  511. } else {
  512. convert_to_long_ex(arg_replace);
  513. replace = emalloc(2);
  514. replace[0] = (char) Z_LVAL_PP(arg_replace);
  515. replace[1] = '\0';
  516. }
  517. if (arg_string && arg_string_len) {
  518. string = estrndup(arg_string, arg_string_len);
  519. } else {
  520. string = STR_EMPTY_ALLOC();
  521. }
  522. /* do the actual work */
  523. ret = php_ereg_replace(pattern, replace, string, icase, 1);
  524. if (ret == (char *) -1) {
  525. RETVAL_FALSE;
  526. } else {
  527. RETVAL_STRING(ret, 1);
  528. STR_FREE(ret);
  529. }
  530. STR_FREE(string);
  531. STR_FREE(replace);
  532. STR_FREE(pattern);
  533. }
  534. /* }}} */
  535. /* {{{ proto string ereg_replace(string pattern, string replacement, string string)
  536. Replace regular expression; thin wrapper that forwards to php_do_ereg_replace() */
  537. PHP_FUNCTION(ereg_replace)
  538. {
  539. php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); /* icase = 0: case-sensitive */
  540. }
  541. /* }}} */
  542. /* {{{ proto string eregi_replace(string pattern, string replacement, string string)
  543. Case insensitive replace regular expression; thin wrapper that forwards to php_do_ereg_replace() */
  544. PHP_FUNCTION(eregi_replace)
  545. {
  546. php_do_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); /* icase = 1: case-insensitive */
  547. }
  548. /* }}} */
  549. /* {{{ php_split
  550. */
  551. static void php_split(INTERNAL_FUNCTION_PARAMETERS, int icase)
  552. {
  553. long count = -1;
  554. regex_t re;
  555. regmatch_t subs[1];
  556. char *spliton, *str, *strp, *endp;
  557. int spliton_len, str_len;
  558. int err, size, copts = 0;
  559. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &spliton, &spliton_len, &str, &str_len, &count) == FAILURE) {
  560. return;
  561. }
  562. if (icase) {
  563. copts = REG_ICASE;
  564. }
  565. strp = str;
  566. endp = strp + str_len;
  567. err = regcomp(&re, spliton, REG_EXTENDED | copts);
  568. if (err) {
  569. php_ereg_eprint(err, &re);
  570. RETURN_FALSE;
  571. }
  572. array_init(return_value);
  573. /* churn through str, generating array entries as we go */
  574. while ((count == -1 || count > 1) && !(err = regexec(&re, strp, 1, subs, 0))) {
  575. if (subs[0].rm_so == 0 && subs[0].rm_eo) {
  576. /* match is at start of string, return empty string */
  577. add_next_index_stringl(return_value, "", 0, 1);
  578. /* skip ahead the length of the regex match */
  579. strp += subs[0].rm_eo;
  580. } else if (subs[0].rm_so == 0 && subs[0].rm_eo == 0) {
  581. /* No more matches */
  582. regfree(&re);
  583. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Regular Expression");
  584. zend_hash_destroy(Z_ARRVAL_P(return_value));
  585. efree(Z_ARRVAL_P(return_value));
  586. RETURN_FALSE;
  587. } else {
  588. /* On a real match */
  589. /* make a copy of the substring */
  590. size = subs[0].rm_so;
  591. /* add it to the array */
  592. add_next_index_stringl(return_value, strp, size, 1);
  593. /* point at our new starting point */
  594. strp = strp + subs[0].rm_eo;
  595. }
  596. /* if we're only looking for a certain number of points,
  597. stop looking once we hit it */
  598. if (count != -1) {
  599. count--;
  600. }
  601. }
  602. /* see if we encountered an error */
  603. if (err && err != REG_NOMATCH) {
  604. php_ereg_eprint(err, &re);
  605. regfree(&re);
  606. zend_hash_destroy(Z_ARRVAL_P(return_value));
  607. efree(Z_ARRVAL_P(return_value));
  608. RETURN_FALSE;
  609. }
  610. /* otherwise we just have one last element to add to the array */
  611. size = endp - strp;
  612. add_next_index_stringl(return_value, strp, size, 1);
  613. regfree(&re);
  614. }
  615. /* }}} */
  616. /* {{{ proto array split(string pattern, string string [, int limit])
  617. Split string into array by regular expression; thin wrapper that forwards to php_split() */
  618. PHP_FUNCTION(split)
  619. {
  620. php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); /* icase = 0: case-sensitive */
  621. }
  622. /* }}} */
  623. /* {{{ proto array spliti(string pattern, string string [, int limit])
  624. Split string into array by regular expression case-insensitive; thin wrapper that forwards to php_split() */
  625. PHP_FUNCTION(spliti)
  626. {
  627. php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); /* icase = 1: case-insensitive */
  628. }
  629. /* }}} */
  630. /* {{{ proto string sql_regcase(string string)
  631. Make regular expression for case insensitive match */
  632. PHPAPI PHP_FUNCTION(sql_regcase)
  633. {
  634. char *string, *tmp;
  635. int string_len;
  636. unsigned char c;
  637. register int i, j;
  638. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &string, &string_len) == FAILURE) {
  639. return;
  640. }
  641. tmp = safe_emalloc(string_len, 4, 1);
  642. for (i = j = 0; i < string_len; i++) {
  643. c = (unsigned char) string[i];
  644. if (isalpha(c)) {
  645. tmp[j++] = '[';
  646. tmp[j++] = toupper(c);
  647. tmp[j++] = tolower(c);
  648. tmp[j++] = ']';
  649. } else {
  650. tmp[j++] = c;
  651. }
  652. }
  653. tmp[j] = 0;
  654. RETVAL_STRINGL(tmp, j, 1);
  655. efree(tmp);
  656. }
  657. /* }}} */
  658. /*
  659. * Local variables:
  660. * tab-width: 4
  661. * c-basic-offset: 4
  662. * End:
  663. * vim600: noet sw=4 ts=4 fdm=marker
  664. * vim<600: noet sw=4 ts=4
  665. */