PageRenderTime 83ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/ext/standard/string.c

https://github.com/php/php-src
C | 6197 lines | 5022 code | 692 blank | 483 comment | 1234 complexity | 36aa99423264fce3e37fb1a5d0ece920 MD5 | raw file
Possible License(s): BSD-2-Clause, BSD-3-Clause, MPL-2.0-no-copyleft-exception, LGPL-2.1
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Copyright (c) The PHP Group |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | https://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Rasmus Lerdorf <rasmus@php.net> |
  14. | Stig Sæther Bakken <ssb@php.net> |
  15. | Zeev Suraski <zeev@php.net> |
  16. +----------------------------------------------------------------------+
  17. */
  18. #include <stdio.h>
  19. #include "php.h"
  20. #include "php_rand.h"
  21. #include "php_string.h"
  22. #include "php_variables.h"
  23. #include <locale.h>
  24. #ifdef HAVE_LANGINFO_H
  25. # include <langinfo.h>
  26. #endif
  27. #ifdef HAVE_LIBINTL
  28. # include <libintl.h> /* For LC_MESSAGES */
  29. #endif
  30. #include "scanf.h"
  31. #include "zend_API.h"
  32. #include "zend_execute.h"
  33. #include "php_globals.h"
  34. #include "basic_functions.h"
  35. #include "zend_smart_str.h"
  36. #include <Zend/zend_exceptions.h>
  37. #ifdef ZTS
  38. #include "TSRM.h"
  39. #endif
  40. /* For str_getcsv() support */
  41. #include "ext/standard/file.h"
  42. /* For php_next_utf8_char() */
  43. #include "ext/standard/html.h"
  44. #ifdef __SSE2__
  45. #include <emmintrin.h>
  46. #endif
  47. #define STR_PAD_LEFT 0
  48. #define STR_PAD_RIGHT 1
  49. #define STR_PAD_BOTH 2
  50. #define PHP_PATHINFO_DIRNAME 1
  51. #define PHP_PATHINFO_BASENAME 2
  52. #define PHP_PATHINFO_EXTENSION 4
  53. #define PHP_PATHINFO_FILENAME 8
  54. #define PHP_PATHINFO_ALL (PHP_PATHINFO_DIRNAME | PHP_PATHINFO_BASENAME | PHP_PATHINFO_EXTENSION | PHP_PATHINFO_FILENAME)
  55. #define STR_STRSPN 0
  56. #define STR_STRCSPN 1
  57. /* {{{ register_string_constants */
  58. void register_string_constants(INIT_FUNC_ARGS)
  59. {
  60. REGISTER_LONG_CONSTANT("STR_PAD_LEFT", STR_PAD_LEFT, CONST_CS | CONST_PERSISTENT);
  61. REGISTER_LONG_CONSTANT("STR_PAD_RIGHT", STR_PAD_RIGHT, CONST_CS | CONST_PERSISTENT);
  62. REGISTER_LONG_CONSTANT("STR_PAD_BOTH", STR_PAD_BOTH, CONST_CS | CONST_PERSISTENT);
  63. REGISTER_LONG_CONSTANT("PATHINFO_DIRNAME", PHP_PATHINFO_DIRNAME, CONST_CS | CONST_PERSISTENT);
  64. REGISTER_LONG_CONSTANT("PATHINFO_BASENAME", PHP_PATHINFO_BASENAME, CONST_CS | CONST_PERSISTENT);
  65. REGISTER_LONG_CONSTANT("PATHINFO_EXTENSION", PHP_PATHINFO_EXTENSION, CONST_CS | CONST_PERSISTENT);
  66. REGISTER_LONG_CONSTANT("PATHINFO_FILENAME", PHP_PATHINFO_FILENAME, CONST_CS | CONST_PERSISTENT);
  67. REGISTER_LONG_CONSTANT("PATHINFO_ALL", PHP_PATHINFO_ALL, CONST_CS | CONST_PERSISTENT);
  68. /* If last members of struct lconv equal CHAR_MAX, no grouping is done */
  69. REGISTER_LONG_CONSTANT("CHAR_MAX", CHAR_MAX, CONST_CS | CONST_PERSISTENT);
  70. REGISTER_LONG_CONSTANT("LC_CTYPE", LC_CTYPE, CONST_CS | CONST_PERSISTENT);
  71. REGISTER_LONG_CONSTANT("LC_NUMERIC", LC_NUMERIC, CONST_CS | CONST_PERSISTENT);
  72. REGISTER_LONG_CONSTANT("LC_TIME", LC_TIME, CONST_CS | CONST_PERSISTENT);
  73. REGISTER_LONG_CONSTANT("LC_COLLATE", LC_COLLATE, CONST_CS | CONST_PERSISTENT);
  74. REGISTER_LONG_CONSTANT("LC_MONETARY", LC_MONETARY, CONST_CS | CONST_PERSISTENT);
  75. REGISTER_LONG_CONSTANT("LC_ALL", LC_ALL, CONST_CS | CONST_PERSISTENT);
  76. # ifdef LC_MESSAGES
  77. REGISTER_LONG_CONSTANT("LC_MESSAGES", LC_MESSAGES, CONST_CS | CONST_PERSISTENT);
  78. # endif
  79. }
  80. /* }}} */
  81. int php_tag_find(char *tag, size_t len, const char *set);
  82. /* this is read-only, so it's ok */
  83. ZEND_SET_ALIGNED(16, static const char hexconvtab[]) = "0123456789abcdef";
  84. /* localeconv mutex */
  85. #ifdef ZTS
  86. static MUTEX_T locale_mutex = NULL;
  87. #endif
  88. /* {{{ php_bin2hex */
  89. static zend_string *php_bin2hex(const unsigned char *old, const size_t oldlen)
  90. {
  91. zend_string *result;
  92. size_t i, j;
  93. result = zend_string_safe_alloc(oldlen, 2 * sizeof(char), 0, 0);
  94. for (i = j = 0; i < oldlen; i++) {
  95. ZSTR_VAL(result)[j++] = hexconvtab[old[i] >> 4];
  96. ZSTR_VAL(result)[j++] = hexconvtab[old[i] & 15];
  97. }
  98. ZSTR_VAL(result)[j] = '\0';
  99. return result;
  100. }
  101. /* }}} */
  102. /* {{{ php_hex2bin */
  103. static zend_string *php_hex2bin(const unsigned char *old, const size_t oldlen)
  104. {
  105. size_t target_length = oldlen >> 1;
  106. zend_string *str = zend_string_alloc(target_length, 0);
  107. unsigned char *ret = (unsigned char *)ZSTR_VAL(str);
  108. size_t i, j;
  109. for (i = j = 0; i < target_length; i++) {
  110. unsigned char c = old[j++];
  111. unsigned char l = c & ~0x20;
  112. int is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
  113. unsigned char d;
  114. /* basically (c >= '0' && c <= '9') || (l >= 'A' && l <= 'F') */
  115. if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
  116. d = (l - 0x10 - 0x27 * is_letter) << 4;
  117. } else {
  118. zend_string_efree(str);
  119. return NULL;
  120. }
  121. c = old[j++];
  122. l = c & ~0x20;
  123. is_letter = ((unsigned int) ((l - 'A') ^ (l - 'F' - 1))) >> (8 * sizeof(unsigned int) - 1);
  124. if (EXPECTED((((c ^ '0') - 10) >> (8 * sizeof(unsigned int) - 1)) | is_letter)) {
  125. d |= l - 0x10 - 0x27 * is_letter;
  126. } else {
  127. zend_string_efree(str);
  128. return NULL;
  129. }
  130. ret[i] = d;
  131. }
  132. ret[i] = '\0';
  133. return str;
  134. }
  135. /* }}} */
  136. /* {{{ localeconv_r
  137. * glibc's localeconv is not reentrant, so lets make it so ... sorta */
  138. PHPAPI struct lconv *localeconv_r(struct lconv *out)
  139. {
  140. #ifdef ZTS
  141. tsrm_mutex_lock( locale_mutex );
  142. #endif
  143. /* cur->locinfo is struct __crt_locale_info which implementation is
  144. hidden in vc14. TODO revisit this and check if a workaround available
  145. and needed. */
  146. #if defined(PHP_WIN32) && _MSC_VER < 1900 && defined(ZTS)
  147. {
  148. /* Even with the enabled per thread locale, localeconv
  149. won't check any locale change in the master thread. */
  150. _locale_t cur = _get_current_locale();
  151. *out = *cur->locinfo->lconv;
  152. _free_locale(cur);
  153. }
  154. #else
  155. /* localeconv doesn't return an error condition */
  156. *out = *localeconv();
  157. #endif
  158. #ifdef ZTS
  159. tsrm_mutex_unlock( locale_mutex );
  160. #endif
  161. return out;
  162. }
  163. /* }}} */
  164. #ifdef ZTS
  165. /* {{{ PHP_MINIT_FUNCTION */
  166. PHP_MINIT_FUNCTION(localeconv)
  167. {
  168. locale_mutex = tsrm_mutex_alloc();
  169. return SUCCESS;
  170. }
  171. /* }}} */
  172. /* {{{ PHP_MSHUTDOWN_FUNCTION */
  173. PHP_MSHUTDOWN_FUNCTION(localeconv)
  174. {
  175. tsrm_mutex_free( locale_mutex );
  176. locale_mutex = NULL;
  177. return SUCCESS;
  178. }
  179. /* }}} */
  180. #endif
  181. /* {{{ Converts the binary representation of data to hex */
  182. PHP_FUNCTION(bin2hex)
  183. {
  184. zend_string *result;
  185. zend_string *data;
  186. ZEND_PARSE_PARAMETERS_START(1, 1)
  187. Z_PARAM_STR(data)
  188. ZEND_PARSE_PARAMETERS_END();
  189. result = php_bin2hex((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
  190. RETURN_STR(result);
  191. }
  192. /* }}} */
  193. /* {{{ Converts the hex representation of data to binary */
  194. PHP_FUNCTION(hex2bin)
  195. {
  196. zend_string *result, *data;
  197. ZEND_PARSE_PARAMETERS_START(1, 1)
  198. Z_PARAM_STR(data)
  199. ZEND_PARSE_PARAMETERS_END();
  200. if (ZSTR_LEN(data) % 2 != 0) {
  201. php_error_docref(NULL, E_WARNING, "Hexadecimal input string must have an even length");
  202. RETURN_FALSE;
  203. }
  204. result = php_hex2bin((unsigned char *)ZSTR_VAL(data), ZSTR_LEN(data));
  205. if (!result) {
  206. php_error_docref(NULL, E_WARNING, "Input string must be hexadecimal string");
  207. RETURN_FALSE;
  208. }
  209. RETVAL_STR(result);
  210. }
  211. /* }}} */
  212. static void php_spn_common_handler(INTERNAL_FUNCTION_PARAMETERS, int behavior) /* {{{ */
  213. {
  214. zend_string *s11, *s22;
  215. zend_long start = 0, len = 0;
  216. bool len_is_null = 1;
  217. ZEND_PARSE_PARAMETERS_START(2, 4)
  218. Z_PARAM_STR(s11)
  219. Z_PARAM_STR(s22)
  220. Z_PARAM_OPTIONAL
  221. Z_PARAM_LONG(start)
  222. Z_PARAM_LONG_OR_NULL(len, len_is_null)
  223. ZEND_PARSE_PARAMETERS_END();
  224. size_t remain_len = ZSTR_LEN(s11);
  225. if (start < 0) {
  226. start += remain_len;
  227. if (start < 0) {
  228. start = 0;
  229. }
  230. } else if ((size_t) start > remain_len) {
  231. start = remain_len;
  232. }
  233. remain_len -= start;
  234. if (!len_is_null) {
  235. if (len < 0) {
  236. len += remain_len;
  237. if (len < 0) {
  238. len = 0;
  239. }
  240. } else if ((size_t) len > remain_len) {
  241. len = remain_len;
  242. }
  243. } else {
  244. len = remain_len;
  245. }
  246. if (len == 0) {
  247. RETURN_LONG(0);
  248. }
  249. if (behavior == STR_STRSPN) {
  250. RETURN_LONG(php_strspn(ZSTR_VAL(s11) + start /*str1_start*/,
  251. ZSTR_VAL(s22) /*str2_start*/,
  252. ZSTR_VAL(s11) + start + len /*str1_end*/,
  253. ZSTR_VAL(s22) + ZSTR_LEN(s22) /*str2_end*/));
  254. } else {
  255. ZEND_ASSERT(behavior == STR_STRCSPN);
  256. RETURN_LONG(php_strcspn(ZSTR_VAL(s11) + start /*str1_start*/,
  257. ZSTR_VAL(s22) /*str2_start*/,
  258. ZSTR_VAL(s11) + start + len /*str1_end*/,
  259. ZSTR_VAL(s22) + ZSTR_LEN(s22) /*str2_end*/));
  260. }
  261. }
  262. /* }}} */
  263. /* {{{ Finds length of initial segment consisting entirely of characters found in mask. If start or/and length is provided works like strspn(substr($s,$start,$len),$good_chars) */
  264. PHP_FUNCTION(strspn)
  265. {
  266. php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, STR_STRSPN);
  267. }
  268. /* }}} */
  269. /* {{{ Finds length of initial segment consisting entirely of characters not found in mask. If start or/and length is provide works like strcspn(substr($s,$start,$len),$bad_chars) */
  270. PHP_FUNCTION(strcspn)
  271. {
  272. php_spn_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, STR_STRCSPN);
  273. }
  274. /* }}} */
  275. /* {{{ PHP_MINIT_FUNCTION(nl_langinfo) */
  276. #if HAVE_NL_LANGINFO
  277. PHP_MINIT_FUNCTION(nl_langinfo)
  278. {
  279. #define REGISTER_NL_LANGINFO_CONSTANT(x) REGISTER_LONG_CONSTANT(#x, x, CONST_CS | CONST_PERSISTENT)
  280. #ifdef ABDAY_1
  281. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_1);
  282. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_2);
  283. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_3);
  284. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_4);
  285. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_5);
  286. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_6);
  287. REGISTER_NL_LANGINFO_CONSTANT(ABDAY_7);
  288. #endif
  289. #ifdef DAY_1
  290. REGISTER_NL_LANGINFO_CONSTANT(DAY_1);
  291. REGISTER_NL_LANGINFO_CONSTANT(DAY_2);
  292. REGISTER_NL_LANGINFO_CONSTANT(DAY_3);
  293. REGISTER_NL_LANGINFO_CONSTANT(DAY_4);
  294. REGISTER_NL_LANGINFO_CONSTANT(DAY_5);
  295. REGISTER_NL_LANGINFO_CONSTANT(DAY_6);
  296. REGISTER_NL_LANGINFO_CONSTANT(DAY_7);
  297. #endif
  298. #ifdef ABMON_1
  299. REGISTER_NL_LANGINFO_CONSTANT(ABMON_1);
  300. REGISTER_NL_LANGINFO_CONSTANT(ABMON_2);
  301. REGISTER_NL_LANGINFO_CONSTANT(ABMON_3);
  302. REGISTER_NL_LANGINFO_CONSTANT(ABMON_4);
  303. REGISTER_NL_LANGINFO_CONSTANT(ABMON_5);
  304. REGISTER_NL_LANGINFO_CONSTANT(ABMON_6);
  305. REGISTER_NL_LANGINFO_CONSTANT(ABMON_7);
  306. REGISTER_NL_LANGINFO_CONSTANT(ABMON_8);
  307. REGISTER_NL_LANGINFO_CONSTANT(ABMON_9);
  308. REGISTER_NL_LANGINFO_CONSTANT(ABMON_10);
  309. REGISTER_NL_LANGINFO_CONSTANT(ABMON_11);
  310. REGISTER_NL_LANGINFO_CONSTANT(ABMON_12);
  311. #endif
  312. #ifdef MON_1
  313. REGISTER_NL_LANGINFO_CONSTANT(MON_1);
  314. REGISTER_NL_LANGINFO_CONSTANT(MON_2);
  315. REGISTER_NL_LANGINFO_CONSTANT(MON_3);
  316. REGISTER_NL_LANGINFO_CONSTANT(MON_4);
  317. REGISTER_NL_LANGINFO_CONSTANT(MON_5);
  318. REGISTER_NL_LANGINFO_CONSTANT(MON_6);
  319. REGISTER_NL_LANGINFO_CONSTANT(MON_7);
  320. REGISTER_NL_LANGINFO_CONSTANT(MON_8);
  321. REGISTER_NL_LANGINFO_CONSTANT(MON_9);
  322. REGISTER_NL_LANGINFO_CONSTANT(MON_10);
  323. REGISTER_NL_LANGINFO_CONSTANT(MON_11);
  324. REGISTER_NL_LANGINFO_CONSTANT(MON_12);
  325. #endif
  326. #ifdef AM_STR
  327. REGISTER_NL_LANGINFO_CONSTANT(AM_STR);
  328. #endif
  329. #ifdef PM_STR
  330. REGISTER_NL_LANGINFO_CONSTANT(PM_STR);
  331. #endif
  332. #ifdef D_T_FMT
  333. REGISTER_NL_LANGINFO_CONSTANT(D_T_FMT);
  334. #endif
  335. #ifdef D_FMT
  336. REGISTER_NL_LANGINFO_CONSTANT(D_FMT);
  337. #endif
  338. #ifdef T_FMT
  339. REGISTER_NL_LANGINFO_CONSTANT(T_FMT);
  340. #endif
  341. #ifdef T_FMT_AMPM
  342. REGISTER_NL_LANGINFO_CONSTANT(T_FMT_AMPM);
  343. #endif
  344. #ifdef ERA
  345. REGISTER_NL_LANGINFO_CONSTANT(ERA);
  346. #endif
  347. #ifdef ERA_YEAR
  348. REGISTER_NL_LANGINFO_CONSTANT(ERA_YEAR);
  349. #endif
  350. #ifdef ERA_D_T_FMT
  351. REGISTER_NL_LANGINFO_CONSTANT(ERA_D_T_FMT);
  352. #endif
  353. #ifdef ERA_D_FMT
  354. REGISTER_NL_LANGINFO_CONSTANT(ERA_D_FMT);
  355. #endif
  356. #ifdef ERA_T_FMT
  357. REGISTER_NL_LANGINFO_CONSTANT(ERA_T_FMT);
  358. #endif
  359. #ifdef ALT_DIGITS
  360. REGISTER_NL_LANGINFO_CONSTANT(ALT_DIGITS);
  361. #endif
  362. #ifdef INT_CURR_SYMBOL
  363. REGISTER_NL_LANGINFO_CONSTANT(INT_CURR_SYMBOL);
  364. #endif
  365. #ifdef CURRENCY_SYMBOL
  366. REGISTER_NL_LANGINFO_CONSTANT(CURRENCY_SYMBOL);
  367. #endif
  368. #ifdef CRNCYSTR
  369. REGISTER_NL_LANGINFO_CONSTANT(CRNCYSTR);
  370. #endif
  371. #ifdef MON_DECIMAL_POINT
  372. REGISTER_NL_LANGINFO_CONSTANT(MON_DECIMAL_POINT);
  373. #endif
  374. #ifdef MON_THOUSANDS_SEP
  375. REGISTER_NL_LANGINFO_CONSTANT(MON_THOUSANDS_SEP);
  376. #endif
  377. #ifdef MON_GROUPING
  378. REGISTER_NL_LANGINFO_CONSTANT(MON_GROUPING);
  379. #endif
  380. #ifdef POSITIVE_SIGN
  381. REGISTER_NL_LANGINFO_CONSTANT(POSITIVE_SIGN);
  382. #endif
  383. #ifdef NEGATIVE_SIGN
  384. REGISTER_NL_LANGINFO_CONSTANT(NEGATIVE_SIGN);
  385. #endif
  386. #ifdef INT_FRAC_DIGITS
  387. REGISTER_NL_LANGINFO_CONSTANT(INT_FRAC_DIGITS);
  388. #endif
  389. #ifdef FRAC_DIGITS
  390. REGISTER_NL_LANGINFO_CONSTANT(FRAC_DIGITS);
  391. #endif
  392. #ifdef P_CS_PRECEDES
  393. REGISTER_NL_LANGINFO_CONSTANT(P_CS_PRECEDES);
  394. #endif
  395. #ifdef P_SEP_BY_SPACE
  396. REGISTER_NL_LANGINFO_CONSTANT(P_SEP_BY_SPACE);
  397. #endif
  398. #ifdef N_CS_PRECEDES
  399. REGISTER_NL_LANGINFO_CONSTANT(N_CS_PRECEDES);
  400. #endif
  401. #ifdef N_SEP_BY_SPACE
  402. REGISTER_NL_LANGINFO_CONSTANT(N_SEP_BY_SPACE);
  403. #endif
  404. #ifdef P_SIGN_POSN
  405. REGISTER_NL_LANGINFO_CONSTANT(P_SIGN_POSN);
  406. #endif
  407. #ifdef N_SIGN_POSN
  408. REGISTER_NL_LANGINFO_CONSTANT(N_SIGN_POSN);
  409. #endif
  410. #ifdef DECIMAL_POINT
  411. REGISTER_NL_LANGINFO_CONSTANT(DECIMAL_POINT);
  412. #endif
  413. #ifdef RADIXCHAR
  414. REGISTER_NL_LANGINFO_CONSTANT(RADIXCHAR);
  415. #endif
  416. #ifdef THOUSANDS_SEP
  417. REGISTER_NL_LANGINFO_CONSTANT(THOUSANDS_SEP);
  418. #endif
  419. #ifdef THOUSEP
  420. REGISTER_NL_LANGINFO_CONSTANT(THOUSEP);
  421. #endif
  422. #ifdef GROUPING
  423. REGISTER_NL_LANGINFO_CONSTANT(GROUPING);
  424. #endif
  425. #ifdef YESEXPR
  426. REGISTER_NL_LANGINFO_CONSTANT(YESEXPR);
  427. #endif
  428. #ifdef NOEXPR
  429. REGISTER_NL_LANGINFO_CONSTANT(NOEXPR);
  430. #endif
  431. #ifdef YESSTR
  432. REGISTER_NL_LANGINFO_CONSTANT(YESSTR);
  433. #endif
  434. #ifdef NOSTR
  435. REGISTER_NL_LANGINFO_CONSTANT(NOSTR);
  436. #endif
  437. #ifdef CODESET
  438. REGISTER_NL_LANGINFO_CONSTANT(CODESET);
  439. #endif
  440. #undef REGISTER_NL_LANGINFO_CONSTANT
  441. return SUCCESS;
  442. }
  443. /* }}} */
  444. /* {{{ Query language and locale information */
  445. PHP_FUNCTION(nl_langinfo)
  446. {
  447. zend_long item;
  448. char *value;
  449. ZEND_PARSE_PARAMETERS_START(1, 1)
  450. Z_PARAM_LONG(item)
  451. ZEND_PARSE_PARAMETERS_END();
  452. switch(item) { /* {{{ */
  453. #ifdef ABDAY_1
  454. case ABDAY_1:
  455. case ABDAY_2:
  456. case ABDAY_3:
  457. case ABDAY_4:
  458. case ABDAY_5:
  459. case ABDAY_6:
  460. case ABDAY_7:
  461. #endif
  462. #ifdef DAY_1
  463. case DAY_1:
  464. case DAY_2:
  465. case DAY_3:
  466. case DAY_4:
  467. case DAY_5:
  468. case DAY_6:
  469. case DAY_7:
  470. #endif
  471. #ifdef ABMON_1
  472. case ABMON_1:
  473. case ABMON_2:
  474. case ABMON_3:
  475. case ABMON_4:
  476. case ABMON_5:
  477. case ABMON_6:
  478. case ABMON_7:
  479. case ABMON_8:
  480. case ABMON_9:
  481. case ABMON_10:
  482. case ABMON_11:
  483. case ABMON_12:
  484. #endif
  485. #ifdef MON_1
  486. case MON_1:
  487. case MON_2:
  488. case MON_3:
  489. case MON_4:
  490. case MON_5:
  491. case MON_6:
  492. case MON_7:
  493. case MON_8:
  494. case MON_9:
  495. case MON_10:
  496. case MON_11:
  497. case MON_12:
  498. #endif
  499. #ifdef AM_STR
  500. case AM_STR:
  501. #endif
  502. #ifdef PM_STR
  503. case PM_STR:
  504. #endif
  505. #ifdef D_T_FMT
  506. case D_T_FMT:
  507. #endif
  508. #ifdef D_FMT
  509. case D_FMT:
  510. #endif
  511. #ifdef T_FMT
  512. case T_FMT:
  513. #endif
  514. #ifdef T_FMT_AMPM
  515. case T_FMT_AMPM:
  516. #endif
  517. #ifdef ERA
  518. case ERA:
  519. #endif
  520. #ifdef ERA_YEAR
  521. case ERA_YEAR:
  522. #endif
  523. #ifdef ERA_D_T_FMT
  524. case ERA_D_T_FMT:
  525. #endif
  526. #ifdef ERA_D_FMT
  527. case ERA_D_FMT:
  528. #endif
  529. #ifdef ERA_T_FMT
  530. case ERA_T_FMT:
  531. #endif
  532. #ifdef ALT_DIGITS
  533. case ALT_DIGITS:
  534. #endif
  535. #ifdef INT_CURR_SYMBOL
  536. case INT_CURR_SYMBOL:
  537. #endif
  538. #ifdef CURRENCY_SYMBOL
  539. case CURRENCY_SYMBOL:
  540. #endif
  541. #ifdef CRNCYSTR
  542. case CRNCYSTR:
  543. #endif
  544. #ifdef MON_DECIMAL_POINT
  545. case MON_DECIMAL_POINT:
  546. #endif
  547. #ifdef MON_THOUSANDS_SEP
  548. case MON_THOUSANDS_SEP:
  549. #endif
  550. #ifdef MON_GROUPING
  551. case MON_GROUPING:
  552. #endif
  553. #ifdef POSITIVE_SIGN
  554. case POSITIVE_SIGN:
  555. #endif
  556. #ifdef NEGATIVE_SIGN
  557. case NEGATIVE_SIGN:
  558. #endif
  559. #ifdef INT_FRAC_DIGITS
  560. case INT_FRAC_DIGITS:
  561. #endif
  562. #ifdef FRAC_DIGITS
  563. case FRAC_DIGITS:
  564. #endif
  565. #ifdef P_CS_PRECEDES
  566. case P_CS_PRECEDES:
  567. #endif
  568. #ifdef P_SEP_BY_SPACE
  569. case P_SEP_BY_SPACE:
  570. #endif
  571. #ifdef N_CS_PRECEDES
  572. case N_CS_PRECEDES:
  573. #endif
  574. #ifdef N_SEP_BY_SPACE
  575. case N_SEP_BY_SPACE:
  576. #endif
  577. #ifdef P_SIGN_POSN
  578. case P_SIGN_POSN:
  579. #endif
  580. #ifdef N_SIGN_POSN
  581. case N_SIGN_POSN:
  582. #endif
  583. #ifdef DECIMAL_POINT
  584. case DECIMAL_POINT:
  585. #elif defined(RADIXCHAR)
  586. case RADIXCHAR:
  587. #endif
  588. #ifdef THOUSANDS_SEP
  589. case THOUSANDS_SEP:
  590. #elif defined(THOUSEP)
  591. case THOUSEP:
  592. #endif
  593. #ifdef GROUPING
  594. case GROUPING:
  595. #endif
  596. #ifdef YESEXPR
  597. case YESEXPR:
  598. #endif
  599. #ifdef NOEXPR
  600. case NOEXPR:
  601. #endif
  602. #ifdef YESSTR
  603. case YESSTR:
  604. #endif
  605. #ifdef NOSTR
  606. case NOSTR:
  607. #endif
  608. #ifdef CODESET
  609. case CODESET:
  610. #endif
  611. break;
  612. default:
  613. php_error_docref(NULL, E_WARNING, "Item '" ZEND_LONG_FMT "' is not valid", item);
  614. RETURN_FALSE;
  615. }
  616. /* }}} */
  617. value = nl_langinfo(item);
  618. if (value == NULL) {
  619. RETURN_FALSE;
  620. } else {
  621. RETURN_STRING(value);
  622. }
  623. }
  624. #endif
  625. /* }}} */
  626. /* {{{ Compares two strings using the current locale */
  627. PHP_FUNCTION(strcoll)
  628. {
  629. zend_string *s1, *s2;
  630. ZEND_PARSE_PARAMETERS_START(2, 2)
  631. Z_PARAM_STR(s1)
  632. Z_PARAM_STR(s2)
  633. ZEND_PARSE_PARAMETERS_END();
  634. RETURN_LONG(strcoll((const char *) ZSTR_VAL(s1),
  635. (const char *) ZSTR_VAL(s2)));
  636. }
  637. /* }}} */
  638. /* {{{ php_charmask
  639. * Fills a 256-byte bytemask with input. You can specify a range like 'a..z',
  640. * it needs to be incrementing.
  641. * Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors)
  642. */
  643. static inline int php_charmask(const unsigned char *input, size_t len, char *mask)
  644. {
  645. const unsigned char *end;
  646. unsigned char c;
  647. int result = SUCCESS;
  648. memset(mask, 0, 256);
  649. for (end = input+len; input < end; input++) {
  650. c=*input;
  651. if ((input+3 < end) && input[1] == '.' && input[2] == '.'
  652. && input[3] >= c) {
  653. memset(mask+c, 1, input[3] - c + 1);
  654. input+=3;
  655. } else if ((input+1 < end) && input[0] == '.' && input[1] == '.') {
  656. /* Error, try to be as helpful as possible:
  657. (a range ending/starting with '.' won't be captured here) */
  658. if (end-len >= input) { /* there was no 'left' char */
  659. php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the left of '..'");
  660. result = FAILURE;
  661. continue;
  662. }
  663. if (input+2 >= end) { /* there is no 'right' char */
  664. php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the right of '..'");
  665. result = FAILURE;
  666. continue;
  667. }
  668. if (input[-1] > input[2]) { /* wrong order */
  669. php_error_docref(NULL, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing");
  670. result = FAILURE;
  671. continue;
  672. }
  673. /* FIXME: better error (a..b..c is the only left possibility?) */
  674. php_error_docref(NULL, E_WARNING, "Invalid '..'-range");
  675. result = FAILURE;
  676. continue;
  677. } else {
  678. mask[c]=1;
  679. }
  680. }
  681. return result;
  682. }
  683. /* }}} */
  684. /* {{{ php_trim_int()
  685. * mode 1 : trim left
  686. * mode 2 : trim right
  687. * mode 3 : trim left and right
  688. * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
  689. */
  690. static zend_always_inline zend_string *php_trim_int(zend_string *str, const char *what, size_t what_len, int mode)
  691. {
  692. const char *start = ZSTR_VAL(str);
  693. const char *end = start + ZSTR_LEN(str);
  694. char mask[256];
  695. if (what) {
  696. if (what_len == 1) {
  697. char p = *what;
  698. if (mode & 1) {
  699. while (start != end) {
  700. if (*start == p) {
  701. start++;
  702. } else {
  703. break;
  704. }
  705. }
  706. }
  707. if (mode & 2) {
  708. while (start != end) {
  709. if (*(end-1) == p) {
  710. end--;
  711. } else {
  712. break;
  713. }
  714. }
  715. }
  716. } else {
  717. php_charmask((const unsigned char *) what, what_len, mask);
  718. if (mode & 1) {
  719. while (start != end) {
  720. if (mask[(unsigned char)*start]) {
  721. start++;
  722. } else {
  723. break;
  724. }
  725. }
  726. }
  727. if (mode & 2) {
  728. while (start != end) {
  729. if (mask[(unsigned char)*(end-1)]) {
  730. end--;
  731. } else {
  732. break;
  733. }
  734. }
  735. }
  736. }
  737. } else {
  738. if (mode & 1) {
  739. while (start != end) {
  740. unsigned char c = (unsigned char)*start;
  741. if (c <= ' ' &&
  742. (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
  743. start++;
  744. } else {
  745. break;
  746. }
  747. }
  748. }
  749. if (mode & 2) {
  750. while (start != end) {
  751. unsigned char c = (unsigned char)*(end-1);
  752. if (c <= ' ' &&
  753. (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' || c == '\0')) {
  754. end--;
  755. } else {
  756. break;
  757. }
  758. }
  759. }
  760. }
  761. if (ZSTR_LEN(str) == end - start) {
  762. return zend_string_copy(str);
  763. } else if (end - start == 0) {
  764. return ZSTR_EMPTY_ALLOC();
  765. } else {
  766. return zend_string_init(start, end - start, 0);
  767. }
  768. }
  769. /* }}} */
  770. /* {{{ php_trim_int()
  771. * mode 1 : trim left
  772. * mode 2 : trim right
  773. * mode 3 : trim left and right
  774. * what indicates which chars are to be trimmed. NULL->default (' \t\n\r\v\0')
  775. */
  776. PHPAPI zend_string *php_trim(zend_string *str, const char *what, size_t what_len, int mode)
  777. {
  778. return php_trim_int(str, what, what_len, mode);
  779. }
  780. /* }}} */
  781. /* {{{ php_do_trim
  782. * Base for trim(), rtrim() and ltrim() functions.
  783. */
  784. static zend_always_inline void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
  785. {
  786. zend_string *str;
  787. zend_string *what = NULL;
  788. ZEND_PARSE_PARAMETERS_START(1, 2)
  789. Z_PARAM_STR(str)
  790. Z_PARAM_OPTIONAL
  791. Z_PARAM_STR(what)
  792. ZEND_PARSE_PARAMETERS_END();
  793. ZVAL_STR(return_value, php_trim_int(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));
  794. }
  795. /* }}} */
  796. /* {{{ Strips whitespace from the beginning and end of a string */
  797. PHP_FUNCTION(trim)
  798. {
  799. php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);
  800. }
  801. /* }}} */
  802. /* {{{ Removes trailing whitespace */
  803. PHP_FUNCTION(rtrim)
  804. {
  805. php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
  806. }
  807. /* }}} */
  808. /* {{{ Strips whitespace from the beginning of a string */
  809. PHP_FUNCTION(ltrim)
  810. {
  811. php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  812. }
  813. /* }}} */
  814. /* {{{ Wraps buffer to selected number of characters using string break char */
  815. PHP_FUNCTION(wordwrap)
  816. {
  817. zend_string *text;
  818. char *breakchar = "\n";
  819. size_t newtextlen, chk, breakchar_len = 1;
  820. size_t alloced;
  821. zend_long current = 0, laststart = 0, lastspace = 0;
  822. zend_long linelength = 75;
  823. bool docut = 0;
  824. zend_string *newtext;
  825. ZEND_PARSE_PARAMETERS_START(1, 4)
  826. Z_PARAM_STR(text)
  827. Z_PARAM_OPTIONAL
  828. Z_PARAM_LONG(linelength)
  829. Z_PARAM_STRING(breakchar, breakchar_len)
  830. Z_PARAM_BOOL(docut)
  831. ZEND_PARSE_PARAMETERS_END();
  832. if (ZSTR_LEN(text) == 0) {
  833. RETURN_EMPTY_STRING();
  834. }
  835. if (breakchar_len == 0) {
  836. zend_argument_value_error(3, "cannot be empty");
  837. RETURN_THROWS();
  838. }
  839. if (linelength == 0 && docut) {
  840. zend_argument_value_error(4, "cannot be true when argument #2 ($width) is 0");
  841. RETURN_THROWS();
  842. }
  843. /* Special case for a single-character break as it needs no
  844. additional storage space */
  845. if (breakchar_len == 1 && !docut) {
  846. newtext = zend_string_init(ZSTR_VAL(text), ZSTR_LEN(text), 0);
  847. laststart = lastspace = 0;
  848. for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
  849. if (ZSTR_VAL(text)[current] == breakchar[0]) {
  850. laststart = lastspace = current + 1;
  851. } else if (ZSTR_VAL(text)[current] == ' ') {
  852. if (current - laststart >= linelength) {
  853. ZSTR_VAL(newtext)[current] = breakchar[0];
  854. laststart = current + 1;
  855. }
  856. lastspace = current;
  857. } else if (current - laststart >= linelength && laststart != lastspace) {
  858. ZSTR_VAL(newtext)[lastspace] = breakchar[0];
  859. laststart = lastspace + 1;
  860. }
  861. }
  862. RETURN_NEW_STR(newtext);
  863. } else {
  864. /* Multiple character line break or forced cut */
  865. if (linelength > 0) {
  866. chk = (size_t)(ZSTR_LEN(text)/linelength + 1);
  867. newtext = zend_string_safe_alloc(chk, breakchar_len, ZSTR_LEN(text), 0);
  868. alloced = ZSTR_LEN(text) + chk * breakchar_len + 1;
  869. } else {
  870. chk = ZSTR_LEN(text);
  871. alloced = ZSTR_LEN(text) * (breakchar_len + 1) + 1;
  872. newtext = zend_string_safe_alloc(ZSTR_LEN(text), breakchar_len + 1, 0, 0);
  873. }
  874. /* now keep track of the actual new text length */
  875. newtextlen = 0;
  876. laststart = lastspace = 0;
  877. for (current = 0; current < (zend_long)ZSTR_LEN(text); current++) {
  878. if (chk == 0) {
  879. alloced += (size_t) (((ZSTR_LEN(text) - current + 1)/linelength + 1) * breakchar_len) + 1;
  880. newtext = zend_string_extend(newtext, alloced, 0);
  881. chk = (size_t) ((ZSTR_LEN(text) - current)/linelength) + 1;
  882. }
  883. /* when we hit an existing break, copy to new buffer, and
  884. * fix up laststart and lastspace */
  885. if (ZSTR_VAL(text)[current] == breakchar[0]
  886. && current + breakchar_len < ZSTR_LEN(text)
  887. && !strncmp(ZSTR_VAL(text) + current, breakchar, breakchar_len)) {
  888. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart + breakchar_len);
  889. newtextlen += current - laststart + breakchar_len;
  890. current += breakchar_len - 1;
  891. laststart = lastspace = current + 1;
  892. chk--;
  893. }
  894. /* if it is a space, check if it is at the line boundary,
  895. * copy and insert a break, or just keep track of it */
  896. else if (ZSTR_VAL(text)[current] == ' ') {
  897. if (current - laststart >= linelength) {
  898. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
  899. newtextlen += current - laststart;
  900. memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
  901. newtextlen += breakchar_len;
  902. laststart = current + 1;
  903. chk--;
  904. }
  905. lastspace = current;
  906. }
  907. /* if we are cutting, and we've accumulated enough
  908. * characters, and we haven't see a space for this line,
  909. * copy and insert a break. */
  910. else if (current - laststart >= linelength
  911. && docut && laststart >= lastspace) {
  912. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
  913. newtextlen += current - laststart;
  914. memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
  915. newtextlen += breakchar_len;
  916. laststart = lastspace = current;
  917. chk--;
  918. }
  919. /* if the current word puts us over the linelength, copy
  920. * back up until the last space, insert a break, and move
  921. * up the laststart */
  922. else if (current - laststart >= linelength
  923. && laststart < lastspace) {
  924. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, lastspace - laststart);
  925. newtextlen += lastspace - laststart;
  926. memcpy(ZSTR_VAL(newtext) + newtextlen, breakchar, breakchar_len);
  927. newtextlen += breakchar_len;
  928. laststart = lastspace = lastspace + 1;
  929. chk--;
  930. }
  931. }
  932. /* copy over any stragglers */
  933. if (laststart != current) {
  934. memcpy(ZSTR_VAL(newtext) + newtextlen, ZSTR_VAL(text) + laststart, current - laststart);
  935. newtextlen += current - laststart;
  936. }
  937. ZSTR_VAL(newtext)[newtextlen] = '\0';
  938. /* free unused memory */
  939. newtext = zend_string_truncate(newtext, newtextlen, 0);
  940. RETURN_NEW_STR(newtext);
  941. }
  942. }
  943. /* }}} */
  944. /* {{{ php_explode */
  945. PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
  946. {
  947. const char *p1 = ZSTR_VAL(str);
  948. const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
  949. const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  950. zval tmp;
  951. if (p2 == NULL) {
  952. ZVAL_STR_COPY(&tmp, str);
  953. zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
  954. } else {
  955. zend_hash_real_init_packed(Z_ARRVAL_P(return_value));
  956. ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
  957. do {
  958. ZEND_HASH_FILL_GROW();
  959. ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, p2 - p1));
  960. ZEND_HASH_FILL_NEXT();
  961. p1 = p2 + ZSTR_LEN(delim);
  962. p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  963. } while (p2 != NULL && --limit > 1);
  964. if (p1 <= endp) {
  965. ZEND_HASH_FILL_GROW();
  966. ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, endp - p1));
  967. ZEND_HASH_FILL_NEXT();
  968. }
  969. } ZEND_HASH_FILL_END();
  970. }
  971. }
  972. /* }}} */
  973. /* {{{ php_explode_negative_limit */
  974. PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *str, zval *return_value, zend_long limit)
  975. {
  976. #define EXPLODE_ALLOC_STEP 64
  977. const char *p1 = ZSTR_VAL(str);
  978. const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
  979. const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  980. zval tmp;
  981. if (p2 == NULL) {
  982. /*
  983. do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
  984. by doing nothing we return empty array
  985. */
  986. } else {
  987. size_t allocated = EXPLODE_ALLOC_STEP, found = 0;
  988. zend_long i, to_return;
  989. const char **positions = emalloc(allocated * sizeof(char *));
  990. positions[found++] = p1;
  991. do {
  992. if (found >= allocated) {
  993. allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
  994. positions = erealloc(ZEND_VOIDP(positions), allocated*sizeof(char *));
  995. }
  996. positions[found++] = p1 = p2 + ZSTR_LEN(delim);
  997. p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
  998. } while (p2 != NULL);
  999. to_return = limit + found;
  1000. /* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
  1001. for (i = 0; i < to_return; i++) { /* this checks also for to_return > 0 */
  1002. ZVAL_STRINGL(&tmp, positions[i], (positions[i+1] - ZSTR_LEN(delim)) - positions[i]);
  1003. zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
  1004. }
  1005. efree((void *)positions);
  1006. }
  1007. #undef EXPLODE_ALLOC_STEP
  1008. }
  1009. /* }}} */
  1010. /* {{{ Splits a string on a string separator and returns an array of components. If limit is positive, at most limit components are returned. If limit is negative, all components except the last abs(limit) are returned. */
  1011. PHP_FUNCTION(explode)
  1012. {
  1013. zend_string *str, *delim;
  1014. zend_long limit = ZEND_LONG_MAX; /* No limit */
  1015. zval tmp;
  1016. ZEND_PARSE_PARAMETERS_START(2, 3)
  1017. Z_PARAM_STR(delim)
  1018. Z_PARAM_STR(str)
  1019. Z_PARAM_OPTIONAL
  1020. Z_PARAM_LONG(limit)
  1021. ZEND_PARSE_PARAMETERS_END();
  1022. if (ZSTR_LEN(delim) == 0) {
  1023. zend_argument_value_error(1, "cannot be empty");
  1024. RETURN_THROWS();
  1025. }
  1026. array_init(return_value);
  1027. if (ZSTR_LEN(str) == 0) {
  1028. if (limit >= 0) {
  1029. ZVAL_EMPTY_STRING(&tmp);
  1030. zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
  1031. }
  1032. return;
  1033. }
  1034. if (limit > 1) {
  1035. php_explode(delim, str, return_value, limit);
  1036. } else if (limit < 0) {
  1037. php_explode_negative_limit(delim, str, return_value, limit);
  1038. } else {
  1039. ZVAL_STR_COPY(&tmp, str);
  1040. zend_hash_index_add_new(Z_ARRVAL_P(return_value), 0, &tmp);
  1041. }
  1042. }
  1043. /* }}} */
  1044. /* {{{ An alias for implode */
  1045. /* }}} */
  1046. /* {{{ php_implode */
  1047. PHPAPI void php_implode(const zend_string *glue, HashTable *pieces, zval *return_value)
  1048. {
  1049. zval *tmp;
  1050. int numelems;
  1051. zend_string *str;
  1052. char *cptr;
  1053. size_t len = 0;
  1054. struct {
  1055. zend_string *str;
  1056. zend_long lval;
  1057. } *strings, *ptr;
  1058. ALLOCA_FLAG(use_heap)
  1059. numelems = zend_hash_num_elements(pieces);
  1060. if (numelems == 0) {
  1061. RETURN_EMPTY_STRING();
  1062. } else if (numelems == 1) {
  1063. /* loop to search the first not undefined element... */
  1064. ZEND_HASH_FOREACH_VAL(pieces, tmp) {
  1065. RETURN_STR(zval_get_string(tmp));
  1066. } ZEND_HASH_FOREACH_END();
  1067. }
  1068. ptr = strings = do_alloca((sizeof(*strings)) * numelems, use_heap);
  1069. ZEND_HASH_FOREACH_VAL(pieces, tmp) {
  1070. if (EXPECTED(Z_TYPE_P(tmp) == IS_STRING)) {
  1071. ptr->str = Z_STR_P(tmp);
  1072. len += ZSTR_LEN(ptr->str);
  1073. ptr->lval = 0;
  1074. ptr++;
  1075. } else if (UNEXPECTED(Z_TYPE_P(tmp) == IS_LONG)) {
  1076. zend_long val = Z_LVAL_P(tmp);
  1077. ptr->str = NULL;
  1078. ptr->lval = val;
  1079. ptr++;
  1080. if (val <= 0) {
  1081. len++;
  1082. }
  1083. while (val) {
  1084. val /= 10;
  1085. len++;
  1086. }
  1087. } else {
  1088. ptr->str = zval_get_string_func(tmp);
  1089. len += ZSTR_LEN(ptr->str);
  1090. ptr->lval = 1;
  1091. ptr++;
  1092. }
  1093. } ZEND_HASH_FOREACH_END();
  1094. /* numelems cannot be 0, we checked above */
  1095. str = zend_string_safe_alloc(numelems - 1, ZSTR_LEN(glue), len, 0);
  1096. cptr = ZSTR_VAL(str) + ZSTR_LEN(str);
  1097. *cptr = 0;
  1098. while (1) {
  1099. ptr--;
  1100. if (EXPECTED(ptr->str)) {
  1101. cptr -= ZSTR_LEN(ptr->str);
  1102. memcpy(cptr, ZSTR_VAL(ptr->str), ZSTR_LEN(ptr->str));
  1103. if (ptr->lval) {
  1104. zend_string_release_ex(ptr->str, 0);
  1105. }
  1106. } else {
  1107. char *oldPtr = cptr;
  1108. char oldVal = *cptr;
  1109. cptr = zend_print_long_to_buf(cptr, ptr->lval);
  1110. *oldPtr = oldVal;
  1111. }
  1112. if (ptr == strings) {
  1113. break;
  1114. }
  1115. cptr -= ZSTR_LEN(glue);
  1116. memcpy(cptr, ZSTR_VAL(glue), ZSTR_LEN(glue));
  1117. }
  1118. free_alloca(strings, use_heap);
  1119. RETURN_NEW_STR(str);
  1120. }
  1121. /* }}} */
  1122. /* {{{ Joins array elements placing glue string between items and return one string */
  1123. PHP_FUNCTION(implode)
  1124. {
  1125. zend_string *arg1_str = NULL;
  1126. HashTable *arg1_array = NULL;
  1127. zend_array *pieces = NULL;
  1128. ZEND_PARSE_PARAMETERS_START(1, 2)
  1129. Z_PARAM_ARRAY_HT_OR_STR(arg1_array, arg1_str)
  1130. Z_PARAM_OPTIONAL
  1131. Z_PARAM_ARRAY_HT_OR_NULL(pieces)
  1132. ZEND_PARSE_PARAMETERS_END();
  1133. if (pieces == NULL) {
  1134. if (arg1_array == NULL) {
  1135. zend_type_error("%s(): Argument #1 ($pieces) must be of type array, string given", get_active_function_name());
  1136. RETURN_THROWS();
  1137. }
  1138. arg1_str = ZSTR_EMPTY_ALLOC();
  1139. pieces = arg1_array;
  1140. } else {
  1141. if (arg1_str == NULL) {
  1142. zend_argument_type_error(1, "must be of type string, array given");
  1143. RETURN_THROWS();
  1144. }
  1145. }
  1146. php_implode(arg1_str, pieces, return_value);
  1147. }
  1148. /* }}} */
  /* Slot of the strtok delimiter lookup table for the byte that `p` points at.
   * The argument is parenthesized so that any pointer expression can be passed. */
  #define STRTOK_TABLE(p) BG(strtok_table)[(unsigned char) *(p)]
  1150. /* {{{ Tokenize a string */
  1151. PHP_FUNCTION(strtok)
  1152. {
  1153. zend_string *str, *tok = NULL;
  1154. char *token;
  1155. char *token_end;
  1156. char *p;
  1157. char *pe;
  1158. size_t skipped = 0;
  1159. ZEND_PARSE_PARAMETERS_START(1, 2)
  1160. Z_PARAM_STR(str)
  1161. Z_PARAM_OPTIONAL
  1162. Z_PARAM_STR_OR_NULL(tok)
  1163. ZEND_PARSE_PARAMETERS_END();
  1164. if (!tok) {
  1165. tok = str;
  1166. } else {
  1167. if (BG(strtok_string)) {
  1168. zend_string_release(BG(strtok_string));
  1169. }
  1170. BG(strtok_string) = zend_string_copy(str);
  1171. BG(strtok_last) = ZSTR_VAL(str);
  1172. BG(strtok_len) = ZSTR_LEN(str);
  1173. }
  1174. if (!BG(strtok_string)) {
  1175. /* String to tokenize not set. */
  1176. // TODO: Should this warn?
  1177. RETURN_FALSE;
  1178. }
  1179. p = BG(strtok_last); /* Where we start to search */
  1180. pe = ZSTR_VAL(BG(strtok_string)) + BG(strtok_len);
  1181. if (p >= pe) {
  1182. /* Reached the end of the string. */
  1183. RETURN_FALSE;
  1184. }
  1185. token = ZSTR_VAL(tok);
  1186. token_end = token + ZSTR_LEN(tok);
  1187. while (token < token_end) {
  1188. STRTOK_TABLE(token++) = 1;
  1189. }
  1190. /* Skip leading delimiters */
  1191. while (STRTOK_TABLE(p)) {
  1192. if (++p >= pe) {
  1193. /* no other chars left */
  1194. goto return_false;
  1195. }
  1196. skipped++;
  1197. }
  1198. /* We know at this place that *p is no delimiter, so skip it */
  1199. while (++p < pe) {
  1200. if (STRTOK_TABLE(p)) {
  1201. goto return_token;
  1202. }
  1203. }
  1204. if (p - BG(strtok_last)) {
  1205. return_token:
  1206. RETVAL_STRINGL(BG(strtok_last) + skipped, (p - BG(strtok_last)) - skipped);
  1207. BG(strtok_last) = p + 1;
  1208. } else {
  1209. return_false:
  1210. RETVAL_FALSE;
  1211. zend_string_release(BG(strtok_string));
  1212. BG(strtok_string) = NULL;
  1213. }
  1214. /* Restore table -- usually faster then memset'ing the table on every invocation */
  1215. token = ZSTR_VAL(tok);
  1216. while (token < token_end) {
  1217. STRTOK_TABLE(token++) = 0;
  1218. }
  1219. }
  1220. /* }}} */
  1221. /* {{{ php_strtoupper */
  /* Upper-case the first `len` bytes of `s` in place with toupper() and
   * return `s`. Bytes are handled as unsigned char. */
  PHPAPI char *php_strtoupper(char *s, size_t len)
  {
      unsigned char *const bytes = (unsigned char *)s;
      size_t i;

      for (i = 0; i < len; i++) {
          bytes[i] = toupper(bytes[i]);
      }
      return s;
  }
  1234. /* }}} */
  1235. /* {{{ php_string_toupper */
  1236. PHPAPI zend_string *php_string_toupper(zend_string *s)
  1237. {
  1238. unsigned char *c;
  1239. const unsigned char *e;
  1240. if (EXPECTED(!BG(ctype_string))) {
  1241. return zend_string_toupper(s);
  1242. }
  1243. c = (unsigned char *)ZSTR_VAL(s);
  1244. e = c + ZSTR_LEN(s);
  1245. while (c < e) {
  1246. if (islower(*c)) {
  1247. unsigned char *r;
  1248. zend_string *res = zend_string_alloc(ZSTR_LEN(s), 0);
  1249. if (c != (unsigned char*)ZSTR_VAL(s)) {
  1250. memcpy(ZSTR_VAL(res), ZSTR_VAL(s), c - (unsigned char*)ZSTR_VAL(s));
  1251. }
  1252. r = c + (ZSTR_VAL(res) - ZSTR_VAL(s));
  1253. while (c < e) {
  1254. *r = toupper(*c);
  1255. r++;
  1256. c++;
  1257. }
  1258. *r = '\0';
  1259. return res;
  1260. }
  1261. c++;
  1262. }
  1263. return zend_string_copy(s);
  1264. }
  1265. /* }}} */
  1266. /* {{{ Makes a string uppercase */
  1267. PHP_FUNCTION(strtoupper)
  1268. {
  1269. zend_string *arg;
  1270. ZEND_PARSE_PARAMETERS_START(1, 1)
  1271. Z_PARAM_STR(arg)
  1272. ZEND_PARSE_PARAMETERS_END();
  1273. RETURN_STR(php_string_toupper(arg));
  1274. }
  1275. /* }}} */
  1276. /* {{{ php_strtolower */
  /* Lower-case the first `len` bytes of `s` in place with tolower() and
   * return `s`. Bytes are handled as unsigned char. */
  PHPAPI char *php_strtolower(char *s, size_t len)
  {
      unsigned char *const bytes = (unsigned char *)s;
      size_t i;

      for (i = 0; i < len; i++) {
          bytes[i] = tolower(bytes[i]);
      }
      return s;
  }
  1289. /* }}} */
  1290. /* {{{ php_string_tolower */
  1291. PHPAPI zend_string *php_string_tolower(zend_string *s)
  1292. {
  1293. if (EXPECTED(!BG(ctype_string))) {
  1294. return zend_string_tolower(s);
  1295. }
  1296. unsigned char *c = (unsigned char *)ZSTR_VAL(s);
  1297. const unsigned char *e = c + ZSTR_LEN(s);
  1298. while (c < e) {
  1299. if (isupper(*c)) {
  1300. unsigned char *r;
  1301. zend_string *res = zend_string_alloc(ZSTR_LEN(s), 0);
  1302. if (c != (unsigned char*)ZSTR_VAL(s)) {
  1303. memcpy(ZSTR_VAL(res), ZSTR_VAL(s), c - (unsigned char*)ZSTR_VAL(s));
  1304. }
  1305. r = c + (ZSTR_VAL(res) - ZSTR_VAL(s));
  1306. while (c < e) {
  1307. *r = tolower(*c);
  1308. r++;
  1309. c++;
  1310. }
  1311. *r = '\0';
  1312. return res;
  1313. }
  1314. c++;
  1315. }
  1316. return zend_string_copy(s);
  1317. }
  1318. /* }}} */
  1319. /* {{{ Makes a string lowercase */
  1320. PHP_FUNCTION(strtolower)
  1321. {
  1322. zend_string *str;
  1323. ZEND_PARSE_PARAMETERS_START(1, 1)
  1324. Z_PARAM_STR(str)
  1325. ZEND_PARSE_PARAMETERS_END();
  1326. RETURN_STR(php_string_tolower(str));
  1327. }
  1328. /* }}} */
  #if defined(PHP_WIN32)
  /* Decide whether the single byte just before `pos` starts a path component:
   * it must not be a slash or backslash, and must either be the first byte of
   * the string, follow a slash or backslash, or follow a ':' for which the
   * same test holds two bytes earlier. */
  static bool _is_basename_start(const char *start, const char *pos)
  {
      for (;;) {
          if (pos - start < 1) {
              return 0;
          }
          if (pos[-1] == '/' || pos[-1] == '\\') {
              return 0;
          }
          if (pos - start == 1) {
              return 1;
          }
          if (pos[-2] == '/' || pos[-2] == '\\') {
              return 1;
          }
          if (pos[-2] != ':') {
              return 0;
          }
          /* preceded by "x:" -- repeat the test at that colon */
          pos -= 2;
      }
  }
  #endif
  1347. /* {{{ php_basename */
  1348. PHPAPI zend_string *php_basename(const char *s, size_t len, const char *suffix, size_t suffix_len)
  1349. {
  1350. const char *basename_start;
  1351. const char *basename_end;
  1352. if (CG(ascii_compatible_locale)) {
  1353. basename_end = s + len - 1;
  1354. /* Strip trailing slashes */
  1355. while (basename_end >= s
  1356. #if defined(PHP_WIN32)
  1357. && (*basename_end == '/'
  1358. || *basename_end == '\\'
  1359. || (*basename_end == ':'
  1360. && _is_basename_start(s, basename_end)))) {
  1361. #else
  1362. && *basename_end == '/') {
  1363. #endif
  1364. basename_end--;
  1365. }
  1366. if (basename_end < s) {
  1367. return ZSTR_EMPTY_ALLOC();
  1368. }
  1369. /* Extract filename */
  1370. basename_start = basename_end;
  1371. basename_end++;
  1372. while (basename_start > s
  1373. #if defined(PHP_WIN32)
  1374. && *(basename_start-1) != '/'
  1375. && *(basename_start-1) != '\\') {
  1376. if (*(basename_start-1) == ':' &&
  1377. _is_basename_start(s, basename_start - 1)) {
  1378. break;
  1379. }
  1380. #else
  1381. && *(basename_start-1) != '/') {
  1382. #endif
  1383. basename_start--;
  1384. }
  1385. } else {
  1386. /* State 0 is directly after a directory separator (or at the start of the string).
  1387. * State 1 is everything else. */
  1388. int state = 0;
  1389. basename_start = s;
  1390. basename_end = s;
  1391. while (len > 0) {
  1392. int inc_len = (*s == '\0' ? 1 : php_mblen(s, len));
  1393. switch (inc_len) {
  1394. case 0:
  1395. goto quit_loop;
  1396. case 1:
  1397. #if defined(PHP_WIN32)
  1398. if (*s == '/' || *s == '\\') {
  1399. #else
  1400. if (*s == '/') {
  1401. #endif
  1402. if (state == 1) {
  1403. state = 0;
  1404. basename_end = s;
  1405. }
  1406. #if defined(PHP_WIN32)
  1407. /* Catch relative paths in c:file.txt style. They're not to confuse
  1408. with the NTFS streams. This part ensures also, that no drive
  1409. letter traversing happens. */
  1410. } else if ((*s == ':' && (s - basename_start == 1))) {
  1411. if (state == 0) {
  1412. basename_start = s;
  1413. state = 1;
  1414. } else {
  1415. basename_end = s;
  1416. state = 0;
  1417. }
  1418. #endif
  1419. } else {
  1420. if (state == 0) {
  1421. basename_start = s;
  1422. state = 1;
  1423. }
  1424. }
  1425. break;
  1426. default:
  1427. if (inc_len < 0) {
  1428. /* If character is invalid, treat it like other non-significant characters. */
  1429. inc_len = 1;
  1430. php_mb_reset();
  1431. }
  1432. if (state == 0) {
  1433. basename_start = s;
  1434. state = 1;
  1435. }
  1436. break;
  1437. }
  1438. s += inc_len;
  1439. len -= inc_len;
  1440. }
  1441. quit_loop:
  1442. if (state == 1) {
  1443. basename_end = s;
  1444. }
  1445. }
  1446. if (suffix != NULL && suffix_len < (size_t)(basename_end - basename_start) &&
  1447. memcmp(basename_end - suffix_len, suffix, suffix_len) == 0) {
  1448. basename_end -= suffix_len;
  1449. }
  1450. return zend_string_init(basename_start, basename_end - basename_start, 0);
  1451. }
  1452. /* }}} */
  1453. /* {{{ Returns the filename component of the path */
  1454. PHP_FUNCTION(basename)
  1455. {
  1456. char *string, *suffix = NULL;
  1457. size_t string_len, suffix_len = 0;
  1458. ZEND_PARSE_PARAMETERS_START(1, 2)
  1459. Z_PARAM_STRING(string, string_len)
  1460. Z_PARAM_OPTIONAL
  1461. Z_PARAM_STRING(suffix, suffix_len)
  1462. ZEND_PARSE_PARAMETERS_END();
  1463. RETURN_STR(php_basename(string, string_len, suffix, suffix_len));
  1464. }
  1465. /* }}} */
  1466. /* {{{ php_dirname
  1467. Returns directory name component of path */
  1468. PHPAPI size_t php_dirname(char *path, size_t len)
  1469. {
  1470. return zend_dirname(path, len);
  1471. }
  1472. /* }}} */
  1473. /* {{{ Returns the directory name component of the path */
  1474. PHP_FUNCTION(dirname)
  1475. {
  1476. char *str;
  1477. size_t str_len;
  1478. zend_string *ret;
  1479. zend_long levels = 1;
  1480. ZEND_PARSE_PARAMETERS_START(1, 2)
  1481. Z_PARAM_STRING(str, str_len)
  1482. Z_PARAM_OPTIONAL
  1483. Z_PARAM_LONG(levels)
  1484. ZEND_PARSE_PARAMETERS_END();
  1485. ret = zend_string_init(str, str_len, 0);
  1486. if (levels == 1) {
  1487. /* Default case */
  1488. #ifdef PHP_WIN32
  1489. ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len);
  1490. #else
  1491. ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len);
  1492. #endif
  1493. } else if (levels < 1) {
  1494. zend_argument_value_error(2, "must be greater than or equal to 1");
  1495. zend_string_efree(ret);
  1496. RETURN_THROWS();
  1497. } else {
  1498. /* Some levels up */
  1499. do {
  1500. #ifdef PHP_WIN32
  1501. ZSTR_LEN(ret) = php_win32_ioutil_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
  1502. #else
  1503. ZSTR_LEN(ret) = zend_dirname(ZSTR_VAL(ret), str_len = ZSTR_LEN(ret));
  1504. #endif
  1505. } while (ZSTR_LEN(ret) < str_len && --levels);
  1506. }
  1507. RETURN_NEW_STR(ret);
  1508. }
  1509. /* }}} */
  1510. /* {{{ Returns information about a certain string */
  1511. PHP_FUNCTION(pathinfo)
  1512. {
  1513. zval tmp;
  1514. char *path, *dirname;
  1515. size_t path_len;
  1516. int have_basename;
  1517. zend_long opt = PHP_PATHINFO_ALL;
  1518. zend_string *ret = NULL;
  1519. ZEND_PARSE_PARAMETERS_START(1, 2)
  1520. Z_PARAM_STRING(path, path_len)
  1521. Z_PARAM_OPTIONAL
  1522. Z_PARAM_LONG(opt)
  1523. ZEND_PARSE_PARAMETERS_END();
  1524. have_basename = ((opt & PHP_PATHINFO_BASENAME) == PHP_PATHINFO_BASENAME);
  1525. array_init(&tmp);
  1526. if ((opt & PHP_PATHINFO_DIRNAME) == PHP_PATHINFO_DIRNAME) {
  1527. dirname = estrndup(path, path_len);
  1528. php_dirname(dirname, path_len);
  1529. if (*dirname) {
  1530. add_assoc_string(&tmp, "dirname", dirname);
  1531. }
  1532. efree(dirname);
  1533. }
  1534. if (have_basename) {
  1535. ret = php_basename(path, path_len, NULL, 0);
  1536. add_assoc_str(&tmp, "basename", zend_string_copy(ret));
  1537. }
  1538. if ((opt & PHP_PATHINFO_EXTENSION) == PHP_PATHINFO_EXTENSION) {
  1539. const char *p;
  1540. ptrdiff_t idx;
  1541. if (!have_basename) {
  1542. ret = php_basename(path, path_len, NULL, 0);
  1543. }
  1544. p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
  1545. if (p) {
  1546. idx = p - ZSTR_VAL(ret);
  1547. add_assoc_stringl(&tmp, "extension", ZSTR_VAL(ret) + idx + 1, ZSTR_LEN(ret) - idx - 1);
  1548. }
  1549. }
  1550. if ((opt & PHP_PATHINFO_FILENAME) == PHP_PATHINFO_FILENAME) {
  1551. const char *p;
  1552. ptrdiff_t idx;
  1553. /* Have we already looked up the basename? */
  1554. if (!have_basename && !ret) {
  1555. ret = php_basename(path, path_len, NULL, 0);
  1556. }
  1557. p = zend_memrchr(ZSTR_VAL(ret), '.', ZSTR_LEN(ret));
  1558. idx = p ? (p - ZSTR_VAL(ret)) : (ptrdiff_t)ZSTR_LEN(ret);
  1559. add_assoc_stringl(&tmp, "filename", ZSTR_VAL(ret), idx);
  1560. }
  1561. if (ret) {
  1562. zend_string_release_ex(ret, 0);
  1563. }
  1564. if (opt == PHP_PATHINFO_ALL) {
  1565. RETURN_COPY_VALUE(&tmp);
  1566. } else {
  1567. zval *element;
  1568. if ((element = zend_hash_get_current_data(Z_ARRVAL(tmp))) != NULL) {
  1569. RETVAL_COPY_DEREF(element);
  1570. } else {
  1571. RETVAL_EMPTY_STRING();
  1572. }
  1573. zval_ptr_dtor(&tmp);
  1574. }
  1575. }
  1576. /* }}} */
  1577. /* {{{ php_stristr
  1578. case insensitive strstr */
  1579. PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len)
  1580. {
  1581. php_strtolower(s, s_len);
  1582. php_strtolower(t, t_len);
  1583. return (char*)php_memnstr(s, t, t_len, s + s_len);
  1584. }
  1585. /* }}} */
  1586. /* {{{ php_strspn */
  /* Length of the initial run of [s1, s1_end) that consists only of bytes
   * found in the mask [s2, s2_end).
   *
   * Only bytes inside the two half-open ranges are read. The previous code
   * dereferenced s1 before the bounds check and read *s1_end after the last
   * match, relying on a terminator byte after the range. */
  PHPAPI size_t php_strspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
  {
      const char *cur;

      for (cur = s1; cur != s1_end; cur++) {
          const char *m = s2;

          while (m != s2_end && *m != *cur) {
              m++;
          }
          if (m == s2_end) {
              /* *cur is not in the mask: the span ends here */
              break;
          }
      }
      return (size_t)(cur - s1);
  }
  1600. /* }}} */
  1601. /* {{{ php_strcspn */
  /* Length of the initial run of [s1, s1_end) that contains no byte from the
   * mask [s2, s2_end).
   *
   * Only bytes inside the two half-open ranges are read. The previous code
   * always inspected *s2, even for an empty mask, so the byte after the mask
   * (normally the terminator) acted as a mask member and an embedded NUL in
   * s1 ended the span early. It also read *s1_end. An empty mask now yields
   * the full length of s1. */
  PHPAPI size_t php_strcspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
  {
      const char *cur;
      const char *m;

      for (cur = s1; cur != s1_end; cur++) {
          for (m = s2; m != s2_end; m++) {
              if (*m == *cur) {
                  return (size_t)(cur - s1);
              }
          }
      }
      return (size_t)(s1_end - s1);
  }
  1617. /* }}} */
  1618. /* {{{ Finds first occurrence of a string within another, case insensitive */
  1619. PHP_FUNCTION(stristr)
  1620. {
  1621. zend_string *haystack, *needle;
  1622. const char *found = NULL;
  1623. size_t found_offset;
  1624. char *haystack_dup;
  1625. char *orig_needle;
  1626. bool part = 0;
  1627. ZEND_PARSE_PARAMETERS_START(2, 3)
  1628. Z_PARAM_STR(haystack)
  1629. Z_PARAM_STR(needle)
  1630. Z_PARAM_OPTIONAL
  1631. Z_PARAM_BOOL(part)
  1632. ZEND_PARSE_PARAMETERS_END();
  1633. haystack_dup = estrndup(ZSTR_VAL(haystack), ZSTR_LEN(haystack));
  1634. orig_needle = estrndup(ZSTR_VAL(needle), ZSTR_LEN(needle));
  1635. found = php_stristr(haystack_dup, orig_needle, ZSTR_LEN(haystack), ZSTR_LEN(needle));
  1636. efree(orig_needle);
  1637. if (found) {
  1638. found_offset = found - haystack_dup;
  1639. if (part) {
  1640. RETVAL_STRINGL(ZSTR_VAL(haystack), found_offset);
  1641. } else {
  1642. RETVAL_STRINGL(ZSTR_VAL(haystack) + found_offset, ZSTR_LEN(haystack) - found_offset);
  1643. }
  1644. } else {
  1645. RETVAL_FALSE;
  1646. }
  1647. efree(haystack_dup);
  1648. }
  1649. /* }}} */
  1650. /* {{{ Finds first occurrence of a string within another */
  1651. PHP_FUNCTION(strstr)
  1652. {
  1653. zend_string *haystack, *needle;
  1654. const char *found = NULL;
  1655. zend_long found_offset;
  1656. bool part = 0;
  1657. ZEND_PARSE_PARAMETERS_START(2, 3)
  1658. Z_PARAM_STR(haystack)
  1659. Z_PARAM_STR(needle)
  1660. Z_PARAM_OPTIONAL
  1661. Z_PARAM_BOOL(part)
  1662. ZEND_PARSE_PARAMETERS_END();
  1663. found = php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
  1664. if (found) {
  1665. found_offset = found - ZSTR_VAL(haystack);
  1666. if (part) {
  1667. RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
  1668. } else {
  1669. RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
  1670. }
  1671. }
  1672. RETURN_FALSE;
  1673. }
  1674. /* }}} */
  1675. /* {{{ Checks if a string contains another */
  1676. PHP_FUNCTION(str_contains)
  1677. {
  1678. zend_string *haystack, *needle;
  1679. ZEND_PARSE_PARAMETERS_START(2, 2)
  1680. Z_PARAM_STR(haystack)
  1681. Z_PARAM_STR(needle)
  1682. ZEND_PARSE_PARAMETERS_END();
  1683. RETURN_BOOL(php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack)));
  1684. }
  1685. /* }}} */
  1686. /* {{{ Checks if haystack starts with needle */
  1687. PHP_FUNCTION(str_starts_with)
  1688. {
  1689. zend_string *haystack, *needle;
  1690. ZEND_PARSE_PARAMETERS_START(2, 2)
  1691. Z_PARAM_STR(haystack)
  1692. Z_PARAM_STR(needle)
  1693. ZEND_PARSE_PARAMETERS_END();
  1694. if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
  1695. RETURN_FALSE;
  1696. }
  1697. RETURN_BOOL(memcmp(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
  1698. }
  1699. /* }}} */
  1700. /* {{{ Checks if haystack ends with needle */
  1701. PHP_FUNCTION(str_ends_with)
  1702. {
  1703. zend_string *haystack, *needle;
  1704. ZEND_PARSE_PARAMETERS_START(2, 2)
  1705. Z_PARAM_STR(haystack)
  1706. Z_PARAM_STR(needle)
  1707. ZEND_PARSE_PARAMETERS_END();
  1708. if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
  1709. RETURN_FALSE;
  1710. }
  1711. RETURN_BOOL(memcmp(
  1712. ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - ZSTR_LEN(needle),
  1713. ZSTR_VAL(needle), ZSTR_LEN(needle)) == 0);
  1714. }
  1715. /* }}} */
  1716. /* {{{ An alias for strstr */
  1717. /* }}} */
  1718. /* {{{ Finds position of first occurrence of a string within another */
  1719. PHP_FUNCTION(strpos)
  1720. {
  1721. zend_string *haystack, *needle;
  1722. const char *found = NULL;
  1723. zend_long offset = 0;
  1724. ZEND_PARSE_PARAMETERS_START(2, 3)
  1725. Z_PARAM_STR(haystack)
  1726. Z_PARAM_STR(needle)
  1727. Z_PARAM_OPTIONAL
  1728. Z_PARAM_LONG(offset)
  1729. ZEND_PARSE_PARAMETERS_END();
  1730. if (offset < 0) {
  1731. offset += (zend_long)ZSTR_LEN(haystack);
  1732. }
  1733. if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
  1734. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1735. RETURN_THROWS();
  1736. }
  1737. found = (char*)php_memnstr(ZSTR_VAL(haystack) + offset,
  1738. ZSTR_VAL(needle), ZSTR_LEN(needle),
  1739. ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
  1740. if (found) {
  1741. RETURN_LONG(found - ZSTR_VAL(haystack));
  1742. } else {
  1743. RETURN_FALSE;
  1744. }
  1745. }
  1746. /* }}} */
  1747. /* {{{ Finds position of first occurrence of a string within another, case insensitive */
  1748. PHP_FUNCTION(stripos)
  1749. {
  1750. const char *found = NULL;
  1751. zend_string *haystack, *needle;
  1752. zend_long offset = 0;
  1753. zend_string *needle_dup = NULL, *haystack_dup;
  1754. ZEND_PARSE_PARAMETERS_START(2, 3)
  1755. Z_PARAM_STR(haystack)
  1756. Z_PARAM_STR(needle)
  1757. Z_PARAM_OPTIONAL
  1758. Z_PARAM_LONG(offset)
  1759. ZEND_PARSE_PARAMETERS_END();
  1760. if (offset < 0) {
  1761. offset += (zend_long)ZSTR_LEN(haystack);
  1762. }
  1763. if (offset < 0 || (size_t)offset > ZSTR_LEN(haystack)) {
  1764. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1765. RETURN_THROWS();
  1766. }
  1767. if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
  1768. RETURN_FALSE;
  1769. }
  1770. haystack_dup = php_string_tolower(haystack);
  1771. needle_dup = php_string_tolower(needle);
  1772. found = (char*)php_memnstr(ZSTR_VAL(haystack_dup) + offset,
  1773. ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack));
  1774. if (found) {
  1775. RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
  1776. } else {
  1777. RETVAL_FALSE;
  1778. }
  1779. zend_string_release_ex(haystack_dup, 0);
  1780. zend_string_release_ex(needle_dup, 0);
  1781. }
  1782. /* }}} */
  1783. /* {{{ Finds position of last occurrence of a string within another string */
  1784. PHP_FUNCTION(strrpos)
  1785. {
  1786. zend_string *needle;
  1787. zend_string *haystack;
  1788. zend_long offset = 0;
  1789. const char *p, *e, *found;
  1790. ZEND_PARSE_PARAMETERS_START(2, 3)
  1791. Z_PARAM_STR(haystack)
  1792. Z_PARAM_STR(needle)
  1793. Z_PARAM_OPTIONAL
  1794. Z_PARAM_LONG(offset)
  1795. ZEND_PARSE_PARAMETERS_END();
  1796. if (offset >= 0) {
  1797. if ((size_t)offset > ZSTR_LEN(haystack)) {
  1798. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1799. RETURN_THROWS();
  1800. }
  1801. p = ZSTR_VAL(haystack) + (size_t)offset;
  1802. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  1803. } else {
  1804. if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
  1805. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1806. RETURN_THROWS();
  1807. }
  1808. p = ZSTR_VAL(haystack);
  1809. if ((size_t)-offset < ZSTR_LEN(needle)) {
  1810. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  1811. } else {
  1812. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
  1813. }
  1814. }
  1815. if ((found = zend_memnrstr(p, ZSTR_VAL(needle), ZSTR_LEN(needle), e))) {
  1816. RETURN_LONG(found - ZSTR_VAL(haystack));
  1817. }
  1818. RETURN_FALSE;
  1819. }
  1820. /* }}} */
  1821. /* {{{ Finds position of last occurrence of a string within another string, case insensitive */
  1822. PHP_FUNCTION(strripos)
  1823. {
  1824. zend_string *needle;
  1825. zend_string *haystack;
  1826. zend_long offset = 0;
  1827. const char *p, *e, *found;
  1828. zend_string *needle_dup, *haystack_dup;
  1829. ZEND_PARSE_PARAMETERS_START(2, 3)
  1830. Z_PARAM_STR(haystack)
  1831. Z_PARAM_STR(needle)
  1832. Z_PARAM_OPTIONAL
  1833. Z_PARAM_LONG(offset)
  1834. ZEND_PARSE_PARAMETERS_END();
  1835. if (ZSTR_LEN(needle) == 1) {
  1836. /* Single character search can shortcut memcmps
  1837. Can also avoid tolower emallocs */
  1838. char lowered;
  1839. if (offset >= 0) {
  1840. if ((size_t)offset > ZSTR_LEN(haystack)) {
  1841. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1842. RETURN_THROWS();
  1843. }
  1844. p = ZSTR_VAL(haystack) + (size_t)offset;
  1845. e = ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - 1;
  1846. } else {
  1847. p = ZSTR_VAL(haystack);
  1848. if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
  1849. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1850. RETURN_THROWS();
  1851. }
  1852. e = ZSTR_VAL(haystack) + (ZSTR_LEN(haystack) + (size_t)offset);
  1853. }
  1854. /* Borrow that ord_needle buffer to avoid repeatedly tolower()ing needle */
  1855. lowered = tolower(*ZSTR_VAL(needle));
  1856. while (e >= p) {
  1857. if (tolower(*e) == lowered) {
  1858. RETURN_LONG(e - p + (offset > 0 ? offset : 0));
  1859. }
  1860. e--;
  1861. }
  1862. RETURN_FALSE;
  1863. }
  1864. haystack_dup = php_string_tolower(haystack);
  1865. if (offset >= 0) {
  1866. if ((size_t)offset > ZSTR_LEN(haystack)) {
  1867. zend_string_release_ex(haystack_dup, 0);
  1868. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1869. RETURN_THROWS();
  1870. }
  1871. p = ZSTR_VAL(haystack_dup) + offset;
  1872. e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
  1873. } else {
  1874. if (offset < -ZEND_LONG_MAX || (size_t)(-offset) > ZSTR_LEN(haystack)) {
  1875. zend_string_release_ex(haystack_dup, 0);
  1876. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  1877. RETURN_THROWS();
  1878. }
  1879. p = ZSTR_VAL(haystack_dup);
  1880. if ((size_t)-offset < ZSTR_LEN(needle)) {
  1881. e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack);
  1882. } else {
  1883. e = ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack) + offset + ZSTR_LEN(needle);
  1884. }
  1885. }
  1886. needle_dup = php_string_tolower(needle);
  1887. if ((found = (char *)zend_memnrstr(p, ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), e))) {
  1888. RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
  1889. zend_string_release_ex(needle_dup, 0);
  1890. zend_string_release_ex(haystack_dup, 0);
  1891. } else {
  1892. zend_string_release_ex(needle_dup, 0);
  1893. zend_string_release_ex(haystack_dup, 0);
  1894. RETURN_FALSE;
  1895. }
  1896. }
  1897. /* }}} */
  1898. /* {{{ Finds the last occurrence of a character in a string within another */
  1899. PHP_FUNCTION(strrchr)
  1900. {
  1901. zend_string *haystack, *needle;
  1902. const char *found = NULL;
  1903. zend_long found_offset;
  1904. ZEND_PARSE_PARAMETERS_START(2, 2)
  1905. Z_PARAM_STR(haystack)
  1906. Z_PARAM_STR(needle)
  1907. ZEND_PARSE_PARAMETERS_END();
  1908. found = zend_memrchr(ZSTR_VAL(haystack), *ZSTR_VAL(needle), ZSTR_LEN(haystack));
  1909. if (found) {
  1910. found_offset = found - ZSTR_VAL(haystack);
  1911. RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
  1912. } else {
  1913. RETURN_FALSE;
  1914. }
  1915. }
  1916. /* }}} */
  1917. /* {{{ php_chunk_split */
  1918. static zend_string *php_chunk_split(const char *src, size_t srclen, const char *end, size_t endlen, size_t chunklen)
  1919. {
  1920. char *q;
  1921. const char *p;
  1922. size_t chunks;
  1923. size_t restlen;
  1924. zend_string *dest;
  1925. chunks = srclen / chunklen;
  1926. restlen = srclen - chunks * chunklen; /* srclen % chunklen */
  1927. if (restlen) {
  1928. /* We want chunks to be rounded up rather than rounded down.
  1929. * Increment can't overflow because chunks <= SIZE_MAX/2 at this point. */
  1930. chunks++;
  1931. }
  1932. dest = zend_string_safe_alloc(chunks, endlen, srclen, 0);
  1933. for (p = src, q = ZSTR_VAL(dest); p < (src + srclen - chunklen + 1); ) {
  1934. memcpy(q, p, chunklen);
  1935. q += chunklen;
  1936. memcpy(q, end, endlen);
  1937. q += endlen;
  1938. p += chunklen;
  1939. }
  1940. if (restlen) {
  1941. memcpy(q, p, restlen);
  1942. q += restlen;
  1943. memcpy(q, end, endlen);
  1944. q += endlen;
  1945. }
  1946. *q = '\0';
  1947. ZEND_ASSERT(q - ZSTR_VAL(dest) == ZSTR_LEN(dest));
  1948. return dest;
  1949. }
  1950. /* }}} */
  1951. /* {{{ Returns split line */
  1952. PHP_FUNCTION(chunk_split)
  1953. {
  1954. zend_string *str;
  1955. char *end = "\r\n";
  1956. size_t endlen = 2;
  1957. zend_long chunklen = 76;
  1958. zend_string *result;
  1959. ZEND_PARSE_PARAMETERS_START(1, 3)
  1960. Z_PARAM_STR(str)
  1961. Z_PARAM_OPTIONAL
  1962. Z_PARAM_LONG(chunklen)
  1963. Z_PARAM_STRING(end, endlen)
  1964. ZEND_PARSE_PARAMETERS_END();
  1965. if (chunklen <= 0) {
  1966. zend_argument_value_error(2, "must be greater than 0");
  1967. RETURN_THROWS();
  1968. }
  1969. if ((size_t)chunklen > ZSTR_LEN(str)) {
  1970. /* to maintain BC, we must return original string + ending */
  1971. result = zend_string_safe_alloc(ZSTR_LEN(str), 1, endlen, 0);
  1972. memcpy(ZSTR_VAL(result), ZSTR_VAL(str), ZSTR_LEN(str));
  1973. memcpy(ZSTR_VAL(result) + ZSTR_LEN(str), end, endlen);
  1974. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  1975. RETURN_NEW_STR(result);
  1976. }
  1977. if (!ZSTR_LEN(str)) {
  1978. RETURN_EMPTY_STRING();
  1979. }
  1980. result = php_chunk_split(ZSTR_VAL(str), ZSTR_LEN(str), end, endlen, (size_t)chunklen);
  1981. RETURN_STR(result);
  1982. }
  1983. /* }}} */
  1984. /* {{{ Returns part of a string */
  1985. PHP_FUNCTION(substr)
  1986. {
  1987. zend_string *str;
  1988. zend_long l = 0, f;
  1989. bool len_is_null = 1;
  1990. ZEND_PARSE_PARAMETERS_START(2, 3)
  1991. Z_PARAM_STR(str)
  1992. Z_PARAM_LONG(f)
  1993. Z_PARAM_OPTIONAL
  1994. Z_PARAM_LONG_OR_NULL(l, len_is_null)
  1995. ZEND_PARSE_PARAMETERS_END();
  1996. if (f < 0) {
  1997. /* if "from" position is negative, count start position from the end
  1998. * of the string
  1999. */
  2000. if (-(size_t)f > ZSTR_LEN(str)) {
  2001. f = 0;
  2002. } else {
  2003. f = (zend_long)ZSTR_LEN(str) + f;
  2004. }
  2005. } else if ((size_t)f > ZSTR_LEN(str)) {
  2006. RETURN_EMPTY_STRING();
  2007. }
  2008. if (!len_is_null) {
  2009. if (l < 0) {
  2010. /* if "length" position is negative, set it to the length
  2011. * needed to stop that many chars from the end of the string
  2012. */
  2013. if (-(size_t)l > ZSTR_LEN(str) - (size_t)f) {
  2014. l = 0;
  2015. } else {
  2016. l = (zend_long)ZSTR_LEN(str) - f + l;
  2017. }
  2018. } else if ((size_t)l > ZSTR_LEN(str) - (size_t)f) {
  2019. l = (zend_long)ZSTR_LEN(str) - f;
  2020. }
  2021. } else {
  2022. l = (zend_long)ZSTR_LEN(str) - f;
  2023. }
  2024. if (l == ZSTR_LEN(str)) {
  2025. RETURN_STR_COPY(str);
  2026. } else {
  2027. RETURN_STRINGL_FAST(ZSTR_VAL(str) + f, l);
  2028. }
  2029. }
  2030. /* }}} */
  2031. /* {{{ Replaces part of a string with another string */
  2032. PHP_FUNCTION(substr_replace)
  2033. {
  2034. zend_string *str, *repl_str;
  2035. HashTable *str_ht, *repl_ht;
  2036. HashTable *from_ht;
  2037. zend_long from_long;
  2038. HashTable *len_ht = NULL;
  2039. zend_long len_long;
  2040. bool len_is_null = 1;
  2041. zend_long l = 0;
  2042. zend_long f;
  2043. zend_string *result;
  2044. HashPosition from_idx, repl_idx, len_idx;
  2045. zval *tmp_str = NULL, *tmp_repl, *tmp_from = NULL, *tmp_len= NULL;
  2046. ZEND_PARSE_PARAMETERS_START(3, 4)
  2047. Z_PARAM_ARRAY_HT_OR_STR(str_ht, str)
  2048. Z_PARAM_ARRAY_HT_OR_STR(repl_ht, repl_str)
  2049. Z_PARAM_ARRAY_HT_OR_LONG(from_ht, from_long)
  2050. Z_PARAM_OPTIONAL
  2051. Z_PARAM_ARRAY_HT_OR_LONG_OR_NULL(len_ht, len_long, len_is_null)
  2052. ZEND_PARSE_PARAMETERS_END();
  2053. if (len_is_null) {
  2054. if (str) {
  2055. l = ZSTR_LEN(str);
  2056. }
  2057. } else if (!len_ht) {
  2058. l = len_long;
  2059. }
  2060. if (str) {
  2061. if (from_ht) {
  2062. zend_argument_type_error(3, "cannot be an array when working on a single string");
  2063. RETURN_THROWS();
  2064. }
  2065. if (len_ht) {
  2066. zend_argument_type_error(4, "cannot be an array when working on a single string");
  2067. RETURN_THROWS();
  2068. }
  2069. f = from_long;
  2070. /* if "from" position is negative, count start position from the end
  2071. * of the string
  2072. */
  2073. if (f < 0) {
  2074. f = (zend_long)ZSTR_LEN(str) + f;
  2075. if (f < 0) {
  2076. f = 0;
  2077. }
  2078. } else if ((size_t)f > ZSTR_LEN(str)) {
  2079. f = ZSTR_LEN(str);
  2080. }
  2081. /* if "length" position is negative, set it to the length
  2082. * needed to stop that many chars from the end of the string
  2083. */
  2084. if (l < 0) {
  2085. l = ((zend_long)ZSTR_LEN(str) - f) + l;
  2086. if (l < 0) {
  2087. l = 0;
  2088. }
  2089. }
  2090. if ((size_t)l > ZSTR_LEN(str) || (l < 0 && (size_t)(-l) > ZSTR_LEN(str))) {
  2091. l = ZSTR_LEN(str);
  2092. }
  2093. if ((f + l) > (zend_long)ZSTR_LEN(str)) {
  2094. l = ZSTR_LEN(str) - f;
  2095. }
  2096. zend_string *tmp_repl_str = NULL;
  2097. if (repl_ht) {
  2098. repl_idx = 0;
  2099. if (HT_IS_PACKED(repl_ht)) {
  2100. while (repl_idx < repl_ht->nNumUsed) {
  2101. tmp_repl = &repl_ht->arPacked[repl_idx];
  2102. if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
  2103. break;
  2104. }
  2105. repl_idx++;
  2106. }
  2107. } else {
  2108. while (repl_idx < repl_ht->nNumUsed) {
  2109. tmp_repl = &repl_ht->arData[repl_idx].val;
  2110. if (Z_TYPE_P(tmp_repl) != IS_UNDEF) {
  2111. break;
  2112. }
  2113. repl_idx++;
  2114. }
  2115. }
  2116. if (repl_idx < repl_ht->nNumUsed) {
  2117. repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
  2118. } else {
  2119. repl_str = STR_EMPTY_ALLOC();
  2120. }
  2121. }
  2122. result = zend_string_safe_alloc(1, ZSTR_LEN(str) - l + ZSTR_LEN(repl_str), 0, 0);
  2123. memcpy(ZSTR_VAL(result), ZSTR_VAL(str), f);
  2124. if (ZSTR_LEN(repl_str)) {
  2125. memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
  2126. }
  2127. memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(str) + f + l, ZSTR_LEN(str) - f - l);
  2128. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  2129. zend_tmp_string_release(tmp_repl_str);
  2130. RETURN_NEW_STR(result);
  2131. } else { /* str is array of strings */
  2132. zend_string *str_index = NULL;
  2133. size_t result_len;
  2134. zend_ulong num_index;
  2135. /* TODO
  2136. if (!len_is_null && from_ht) {
  2137. if (zend_hash_num_elements(from_ht) != zend_hash_num_elements(len_ht)) {
  2138. php_error_docref(NULL, E_WARNING, "'start' and 'length' should have the same number of elements");
  2139. RETURN_STR_COPY(str);
  2140. }
  2141. }
  2142. */
  2143. array_init(return_value);
  2144. from_idx = len_idx = repl_idx = 0;
  2145. ZEND_HASH_FOREACH_KEY_VAL(str_ht, num_index, str_index, tmp_str) {
  2146. zend_string *tmp_orig_str;
  2147. zend_string *orig_str = zval_get_tmp_string(tmp_str, &tmp_orig_str);
  2148. if (from_ht) {
  2149. if (HT_IS_PACKED(from_ht)) {
  2150. while (from_idx < from_ht->nNumUsed) {
  2151. tmp_from = &from_ht->arPacked[from_idx];
  2152. if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
  2153. break;
  2154. }
  2155. from_idx++;
  2156. }
  2157. } else {
  2158. while (from_idx < from_ht->nNumUsed) {
  2159. tmp_from = &from_ht->arData[from_idx].val;
  2160. if (Z_TYPE_P(tmp_from) != IS_UNDEF) {
  2161. break;
  2162. }
  2163. from_idx++;
  2164. }
  2165. }
  2166. if (from_idx < from_ht->nNumUsed) {
  2167. f = zval_get_long(tmp_from);
  2168. if (f < 0) {
  2169. f = (zend_long)ZSTR_LEN(orig_str) + f;
  2170. if (f < 0) {
  2171. f = 0;
  2172. }
  2173. } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
  2174. f = ZSTR_LEN(orig_str);
  2175. }
  2176. from_idx++;
  2177. } else {
  2178. f = 0;
  2179. }
  2180. } else {
  2181. f = from_long;
  2182. if (f < 0) {
  2183. f = (zend_long)ZSTR_LEN(orig_str) + f;
  2184. if (f < 0) {
  2185. f = 0;
  2186. }
  2187. } else if (f > (zend_long)ZSTR_LEN(orig_str)) {
  2188. f = ZSTR_LEN(orig_str);
  2189. }
  2190. }
  2191. if (len_ht) {
  2192. if (HT_IS_PACKED(len_ht)) {
  2193. while (len_idx < len_ht->nNumUsed) {
  2194. tmp_len = &len_ht->arPacked[len_idx];
  2195. if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
  2196. break;
  2197. }
  2198. len_idx++;
  2199. }
  2200. } else {
  2201. while (len_idx < len_ht->nNumUsed) {
  2202. tmp_len = &len_ht->arData[len_idx].val;
  2203. if (Z_TYPE_P(tmp_len) != IS_UNDEF) {
  2204. break;
  2205. }
  2206. len_idx++;
  2207. }
  2208. }
  2209. if (len_idx < len_ht->nNumUsed) {
  2210. l = zval_get_long(tmp_len);
  2211. len_idx++;
  2212. } else {
  2213. l = ZSTR_LEN(orig_str);
  2214. }
  2215. } else if (!len_is_null) {
  2216. l = len_long;
  2217. } else {
  2218. l = ZSTR_LEN(orig_str);
  2219. }
  2220. if (l < 0) {
  2221. l = (ZSTR_LEN(orig_str) - f) + l;
  2222. if (l < 0) {
  2223. l = 0;
  2224. }
  2225. }
  2226. ZEND_ASSERT(0 <= f && f <= ZEND_LONG_MAX);
  2227. ZEND_ASSERT(0 <= l && l <= ZEND_LONG_MAX);
  2228. if (((size_t) f + l) > ZSTR_LEN(orig_str)) {
  2229. l = ZSTR_LEN(orig_str) - f;
  2230. }
  2231. result_len = ZSTR_LEN(orig_str) - l;
  2232. if (repl_ht) {
  2233. if (HT_IS_PACKED(repl_ht)) {
  2234. while (repl_idx < repl_ht->nNumUsed) {
  2235. tmp_repl = &repl_ht->arPacked[repl_idx];
  2236. if (repl_ht != IS_UNDEF) {
  2237. break;
  2238. }
  2239. repl_idx++;
  2240. }
  2241. } else {
  2242. while (repl_idx < repl_ht->nNumUsed) {
  2243. tmp_repl = &repl_ht->arData[repl_idx].val;
  2244. if (repl_ht != IS_UNDEF) {
  2245. break;
  2246. }
  2247. repl_idx++;
  2248. }
  2249. }
  2250. if (repl_idx < repl_ht->nNumUsed) {
  2251. zend_string *tmp_repl_str;
  2252. zend_string *repl_str = zval_get_tmp_string(tmp_repl, &tmp_repl_str);
  2253. result_len += ZSTR_LEN(repl_str);
  2254. repl_idx++;
  2255. result = zend_string_safe_alloc(1, result_len, 0, 0);
  2256. memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
  2257. memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
  2258. memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
  2259. zend_tmp_string_release(tmp_repl_str);
  2260. } else {
  2261. result = zend_string_safe_alloc(1, result_len, 0, 0);
  2262. memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
  2263. memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
  2264. }
  2265. } else {
  2266. result_len += ZSTR_LEN(repl_str);
  2267. result = zend_string_safe_alloc(1, result_len, 0, 0);
  2268. memcpy(ZSTR_VAL(result), ZSTR_VAL(orig_str), f);
  2269. memcpy((ZSTR_VAL(result) + f), ZSTR_VAL(repl_str), ZSTR_LEN(repl_str));
  2270. memcpy((ZSTR_VAL(result) + f + ZSTR_LEN(repl_str)), ZSTR_VAL(orig_str) + f + l, ZSTR_LEN(orig_str) - f - l);
  2271. }
  2272. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  2273. if (str_index) {
  2274. zval tmp;
  2275. ZVAL_NEW_STR(&tmp, result);
  2276. zend_symtable_update(Z_ARRVAL_P(return_value), str_index, &tmp);
  2277. } else {
  2278. add_index_str(return_value, num_index, result);
  2279. }
  2280. zend_tmp_string_release(tmp_orig_str);
  2281. } ZEND_HASH_FOREACH_END();
  2282. } /* if */
  2283. }
  2284. /* }}} */
  2285. /* {{{ Quotes meta characters */
  2286. PHP_FUNCTION(quotemeta)
  2287. {
  2288. zend_string *old;
  2289. const char *old_end, *p;
  2290. char *q;
  2291. char c;
  2292. zend_string *str;
  2293. ZEND_PARSE_PARAMETERS_START(1, 1)
  2294. Z_PARAM_STR(old)
  2295. ZEND_PARSE_PARAMETERS_END();
  2296. old_end = ZSTR_VAL(old) + ZSTR_LEN(old);
  2297. if (ZSTR_LEN(old) == 0) {
  2298. RETURN_EMPTY_STRING();
  2299. }
  2300. str = zend_string_safe_alloc(2, ZSTR_LEN(old), 0, 0);
  2301. for (p = ZSTR_VAL(old), q = ZSTR_VAL(str); p != old_end; p++) {
  2302. c = *p;
  2303. switch (c) {
  2304. case '.':
  2305. case '\\':
  2306. case '+':
  2307. case '*':
  2308. case '?':
  2309. case '[':
  2310. case '^':
  2311. case ']':
  2312. case '$':
  2313. case '(':
  2314. case ')':
  2315. *q++ = '\\';
  2316. ZEND_FALLTHROUGH;
  2317. default:
  2318. *q++ = c;
  2319. }
  2320. }
  2321. *q = '\0';
  2322. RETURN_NEW_STR(zend_string_truncate(str, q - ZSTR_VAL(str), 0));
  2323. }
  2324. /* }}} */
  2325. /* {{{ Returns ASCII value of character
  2326. Warning: This function is special-cased by zend_compile.c and so is bypassed for constant string argument */
  2327. PHP_FUNCTION(ord)
  2328. {
  2329. zend_string *str;
  2330. ZEND_PARSE_PARAMETERS_START(1, 1)
  2331. Z_PARAM_STR(str)
  2332. ZEND_PARSE_PARAMETERS_END();
  2333. RETURN_LONG((unsigned char) ZSTR_VAL(str)[0]);
  2334. }
  2335. /* }}} */
  2336. /* {{{ Converts ASCII code to a character
  2337. Warning: This function is special-cased by zend_compile.c and so is bypassed for constant integer argument */
  2338. PHP_FUNCTION(chr)
  2339. {
  2340. zend_long c;
  2341. ZEND_PARSE_PARAMETERS_START(1, 1)
  2342. Z_PARAM_LONG(c)
  2343. ZEND_PARSE_PARAMETERS_END();
  2344. c &= 0xff;
  2345. RETURN_CHAR(c);
  2346. }
  2347. /* }}} */
  2348. /* {{{ php_ucfirst
  2349. Uppercase the first character of the word in a native string */
  2350. static zend_string* php_ucfirst(zend_string *str)
  2351. {
  2352. const unsigned char ch = ZSTR_VAL(str)[0];
  2353. unsigned char r = toupper(ch);
  2354. if (r == ch) {
  2355. return zend_string_copy(str);
  2356. } else {
  2357. zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
  2358. ZSTR_VAL(s)[0] = r;
  2359. return s;
  2360. }
  2361. }
  2362. /* }}} */
  2363. /* {{{ Makes a string's first character uppercase */
  2364. PHP_FUNCTION(ucfirst)
  2365. {
  2366. zend_string *str;
  2367. ZEND_PARSE_PARAMETERS_START(1, 1)
  2368. Z_PARAM_STR(str)
  2369. ZEND_PARSE_PARAMETERS_END();
  2370. if (!ZSTR_LEN(str)) {
  2371. RETURN_EMPTY_STRING();
  2372. }
  2373. RETURN_STR(php_ucfirst(str));
  2374. }
  2375. /* }}} */
  2376. /* {{{
  2377. Lowercase the first character of the word in a native string */
  2378. static zend_string* php_lcfirst(zend_string *str)
  2379. {
  2380. unsigned char r = tolower(ZSTR_VAL(str)[0]);
  2381. if (r == ZSTR_VAL(str)[0]) {
  2382. return zend_string_copy(str);
  2383. } else {
  2384. zend_string *s = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
  2385. ZSTR_VAL(s)[0] = r;
  2386. return s;
  2387. }
  2388. }
  2389. /* }}} */
  2390. /* {{{ Make a string's first character lowercase */
  2391. PHP_FUNCTION(lcfirst)
  2392. {
  2393. zend_string *str;
  2394. ZEND_PARSE_PARAMETERS_START(1, 1)
  2395. Z_PARAM_STR(str)
  2396. ZEND_PARSE_PARAMETERS_END();
  2397. if (!ZSTR_LEN(str)) {
  2398. RETURN_EMPTY_STRING();
  2399. }
  2400. RETURN_STR(php_lcfirst(str));
  2401. }
  2402. /* }}} */
  2403. /* {{{ Uppercase the first character of every word in a string */
  2404. PHP_FUNCTION(ucwords)
  2405. {
  2406. zend_string *str;
  2407. char *delims = " \t\r\n\f\v";
  2408. char *r;
  2409. const char *r_end;
  2410. size_t delims_len = 6;
  2411. char mask[256];
  2412. ZEND_PARSE_PARAMETERS_START(1, 2)
  2413. Z_PARAM_STR(str)
  2414. Z_PARAM_OPTIONAL
  2415. Z_PARAM_STRING(delims, delims_len)
  2416. ZEND_PARSE_PARAMETERS_END();
  2417. if (!ZSTR_LEN(str)) {
  2418. RETURN_EMPTY_STRING();
  2419. }
  2420. php_charmask((const unsigned char *) delims, delims_len, mask);
  2421. ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  2422. r = Z_STRVAL_P(return_value);
  2423. *r = toupper((unsigned char) *r);
  2424. for (r_end = r + Z_STRLEN_P(return_value) - 1; r < r_end; ) {
  2425. if (mask[(unsigned char)*r++]) {
  2426. *r = toupper((unsigned char) *r);
  2427. }
  2428. }
  2429. }
  2430. /* }}} */
  2431. /* {{{ php_strtr */
  2432. PHPAPI char *php_strtr(char *str, size_t len, const char *str_from, const char *str_to, size_t trlen)
  2433. {
  2434. size_t i;
  2435. if (UNEXPECTED(trlen < 1)) {
  2436. return str;
  2437. } else if (trlen == 1) {
  2438. char ch_from = *str_from;
  2439. char ch_to = *str_to;
  2440. for (i = 0; i < len; i++) {
  2441. if (str[i] == ch_from) {
  2442. str[i] = ch_to;
  2443. }
  2444. }
  2445. } else {
  2446. unsigned char xlat[256];
  2447. memset(xlat, 0, sizeof(xlat));
  2448. for (i = 0; i < trlen; i++) {
  2449. xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
  2450. }
  2451. for (i = 0; i < len; i++) {
  2452. str[i] += xlat[(size_t)(unsigned char) str[i]];
  2453. }
  2454. }
  2455. return str;
  2456. }
  2457. /* }}} */
  2458. /* {{{ php_strtr_ex */
  2459. static zend_string *php_strtr_ex(zend_string *str, const char *str_from, const char *str_to, size_t trlen)
  2460. {
  2461. zend_string *new_str = NULL;
  2462. size_t i;
  2463. if (UNEXPECTED(trlen < 1)) {
  2464. return zend_string_copy(str);
  2465. } else if (trlen == 1) {
  2466. char ch_from = *str_from;
  2467. char ch_to = *str_to;
  2468. char *output;
  2469. char *input = ZSTR_VAL(str);
  2470. size_t len = ZSTR_LEN(str);
  2471. #ifdef __SSE2__
  2472. if (ZSTR_LEN(str) >= sizeof(__m128i)) {
  2473. __m128i search = _mm_set1_epi8(ch_from);
  2474. __m128i delta = _mm_set1_epi8(ch_to - ch_from);
  2475. do {
  2476. __m128i src = _mm_loadu_si128((__m128i*)(input));
  2477. __m128i mask = _mm_cmpeq_epi8(src, search);
  2478. if (_mm_movemask_epi8(mask)) {
  2479. new_str = zend_string_alloc(ZSTR_LEN(str), 0);
  2480. memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), input - ZSTR_VAL(str));
  2481. output = ZSTR_VAL(new_str) + (input - ZSTR_VAL(str));
  2482. _mm_storeu_si128((__m128i *)(output),
  2483. _mm_add_epi8(src,
  2484. _mm_and_si128(mask, delta)));
  2485. input += sizeof(__m128i);
  2486. output += sizeof(__m128i);
  2487. len -= sizeof(__m128i);
  2488. for (; len >= sizeof(__m128i); input += sizeof(__m128i), output += sizeof(__m128i), len -= sizeof(__m128i)) {
  2489. src = _mm_loadu_si128((__m128i*)(input));
  2490. mask = _mm_cmpeq_epi8(src, search);
  2491. _mm_storeu_si128((__m128i *)(output),
  2492. _mm_add_epi8(src,
  2493. _mm_and_si128(mask, delta)));
  2494. }
  2495. for (; len > 0; input++, output++, len--) {
  2496. *output = (*input == ch_from) ? ch_to : *input;
  2497. }
  2498. *output = 0;
  2499. return new_str;
  2500. }
  2501. input += sizeof(__m128i);
  2502. len -= sizeof(__m128i);
  2503. } while (len >= sizeof(__m128i));
  2504. }
  2505. #endif
  2506. for (; len > 0; input++, len--) {
  2507. if (*input == ch_from) {
  2508. new_str = zend_string_alloc(ZSTR_LEN(str), 0);
  2509. memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), input - ZSTR_VAL(str));
  2510. output = ZSTR_VAL(new_str) + (input - ZSTR_VAL(str));
  2511. *output = ch_to;
  2512. input++;
  2513. output++;
  2514. len--;
  2515. for (; len > 0; input++, output++, len--) {
  2516. *output = (*input == ch_from) ? ch_to : *input;
  2517. }
  2518. *output = 0;
  2519. return new_str;
  2520. }
  2521. }
  2522. } else {
  2523. unsigned char xlat[256];
  2524. memset(xlat, 0, sizeof(xlat));;
  2525. for (i = 0; i < trlen; i++) {
  2526. xlat[(size_t)(unsigned char) str_from[i]] = str_to[i] - str_from[i];
  2527. }
  2528. for (i = 0; i < ZSTR_LEN(str); i++) {
  2529. if (xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]]) {
  2530. new_str = zend_string_alloc(ZSTR_LEN(str), 0);
  2531. memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), i);
  2532. do {
  2533. ZSTR_VAL(new_str)[i] = ZSTR_VAL(str)[i] + xlat[(size_t)(unsigned char) ZSTR_VAL(str)[i]];
  2534. i++;
  2535. } while (i < ZSTR_LEN(str));
  2536. ZSTR_VAL(new_str)[i] = 0;
  2537. return new_str;
  2538. }
  2539. }
  2540. }
  2541. return zend_string_copy(str);
  2542. }
  2543. /* }}} */
  2544. /* {{{ php_strtr_array */
  2545. static void php_strtr_array(zval *return_value, zend_string *input, HashTable *pats)
  2546. {
  2547. const char *str = ZSTR_VAL(input);
  2548. size_t slen = ZSTR_LEN(input);
  2549. zend_ulong num_key;
  2550. zend_string *str_key;
  2551. size_t len, pos, old_pos;
  2552. int num_keys = 0;
  2553. size_t minlen = 128*1024;
  2554. size_t maxlen = 0;
  2555. HashTable str_hash;
  2556. zval *entry;
  2557. const char *key;
  2558. smart_str result = {0};
  2559. zend_ulong bitset[256/sizeof(zend_ulong)];
  2560. zend_ulong *num_bitset;
  2561. /* we will collect all possible key lengths */
  2562. num_bitset = ecalloc((slen + sizeof(zend_ulong)) / sizeof(zend_ulong), sizeof(zend_ulong));
  2563. memset(bitset, 0, sizeof(bitset));
  2564. /* check if original array has numeric keys */
  2565. ZEND_HASH_FOREACH_STR_KEY(pats, str_key) {
  2566. if (UNEXPECTED(!str_key)) {
  2567. num_keys = 1;
  2568. } else {
  2569. len = ZSTR_LEN(str_key);
  2570. if (UNEXPECTED(len < 1)) {
  2571. php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
  2572. continue;
  2573. } else if (UNEXPECTED(len > slen)) {
  2574. /* skip long patterns */
  2575. continue;
  2576. }
  2577. if (len > maxlen) {
  2578. maxlen = len;
  2579. }
  2580. if (len < minlen) {
  2581. minlen = len;
  2582. }
  2583. /* remember possible key length */
  2584. num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
  2585. bitset[((unsigned char)ZSTR_VAL(str_key)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(str_key)[0]) % sizeof(zend_ulong));
  2586. }
  2587. } ZEND_HASH_FOREACH_END();
  2588. if (UNEXPECTED(num_keys)) {
  2589. zend_string *key_used;
  2590. /* we have to rebuild HashTable with numeric keys */
  2591. zend_hash_init(&str_hash, zend_hash_num_elements(pats), NULL, NULL, 0);
  2592. ZEND_HASH_FOREACH_KEY_VAL(pats, num_key, str_key, entry) {
  2593. if (UNEXPECTED(!str_key)) {
  2594. key_used = zend_long_to_str(num_key);
  2595. len = ZSTR_LEN(key_used);
  2596. if (UNEXPECTED(len > slen)) {
  2597. /* skip long patterns */
  2598. zend_string_release(key_used);
  2599. continue;
  2600. }
  2601. if (len > maxlen) {
  2602. maxlen = len;
  2603. }
  2604. if (len < minlen) {
  2605. minlen = len;
  2606. }
  2607. /* remember possible key length */
  2608. num_bitset[len / sizeof(zend_ulong)] |= Z_UL(1) << (len % sizeof(zend_ulong));
  2609. bitset[((unsigned char)ZSTR_VAL(key_used)[0]) / sizeof(zend_ulong)] |= Z_UL(1) << (((unsigned char)ZSTR_VAL(key_used)[0]) % sizeof(zend_ulong));
  2610. } else {
  2611. key_used = str_key;
  2612. len = ZSTR_LEN(key_used);
  2613. if (UNEXPECTED(len > slen)) {
  2614. /* skip long patterns */
  2615. continue;
  2616. }
  2617. }
  2618. zend_hash_add(&str_hash, key_used, entry);
  2619. if (UNEXPECTED(!str_key)) {
  2620. zend_string_release_ex(key_used, 0);
  2621. }
  2622. } ZEND_HASH_FOREACH_END();
  2623. pats = &str_hash;
  2624. }
  2625. if (UNEXPECTED(minlen > maxlen)) {
  2626. /* return the original string */
  2627. if (pats == &str_hash) {
  2628. zend_hash_destroy(&str_hash);
  2629. }
  2630. efree(num_bitset);
  2631. RETURN_STR_COPY(input);
  2632. }
  2633. old_pos = pos = 0;
  2634. while (pos <= slen - minlen) {
  2635. key = str + pos;
  2636. if (bitset[((unsigned char)key[0]) / sizeof(zend_ulong)] & (Z_UL(1) << (((unsigned char)key[0]) % sizeof(zend_ulong)))) {
  2637. len = maxlen;
  2638. if (len > slen - pos) {
  2639. len = slen - pos;
  2640. }
  2641. while (len >= minlen) {
  2642. if ((num_bitset[len / sizeof(zend_ulong)] & (Z_UL(1) << (len % sizeof(zend_ulong))))) {
  2643. entry = zend_hash_str_find(pats, key, len);
  2644. if (entry != NULL) {
  2645. zend_string *tmp;
  2646. zend_string *s = zval_get_tmp_string(entry, &tmp);
  2647. smart_str_appendl(&result, str + old_pos, pos - old_pos);
  2648. smart_str_append(&result, s);
  2649. old_pos = pos + len;
  2650. pos = old_pos - 1;
  2651. zend_tmp_string_release(tmp);
  2652. break;
  2653. }
  2654. }
  2655. len--;
  2656. }
  2657. }
  2658. pos++;
  2659. }
  2660. if (result.s) {
  2661. smart_str_appendl(&result, str + old_pos, slen - old_pos);
  2662. smart_str_0(&result);
  2663. RETVAL_NEW_STR(result.s);
  2664. } else {
  2665. smart_str_free(&result);
  2666. RETVAL_STR_COPY(input);
  2667. }
  2668. if (pats == &str_hash) {
  2669. zend_hash_destroy(&str_hash);
  2670. }
  2671. efree(num_bitset);
  2672. }
  2673. /* }}} */
  2674. /* {{{ count_chars */
  2675. static zend_always_inline zend_long count_chars(const char *p, zend_long length, char ch)
  2676. {
  2677. zend_long count = 0;
  2678. const char *endp;
  2679. #ifdef __SSE2__
  2680. if (length >= sizeof(__m128i)) {
  2681. __m128i search = _mm_set1_epi8(ch);
  2682. do {
  2683. __m128i src = _mm_loadu_si128((__m128i*)(p));
  2684. uint32_t mask = _mm_movemask_epi8(_mm_cmpeq_epi8(src, search));
  2685. // TODO: It would be great to use POPCNT, but it's available only with SSE4.1
  2686. #if 1
  2687. while (mask != 0) {
  2688. count++;
  2689. mask = mask & (mask - 1);
  2690. }
  2691. #else
  2692. if (mask) {
  2693. mask = mask - ((mask >> 1) & 0x5555);
  2694. mask = (mask & 0x3333) + ((mask >> 2) & 0x3333);
  2695. mask = (mask + (mask >> 4)) & 0x0F0F;
  2696. mask = (mask + (mask >> 8)) & 0x00ff;
  2697. count += mask;
  2698. }
  2699. #endif
  2700. p += sizeof(__m128i);
  2701. length -= sizeof(__m128i);
  2702. } while (length >= sizeof(__m128i));
  2703. }
  2704. endp = p + length;
  2705. while (p != endp) {
  2706. count += (*p == ch);
  2707. p++;
  2708. }
  2709. #else
  2710. endp = p + length;
  2711. while ((p = memchr(p, ch, endp-p))) {
  2712. count++;
  2713. p++;
  2714. }
  2715. #endif
  2716. return count;
  2717. }
  2718. /* }}} */
  2719. /* {{{ php_char_to_str_ex */
  2720. static zend_string* php_char_to_str_ex(zend_string *str, char from, char *to, size_t to_len, int case_sensitivity, zend_long *replace_count)
  2721. {
  2722. zend_string *result;
  2723. size_t char_count;
  2724. int lc_from = 0;
  2725. const char *source, *source_end;
  2726. char *target;
  2727. if (case_sensitivity) {
  2728. char_count = count_chars(ZSTR_VAL(str), ZSTR_LEN(str), from);
  2729. } else {
  2730. lc_from = tolower(from);
  2731. char_count = 0;
  2732. source_end = ZSTR_VAL(str) + ZSTR_LEN(str);
  2733. for (source = ZSTR_VAL(str); source < source_end; source++) {
  2734. if (tolower(*source) == lc_from) {
  2735. char_count++;
  2736. }
  2737. }
  2738. }
  2739. if (char_count == 0) {
  2740. return zend_string_copy(str);
  2741. }
  2742. if (replace_count) {
  2743. *replace_count += char_count;
  2744. }
  2745. if (to_len > 0) {
  2746. result = zend_string_safe_alloc(char_count, to_len - 1, ZSTR_LEN(str), 0);
  2747. } else {
  2748. result = zend_string_alloc(ZSTR_LEN(str) - char_count, 0);
  2749. }
  2750. target = ZSTR_VAL(result);
  2751. if (case_sensitivity) {
  2752. char *p = ZSTR_VAL(str), *e = p + ZSTR_LEN(str), *s = ZSTR_VAL(str);
  2753. while ((p = memchr(p, from, (e - p)))) {
  2754. memcpy(target, s, (p - s));
  2755. target += p - s;
  2756. memcpy(target, to, to_len);
  2757. target += to_len;
  2758. p++;
  2759. s = p;
  2760. if (--char_count == 0) break;
  2761. }
  2762. if (s < e) {
  2763. memcpy(target, s, (e - s));
  2764. target += e - s;
  2765. }
  2766. } else {
  2767. source_end = ZSTR_VAL(str) + ZSTR_LEN(str);
  2768. for (source = ZSTR_VAL(str); source < source_end; source++) {
  2769. if (tolower(*source) == lc_from) {
  2770. memcpy(target, to, to_len);
  2771. target += to_len;
  2772. } else {
  2773. *target = *source;
  2774. target++;
  2775. }
  2776. }
  2777. }
  2778. *target = 0;
  2779. return result;
  2780. }
  2781. /* }}} */
  2782. /* {{{ php_str_to_str_ex */
  2783. static zend_string *php_str_to_str_ex(zend_string *haystack,
  2784. const char *needle, size_t needle_len, const char *str, size_t str_len, zend_long *replace_count)
  2785. {
  2786. if (needle_len < ZSTR_LEN(haystack)) {
  2787. zend_string *new_str;
  2788. const char *end;
  2789. const char *p, *r;
  2790. char *e;
  2791. if (needle_len == str_len) {
  2792. new_str = NULL;
  2793. end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  2794. for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  2795. if (!new_str) {
  2796. new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
  2797. }
  2798. memcpy(ZSTR_VAL(new_str) + (r - ZSTR_VAL(haystack)), str, str_len);
  2799. (*replace_count)++;
  2800. }
  2801. if (!new_str) {
  2802. goto nothing_todo;
  2803. }
  2804. return new_str;
  2805. } else {
  2806. size_t count = 0;
  2807. const char *o = ZSTR_VAL(haystack);
  2808. const char *n = needle;
  2809. const char *endp = o + ZSTR_LEN(haystack);
  2810. while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
  2811. o += needle_len;
  2812. count++;
  2813. }
  2814. if (count == 0) {
  2815. /* Needle doesn't occur, shortcircuit the actual replacement. */
  2816. goto nothing_todo;
  2817. }
  2818. if (str_len > needle_len) {
  2819. new_str = zend_string_safe_alloc(count, str_len - needle_len, ZSTR_LEN(haystack), 0);
  2820. } else {
  2821. new_str = zend_string_alloc(count * (str_len - needle_len) + ZSTR_LEN(haystack), 0);
  2822. }
  2823. e = ZSTR_VAL(new_str);
  2824. end = ZSTR_VAL(haystack) + ZSTR_LEN(haystack);
  2825. for (p = ZSTR_VAL(haystack); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  2826. memcpy(e, p, r - p);
  2827. e += r - p;
  2828. memcpy(e, str, str_len);
  2829. e += str_len;
  2830. (*replace_count)++;
  2831. }
  2832. if (p < end) {
  2833. memcpy(e, p, end - p);
  2834. e += end - p;
  2835. }
  2836. *e = '\0';
  2837. return new_str;
  2838. }
  2839. } else if (needle_len > ZSTR_LEN(haystack) || memcmp(ZSTR_VAL(haystack), needle, ZSTR_LEN(haystack))) {
  2840. nothing_todo:
  2841. return zend_string_copy(haystack);
  2842. } else {
  2843. (*replace_count)++;
  2844. return zend_string_init_fast(str, str_len);
  2845. }
  2846. }
  2847. /* }}} */
  2848. /* {{{ php_str_to_str_i_ex */
  2849. static zend_string *php_str_to_str_i_ex(zend_string *haystack, const char *lc_haystack,
  2850. zend_string *needle, const char *str, size_t str_len, zend_long *replace_count)
  2851. {
  2852. zend_string *new_str = NULL;
  2853. zend_string *lc_needle;
  2854. if (ZSTR_LEN(needle) < ZSTR_LEN(haystack)) {
  2855. const char *end;
  2856. const char *p, *r;
  2857. char *e;
  2858. if (ZSTR_LEN(needle) == str_len) {
  2859. lc_needle = php_string_tolower(needle);
  2860. end = lc_haystack + ZSTR_LEN(haystack);
  2861. for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
  2862. if (!new_str) {
  2863. new_str = zend_string_init(ZSTR_VAL(haystack), ZSTR_LEN(haystack), 0);
  2864. }
  2865. memcpy(ZSTR_VAL(new_str) + (r - lc_haystack), str, str_len);
  2866. (*replace_count)++;
  2867. }
  2868. zend_string_release_ex(lc_needle, 0);
  2869. if (!new_str) {
  2870. goto nothing_todo;
  2871. }
  2872. return new_str;
  2873. } else {
  2874. size_t count = 0;
  2875. const char *o = lc_haystack;
  2876. const char *n;
  2877. const char *endp = o + ZSTR_LEN(haystack);
  2878. lc_needle = php_string_tolower(needle);
  2879. n = ZSTR_VAL(lc_needle);
  2880. while ((o = (char*)php_memnstr(o, n, ZSTR_LEN(lc_needle), endp))) {
  2881. o += ZSTR_LEN(lc_needle);
  2882. count++;
  2883. }
  2884. if (count == 0) {
  2885. /* Needle doesn't occur, shortcircuit the actual replacement. */
  2886. zend_string_release_ex(lc_needle, 0);
  2887. goto nothing_todo;
  2888. }
  2889. if (str_len > ZSTR_LEN(lc_needle)) {
  2890. new_str = zend_string_safe_alloc(count, str_len - ZSTR_LEN(lc_needle), ZSTR_LEN(haystack), 0);
  2891. } else {
  2892. new_str = zend_string_alloc(count * (str_len - ZSTR_LEN(lc_needle)) + ZSTR_LEN(haystack), 0);
  2893. }
  2894. e = ZSTR_VAL(new_str);
  2895. end = lc_haystack + ZSTR_LEN(haystack);
  2896. for (p = lc_haystack; (r = (char*)php_memnstr(p, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle), end)); p = r + ZSTR_LEN(lc_needle)) {
  2897. memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), r - p);
  2898. e += r - p;
  2899. memcpy(e, str, str_len);
  2900. e += str_len;
  2901. (*replace_count)++;
  2902. }
  2903. if (p < end) {
  2904. memcpy(e, ZSTR_VAL(haystack) + (p - lc_haystack), end - p);
  2905. e += end - p;
  2906. }
  2907. *e = '\0';
  2908. zend_string_release_ex(lc_needle, 0);
  2909. return new_str;
  2910. }
  2911. } else if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
  2912. nothing_todo:
  2913. return zend_string_copy(haystack);
  2914. } else {
  2915. lc_needle = php_string_tolower(needle);
  2916. if (memcmp(lc_haystack, ZSTR_VAL(lc_needle), ZSTR_LEN(lc_needle))) {
  2917. zend_string_release_ex(lc_needle, 0);
  2918. goto nothing_todo;
  2919. }
  2920. zend_string_release_ex(lc_needle, 0);
  2921. new_str = zend_string_init(str, str_len, 0);
  2922. (*replace_count)++;
  2923. return new_str;
  2924. }
  2925. }
  2926. /* }}} */
  2927. /* {{{ php_str_to_str */
  2928. PHPAPI zend_string *php_str_to_str(const char *haystack, size_t length, const char *needle, size_t needle_len, const char *str, size_t str_len)
  2929. {
  2930. zend_string *new_str;
  2931. if (needle_len < length) {
  2932. const char *end;
  2933. const char *s, *p;
  2934. char *e, *r;
  2935. if (needle_len == str_len) {
  2936. new_str = zend_string_init(haystack, length, 0);
  2937. end = ZSTR_VAL(new_str) + length;
  2938. for (p = ZSTR_VAL(new_str); (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  2939. memcpy(r, str, str_len);
  2940. }
  2941. return new_str;
  2942. } else {
  2943. if (str_len < needle_len) {
  2944. new_str = zend_string_alloc(length, 0);
  2945. } else {
  2946. size_t count = 0;
  2947. const char *o = haystack;
  2948. const char *n = needle;
  2949. const char *endp = o + length;
  2950. while ((o = (char*)php_memnstr(o, n, needle_len, endp))) {
  2951. o += needle_len;
  2952. count++;
  2953. }
  2954. if (count == 0) {
  2955. /* Needle doesn't occur, shortcircuit the actual replacement. */
  2956. new_str = zend_string_init(haystack, length, 0);
  2957. return new_str;
  2958. } else {
  2959. if (str_len > needle_len) {
  2960. new_str = zend_string_safe_alloc(count, str_len - needle_len, length, 0);
  2961. } else {
  2962. new_str = zend_string_alloc(count * (str_len - needle_len) + length, 0);
  2963. }
  2964. }
  2965. }
  2966. s = e = ZSTR_VAL(new_str);
  2967. end = haystack + length;
  2968. for (p = haystack; (r = (char*)php_memnstr(p, needle, needle_len, end)); p = r + needle_len) {
  2969. memcpy(e, p, r - p);
  2970. e += r - p;
  2971. memcpy(e, str, str_len);
  2972. e += str_len;
  2973. }
  2974. if (p < end) {
  2975. memcpy(e, p, end - p);
  2976. e += end - p;
  2977. }
  2978. *e = '\0';
  2979. new_str = zend_string_truncate(new_str, e - s, 0);
  2980. return new_str;
  2981. }
  2982. } else if (needle_len > length || memcmp(haystack, needle, length)) {
  2983. new_str = zend_string_init(haystack, length, 0);
  2984. return new_str;
  2985. } else {
  2986. new_str = zend_string_init(str, str_len, 0);
  2987. return new_str;
  2988. }
  2989. }
  2990. /* }}} */
  2991. /* {{{ Translates characters in str using given translation tables */
  2992. PHP_FUNCTION(strtr)
  2993. {
  2994. zend_string *str, *from_str = NULL;
  2995. HashTable *from_ht = NULL;
  2996. char *to = NULL;
  2997. size_t to_len = 0;
  2998. ZEND_PARSE_PARAMETERS_START(2, 3)
  2999. Z_PARAM_STR(str)
  3000. Z_PARAM_ARRAY_HT_OR_STR(from_ht, from_str)
  3001. Z_PARAM_OPTIONAL
  3002. Z_PARAM_STRING_OR_NULL(to, to_len)
  3003. ZEND_PARSE_PARAMETERS_END();
  3004. if (!to && from_ht == NULL) {
  3005. zend_argument_type_error(2, "must be of type array, string given");
  3006. RETURN_THROWS();
  3007. } else if (to && from_str == NULL) {
  3008. zend_argument_type_error(2, "must be of type string, array given");
  3009. RETURN_THROWS();
  3010. }
  3011. /* shortcut for empty string */
  3012. if (ZSTR_LEN(str) == 0) {
  3013. RETURN_EMPTY_STRING();
  3014. }
  3015. if (!to) {
  3016. if (zend_hash_num_elements(from_ht) < 1) {
  3017. RETURN_STR_COPY(str);
  3018. } else if (zend_hash_num_elements(from_ht) == 1) {
  3019. zend_long num_key;
  3020. zend_string *str_key, *tmp_str, *replace, *tmp_replace;
  3021. zval *entry;
  3022. ZEND_HASH_FOREACH_KEY_VAL(from_ht, num_key, str_key, entry) {
  3023. tmp_str = NULL;
  3024. if (UNEXPECTED(!str_key)) {
  3025. str_key = tmp_str = zend_long_to_str(num_key);
  3026. }
  3027. replace = zval_get_tmp_string(entry, &tmp_replace);
  3028. if (ZSTR_LEN(str_key) < 1) {
  3029. php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
  3030. RETVAL_STR_COPY(str);
  3031. } else if (ZSTR_LEN(str_key) == 1) {
  3032. RETVAL_STR(php_char_to_str_ex(str,
  3033. ZSTR_VAL(str_key)[0],
  3034. ZSTR_VAL(replace),
  3035. ZSTR_LEN(replace),
  3036. 1,
  3037. NULL));
  3038. } else {
  3039. zend_long dummy;
  3040. RETVAL_STR(php_str_to_str_ex(str,
  3041. ZSTR_VAL(str_key), ZSTR_LEN(str_key),
  3042. ZSTR_VAL(replace), ZSTR_LEN(replace), &dummy));
  3043. }
  3044. zend_tmp_string_release(tmp_str);
  3045. zend_tmp_string_release(tmp_replace);
  3046. return;
  3047. } ZEND_HASH_FOREACH_END();
  3048. } else {
  3049. php_strtr_array(return_value, str, from_ht);
  3050. }
  3051. } else {
  3052. RETURN_STR(php_strtr_ex(str,
  3053. ZSTR_VAL(from_str),
  3054. to,
  3055. MIN(ZSTR_LEN(from_str), to_len)));
  3056. }
  3057. }
  3058. /* }}} */
  3059. /* {{{ Reverse a string */
  3060. #if ZEND_INTRIN_SSSE3_NATIVE
  3061. #include <tmmintrin.h>
  3062. #elif defined(__aarch64__)
  3063. #include <arm_neon.h>
  3064. #endif
  3065. PHP_FUNCTION(strrev)
  3066. {
  3067. zend_string *str;
  3068. const char *s, *e;
  3069. char *p;
  3070. zend_string *n;
  3071. ZEND_PARSE_PARAMETERS_START(1, 1)
  3072. Z_PARAM_STR(str)
  3073. ZEND_PARSE_PARAMETERS_END();
  3074. n = zend_string_alloc(ZSTR_LEN(str), 0);
  3075. p = ZSTR_VAL(n);
  3076. s = ZSTR_VAL(str);
  3077. e = s + ZSTR_LEN(str);
  3078. --e;
  3079. #if ZEND_INTRIN_SSSE3_NATIVE
  3080. if (e - s > 15) {
  3081. const __m128i map = _mm_set_epi8(
  3082. 0, 1, 2, 3,
  3083. 4, 5, 6, 7,
  3084. 8, 9, 10, 11,
  3085. 12, 13, 14, 15);
  3086. do {
  3087. const __m128i str = _mm_loadu_si128((__m128i *)(e - 15));
  3088. _mm_storeu_si128((__m128i *)p, _mm_shuffle_epi8(str, map));
  3089. p += 16;
  3090. e -= 16;
  3091. } while (e - s > 15);
  3092. }
  3093. #elif defined(__aarch64__)
  3094. if (e - s > 15) {
  3095. do {
  3096. const uint8x16_t str = vld1q_u8((uint8_t *)(e - 15));
  3097. /* Synthesize rev128 with a rev64 + ext. */
  3098. const uint8x16_t rev = vrev64q_u8(str);
  3099. const uint8x16_t ext = (uint8x16_t)
  3100. vextq_u64((uint64x2_t)rev, (uint64x2_t)rev, 1);
  3101. vst1q_u8((uint8_t *)p, ext);
  3102. p += 16;
  3103. e -= 16;
  3104. } while (e - s > 15);
  3105. }
  3106. #endif
  3107. while (e >= s) {
  3108. *p++ = *e--;
  3109. }
  3110. *p = '\0';
  3111. RETVAL_NEW_STR(n);
  3112. }
  3113. /* }}} */
  3114. /* {{{ php_similar_str */
  /*
   * Find the longest run of bytes common to txt1[0..len1) and txt2[0..len2).
   *
   * Outputs:
   *   *max    length of the longest common run (0 when there is none)
   *   *pos1   start of that run in txt1 (written only when a run is found)
   *   *pos2   start of that run in txt2 (written only when a run is found)
   *   *count  number of times a strictly longer run replaced the previous best
   *
   * Ties keep the first run found, scanning txt1 in the outer loop and txt2 in
   * the inner loop.
   */
  static void php_similar_str(const char *txt1, size_t len1, const char *txt2, size_t len2, size_t *pos1, size_t *pos2, size_t *max, size_t *count)
  {
      size_t i, j;

      *max = 0;
      *count = 0;

      for (i = 0; i < len1; i++) {
          for (j = 0; j < len2; j++) {
              size_t run = 0;

              /* Extend the match starting at (i, j) as far as both inputs allow. */
              while (i + run < len1 && j + run < len2 && txt1[i + run] == txt2[j + run]) {
                  run++;
              }

              if (run > *max) {
                  *max = run;
                  *count += 1;
                  *pos1 = i;
                  *pos2 = j;
              }
          }
      }
  }
  3135. /* }}} */
  3136. /* {{{ php_similar_char */
  /*
   * Number of bytes two buffers have in common, computed recursively: take the
   * longest common run reported by php_similar_str(), then add the result for
   * the parts to its left and for the parts to its right.
   */
  static size_t php_similar_char(const char *txt1, size_t len1, const char *txt2, size_t len2)
  {
      size_t pos1 = 0, pos2 = 0, max, count;
      size_t total;

      php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max, &count);
      if (max == 0) {
          /* Nothing in common. */
          return 0;
      }

      total = max;

      /* Left-hand sides are examined only when both are non-empty and the
       * best run was replaced at least once during the scan (count > 1). */
      if (pos1 && pos2 && count > 1) {
          total += php_similar_char(txt1, pos1, txt2, pos2);
      }

      /* Right-hand sides, when both inputs extend beyond the run. */
      if ((pos1 + max < len1) && (pos2 + max < len2)) {
          const size_t skip1 = pos1 + max;
          const size_t skip2 = pos2 + max;

          total += php_similar_char(txt1 + skip1, len1 - skip1,
                                    txt2 + skip2, len2 - skip2);
      }

      return total;
  }
  3154. /* }}} */
  3155. /* {{{ Calculates the similarity between two strings */
  3156. PHP_FUNCTION(similar_text)
  3157. {
  3158. zend_string *t1, *t2;
  3159. zval *percent = NULL;
  3160. int ac = ZEND_NUM_ARGS();
  3161. size_t sim;
  3162. ZEND_PARSE_PARAMETERS_START(2, 3)
  3163. Z_PARAM_STR(t1)
  3164. Z_PARAM_STR(t2)
  3165. Z_PARAM_OPTIONAL
  3166. Z_PARAM_ZVAL(percent)
  3167. ZEND_PARSE_PARAMETERS_END();
  3168. if (ZSTR_LEN(t1) + ZSTR_LEN(t2) == 0) {
  3169. if (ac > 2) {
  3170. ZEND_TRY_ASSIGN_REF_DOUBLE(percent, 0);
  3171. }
  3172. RETURN_LONG(0);
  3173. }
  3174. sim = php_similar_char(ZSTR_VAL(t1), ZSTR_LEN(t1), ZSTR_VAL(t2), ZSTR_LEN(t2));
  3175. if (ac > 2) {
  3176. ZEND_TRY_ASSIGN_REF_DOUBLE(percent, sim * 200.0 / (ZSTR_LEN(t1) + ZSTR_LEN(t2)));
  3177. }
  3178. RETURN_LONG(sim);
  3179. }
  3180. /* }}} */
  3181. /* {{{ Escapes all chars mentioned in charlist with backslash. It creates octal representations if asked to backslash characters with 8th bit set or with ASCII<32 (except '\n', '\r', '\t' etc...) */
  3182. PHP_FUNCTION(addcslashes)
  3183. {
  3184. zend_string *str, *what;
  3185. ZEND_PARSE_PARAMETERS_START(2, 2)
  3186. Z_PARAM_STR(str)
  3187. Z_PARAM_STR(what)
  3188. ZEND_PARSE_PARAMETERS_END();
  3189. if (ZSTR_LEN(str) == 0) {
  3190. RETURN_EMPTY_STRING();
  3191. }
  3192. if (ZSTR_LEN(what) == 0) {
  3193. RETURN_STR_COPY(str);
  3194. }
  3195. RETURN_STR(php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), ZSTR_VAL(what), ZSTR_LEN(what)));
  3196. }
  3197. /* }}} */
  3198. /* {{{ Escapes single quote, double quotes and backslash characters in a string with backslashes */
  3199. PHP_FUNCTION(addslashes)
  3200. {
  3201. zend_string *str;
  3202. ZEND_PARSE_PARAMETERS_START(1, 1)
  3203. Z_PARAM_STR(str)
  3204. ZEND_PARSE_PARAMETERS_END();
  3205. if (ZSTR_LEN(str) == 0) {
  3206. RETURN_EMPTY_STRING();
  3207. }
  3208. RETURN_STR(php_addslashes(str));
  3209. }
  3210. /* }}} */
  3211. /* {{{ Strips backslashes from a string. Uses C-style conventions */
  3212. PHP_FUNCTION(stripcslashes)
  3213. {
  3214. zend_string *str;
  3215. ZEND_PARSE_PARAMETERS_START(1, 1)
  3216. Z_PARAM_STR(str)
  3217. ZEND_PARSE_PARAMETERS_END();
  3218. ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  3219. php_stripcslashes(Z_STR_P(return_value));
  3220. }
  3221. /* }}} */
  3222. /* {{{ Strips backslashes from a string */
  3223. PHP_FUNCTION(stripslashes)
  3224. {
  3225. zend_string *str;
  3226. ZEND_PARSE_PARAMETERS_START(1, 1)
  3227. Z_PARAM_STR(str)
  3228. ZEND_PARSE_PARAMETERS_END();
  3229. ZVAL_STRINGL(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  3230. php_stripslashes(Z_STR_P(return_value));
  3231. }
  3232. /* }}} */
  3233. /* {{{ php_stripcslashes */
  3234. PHPAPI void php_stripcslashes(zend_string *str)
  3235. {
  3236. const char *source, *end;
  3237. char *target;
  3238. size_t nlen = ZSTR_LEN(str), i;
  3239. char numtmp[4];
  3240. for (source = (char*)ZSTR_VAL(str), end = source + ZSTR_LEN(str), target = ZSTR_VAL(str); source < end; source++) {
  3241. if (*source == '\\' && source + 1 < end) {
  3242. source++;
  3243. switch (*source) {
  3244. case 'n': *target++='\n'; nlen--; break;
  3245. case 'r': *target++='\r'; nlen--; break;
  3246. case 'a': *target++='\a'; nlen--; break;
  3247. case 't': *target++='\t'; nlen--; break;
  3248. case 'v': *target++='\v'; nlen--; break;
  3249. case 'b': *target++='\b'; nlen--; break;
  3250. case 'f': *target++='\f'; nlen--; break;
  3251. case '\\': *target++='\\'; nlen--; break;
  3252. case 'x':
  3253. if (source+1 < end && isxdigit((int)(*(source+1)))) {
  3254. numtmp[0] = *++source;
  3255. if (source+1 < end && isxdigit((int)(*(source+1)))) {
  3256. numtmp[1] = *++source;
  3257. numtmp[2] = '\0';
  3258. nlen-=3;
  3259. } else {
  3260. numtmp[1] = '\0';
  3261. nlen-=2;
  3262. }
  3263. *target++=(char)strtol(numtmp, NULL, 16);
  3264. break;
  3265. }
  3266. ZEND_FALLTHROUGH;
  3267. default:
  3268. i=0;
  3269. while (source < end && *source >= '0' && *source <= '7' && i<3) {
  3270. numtmp[i++] = *source++;
  3271. }
  3272. if (i) {
  3273. numtmp[i]='\0';
  3274. *target++=(char)strtol(numtmp, NULL, 8);
  3275. nlen-=i;
  3276. source--;
  3277. } else {
  3278. *target++=*source;
  3279. nlen--;
  3280. }
  3281. }
  3282. } else {
  3283. *target++=*source;
  3284. }
  3285. }
  3286. if (nlen != 0) {
  3287. *target='\0';
  3288. }
  3289. ZSTR_LEN(str) = nlen;
  3290. }
  3291. /* }}} */
  3292. /* {{{ php_addcslashes_str */
  3293. PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, const char *what, size_t wlength)
  3294. {
  3295. char flags[256];
  3296. char *target;
  3297. const char *source, *end;
  3298. char c;
  3299. size_t newlen;
  3300. zend_string *new_str = zend_string_safe_alloc(4, len, 0, 0);
  3301. php_charmask((const unsigned char *) what, wlength, flags);
  3302. for (source = str, end = source + len, target = ZSTR_VAL(new_str); source < end; source++) {
  3303. c = *source;
  3304. if (flags[(unsigned char)c]) {
  3305. if ((unsigned char) c < 32 || (unsigned char) c > 126) {
  3306. *target++ = '\\';
  3307. switch (c) {
  3308. case '\n': *target++ = 'n'; break;
  3309. case '\t': *target++ = 't'; break;
  3310. case '\r': *target++ = 'r'; break;
  3311. case '\a': *target++ = 'a'; break;
  3312. case '\v': *target++ = 'v'; break;
  3313. case '\b': *target++ = 'b'; break;
  3314. case '\f': *target++ = 'f'; break;
  3315. default: target += sprintf(target, "%03o", (unsigned char) c);
  3316. }
  3317. continue;
  3318. }
  3319. *target++ = '\\';
  3320. }
  3321. *target++ = c;
  3322. }
  3323. *target = 0;
  3324. newlen = target - ZSTR_VAL(new_str);
  3325. if (newlen < len * 4) {
  3326. new_str = zend_string_truncate(new_str, newlen, 0);
  3327. }
  3328. return new_str;
  3329. }
  3330. /* }}} */
  3331. /* {{{ php_addcslashes */
  3332. PHPAPI zend_string *php_addcslashes(zend_string *str, const char *what, size_t wlength)
  3333. {
  3334. return php_addcslashes_str(ZSTR_VAL(str), ZSTR_LEN(str), what, wlength);
  3335. }
  3336. /* }}} */
  3337. /* {{{ php_addslashes */
  3338. #if ZEND_INTRIN_SSE4_2_NATIVE
  3339. # include <nmmintrin.h>
  3340. # include "Zend/zend_bitset.h"
  3341. #elif ZEND_INTRIN_SSE4_2_RESOLVER
  3342. # include <nmmintrin.h>
  3343. # include "Zend/zend_bitset.h"
  3344. # include "Zend/zend_cpuinfo.h"
  3345. ZEND_INTRIN_SSE4_2_FUNC_DECL(zend_string *php_addslashes_sse42(zend_string *str));
  3346. zend_string *php_addslashes_default(zend_string *str);
  3347. ZEND_INTRIN_SSE4_2_FUNC_DECL(void php_stripslashes_sse42(zend_string *str));
  3348. void php_stripslashes_default(zend_string *str);
  3349. # if ZEND_INTRIN_SSE4_2_FUNC_PROTO
  3350. PHPAPI zend_string *php_addslashes(zend_string *str) __attribute__((ifunc("resolve_addslashes")));
  3351. PHPAPI void php_stripslashes(zend_string *str) __attribute__((ifunc("resolve_stripslashes")));
  3352. typedef zend_string *(*php_addslashes_func_t)(zend_string *);
  3353. typedef void (*php_stripslashes_func_t)(zend_string *);
  3354. ZEND_NO_SANITIZE_ADDRESS
  3355. ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
  3356. static php_addslashes_func_t resolve_addslashes(void) {
  3357. if (zend_cpu_supports_sse42()) {
  3358. return php_addslashes_sse42;
  3359. }
  3360. return php_addslashes_default;
  3361. }
  3362. ZEND_NO_SANITIZE_ADDRESS
  3363. ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
  3364. static php_stripslashes_func_t resolve_stripslashes(void) {
  3365. if (zend_cpu_supports_sse42()) {
  3366. return php_stripslashes_sse42;
  3367. }
  3368. return php_stripslashes_default;
  3369. }
  3370. # else /* ZEND_INTRIN_SSE4_2_FUNC_PTR */
  3371. static zend_string *(*php_addslashes_ptr)(zend_string *str) = NULL;
  3372. static void (*php_stripslashes_ptr)(zend_string *str) = NULL;
  3373. PHPAPI zend_string *php_addslashes(zend_string *str) {
  3374. return php_addslashes_ptr(str);
  3375. }
  3376. PHPAPI void php_stripslashes(zend_string *str) {
  3377. php_stripslashes_ptr(str);
  3378. }
  3379. /* {{{ PHP_MINIT_FUNCTION */
  3380. PHP_MINIT_FUNCTION(string_intrin)
  3381. {
  3382. if (zend_cpu_supports_sse42()) {
  3383. php_addslashes_ptr = php_addslashes_sse42;
  3384. php_stripslashes_ptr = php_stripslashes_sse42;
  3385. } else {
  3386. php_addslashes_ptr = php_addslashes_default;
  3387. php_stripslashes_ptr = php_stripslashes_default;
  3388. }
  3389. return SUCCESS;
  3390. }
  3391. /* }}} */
  3392. # endif
  3393. #endif
  #if ZEND_INTRIN_SSE4_2_NATIVE || ZEND_INTRIN_SSE4_2_RESOLVER
  /*
   * Copy the 16 bytes at `source` to `target`, escaping every byte whose bit is
   * set in `mask` (bit i describes source[i]): such a byte is preceded by a
   * backslash, and a NUL byte is additionally written as '0'.
   *
   * `mask` must be non-zero. Returns the advanced `target`.
   */
  static zend_always_inline char *php_addslashes_sse42_escape16(const char *source, char *target, uint32_t mask)
  {
      int pos = 0;

      do {
          /* n = number of plain bytes before the next byte to escape. */
          int i, n = zend_ulong_ntz(mask);

          for (i = 0; i < n; i++) {
              *target++ = source[pos + i];
          }
          pos += n;
          *target++ = '\\';
          if (source[pos] == '\0') {
              *target++ = '0';
          } else {
              *target++ = source[pos];
          }
          pos++;
          mask = mask >> (n + 1);
      } while (mask);

      /* Plain bytes after the last escaped one. */
      for (; pos < 16; pos++) {
          *target++ = source[pos];
      }

      return target;
  }

  /*
   * addslashes() using SSE4.2 string-compare instructions.
   *
   * Escapes ', ", \ with a preceding backslash and turns a NUL byte into the
   * two characters "\0". Returns zend_string_copy(str) when nothing needs
   * escaping, ZSTR_EMPTY_ALLOC() for a NULL argument, and a new string
   * otherwise.
   */
  # if ZEND_INTRIN_SSE4_2_NATIVE
  PHPAPI zend_string *php_addslashes(zend_string *str) /* {{{ */
  # elif ZEND_INTRIN_SSE4_2_RESOLVER
  zend_string *php_addslashes_sse42(zend_string *str)
  # endif
  {
      ZEND_SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
      __m128i w128, s128;
      uint32_t res = 0;
      char *target;
      const char *source, *end;
      size_t offset;
      zend_string *new_str;

      if (!str) {
          return ZSTR_EMPTY_ALLOC();
      }

      source = ZSTR_VAL(str);
      end = source + ZSTR_LEN(str);

      /* Phase 1: look for the first byte that needs escaping, 16 bytes at a time. */
      if (ZSTR_LEN(str) > 15) {
          w128 = _mm_load_si128((__m128i *)slashchars);
          do {
              s128 = _mm_loadu_si128((__m128i *)source);
              res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
              if (res) {
                  goto do_escape;
              }
              source += 16;
          } while ((end - source) > 15);
      }

      /* ...then byte by byte over the remaining (at most 15) bytes. */
      while (source < end) {
          switch (*source) {
              case '\0':
              case '\'':
              case '\"':
              case '\\':
                  goto do_escape;
              default:
                  source++;
                  break;
          }
      }

      /* Nothing to escape. */
      return zend_string_copy(str);

  do_escape:
      /* Phase 2: worst case every remaining byte doubles in size. */
      offset = source - (char *)ZSTR_VAL(str);
      new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
      memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
      target = ZSTR_VAL(new_str) + offset;

      if (res) {
          /* Arrived from the vector scan: finish the chunk it stopped on. */
          target = php_addslashes_sse42_escape16(source, target, res);
          source += 16;
      } else if (end - source > 15) {
          /* Arrived from the byte scan of a short prefix; the pattern register
           * is loaded here because the loop below needs it. */
          w128 = _mm_load_si128((__m128i *)slashchars);
      }

      for (; end - source > 15; source += 16) {
          s128 = _mm_loadu_si128((__m128i *)source);
          res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
          if (res) {
              target = php_addslashes_sse42_escape16(source, target, res);
          } else {
              _mm_storeu_si128((__m128i*)target, s128);
              target += 16;
          }
      }

      /* Scalar tail. */
      while (source < end) {
          switch (*source) {
              case '\0':
                  *target++ = '\\';
                  *target++ = '0';
                  break;
              case '\'':
              case '\"':
              case '\\':
                  *target++ = '\\';
                  ZEND_FALLTHROUGH;
              default:
                  *target++ = *source;
                  break;
          }
          source++;
      }

      *target = '\0';

      /* Reallocate only when more than 16 bytes would be given back. */
      if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
          new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
      } else {
          ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
      }

      return new_str;
  }
  /* }}} */
  #endif
  3522. #ifdef __aarch64__
  /*
   * 16 bytes viewed either as individual bytes or as two 64-bit words; the
   * word view allows testing all 16 bytes for "any non-zero" with one OR.
   */
  typedef union {
      uint8_t mem[16];  /* byte view */
      uint64_t dw[2];   /* two-word view */
  } quad_word;
  3527. static zend_always_inline quad_word aarch64_contains_slash_chars(uint8x16_t x) {
  3528. uint8x16_t s0 = vceqq_u8(x, vdupq_n_u8('\0'));
  3529. uint8x16_t s1 = vceqq_u8(x, vdupq_n_u8('\''));
  3530. uint8x16_t s2 = vceqq_u8(x, vdupq_n_u8('\"'));
  3531. uint8x16_t s3 = vceqq_u8(x, vdupq_n_u8('\\'));
  3532. uint8x16_t s01 = vorrq_u8(s0, s1);
  3533. uint8x16_t s23 = vorrq_u8(s2, s3);
  3534. uint8x16_t s0123 = vorrq_u8(s01, s23);
  3535. quad_word qw;
  3536. vst1q_u8(qw.mem, s0123);
  3537. return qw;
  3538. }
  3539. static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *source, char *target)
  3540. {
  3541. int i = 0;
  3542. for (; i < 16; i++) {
  3543. char s = source[i];
  3544. if (res.mem[i] == 0)
  3545. *target++ = s;
  3546. else {
  3547. *target++ = '\\';
  3548. if (s == '\0')
  3549. *target++ = '0';
  3550. else
  3551. *target++ = s;
  3552. }
  3553. }
  3554. return target;
  3555. }
  3556. #endif /* __aarch64__ */
  3557. #if !ZEND_INTRIN_SSE4_2_NATIVE
  3558. # if ZEND_INTRIN_SSE4_2_RESOLVER
  3559. zend_string *php_addslashes_default(zend_string *str) /* {{{ */
  3560. # else
  3561. PHPAPI zend_string *php_addslashes(zend_string *str)
  3562. # endif
  3563. {
  3564. /* maximum string length, worst case situation */
  3565. char *target;
  3566. const char *source, *end;
  3567. size_t offset;
  3568. zend_string *new_str;
  3569. if (!str) {
  3570. return ZSTR_EMPTY_ALLOC();
  3571. }
  3572. source = ZSTR_VAL(str);
  3573. end = source + ZSTR_LEN(str);
  3574. # ifdef __aarch64__
  3575. quad_word res = {0};
  3576. if (ZSTR_LEN(str) > 15) {
  3577. do {
  3578. res = aarch64_contains_slash_chars(vld1q_u8((uint8_t *)source));
  3579. if (res.dw[0] | res.dw[1])
  3580. goto do_escape;
  3581. source += 16;
  3582. } while ((end - source) > 15);
  3583. }
  3584. /* Finish the last 15 bytes or less with the scalar loop. */
  3585. # endif /* __aarch64__ */
  3586. while (source < end) {
  3587. switch (*source) {
  3588. case '\0':
  3589. case '\'':
  3590. case '\"':
  3591. case '\\':
  3592. goto do_escape;
  3593. default:
  3594. source++;
  3595. break;
  3596. }
  3597. }
  3598. return zend_string_copy(str);
  3599. do_escape:
  3600. offset = source - (char *)ZSTR_VAL(str);
  3601. new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
  3602. memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
  3603. target = ZSTR_VAL(new_str) + offset;
  3604. # ifdef __aarch64__
  3605. if (res.dw[0] | res.dw[1]) {
  3606. target = aarch64_add_slashes(res, source, target);
  3607. source += 16;
  3608. }
  3609. for (; end - source > 15; source += 16) {
  3610. uint8x16_t x = vld1q_u8((uint8_t *)source);
  3611. res = aarch64_contains_slash_chars(x);
  3612. if (res.dw[0] | res.dw[1]) {
  3613. target = aarch64_add_slashes(res, source, target);
  3614. } else {
  3615. vst1q_u8((uint8_t*)target, x);
  3616. target += 16;
  3617. }
  3618. }
  3619. /* Finish the last 15 bytes or less with the scalar loop. */
  3620. # endif /* __aarch64__ */
  3621. while (source < end) {
  3622. switch (*source) {
  3623. case '\0':
  3624. *target++ = '\\';
  3625. *target++ = '0';
  3626. break;
  3627. case '\'':
  3628. case '\"':
  3629. case '\\':
  3630. *target++ = '\\';
  3631. ZEND_FALLTHROUGH;
  3632. default:
  3633. *target++ = *source;
  3634. break;
  3635. }
  3636. source++;
  3637. }
  3638. *target = '\0';
  3639. if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
  3640. new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
  3641. } else {
  3642. ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
  3643. }
  3644. return new_str;
  3645. }
  3646. #endif
  3647. /* }}} */
  3648. /* }}} */
  3649. /* {{{ php_stripslashes
  3650. *
  3651. * be careful, this edits the string in-place */
  3652. static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len)
  3653. {
  3654. #ifdef __aarch64__
  3655. while (len > 15) {
  3656. uint8x16_t x = vld1q_u8((uint8_t *)str);
  3657. quad_word q;
  3658. vst1q_u8(q.mem, vceqq_u8(x, vdupq_n_u8('\\')));
  3659. if (q.dw[0] | q.dw[1]) {
  3660. int i = 0;
  3661. for (; i < 16; i++) {
  3662. if (q.mem[i] == 0) {
  3663. *out++ = str[i];
  3664. continue;
  3665. }
  3666. i++; /* skip the slash */
  3667. char s = str[i];
  3668. if (s == '0')
  3669. *out++ = '\0';
  3670. else
  3671. *out++ = s; /* preserve the next character */
  3672. }
  3673. str += i;
  3674. len -= i;
  3675. } else {
  3676. vst1q_u8((uint8_t*)out, x);
  3677. out += 16;
  3678. str += 16;
  3679. len -= 16;
  3680. }
  3681. }
  3682. /* Finish the last 15 bytes or less with the scalar loop. */
  3683. #endif /* __aarch64__ */
  3684. while (len > 0) {
  3685. if (*str == '\\') {
  3686. str++; /* skip the slash */
  3687. len--;
  3688. if (len > 0) {
  3689. if (*str == '0') {
  3690. *out++='\0';
  3691. str++;
  3692. } else {
  3693. *out++ = *str++; /* preserve the next character */
  3694. }
  3695. len--;
  3696. }
  3697. } else {
  3698. *out++ = *str++;
  3699. len--;
  3700. }
  3701. }
  3702. return out;
  3703. }
  #if ZEND_INTRIN_SSE4_2_NATIVE || ZEND_INTRIN_SSE4_2_RESOLVER
  /*
   * stripslashes() with an SSE fast path; edits `str` in place.
   *
   * While more than 15 bytes remain, 16 bytes are inspected at once: a chunk
   * without a backslash is stored as is, otherwise the first 15 positions of
   * the chunk are unescaped byte by byte. Whatever is left is handled by
   * php_stripslashes_impl(). Length and terminator are updated only when the
   * string actually became shorter.
   */
  # if ZEND_INTRIN_SSE4_2_NATIVE
  PHPAPI void php_stripslashes(zend_string *str)
  # elif ZEND_INTRIN_SSE4_2_RESOLVER
  void php_stripslashes_sse42(zend_string *str)
  # endif
  {
      const char *src = ZSTR_VAL(str);
      char *dst = ZSTR_VAL(str);
      size_t remaining = ZSTR_LEN(str);

      if (remaining > 15) {
          const __m128i backslashes = _mm_set1_epi8('\\');

          do {
              __m128i chunk = _mm_loadu_si128((__m128i *)src);
              uint32_t hits = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, backslashes));

              if (!hits) {
                  /* No backslash in this chunk: move it over unchanged. */
                  _mm_storeu_si128((__m128i *)dst, chunk);
                  src += 16;
                  dst += 16;
                  remaining -= 16;
              } else {
                  /* `lead` plain bytes precede the first backslash. */
                  int k, lead = zend_ulong_ntz(hits);
                  /* The scalar walk stops before the chunk's last byte so the
                   * byte following a backslash is always inside the chunk. */
                  const char *limit = src + 15;

                  remaining -= lead;
                  for (k = 0; k < lead; k++) {
                      *dst++ = *src++;
                  }

                  while (src < limit) {
                      if (*src == '\\') {
                          src++;
                          remaining--;
                          *dst = (*src == '0') ? '\0' : *src;
                      } else {
                          *dst = *src;
                      }
                      dst++;
                      remaining--;
                      src++;
                  }
              }
          } while (remaining > 15);
      }

      dst = php_stripslashes_impl(src, dst, remaining);
      if (dst != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
          ZSTR_LEN(str) = dst - ZSTR_VAL(str);
          ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
      }
  }
  #endif
  3757. #if !ZEND_INTRIN_SSE4_2_NATIVE
  3758. # if ZEND_INTRIN_SSE4_2_RESOLVER
  3759. void php_stripslashes_default(zend_string *str) /* {{{ */
  3760. # else
  3761. PHPAPI void php_stripslashes(zend_string *str)
  3762. # endif
  3763. {
  3764. const char *t = php_stripslashes_impl(ZSTR_VAL(str), ZSTR_VAL(str), ZSTR_LEN(str));
  3765. if (t != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
  3766. ZSTR_LEN(str) = t - ZSTR_VAL(str);
  3767. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  3768. }
  3769. }
  3770. /* }}} */
  3771. #endif
  3772. /* }}} */
  /* Block type tags. */
  #define _HEB_BLOCK_TYPE_ENG 1
  #define _HEB_BLOCK_TYPE_HEB 2

  /*
   * Byte classification helpers; each yields 1 or 0.
   *
   * The argument is parenthesised before the cast so that a compound
   * expression (e.g. `cond ? a : b`) is converted to unsigned char as a whole
   * instead of only its first operand. The argument is still evaluated more
   * than once, so it must not have side effects.
   */
  /* 1 when the byte lies in the range 224..250. */
  #define isheb(c)      (((((unsigned char) (c)) >= 224) && (((unsigned char) (c)) <= 250)) ? 1 : 0)
  /* 1 for a space or a horizontal tab. */
  #define _isblank(c)   (((((unsigned char) (c)) == ' '  || ((unsigned char) (c)) == '\t')) ? 1 : 0)
  /* 1 for a line feed or a carriage return. */
  #define _isnewline(c) (((((unsigned char) (c)) == '\n' || ((unsigned char) (c)) == '\r')) ? 1 : 0)
  3778. /* {{{ php_str_replace_in_subject */
  3779. static zend_long php_str_replace_in_subject(
  3780. zend_string *search_str, HashTable *search_ht, zend_string *replace_str, HashTable *replace_ht,
  3781. zend_string *subject_str, zval *result, int case_sensitivity
  3782. ) {
  3783. zval *search_entry;
  3784. zend_string *tmp_result;
  3785. char *replace_value = NULL;
  3786. size_t replace_len = 0;
  3787. zend_long replace_count = 0;
  3788. zend_string *lc_subject_str = NULL;
  3789. uint32_t replace_idx;
  3790. if (ZSTR_LEN(subject_str) == 0) {
  3791. ZVAL_EMPTY_STRING(result);
  3792. return 0;
  3793. }
  3794. /* If search is an array */
  3795. if (search_ht) {
  3796. /* Duplicate subject string for repeated replacement */
  3797. zend_string_addref(subject_str);
  3798. if (replace_ht) {
  3799. replace_idx = 0;
  3800. } else {
  3801. /* Set replacement value to the passed one */
  3802. replace_value = ZSTR_VAL(replace_str);
  3803. replace_len = ZSTR_LEN(replace_str);
  3804. }
  3805. /* For each entry in the search array, get the entry */
  3806. ZEND_HASH_FOREACH_VAL(search_ht, search_entry) {
  3807. /* Make sure we're dealing with strings. */
  3808. zend_string *tmp_search_str;
  3809. zend_string *search_str = zval_get_tmp_string(search_entry, &tmp_search_str);
  3810. zend_string *replace_entry_str, *tmp_replace_entry_str = NULL;
  3811. /* If replace is an array. */
  3812. if (replace_ht) {
  3813. /* Get current entry */
  3814. zval *replace_entry = NULL;
  3815. if (HT_IS_PACKED(replace_ht)) {
  3816. while (replace_idx < replace_ht->nNumUsed) {
  3817. replace_entry = &replace_ht->arPacked[replace_idx];
  3818. if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
  3819. break;
  3820. }
  3821. replace_idx++;
  3822. }
  3823. } else {
  3824. while (replace_idx < replace_ht->nNumUsed) {
  3825. replace_entry = &replace_ht->arData[replace_idx].val;
  3826. if (Z_TYPE_P(replace_entry) != IS_UNDEF) {
  3827. break;
  3828. }
  3829. replace_idx++;
  3830. }
  3831. }
  3832. if (replace_idx < replace_ht->nNumUsed) {
  3833. /* Make sure we're dealing with strings. */
  3834. replace_entry_str = zval_get_tmp_string(replace_entry, &tmp_replace_entry_str);
  3835. /* Set replacement value to the one we got from array */
  3836. replace_value = ZSTR_VAL(replace_entry_str);
  3837. replace_len = ZSTR_LEN(replace_entry_str);
  3838. replace_idx++;
  3839. } else {
  3840. /* We've run out of replacement strings, so use an empty one. */
  3841. replace_value = "";
  3842. replace_len = 0;
  3843. }
  3844. }
  3845. if (ZSTR_LEN(search_str) == 1) {
  3846. zend_long old_replace_count = replace_count;
  3847. tmp_result = php_char_to_str_ex(subject_str,
  3848. ZSTR_VAL(search_str)[0],
  3849. replace_value,
  3850. replace_len,
  3851. case_sensitivity,
  3852. &replace_count);
  3853. if (lc_subject_str && replace_count != old_replace_count) {
  3854. zend_string_release_ex(lc_subject_str, 0);
  3855. lc_subject_str = NULL;
  3856. }
  3857. } else if (ZSTR_LEN(search_str) > 1) {
  3858. if (case_sensitivity) {
  3859. tmp_result = php_str_to_str_ex(subject_str,
  3860. ZSTR_VAL(search_str), ZSTR_LEN(search_str),
  3861. replace_value, replace_len, &replace_count);
  3862. } else {
  3863. zend_long old_replace_count = replace_count;
  3864. if (!lc_subject_str) {
  3865. lc_subject_str = php_string_tolower(subject_str);
  3866. }
  3867. tmp_result = php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
  3868. search_str, replace_value, replace_len, &replace_count);
  3869. if (replace_count != old_replace_count) {
  3870. zend_string_release_ex(lc_subject_str, 0);
  3871. lc_subject_str = NULL;
  3872. }
  3873. }
  3874. } else {
  3875. zend_tmp_string_release(tmp_search_str);
  3876. zend_tmp_string_release(tmp_replace_entry_str);
  3877. continue;
  3878. }
  3879. zend_tmp_string_release(tmp_search_str);
  3880. zend_tmp_string_release(tmp_replace_entry_str);
  3881. if (subject_str == tmp_result) {
  3882. zend_string_delref(subject_str);
  3883. } else {
  3884. zend_string_release_ex(subject_str, 0);
  3885. subject_str = tmp_result;
  3886. if (ZSTR_LEN(subject_str) == 0) {
  3887. zend_string_release_ex(subject_str, 0);
  3888. ZVAL_EMPTY_STRING(result);
  3889. if (lc_subject_str) {
  3890. zend_string_release_ex(lc_subject_str, 0);
  3891. }
  3892. return replace_count;
  3893. }
  3894. }
  3895. } ZEND_HASH_FOREACH_END();
  3896. ZVAL_STR(result, subject_str);
  3897. if (lc_subject_str) {
  3898. zend_string_release_ex(lc_subject_str, 0);
  3899. }
  3900. } else {
  3901. ZEND_ASSERT(search_str);
  3902. if (ZSTR_LEN(search_str) == 1) {
  3903. ZVAL_STR(result,
  3904. php_char_to_str_ex(subject_str,
  3905. ZSTR_VAL(search_str)[0],
  3906. ZSTR_VAL(replace_str),
  3907. ZSTR_LEN(replace_str),
  3908. case_sensitivity,
  3909. &replace_count));
  3910. } else if (ZSTR_LEN(search_str) > 1) {
  3911. if (case_sensitivity) {
  3912. ZVAL_STR(result, php_str_to_str_ex(subject_str,
  3913. ZSTR_VAL(search_str), ZSTR_LEN(search_str),
  3914. ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
  3915. } else {
  3916. lc_subject_str = php_string_tolower(subject_str);
  3917. ZVAL_STR(result, php_str_to_str_i_ex(subject_str, ZSTR_VAL(lc_subject_str),
  3918. search_str, ZSTR_VAL(replace_str), ZSTR_LEN(replace_str), &replace_count));
  3919. zend_string_release_ex(lc_subject_str, 0);
  3920. }
  3921. } else {
  3922. ZVAL_STR_COPY(result, subject_str);
  3923. }
  3924. }
  3925. return replace_count;
  3926. }
  3927. /* }}} */
  3928. /* {{{ php_str_replace_common */
  3929. static void php_str_replace_common(INTERNAL_FUNCTION_PARAMETERS, int case_sensitivity)
  3930. {
  3931. zend_string *search_str;
  3932. HashTable *search_ht;
  3933. zend_string *replace_str;
  3934. HashTable *replace_ht;
  3935. zend_string *subject_str;
  3936. HashTable *subject_ht;
  3937. zval *subject_entry, *zcount = NULL;
  3938. zval result;
  3939. zend_string *string_key;
  3940. zend_ulong num_key;
  3941. zend_long count = 0;
  3942. ZEND_PARSE_PARAMETERS_START(3, 4)
  3943. Z_PARAM_ARRAY_HT_OR_STR(search_ht, search_str)
  3944. Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
  3945. Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
  3946. Z_PARAM_OPTIONAL
  3947. Z_PARAM_ZVAL(zcount)
  3948. ZEND_PARSE_PARAMETERS_END();
  3949. /* Make sure we're dealing with strings and do the replacement. */
  3950. if (search_str && replace_ht) {
  3951. zend_argument_type_error(2, "must be of type %s when argument #1 ($search) is %s",
  3952. search_str ? "string" : "array", search_str ? "a string" : "an array"
  3953. );
  3954. RETURN_THROWS();
  3955. }
  3956. /* if subject is an array */
  3957. if (subject_ht) {
  3958. array_init(return_value);
  3959. /* For each subject entry, convert it to string, then perform replacement
  3960. and add the result to the return_value array. */
  3961. ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
  3962. zend_string *tmp_subject_str;
  3963. ZVAL_DEREF(subject_entry);
  3964. subject_str = zval_get_tmp_string(subject_entry, &tmp_subject_str);
  3965. count += php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, &result, case_sensitivity);
  3966. zend_tmp_string_release(tmp_subject_str);
  3967. /* Add to return array */
  3968. if (string_key) {
  3969. zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &result);
  3970. } else {
  3971. zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &result);
  3972. }
  3973. } ZEND_HASH_FOREACH_END();
  3974. } else { /* if subject is not an array */
  3975. count = php_str_replace_in_subject(search_str, search_ht, replace_str, replace_ht, subject_str, return_value, case_sensitivity);
  3976. }
  3977. if (zcount) {
  3978. ZEND_TRY_ASSIGN_REF_LONG(zcount, count);
  3979. }
  3980. }
  3981. /* }}} */
/* {{{ Replaces all occurrences of search in haystack with replace.
 * Thin entry point: forwards to php_str_replace_common() with
 * case_sensitivity = 1 (case-sensitive matching). */
PHP_FUNCTION(str_replace)
{
	php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
  3987. /* }}} */
/* {{{ Replaces all occurrences of search in haystack with replace / case-insensitive.
 * Thin entry point: forwards to php_str_replace_common() with
 * case_sensitivity = 0. */
PHP_FUNCTION(str_ireplace)
{
	php_str_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
  3993. /* }}} */
/* {{{ Converts logical Hebrew text to visual text
 *
 * Arguments: the input string and an optional max_chars (default 0).
 *
 * The work is done in two passes:
 *  1. The input is split into alternating "Hebrew" and "English" blocks and
 *     written into heb_str starting from its END and moving backwards.
 *     Hebrew blocks are read front-to-back, so they end up reversed (with
 *     bracket-like characters mirrored); English blocks are read
 *     back-to-front, so they keep their original character order.
 *  2. heb_str is walked from its end towards its start and cut into lines,
 *     which are appended to broken_str. When max_chars is 0 lines are only
 *     cut at newlines; when it is positive a line is also cut after
 *     max_chars characters, preferring a blank or newline as the cut point.
 *
 * isheb(), _isblank(), _isnewline() and the _HEB_BLOCK_TYPE_* constants are
 * defined outside this block. */
PHP_FUNCTION(hebrev)
{
	char *str, *heb_str, *target;
	const char *tmp;
	size_t block_start, block_end, block_type, block_length, i;
	zend_long max_chars=0, char_count;
	size_t begin, end, orig_begin;
	size_t str_len;
	zend_string *broken_str;
	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STRING(str, str_len)
		Z_PARAM_OPTIONAL
		Z_PARAM_LONG(max_chars)
	ZEND_PARSE_PARAMETERS_END();
	if (str_len == 0) {
		RETURN_EMPTY_STRING();
	}
	tmp = str;
	block_start=block_end=0;
	/* Pass 1 output buffer; filled from the last byte backwards. */
	heb_str = (char *) emalloc(str_len+1);
	target = heb_str+str_len;
	*target = 0;
	target--;
	/* block_length is only incremented below; it is never read in this function. */
	block_length=0;
	/* The type of the first block is decided by the first character. */
	if (isheb(*tmp)) {
		block_type = _HEB_BLOCK_TYPE_HEB;
	} else {
		block_type = _HEB_BLOCK_TYPE_ENG;
	}
	do {
		if (block_type == _HEB_BLOCK_TYPE_HEB) {
			/* Extend the block over Hebrew letters, blanks, punctuation and '\n'. */
			while ((isheb((int)*(tmp+1)) || _isblank((int)*(tmp+1)) || ispunct((int)*(tmp+1)) || (int)*(tmp+1)=='\n' ) && block_end<str_len-1) {
				tmp++;
				block_end++;
				block_length++;
			}
			/* Copy the block front-to-back into the backwards-moving target,
			 * which reverses it; paired delimiters are swapped so they still
			 * face the right way after the reversal. */
			for (i = block_start+1; i<= block_end+1; i++) {
				*target = str[i-1];
				switch (*target) {
					case '(':
						*target = ')';
						break;
					case ')':
						*target = '(';
						break;
					case '[':
						*target = ']';
						break;
					case ']':
						*target = '[';
						break;
					case '{':
						*target = '}';
						break;
					case '}':
						*target = '{';
						break;
					case '<':
						*target = '>';
						break;
					case '>':
						*target = '<';
						break;
					case '\\':
						*target = '/';
						break;
					case '/':
						*target = '\\';
						break;
					default:
						break;
				}
				target--;
			}
			block_type = _HEB_BLOCK_TYPE_ENG;
		} else {
			/* Extend the block up to the next Hebrew letter or '\n'. */
			while (!isheb(*(tmp+1)) && (int)*(tmp+1)!='\n' && block_end < str_len-1) {
				tmp++;
				block_end++;
				block_length++;
			}
			/* Give trailing blanks/punctuation (except '/' and '-') back, so
			 * they become part of the following Hebrew block. */
			while ((_isblank((int)*tmp) || ispunct((int)*tmp)) && *tmp!='/' && *tmp!='-' && block_end > block_start) {
				tmp--;
				block_end--;
			}
			/* Copy back-to-front into the backwards-moving target: the block
			 * keeps its original left-to-right order. */
			for (i = block_end+1; i >= block_start+1; i--) {
				*target = str[i-1];
				target--;
			}
			block_type = _HEB_BLOCK_TYPE_HEB;
		}
		block_start=block_end+1;
	} while (block_end < str_len-1);
	/* Pass 2: split heb_str into lines, working from its end to its start. */
	broken_str = zend_string_alloc(str_len, 0);
	begin = end = str_len-1;
	target = ZSTR_VAL(broken_str);
	while (1) {
		char_count=0;
		/* Move begin backwards until max_chars characters are collected
		 * (no limit when max_chars is 0) or a run of newlines is reached. */
		while ((!max_chars || (max_chars > 0 && char_count < max_chars)) && begin > 0) {
			char_count++;
			begin--;
			if (_isnewline(heb_str[begin])) {
				while (begin > 0 && _isnewline(heb_str[begin-1])) {
					begin--;
					char_count++;
				}
				break;
			}
		}
		if (max_chars >= 0 && char_count == max_chars) { /* try to avoid breaking words */
			size_t new_char_count=char_count, new_begin=begin;
			/* Search forward from begin for a blank/newline to cut at. */
			while (new_char_count > 0) {
				if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) {
					break;
				}
				new_begin++;
				new_char_count--;
			}
			/* Only move the cut point if a separator was found inside the line. */
			if (new_char_count > 0) {
				begin=new_begin;
			}
		}
		orig_begin=begin;
		/* A blank at the cut point is turned into the line break itself. */
		if (_isblank(heb_str[begin])) {
			heb_str[begin]='\n';
		}
		while (begin <= end && _isnewline(heb_str[begin])) { /* skip leading newlines */
			begin++;
		}
		for (i = begin; i <= end; i++) { /* copy content */
			*target = heb_str[i];
			target++;
		}
		/* Emit the newlines that were skipped above, now after the content. */
		for (i = orig_begin; i <= end && _isnewline(heb_str[i]); i++) {
			*target = heb_str[i];
			target++;
		}
		begin=orig_begin;
		/* Reached the start of heb_str: terminate the output and stop. */
		if (begin == 0) {
			*target = 0;
			break;
		}
		begin--;
		end=begin;
	}
	efree(heb_str);
	RETURN_NEW_STR(broken_str);
}
  4143. /* }}} */
  4144. /* {{{ Converts newlines to HTML line breaks */
  4145. PHP_FUNCTION(nl2br)
  4146. {
  4147. /* in brief this inserts <br /> or <br> before matched regexp \n\r?|\r\n? */
  4148. const char *tmp, *end;
  4149. zend_string *str;
  4150. char *target;
  4151. size_t repl_cnt = 0;
  4152. bool is_xhtml = 1;
  4153. zend_string *result;
  4154. ZEND_PARSE_PARAMETERS_START(1, 2)
  4155. Z_PARAM_STR(str)
  4156. Z_PARAM_OPTIONAL
  4157. Z_PARAM_BOOL(is_xhtml)
  4158. ZEND_PARSE_PARAMETERS_END();
  4159. tmp = ZSTR_VAL(str);
  4160. end = ZSTR_VAL(str) + ZSTR_LEN(str);
  4161. /* it is really faster to scan twice and allocate mem once instead of scanning once
  4162. and constantly reallocing */
  4163. while (tmp < end) {
  4164. if (*tmp == '\r') {
  4165. if (*(tmp+1) == '\n') {
  4166. tmp++;
  4167. }
  4168. repl_cnt++;
  4169. } else if (*tmp == '\n') {
  4170. if (*(tmp+1) == '\r') {
  4171. tmp++;
  4172. }
  4173. repl_cnt++;
  4174. }
  4175. tmp++;
  4176. }
  4177. if (repl_cnt == 0) {
  4178. RETURN_STR_COPY(str);
  4179. }
  4180. {
  4181. size_t repl_len = is_xhtml ? (sizeof("<br />") - 1) : (sizeof("<br>") - 1);
  4182. result = zend_string_safe_alloc(repl_cnt, repl_len, ZSTR_LEN(str), 0);
  4183. target = ZSTR_VAL(result);
  4184. }
  4185. tmp = ZSTR_VAL(str);
  4186. while (tmp < end) {
  4187. switch (*tmp) {
  4188. case '\r':
  4189. case '\n':
  4190. *target++ = '<';
  4191. *target++ = 'b';
  4192. *target++ = 'r';
  4193. if (is_xhtml) {
  4194. *target++ = ' ';
  4195. *target++ = '/';
  4196. }
  4197. *target++ = '>';
  4198. if ((*tmp == '\r' && *(tmp+1) == '\n') || (*tmp == '\n' && *(tmp+1) == '\r')) {
  4199. *target++ = *tmp++;
  4200. }
  4201. ZEND_FALLTHROUGH;
  4202. default:
  4203. *target++ = *tmp;
  4204. }
  4205. tmp++;
  4206. }
  4207. *target = '\0';
  4208. RETURN_NEW_STR(result);
  4209. }
  4210. /* }}} */
  4211. /* {{{ Strips HTML and PHP tags from a string */
  4212. PHP_FUNCTION(strip_tags)
  4213. {
  4214. zend_string *buf;
  4215. zend_string *str;
  4216. zend_string *allow_str = NULL;
  4217. HashTable *allow_ht = NULL;
  4218. const char *allowed_tags=NULL;
  4219. size_t allowed_tags_len=0;
  4220. smart_str tags_ss = {0};
  4221. ZEND_PARSE_PARAMETERS_START(1, 2)
  4222. Z_PARAM_STR(str)
  4223. Z_PARAM_OPTIONAL
  4224. Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(allow_ht, allow_str)
  4225. ZEND_PARSE_PARAMETERS_END();
  4226. if (allow_ht) {
  4227. zval *tmp;
  4228. zend_string *tag;
  4229. ZEND_HASH_FOREACH_VAL(allow_ht, tmp) {
  4230. tag = zval_get_string(tmp);
  4231. smart_str_appendc(&tags_ss, '<');
  4232. smart_str_append(&tags_ss, tag);
  4233. smart_str_appendc(&tags_ss, '>');
  4234. zend_string_release(tag);
  4235. } ZEND_HASH_FOREACH_END();
  4236. if (tags_ss.s) {
  4237. smart_str_0(&tags_ss);
  4238. allowed_tags = ZSTR_VAL(tags_ss.s);
  4239. allowed_tags_len = ZSTR_LEN(tags_ss.s);
  4240. }
  4241. } else if (allow_str) {
  4242. allowed_tags = ZSTR_VAL(allow_str);
  4243. allowed_tags_len = ZSTR_LEN(allow_str);
  4244. }
  4245. buf = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0);
  4246. ZSTR_LEN(buf) = php_strip_tags_ex(ZSTR_VAL(buf), ZSTR_LEN(str), allowed_tags, allowed_tags_len, 0);
  4247. smart_str_free(&tags_ss);
  4248. RETURN_NEW_STR(buf);
  4249. }
  4250. /* }}} */
/* Try to apply (or, for the literal "0", just query) a single locale name
 * for category cat.
 *
 * Returns NULL when the name is 255 bytes or longer (after raising a
 * warning) or when setlocale() fails / the name is filtered out on Windows;
 * otherwise returns a zend_string holding the locale name reported by
 * setlocale(). When a locale was actually set, BG(locale_changed) is raised,
 * and for LC_CTYPE / LC_ALL the cached BG(ctype_string) is refreshed too. */
static zend_string *try_setlocale_str(zend_long cat, zend_string *loc) {
	const char *retval;
	/* "0" means: do not change anything, only report the current locale. */
	if (zend_string_equals_literal(loc, "0")) {
		loc = NULL;
	} else {
		if (ZSTR_LEN(loc) >= 255) {
			php_error_docref(NULL, E_WARNING, "Specified locale name is too long");
			return NULL;
		}
	}
# ifndef PHP_WIN32
	retval = setlocale(cat, loc ? ZSTR_VAL(loc) : NULL);
# else
	if (loc) {
		/* BC: don't try /^[a-z]{2}_[A-Z]{2}($|\..*)/ except for /^u[ks]_U[KS]$/ */
		char *locp = ZSTR_VAL(loc);
		if (ZSTR_LEN(loc) >= 5 && locp[2] == '_'
			&& locp[0] >= 'a' && locp[0] <= 'z' && locp[1] >= 'a' && locp[1] <= 'z'
			&& locp[3] >= 'A' && locp[3] <= 'Z' && locp[4] >= 'A' && locp[4] <= 'Z'
			&& (locp[5] == '\0' || locp[5] == '.')
			&& !(locp[0] == 'u' && (locp[1] == 'k' || locp[1] == 's')
				&& locp[3] == 'U' && (locp[4] == 'K' || locp[4] == 'S')
				&& locp[5] == '\0')
		) {
			/* Filtered name: treated exactly like a setlocale() failure. */
			retval = NULL;
		} else {
			retval = setlocale(cat, ZSTR_VAL(loc));
		}
	} else {
		retval = setlocale(cat, NULL);
	}
# endif
	if (!retval) {
		return NULL;
	}
	if (loc) {
		/* Remember if locale was changed */
		size_t len = strlen(retval);
		BG(locale_changed) = 1;
		if (cat == LC_CTYPE || cat == LC_ALL) {
			zend_update_current_locale();
			/* Drop the previously cached ctype locale name before replacing it. */
			if (BG(ctype_string)) {
				zend_string_release_ex(BG(ctype_string), 0);
			}
			if (len == 1 && *retval == 'C') {
				/* C locale is represented as NULL. */
				BG(ctype_string) = NULL;
				return ZSTR_CHAR('C');
			} else if (len == ZSTR_LEN(loc) && !memcmp(ZSTR_VAL(loc), retval, len)) {
				/* setlocale() echoed the requested name: share the caller's
				 * string instead of allocating a new one. */
				BG(ctype_string) = zend_string_copy(loc);
				return zend_string_copy(BG(ctype_string));
			} else {
				BG(ctype_string) = zend_string_init(retval, len, 0);
				return zend_string_copy(BG(ctype_string));
			}
		} else if (len == ZSTR_LEN(loc) && !memcmp(ZSTR_VAL(loc), retval, len)) {
			/* Same name as requested: reuse the caller's string. */
			return zend_string_copy(loc);
		}
	}
	/* Query-only call, or the reported name differs from the requested one. */
	return zend_string_init(retval, strlen(retval), 0);
}
  4312. static zend_string *try_setlocale_zval(zend_long cat, zval *loc_zv) {
  4313. zend_string *tmp_loc_str;
  4314. zend_string *loc_str = zval_try_get_tmp_string(loc_zv, &tmp_loc_str);
  4315. if (UNEXPECTED(loc_str == NULL)) {
  4316. return NULL;
  4317. }
  4318. zend_string *result = try_setlocale_str(cat, loc_str);
  4319. zend_tmp_string_release(tmp_loc_str);
  4320. return result;
  4321. }
  4322. /* {{{ Set locale information */
  4323. PHP_FUNCTION(setlocale)
  4324. {
  4325. zend_long cat;
  4326. zval *args = NULL;
  4327. int num_args;
  4328. ZEND_PARSE_PARAMETERS_START(2, -1)
  4329. Z_PARAM_LONG(cat)
  4330. Z_PARAM_VARIADIC('+', args, num_args)
  4331. ZEND_PARSE_PARAMETERS_END();
  4332. for (uint32_t i = 0; i < num_args; i++) {
  4333. if (Z_TYPE(args[i]) == IS_ARRAY) {
  4334. zval *elem;
  4335. ZEND_HASH_FOREACH_VAL(Z_ARRVAL(args[i]), elem) {
  4336. zend_string *result = try_setlocale_zval(cat, elem);
  4337. if (EG(exception)) {
  4338. RETURN_THROWS();
  4339. }
  4340. if (result) {
  4341. RETURN_STR(result);
  4342. }
  4343. } ZEND_HASH_FOREACH_END();
  4344. } else {
  4345. zend_string *result = try_setlocale_zval(cat, &args[i]);
  4346. if (EG(exception)) {
  4347. RETURN_THROWS();
  4348. }
  4349. if (result) {
  4350. RETURN_STR(result);
  4351. }
  4352. }
  4353. }
  4354. RETURN_FALSE;
  4355. }
  4356. /* }}} */
  4357. /* {{{ Parses GET/POST/COOKIE data and sets global variables */
  4358. PHP_FUNCTION(parse_str)
  4359. {
  4360. char *arg;
  4361. zval *arrayArg = NULL;
  4362. char *res = NULL;
  4363. size_t arglen;
  4364. ZEND_PARSE_PARAMETERS_START(2, 2)
  4365. Z_PARAM_STRING(arg, arglen)
  4366. Z_PARAM_ZVAL(arrayArg)
  4367. ZEND_PARSE_PARAMETERS_END();
  4368. arrayArg = zend_try_array_init(arrayArg);
  4369. if (!arrayArg) {
  4370. RETURN_THROWS();
  4371. }
  4372. res = estrndup(arg, arglen);
  4373. sapi_module.treat_data(PARSE_STRING, res, arrayArg);
  4374. }
  4375. /* }}} */
  4376. #define PHP_TAG_BUF_SIZE 1023
  4377. /* {{{ php_tag_find
  4378. *
  4379. * Check if tag is in a set of tags
  4380. *
  4381. * states:
  4382. *
  4383. * 0 start tag
  4384. * 1 first non-whitespace char seen
  4385. */
  4386. int php_tag_find(char *tag, size_t len, const char *set) {
  4387. char c, *n;
  4388. const char *t;
  4389. int state=0, done=0;
  4390. char *norm;
  4391. if (len == 0) {
  4392. return 0;
  4393. }
  4394. norm = emalloc(len+1);
  4395. n = norm;
  4396. t = tag;
  4397. c = tolower(*t);
  4398. /*
  4399. normalize the tag removing leading and trailing whitespace
  4400. and turn any <a whatever...> into just <a> and any </tag>
  4401. into <tag>
  4402. */
  4403. while (!done) {
  4404. switch (c) {
  4405. case '<':
  4406. *(n++) = c;
  4407. break;
  4408. case '>':
  4409. done =1;
  4410. break;
  4411. default:
  4412. if (!isspace((int)c)) {
  4413. if (state == 0) {
  4414. state=1;
  4415. }
  4416. if (c != '/' || (*(t-1) != '<' && *(t+1) != '>')) {
  4417. *(n++) = c;
  4418. }
  4419. } else {
  4420. if (state == 1)
  4421. done=1;
  4422. }
  4423. break;
  4424. }
  4425. c = tolower(*(++t));
  4426. }
  4427. *(n++) = '>';
  4428. *n = '\0';
  4429. if (strstr(set, norm)) {
  4430. done=1;
  4431. } else {
  4432. done=0;
  4433. }
  4434. efree(norm);
  4435. return done;
  4436. }
  4437. /* }}} */
/* Convenience wrapper around php_strip_tags_ex() that always passes 0 for
 * allow_tag_spaces. Strips in place in rbuf and returns whatever
 * php_strip_tags_ex() returns (the length of the stripped data). */
PHPAPI size_t php_strip_tags(char *rbuf, size_t len, const char *allow, size_t allow_len) /* {{{ */
{
	return php_strip_tags_ex(rbuf, len, allow, allow_len, 0);
}
  4442. /* }}} */
/* {{{ php_strip_tags_ex
	A simple little state-machine to strip out html and php tags.

	rbuf is both input and output: its first len bytes are copied to a
	scratch buffer that is scanned, while the stripped text is written back
	to rbuf. The return value is the number of bytes written; a terminating
	NUL is added only when the output is shorter than len.

	States (implemented as goto labels; the state variable is a local here
	and is not carried across calls):
	  state_0  output state, text is copied through
	  state_1  inside a normal html tag
	  state_2  inside a <? ... ?> (php) tag
	  state_3  inside a <! ... > construct
	  state_4  inside a <!-- ... --> comment

	lc holds the last significant character read and br is a bracket
	counter. depth counts '<' seen inside a tag, in_q holds the currently
	open quote character (0 when outside quotes).

	When an allow string is passed in we keep track of the string
	in state 1 and when the tag is closed check it against the
	allow string to see if we should allow it.

	allow_tag_spaces: when false, a '<' directly followed by whitespace is
	not treated as the start of a tag.

	swm: Added ability to strip <?xml tags without assuming it PHP
	code.
*/
PHPAPI size_t php_strip_tags_ex(char *rbuf, size_t len, const char *allow, size_t allow_len, bool allow_tag_spaces)
{
	char *tbuf, *tp, *rp, c, lc;
	const char *buf, *p, *end;
	int br, depth=0, in_q = 0;
	uint8_t state = 0;
	size_t pos;
	char *allow_free = NULL;
	char is_xml = 0;
	/* Scan a private copy so the output can overwrite rbuf while reading. */
	buf = estrndup(rbuf, len);
	end = buf + len;
	lc = '\0';
	p = buf;
	rp = rbuf;
	br = 0;
	if (allow) {
		/* Compare against a lower-cased allow list. The NULL check below
		 * suggests the helper may return NULL (presumably when nothing had
		 * to be changed) - in that case the caller's string is used. */
		allow_free = zend_str_tolower_dup_ex(allow, allow_len);
		allow = allow_free ? allow_free : allow;
		/* tbuf collects the text of the tag currently being read. */
		tbuf = emalloc(PHP_TAG_BUF_SIZE + 1);
		tp = tbuf;
	} else {
		tbuf = tp = NULL;
	}
state_0:
	if (p >= end) {
		goto finish;
	}
	c = *p;
	switch (c) {
		case '\0':
			/* NUL bytes are dropped from the output. */
			break;
		case '<':
			if (in_q) {
				break;
			}
			/* "< " is literal text unless tag spaces are allowed. */
			if (isspace(*(p + 1)) && !allow_tag_spaces) {
				*(rp++) = c;
				break;
			}
			lc = '<';
			state = 1;
			if (allow) {
				/* Grow the tag buffer before it overflows. */
				if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
					pos = tp - tbuf;
					tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
					tp = tbuf + pos;
				}
				*(tp++) = '<';
			}
			p++;
			goto state_1;
		case '>':
			if (depth) {
				depth--;
				break;
			}
			if (in_q) {
				break;
			}
			*(rp++) = c;
			break;
		default:
			*(rp++) = c;
			break;
	}
	p++;
	goto state_0;
state_1:
	if (p >= end) {
		goto finish;
	}
	c = *p;
	switch (c) {
		case '\0':
			break;
		case '<':
			if (in_q) {
				break;
			}
			if (isspace(*(p + 1)) && !allow_tag_spaces) {
				goto reg_char_1;
			}
			/* Nested '<': its matching '>' must not close the tag. */
			depth++;
			break;
		case '>':
			if (depth) {
				depth--;
				break;
			}
			if (in_q) {
				break;
			}
			lc = '>';
			/* In xml mode a '>' preceded by '-' does not end the tag. */
			if (is_xml && p >= buf + 1 && *(p -1) == '-') {
				break;
			}
			in_q = state = is_xml = 0;
			if (allow) {
				if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
					pos = tp - tbuf;
					tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
					tp = tbuf + pos;
				}
				*(tp++) = '>';
				*tp='\0';
				/* Allowed tags are copied to the output verbatim. */
				if (php_tag_find(tbuf, tp-tbuf, allow)) {
					memcpy(rp, tbuf, tp-tbuf);
					rp += tp-tbuf;
				}
				tp = tbuf;
			}
			p++;
			goto state_0;
		case '"':
		case '\'':
			/* Open a quote, or close it when the same quote char recurs. */
			if (p != buf && (!in_q || *p == in_q)) {
				if (in_q) {
					in_q = 0;
				} else {
					in_q = *p;
				}
			}
			goto reg_char_1;
		case '!':
			/* JavaScript & Other HTML scripting languages */
			if (p >= buf + 1 && *(p-1) == '<') {
				state = 3;
				lc = c;
				p++;
				goto state_3;
			} else {
				goto reg_char_1;
			}
			break;
		case '?':
			/* "<?" opens a php-style tag. */
			if (p >= buf + 1 && *(p-1) == '<') {
				br=0;
				state = 2;
				p++;
				goto state_2;
			} else {
				goto reg_char_1;
			}
			break;
		default:
reg_char_1:
			/* Ordinary tag character: remember it when an allow list is used. */
			if (allow) {
				if (tp - tbuf >= PHP_TAG_BUF_SIZE) {
					pos = tp - tbuf;
					tbuf = erealloc(tbuf, (tp - tbuf) + PHP_TAG_BUF_SIZE + 1);
					tp = tbuf + pos;
				}
				*(tp++) = c;
			}
			break;
	}
	p++;
	goto state_1;
state_2:
	if (p >= end) {
		goto finish;
	}
	c = *p;
	switch (c) {
		case '(':
			/* Track bracket nesting outside of quoted strings. */
			if (lc != '"' && lc != '\'') {
				lc = '(';
				br++;
			}
			break;
		case ')':
			if (lc != '"' && lc != '\'') {
				lc = ')';
				br--;
			}
			break;
		case '>':
			if (depth) {
				depth--;
				break;
			}
			if (in_q) {
				break;
			}
			/* "?>" with balanced brackets and no open double quote ends the tag. */
			if (!br && p >= buf + 1 && lc != '\"' && *(p-1) == '?') {
				in_q = state = 0;
				tp = tbuf;
				p++;
				goto state_0;
			}
			break;
		case '"':
		case '\'':
			/* Quotes preceded by a backslash are ignored. */
			if (p >= buf + 1 && *(p-1) != '\\') {
				if (lc == c) {
					lc = '\0';
				} else if (lc != '\\') {
					lc = c;
				}
				if (p != buf && (!in_q || *p == in_q)) {
					if (in_q) {
						in_q = 0;
					} else {
						in_q = *p;
					}
				}
			}
			break;
		case 'l':
		case 'L':
			/* swm: If we encounter '<?xml' then we shouldn't be in
			 * state == 2 (PHP). Switch back to HTML.
			 */
			if (state == 2 && p > buf+4
				&& (*(p-1) == 'm' || *(p-1) == 'M')
				&& (*(p-2) == 'x' || *(p-2) == 'X')
				&& *(p-3) == '?'
				&& *(p-4) == '<') {
				state = 1; is_xml=1;
				p++;
				goto state_1;
			}
			break;
		default:
			break;
	}
	p++;
	goto state_2;
state_3:
	if (p >= end) {
		goto finish;
	}
	c = *p;
	switch (c) {
		case '>':
			if (depth) {
				depth--;
				break;
			}
			if (in_q) {
				break;
			}
			/* End of the <! ... > construct. */
			in_q = state = 0;
			tp = tbuf;
			p++;
			goto state_0;
		case '"':
		case '\'':
			if (p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
				if (in_q) {
					in_q = 0;
				} else {
					in_q = *p;
				}
			}
			break;
		case '-':
			/* "<!--" starts a comment. */
			if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
				state = 4;
				p++;
				goto state_4;
			}
			break;
		case 'E':
		case 'e':
			/* !DOCTYPE exception */
			if (p > buf+6
				&& (*(p-1) == 'p' || *(p-1) == 'P')
				&& (*(p-2) == 'y' || *(p-2) == 'Y')
				&& (*(p-3) == 't' || *(p-3) == 'T')
				&& (*(p-4) == 'c' || *(p-4) == 'C')
				&& (*(p-5) == 'o' || *(p-5) == 'O')
				&& (*(p-6) == 'd' || *(p-6) == 'D')) {
				state = 1;
				p++;
				goto state_1;
			}
			break;
		default:
			break;
	}
	p++;
	goto state_3;
state_4:
	/* Inside a comment: skip everything up to the closing "-->". */
	while (p < end) {
		c = *p;
		if (c == '>' && !in_q) {
			if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
				in_q = state = 0;
				tp = tbuf;
				p++;
				goto state_0;
			}
		}
		p++;
	}
finish:
	/* Terminate the output only if it is shorter than the input buffer. */
	if (rp < rbuf + len) {
		*rp = '\0';
	}
	efree((void *)buf);
	if (tbuf) {
		efree(tbuf);
	}
	if (allow_free) {
		efree(allow_free);
	}
	return (size_t)(rp - rbuf);
}
  4766. /* }}} */
  4767. /* {{{ Parse a CSV string into an array */
  4768. PHP_FUNCTION(str_getcsv)
  4769. {
  4770. zend_string *str;
  4771. char delim = ',', enc = '"';
  4772. int esc = (unsigned char) '\\';
  4773. char *delim_str = NULL, *enc_str = NULL, *esc_str = NULL;
  4774. size_t delim_len = 0, enc_len = 0, esc_len = 0;
  4775. ZEND_PARSE_PARAMETERS_START(1, 4)
  4776. Z_PARAM_STR(str)
  4777. Z_PARAM_OPTIONAL
  4778. Z_PARAM_STRING(delim_str, delim_len)
  4779. Z_PARAM_STRING(enc_str, enc_len)
  4780. Z_PARAM_STRING(esc_str, esc_len)
  4781. ZEND_PARSE_PARAMETERS_END();
  4782. delim = delim_len ? delim_str[0] : delim;
  4783. enc = enc_len ? enc_str[0] : enc;
  4784. if (esc_str != NULL) {
  4785. esc = esc_len ? (unsigned char) esc_str[0] : PHP_CSV_NO_ESCAPE;
  4786. }
  4787. php_fgetcsv(NULL, delim, enc, esc, ZSTR_LEN(str), ZSTR_VAL(str), return_value);
  4788. }
  4789. /* }}} */
  4790. /* {{{ Returns the input string repeat mult times */
  4791. PHP_FUNCTION(str_repeat)
  4792. {
  4793. zend_string *input_str; /* Input string */
  4794. zend_long mult; /* Multiplier */
  4795. zend_string *result; /* Resulting string */
  4796. size_t result_len; /* Length of the resulting string */
  4797. ZEND_PARSE_PARAMETERS_START(2, 2)
  4798. Z_PARAM_STR(input_str)
  4799. Z_PARAM_LONG(mult)
  4800. ZEND_PARSE_PARAMETERS_END();
  4801. if (mult < 0) {
  4802. zend_argument_value_error(2, "must be greater than or equal to 0");
  4803. RETURN_THROWS();
  4804. }
  4805. /* Don't waste our time if it's empty */
  4806. /* ... or if the multiplier is zero */
  4807. if (ZSTR_LEN(input_str) == 0 || mult == 0)
  4808. RETURN_EMPTY_STRING();
  4809. /* Initialize the result string */
  4810. result = zend_string_safe_alloc(ZSTR_LEN(input_str), mult, 0, 0);
  4811. result_len = ZSTR_LEN(input_str) * mult;
  4812. /* Heavy optimization for situations where input string is 1 byte long */
  4813. if (ZSTR_LEN(input_str) == 1) {
  4814. memset(ZSTR_VAL(result), *ZSTR_VAL(input_str), mult);
  4815. } else {
  4816. const char *s, *ee;
  4817. char *e;
  4818. ptrdiff_t l=0;
  4819. memcpy(ZSTR_VAL(result), ZSTR_VAL(input_str), ZSTR_LEN(input_str));
  4820. s = ZSTR_VAL(result);
  4821. e = ZSTR_VAL(result) + ZSTR_LEN(input_str);
  4822. ee = ZSTR_VAL(result) + result_len;
  4823. while (e<ee) {
  4824. l = (e-s) < (ee-e) ? (e-s) : (ee-e);
  4825. memmove(e, s, l);
  4826. e += l;
  4827. }
  4828. }
  4829. ZSTR_VAL(result)[result_len] = '\0';
  4830. RETURN_NEW_STR(result);
  4831. }
  4832. /* }}} */
  4833. /* {{{ Returns info about what characters are used in input */
  4834. PHP_FUNCTION(count_chars)
  4835. {
  4836. zend_string *input;
  4837. int chars[256];
  4838. zend_long mymode=0;
  4839. const unsigned char *buf;
  4840. int inx;
  4841. char retstr[256];
  4842. size_t retlen=0;
  4843. size_t tmp = 0;
  4844. ZEND_PARSE_PARAMETERS_START(1, 2)
  4845. Z_PARAM_STR(input)
  4846. Z_PARAM_OPTIONAL
  4847. Z_PARAM_LONG(mymode)
  4848. ZEND_PARSE_PARAMETERS_END();
  4849. if (mymode < 0 || mymode > 4) {
  4850. zend_argument_value_error(2, "must be between 1 and 4 (inclusive)");
  4851. RETURN_THROWS();
  4852. }
  4853. buf = (const unsigned char *) ZSTR_VAL(input);
  4854. memset((void*) chars, 0, sizeof(chars));
  4855. while (tmp < ZSTR_LEN(input)) {
  4856. chars[*buf]++;
  4857. buf++;
  4858. tmp++;
  4859. }
  4860. if (mymode < 3) {
  4861. array_init(return_value);
  4862. }
  4863. for (inx = 0; inx < 256; inx++) {
  4864. switch (mymode) {
  4865. case 0:
  4866. add_index_long(return_value, inx, chars[inx]);
  4867. break;
  4868. case 1:
  4869. if (chars[inx] != 0) {
  4870. add_index_long(return_value, inx, chars[inx]);
  4871. }
  4872. break;
  4873. case 2:
  4874. if (chars[inx] == 0) {
  4875. add_index_long(return_value, inx, chars[inx]);
  4876. }
  4877. break;
  4878. case 3:
  4879. if (chars[inx] != 0) {
  4880. retstr[retlen++] = inx;
  4881. }
  4882. break;
  4883. case 4:
  4884. if (chars[inx] == 0) {
  4885. retstr[retlen++] = inx;
  4886. }
  4887. break;
  4888. }
  4889. }
  4890. if (mymode == 3 || mymode == 4) {
  4891. RETURN_STRINGL(retstr, retlen);
  4892. }
  4893. }
  4894. /* }}} */
  4895. /* {{{ php_strnatcmp */
  4896. static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, int fold_case)
  4897. {
  4898. zend_string *s1, *s2;
  4899. ZEND_PARSE_PARAMETERS_START(2, 2)
  4900. Z_PARAM_STR(s1)
  4901. Z_PARAM_STR(s2)
  4902. ZEND_PARSE_PARAMETERS_END();
  4903. RETURN_LONG(strnatcmp_ex(ZSTR_VAL(s1), ZSTR_LEN(s1),
  4904. ZSTR_VAL(s2), ZSTR_LEN(s2),
  4905. fold_case));
  4906. }
  4907. /* }}} */
  4908. PHPAPI int string_natural_compare_function_ex(zval *result, zval *op1, zval *op2, bool case_insensitive) /* {{{ */
  4909. {
  4910. zend_string *tmp_str1, *tmp_str2;
  4911. zend_string *str1 = zval_get_tmp_string(op1, &tmp_str1);
  4912. zend_string *str2 = zval_get_tmp_string(op2, &tmp_str2);
  4913. ZVAL_LONG(result, strnatcmp_ex(ZSTR_VAL(str1), ZSTR_LEN(str1), ZSTR_VAL(str2), ZSTR_LEN(str2), case_insensitive));
  4914. zend_tmp_string_release(tmp_str1);
  4915. zend_tmp_string_release(tmp_str2);
  4916. return SUCCESS;
  4917. }
  4918. /* }}} */
/* Case-insensitive variant: calls string_natural_compare_function_ex() with
 * case_insensitive = 1 and returns its return value. */
PHPAPI int string_natural_case_compare_function(zval *result, zval *op1, zval *op2) /* {{{ */
{
	return string_natural_compare_function_ex(result, op1, op2, 1);
}
  4923. /* }}} */
  4924. PHPAPI int string_natural_compare_function(zval *result, zval *op1, zval *op2) /* {{{ */
  4925. {
  4926. return string_natural_compare_function_ex(result, op1, op2, 0);
  4927. }
  4928. /* }}} */
  4929. /* {{{ Returns the result of string comparison using 'natural' algorithm */
  4930. PHP_FUNCTION(strnatcmp)
  4931. {
  4932. php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  4933. }
  4934. /* }}} */
  4935. /* {{{ Returns numeric formatting information based on the current locale */
  4936. PHP_FUNCTION(localeconv)
  4937. {
  4938. zval grouping, mon_grouping;
  4939. int len, i;
  4940. ZEND_PARSE_PARAMETERS_NONE();
  4941. array_init(return_value);
  4942. array_init(&grouping);
  4943. array_init(&mon_grouping);
  4944. {
  4945. struct lconv currlocdata;
  4946. localeconv_r( &currlocdata );
  4947. /* Grab the grouping data out of the array */
  4948. len = (int)strlen(currlocdata.grouping);
  4949. for (i = 0; i < len; i++) {
  4950. add_index_long(&grouping, i, currlocdata.grouping[i]);
  4951. }
  4952. /* Grab the monetary grouping data out of the array */
  4953. len = (int)strlen(currlocdata.mon_grouping);
  4954. for (i = 0; i < len; i++) {
  4955. add_index_long(&mon_grouping, i, currlocdata.mon_grouping[i]);
  4956. }
  4957. add_assoc_string(return_value, "decimal_point", currlocdata.decimal_point);
  4958. add_assoc_string(return_value, "thousands_sep", currlocdata.thousands_sep);
  4959. add_assoc_string(return_value, "int_curr_symbol", currlocdata.int_curr_symbol);
  4960. add_assoc_string(return_value, "currency_symbol", currlocdata.currency_symbol);
  4961. add_assoc_string(return_value, "mon_decimal_point", currlocdata.mon_decimal_point);
  4962. add_assoc_string(return_value, "mon_thousands_sep", currlocdata.mon_thousands_sep);
  4963. add_assoc_string(return_value, "positive_sign", currlocdata.positive_sign);
  4964. add_assoc_string(return_value, "negative_sign", currlocdata.negative_sign);
  4965. add_assoc_long( return_value, "int_frac_digits", currlocdata.int_frac_digits);
  4966. add_assoc_long( return_value, "frac_digits", currlocdata.frac_digits);
  4967. add_assoc_long( return_value, "p_cs_precedes", currlocdata.p_cs_precedes);
  4968. add_assoc_long( return_value, "p_sep_by_space", currlocdata.p_sep_by_space);
  4969. add_assoc_long( return_value, "n_cs_precedes", currlocdata.n_cs_precedes);
  4970. add_assoc_long( return_value, "n_sep_by_space", currlocdata.n_sep_by_space);
  4971. add_assoc_long( return_value, "p_sign_posn", currlocdata.p_sign_posn);
  4972. add_assoc_long( return_value, "n_sign_posn", currlocdata.n_sign_posn);
  4973. }
  4974. zend_hash_str_update(Z_ARRVAL_P(return_value), "grouping", sizeof("grouping")-1, &grouping);
  4975. zend_hash_str_update(Z_ARRVAL_P(return_value), "mon_grouping", sizeof("mon_grouping")-1, &mon_grouping);
  4976. }
  4977. /* }}} */
  4978. /* {{{ Returns the result of case-insensitive string comparison using 'natural' algorithm */
  4979. PHP_FUNCTION(strnatcasecmp)
  4980. {
  4981. php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  4982. }
  4983. /* }}} */
  4984. /* {{{ Returns the number of times a substring occurs in the string */
  4985. PHP_FUNCTION(substr_count)
  4986. {
  4987. char *haystack, *needle;
  4988. zend_long offset = 0, length = 0;
  4989. bool length_is_null = 1;
  4990. zend_long count;
  4991. size_t haystack_len, needle_len;
  4992. const char *p, *endp;
  4993. ZEND_PARSE_PARAMETERS_START(2, 4)
  4994. Z_PARAM_STRING(haystack, haystack_len)
  4995. Z_PARAM_STRING(needle, needle_len)
  4996. Z_PARAM_OPTIONAL
  4997. Z_PARAM_LONG(offset)
  4998. Z_PARAM_LONG_OR_NULL(length, length_is_null)
  4999. ZEND_PARSE_PARAMETERS_END();
  5000. if (needle_len == 0) {
  5001. zend_argument_value_error(2, "cannot be empty");
  5002. RETURN_THROWS();
  5003. }
  5004. p = haystack;
  5005. if (offset) {
  5006. if (offset < 0) {
  5007. offset += (zend_long)haystack_len;
  5008. }
  5009. if ((offset < 0) || ((size_t)offset > haystack_len)) {
  5010. zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
  5011. RETURN_THROWS();
  5012. }
  5013. p += offset;
  5014. haystack_len -= offset;
  5015. }
  5016. if (!length_is_null) {
  5017. if (length < 0) {
  5018. length += haystack_len;
  5019. }
  5020. if (length < 0 || ((size_t)length > haystack_len)) {
  5021. zend_argument_value_error(4, "must be contained in argument #1 ($haystack)");
  5022. RETURN_THROWS();
  5023. }
  5024. } else {
  5025. length = haystack_len;
  5026. }
  5027. if (needle_len == 1) {
  5028. count = count_chars(p, length, needle[0]);
  5029. } else {
  5030. count = 0;
  5031. endp = p + length;
  5032. while ((p = (char*)php_memnstr(p, needle, needle_len, endp))) {
  5033. p += needle_len;
  5034. count++;
  5035. }
  5036. }
  5037. RETURN_LONG(count);
  5038. }
  5039. /* }}} */
  5040. /* {{{ Returns input string padded on the left or right to specified length with pad_string */
  5041. PHP_FUNCTION(str_pad)
  5042. {
  5043. /* Input arguments */
  5044. zend_string *input; /* Input string */
  5045. zend_long pad_length; /* Length to pad to */
  5046. /* Helper variables */
  5047. size_t num_pad_chars; /* Number of padding characters (total - input size) */
  5048. char *pad_str = " "; /* Pointer to padding string */
  5049. size_t pad_str_len = 1;
  5050. zend_long pad_type_val = STR_PAD_RIGHT; /* The padding type value */
  5051. size_t i, left_pad=0, right_pad=0;
  5052. zend_string *result = NULL; /* Resulting string */
  5053. ZEND_PARSE_PARAMETERS_START(2, 4)
  5054. Z_PARAM_STR(input)
  5055. Z_PARAM_LONG(pad_length)
  5056. Z_PARAM_OPTIONAL
  5057. Z_PARAM_STRING(pad_str, pad_str_len)
  5058. Z_PARAM_LONG(pad_type_val)
  5059. ZEND_PARSE_PARAMETERS_END();
  5060. /* If resulting string turns out to be shorter than input string,
  5061. we simply copy the input and return. */
  5062. if (pad_length < 0 || (size_t)pad_length <= ZSTR_LEN(input)) {
  5063. RETURN_STR_COPY(input);
  5064. }
  5065. if (pad_str_len == 0) {
  5066. zend_argument_value_error(3, "must be a non-empty string");
  5067. RETURN_THROWS();
  5068. }
  5069. if (pad_type_val < STR_PAD_LEFT || pad_type_val > STR_PAD_BOTH) {
  5070. zend_argument_value_error(4, "must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
  5071. RETURN_THROWS();
  5072. }
  5073. num_pad_chars = pad_length - ZSTR_LEN(input);
  5074. result = zend_string_safe_alloc(1, ZSTR_LEN(input), num_pad_chars, 0);
  5075. ZSTR_LEN(result) = 0;
  5076. /* We need to figure out the left/right padding lengths. */
  5077. switch (pad_type_val) {
  5078. case STR_PAD_RIGHT:
  5079. left_pad = 0;
  5080. right_pad = num_pad_chars;
  5081. break;
  5082. case STR_PAD_LEFT:
  5083. left_pad = num_pad_chars;
  5084. right_pad = 0;
  5085. break;
  5086. case STR_PAD_BOTH:
  5087. left_pad = num_pad_chars / 2;
  5088. right_pad = num_pad_chars - left_pad;
  5089. break;
  5090. }
  5091. /* First we pad on the left. */
  5092. for (i = 0; i < left_pad; i++)
  5093. ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
  5094. /* Then we copy the input string. */
  5095. memcpy(ZSTR_VAL(result) + ZSTR_LEN(result), ZSTR_VAL(input), ZSTR_LEN(input));
  5096. ZSTR_LEN(result) += ZSTR_LEN(input);
  5097. /* Finally, we pad on the right. */
  5098. for (i = 0; i < right_pad; i++)
  5099. ZSTR_VAL(result)[ZSTR_LEN(result)++] = pad_str[i % pad_str_len];
  5100. ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
  5101. RETURN_NEW_STR(result);
  5102. }
  5103. /* }}} */
  5104. /* {{{ Implements an ANSI C compatible sscanf */
  5105. PHP_FUNCTION(sscanf)
  5106. {
  5107. zval *args = NULL;
  5108. char *str, *format;
  5109. size_t str_len, format_len;
  5110. int result, num_args = 0;
  5111. ZEND_PARSE_PARAMETERS_START(2, -1)
  5112. Z_PARAM_STRING(str, str_len)
  5113. Z_PARAM_STRING(format, format_len)
  5114. Z_PARAM_VARIADIC('*', args, num_args)
  5115. ZEND_PARSE_PARAMETERS_END();
  5116. result = php_sscanf_internal(str, format, num_args, args, 0, return_value);
  5117. if (SCAN_ERROR_WRONG_PARAM_COUNT == result) {
  5118. WRONG_PARAM_COUNT;
  5119. }
  5120. }
  5121. /* }}} */
  5122. /* static zend_string *php_str_rot13(zend_string *str) {{{ */
  5123. static zend_string *php_str_rot13(zend_string *str)
  5124. {
  5125. zend_string *ret;
  5126. const char *p, *e;
  5127. char *target;
  5128. if (UNEXPECTED(ZSTR_LEN(str) == 0)) {
  5129. return ZSTR_EMPTY_ALLOC();
  5130. }
  5131. ret = zend_string_alloc(ZSTR_LEN(str), 0);
  5132. p = ZSTR_VAL(str);
  5133. e = p + ZSTR_LEN(str);
  5134. target = ZSTR_VAL(ret);
  5135. #ifdef __SSE2__
  5136. if (e - p > 15) {
  5137. const __m128i a_minus_1 = _mm_set1_epi8('a' - 1);
  5138. const __m128i m_plus_1 = _mm_set1_epi8('m' + 1);
  5139. const __m128i n_minus_1 = _mm_set1_epi8('n' - 1);
  5140. const __m128i z_plus_1 = _mm_set1_epi8('z' + 1);
  5141. const __m128i A_minus_1 = _mm_set1_epi8('A' - 1);
  5142. const __m128i M_plus_1 = _mm_set1_epi8('M' + 1);
  5143. const __m128i N_minus_1 = _mm_set1_epi8('N' - 1);
  5144. const __m128i Z_plus_1 = _mm_set1_epi8('Z' + 1);
  5145. const __m128i add = _mm_set1_epi8(13);
  5146. const __m128i sub = _mm_set1_epi8(-13);
  5147. do {
  5148. __m128i in, gt, lt, cmp, delta;
  5149. delta = _mm_setzero_si128();
  5150. in = _mm_loadu_si128((__m128i *)p);
  5151. gt = _mm_cmpgt_epi8(in, a_minus_1);
  5152. lt = _mm_cmplt_epi8(in, m_plus_1);
  5153. cmp = _mm_and_si128(lt, gt);
  5154. if (_mm_movemask_epi8(cmp)) {
  5155. cmp = _mm_and_si128(cmp, add);
  5156. delta = _mm_or_si128(delta, cmp);
  5157. }
  5158. gt = _mm_cmpgt_epi8(in, n_minus_1);
  5159. lt = _mm_cmplt_epi8(in, z_plus_1);
  5160. cmp = _mm_and_si128(lt, gt);
  5161. if (_mm_movemask_epi8(cmp)) {
  5162. cmp = _mm_and_si128(cmp, sub);
  5163. delta = _mm_or_si128(delta, cmp);
  5164. }
  5165. gt = _mm_cmpgt_epi8(in, A_minus_1);
  5166. lt = _mm_cmplt_epi8(in, M_plus_1);
  5167. cmp = _mm_and_si128(lt, gt);
  5168. if (_mm_movemask_epi8(cmp)) {
  5169. cmp = _mm_and_si128(cmp, add);
  5170. delta = _mm_or_si128(delta, cmp);
  5171. }
  5172. gt = _mm_cmpgt_epi8(in, N_minus_1);
  5173. lt = _mm_cmplt_epi8(in, Z_plus_1);
  5174. cmp = _mm_and_si128(lt, gt);
  5175. if (_mm_movemask_epi8(cmp)) {
  5176. cmp = _mm_and_si128(cmp, sub);
  5177. delta = _mm_or_si128(delta, cmp);
  5178. }
  5179. in = _mm_add_epi8(in, delta);
  5180. _mm_storeu_si128((__m128i *)target, in);
  5181. p += 16;
  5182. target += 16;
  5183. } while (e - p > 15);
  5184. }
  5185. #endif
  5186. while (p < e) {
  5187. if (*p >= 'a' && *p <= 'z') {
  5188. *target++ = 'a' + (((*p++ - 'a') + 13) % 26);
  5189. } else if (*p >= 'A' && *p <= 'Z') {
  5190. *target++ = 'A' + (((*p++ - 'A') + 13) % 26);
  5191. } else {
  5192. *target++ = *p++;
  5193. }
  5194. }
  5195. *target = '\0';
  5196. return ret;
  5197. }
  5198. /* }}} */
  5199. /* {{{ Perform the rot13 transform on a string */
  5200. PHP_FUNCTION(str_rot13)
  5201. {
  5202. zend_string *arg;
  5203. ZEND_PARSE_PARAMETERS_START(1, 1)
  5204. Z_PARAM_STR(arg)
  5205. ZEND_PARSE_PARAMETERS_END();
  5206. RETURN_STR(php_str_rot13(arg));
  5207. }
  5208. /* }}} */
  5209. static void php_string_shuffle(char *str, zend_long len) /* {{{ */
  5210. {
  5211. zend_long n_elems, rnd_idx, n_left;
  5212. char temp;
  5213. /* The implementation is stolen from array_data_shuffle */
  5214. /* Thus the characteristics of the randomization are the same */
  5215. n_elems = len;
  5216. if (n_elems <= 1) {
  5217. return;
  5218. }
  5219. n_left = n_elems;
  5220. while (--n_left) {
  5221. rnd_idx = php_mt_rand_range(0, n_left);
  5222. if (rnd_idx != n_left) {
  5223. temp = str[n_left];
  5224. str[n_left] = str[rnd_idx];
  5225. str[rnd_idx] = temp;
  5226. }
  5227. }
  5228. }
  5229. /* }}} */
  5230. /* {{{ Shuffles string. One permutation of all possible is created */
  5231. PHP_FUNCTION(str_shuffle)
  5232. {
  5233. zend_string *arg;
  5234. ZEND_PARSE_PARAMETERS_START(1, 1)
  5235. Z_PARAM_STR(arg)
  5236. ZEND_PARSE_PARAMETERS_END();
  5237. RETVAL_STRINGL(ZSTR_VAL(arg), ZSTR_LEN(arg));
  5238. if (Z_STRLEN_P(return_value) > 1) {
  5239. php_string_shuffle(Z_STRVAL_P(return_value), (zend_long) Z_STRLEN_P(return_value));
  5240. }
  5241. }
  5242. /* }}} */
  5243. /* {{{ Counts the number of words inside a string. If format of 1 is specified,
  5244. then the function will return an array containing all the words
  5245. found inside the string. If format of 2 is specified, then the function
  5246. will return an associated array where the position of the word is the key
  5247. and the word itself is the value.
  5248. For the purpose of this function, 'word' is defined as a locale dependent
  5249. string containing alphabetic characters, which also may contain, but not start
  5250. with "'" and "-" characters.
  5251. */
  5252. PHP_FUNCTION(str_word_count)
  5253. {
  5254. zend_string *str;
  5255. char *char_list = NULL, ch[256];
  5256. const char *p, *e, *s;
  5257. size_t char_list_len = 0, word_count = 0;
  5258. zend_long type = 0;
  5259. ZEND_PARSE_PARAMETERS_START(1, 3)
  5260. Z_PARAM_STR(str)
  5261. Z_PARAM_OPTIONAL
  5262. Z_PARAM_LONG(type)
  5263. Z_PARAM_STRING_OR_NULL(char_list, char_list_len)
  5264. ZEND_PARSE_PARAMETERS_END();
  5265. switch(type) {
  5266. case 1:
  5267. case 2:
  5268. array_init(return_value);
  5269. if (!ZSTR_LEN(str)) {
  5270. return;
  5271. }
  5272. break;
  5273. case 0:
  5274. if (!ZSTR_LEN(str)) {
  5275. RETURN_LONG(0);
  5276. }
  5277. /* nothing to be done */
  5278. break;
  5279. default:
  5280. zend_argument_value_error(2, "must be a valid format value");
  5281. RETURN_THROWS();
  5282. }
  5283. if (char_list) {
  5284. php_charmask((const unsigned char *) char_list, char_list_len, ch);
  5285. }
  5286. p = ZSTR_VAL(str);
  5287. e = ZSTR_VAL(str) + ZSTR_LEN(str);
  5288. /* first character cannot be ' or -, unless explicitly allowed by the user */
  5289. if ((*p == '\'' && (!char_list || !ch['\''])) || (*p == '-' && (!char_list || !ch['-']))) {
  5290. p++;
  5291. }
  5292. /* last character cannot be -, unless explicitly allowed by the user */
  5293. if (*(e - 1) == '-' && (!char_list || !ch['-'])) {
  5294. e--;
  5295. }
  5296. while (p < e) {
  5297. s = p;
  5298. while (p < e && (isalpha((unsigned char)*p) || (char_list && ch[(unsigned char)*p]) || *p == '\'' || *p == '-')) {
  5299. p++;
  5300. }
  5301. if (p > s) {
  5302. switch (type)
  5303. {
  5304. case 1:
  5305. add_next_index_stringl(return_value, s, p - s);
  5306. break;
  5307. case 2:
  5308. add_index_stringl(return_value, (s - ZSTR_VAL(str)), s, p - s);
  5309. break;
  5310. default:
  5311. word_count++;
  5312. break;
  5313. }
  5314. }
  5315. p++;
  5316. }
  5317. if (!type) {
  5318. RETURN_LONG(word_count);
  5319. }
  5320. }
  5321. /* }}} */
  5322. /* {{{ Convert a string to an array. If split_length is specified, break the string down into chunks each split_length characters long. */
  5323. PHP_FUNCTION(str_split)
  5324. {
  5325. zend_string *str;
  5326. zend_long split_length = 1;
  5327. const char *p;
  5328. size_t n_reg_segments;
  5329. ZEND_PARSE_PARAMETERS_START(1, 2)
  5330. Z_PARAM_STR(str)
  5331. Z_PARAM_OPTIONAL
  5332. Z_PARAM_LONG(split_length)
  5333. ZEND_PARSE_PARAMETERS_END();
  5334. if (split_length <= 0) {
  5335. zend_argument_value_error(2, "must be greater than 0");
  5336. RETURN_THROWS();
  5337. }
  5338. if (0 == ZSTR_LEN(str) || (size_t)split_length >= ZSTR_LEN(str)) {
  5339. array_init_size(return_value, 1);
  5340. add_next_index_stringl(return_value, ZSTR_VAL(str), ZSTR_LEN(str));
  5341. return;
  5342. }
  5343. array_init_size(return_value, (uint32_t)(((ZSTR_LEN(str) - 1) / split_length) + 1));
  5344. n_reg_segments = ZSTR_LEN(str) / split_length;
  5345. p = ZSTR_VAL(str);
  5346. while (n_reg_segments-- > 0) {
  5347. add_next_index_stringl(return_value, p, split_length);
  5348. p += split_length;
  5349. }
  5350. if (p != (ZSTR_VAL(str) + ZSTR_LEN(str))) {
  5351. add_next_index_stringl(return_value, p, (ZSTR_VAL(str) + ZSTR_LEN(str) - p));
  5352. }
  5353. }
  5354. /* }}} */
  5355. /* {{{ Search a string for any of a set of characters */
  5356. PHP_FUNCTION(strpbrk)
  5357. {
  5358. zend_string *haystack, *char_list;
  5359. const char *haystack_ptr, *cl_ptr;
  5360. ZEND_PARSE_PARAMETERS_START(2, 2)
  5361. Z_PARAM_STR(haystack)
  5362. Z_PARAM_STR(char_list)
  5363. ZEND_PARSE_PARAMETERS_END();
  5364. if (!ZSTR_LEN(char_list)) {
  5365. zend_argument_value_error(2, "must be a non-empty string");
  5366. RETURN_THROWS();
  5367. }
  5368. for (haystack_ptr = ZSTR_VAL(haystack); haystack_ptr < (ZSTR_VAL(haystack) + ZSTR_LEN(haystack)); ++haystack_ptr) {
  5369. for (cl_ptr = ZSTR_VAL(char_list); cl_ptr < (ZSTR_VAL(char_list) + ZSTR_LEN(char_list)); ++cl_ptr) {
  5370. if (*cl_ptr == *haystack_ptr) {
  5371. RETURN_STRINGL(haystack_ptr, (ZSTR_VAL(haystack) + ZSTR_LEN(haystack) - haystack_ptr));
  5372. }
  5373. }
  5374. }
  5375. RETURN_FALSE;
  5376. }
  5377. /* }}} */
  5378. /* {{{ Binary safe optionally case insensitive comparison of 2 strings from an offset, up to length characters */
  5379. PHP_FUNCTION(substr_compare)
  5380. {
  5381. zend_string *s1, *s2;
  5382. zend_long offset, len=0;
  5383. bool len_is_default=1;
  5384. bool cs=0;
  5385. size_t cmp_len;
  5386. ZEND_PARSE_PARAMETERS_START(3, 5)
  5387. Z_PARAM_STR(s1)
  5388. Z_PARAM_STR(s2)
  5389. Z_PARAM_LONG(offset)
  5390. Z_PARAM_OPTIONAL
  5391. Z_PARAM_LONG_OR_NULL(len, len_is_default)
  5392. Z_PARAM_BOOL(cs)
  5393. ZEND_PARSE_PARAMETERS_END();
  5394. if (!len_is_default && len <= 0) {
  5395. if (len == 0) {
  5396. RETURN_LONG(0L);
  5397. } else {
  5398. zend_argument_value_error(4, "must be greater than or equal to 0");
  5399. RETURN_THROWS();
  5400. }
  5401. }
  5402. if (offset < 0) {
  5403. offset = ZSTR_LEN(s1) + offset;
  5404. offset = (offset < 0) ? 0 : offset;
  5405. }
  5406. if ((size_t)offset > ZSTR_LEN(s1)) {
  5407. zend_argument_value_error(3, "must be contained in argument #1 ($main_str)");
  5408. RETURN_THROWS();
  5409. }
  5410. cmp_len = len ? (size_t)len : MAX(ZSTR_LEN(s2), (ZSTR_LEN(s1) - offset));
  5411. if (!cs) {
  5412. RETURN_LONG(zend_binary_strncmp(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
  5413. } else {
  5414. RETURN_LONG(zend_binary_strncasecmp_l(ZSTR_VAL(s1) + offset, (ZSTR_LEN(s1) - offset), ZSTR_VAL(s2), ZSTR_LEN(s2), cmp_len));
  5415. }
  5416. }
  5417. /* }}} */
  5418. /* {{{ */
  5419. static zend_string *php_utf8_encode(const char *s, size_t len)
  5420. {
  5421. size_t pos = len;
  5422. zend_string *str;
  5423. unsigned char c;
  5424. str = zend_string_safe_alloc(len, 2, 0, 0);
  5425. ZSTR_LEN(str) = 0;
  5426. while (pos > 0) {
  5427. /* The lower 256 codepoints of Unicode are identical to Latin-1,
  5428. * so we don't need to do any mapping here. */
  5429. c = (unsigned char)(*s);
  5430. if (c < 0x80) {
  5431. ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
  5432. /* We only account for the single-byte and two-byte cases because
  5433. * we're only dealing with the first 256 Unicode codepoints. */
  5434. } else {
  5435. ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
  5436. ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
  5437. }
  5438. pos--;
  5439. s++;
  5440. }
  5441. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  5442. str = zend_string_truncate(str, ZSTR_LEN(str), 0);
  5443. return str;
  5444. }
  5445. /* }}} */
  5446. /* {{{ */
  5447. static zend_string *php_utf8_decode(const char *s, size_t len)
  5448. {
  5449. size_t pos = 0;
  5450. unsigned int c;
  5451. zend_string *str;
  5452. str = zend_string_alloc(len, 0);
  5453. ZSTR_LEN(str) = 0;
  5454. while (pos < len) {
  5455. int status = FAILURE;
  5456. c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
  5457. /* The lower 256 codepoints of Unicode are identical to Latin-1,
  5458. * so we don't need to do any mapping here beyond replacing non-Latin-1
  5459. * characters. */
  5460. if (status == FAILURE || c > 0xFFU) {
  5461. c = '?';
  5462. }
  5463. ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
  5464. }
  5465. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  5466. if (ZSTR_LEN(str) < len) {
  5467. str = zend_string_truncate(str, ZSTR_LEN(str), 0);
  5468. }
  5469. return str;
  5470. }
  5471. /* }}} */
  5472. /* {{{ Encodes an ISO-8859-1 string to UTF-8 */
  5473. PHP_FUNCTION(utf8_encode)
  5474. {
  5475. char *arg;
  5476. size_t arg_len;
  5477. ZEND_PARSE_PARAMETERS_START(1, 1)
  5478. Z_PARAM_STRING(arg, arg_len)
  5479. ZEND_PARSE_PARAMETERS_END();
  5480. RETURN_STR(php_utf8_encode(arg, arg_len));
  5481. }
  5482. /* }}} */
  5483. /* {{{ Converts a UTF-8 encoded string to ISO-8859-1 */
  5484. PHP_FUNCTION(utf8_decode)
  5485. {
  5486. char *arg;
  5487. size_t arg_len;
  5488. ZEND_PARSE_PARAMETERS_START(1, 1)
  5489. Z_PARAM_STRING(arg, arg_len)
  5490. ZEND_PARSE_PARAMETERS_END();
  5491. RETURN_STR(php_utf8_decode(arg, arg_len));
  5492. }
  5493. /* }}} */