PageRenderTime 65ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/extensions/hphp/runtime/base/zend/zend_string.cpp

https://bitbucket.org/lucciano/happygit
C++ | 3419 lines | 2847 code | 258 blank | 314 comment | 536 complexity | 998d768dc7e1f85c3d5c8bf60b226665 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception

Large files are truncated, but you can click here to view the full file

  1. /*
  2. +----------------------------------------------------------------------+
  3. | HipHop for PHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
  6. | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
  7. +----------------------------------------------------------------------+
  8. | This source file is subject to version 2.00 of the Zend license, |
  9. | that is bundled with this package in the file LICENSE, and is |
  10. | available through the world-wide-web at the following url: |
  11. | http://www.zend.com/license/2_00.txt. |
  12. | If you did not receive a copy of the Zend license and are unable to |
  13. | obtain it through the world-wide-web, please send a note to |
  14. | license@zend.com so we can mail you a copy immediately. |
  15. +----------------------------------------------------------------------+
  16. */
  17. #include <runtime/base/zend/zend_string.h>
  18. #include <runtime/base/zend/zend_printf.h>
  19. #include <runtime/base/zend/zend_math.h>
  20. //#include <util/lock.h>
  21. #include <math.h>
  22. #include <monetary.h>
  23. #include <runtime/base/util/exceptions.h>
  24. #include <runtime/base/complex_types.h>
  25. //#include <runtime/base/util/string_buffer.h>
  26. //#include <runtime/base/runtime_error.h>
  27. //#include <runtime/base/type_conversions.h>
  28. //#include <runtime/base/builtin_functions.h>
  29. #ifdef __APPLE__
  30. #ifndef isnan
  31. #define isnan(x) \
  32. ( sizeof (x) == sizeof(float ) ? __inline_isnanf((float)(x)) \
  33. : sizeof (x) == sizeof(double) ? __inline_isnand((double)(x)) \
  34. : __inline_isnan ((long double)(x)))
  35. #endif
  36. #ifndef isinf
  37. #define isinf(x) \
  38. ( sizeof (x) == sizeof(float ) ? __inline_isinff((float)(x)) \
  39. : sizeof (x) == sizeof(double) ? __inline_isinfd((double)(x)) \
  40. : __inline_isinf ((long double)(x)))
  41. #endif
  42. #endif
  43. #define PHP_QPRINT_MAXL 75
  44. namespace HPHP {
  45. ///////////////////////////////////////////////////////////////////////////////
  46. // helpers
  /**
   * Normalizes a (start, length) pair against a string of len bytes.
   *
   * @param len    length of the string being sliced
   * @param f      start offset; a negative value counts back from the end and
   *               is rewritten in place to the equivalent non-negative offset
   * @param l      requested length; a negative value means "stop that many
   *               bytes before the end" and is rewritten in place; a length
   *               that runs past the end is clamped
   * @param strict when true, a start offset equal to len is rejected
   * @return true if the adjusted range is usable, false otherwise
   */
  bool string_substr_check(int len, int &f, int &l, bool strict /* = true */) {
    // A negative start is relative to the end of the string.
    if (f < 0) {
      f += len;
      if (f < 0) {
        return false;
      }
    }

    // Starting beyond the end is never valid; starting exactly at the end
    // is only tolerated in non-strict mode.
    if (f > len) {
      return false;
    }
    if (strict && f == len) {
      return false;
    }

    // A negative length is relative to the end of the string as well.
    if (l < 0) {
      l += len - f;
      if (l < 0) {
        return false;
      }
    }

    // Clamp so that [f, f + l) stays inside the string. The sum is formed
    // in unsigned arithmetic so that a huge length cannot wrap negative.
    const unsigned int range_end = (unsigned int)f + (unsigned int)l;
    if (range_end > (unsigned int)len) {
      l = len - f;
    }
    return true;
  }
  /**
   * Builds a 256-entry membership table from a character list.
   *
   * After the call mask[b] is 1 for every byte b named in the list, either
   * literally or through a "lo..hi" range, and 0 for every other byte.
   *
   * @param sinput character list (need not be NUL-terminated)
   * @param len    number of bytes in sinput
   * @param mask   output table; must have room for 256 entries
   */
  void string_charmask(const char *sinput, int len, char *mask) {
    const unsigned char *cur = (unsigned char *)sinput;
    const unsigned char *const stop = cur + len;

    memset(mask, 0, 256);

    while (cur < stop) {
      const unsigned char lo = *cur;

      if (cur + 3 < stop && cur[1] == '.' && cur[2] == '.' && cur[3] >= lo) {
        /* Well-formed "lo..hi" range: mark every byte from lo through hi
           and step over all four characters. */
        memset(mask + lo, 1, cur[3] - lo + 1);
        cur += 4;
        continue;
      }

      if (cur + 1 < stop && cur[0] == '.' && cur[1] == '.') {
        /* Malformed range (a range ending/starting with '.' is not caught
           here). The possible causes are: no character to the left of
           "..", no character to the right of "..", or a decreasing range.
           FIXME: each of these should report throw_invalid_argument; for
           now the leading '.' is silently skipped. */
        ++cur;
        continue;
      }

      mask[lo] = 1;
      ++cur;
    }
  }
  /**
   * Bounded string copy in the style of strlcpy().
   *
   * Copies at most siz - 1 bytes of src into dst and NUL-terminates dst
   * whenever siz is positive. A zero or negative siz writes nothing.
   *
   * @param dst destination buffer of at least siz bytes
   * @param src NUL-terminated source string
   * @param siz capacity of dst in bytes
   * @return the length of src (not counting the NUL); a value >= siz means
   *         the copy was truncated
   */
  int string_copy(char *dst, const char *src, int siz) {
    const char *s = src;
    /* A negative size must not be reinterpreted as a huge unsigned
       capacity; treat it as "no room". */
    size_t room = siz > 0 ? (size_t)siz : 0;

    if (room != 0) {
      char *d = dst;
      /* Copy as many bytes as will fit, keeping one slot for the NUL. */
      while (--room != 0) {
        if ((*d++ = *s++) == '\0') {
          return (int)(s - src - 1); /* whole string fit */
        }
      }
      /* Out of room: terminate what we have. */
      *d = '\0';
    }

    /* Walk the rest of src so the full source length can be reported. */
    while (*s++) {
    }
    return (int)(s - src - 1); /* count does not include NUL */
  }
  128. ///////////////////////////////////////////////////////////////////////////////
  129. char *string_concat(const char *s1, int len1, const char *s2, int len2,
  130. int &len) {
  131. len = len1 + len2;
  132. char *buf = (char *)malloc(len + 1);
  133. if (buf == NULL) {
  134. throw FatalErrorException(0, "malloc failed: %d", len);
  135. }
  136. memcpy(buf, s1, len1);
  137. memcpy(buf + len1, s2, len2);
  138. buf[len] = 0;
  139. return buf;
  140. }
  141. ///////////////////////////////////////////////////////////////////////////////
  142. // comparisons
  /**
   * Binary-safe comparison of two length-delimited strings.
   *
   * Bytes are compared as unsigned values (via memcmp), so the ordering of
   * bytes >= 0x80 does not depend on whether plain char is signed on the
   * target platform. When one string is a prefix of the other, the shorter
   * one sorts first.
   *
   * @return -1, 0 or 1
   */
  int string_cmp(const char *s1, int len1, const char *s2, int len2) {
    const int common = len1 < len2 ? len1 : len2;

    if (common > 0) {
      const int diff = memcmp(s1, s2, common);
      if (diff != 0) {
        return diff > 0 ? 1 : -1;
      }
    }

    if (len1 == len2) {
      return 0;
    }
    return len1 < len2 ? -1 : 1;
  }
  /**
   * Case-insensitive comparison of two length-delimited strings.
   *
   * Each byte is converted to unsigned char before being passed to
   * toupper() (passing a negative char is undefined), and the folded values
   * are compared as unsigned bytes. When one string is a prefix of the
   * other, the shorter one sorts first.
   *
   * @return -1, 0 or 1
   */
  int string_casecmp(const char *s1, int len1, const char *s2, int len2) {
    const int common = len1 < len2 ? len1 : len2;

    for (int i = 0; i < common; i++) {
      const int c1 = toupper((unsigned char)s1[i]);
      const int c2 = toupper((unsigned char)s2[i]);
      if (c1 != c2) {
        return c1 > c2 ? 1 : -1;
      }
    }

    if (len1 == len2) {
      return 0;
    }
    return len1 < len2 ? -1 : 1;
  }
  /**
   * Compares the first len bytes of two strings.
   *
   * Bytes are compared as unsigned values (via memcmp), so the result for
   * bytes >= 0x80 does not depend on the signedness of plain char.
   *
   * @return -1, 0 or 1
   */
  int string_ncmp(const char *s1, const char *s2, int len) {
    if (len <= 0) {
      return 0;
    }
    const int diff = memcmp(s1, s2, len);
    if (diff == 0) {
      return 0;
    }
    return diff > 0 ? 1 : -1;
  }
  /**
   * Case-insensitively compares the first len bytes of two strings.
   *
   * Each byte is converted to unsigned char before being passed to
   * toupper() (passing a negative char is undefined), and the folded values
   * are compared as unsigned bytes.
   *
   * @return -1, 0 or 1
   */
  int string_ncasecmp(const char *s1, const char *s2, int len) {
    for (int i = 0; i < len; i++) {
      const int c1 = toupper((unsigned char)s1[i]);
      const int c2 = toupper((unsigned char)s2[i]);
      if (c1 != c2) {
        return c1 > c2 ? 1 : -1;
      }
    }
    return 0;
  }
  /**
   * Compares two right-aligned digit runs for natural ordering.
   *
   * The longer run of digits wins. For runs of equal length the first
   * differing digit decides, but that can only be reported once both runs
   * are known to be the same length, so it is remembered in bias meanwhile.
   *
   * Both cursors are advanced past the digits that were examined.
   *
   * @param a,b       in/out cursors into the two strings
   * @param aend,bend one-past-the-end of each string
   * @return negative, zero or positive
   */
  static int compare_right(char const **a, char const *aend,
                           char const **b, char const *bend) {
    int bias = 0;

    for (;; ++*a, ++*b) {
      const bool a_done = (*a == aend) || !isdigit((int)(unsigned char)**a);
      const bool b_done = (*b == bend) || !isdigit((int)(unsigned char)**b);

      if (a_done && b_done) {
        return bias;        /* same length: first difference decides */
      }
      if (a_done) {
        return -1;          /* a has the shorter run */
      }
      if (b_done) {
        return +1;          /* b has the shorter run */
      }

      if (bias == 0) {
        if (**a < **b) {
          bias = -1;
        } else if (**a > **b) {
          bias = +1;
        }
      }
    }
  }
  /**
   * Compares two left-aligned digit runs: the first position at which the
   * runs differ decides, and a run that ends first sorts lower.
   *
   * Both cursors are advanced past the digits that were examined.
   *
   * @param a,b       in/out cursors into the two strings
   * @param aend,bend one-past-the-end of each string
   * @return -1, 0 or +1
   */
  static int compare_left(char const **a, char const *aend,
                          char const **b, char const *bend) {
    for (;; ++*a, ++*b) {
      const bool a_done = (*a == aend) || !isdigit((int)(unsigned char)**a);
      const bool b_done = (*b == bend) || !isdigit((int)(unsigned char)**b);

      if (a_done && b_done) {
        return 0;
      }
      if (a_done) {
        return -1;
      }
      if (b_done) {
        return +1;
      }

      if (**a < **b) {
        return -1;
      }
      if (**a > **b) {
        return +1;
      }
    }
  }
  241. int string_natural_cmp(char const *a, size_t a_len,
  242. char const *b, size_t b_len, int fold_case) {
  243. char ca, cb;
  244. char const *ap, *bp;
  245. char const *aend = a + a_len, *bend = b + b_len;
  246. int fractional, result;
  247. if (a_len == 0 || b_len == 0)
  248. return a_len - b_len;
  249. ap = a;
  250. bp = b;
  251. while (1) {
  252. ca = *ap; cb = *bp;
  253. /* skip over leading spaces or zeros */
  254. while (isspace((int)(unsigned char)ca))
  255. ca = *++ap;
  256. while (isspace((int)(unsigned char)cb))
  257. cb = *++bp;
  258. /* process run of digits */
  259. if (isdigit((int)(unsigned char)ca) && isdigit((int)(unsigned char)cb)) {
  260. fractional = (ca == '0' || cb == '0');
  261. if (fractional)
  262. result = compare_left(&ap, aend, &bp, bend);
  263. else
  264. result = compare_right(&ap, aend, &bp, bend);
  265. if (result != 0)
  266. return result;
  267. else if (ap == aend && bp == bend)
  268. /* End of the strings. Let caller sort them out. */
  269. return 0;
  270. else {
  271. /* Keep on comparing from the current point. */
  272. ca = *ap; cb = *bp;
  273. }
  274. }
  275. if (fold_case) {
  276. ca = toupper((int)(unsigned char)ca);
  277. cb = toupper((int)(unsigned char)cb);
  278. }
  279. if (ca < cb)
  280. return -1;
  281. else if (ca > cb)
  282. return +1;
  283. ++ap; ++bp;
  284. if (ap >= aend && bp >= bend)
  285. /* The strings compare the same. Perhaps the caller
  286. will want to call strcmp to break the tie. */
  287. return 0;
  288. else if (ap >= aend)
  289. return -1;
  290. else if (bp >= bend)
  291. return 1;
  292. }
  293. }
  294. ///////////////////////////////////////////////////////////////////////////////
  295. char *string_to_lower(const char *s, int len) {
  296. ASSERT(s);
  297. char *ret = (char *)malloc(len + 1);
  298. for (int i = 0; i < len; i++) {
  299. ret[i] = tolower(s[i]);
  300. }
  301. ret[len] = '\0';
  302. return ret;
  303. }
  304. char *string_to_upper(const char *s, int len) {
  305. ASSERT(s);
  306. char *ret = (char *)malloc(len + 1);
  307. for (int i = 0; i < len; i++) {
  308. ret[i] = toupper(s[i]);
  309. }
  310. ret[len] = '\0';
  311. return ret;
  312. }
  313. char *string_to_upper_first(const char *s, int len) {
  314. ASSERT(s);
  315. char *ret = string_duplicate(s, len);
  316. if (*ret) {
  317. *ret = toupper(*ret);
  318. }
  319. return ret;
  320. }
  321. char *string_to_upper_words(const char *s, int len) {
  322. ASSERT(s);
  323. char *ret = string_duplicate(s, len);
  324. if (*ret) {
  325. *ret = toupper(*ret);
  326. for (int i = 1; i < len; i++) {
  327. if (isspace(ret[i-1])) {
  328. ret[i] = toupper(ret[i]);
  329. }
  330. }
  331. }
  332. return ret;
  333. }
  334. ///////////////////////////////////////////////////////////////////////////////
  335. char *string_trim(const char *s, int &len,
  336. const char *charlist, int charlistlen, int mode) {
  337. ASSERT(s);
  338. char mask[256];
  339. string_charmask(charlist, charlistlen, mask);
  340. int trimmed = 0;
  341. if (mode & 1) {
  342. for (int i = 0; i < len; i++) {
  343. if (mask[(unsigned char)s[i]]) {
  344. trimmed++;
  345. } else {
  346. break;
  347. }
  348. }
  349. len -= trimmed;
  350. s += trimmed;
  351. }
  352. if (mode & 2) {
  353. for (int i = len - 1; i >= 0; i--) {
  354. if (mask[(unsigned char)s[i]]) {
  355. len--;
  356. } else {
  357. break;
  358. }
  359. }
  360. }
  361. return string_duplicate(s, len);
  362. }
  363. #define STR_PAD_LEFT 0
  364. #define STR_PAD_RIGHT 1
  365. #define STR_PAD_BOTH 2
  366. char *string_pad(const char *input, int &len, int pad_length,
  367. const char *pad_string, int pad_str_len,
  368. int pad_type) {
  369. ASSERT(input);
  370. int num_pad_chars = pad_length - len;
  371. /* If resulting string turns out to be shorter than input string,
  372. we simply copy the input and return. */
  373. if (pad_length < 0 || num_pad_chars < 0) {
  374. return string_duplicate(input, len);
  375. }
  376. /* Setup the padding string values if specified. */
  377. if (pad_str_len == 0) {
  378. // FIXME: throw_invalid_argument("pad_string: (empty)");
  379. return NULL;
  380. }
  381. char *result = (char *)malloc(pad_length + 1);
  382. /* We need to figure out the left/right padding lengths. */
  383. int left_pad, right_pad;
  384. switch (pad_type) {
  385. case STR_PAD_RIGHT:
  386. left_pad = 0;
  387. right_pad = num_pad_chars;
  388. break;
  389. case STR_PAD_LEFT:
  390. left_pad = num_pad_chars;
  391. right_pad = 0;
  392. break;
  393. case STR_PAD_BOTH:
  394. left_pad = num_pad_chars / 2;
  395. right_pad = num_pad_chars - left_pad;
  396. break;
  397. default:
  398. // throw_invalid_argument("pad_type: %d", pad_type);
  399. return NULL;
  400. }
  401. /* First we pad on the left. */
  402. int result_len = 0;
  403. for (int i = 0; i < left_pad; i++) {
  404. result[result_len++] = pad_string[i % pad_str_len];
  405. }
  406. /* Then we copy the input string. */
  407. memcpy(result + result_len, input, len);
  408. result_len += len;
  409. /* Finally, we pad on the right. */
  410. for (int i = 0; i < right_pad; i++) {
  411. result[result_len++] = pad_string[i % pad_str_len];
  412. }
  413. result[result_len] = '\0';
  414. len = result_len;
  415. return result;
  416. }
  417. ///////////////////////////////////////////////////////////////////////////////
  418. char *string_substr(const char *s, int &len, int start, int length,
  419. bool nullable) {
  420. ASSERT(s);
  421. if (string_substr_check(len, start, length)) {
  422. len = length;
  423. return string_duplicate(s + start, length);
  424. }
  425. len = 0;
  426. if (nullable) {
  427. return NULL;
  428. }
  429. return string_duplicate("", 0);
  430. }
  431. int string_find(const char *input, int len, char ch, int pos,
  432. bool case_sensitive) {
  433. ASSERT(input);
  434. if (len && pos < len) {
  435. if (!case_sensitive) {
  436. ch = tolower(ch);
  437. char *lowered = string_to_lower(input, len);
  438. int ret = string_find(lowered, len, ch, pos, true);
  439. free(lowered);
  440. return ret;
  441. }
  442. int l = 1;
  443. if (!string_substr_check(len, pos, l)) {
  444. return -1;
  445. }
  446. const void *ptr = memchr(input + pos, ch, len - pos);
  447. if (ptr != NULL) {
  448. return (int)((const char *)ptr - input);
  449. }
  450. }
  451. return -1;
  452. }
  453. int string_rfind(const char *input, int len, char ch, int pos,
  454. bool case_sensitive) {
  455. ASSERT(input);
  456. if (len > 0 && pos < len) {
  457. if (!case_sensitive) {
  458. ch = tolower(ch);
  459. char *lowered = string_to_lower(input, len);
  460. int ret = string_rfind(lowered, len, ch, pos, true);
  461. free(lowered);
  462. return ret;
  463. }
  464. int l = 0;
  465. bool stop_at_offset = (pos >= 0);
  466. if (!string_substr_check(len, pos, l)) {
  467. return -1;
  468. }
  469. int start = len - 1, stop = 0;
  470. if (stop_at_offset) {
  471. stop = pos;
  472. } else {
  473. start = pos;
  474. }
  475. for (int i = start; i >= stop; i--) {
  476. if (input[i] == ch) {
  477. return i;
  478. }
  479. }
  480. }
  481. return -1;
  482. }
  483. int string_find(const char *input, int len, const char *s, int s_len,
  484. int pos, bool case_sensitive) {
  485. ASSERT(input);
  486. ASSERT(s);
  487. if (!s_len) {
  488. return -1;
  489. }
  490. if (len && pos < len) {
  491. if (!case_sensitive) {
  492. char *lowered_s = string_to_lower(s, s_len);
  493. char *lowered = string_to_lower(input, len);
  494. int ret = string_find(lowered, len, lowered_s, s_len, pos, true);
  495. free(lowered);
  496. free(lowered_s);
  497. return ret;
  498. }
  499. int l = 1;
  500. if (!string_substr_check(len, pos, l)) {
  501. return -1;
  502. }
  503. void *ptr = memmem(input + pos, len - pos, s, s_len);
  504. if (ptr != NULL) {
  505. return (int)((const char *)ptr - input);
  506. }
  507. }
  508. return -1;
  509. }
  510. int string_rfind(const char *input, int len, const char *s, int s_len,
  511. int pos, bool case_sensitive) {
  512. ASSERT(input);
  513. ASSERT(s);
  514. if (!s_len) {
  515. return -1;
  516. }
  517. if (len && pos < len) {
  518. if (!case_sensitive) {
  519. char *lowered_s = string_to_lower(s, s_len);
  520. char *lowered = string_to_lower(input, len);
  521. int ret = string_rfind(lowered, len, lowered_s, s_len, pos, true);
  522. free(lowered);
  523. free(lowered_s);
  524. return ret;
  525. }
  526. int l = 0;
  527. bool stop_at_offset = (pos >= 0);
  528. if (!string_substr_check(len, pos, l)) {
  529. return -1;
  530. }
  531. int start = len, stop = 0;
  532. if (stop_at_offset) {
  533. stop = pos;
  534. } else {
  535. start = pos;
  536. }
  537. for (int i = start - 1; i >= stop; i--) {
  538. if (input[i] == s[0] && memcmp(input+i, s, s_len) == 0) {
  539. return i;
  540. }
  541. }
  542. }
  543. return -1;
  544. }
  /**
   * Searches the byte range [haystack, end) for needle.
   *
   * Candidates are located with memchr() on the first needle byte; the last
   * needle byte is checked next as a cheap filter before the remaining
   * bytes are compared.
   *
   * @param haystack   start of the range to search
   * @param needle     bytes to look for
   * @param needle_len length of needle; the code reads needle[needle_len-1],
   *                   so callers are expected to pass at least 1
   * @param end        one past the last byte of the range
   * @return pointer to the first match, or NULL
   */
  const char *string_memnstr(const char *haystack, const char *needle,
                             int needle_len, const char *end) {
    const char last_byte = needle[needle_len - 1];
    /* Last position at which a complete match could still begin. */
    const char *const limit = end - needle_len;
    const char *cur = haystack;

    while (cur <= limit) {
      cur = (char *)memchr(cur, *needle, (limit - cur + 1));
      if (cur == NULL) {
        return NULL;
      }
      if (last_byte == cur[needle_len - 1] &&
          !memcmp(needle, cur, needle_len - 1)) {
        return cur;
      }
      ++cur;
    }
    return NULL;
  }
  /**
   * Finds the last occurrence of byte c in the first n bytes of s.
   *
   * The scan is index-based so that no pointer below the start of the
   * buffer is ever formed.
   *
   * @return pointer to the matching byte, or NULL if there is none (always
   *         NULL when n is 0)
   */
  void *string_memrchr(const void *s, int c, size_t n) {
    const unsigned char *base = (const unsigned char *)s;
    const unsigned char wanted = (unsigned char)c;

    for (size_t i = n; i-- > 0; ) {
      if (base[i] == wanted) {
        return (void *)(base + i);
      }
    }
    return NULL;
  }
  575. char *string_replace(const char *s, int &len, int start, int length,
  576. const char *replacement, int len_repl) {
  577. ASSERT(s);
  578. ASSERT(replacement);
  579. if (!string_substr_check(len, start, length, false)) {
  580. len = 0;
  581. return string_duplicate("", 0);
  582. }
  583. char *ret = (char *)malloc(len + len_repl - length + 1);
  584. int ret_len = 0;
  585. if (start) {
  586. memcpy(ret, s, start);
  587. ret_len += start;
  588. }
  589. if (len_repl) {
  590. memcpy(ret + ret_len, replacement, len_repl);
  591. ret_len += len_repl;
  592. }
  593. len -= (start + length);
  594. if (len) {
  595. memcpy(ret + ret_len, s + start + length, len);
  596. ret_len += len;
  597. }
  598. len = ret_len;
  599. ret[ret_len] = '\0';
  600. return ret;
  601. }
  602. char *string_replace(const char *input, int &len,
  603. const char *search, int len_search,
  604. const char *replacement, int len_replace,
  605. int &count, bool case_sensitive) {
  606. ASSERT(input);
  607. ASSERT(search && len_search);
  608. if (len == 0) {
  609. return NULL;
  610. }
  611. std::vector<int> founds;
  612. founds.reserve(16);
  613. if (len_search == 1) {
  614. for (int pos = string_find(input, len, *search, 0, case_sensitive);
  615. pos >= 0;
  616. pos = string_find(input, len, *search, pos + len_search,
  617. case_sensitive)) {
  618. founds.push_back(pos);
  619. }
  620. } else {
  621. for (int pos = string_find(input, len, search, len_search, 0,
  622. case_sensitive);
  623. pos >= 0;
  624. pos = string_find(input, len, search, len_search,
  625. pos + len_search, case_sensitive)) {
  626. founds.push_back(pos);
  627. }
  628. }
  629. count = founds.size();
  630. if (count == 0) {
  631. return NULL; // not found
  632. }
  633. char *ret = (char *)malloc(len + (len_replace - len_search) * count + 1);
  634. char *p = ret;
  635. int pos = 0; // last position in input that hasn't been copied over yet
  636. int n;
  637. for (unsigned int i = 0; i < founds.size(); i++) {
  638. n = founds[i];
  639. if (n > pos) {
  640. n -= pos;
  641. memcpy(p, input, n);
  642. p += n;
  643. input += n;
  644. pos += n;
  645. }
  646. if (len_replace) {
  647. memcpy(p, replacement, len_replace);
  648. p += len_replace;
  649. }
  650. input += len_search;
  651. pos += len_search;
  652. }
  653. n = len;
  654. if (n > pos) {
  655. n -= pos;
  656. memcpy(p, input, n);
  657. p += n;
  658. }
  659. *p = '\0';
  660. len = p - ret;
  661. return ret;
  662. }
  663. ///////////////////////////////////////////////////////////////////////////////
  664. char *string_reverse(const char *s, int len) {
  665. ASSERT(s);
  666. char *n = (char *)malloc(len + 1);
  667. char *p = n;
  668. const char *e = s + len;
  669. while (--e >= s) {
  670. *p++ = *e;
  671. }
  672. *p = '\0';
  673. return n;
  674. }
  675. char *string_repeat(const char *s, int &len, int count) {
  676. ASSERT(s);
  677. if (len == 0 || count <= 0) {
  678. return NULL;
  679. }
  680. char *ret = (char *)malloc(len * count + 1);
  681. if (len == 1) {
  682. memset(ret, *s, count);
  683. len = count;
  684. } else {
  685. char *p = ret;
  686. for (int i = 0; i < count; i++) {
  687. memcpy(p, s, len);
  688. p += len;
  689. }
  690. len = p - ret;
  691. }
  692. ret[len] = '\0';
  693. return ret;
  694. }
  695. char *string_shuffle(const char *str, int len) {
  696. ASSERT(str);
  697. if (len <= 1) {
  698. return NULL;
  699. }
  700. char *ret = string_duplicate(str, len);
  701. int n_left = len;
  702. while (--n_left) {
  703. int rnd_idx = rand() % n_left;
  704. char temp = ret[n_left];
  705. ret[n_left] = ret[rnd_idx];
  706. ret[rnd_idx] = temp;
  707. }
  708. return ret;
  709. }
  /**
   * Splits src into chunks of chunklen bytes and appends end after every
   * chunk, including a final partial chunk.
   *
   * @param src      input string
   * @param srclen   in: length of src; out: length of the result (left
   *                 unchanged when NULL is returned)
   * @param end      separator appended after each chunk
   * @param endlen   length of end
   * @param chunklen chunk size in bytes
   * @return a newly malloc'ed NUL-terminated string, or NULL when chunklen
   *         is not positive
   */
  char *string_chunk_split(const char *src, int &srclen, const char *end,
                           int endlen, int chunklen) {
    /* A non-positive chunk size would divide by zero (or loop with a
       negative copy size) below. */
    if (chunklen <= 0) {
      return NULL;
    }

    const int chunks = srclen / chunklen; // complete chunks!
    const int restlen = srclen - chunks * chunklen; /* srclen % chunklen */
    const int out_len = (chunks + 1) * endlen + srclen + 1;

    char *dest = (char *)malloc(out_len);
    const char *p = src;
    char *q = dest;

    /* Count chunks instead of comparing against src + srclen - chunklen + 1,
       which can point before src when chunklen exceeds srclen. */
    for (int i = 0; i < chunks; i++) {
      memcpy(q, p, chunklen);
      q += chunklen;
      memcpy(q, end, endlen);
      q += endlen;
      p += chunklen;
    }

    if (restlen) {
      memcpy(q, p, restlen);
      q += restlen;
      memcpy(q, end, endlen);
      q += endlen;
    }

    *q = '\0';
    srclen = q - dest;
    return dest;
  }
  735. ///////////////////////////////////////////////////////////////////////////////
  #define PHP_TAG_BUF_SIZE 1023
  /**
   * Check if tag is in a set of tags.
   *
   * The tag is first normalized: it is lower-cased, whitespace before the
   * tag name and a leading '/' are dropped, and everything from the first
   * whitespace after the name onward is discarded, so that "<a whatever...>"
   * and "</a>" both become "<a>". The normalized form is then looked up as
   * a substring of set.
   *
   * states:
   *
   * 0 start tag
   * 1 first non-whitespace char seen
   *
   * @param tag raw tag text
   * @param len number of bytes in tag; the scan never reads beyond it
   * @param set concatenation of allowed tags, e.g. "<a><b>"
   * @return 1 if the normalized tag occurs in set, 0 otherwise (including
   *         when len <= 0 or the scratch buffer cannot be allocated)
   */
  static int string_tag_find(const char *tag, int len, char *set) {
    if (len <= 0) {
      return 0;
    }

    /* Worst case every input byte is kept, plus the closing '>' and the
       terminating NUL. */
    char *norm = (char *)malloc(len + 2);
    if (norm == NULL) {
      return 0;
    }

    char *n = norm;
    const char *const tag_end = tag + len;
    int state = 0;

    for (const char *t = tag; t < tag_end; ++t) {
      /* Cast first: ctype functions on a negative plain char are
         undefined. */
      const int c = tolower((unsigned char)*t);

      if (c == '<') {
        *(n++) = (char)c;
        continue;
      }
      if (c == '>') {
        break;
      }
      if (isspace(c)) {
        if (state == 1) {
          break;            /* whitespace after the name ends it */
        }
        continue;           /* whitespace before the name is skipped */
      }

      if (state == 0) {
        state = 1;
        if (c != '/') {     /* "</tag>" is treated like "<tag>" */
          *(n++) = (char)c;
        }
      } else {
        *(n++) = (char)c;
      }
    }
    *(n++) = '>';
    *n = '\0';

    const int found = (strstr(set, norm) != NULL) ? 1 : 0;
    free(norm);
    return found;
  }
  797. /**
  798. * A simple little state-machine to strip out html and php tags
  799. *
  800. * State 0 is the output state, State 1 means we are inside a
  801. * normal html tag and state 2 means we are inside a php tag.
  802. *
  803. * The state variable is passed in to allow a function like fgetss
  804. * to maintain state across calls to the function.
  805. *
  806. * lc holds the last significant character read and br is a bracket
  807. * counter.
  808. *
  809. * When an allow string is passed in we keep track of the string
  810. * in state 1 and when the tag is closed check it against the
  811. * allow string to see if we should allow it.
  812. * swm: Added ability to strip <?xml tags without assuming it PHP
  813. * code.
  814. */
  815. static size_t strip_tags_impl(char *rbuf, int len, int *stateptr,
  816. char *allow, int allow_len,
  817. bool allow_tag_spaces) {
  818. char *tbuf, *buf, *p, *tp, *rp, c, lc;
  819. int br, i=0, depth=0, in_q = 0;
  820. int state = 0;
  821. if (stateptr)
  822. state = *stateptr;
  823. buf = string_duplicate(rbuf, len);
  824. c = *buf;
  825. lc = '\0';
  826. p = buf;
  827. rp = rbuf;
  828. br = 0;
  829. if (allow) {
  830. for (char *tmp = allow; *tmp; tmp++) {
  831. *tmp = tolower((int)*(unsigned char *)tmp);
  832. }
  833. tbuf = (char *)malloc(PHP_TAG_BUF_SIZE+1);
  834. tp = tbuf;
  835. } else {
  836. tbuf = tp = NULL;
  837. }
  838. while (i < len) {
  839. switch (c) {
  840. case '\0':
  841. break;
  842. case '<':
  843. if (isspace(*(p + 1)) && !allow_tag_spaces) {
  844. goto reg_char;
  845. }
  846. if (state == 0) {
  847. lc = '<';
  848. state = 1;
  849. if (allow) {
  850. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  851. *(tp++) = '<';
  852. }
  853. } else if (state == 1) {
  854. depth++;
  855. }
  856. break;
  857. case '(':
  858. if (state == 2) {
  859. if (lc != '"' && lc != '\'') {
  860. lc = '(';
  861. br++;
  862. }
  863. } else if (allow && state == 1) {
  864. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  865. *(tp++) = c;
  866. } else if (state == 0) {
  867. *(rp++) = c;
  868. }
  869. break;
  870. case ')':
  871. if (state == 2) {
  872. if (lc != '"' && lc != '\'') {
  873. lc = ')';
  874. br--;
  875. }
  876. } else if (allow && state == 1) {
  877. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  878. *(tp++) = c;
  879. } else if (state == 0) {
  880. *(rp++) = c;
  881. }
  882. break;
  883. case '>':
  884. if (depth) {
  885. depth--;
  886. break;
  887. }
  888. if (in_q) {
  889. break;
  890. }
  891. switch (state) {
  892. case 1: /* HTML/XML */
  893. lc = '>';
  894. in_q = state = 0;
  895. if (allow) {
  896. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  897. *(tp++) = '>';
  898. *tp='\0';
  899. if (string_tag_find(tbuf, tp-tbuf, allow)) {
  900. memcpy(rp, tbuf, tp-tbuf);
  901. rp += tp-tbuf;
  902. }
  903. tp = tbuf;
  904. }
  905. break;
  906. case 2: /* PHP */
  907. if (!br && lc != '\"' && *(p-1) == '?') {
  908. in_q = state = 0;
  909. tp = tbuf;
  910. }
  911. break;
  912. case 3:
  913. in_q = state = 0;
  914. tp = tbuf;
  915. break;
  916. case 4: /* JavaScript/CSS/etc... */
  917. if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
  918. in_q = state = 0;
  919. tp = tbuf;
  920. }
  921. break;
  922. default:
  923. *(rp++) = c;
  924. break;
  925. }
  926. break;
  927. case '"':
  928. case '\'':
  929. if (state == 2 && *(p-1) != '\\') {
  930. if (lc == c) {
  931. lc = '\0';
  932. } else if (lc != '\\') {
  933. lc = c;
  934. }
  935. } else if (state == 0) {
  936. *(rp++) = c;
  937. } else if (allow && state == 1) {
  938. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  939. *(tp++) = c;
  940. }
  941. if (state && p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
  942. if (in_q) {
  943. in_q = 0;
  944. } else {
  945. in_q = *p;
  946. }
  947. }
  948. break;
  949. case '!':
  950. /* JavaScript & Other HTML scripting languages */
  951. if (state == 1 && *(p-1) == '<') {
  952. state = 3;
  953. lc = c;
  954. } else {
  955. if (state == 0) {
  956. *(rp++) = c;
  957. } else if (allow && state == 1) {
  958. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  959. *(tp++) = c;
  960. }
  961. }
  962. break;
  963. case '-':
  964. if (state == 3 && p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
  965. state = 4;
  966. } else {
  967. goto reg_char;
  968. }
  969. break;
  970. case '?':
  971. if (state == 1 && *(p-1) == '<') {
  972. br=0;
  973. state=2;
  974. break;
  975. }
  976. case 'E':
  977. case 'e':
  978. /* !DOCTYPE exception */
  979. if (state==3 && p > buf+6
  980. && tolower(*(p-1)) == 'p'
  981. && tolower(*(p-2)) == 'y'
  982. && tolower(*(p-3)) == 't'
  983. && tolower(*(p-4)) == 'c'
  984. && tolower(*(p-5)) == 'o'
  985. && tolower(*(p-6)) == 'd') {
  986. state = 1;
  987. break;
  988. }
  989. /* fall-through */
  990. case 'l':
  991. /* swm: If we encounter '<?xml' then we shouldn't be in
  992. * state == 2 (PHP). Switch back to HTML.
  993. */
  994. if (state == 2 && p > buf+2 && *(p-1) == 'm' && *(p-2) == 'x') {
  995. state = 1;
  996. break;
  997. }
  998. /* fall-through */
  999. default:
  1000. reg_char:
  1001. if (state == 0) {
  1002. *(rp++) = c;
  1003. } else if (allow && state == 1) {
  1004. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  1005. *(tp++) = c;
  1006. }
  1007. break;
  1008. }
  1009. c = *(++p);
  1010. i++;
  1011. }
  1012. if (rp < rbuf + len) {
  1013. *rp = '\0';
  1014. }
  1015. free(buf);
  1016. if (allow)
  1017. free(tbuf);
  1018. if (stateptr)
  1019. *stateptr = state;
  1020. return (size_t)(rp - rbuf);
  1021. }
  1022. char *string_strip_tags(const char *s, int &len, const char *allow,
  1023. int allow_len) {
  1024. ASSERT(s);
  1025. ASSERT(allow);
  1026. char *ret = string_duplicate(s, len);
  1027. char *sallow = string_duplicate(allow, allow_len);
  1028. len = strip_tags_impl(ret, len, NULL, sallow, allow_len, false);
  1029. free(sallow);
  1030. return ret;
  1031. }
  1032. ///////////////////////////////////////////////////////////////////////////////
  1033. char *string_wordwrap(const char *text, int &textlen, int linelength,
  1034. const char *breakchar, int breakcharlen, bool docut) {
  1035. ASSERT(text);
  1036. ASSERT(breakchar);
  1037. char *newtext;
  1038. int newtextlen, chk;
  1039. size_t alloced;
  1040. long current = 0, laststart = 0, lastspace = 0;
  1041. if (textlen == 0) {
  1042. return "";
  1043. }
  1044. if (breakcharlen == 0) {
  1045. // throw_invalid_argument("wordbreak: (empty)");
  1046. return NULL;
  1047. }
  1048. if (linelength == 0 && docut) {
  1049. // throw_invalid_argument("width", "can't force cut when width = 0");
  1050. return NULL;
  1051. }
  1052. /* Special case for a single-character break as it needs no
  1053. additional storage space */
  1054. if (breakcharlen == 1 && !docut) {
  1055. newtext = string_duplicate(text, textlen);
  1056. laststart = lastspace = 0;
  1057. for (current = 0; current < textlen; current++) {
  1058. if (text[current] == breakchar[0]) {
  1059. laststart = lastspace = current;
  1060. } else if (text[current] == ' ') {
  1061. if (current - laststart >= linelength) {
  1062. newtext[current] = breakchar[0];
  1063. laststart = current + 1;
  1064. }
  1065. lastspace = current;
  1066. } else if (current - laststart >= linelength && laststart != lastspace) {
  1067. newtext[lastspace] = breakchar[0];
  1068. laststart = lastspace + 1;
  1069. }
  1070. }
  1071. return newtext;
  1072. }
  1073. /* Multiple character line break or forced cut */
  1074. if (linelength > 0) {
  1075. chk = (int)(textlen/linelength + 1);
  1076. alloced = textlen + chk * breakcharlen + 1;
  1077. } else {
  1078. chk = textlen;
  1079. alloced = textlen * (breakcharlen + 1) + 1;
  1080. }
  1081. newtext = (char *)malloc(alloced);
  1082. /* now keep track of the actual new text length */
  1083. newtextlen = 0;
  1084. laststart = lastspace = 0;
  1085. for (current = 0; current < textlen; current++) {
  1086. if (chk <= 0) {
  1087. alloced += (int) (((textlen - current + 1)/linelength + 1) *
  1088. breakcharlen) + 1;
  1089. newtext = (char *)realloc(newtext, alloced);
  1090. chk = (int) ((textlen - current)/linelength) + 1;
  1091. }
  1092. /* when we hit an existing break, copy to new buffer, and
  1093. * fix up laststart and lastspace */
  1094. if (text[current] == breakchar[0]
  1095. && current + breakcharlen < textlen
  1096. && !strncmp(text+current, breakchar, breakcharlen)) {
  1097. memcpy(newtext+newtextlen, text+laststart,
  1098. current-laststart+breakcharlen);
  1099. newtextlen += current-laststart+breakcharlen;
  1100. current += breakcharlen - 1;
  1101. laststart = lastspace = current + 1;
  1102. chk--;
  1103. }
  1104. /* if it is a space, check if it is at the line boundary,
  1105. * copy and insert a break, or just keep track of it */
  1106. else if (text[current] == ' ') {
  1107. if (current - laststart >= linelength) {
  1108. memcpy(newtext+newtextlen, text+laststart, current-laststart);
  1109. newtextlen += current - laststart;
  1110. memcpy(newtext+newtextlen, breakchar, breakcharlen);
  1111. newtextlen += breakcharlen;
  1112. laststart = current + 1;
  1113. chk--;
  1114. }
  1115. lastspace = current;
  1116. }
  1117. /* if we are cutting, and we've accumulated enough
  1118. * characters, and we haven't see a space for this line,
  1119. * copy and insert a break. */
  1120. else if (current - laststart >= linelength
  1121. && docut && laststart >= lastspace) {
  1122. memcpy(newtext+newtextlen, text+laststart, current-laststart);
  1123. newtextlen += current - laststart;
  1124. memcpy(newtext+newtextlen, breakchar, breakcharlen);
  1125. newtextlen += breakcharlen;
  1126. laststart = lastspace = current;
  1127. chk--;
  1128. }
  1129. /* if the current word puts us over the linelength, copy
  1130. * back up until the last space, insert a break, and move
  1131. * up the laststart */
  1132. else if (current - laststart >= linelength
  1133. && laststart < lastspace) {
  1134. memcpy(newtext+newtextlen, text+laststart, lastspace-laststart);
  1135. newtextlen += lastspace - laststart;
  1136. memcpy(newtext+newtextlen, breakchar, breakcharlen);
  1137. newtextlen += breakcharlen;
  1138. laststart = lastspace = lastspace + 1;
  1139. chk--;
  1140. }
  1141. }
  1142. /* copy over any stragglers */
  1143. if (laststart != current) {
  1144. memcpy(newtext+newtextlen, text+laststart, current-laststart);
  1145. newtextlen += current - laststart;
  1146. }
  1147. textlen = newtextlen;
  1148. newtext[newtextlen] = '\0';
  1149. return newtext;
  1150. }
  1151. ///////////////////////////////////////////////////////////////////////////////
  1152. char *string_addcslashes(const char *str, int &length, const char *what,
  1153. int wlength) {
  1154. ASSERT(str);
  1155. ASSERT(what);
  1156. char flags[256];
  1157. string_charmask(what, wlength, flags);
  1158. char *new_str = (char *)malloc((length << 2) + 1);
  1159. const char *source;
  1160. const char *end;
  1161. char *target;
  1162. for (source = str, end = source + length, target = new_str; source < end;
  1163. source++) {
  1164. char c = *source;
  1165. if (flags[(unsigned char)c]) {
  1166. if ((unsigned char) c < 32 || (unsigned char) c > 126) {
  1167. *target++ = '\\';
  1168. switch (c) {
  1169. case '\n': *target++ = 'n'; break;
  1170. case '\t': *target++ = 't'; break;
  1171. case '\r': *target++ = 'r'; break;
  1172. case '\a': *target++ = 'a'; break;
  1173. case '\v': *target++ = 'v'; break;
  1174. case '\b': *target++ = 'b'; break;
  1175. case '\f': *target++ = 'f'; break;
  1176. default: target += sprintf(target, "%03o", (unsigned char) c);
  1177. }
  1178. continue;
  1179. }
  1180. *target++ = '\\';
  1181. }
  1182. *target++ = c;
  1183. }
  1184. *target = 0;
  1185. length = target - new_str;
  1186. return new_str;
  1187. }
  1188. char *string_stripcslashes(const char *input, int &nlen) {
  1189. ASSERT(input);
  1190. if (nlen == 0) {
  1191. return NULL;
  1192. }
  1193. char *str = string_duplicate(input, nlen);
  1194. char *source, *target, *end;
  1195. int i;
  1196. char numtmp[4];
  1197. for (source=str, end=str+nlen, target=str; source < end; source++) {
  1198. if (*source == '\\' && source+1 < end) {
  1199. source++;
  1200. switch (*source) {
  1201. case 'n': *target++='\n'; nlen--; break;
  1202. case 'r': *target++='\r'; nlen--; break;
  1203. case 'a': *target++='\a'; nlen--; break;
  1204. case 't': *target++='\t'; nlen--; break;
  1205. case 'v': *target++='\v'; nlen--; break;
  1206. case 'b': *target++='\b'; nlen--; break;
  1207. case 'f': *target++='\f'; nlen--; break;
  1208. case '\\': *target++='\\'; nlen--; break;
  1209. case 'x':
  1210. if (source+1 < end && isxdigit((int)(*(source+1)))) {
  1211. numtmp[0] = *++source;
  1212. if (source+1 < end && isxdigit((int)(*(source+1)))) {
  1213. numtmp[1] = *++source;
  1214. numtmp[2] = '\0';
  1215. nlen-=3;
  1216. } else {
  1217. numtmp[1] = '\0';
  1218. nlen-=2;
  1219. }
  1220. *target++=(char)strtol(numtmp, NULL, 16);
  1221. break;
  1222. }
  1223. /* break is left intentionally */
  1224. default:
  1225. i=0;
  1226. while (source < end && *source >= '0' && *source <= '7' && i<3) {
  1227. numtmp[i++] = *source++;
  1228. }
  1229. if (i) {
  1230. numtmp[i]='\0';
  1231. *target++=(char)strtol(numtmp, NULL, 8);
  1232. nlen-=i;
  1233. source--;
  1234. } else {
  1235. *target++=*source;
  1236. nlen--;
  1237. }
  1238. }
  1239. } else {
  1240. *target++=*source;
  1241. }
  1242. }
  1243. *target='\0';
  1244. nlen = target - str;
  1245. return str;
  1246. }
  1247. char *string_addslashes(const char *str, int &length) {
  1248. ASSERT(str);
  1249. if (length == 0) {
  1250. return NULL;
  1251. }
  1252. char *new_str = (char *)malloc((length << 1) + 1);
  1253. const char *source = str;
  1254. const char *end = source + length;
  1255. char *target = new_str;
  1256. while (source < end) {
  1257. switch (*source) {
  1258. case '\0':
  1259. *target++ = '\\';
  1260. *target++ = '0';
  1261. break;
  1262. case '\'':
  1263. case '\"':
  1264. case '\\':
  1265. *target++ = '\\';
  1266. /* break is missing *intentionally* */
  1267. default:
  1268. *target++ = *source;
  1269. break;
  1270. }
  1271. source++;
  1272. }
  1273. *target = 0;
  1274. length = target - new_str;
  1275. return new_str;
  1276. }
  1277. char *string_stripslashes(const char *input, int &l) {
  1278. ASSERT(input);
  1279. if (!*input) {
  1280. return NULL;
  1281. }
  1282. char *str = string_duplicate(input, l);
  1283. char *s, *t;
  1284. s = str;
  1285. t = str;
  1286. while (l > 0) {
  1287. if (*t == '\\') {
  1288. t++; /* skip the slash */
  1289. l--;
  1290. if (l > 0) {
  1291. if (*t == '0') {
  1292. *s++='\0';
  1293. t++;
  1294. } else {
  1295. *s++ = *t++; /* preserve the next character */
  1296. }
  1297. l--;
  1298. }
  1299. } else {
  1300. *s++ = *t++;
  1301. l--;
  1302. }
  1303. }
  1304. if (s != t) {
  1305. *s = '\0';
  1306. }
  1307. l = s - str;
  1308. return str;
  1309. }
  1310. char *string_quotemeta(const char *input, int &len) {
  1311. ASSERT(input);
  1312. if (len == 0) {
  1313. return NULL;
  1314. }
  1315. char *ret = (char *)malloc((len << 1) + 1);
  1316. char *q = ret;
  1317. for (const char *p = input; *p; p++) {
  1318. char c = *p;
  1319. switch (c) {
  1320. case '.':
  1321. case '\\':
  1322. case '+':
  1323. case '*':
  1324. case '?':
  1325. case '[':
  1326. case '^':
  1327. case ']':
  1328. case '$':
  1329. case '(':
  1330. case ')':
  1331. *q++ = '\\';
  1332. /* break is missing _intentionally_ */
  1333. default:
  1334. *q++ = c;
  1335. }
  1336. }
  1337. *q = 0;
  1338. len = q - ret;
  1339. return ret;
  1340. }
  1341. ///////////////////////////////////////////////////////////////////////////////
  /**
   * Converts one hexadecimal digit character to its value (0-15).
   * Returns -1 (converted to char) when c is not a hexadecimal digit.
   */
  static char string_hex2int(int c) {
    int value = -1;
    if (c >= '0' && c <= '9') {
      value = c - '0';
    } else if (c >= 'A' && c <= 'F') {
      value = c - 'A' + 10;
    } else if (c >= 'a' && c <= 'f') {
      value = c - 'a' + 10;
    }
    return (char)value;
  }
  1354. char *string_quoted_printable_encode(const char *input, int &len) {
  1355. const char *hex = "0123456789ABCDEF";
  1356. unsigned char *ret =
  1357. (unsigned char *)malloc(3 * len + 3 * (((3 * len)/PHP_QPRINT_MAXL) + 1));
  1358. unsigned char *d = ret;
  1359. int length = len;
  1360. unsigned char c;
  1361. unsigned long lp = 0;
  1362. while (length--) {
  1363. if (((c = *input++) == '\015') && (*input == '\012') && length > 0) {
  1364. *d++ = '\015';
  1365. *d++ = *input++;
  1366. length--;
  1367. lp = 0;
  1368. } else {
  1369. if (iscntrl (c) || (c == 0x7f) || (c & 0x80) || (c == '=') ||
  1370. ((c == ' ') && (*input == '\015'))) {
  1371. if ((lp += 3) > PHP_QPRINT_MAXL) {
  1372. *d++ = '=';
  1373. *d++ = '\015';
  1374. *d++ = '\012';
  1375. lp = 3;
  1376. }
  1377. *d++ = '=';
  1378. *d++ = hex[c >> 4];
  1379. *d++ = hex[c & 0xf];
  1380. } else {
  1381. if ((++lp) > PHP_QPRINT_MAXL) {
  1382. *d++ = '=';
  1383. *d++ = '\015';
  1384. *d++ = '\012';
  1385. lp = 1;
  1386. }
  1387. *d++ = c;
  1388. }
  1389. }
  1390. }
  1391. *d = '\0';
  1392. len = d - ret;
  1393. return (char*)ret;
  1394. }
  1395. char *string_quoted_printable_decode(const char *input, int &len, bool is_q) {
  1396. ASSERT(input);
  1397. if (len == 0) {
  1398. return NULL;
  1399. }
  1400. int i = 0, j = 0, k;
  1401. const char *str_in = input;
  1402. char *str_out = (char *)malloc(len + 1);
  1403. while (i < len && str_in[i]) {
  1404. switch (str_in[i]) {
  1405. case '=':
  1406. if (i + 2 < len && str_in[i + 1] && str_in[i + 2] &&
  1407. isxdigit((int) str_in[i + 1]) && isxdigit((int) str_in[i + 2]))
  1408. {
  1409. str_out[j++] = (string_hex2int((int) str_in[i + 1]) << 4)
  1410. + string_hex2int((int) str_in[i + 2]);
  1411. i += 3;
  1412. } else /* check for soft line break according to RFC 2045*/ {
  1413. k = 1;
  1414. while (str_in[i + k] &&
  1415. ((str_in[i + k] == 32) || (str_in[i + k] == 9))) {
  1416. /* Possibly, skip spaces/tabs at the end of line */
  1417. k++;
  1418. }
  1419. if (!str_in[i + k]) {
  1420. /* End of line reached */
  1421. i += k;
  1422. }
  1423. else if ((str_in[i + k] == 13) && (str_in[i + k + 1] == 10)) {
  1424. /* CRLF */
  1425. i += k + 2;
  1426. }
  1427. else if ((str_in[i + k] == 13) || (str_in[i + k] == 10)) {
  1428. /* CR or LF */
  1429. i += k + 1;
  1430. }
  1431. else {
  1432. str_out[j++] = str_in[i++];
  1433. }
  1434. }
  1435. break;
  1436. case '_':
  1437. if (is_q) {
  1438. str_out[j++] = ' ';
  1439. i++;
  1440. } else {
  1441. str_out[j++] = str_in[i++];
  1442. }
  1443. break;
  1444. default:
  1445. str_out[j++] = str_in[i++];
  1446. }
  1447. }
  1448. str_out[j] = '\0';
  1449. len = j;
  1450. return str_out;
  1451. }
  1452. char *string_bin2hex(const char *input, int &len) {
  1453. static char hexconvtab[] = "0123456789abcdef";
  1454. ASSERT(input);
  1455. if (len == 0) {
  1456. return NULL;
  1457. }
  1458. int i, j;
  1459. char *result = (char *)malloc((len << 1) + 1);
  1460. for (i = j = 0; i < len; i++) {
  1461. result[j++] = hexconvtab[(unsigned char)input[i] >> 4];
  1462. result[j++] = hexconvtab[(unsigned char)input[i] & 15];
  1463. }
  1464. result[j] = '\0';
  1465. len = j;
  1466. return result;
  1467. }
  1468. char *string_hex2bin(const char *input, int &len) {
  1469. len >>= 1;
  1470. char *str = (char *)malloc(len + 1);
  1471. int i, j;
  1472. for (i = j = 0; i < len; i++) {
  1473. char c = input[j++];
  1474. if (c >= '0' && c <= '9') {
  1475. str[i] = (c - '0') << 4;
  1476. } else if (c >= 'a' && c <= 'f') {
  1477. str[i] = (c - 'a' + 10) << 4;
  1478. } else if (c >= 'A' && c <= 'F') {
  1479. str[i] = (c - 'A' + 10) << 4;
  1480. } else {
  1481. free(str);
  1482. throw InvalidArgumentException("bad encoding at position", j);
  1483. }
  1484. c = input[j++];
  1485. if (c >= '0' && c <= '9') {
  1486. str[i] |= c - '0';
  1487. } else if (c >= 'a' && c <= 'f') {
  1488. str[i] |= c - 'a' + 10;
  1489. } else if (c >= 'A' && c <= 'F') {
  1490. str[i] |= c - 'A' + 10;
  1491. } else {
  1492. free(str);
  1493. throw InvalidArgumentException("bad encoding at position", j);
  1494. }
  1495. }
  1496. str[len] = '\0';
  1497. return str;
  1498. }
  1499. Variant string_base_to_numeric(const char *s, int len, int base) {
  1500. int64 num = 0;
  1501. double fnum = 0;
  1502. int mode = 0;
  1503. int64 cutoff;
  1504. int cutlim;
  1505. ASSERT(string_validate_base(base));
  1506. cutoff = LONG_MAX / base;
  1507. cutlim = LONG_MAX % base;
  1508. for (int i = len; i > 0; i--) {
  1509. char c = *s++;
  1510. /* might not work for EBCDIC */
  1511. if (c >= '0' && c <= '9')
  1512. c -= '0';
  1513. else if (c >= 'A' && c <= 'Z')
  1514. c -= 'A' - 10;
  1515. else if (c >= 'a' && c <= 'z')
  1516. c -= 'a' - 10;
  1517. else
  1518. continue;
  1519. if (c >= base)
  1520. continue;
  1521. switch (mode) {
  1522. case 0: /* Integer */
  1523. if (num < cutoff || (num == cutoff && c <= cutlim)) {
  1524. num = num * base + c;
  1525. break;
  1526. } else {
  1527. fnum = num;
  1528. mode = 1;
  1529. }
  1530. /* fall-through */
  1531. case 1: /* Float */
  1532. fnum = fnum * base + c;
  1533. }
  1534. }
  1535. if (mode == 1) {
  1536. return fnum;
  1537. }
  1538. return num;
  1539. }
  1540. char *string_long_to_base(unsigned long value, int base) {
  1541. static char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
  1542. char buf[(sizeof(unsigned long) << 3) + 1];
  1543. char *ptr, *end;
  1544. ASSERT(string_validate_base(base));
  1545. end = ptr = buf + sizeof(buf) - 1;
  1546. *ptr = '\0';
  1547. do {
  1548. *--ptr = digits[value % base];
  1549. value /= base;
  1550. } while (ptr > buf && value);
  1551. return string_duplicate(ptr, end - ptr);
  1552. }
  1553. char *string_numeric_to_base(CVarRef value, int base) {
  1554. static char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
  1555. ASSERT(string_validate_base(base));
  1556. if ((!value.isInteger() && !value.isDouble())) {
  1557. return string_duplicate("", 0);
  1558. }
  1559. if (value.isDouble()) {
  1560. double fvalue = floor(value); /* floor it just in case */
  1561. char *ptr, *end;
  1562. char buf[(sizeof(double) << 3) + 1];
  1563. /* Don't try to convert +/- infinity */
  1564. if (fvalue == HUGE_VAL || fvalue == -HUGE_VAL) {
  1565. // raise_warning("Number too large");
  1566. return string_duplicate("", 0);
  1567. }
  1568. end = ptr = buf + sizeof(buf) - 1;
  1569. *ptr = '\0';
  1570. do {
  1571. *--ptr = digits[(int) fmod(fvalue, base)];
  1572. fvalue /= base;
  1573. } while (ptr > buf && fabs(fvalue) >= 1);
  1574. return string_duplicate(ptr, end - ptr);
  1575. }
  1576. return string_long_to_base(value.toInt64(), base);
  1577. }
  1578. ///////////////////////////////////////////////////////////////////////////////
  1579. // uuencode
  /* Maps a value to its uuencode character: the low 6 bits offset by ' ',
   * with zero written as '`'. Evaluates its argument twice. */
  #define PHP_UU_ENC(c) \
    ((c) ? ((c) & 077) + ' ' : '`')

  /* Second output character of a 3-byte group starting at c:
   * low 2 bits of byte 0 and high 4 bits of byte 1. */
  #define PHP_UU_ENC_C2(c) \
    PHP_UU_ENC(((*(c) << 4) & 060) | ((*((c) + 1) >> 4) & 017))

  /* Third output character of a 3-byte group starting at c:
   * low 4 bits of byte 1 and high 2 bits of byte 2.
   * The argument is fully parenthesized so that any pointer expression can be
   * passed, not just a plain identifier. */
  #define PHP_UU_ENC_C3(c) \
    PHP_UU_ENC(((*((c) + 1) << 2) & 074) | ((*((c) + 2) >> 6) & 03))

  /* Maps a uuencode character back to its 6-bit value. */
  #define PHP_UU_DEC(c) \
    (((c) - ' ') & 077)
  1588. char *string_uuencode(const char *src, int src_len, int &dest_len) {
  1589. ASSERT(src);
  1590. ASSERT(src_len);
  1591. int len = 45;
  1592. char *p;
  1593. const char *s, *e, *ee;
  1594. char *dest;
  1595. /* encoded length is ~ 38% greater then the original */
  1596. p = dest = (char *)malloc((int)ceil(src_len * 1.38) + 46);
  1597. s = src;
  1598. e = src + src_len;
  1599. while ((s + 3) < e) {
  1600. ee = s + len;
  1601. if (ee > e) {
  1602. ee = e;
  1603. len = ee - s;
  1604. if (len % 3) {
  1605. ee = s + (int) (floor(len / 3) * 3);
  1606. }
  1607. }
  1608. *p++ = PHP_UU_ENC(len);
  1609. while (s < ee) {
  1610. *p++ = PHP_UU_ENC(*s >> 2);
  1611. *p++ = PHP_UU_ENC_C2(s);
  1612. *p++ = PHP_UU_ENC_C3(s);
  1613. *p++ = PHP_UU_ENC(*(s + 2) & 077);
  1614. s += 3;
  1615. }
  1616. if (len == 45) {
  1617. *p++ = '\n';
  1618. }
  1619. }
  1620. if (s < e) {
  1621. if (len == 45) {
  1622. *p++ = PHP_UU_ENC(e - s);
  1623. len = 0;
  1624. }
  1625. *p++ = PHP_UU_ENC(*s >> 2);
  1626. *p++ = PHP_UU_ENC_C2(s);
  1627. *p++ = ((e - s) > 1) ? PHP_UU_ENC_C3(s) : PHP_UU_ENC('\0');
  1628. *p++ = ((e - s) > 2) ? PHP_UU_ENC(*(s + 2) & 077) : PHP_UU_ENC('\0');
  1629. }
  1630. if (len < 45) {
  1631. *p++ = '\n';
  1632. }
  1633. *p++ = PHP_UU_ENC('\0');
  1634. *p++ = '\n';
  1635. *p = '\0';
  1636. dest_len = p - dest;
  1637. return dest;
  1638. }
  1639. char *string_uudecode(const char *src, int src_len, int &total_len) {
  1640. total_len = 0;
  1641. int len;
  1642. const char *s, *e, *ee;
  1643. char *p, *dest;
  1644. p = dest = (char *)malloc((int)ceil(src_len * 0.75) + 1);
  1645. s = src;
  1646. e = src + src_len;
  1647. while (s < e) {
  1648. if ((len = PHP_UU_DEC(*s++)) <= 0) {
  1649. break;
  1650. }
  1651. /* sanity check */
  1652. if (len > src_len) {
  1653. goto err;
  1654. }
  1655. total_len += len;
  1656. ee = s + (len == 45 ? 60 : (int) floor(len * 1.33));
  1657. /* sanity check */
  1658. if (ee > e) {
  1659. goto err;
  1660. }
  1661. while (s < ee) {
  1662. *p++ = PHP_UU_DEC(*s) << 2 | PHP_UU_DEC(*(s + 1)) >> 4;
  1663. *p++ = PHP_UU_DEC(*(s + 1)) << 4 | PHP_UU_DEC(*(s + 2)) >> 2;
  1664. *p++ = PHP_UU_DEC(*(s + 2)) << 6 | PHP_UU_DEC(*(s + 3));
  1665. s += 4;
  1666. }
  1667. if (len < 45) {
  1668. break;
  1669. }
  1670. /* skip \n */
  1671. s++;
  1672. }
  1673. if ((len = total_len > (p - dest))) {
  1674. *p++ = PHP_UU_DEC(*s) << 2 | PHP_UU_DEC(*(s + 1)) >> 4;
  1675. if (len > 1) {
  1676. *p++ = PHP_UU_DEC(*(s + 1)) << 4 | PHP_UU_DEC(*(s + 2)) >> 2;
  1677. if (len > 2) {
  1678. *p++ = PHP_UU_DEC(*(s + 2)) << 6 | PHP_UU_DEC(*(s + 3));
  1679. }
  1680. }
  1681. }
  1682. *(dest + total_len) = '\0';
  1683. return dest;
  1684. err:
  1685. free(dest);
  1686. return NULL;
  1687. }
  1688. ///////////////////////////////////////////////////////////////////////////////
  1689. // base64
  /* The 64 characters of the base64 alphabet, indexed by 6-bit value, followed
   * by a terminating NUL (65 elements in total). */
  static const char base64_table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

  /* Padding character. */
  static const char base64_pad = '=';

  /* Maps a byte to its 6-bit base64 value.
   *   -1  tab, LF, CR and space
   *   -2  every other byte outside the alphabet (including '=')
   */
  static const short base64_reverse_table[256] = {
    /* 0x00 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
    /* 0x10 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x20 */ -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
    /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
    /* 0x40 */ -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
    /* 0x60 */ -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
    /* 0x80 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x90 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xa0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xb0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xc0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xd0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xe0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xf0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
  };
  1716. static unsigned char *php_base64_encode(const unsigned char *str, int length,
  1717. int *ret_length) {
  1718. const unsigned char *current = str;
  1719. unsigned char *p;
  1720. unsigned char *result;
  1721. if ((length + 2) < 0 || ((length + 2) / 3) >= (1 << (sizeof(int) * 8 - 2))) {
  1722. if (ret_length != NULL) {
  1723. *ret_length = 0;
  1724. }
  1725. return NULL;
  1726. }
  1727. result = (unsigned char *)malloc(((length + 2) / 3) * 4 + 1);
  1728. p = result;
  1729. while (length > 2) { /* keep going until we have less than 24 bits */
  1730. *p++ = base64_table[current[0] >> 2];
  1731. *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
  1732. *p++ = base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)];
  1733. *p++ = base64_table[current[2] & 0x3f];
  1734. current += 3;
  1735. length -= 3; /* we just handle 3 octets of data */
  1736. }
  1737. /* now deal with the tail end of things */
  1738. if (length != 0) {
  1739. *p++ = base64_table[current[0] >> 2];
  1740. if (length > 1) {
  1741. *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
  1742. *p++ = base64_table[(current[1] & 0x0f) << 2];
  1743. *p++ = base64_pad;
  1744. } else {
  1745. *p++ = base64_table[(current[0] & 0x03) << 4];
  1746. *p++ = base64_pad;
  1747. *p++ = base64_pad;
  1748. }
  1749. }
  1750. if (ret_length != NULL) {
  1751. *ret_length = (int)(p - result);
  1752. }
  1753. *p = '\0';
  1754. return result;
  1755. }
  1756. static unsigned char *php_base64_decode(const unsigned char *str,
  1757. int length, int *ret_length,
  1758. bool strict) {
  1759. const unsigned char *current = str;
  1760. int ch, i = 0, j = 0, k;
  1761. /* this sucks for threaded environments */
  1762. unsigned char *result;
  1763. result = (unsigned char *)malloc(length + 1);
  1764. /* run through the whole string, converting as we go */
  1765. while ((ch = *current++) != '\0' && length-- > 0) {
  1766. if (ch == base64_pad) {
  1767. if (*current != '=' && (i % 4) == 1) {
  1768. free(result);
  1769. return NULL;
  1770. }
  1771. continue;
  1772. }
  1773. ch = base64_reverse_table[ch];
  1774. if ((!strict && ch < 0) || ch == -1) {
  1775. /* a space or some other separator character, we simply skip over */
  1776. continue;
  1777. } else if (ch == -2) {

Large files are truncated, but you can click here to view the full file