PageRenderTime 40ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 1ms

/extensions/hphp/runtime/base/zend/zend_string.cpp

https://bitbucket.org/lucciano/happygit
C++ | 3419 lines | 2847 code | 258 blank | 314 comment | 536 complexity | 998d768dc7e1f85c3d5c8bf60b226665 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception
  1. /*
  2. +----------------------------------------------------------------------+
  3. | HipHop for PHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
  6. | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
  7. +----------------------------------------------------------------------+
  8. | This source file is subject to version 2.00 of the Zend license, |
  9. | that is bundled with this package in the file LICENSE, and is |
  10. | available through the world-wide-web at the following url: |
  11. | http://www.zend.com/license/2_00.txt. |
  12. | If you did not receive a copy of the Zend license and are unable to |
  13. | obtain it through the world-wide-web, please send a note to |
  14. | license@zend.com so we can mail you a copy immediately. |
  15. +----------------------------------------------------------------------+
  16. */
  17. #include <runtime/base/zend/zend_string.h>
  18. #include <runtime/base/zend/zend_printf.h>
  19. #include <runtime/base/zend/zend_math.h>
  20. //#include <util/lock.h>
  21. #include <math.h>
  22. #include <monetary.h>
  23. #include <runtime/base/util/exceptions.h>
  24. #include <runtime/base/complex_types.h>
  25. //#include <runtime/base/util/string_buffer.h>
  26. //#include <runtime/base/runtime_error.h>
  27. //#include <runtime/base/type_conversions.h>
  28. //#include <runtime/base/builtin_functions.h>
  29. #ifdef __APPLE__
  30. #ifndef isnan
  31. #define isnan(x) \
  32. ( sizeof (x) == sizeof(float ) ? __inline_isnanf((float)(x)) \
  33. : sizeof (x) == sizeof(double) ? __inline_isnand((double)(x)) \
  34. : __inline_isnan ((long double)(x)))
  35. #endif
  36. #ifndef isinf
  37. #define isinf(x) \
  38. ( sizeof (x) == sizeof(float ) ? __inline_isinff((float)(x)) \
  39. : sizeof (x) == sizeof(double) ? __inline_isinfd((double)(x)) \
  40. : __inline_isinf ((long double)(x)))
  41. #endif
  42. #endif
  43. #define PHP_QPRINT_MAXL 75
  44. namespace HPHP {
  45. ///////////////////////////////////////////////////////////////////////////////
  46. // helpers
  /**
   * Normalize a (start, length) pair against a string of `len` bytes.
   *
   * A negative `f` is counted from the end of the string; a negative `l`
   * means "stop that many bytes before the end". Both are rewritten in place.
   * On success 0 <= f, 0 <= l and f + l <= len hold.
   *
   * @param len     length of the string being sliced
   * @param f       in/out start offset
   * @param l       in/out length
   * @param strict  when true, a start offset equal to `len` is rejected
   * @return true when the range is usable, false otherwise (`f`/`l` may
   *         already have been adjusted when false is returned)
   */
  bool string_substr_check(int len, int &f, int &l, bool strict /* = true */) {
    // Negative start: count from the end.
    if (f < 0) {
      f += len;
      if (f < 0) return false;
    }
    if (f > len) return false;
    if (strict && f == len) return false;

    // Negative length: leave that many bytes untouched at the end.
    if (l < 0) {
      l += len - f;
      if (l < 0) return false;
    }

    // Clamp a range that would run past the end of the string.
    const unsigned int range_end = (unsigned int)f + (unsigned int)l;
    if (range_end > (unsigned int)len) {
      l = len - f;
    }
    return true;
  }
  /**
   * Build a 256-entry membership table from a character list.
   *
   * Every byte of `sinput` sets mask[byte] = 1. A sequence "a..b" with
   * a <= b sets the whole inclusive range. A ".." that does not form such a
   * range is skipped without touching the mask.
   *
   * @param sinput  character list (not required to be NUL-terminated)
   * @param len     number of bytes in `sinput`
   * @param mask    output table of at least 256 bytes; fully overwritten
   */
  void string_charmask(const char *sinput, int len, char *mask) {
    const unsigned char *cur = (const unsigned char *)sinput;
    const unsigned char *const stop = cur + len;

    memset(mask, 0, 256);
    while (cur < stop) {
      const unsigned char lo = *cur;

      // Well-formed "lo..hi" range.
      if ((cur + 3 < stop) && cur[1] == '.' && cur[2] == '.' && cur[3] >= lo) {
        memset(mask + lo, 1, cur[3] - lo + 1);
        cur += 4;
        continue;
      }

      if ((cur + 1 < stop) && cur[0] == '.' && cur[1] == '.') {
        /* Malformed range (a range ending/starting with '.' is not captured
           here). Nothing is reported yet:
           FIXME: throw_invalid_argument for each of
             - no character to the left of '..'
             - no character to the right of '..'
             - '..'-range that is not incrementing
             - any other invalid '..'-range (a..b..c?) */
      } else {
        mask[lo] = 1;
      }
      ++cur;
    }
  }
  /**
   * Bounded string copy in the style of strlcpy.
   *
   * Copies at most siz - 1 bytes of the NUL-terminated `src` into `dst` and
   * NUL-terminates `dst` whenever `siz` is non-zero.
   *
   * @return the length of `src` (excluding the NUL); a value >= siz means the
   *         copy was truncated
   */
  int string_copy(char *dst, const char *src, int siz) {
    char *out = dst;
    const char *in = src;
    size_t room = siz;

    /* Copy as many bytes as will fit, stopping after the terminator. */
    if (room != 0) {
      while (--room != 0) {
        const char ch = *in++;
        *out++ = ch;
        if (ch == '\0') {
          break;
        }
      }
    }

    /* Ran out of room: terminate dst and walk the remainder of src so the
       full source length can be reported. */
    if (room == 0) {
      if (siz != 0) {
        *out = '\0';
      }
      while (*in++) {
      }
    }

    return (int)(in - src - 1); /* count does not include NUL */
  }
  128. ///////////////////////////////////////////////////////////////////////////////
  129. char *string_concat(const char *s1, int len1, const char *s2, int len2,
  130. int &len) {
  131. len = len1 + len2;
  132. char *buf = (char *)malloc(len + 1);
  133. if (buf == NULL) {
  134. throw FatalErrorException(0, "malloc failed: %d", len);
  135. }
  136. memcpy(buf, s1, len1);
  137. memcpy(buf + len1, s2, len2);
  138. buf[len] = 0;
  139. return buf;
  140. }
  141. ///////////////////////////////////////////////////////////////////////////////
  142. // comparisons
  /**
   * Compare two length-delimited strings byte by byte (as plain `char`).
   *
   * @return -1, 0 or 1. When one string is a prefix of the other, the
   *         shorter one orders first.
   */
  int string_cmp(const char *s1, int len1, const char *s2, int len2) {
    const int common = (len1 <= len2) ? len1 : len2;

    for (int k = 0; k < common; ++k) {
      const char lhs = s1[k];
      const char rhs = s2[k];
      if (lhs != rhs) {
        return lhs > rhs ? 1 : -1;
      }
    }

    // Common prefix is identical: decide on length alone.
    if (len1 == len2) {
      return 0;
    }
    return len1 < len2 ? -1 : 1;
  }
  /**
   * Case-insensitive comparison of two length-delimited strings.
   *
   * Each byte is upper-cased with toupper() before being compared as a plain
   * `char`. When one string is a prefix of the other, the shorter one orders
   * first.
   *
   * @return -1, 0 or 1
   */
  int string_casecmp(const char *s1, int len1, const char *s2, int len2) {
    const int common = (len1 <= len2) ? len1 : len2;

    for (int i = 0; i < common; i++) {
      // <ctype.h> functions require a value representable as unsigned char
      // (or EOF); passing a negative plain char is undefined behaviour.
      char c1 = toupper((unsigned char)s1[i]);
      char c2 = toupper((unsigned char)s2[i]);
      if (c1 > c2) return 1;
      if (c1 < c2) return -1;
    }

    if (len1 == len2) {
      return 0;
    }
    return len1 < len2 ? -1 : 1;
  }
  /**
   * Compare the first `len` bytes of two strings as plain `char` values.
   *
   * @return -1, 0 or 1; 0 when `len` is not positive
   */
  int string_ncmp(const char *s1, const char *s2, int len) {
    int k = 0;
    while (k < len) {
      const char lhs = s1[k];
      const char rhs = s2[k];
      if (lhs != rhs) {
        return lhs > rhs ? 1 : -1;
      }
      ++k;
    }
    return 0;
  }
  /**
   * Case-insensitive comparison of the first `len` bytes of two strings.
   *
   * Each byte is upper-cased with toupper() and then compared as a plain
   * `char`.
   *
   * @return -1, 0 or 1; 0 when `len` is not positive
   */
  int string_ncasecmp(const char *s1, const char *s2, int len) {
    for (int i = 0; i < len; i++) {
      // Cast first: handing a negative plain char to toupper() is undefined.
      char c1 = toupper((unsigned char)s1[i]);
      char c2 = toupper((unsigned char)s2[i]);
      if (c1 > c2) return 1;
      if (c1 < c2) return -1;
    }
    return 0;
  }
  /**
   * Compare two right-aligned digit runs, advancing both cursors past the
   * digits that were examined.
   *
   * The longer run of digits wins. If both runs have the same length, the
   * first differing digit decides; that decision is remembered in `bias`
   * until both runs are known to end together.
   *
   * @param a, b        in/out cursors into the two strings
   * @param aend, bend  one-past-the-end pointers of the two strings
   * @return -1, 0 or +1
   */
  static int compare_right(char const **a, char const *aend,
                           char const **b, char const *bend) {
    int bias = 0;

    for (;;) {
      const int a_is_digit =
        (*a != aend) && isdigit((int)(unsigned char)**a);
      const int b_is_digit =
        (*b != bend) && isdigit((int)(unsigned char)**b);

      if (!a_is_digit && !b_is_digit) {
        return bias;
      }
      if (!a_is_digit) {
        return -1;
      }
      if (!b_is_digit) {
        return +1;
      }

      // Only the first difference matters; later digits cannot override it.
      if (bias == 0) {
        if (**a < **b) {
          bias = -1;
        } else if (**a > **b) {
          bias = +1;
        }
      }

      ++*a;
      ++*b;
    }
  }
  /**
   * Compare two left-aligned digit runs, advancing both cursors as it goes.
   *
   * The first position where the runs differ decides the result; a run that
   * ends first orders lower.
   *
   * @param a, b        in/out cursors into the two strings
   * @param aend, bend  one-past-the-end pointers of the two strings
   * @return -1, 0 or +1
   */
  static int compare_left(char const **a, char const *aend,
                          char const **b, char const *bend) {
    for (;;) {
      const int a_is_digit =
        (*a != aend) && isdigit((int)(unsigned char)**a);
      const int b_is_digit =
        (*b != bend) && isdigit((int)(unsigned char)**b);

      if (!a_is_digit && !b_is_digit) {
        return 0;
      }
      if (!a_is_digit) {
        return -1;
      }
      if (!b_is_digit) {
        return +1;
      }
      if (**a != **b) {
        return (**a < **b) ? -1 : +1;
      }

      ++*a;
      ++*b;
    }
  }
  241. int string_natural_cmp(char const *a, size_t a_len,
  242. char const *b, size_t b_len, int fold_case) {
  243. char ca, cb;
  244. char const *ap, *bp;
  245. char const *aend = a + a_len, *bend = b + b_len;
  246. int fractional, result;
  247. if (a_len == 0 || b_len == 0)
  248. return a_len - b_len;
  249. ap = a;
  250. bp = b;
  251. while (1) {
  252. ca = *ap; cb = *bp;
  253. /* skip over leading spaces or zeros */
  254. while (isspace((int)(unsigned char)ca))
  255. ca = *++ap;
  256. while (isspace((int)(unsigned char)cb))
  257. cb = *++bp;
  258. /* process run of digits */
  259. if (isdigit((int)(unsigned char)ca) && isdigit((int)(unsigned char)cb)) {
  260. fractional = (ca == '0' || cb == '0');
  261. if (fractional)
  262. result = compare_left(&ap, aend, &bp, bend);
  263. else
  264. result = compare_right(&ap, aend, &bp, bend);
  265. if (result != 0)
  266. return result;
  267. else if (ap == aend && bp == bend)
  268. /* End of the strings. Let caller sort them out. */
  269. return 0;
  270. else {
  271. /* Keep on comparing from the current point. */
  272. ca = *ap; cb = *bp;
  273. }
  274. }
  275. if (fold_case) {
  276. ca = toupper((int)(unsigned char)ca);
  277. cb = toupper((int)(unsigned char)cb);
  278. }
  279. if (ca < cb)
  280. return -1;
  281. else if (ca > cb)
  282. return +1;
  283. ++ap; ++bp;
  284. if (ap >= aend && bp >= bend)
  285. /* The strings compare the same. Perhaps the caller
  286. will want to call strcmp to break the tie. */
  287. return 0;
  288. else if (ap >= aend)
  289. return -1;
  290. else if (bp >= bend)
  291. return 1;
  292. }
  293. }
  294. ///////////////////////////////////////////////////////////////////////////////
  295. char *string_to_lower(const char *s, int len) {
  296. ASSERT(s);
  297. char *ret = (char *)malloc(len + 1);
  298. for (int i = 0; i < len; i++) {
  299. ret[i] = tolower(s[i]);
  300. }
  301. ret[len] = '\0';
  302. return ret;
  303. }
  304. char *string_to_upper(const char *s, int len) {
  305. ASSERT(s);
  306. char *ret = (char *)malloc(len + 1);
  307. for (int i = 0; i < len; i++) {
  308. ret[i] = toupper(s[i]);
  309. }
  310. ret[len] = '\0';
  311. return ret;
  312. }
  313. char *string_to_upper_first(const char *s, int len) {
  314. ASSERT(s);
  315. char *ret = string_duplicate(s, len);
  316. if (*ret) {
  317. *ret = toupper(*ret);
  318. }
  319. return ret;
  320. }
  321. char *string_to_upper_words(const char *s, int len) {
  322. ASSERT(s);
  323. char *ret = string_duplicate(s, len);
  324. if (*ret) {
  325. *ret = toupper(*ret);
  326. for (int i = 1; i < len; i++) {
  327. if (isspace(ret[i-1])) {
  328. ret[i] = toupper(ret[i]);
  329. }
  330. }
  331. }
  332. return ret;
  333. }
  334. ///////////////////////////////////////////////////////////////////////////////
  335. char *string_trim(const char *s, int &len,
  336. const char *charlist, int charlistlen, int mode) {
  337. ASSERT(s);
  338. char mask[256];
  339. string_charmask(charlist, charlistlen, mask);
  340. int trimmed = 0;
  341. if (mode & 1) {
  342. for (int i = 0; i < len; i++) {
  343. if (mask[(unsigned char)s[i]]) {
  344. trimmed++;
  345. } else {
  346. break;
  347. }
  348. }
  349. len -= trimmed;
  350. s += trimmed;
  351. }
  352. if (mode & 2) {
  353. for (int i = len - 1; i >= 0; i--) {
  354. if (mask[(unsigned char)s[i]]) {
  355. len--;
  356. } else {
  357. break;
  358. }
  359. }
  360. }
  361. return string_duplicate(s, len);
  362. }
  363. #define STR_PAD_LEFT 0
  364. #define STR_PAD_RIGHT 1
  365. #define STR_PAD_BOTH 2
  366. char *string_pad(const char *input, int &len, int pad_length,
  367. const char *pad_string, int pad_str_len,
  368. int pad_type) {
  369. ASSERT(input);
  370. int num_pad_chars = pad_length - len;
  371. /* If resulting string turns out to be shorter than input string,
  372. we simply copy the input and return. */
  373. if (pad_length < 0 || num_pad_chars < 0) {
  374. return string_duplicate(input, len);
  375. }
  376. /* Setup the padding string values if specified. */
  377. if (pad_str_len == 0) {
  378. // FIXME: throw_invalid_argument("pad_string: (empty)");
  379. return NULL;
  380. }
  381. char *result = (char *)malloc(pad_length + 1);
  382. /* We need to figure out the left/right padding lengths. */
  383. int left_pad, right_pad;
  384. switch (pad_type) {
  385. case STR_PAD_RIGHT:
  386. left_pad = 0;
  387. right_pad = num_pad_chars;
  388. break;
  389. case STR_PAD_LEFT:
  390. left_pad = num_pad_chars;
  391. right_pad = 0;
  392. break;
  393. case STR_PAD_BOTH:
  394. left_pad = num_pad_chars / 2;
  395. right_pad = num_pad_chars - left_pad;
  396. break;
  397. default:
  398. // throw_invalid_argument("pad_type: %d", pad_type);
  399. return NULL;
  400. }
  401. /* First we pad on the left. */
  402. int result_len = 0;
  403. for (int i = 0; i < left_pad; i++) {
  404. result[result_len++] = pad_string[i % pad_str_len];
  405. }
  406. /* Then we copy the input string. */
  407. memcpy(result + result_len, input, len);
  408. result_len += len;
  409. /* Finally, we pad on the right. */
  410. for (int i = 0; i < right_pad; i++) {
  411. result[result_len++] = pad_string[i % pad_str_len];
  412. }
  413. result[result_len] = '\0';
  414. len = result_len;
  415. return result;
  416. }
  417. ///////////////////////////////////////////////////////////////////////////////
  418. char *string_substr(const char *s, int &len, int start, int length,
  419. bool nullable) {
  420. ASSERT(s);
  421. if (string_substr_check(len, start, length)) {
  422. len = length;
  423. return string_duplicate(s + start, length);
  424. }
  425. len = 0;
  426. if (nullable) {
  427. return NULL;
  428. }
  429. return string_duplicate("", 0);
  430. }
  431. int string_find(const char *input, int len, char ch, int pos,
  432. bool case_sensitive) {
  433. ASSERT(input);
  434. if (len && pos < len) {
  435. if (!case_sensitive) {
  436. ch = tolower(ch);
  437. char *lowered = string_to_lower(input, len);
  438. int ret = string_find(lowered, len, ch, pos, true);
  439. free(lowered);
  440. return ret;
  441. }
  442. int l = 1;
  443. if (!string_substr_check(len, pos, l)) {
  444. return -1;
  445. }
  446. const void *ptr = memchr(input + pos, ch, len - pos);
  447. if (ptr != NULL) {
  448. return (int)((const char *)ptr - input);
  449. }
  450. }
  451. return -1;
  452. }
  453. int string_rfind(const char *input, int len, char ch, int pos,
  454. bool case_sensitive) {
  455. ASSERT(input);
  456. if (len > 0 && pos < len) {
  457. if (!case_sensitive) {
  458. ch = tolower(ch);
  459. char *lowered = string_to_lower(input, len);
  460. int ret = string_rfind(lowered, len, ch, pos, true);
  461. free(lowered);
  462. return ret;
  463. }
  464. int l = 0;
  465. bool stop_at_offset = (pos >= 0);
  466. if (!string_substr_check(len, pos, l)) {
  467. return -1;
  468. }
  469. int start = len - 1, stop = 0;
  470. if (stop_at_offset) {
  471. stop = pos;
  472. } else {
  473. start = pos;
  474. }
  475. for (int i = start; i >= stop; i--) {
  476. if (input[i] == ch) {
  477. return i;
  478. }
  479. }
  480. }
  481. return -1;
  482. }
  483. int string_find(const char *input, int len, const char *s, int s_len,
  484. int pos, bool case_sensitive) {
  485. ASSERT(input);
  486. ASSERT(s);
  487. if (!s_len) {
  488. return -1;
  489. }
  490. if (len && pos < len) {
  491. if (!case_sensitive) {
  492. char *lowered_s = string_to_lower(s, s_len);
  493. char *lowered = string_to_lower(input, len);
  494. int ret = string_find(lowered, len, lowered_s, s_len, pos, true);
  495. free(lowered);
  496. free(lowered_s);
  497. return ret;
  498. }
  499. int l = 1;
  500. if (!string_substr_check(len, pos, l)) {
  501. return -1;
  502. }
  503. void *ptr = memmem(input + pos, len - pos, s, s_len);
  504. if (ptr != NULL) {
  505. return (int)((const char *)ptr - input);
  506. }
  507. }
  508. return -1;
  509. }
  510. int string_rfind(const char *input, int len, const char *s, int s_len,
  511. int pos, bool case_sensitive) {
  512. ASSERT(input);
  513. ASSERT(s);
  514. if (!s_len) {
  515. return -1;
  516. }
  517. if (len && pos < len) {
  518. if (!case_sensitive) {
  519. char *lowered_s = string_to_lower(s, s_len);
  520. char *lowered = string_to_lower(input, len);
  521. int ret = string_rfind(lowered, len, lowered_s, s_len, pos, true);
  522. free(lowered);
  523. free(lowered_s);
  524. return ret;
  525. }
  526. int l = 0;
  527. bool stop_at_offset = (pos >= 0);
  528. if (!string_substr_check(len, pos, l)) {
  529. return -1;
  530. }
  531. int start = len, stop = 0;
  532. if (stop_at_offset) {
  533. stop = pos;
  534. } else {
  535. start = pos;
  536. }
  537. for (int i = start - 1; i >= stop; i--) {
  538. if (input[i] == s[0] && memcmp(input+i, s, s_len) == 0) {
  539. return i;
  540. }
  541. }
  542. }
  543. return -1;
  544. }
  /**
   * Locate `needle` inside the byte range [haystack, end).
   *
   * Candidates are found with memchr() on the needle's first byte, then
   * filtered on its last byte before the full memcmp().
   *
   * @param haystack    start of the range to search
   * @param needle      bytes to look for
   * @param needle_len  length of `needle`; the code reads
   *                    needle[needle_len - 1], so it must be at least 1
   * @param end         one past the last byte of the range
   * @return pointer to the first match, or NULL when there is none
   */
  const char *string_memnstr(const char *haystack, const char *needle,
                             int needle_len, const char *end) {
    const char last_byte = needle[needle_len - 1];
    const char *cursor = haystack;

    // Last position at which a full needle still fits.
    end -= needle_len;

    while (cursor <= end) {
      cursor = (const char *)memchr(cursor, *needle, (end - cursor + 1));
      if (cursor == NULL) {
        return NULL;
      }
      if (cursor[needle_len - 1] == last_byte &&
          memcmp(needle, cursor, needle_len - 1) == 0) {
        return cursor;
      }
      ++cursor;
    }
    return NULL;
  }
  /**
   * Find the last occurrence of byte `c` in the first `n` bytes of `s`.
   *
   * @param s  start of the buffer
   * @param c  byte value to look for (converted to unsigned char)
   * @param n  number of bytes to scan
   * @return pointer to the match, or NULL when there is none or `n` is 0
   */
  void *string_memrchr(const void *s, int c, size_t n) {
    const unsigned char *base = (const unsigned char *)s;
    const unsigned char wanted = (unsigned char)c;

    // Walk an index rather than a pointer: stepping a pointer below `s`
    // to end the loop is undefined behaviour and can fail to terminate.
    while (n > 0) {
      --n;
      if (base[n] == wanted) {
        return (void *)(base + n);
      }
    }
    return NULL;
  }
  575. char *string_replace(const char *s, int &len, int start, int length,
  576. const char *replacement, int len_repl) {
  577. ASSERT(s);
  578. ASSERT(replacement);
  579. if (!string_substr_check(len, start, length, false)) {
  580. len = 0;
  581. return string_duplicate("", 0);
  582. }
  583. char *ret = (char *)malloc(len + len_repl - length + 1);
  584. int ret_len = 0;
  585. if (start) {
  586. memcpy(ret, s, start);
  587. ret_len += start;
  588. }
  589. if (len_repl) {
  590. memcpy(ret + ret_len, replacement, len_repl);
  591. ret_len += len_repl;
  592. }
  593. len -= (start + length);
  594. if (len) {
  595. memcpy(ret + ret_len, s + start + length, len);
  596. ret_len += len;
  597. }
  598. len = ret_len;
  599. ret[ret_len] = '\0';
  600. return ret;
  601. }
  602. char *string_replace(const char *input, int &len,
  603. const char *search, int len_search,
  604. const char *replacement, int len_replace,
  605. int &count, bool case_sensitive) {
  606. ASSERT(input);
  607. ASSERT(search && len_search);
  608. if (len == 0) {
  609. return NULL;
  610. }
  611. std::vector<int> founds;
  612. founds.reserve(16);
  613. if (len_search == 1) {
  614. for (int pos = string_find(input, len, *search, 0, case_sensitive);
  615. pos >= 0;
  616. pos = string_find(input, len, *search, pos + len_search,
  617. case_sensitive)) {
  618. founds.push_back(pos);
  619. }
  620. } else {
  621. for (int pos = string_find(input, len, search, len_search, 0,
  622. case_sensitive);
  623. pos >= 0;
  624. pos = string_find(input, len, search, len_search,
  625. pos + len_search, case_sensitive)) {
  626. founds.push_back(pos);
  627. }
  628. }
  629. count = founds.size();
  630. if (count == 0) {
  631. return NULL; // not found
  632. }
  633. char *ret = (char *)malloc(len + (len_replace - len_search) * count + 1);
  634. char *p = ret;
  635. int pos = 0; // last position in input that hasn't been copied over yet
  636. int n;
  637. for (unsigned int i = 0; i < founds.size(); i++) {
  638. n = founds[i];
  639. if (n > pos) {
  640. n -= pos;
  641. memcpy(p, input, n);
  642. p += n;
  643. input += n;
  644. pos += n;
  645. }
  646. if (len_replace) {
  647. memcpy(p, replacement, len_replace);
  648. p += len_replace;
  649. }
  650. input += len_search;
  651. pos += len_search;
  652. }
  653. n = len;
  654. if (n > pos) {
  655. n -= pos;
  656. memcpy(p, input, n);
  657. p += n;
  658. }
  659. *p = '\0';
  660. len = p - ret;
  661. return ret;
  662. }
  663. ///////////////////////////////////////////////////////////////////////////////
  664. char *string_reverse(const char *s, int len) {
  665. ASSERT(s);
  666. char *n = (char *)malloc(len + 1);
  667. char *p = n;
  668. const char *e = s + len;
  669. while (--e >= s) {
  670. *p++ = *e;
  671. }
  672. *p = '\0';
  673. return n;
  674. }
  675. char *string_repeat(const char *s, int &len, int count) {
  676. ASSERT(s);
  677. if (len == 0 || count <= 0) {
  678. return NULL;
  679. }
  680. char *ret = (char *)malloc(len * count + 1);
  681. if (len == 1) {
  682. memset(ret, *s, count);
  683. len = count;
  684. } else {
  685. char *p = ret;
  686. for (int i = 0; i < count; i++) {
  687. memcpy(p, s, len);
  688. p += len;
  689. }
  690. len = p - ret;
  691. }
  692. ret[len] = '\0';
  693. return ret;
  694. }
  695. char *string_shuffle(const char *str, int len) {
  696. ASSERT(str);
  697. if (len <= 1) {
  698. return NULL;
  699. }
  700. char *ret = string_duplicate(str, len);
  701. int n_left = len;
  702. while (--n_left) {
  703. int rnd_idx = rand() % n_left;
  704. char temp = ret[n_left];
  705. ret[n_left] = ret[rnd_idx];
  706. ret[rnd_idx] = temp;
  707. }
  708. return ret;
  709. }
  /**
   * Split `src` into chunks of `chunklen` bytes and append `end` after each
   * chunk, including a shorter final chunk if there is one.
   *
   * @param src       source bytes
   * @param srclen    in: length of `src`; out: length of the result
   * @param end       separator appended after every chunk
   * @param endlen    length of `end`
   * @param chunklen  chunk size; used as a divisor, so it must not be 0
   * @return a malloc'ed, NUL-terminated result
   */
  char *string_chunk_split(const char *src, int &srclen, const char *end,
                           int endlen, int chunklen) {
    const int full_chunks = srclen / chunklen;            // complete chunks!
    const int leftover = srclen - full_chunks * chunklen; /* srclen % chunklen */
    const int capacity = (full_chunks + 1) * endlen + srclen + 1;

    char *dest = (char *)malloc(capacity);
    char *out = dest;
    const char *in = src;
    // One past the last position where a whole chunk still fits.
    const char *const limit = src + srclen - chunklen + 1;

    while (in < limit) {
      memcpy(out, in, chunklen);
      out += chunklen;
      in += chunklen;
      memcpy(out, end, endlen);
      out += endlen;
    }

    if (leftover) {
      memcpy(out, in, leftover);
      out += leftover;
      memcpy(out, end, endlen);
      out += endlen;
    }

    *out = '\0';
    srclen = out - dest;
    return dest;
  }
  735. ///////////////////////////////////////////////////////////////////////////////
  736. #define PHP_TAG_BUF_SIZE 1023
  /**
   * Check if tag is in a set of tags.
   *
   * The tag is first normalized: bytes are lower-cased, leading whitespace
   * is dropped, a leading '/' is removed and everything from the first
   * whitespace after the tag name onwards is discarded, so that both
   * <a whatever...> and </a> become <a>. The normalized form is then
   * looked up in `set` with strstr().
   *
   * states:
   *
   * 0 start tag
   * 1 first non-whitespace char seen
   *
   * @param tag  the tag text, normally starting with '<' and ending with '>'
   * @param len  number of bytes in `tag`
   * @param set  NUL-terminated list of allowed tags, e.g. "<a><b>"
   * @return 1 when the normalized tag occurs in `set`, 0 otherwise (also
   *         when `len` is not positive or the work buffer can't be
   *         allocated)
   */
  static int string_tag_find(const char *tag, int len, char *set) {
    if (len <= 0) {
      return 0;
    }

    // At most len normalized bytes, plus the closing '>' and the NUL.
    char *norm = (char *)malloc(len + 2);
    if (norm == NULL) {
      return 0;
    }

    char *n = norm;
    const char *t = tag;
    const char *const tag_end = tag + len;
    int state = 0, done = 0;

    // Stop at the end of the tag even if no '>' or whitespace shows up, so
    // a malformed tag can't make the scan run off the buffer.
    while (!done && t < tag_end) {
      // <ctype.h> functions need an unsigned-char value.
      const char c = tolower((unsigned char)*t);
      switch (c) {
      case '<':
        *(n++) = c;
        break;
      case '>':
        done = 1;
        break;
      default:
        if (!isspace((unsigned char)c)) {
          if (state == 0) {
            state = 1;
            if (c != '/')
              *(n++) = c;
          } else {
            *(n++) = c;
          }
        } else {
          if (state == 1)
            done = 1;
        }
        break;
      }
      ++t;
    }
    *(n++) = '>';
    *n = '\0';

    const int found = (strstr(set, norm) != NULL) ? 1 : 0;
    free(norm);
    return found;
  }
  797. /**
  798. * A simple little state-machine to strip out html and php tags
  799. *
  800. * State 0 is the output state, State 1 means we are inside a
  801. * normal html tag and state 2 means we are inside a php tag.
  802. *
  803. * The state variable is passed in to allow a function like fgetss
  804. * to maintain state across calls to the function.
  805. *
  806. * lc holds the last significant character read and br is a bracket
  807. * counter.
  808. *
  809. * When an allow string is passed in we keep track of the string
  810. * in state 1 and when the tag is closed check it against the
  811. * allow string to see if we should allow it.
  812. * swm: Added ability to strip <?xml tags without assuming it PHP
  813. * code.
  814. */
  815. static size_t strip_tags_impl(char *rbuf, int len, int *stateptr,
  816. char *allow, int allow_len,
  817. bool allow_tag_spaces) {
  818. char *tbuf, *buf, *p, *tp, *rp, c, lc;
  819. int br, i=0, depth=0, in_q = 0;
  820. int state = 0;
  821. if (stateptr)
  822. state = *stateptr;
  823. buf = string_duplicate(rbuf, len);
  824. c = *buf;
  825. lc = '\0';
  826. p = buf;
  827. rp = rbuf;
  828. br = 0;
  829. if (allow) {
  830. for (char *tmp = allow; *tmp; tmp++) {
  831. *tmp = tolower((int)*(unsigned char *)tmp);
  832. }
  833. tbuf = (char *)malloc(PHP_TAG_BUF_SIZE+1);
  834. tp = tbuf;
  835. } else {
  836. tbuf = tp = NULL;
  837. }
  838. while (i < len) {
  839. switch (c) {
  840. case '\0':
  841. break;
  842. case '<':
  843. if (isspace(*(p + 1)) && !allow_tag_spaces) {
  844. goto reg_char;
  845. }
  846. if (state == 0) {
  847. lc = '<';
  848. state = 1;
  849. if (allow) {
  850. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  851. *(tp++) = '<';
  852. }
  853. } else if (state == 1) {
  854. depth++;
  855. }
  856. break;
  857. case '(':
  858. if (state == 2) {
  859. if (lc != '"' && lc != '\'') {
  860. lc = '(';
  861. br++;
  862. }
  863. } else if (allow && state == 1) {
  864. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  865. *(tp++) = c;
  866. } else if (state == 0) {
  867. *(rp++) = c;
  868. }
  869. break;
  870. case ')':
  871. if (state == 2) {
  872. if (lc != '"' && lc != '\'') {
  873. lc = ')';
  874. br--;
  875. }
  876. } else if (allow && state == 1) {
  877. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  878. *(tp++) = c;
  879. } else if (state == 0) {
  880. *(rp++) = c;
  881. }
  882. break;
  883. case '>':
  884. if (depth) {
  885. depth--;
  886. break;
  887. }
  888. if (in_q) {
  889. break;
  890. }
  891. switch (state) {
  892. case 1: /* HTML/XML */
  893. lc = '>';
  894. in_q = state = 0;
  895. if (allow) {
  896. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  897. *(tp++) = '>';
  898. *tp='\0';
  899. if (string_tag_find(tbuf, tp-tbuf, allow)) {
  900. memcpy(rp, tbuf, tp-tbuf);
  901. rp += tp-tbuf;
  902. }
  903. tp = tbuf;
  904. }
  905. break;
  906. case 2: /* PHP */
  907. if (!br && lc != '\"' && *(p-1) == '?') {
  908. in_q = state = 0;
  909. tp = tbuf;
  910. }
  911. break;
  912. case 3:
  913. in_q = state = 0;
  914. tp = tbuf;
  915. break;
  916. case 4: /* JavaScript/CSS/etc... */
  917. if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
  918. in_q = state = 0;
  919. tp = tbuf;
  920. }
  921. break;
  922. default:
  923. *(rp++) = c;
  924. break;
  925. }
  926. break;
  927. case '"':
  928. case '\'':
  929. if (state == 2 && *(p-1) != '\\') {
  930. if (lc == c) {
  931. lc = '\0';
  932. } else if (lc != '\\') {
  933. lc = c;
  934. }
  935. } else if (state == 0) {
  936. *(rp++) = c;
  937. } else if (allow && state == 1) {
  938. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  939. *(tp++) = c;
  940. }
  941. if (state && p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
  942. if (in_q) {
  943. in_q = 0;
  944. } else {
  945. in_q = *p;
  946. }
  947. }
  948. break;
  949. case '!':
  950. /* JavaScript & Other HTML scripting languages */
  951. if (state == 1 && *(p-1) == '<') {
  952. state = 3;
  953. lc = c;
  954. } else {
  955. if (state == 0) {
  956. *(rp++) = c;
  957. } else if (allow && state == 1) {
  958. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  959. *(tp++) = c;
  960. }
  961. }
  962. break;
  963. case '-':
  964. if (state == 3 && p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
  965. state = 4;
  966. } else {
  967. goto reg_char;
  968. }
  969. break;
  970. case '?':
  971. if (state == 1 && *(p-1) == '<') {
  972. br=0;
  973. state=2;
  974. break;
  975. }
  976. case 'E':
  977. case 'e':
  978. /* !DOCTYPE exception */
  979. if (state==3 && p > buf+6
  980. && tolower(*(p-1)) == 'p'
  981. && tolower(*(p-2)) == 'y'
  982. && tolower(*(p-3)) == 't'
  983. && tolower(*(p-4)) == 'c'
  984. && tolower(*(p-5)) == 'o'
  985. && tolower(*(p-6)) == 'd') {
  986. state = 1;
  987. break;
  988. }
  989. /* fall-through */
  990. case 'l':
  991. /* swm: If we encounter '<?xml' then we shouldn't be in
  992. * state == 2 (PHP). Switch back to HTML.
  993. */
  994. if (state == 2 && p > buf+2 && *(p-1) == 'm' && *(p-2) == 'x') {
  995. state = 1;
  996. break;
  997. }
  998. /* fall-through */
  999. default:
  1000. reg_char:
  1001. if (state == 0) {
  1002. *(rp++) = c;
  1003. } else if (allow && state == 1) {
  1004. tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
  1005. *(tp++) = c;
  1006. }
  1007. break;
  1008. }
  1009. c = *(++p);
  1010. i++;
  1011. }
  1012. if (rp < rbuf + len) {
  1013. *rp = '\0';
  1014. }
  1015. free(buf);
  1016. if (allow)
  1017. free(tbuf);
  1018. if (stateptr)
  1019. *stateptr = state;
  1020. return (size_t)(rp - rbuf);
  1021. }
  1022. char *string_strip_tags(const char *s, int &len, const char *allow,
  1023. int allow_len) {
  1024. ASSERT(s);
  1025. ASSERT(allow);
  1026. char *ret = string_duplicate(s, len);
  1027. char *sallow = string_duplicate(allow, allow_len);
  1028. len = strip_tags_impl(ret, len, NULL, sallow, allow_len, false);
  1029. free(sallow);
  1030. return ret;
  1031. }
  1032. ///////////////////////////////////////////////////////////////////////////////
  1033. char *string_wordwrap(const char *text, int &textlen, int linelength,
  1034. const char *breakchar, int breakcharlen, bool docut) {
  1035. ASSERT(text);
  1036. ASSERT(breakchar);
  1037. char *newtext;
  1038. int newtextlen, chk;
  1039. size_t alloced;
  1040. long current = 0, laststart = 0, lastspace = 0;
  1041. if (textlen == 0) {
  1042. return "";
  1043. }
  1044. if (breakcharlen == 0) {
  1045. // throw_invalid_argument("wordbreak: (empty)");
  1046. return NULL;
  1047. }
  1048. if (linelength == 0 && docut) {
  1049. // throw_invalid_argument("width", "can't force cut when width = 0");
  1050. return NULL;
  1051. }
  1052. /* Special case for a single-character break as it needs no
  1053. additional storage space */
  1054. if (breakcharlen == 1 && !docut) {
  1055. newtext = string_duplicate(text, textlen);
  1056. laststart = lastspace = 0;
  1057. for (current = 0; current < textlen; current++) {
  1058. if (text[current] == breakchar[0]) {
  1059. laststart = lastspace = current;
  1060. } else if (text[current] == ' ') {
  1061. if (current - laststart >= linelength) {
  1062. newtext[current] = breakchar[0];
  1063. laststart = current + 1;
  1064. }
  1065. lastspace = current;
  1066. } else if (current - laststart >= linelength && laststart != lastspace) {
  1067. newtext[lastspace] = breakchar[0];
  1068. laststart = lastspace + 1;
  1069. }
  1070. }
  1071. return newtext;
  1072. }
  1073. /* Multiple character line break or forced cut */
  1074. if (linelength > 0) {
  1075. chk = (int)(textlen/linelength + 1);
  1076. alloced = textlen + chk * breakcharlen + 1;
  1077. } else {
  1078. chk = textlen;
  1079. alloced = textlen * (breakcharlen + 1) + 1;
  1080. }
  1081. newtext = (char *)malloc(alloced);
  1082. /* now keep track of the actual new text length */
  1083. newtextlen = 0;
  1084. laststart = lastspace = 0;
  1085. for (current = 0; current < textlen; current++) {
  1086. if (chk <= 0) {
  1087. alloced += (int) (((textlen - current + 1)/linelength + 1) *
  1088. breakcharlen) + 1;
  1089. newtext = (char *)realloc(newtext, alloced);
  1090. chk = (int) ((textlen - current)/linelength) + 1;
  1091. }
  1092. /* when we hit an existing break, copy to new buffer, and
  1093. * fix up laststart and lastspace */
  1094. if (text[current] == breakchar[0]
  1095. && current + breakcharlen < textlen
  1096. && !strncmp(text+current, breakchar, breakcharlen)) {
  1097. memcpy(newtext+newtextlen, text+laststart,
  1098. current-laststart+breakcharlen);
  1099. newtextlen += current-laststart+breakcharlen;
  1100. current += breakcharlen - 1;
  1101. laststart = lastspace = current + 1;
  1102. chk--;
  1103. }
  1104. /* if it is a space, check if it is at the line boundary,
  1105. * copy and insert a break, or just keep track of it */
  1106. else if (text[current] == ' ') {
  1107. if (current - laststart >= linelength) {
  1108. memcpy(newtext+newtextlen, text+laststart, current-laststart);
  1109. newtextlen += current - laststart;
  1110. memcpy(newtext+newtextlen, breakchar, breakcharlen);
  1111. newtextlen += breakcharlen;
  1112. laststart = current + 1;
  1113. chk--;
  1114. }
  1115. lastspace = current;
  1116. }
  1117. /* if we are cutting, and we've accumulated enough
  1118. * characters, and we haven't see a space for this line,
  1119. * copy and insert a break. */
  1120. else if (current - laststart >= linelength
  1121. && docut && laststart >= lastspace) {
  1122. memcpy(newtext+newtextlen, text+laststart, current-laststart);
  1123. newtextlen += current - laststart;
  1124. memcpy(newtext+newtextlen, breakchar, breakcharlen);
  1125. newtextlen += breakcharlen;
  1126. laststart = lastspace = current;
  1127. chk--;
  1128. }
  1129. /* if the current word puts us over the linelength, copy
  1130. * back up until the last space, insert a break, and move
  1131. * up the laststart */
  1132. else if (current - laststart >= linelength
  1133. && laststart < lastspace) {
  1134. memcpy(newtext+newtextlen, text+laststart, lastspace-laststart);
  1135. newtextlen += lastspace - laststart;
  1136. memcpy(newtext+newtextlen, breakchar, breakcharlen);
  1137. newtextlen += breakcharlen;
  1138. laststart = lastspace = lastspace + 1;
  1139. chk--;
  1140. }
  1141. }
  1142. /* copy over any stragglers */
  1143. if (laststart != current) {
  1144. memcpy(newtext+newtextlen, text+laststart, current-laststart);
  1145. newtextlen += current - laststart;
  1146. }
  1147. textlen = newtextlen;
  1148. newtext[newtextlen] = '\0';
  1149. return newtext;
  1150. }
  1151. ///////////////////////////////////////////////////////////////////////////////
  1152. char *string_addcslashes(const char *str, int &length, const char *what,
  1153. int wlength) {
  1154. ASSERT(str);
  1155. ASSERT(what);
  1156. char flags[256];
  1157. string_charmask(what, wlength, flags);
  1158. char *new_str = (char *)malloc((length << 2) + 1);
  1159. const char *source;
  1160. const char *end;
  1161. char *target;
  1162. for (source = str, end = source + length, target = new_str; source < end;
  1163. source++) {
  1164. char c = *source;
  1165. if (flags[(unsigned char)c]) {
  1166. if ((unsigned char) c < 32 || (unsigned char) c > 126) {
  1167. *target++ = '\\';
  1168. switch (c) {
  1169. case '\n': *target++ = 'n'; break;
  1170. case '\t': *target++ = 't'; break;
  1171. case '\r': *target++ = 'r'; break;
  1172. case '\a': *target++ = 'a'; break;
  1173. case '\v': *target++ = 'v'; break;
  1174. case '\b': *target++ = 'b'; break;
  1175. case '\f': *target++ = 'f'; break;
  1176. default: target += sprintf(target, "%03o", (unsigned char) c);
  1177. }
  1178. continue;
  1179. }
  1180. *target++ = '\\';
  1181. }
  1182. *target++ = c;
  1183. }
  1184. *target = 0;
  1185. length = target - new_str;
  1186. return new_str;
  1187. }
  1188. char *string_stripcslashes(const char *input, int &nlen) {
  1189. ASSERT(input);
  1190. if (nlen == 0) {
  1191. return NULL;
  1192. }
  1193. char *str = string_duplicate(input, nlen);
  1194. char *source, *target, *end;
  1195. int i;
  1196. char numtmp[4];
  1197. for (source=str, end=str+nlen, target=str; source < end; source++) {
  1198. if (*source == '\\' && source+1 < end) {
  1199. source++;
  1200. switch (*source) {
  1201. case 'n': *target++='\n'; nlen--; break;
  1202. case 'r': *target++='\r'; nlen--; break;
  1203. case 'a': *target++='\a'; nlen--; break;
  1204. case 't': *target++='\t'; nlen--; break;
  1205. case 'v': *target++='\v'; nlen--; break;
  1206. case 'b': *target++='\b'; nlen--; break;
  1207. case 'f': *target++='\f'; nlen--; break;
  1208. case '\\': *target++='\\'; nlen--; break;
  1209. case 'x':
  1210. if (source+1 < end && isxdigit((int)(*(source+1)))) {
  1211. numtmp[0] = *++source;
  1212. if (source+1 < end && isxdigit((int)(*(source+1)))) {
  1213. numtmp[1] = *++source;
  1214. numtmp[2] = '\0';
  1215. nlen-=3;
  1216. } else {
  1217. numtmp[1] = '\0';
  1218. nlen-=2;
  1219. }
  1220. *target++=(char)strtol(numtmp, NULL, 16);
  1221. break;
  1222. }
  1223. /* break is left intentionally */
  1224. default:
  1225. i=0;
  1226. while (source < end && *source >= '0' && *source <= '7' && i<3) {
  1227. numtmp[i++] = *source++;
  1228. }
  1229. if (i) {
  1230. numtmp[i]='\0';
  1231. *target++=(char)strtol(numtmp, NULL, 8);
  1232. nlen-=i;
  1233. source--;
  1234. } else {
  1235. *target++=*source;
  1236. nlen--;
  1237. }
  1238. }
  1239. } else {
  1240. *target++=*source;
  1241. }
  1242. }
  1243. *target='\0';
  1244. nlen = target - str;
  1245. return str;
  1246. }
  1247. char *string_addslashes(const char *str, int &length) {
  1248. ASSERT(str);
  1249. if (length == 0) {
  1250. return NULL;
  1251. }
  1252. char *new_str = (char *)malloc((length << 1) + 1);
  1253. const char *source = str;
  1254. const char *end = source + length;
  1255. char *target = new_str;
  1256. while (source < end) {
  1257. switch (*source) {
  1258. case '\0':
  1259. *target++ = '\\';
  1260. *target++ = '0';
  1261. break;
  1262. case '\'':
  1263. case '\"':
  1264. case '\\':
  1265. *target++ = '\\';
  1266. /* break is missing *intentionally* */
  1267. default:
  1268. *target++ = *source;
  1269. break;
  1270. }
  1271. source++;
  1272. }
  1273. *target = 0;
  1274. length = target - new_str;
  1275. return new_str;
  1276. }
  1277. char *string_stripslashes(const char *input, int &l) {
  1278. ASSERT(input);
  1279. if (!*input) {
  1280. return NULL;
  1281. }
  1282. char *str = string_duplicate(input, l);
  1283. char *s, *t;
  1284. s = str;
  1285. t = str;
  1286. while (l > 0) {
  1287. if (*t == '\\') {
  1288. t++; /* skip the slash */
  1289. l--;
  1290. if (l > 0) {
  1291. if (*t == '0') {
  1292. *s++='\0';
  1293. t++;
  1294. } else {
  1295. *s++ = *t++; /* preserve the next character */
  1296. }
  1297. l--;
  1298. }
  1299. } else {
  1300. *s++ = *t++;
  1301. l--;
  1302. }
  1303. }
  1304. if (s != t) {
  1305. *s = '\0';
  1306. }
  1307. l = s - str;
  1308. return str;
  1309. }
  1310. char *string_quotemeta(const char *input, int &len) {
  1311. ASSERT(input);
  1312. if (len == 0) {
  1313. return NULL;
  1314. }
  1315. char *ret = (char *)malloc((len << 1) + 1);
  1316. char *q = ret;
  1317. for (const char *p = input; *p; p++) {
  1318. char c = *p;
  1319. switch (c) {
  1320. case '.':
  1321. case '\\':
  1322. case '+':
  1323. case '*':
  1324. case '?':
  1325. case '[':
  1326. case '^':
  1327. case ']':
  1328. case '$':
  1329. case '(':
  1330. case ')':
  1331. *q++ = '\\';
  1332. /* break is missing _intentionally_ */
  1333. default:
  1334. *q++ = c;
  1335. }
  1336. }
  1337. *q = 0;
  1338. len = q - ret;
  1339. return ret;
  1340. }
  1341. ///////////////////////////////////////////////////////////////////////////////
  /**
   * Converts one hexadecimal digit character to its numeric value.
   *
   * @param c character code to convert
   * @return 0..15 for '0'-'9', 'A'-'F' and 'a'-'f'; -1 (converted to char)
   *         for anything else
   */
  static char string_hex2int(int c) {
    int value = -1;

    if (isdigit(c)) {
      value = c - '0';
    } else if ('A' <= c && c <= 'F') {
      value = c - 'A' + 10;
    } else if ('a' <= c && c <= 'f') {
      value = c - 'a' + 10;
    }
    return value;
  }
  1354. char *string_quoted_printable_encode(const char *input, int &len) {
  1355. const char *hex = "0123456789ABCDEF";
  1356. unsigned char *ret =
  1357. (unsigned char *)malloc(3 * len + 3 * (((3 * len)/PHP_QPRINT_MAXL) + 1));
  1358. unsigned char *d = ret;
  1359. int length = len;
  1360. unsigned char c;
  1361. unsigned long lp = 0;
  1362. while (length--) {
  1363. if (((c = *input++) == '\015') && (*input == '\012') && length > 0) {
  1364. *d++ = '\015';
  1365. *d++ = *input++;
  1366. length--;
  1367. lp = 0;
  1368. } else {
  1369. if (iscntrl (c) || (c == 0x7f) || (c & 0x80) || (c == '=') ||
  1370. ((c == ' ') && (*input == '\015'))) {
  1371. if ((lp += 3) > PHP_QPRINT_MAXL) {
  1372. *d++ = '=';
  1373. *d++ = '\015';
  1374. *d++ = '\012';
  1375. lp = 3;
  1376. }
  1377. *d++ = '=';
  1378. *d++ = hex[c >> 4];
  1379. *d++ = hex[c & 0xf];
  1380. } else {
  1381. if ((++lp) > PHP_QPRINT_MAXL) {
  1382. *d++ = '=';
  1383. *d++ = '\015';
  1384. *d++ = '\012';
  1385. lp = 1;
  1386. }
  1387. *d++ = c;
  1388. }
  1389. }
  1390. }
  1391. *d = '\0';
  1392. len = d - ret;
  1393. return (char*)ret;
  1394. }
  1395. char *string_quoted_printable_decode(const char *input, int &len, bool is_q) {
  1396. ASSERT(input);
  1397. if (len == 0) {
  1398. return NULL;
  1399. }
  1400. int i = 0, j = 0, k;
  1401. const char *str_in = input;
  1402. char *str_out = (char *)malloc(len + 1);
  1403. while (i < len && str_in[i]) {
  1404. switch (str_in[i]) {
  1405. case '=':
  1406. if (i + 2 < len && str_in[i + 1] && str_in[i + 2] &&
  1407. isxdigit((int) str_in[i + 1]) && isxdigit((int) str_in[i + 2]))
  1408. {
  1409. str_out[j++] = (string_hex2int((int) str_in[i + 1]) << 4)
  1410. + string_hex2int((int) str_in[i + 2]);
  1411. i += 3;
  1412. } else /* check for soft line break according to RFC 2045*/ {
  1413. k = 1;
  1414. while (str_in[i + k] &&
  1415. ((str_in[i + k] == 32) || (str_in[i + k] == 9))) {
  1416. /* Possibly, skip spaces/tabs at the end of line */
  1417. k++;
  1418. }
  1419. if (!str_in[i + k]) {
  1420. /* End of line reached */
  1421. i += k;
  1422. }
  1423. else if ((str_in[i + k] == 13) && (str_in[i + k + 1] == 10)) {
  1424. /* CRLF */
  1425. i += k + 2;
  1426. }
  1427. else if ((str_in[i + k] == 13) || (str_in[i + k] == 10)) {
  1428. /* CR or LF */
  1429. i += k + 1;
  1430. }
  1431. else {
  1432. str_out[j++] = str_in[i++];
  1433. }
  1434. }
  1435. break;
  1436. case '_':
  1437. if (is_q) {
  1438. str_out[j++] = ' ';
  1439. i++;
  1440. } else {
  1441. str_out[j++] = str_in[i++];
  1442. }
  1443. break;
  1444. default:
  1445. str_out[j++] = str_in[i++];
  1446. }
  1447. }
  1448. str_out[j] = '\0';
  1449. len = j;
  1450. return str_out;
  1451. }
  1452. char *string_bin2hex(const char *input, int &len) {
  1453. static char hexconvtab[] = "0123456789abcdef";
  1454. ASSERT(input);
  1455. if (len == 0) {
  1456. return NULL;
  1457. }
  1458. int i, j;
  1459. char *result = (char *)malloc((len << 1) + 1);
  1460. for (i = j = 0; i < len; i++) {
  1461. result[j++] = hexconvtab[(unsigned char)input[i] >> 4];
  1462. result[j++] = hexconvtab[(unsigned char)input[i] & 15];
  1463. }
  1464. result[j] = '\0';
  1465. len = j;
  1466. return result;
  1467. }
  1468. char *string_hex2bin(const char *input, int &len) {
  1469. len >>= 1;
  1470. char *str = (char *)malloc(len + 1);
  1471. int i, j;
  1472. for (i = j = 0; i < len; i++) {
  1473. char c = input[j++];
  1474. if (c >= '0' && c <= '9') {
  1475. str[i] = (c - '0') << 4;
  1476. } else if (c >= 'a' && c <= 'f') {
  1477. str[i] = (c - 'a' + 10) << 4;
  1478. } else if (c >= 'A' && c <= 'F') {
  1479. str[i] = (c - 'A' + 10) << 4;
  1480. } else {
  1481. free(str);
  1482. throw InvalidArgumentException("bad encoding at position", j);
  1483. }
  1484. c = input[j++];
  1485. if (c >= '0' && c <= '9') {
  1486. str[i] |= c - '0';
  1487. } else if (c >= 'a' && c <= 'f') {
  1488. str[i] |= c - 'a' + 10;
  1489. } else if (c >= 'A' && c <= 'F') {
  1490. str[i] |= c - 'A' + 10;
  1491. } else {
  1492. free(str);
  1493. throw InvalidArgumentException("bad encoding at position", j);
  1494. }
  1495. }
  1496. str[len] = '\0';
  1497. return str;
  1498. }
  1499. Variant string_base_to_numeric(const char *s, int len, int base) {
  1500. int64 num = 0;
  1501. double fnum = 0;
  1502. int mode = 0;
  1503. int64 cutoff;
  1504. int cutlim;
  1505. ASSERT(string_validate_base(base));
  1506. cutoff = LONG_MAX / base;
  1507. cutlim = LONG_MAX % base;
  1508. for (int i = len; i > 0; i--) {
  1509. char c = *s++;
  1510. /* might not work for EBCDIC */
  1511. if (c >= '0' && c <= '9')
  1512. c -= '0';
  1513. else if (c >= 'A' && c <= 'Z')
  1514. c -= 'A' - 10;
  1515. else if (c >= 'a' && c <= 'z')
  1516. c -= 'a' - 10;
  1517. else
  1518. continue;
  1519. if (c >= base)
  1520. continue;
  1521. switch (mode) {
  1522. case 0: /* Integer */
  1523. if (num < cutoff || (num == cutoff && c <= cutlim)) {
  1524. num = num * base + c;
  1525. break;
  1526. } else {
  1527. fnum = num;
  1528. mode = 1;
  1529. }
  1530. /* fall-through */
  1531. case 1: /* Float */
  1532. fnum = fnum * base + c;
  1533. }
  1534. }
  1535. if (mode == 1) {
  1536. return fnum;
  1537. }
  1538. return num;
  1539. }
  1540. char *string_long_to_base(unsigned long value, int base) {
  1541. static char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
  1542. char buf[(sizeof(unsigned long) << 3) + 1];
  1543. char *ptr, *end;
  1544. ASSERT(string_validate_base(base));
  1545. end = ptr = buf + sizeof(buf) - 1;
  1546. *ptr = '\0';
  1547. do {
  1548. *--ptr = digits[value % base];
  1549. value /= base;
  1550. } while (ptr > buf && value);
  1551. return string_duplicate(ptr, end - ptr);
  1552. }
  1553. char *string_numeric_to_base(CVarRef value, int base) {
  1554. static char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
  1555. ASSERT(string_validate_base(base));
  1556. if ((!value.isInteger() && !value.isDouble())) {
  1557. return string_duplicate("", 0);
  1558. }
  1559. if (value.isDouble()) {
  1560. double fvalue = floor(value); /* floor it just in case */
  1561. char *ptr, *end;
  1562. char buf[(sizeof(double) << 3) + 1];
  1563. /* Don't try to convert +/- infinity */
  1564. if (fvalue == HUGE_VAL || fvalue == -HUGE_VAL) {
  1565. // raise_warning("Number too large");
  1566. return string_duplicate("", 0);
  1567. }
  1568. end = ptr = buf + sizeof(buf) - 1;
  1569. *ptr = '\0';
  1570. do {
  1571. *--ptr = digits[(int) fmod(fvalue, base)];
  1572. fvalue /= base;
  1573. } while (ptr > buf && fabs(fvalue) >= 1);
  1574. return string_duplicate(ptr, end - ptr);
  1575. }
  1576. return string_long_to_base(value.toInt64(), base);
  1577. }
  1578. ///////////////////////////////////////////////////////////////////////////////
  1579. // uuencode
  /* Maps a 6-bit value to its uuencode character; zero is written as '`'.
   * Note: the argument is evaluated more than once. */
  #define PHP_UU_ENC(c) \
    ((c) ? ((c) & 077) + ' ' : '`')

  /* Second output character of a 3-byte group at c: low 2 bits of byte 0
   * followed by the high 4 bits of byte 1. */
  #define PHP_UU_ENC_C2(c) \
    PHP_UU_ENC(((*(c) << 4) & 060) | ((*((c) + 1) >> 4) & 017))

  /* Third output character of a 3-byte group at c: low 4 bits of byte 1
   * followed by the high 2 bits of byte 2. The argument is parenthesized at
   * every use so that an expression argument keeps its meaning. */
  #define PHP_UU_ENC_C3(c) \
    PHP_UU_ENC(((*((c) + 1) << 2) & 074) | ((*((c) + 2) >> 6) & 03))

  /* Maps a uuencode character back to its 6-bit value. */
  #define PHP_UU_DEC(c) \
    (((c) - ' ') & 077)
  1588. char *string_uuencode(const char *src, int src_len, int &dest_len) {
  1589. ASSERT(src);
  1590. ASSERT(src_len);
  1591. int len = 45;
  1592. char *p;
  1593. const char *s, *e, *ee;
  1594. char *dest;
  1595. /* encoded length is ~ 38% greater then the original */
  1596. p = dest = (char *)malloc((int)ceil(src_len * 1.38) + 46);
  1597. s = src;
  1598. e = src + src_len;
  1599. while ((s + 3) < e) {
  1600. ee = s + len;
  1601. if (ee > e) {
  1602. ee = e;
  1603. len = ee - s;
  1604. if (len % 3) {
  1605. ee = s + (int) (floor(len / 3) * 3);
  1606. }
  1607. }
  1608. *p++ = PHP_UU_ENC(len);
  1609. while (s < ee) {
  1610. *p++ = PHP_UU_ENC(*s >> 2);
  1611. *p++ = PHP_UU_ENC_C2(s);
  1612. *p++ = PHP_UU_ENC_C3(s);
  1613. *p++ = PHP_UU_ENC(*(s + 2) & 077);
  1614. s += 3;
  1615. }
  1616. if (len == 45) {
  1617. *p++ = '\n';
  1618. }
  1619. }
  1620. if (s < e) {
  1621. if (len == 45) {
  1622. *p++ = PHP_UU_ENC(e - s);
  1623. len = 0;
  1624. }
  1625. *p++ = PHP_UU_ENC(*s >> 2);
  1626. *p++ = PHP_UU_ENC_C2(s);
  1627. *p++ = ((e - s) > 1) ? PHP_UU_ENC_C3(s) : PHP_UU_ENC('\0');
  1628. *p++ = ((e - s) > 2) ? PHP_UU_ENC(*(s + 2) & 077) : PHP_UU_ENC('\0');
  1629. }
  1630. if (len < 45) {
  1631. *p++ = '\n';
  1632. }
  1633. *p++ = PHP_UU_ENC('\0');
  1634. *p++ = '\n';
  1635. *p = '\0';
  1636. dest_len = p - dest;
  1637. return dest;
  1638. }
  1639. char *string_uudecode(const char *src, int src_len, int &total_len) {
  1640. total_len = 0;
  1641. int len;
  1642. const char *s, *e, *ee;
  1643. char *p, *dest;
  1644. p = dest = (char *)malloc((int)ceil(src_len * 0.75) + 1);
  1645. s = src;
  1646. e = src + src_len;
  1647. while (s < e) {
  1648. if ((len = PHP_UU_DEC(*s++)) <= 0) {
  1649. break;
  1650. }
  1651. /* sanity check */
  1652. if (len > src_len) {
  1653. goto err;
  1654. }
  1655. total_len += len;
  1656. ee = s + (len == 45 ? 60 : (int) floor(len * 1.33));
  1657. /* sanity check */
  1658. if (ee > e) {
  1659. goto err;
  1660. }
  1661. while (s < ee) {
  1662. *p++ = PHP_UU_DEC(*s) << 2 | PHP_UU_DEC(*(s + 1)) >> 4;
  1663. *p++ = PHP_UU_DEC(*(s + 1)) << 4 | PHP_UU_DEC(*(s + 2)) >> 2;
  1664. *p++ = PHP_UU_DEC(*(s + 2)) << 6 | PHP_UU_DEC(*(s + 3));
  1665. s += 4;
  1666. }
  1667. if (len < 45) {
  1668. break;
  1669. }
  1670. /* skip \n */
  1671. s++;
  1672. }
  1673. if ((len = total_len > (p - dest))) {
  1674. *p++ = PHP_UU_DEC(*s) << 2 | PHP_UU_DEC(*(s + 1)) >> 4;
  1675. if (len > 1) {
  1676. *p++ = PHP_UU_DEC(*(s + 1)) << 4 | PHP_UU_DEC(*(s + 2)) >> 2;
  1677. if (len > 2) {
  1678. *p++ = PHP_UU_DEC(*(s + 2)) << 6 | PHP_UU_DEC(*(s + 3));
  1679. }
  1680. }
  1681. }
  1682. *(dest + total_len) = '\0';
  1683. return dest;
  1684. err:
  1685. free(dest);
  1686. return NULL;
  1687. }
  1688. ///////////////////////////////////////////////////////////////////////////////
  1689. // base64
  /* The 64 base64 digits in value order. The string literal's terminating
   * NUL supplies the final (65th) element of the array. */
  static const char base64_table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

  /* Character used to pad the last group of an encoded string. */
  static const char base64_pad = '=';

  /* Byte => 6-bit value lookup used by the decoder.
   *   -1  tab, LF, CR and space
   *   -2  every other byte that is not a base64 digit (including '=') */
  static const short base64_reverse_table[256] = {
    /* 0x00 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
    /* 0x10 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x20 */ -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
    /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
    /* 0x40 */ -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
    /* 0x60 */ -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
    /* 0x80 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x90 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xa0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xb0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xc0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xd0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xe0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0xf0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
  };
  1716. static unsigned char *php_base64_encode(const unsigned char *str, int length,
  1717. int *ret_length) {
  1718. const unsigned char *current = str;
  1719. unsigned char *p;
  1720. unsigned char *result;
  1721. if ((length + 2) < 0 || ((length + 2) / 3) >= (1 << (sizeof(int) * 8 - 2))) {
  1722. if (ret_length != NULL) {
  1723. *ret_length = 0;
  1724. }
  1725. return NULL;
  1726. }
  1727. result = (unsigned char *)malloc(((length + 2) / 3) * 4 + 1);
  1728. p = result;
  1729. while (length > 2) { /* keep going until we have less than 24 bits */
  1730. *p++ = base64_table[current[0] >> 2];
  1731. *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
  1732. *p++ = base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)];
  1733. *p++ = base64_table[current[2] & 0x3f];
  1734. current += 3;
  1735. length -= 3; /* we just handle 3 octets of data */
  1736. }
  1737. /* now deal with the tail end of things */
  1738. if (length != 0) {
  1739. *p++ = base64_table[current[0] >> 2];
  1740. if (length > 1) {
  1741. *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
  1742. *p++ = base64_table[(current[1] & 0x0f) << 2];
  1743. *p++ = base64_pad;
  1744. } else {
  1745. *p++ = base64_table[(current[0] & 0x03) << 4];
  1746. *p++ = base64_pad;
  1747. *p++ = base64_pad;
  1748. }
  1749. }
  1750. if (ret_length != NULL) {
  1751. *ret_length = (int)(p - result);
  1752. }
  1753. *p = '\0';
  1754. return result;
  1755. }
  1756. static unsigned char *php_base64_decode(const unsigned char *str,
  1757. int length, int *ret_length,
  1758. bool strict) {
  1759. const unsigned char *current = str;
  1760. int ch, i = 0, j = 0, k;
  1761. /* this sucks for threaded environments */
  1762. unsigned char *result;
  1763. result = (unsigned char *)malloc(length + 1);
  1764. /* run through the whole string, converting as we go */
  1765. while ((ch = *current++) != '\0' && length-- > 0) {
  1766. if (ch == base64_pad) {
  1767. if (*current != '=' && (i % 4) == 1) {
  1768. free(result);
  1769. return NULL;
  1770. }
  1771. continue;
  1772. }
  1773. ch = base64_reverse_table[ch];
  1774. if ((!strict && ch < 0) || ch == -1) {
  1775. /* a space or some other separator character, we simply skip over */
  1776. continue;
  1777. } else if (ch == -2) {
  1778. free(result);
  1779. return NULL;
  1780. }
  1781. switch(i % 4) {
  1782. case 0:
  1783. result[j] = ch << 2;
  1784. break;
  1785. case 1:
  1786. result[j++] |= ch >> 4;
  1787. result[j] = (ch & 0x0f) << 4;
  1788. break;
  1789. case 2:
  1790. result[j++] |= ch >>2;
  1791. result[j] = (ch & 0x03) << 6;
  1792. break;
  1793. case 3:
  1794. result[j++] |= ch;
  1795. break;
  1796. }
  1797. i++;
  1798. }
  1799. k = j;
  1800. /* mop things up if we ended on a boundary */
  1801. if (ch == base64_pad) {
  1802. switch(i % 4) {
  1803. case 1:
  1804. free(result);
  1805. return NULL;
  1806. case 2:
  1807. k++;
  1808. case 3:
  1809. result[k] = 0;
  1810. }
  1811. }
  1812. if(ret_length) {
  1813. *ret_length = j;
  1814. }
  1815. result[j] = '\0';
  1816. return result;
  1817. }
  1818. char *string_base64_encode(const char *input, int &len) {
  1819. return (char *)php_base64_encode((unsigned char *)input, len, &len);
  1820. }
  1821. char *string_base64_decode(const char *input, int &len, bool strict) {
  1822. return (char *)php_base64_decode((unsigned char *)input, len, &len, strict);
  1823. }
  1824. ///////////////////////////////////////////////////////////////////////////////
  /**
   * Wraps str in single quotes so it can be used as one shell argument.
   * Each embedded single quote is written as the four characters '\''
   * (close the quote, escaped quote, reopen the quote).
   *
   * @param str NUL-terminated input
   * @return malloc'ed, NUL-terminated quoted string
   */
  char *string_escape_shell_arg(const char *str) {
    const int n = strlen(str);

    /* worst case: four characters per input byte, two quotes, terminator */
    char *quoted = (char *)malloc((n << 2) + 3);
    char *w = quoted;

    *w++ = '\'';
    for (int idx = 0; idx < n; ++idx) {
      const char ch = str[idx];
      if (ch == '\'') {
        *w++ = '\'';
        *w++ = '\\';
        *w++ = '\'';
      }
      *w++ = ch;
    }
    *w++ = '\'';
    *w = '\0';

    return quoted;
  }
  /**
   * Backslash-escapes shell metacharacters in str.
   *
   * The characters # & ; ` | * ? ~ < > ^ ( ) [ ] { } $ \ as well as 0x0A and
   * 0xFF are always escaped. A single or double quote is left alone when it
   * opens or closes a matched pair, and escaped otherwise.
   *
   * @param str NUL-terminated input
   * @return malloc'ed, NUL-terminated escaped string
   */
  char *string_escape_shell_cmd(const char *str) {
    const int n = strlen(str);

    /* worst case: every byte gains a backslash */
    char *escaped = (char *)malloc((n << 1) + 1);
    char *w = escaped;

    /* while inside a quoted span: position of the quote that closes it */
    const char *closing = NULL;

    for (int idx = 0; idx < n; ++idx) {
      const char ch = str[idx];

      switch (ch) {
        case '"':
        case '\'':
          if (closing == NULL) {
            /* a quote only opens a span if a partner exists further on */
            closing = (const char *)memchr(str + idx + 1, ch, n - idx - 1);
            if (closing == NULL) {
              *w++ = '\\';
            }
          } else if (*closing == ch) {
            /* same kind of quote as the pending partner: the span ends */
            closing = NULL;
          } else {
            /* the other kind of quote inside an open span */
            *w++ = '\\';
          }
          *w++ = ch;
          break;

        case '#': /* This is character-set independent */
        case '&':
        case ';':
        case '`':
        case '|':
        case '*':
        case '?':
        case '~':
        case '<':
        case '>':
        case '^':
        case '(':
        case ')':
        case '[':
        case ']':
        case '{':
        case '}':
        case '$':
        case '\\':
        case '\x0A': /* excluding these two */
        case '\xFF':
          *w++ = '\\';
          *w++ = ch;
          break;

        default:
          *w++ = ch;
      }
    }
    *w = '\0';

    return escaped;
  }
  1896. ///////////////////////////////////////////////////////////////////////////////
  /**
   * Replaces, in place, every byte of str that occurs in str_from with the
   * byte at the same index in str_to.
   *
   * @param str      buffer to modify
   * @param len      number of bytes of str to process
   * @param str_from bytes to look for
   * @param str_to   replacement bytes, index-aligned with str_from
   * @param trlen    number of from/to pairs; when a byte is listed more than
   *                 once in str_from the last pair wins
   *
   * Does nothing when trlen or len is less than 1.
   */
  void string_translate(char *str, int len, const char *str_from,
                        const char *str_to, int trlen) {
    if (trlen < 1 || len < 1) {
      return;
    }

    /* start from the identity mapping, then overlay the requested pairs */
    unsigned char map[256];
    for (int code = 0; code < 256; ++code) {
      map[code] = code;
    }
    for (int k = 0; k < trlen; ++k) {
      map[(unsigned char) str_from[k]] = str_to[k];
    }

    char *const stop = str + len;
    for (char *cur = str; cur < stop; ++cur) {
      *cur = map[(unsigned char) *cur];
    }
  }
  1912. char *string_rot13(const char *input, int len) {
  1913. ASSERT(input);
  1914. static char rot13_from[] =
  1915. "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  1916. static char rot13_to[] =
  1917. "nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM";
  1918. if (len == 0) {
  1919. return NULL;
  1920. }
  1921. char *ret = string_duplicate(input, len);
  1922. string_translate(ret, len, rot13_from, rot13_to, 52);
  1923. return ret;
  1924. }
  1925. ///////////////////////////////////////////////////////////////////////////////
  /**
   * Finds the longest run of bytes that occurs in both txt1 and txt2.
   *
   * @param txt1,len1 first text and its length
   * @param txt2,len2 second text and its length
   * @param pos1      out: start of the run in txt1 (untouched if none found)
   * @param pos2      out: start of the run in txt2 (untouched if none found)
   * @param max       out: length of the run, 0 if the texts share no byte
   *
   * When several runs share the maximum length, the first one met while
   * scanning txt1 and then txt2 from the left is reported.
   */
  static void string_similar_str(const char *txt1, int len1,
                                 const char *txt2, int len2,
                                 int *pos1, int *pos2, int *max) {
    *max = 0;

    for (int i = 0; i < len1; ++i) {
      for (int j = 0; j < len2; ++j) {
        /* length of the common run starting at txt1[i] / txt2[j] */
        int run = 0;
        while (i + run < len1 && j + run < len2 &&
               txt1[i + run] == txt2[j + run]) {
          ++run;
        }

        if (run > *max) {
          *max = run;
          *pos1 = i;
          *pos2 = j;
        }
      }
    }
  }
  1945. static int string_similar_char(const char *txt1, int len1,
  1946. const char *txt2, int len2) {
  1947. int sum;
  1948. int pos1 = 0, pos2 = 0, max;
  1949. string_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max);
  1950. if ((sum = max)) {
  1951. if (pos1 && pos2) {
  1952. sum += string_similar_char(txt1, pos1, txt2, pos2);
  1953. }
  1954. if ((pos1 + max < len1) && (pos2 + max < len2)) {
  1955. sum += string_similar_char(txt1 + pos1 + max, len1 - pos1 - max,
  1956. txt2 + pos2 + max, len2 - pos2 - max);
  1957. }
  1958. }
  1959. return sum;
  1960. }
  1961. int string_similar_text(const char *t1, int len1,
  1962. const char *t2, int len2, float *percent) {
  1963. if (len1 == 0 && len2 == 0) {
  1964. if (percent) *percent = 0.0;
  1965. return 0;
  1966. }
  1967. int sim = string_similar_char(t1, len1, t2, len2);
  1968. if (percent) *percent = sim * 200.0 / (len1 + len2);
  1969. return sim;
  1970. }
  1971. ///////////////////////////////////////////////////////////////////////////////
  /* Longest input (per string) that string_levenshtein accepts. */
  #define LEVENSHTEIN_MAX_LENTH 255

  /**
   * Weighted edit distance between s1 and s2.
   * Reference implementation, only optimized for memory usage, not speed:
   * it keeps just two rows of the cost matrix.
   *
   * @param s1,l1    first string and its length
   * @param s2,l2    second string and its length
   * @param cost_ins cost of inserting one character
   * @param cost_rep cost of replacing one character
   * @param cost_del cost of deleting one character
   * @return the minimal total cost of turning s1 into s2, or -1 when either
   *         string is longer than LEVENSHTEIN_MAX_LENTH (an empty string on
   *         either side is answered before that check)
   */
  int string_levenshtein(const char *s1, int l1, const char *s2, int l2,
                         int cost_ins, int cost_rep, int cost_del ) {
    if (l1 == 0) {
      return l2 * cost_ins;
    }
    if (l2 == 0) {
      return l1 * cost_del;
    }
    if (l1 > LEVENSHTEIN_MAX_LENTH || l2 > LEVENSHTEIN_MAX_LENTH) {
      // raise_warning("levenshtein(): Argument string(s) too long");
      return -1;
    }

    int *prev = (int*)malloc((l2 + 1) * sizeof(int));
    int *curr = (int*)malloc((l2 + 1) * sizeof(int));

    /* row for the empty prefix of s1: only insertions */
    for (int col = 0; col <= l2; col++) {
      prev[col] = col * cost_ins;
    }

    for (int row = 0; row < l1; row++) {
      curr[0] = prev[0] + cost_del;

      for (int col = 0; col < l2; col++) {
        int best = prev[col] + ((s1[row] == s2[col]) ? 0 : cost_rep);

        const int via_delete = prev[col + 1] + cost_del;
        if (via_delete < best) {
          best = via_delete;
        }

        const int via_insert = curr[col] + cost_ins;
        if (via_insert < best) {
          best = via_insert;
        }

        curr[col + 1] = best;
      }

      /* the row just filled becomes the reference row for the next pass */
      int *swap = prev;
      prev = curr;
      curr = swap;
    }

    const int distance = prev[l2];
    free(prev);
    free(curr);
    return distance;
  }
  2004. ///////////////////////////////////////////////////////////////////////////////
  2005. // crc32
  /*
   * This code implements the AUTODIN II polynomial
   * The variable corresponding to the macro argument "crc" should
   * be an unsigned long.
   * Original code by Spencer Garrett <srg@quick.com>
   *
   * CRC32(crc, ch) folds one input byte into the running checksum: crc is
   * shifted right by 8 bits and XORed with the crc32tab entry selected by
   * the low byte of (crc ^ ch). The result is stored back into crc, so crc
   * must be a modifiable lvalue; it is evaluated more than once. Both
   * arguments are parenthesized at every use.
   */
  #define CRC32(crc, ch) \
    ((crc) = ((crc) >> 8) ^ crc32tab[((crc) ^ (ch)) & 0xff])
  2013. /* generated using the AUTODIN II polynomial
  2014. * x^32 + x^26 + x^23 + x^22 + x^16 +
  2015. * x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + 1
  2016. */
  2017. static const unsigned int crc32tab[256] = {
  2018. 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
  2019. 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
  2020. 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
  2021. 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
  2022. 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
  2023. 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
  2024. 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
  2025. 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
  2026. 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
  2027. 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
  2028. 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
  2029. 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
  2030. 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
  2031. 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
  2032. 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
  2033. 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
  2034. 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
  2035. 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
  2036. 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
  2037. 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
  2038. 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
  2039. 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
  2040. 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
  2041. 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
  2042. 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
  2043. 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
  2044. 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
  2045. 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
  2046. 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
  2047. 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
  2048. 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
  2049. 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
  2050. 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
  2051. 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
  2052. 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
  2053. 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
  2054. 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
  2055. 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
  2056. 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
  2057. 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
  2058. 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
  2059. 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
  2060. 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
  2061. 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
  2062. 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
  2063. 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
  2064. 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
  2065. 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
  2066. 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
  2067. 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
  2068. 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
  2069. 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
  2070. 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
  2071. 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
  2072. 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
  2073. 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
  2074. 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
  2075. 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
  2076. 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
  2077. 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
  2078. 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
  2079. 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
  2080. 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
  2081. 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
  2082. };
  2083. int string_crc32(const char *p, int len) {
  2084. uint32 crcinit = 0;
  2085. register int32 crc = crcinit ^ 0xFFFFFFFF;
  2086. for (; len--; ++p) {
  2087. crc = ((crc >> 8) & 0x00FFFFFF) ^ crc32tab[(crc ^ (*p)) & 0xFF];
  2088. }
  2089. return crc ^ 0xFFFFFFFF;
  2090. }
  2091. ///////////////////////////////////////////////////////////////////////////////
  2092. // crypt
  2093. #include <unistd.h>
  2094. #if !defined(__APPLE__) && !defined(__FREEBSD__)
  2095. # include <crypt.h>
  2096. #endif
  2097. static unsigned char itoa64[] =
  2098. "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  2099. static void ito64(char *s, long v, int n) {
  2100. while (--n >= 0) {
  2101. *s++ = itoa64[v&0x3f];
  2102. v >>= 6;
  2103. }
  2104. }
  2105. ///////////////////////////////////////////////////////////////////////////////
  2106. char *string_money_format(const char *format, double value) {
  2107. bool check = false;
  2108. const char *p = format;
  2109. while ((p = strchr(p, '%'))) {
  2110. if (*(p + 1) == '%') {
  2111. p += 2;
  2112. } else if (!check) {
  2113. check = true;
  2114. p++;
  2115. } else {
  2116. // throw_invalid_argument
  2117. // ("format: Only a single %%i or %%n token can be used");
  2118. return NULL;
  2119. }
  2120. }
  2121. int format_len = strlen(format);
  2122. int str_len = format_len + 1024;
  2123. char *str = (char *)malloc(str_len);
  2124. if ((str_len = strfmon(str, str_len, format, value)) < 0) {
  2125. free(str);
  2126. return NULL;
  2127. }
  2128. str[str_len] = 0;
  2129. return str;
  2130. }
  2131. ///////////////////////////////////////////////////////////////////////////////
  2132. char *string_number_format(double d, int dec, char dec_point,
  2133. char thousand_sep) {
  2134. char *tmpbuf = NULL, *resbuf;
  2135. char *s, *t; /* source, target */
  2136. char *dp;
  2137. int integral;
  2138. int tmplen, reslen=0;
  2139. int count=0;
  2140. int is_negative=0;
  2141. if (d < 0) {
  2142. is_negative = 1;
  2143. d = -d;
  2144. }
  2145. if (dec < 0) dec = 0;
  2146. PHP_ROUND_WITH_FUZZ(d, dec);
  2147. // departure from PHP: we got rid of dependencies on spprintf() here.
  2148. tmpbuf = (char *)malloc(64);
  2149. snprintf(tmpbuf, 64, "%.*F", dec, d);
  2150. tmplen = strlen(tmpbuf);
  2151. if (tmpbuf == NULL || !isdigit((int)tmpbuf[0])) {
  2152. return tmpbuf;
  2153. }
  2154. /* find decimal point, if expected */
  2155. if (dec) {
  2156. dp = strpbrk(tmpbuf, ".,");
  2157. } else {
  2158. dp = NULL;
  2159. }
  2160. /* calculate the length of the return buffer */
  2161. if (dp) {
  2162. integral = dp - tmpbuf;
  2163. } else {
  2164. /* no decimal point was found */
  2165. integral = tmplen;
  2166. }
  2167. /* allow for thousand separators */
  2168. if (thousand_sep) {
  2169. integral += (integral-1) / 3;
  2170. }
  2171. reslen = integral;
  2172. if (dec) {
  2173. reslen += dec;
  2174. if (dec_point) {
  2175. reslen++;
  2176. }
  2177. }
  2178. /* add a byte for minus sign */
  2179. if (is_negative) {
  2180. reslen++;
  2181. }
  2182. resbuf = (char *) malloc(reslen+1); /* +1 for NUL terminator */
  2183. s = tmpbuf+tmplen-1;
  2184. t = resbuf+reslen;
  2185. *t-- = '\0';
  2186. /* copy the decimal places.
  2187. * Take care, as the sprintf implementation may return less places than
  2188. * we requested due to internal buffer limitations */
  2189. if (dec) {
  2190. int declen = dp ? s - dp : 0;
  2191. int topad = dec > declen ? dec - declen : 0;
  2192. /* pad with '0's */
  2193. while (topad--) {
  2194. *t-- = '0';
  2195. }
  2196. if (dp) {
  2197. s -= declen + 1; /* +1 to skip the point */
  2198. t -= declen;
  2199. /* now copy the chars after the point */
  2200. memcpy(t + 1, dp + 1, declen);
  2201. }
  2202. /* add decimal point */
  2203. if (dec_point) {
  2204. *t-- = dec_point;
  2205. }
  2206. }
  2207. /* copy the numbers before the decimal point, adding thousand
  2208. * separator every three digits */
  2209. while(s >= tmpbuf) {
  2210. *t-- = *s--;
  2211. if (thousand_sep && (++count%3)==0 && s>=tmpbuf) {
  2212. *t-- = thousand_sep;
  2213. }
  2214. }
  2215. /* and a minus sign, if needed */
  2216. if (is_negative) {
  2217. *t-- = '-';
  2218. }
  2219. free(tmpbuf);
  2220. return resbuf;
  2221. }
  2222. ///////////////////////////////////////////////////////////////////////////////
  2223. // soundex
  2224. /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
  2225. char *string_soundex(const char *str) {
  2226. ASSERT(str);
  2227. int _small, code, last;
  2228. char soundex[4 + 1];
  2229. static char soundex_table[26] = {
  2230. 0, /* A */
  2231. '1', /* B */
  2232. '2', /* C */
  2233. '3', /* D */
  2234. 0, /* E */
  2235. '1', /* F */
  2236. '2', /* G */
  2237. 0, /* H */
  2238. 0, /* I */
  2239. '2', /* J */
  2240. '2', /* K */
  2241. '4', /* L */
  2242. '5', /* M */
  2243. '5', /* N */
  2244. 0, /* O */
  2245. '1', /* P */
  2246. '2', /* Q */
  2247. '6', /* R */
  2248. '2', /* S */
  2249. '3', /* T */
  2250. 0, /* U */
  2251. '1', /* V */
  2252. 0, /* W */
  2253. '2', /* X */
  2254. 0, /* Y */
  2255. '2' /* Z */
  2256. };
  2257. if (!*str) {
  2258. return NULL;
  2259. }
  2260. /* build soundex string */
  2261. last = -1;
  2262. const char *p = str;
  2263. for (_small = 0; *p && _small < 4; p++) {
  2264. /* convert chars to upper case and strip non-letter chars */
  2265. /* BUG: should also map here accented letters used in non */
  2266. /* English words or names (also found in English text!): */
  2267. /* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
  2268. code = toupper((int)(unsigned char)(*p));
  2269. if (code >= 'A' && code <= 'Z') {
  2270. if (_small == 0) {
  2271. /* remember first valid char */
  2272. soundex[_small++] = code;
  2273. last = soundex_table[code - 'A'];
  2274. } else {
  2275. /* ignore sequences of consonants with same soundex */
  2276. /* code in trail, and vowels unless they separate */
  2277. /* consonant letters */
  2278. code = soundex_table[code - 'A'];
  2279. if (code != last) {
  2280. if (code != 0) {
  2281. soundex[_small++] = code;
  2282. }
  2283. last = code;
  2284. }
  2285. }
  2286. }
  2287. }
  2288. /* pad with '0' and terminate with 0 ;-) */
  2289. while (_small < 4) {
  2290. soundex[_small++] = '0';
  2291. }
  2292. soundex[_small] = '\0';
  2293. return strdup(soundex);
  2294. }
  2295. ///////////////////////////////////////////////////////////////////////////////
  2296. // metaphone
/**
 * This is essentially the original metaphone code by Michael G Schwern.
 * It has been changed only slightly (use emalloc, get rid of includes etc).
 * - thies - 13.09.1999
 */

/*----------------------------- */
/* this used to be "metaphone.h" */
/*----------------------------- */

/* Special encodings: the output characters used for the "sh" and "th"
 * sounds */
#define SH 'X'
#define TH '0'

/*----------------------------- */
/* end of "metaphone.h" */
/*----------------------------- */

/*----------------------------- */
/* this used to be "metachar.h" */
/*----------------------------- */

/* Metachar.h ... little bits about characters for metaphone */
/*-- Character encoding array & accessing macros --*/
/* Stolen directly out of the book... */
/* One bit-flag entry per letter A..Z; the flag values (1, 2, 4, 8, 16) are
 * tested by the macros below. */
char _codes[26] = { 1,16,4,16,9,2,4,16,9,2,0,2,2,2,1,4,0,2,4,4,1,0,0,0,8,0};

/* Flags of character c, or 0 when isalpha(c) is false.
 * Note: the argument is evaluated more than once. */
#define ENCODE(c) (isalpha(c) ? _codes[((toupper(c)) - 'A')] : 0)

#define isvowel(c) (ENCODE(c) & 1) /* AEIOU */

/* These letters are passed through unchanged */
#define NOCHANGE(c) (ENCODE(c) & 2) /* FJLMNR */

/* These form diphthongs when preceding H */
#define AFFECTH(c) (ENCODE(c) & 4) /* CGPST */

/* These make C and G soft */
#define MAKESOFT(c) (ENCODE(c) & 8) /* EIY */

/* These prevent GH from becoming F */
#define NOGHTOF(c) (ENCODE(c) & 16) /* BDH */

/*----------------------------- */
/* end of "metachar.h" */
/*----------------------------- */

/* I suppose I could have been using a character pointer instead of
 * accessing the array directly... */
/* The macros below rely on two names from the scope they are expanded in:
 * `word` (the input bytes) and `w_idx` (the current index into it). */

/* Look at the next letter in the word */
#define Next_Letter (toupper(word[w_idx+1]))

/* Look at the current letter in the word */
#define Curr_Letter (toupper(word[w_idx]))

/* Go N letters back; yields '\0' when that would be before the start. */
#define Look_Back_Letter(n) (w_idx >= n ? toupper(word[w_idx-n]) : '\0')

/* Previous letter. I dunno, should this return null on failure? */
#define Prev_Letter (Look_Back_Letter(1))

/* Look two letters down. It makes sure you don't walk off the string. */
#define After_Next_Letter (Next_Letter != '\0' ? toupper(word[w_idx+2]) \
                                               : '\0')

/* Upper-cased letter n positions ahead, found through Lookahead(), which
 * stops at the end of the string. */
#define Look_Ahead_Letter(n) (toupper(Lookahead(word+w_idx, n)))
  2346. /* Allows us to safely look ahead an arbitrary # of letters */
  2347. /* I probably could have just used strlen... */
  2348. static char Lookahead(unsigned char *word, int how_far) {
  2349. char letter_ahead = '\0'; /* null by default */
  2350. int idx;
  2351. for (idx = 0; word[idx] != '\0' && idx < how_far; idx++);
  2352. /* Edge forward in the string... */
  2353. letter_ahead = (char)word[idx]; /* idx will be either == to how_far or
  2354. * at the end of the string
  2355. */
  2356. return letter_ahead;
  2357. }
  2358. /* phonize one letter
  2359. * We don't know the buffers size in advance. On way to solve this is to just
  2360. * re-allocate the buffer size. We're using an extra of 2 characters (this
  2361. * could be one though; or more too). */
  2362. #define Phonize(c) { \
  2363. if (p_idx >= max_buffer_len) { \
  2364. phoned_word = (char *)realloc(phoned_word, max_buffer_len + 2); \
  2365. max_buffer_len += 2; \
  2366. } \
  2367. phoned_word[p_idx++] = c; \
  2368. }
  2369. /* Slap a null character on the end of the phoned word */
  2370. #define End_Phoned_Word {phoned_word[p_idx] = '\0';}
  2371. /* How long is the phoned word? */
  2372. #define Phone_Len (p_idx)
  2373. /* Note is a letter is a 'break' in the word */
  2374. #define Isbreak(c) (!isalpha(c))
  2375. char *string_metaphone(const char *input, int word_len, long max_phonemes,
  2376. int traditional) {
  2377. unsigned char *word = (unsigned char *)input;
  2378. char *phoned_word;
  2379. int w_idx = 0; /* point in the phonization we're at. */
  2380. int p_idx = 0; /* end of the phoned phrase */
  2381. int max_buffer_len = 0; /* maximum length of the destination buffer */
  2382. /*-- Parameter checks --*/
  2383. /* Negative phoneme length is meaningless */
  2384. if (max_phonemes < 0)
  2385. return NULL;
  2386. /* Empty/null string is meaningless */
  2387. /* Overly paranoid */
  2388. /* assert(word != NULL && word[0] != '\0'); */
  2389. if (word == NULL)
  2390. return NULL;
  2391. /*-- Allocate memory for our phoned_phrase --*/
  2392. if (max_phonemes == 0) { /* Assume largest possible */
  2393. max_buffer_len = word_len;
  2394. phoned_word = (char *)malloc(word_len + 1);
  2395. } else {
  2396. max_buffer_len = max_phonemes;
  2397. phoned_word = (char *)malloc(max_phonemes +1);
  2398. }
  2399. /*-- The first phoneme has to be processed specially. --*/
  2400. /* Find our first letter */
  2401. for (; !isalpha(Curr_Letter); w_idx++) {
  2402. /* On the off chance we were given nothing but crap... */
  2403. if (Curr_Letter == '\0') {
  2404. End_Phoned_Word
  2405. return phoned_word; /* For testing */
  2406. }
  2407. }
  2408. switch (Curr_Letter) {
  2409. /* AE becomes E */
  2410. case 'A':
  2411. if (Next_Letter == 'E') {
  2412. Phonize('E');
  2413. w_idx += 2;
  2414. }
  2415. /* Remember, preserve vowels at the beginning */
  2416. else {
  2417. Phonize('A');
  2418. w_idx++;
  2419. }
  2420. break;
  2421. /* [GKP]N becomes N */
  2422. case 'G':
  2423. case 'K':
  2424. case 'P':
  2425. if (Next_Letter == 'N') {
  2426. Phonize('N');
  2427. w_idx += 2;
  2428. }
  2429. break;
  2430. /* WH becomes H,
  2431. WR becomes R
  2432. W if followed by a vowel */
  2433. case 'W':
  2434. if (Next_Letter == 'H' ||
  2435. Next_Letter == 'R') {
  2436. Phonize(Next_Letter);
  2437. w_idx += 2;
  2438. } else if (isvowel(Next_Letter)) {
  2439. Phonize('W');
  2440. w_idx += 2;
  2441. }
  2442. /* else ignore */
  2443. break;
  2444. /* X becomes S */
  2445. case 'X':
  2446. Phonize('S');
  2447. w_idx++;
  2448. break;
  2449. /* Vowels are kept */
  2450. /* We did A already
  2451. case 'A':
  2452. case 'a':
  2453. */
  2454. case 'E':
  2455. case 'I':
  2456. case 'O':
  2457. case 'U':
  2458. Phonize(Curr_Letter);
  2459. w_idx++;
  2460. break;
  2461. default:
  2462. /* do nothing */
  2463. break;
  2464. }
  2465. /* On to the metaphoning */
  2466. for (; Curr_Letter != '\0' &&
  2467. (max_phonemes == 0 || Phone_Len < max_phonemes);
  2468. w_idx++) {
  2469. /* How many letters to skip because an eariler encoding handled
  2470. * multiple letters */
  2471. unsigned short int skip_letter = 0;
  2472. /* THOUGHT: It would be nice if, rather than having things like...
  2473. * well, SCI. For SCI you encode the S, then have to remember
  2474. * to skip the C. So the phonome SCI invades both S and C. It would
  2475. * be better, IMHO, to skip the C from the S part of the encoding.
  2476. * Hell, I'm trying it.
  2477. */
  2478. /* Ignore non-alphas */
  2479. if (!isalpha(Curr_Letter))
  2480. continue;
  2481. /* Drop duplicates, except CC */
  2482. if (Curr_Letter == Prev_Letter &&
  2483. Curr_Letter != 'C')
  2484. continue;
  2485. switch (Curr_Letter) {
  2486. /* B -> B unless in MB */
  2487. case 'B':
  2488. if (Prev_Letter != 'M')
  2489. Phonize('B');
  2490. break;
  2491. /* 'sh' if -CIA- or -CH, but not SCH, except SCHW.
  2492. * (SCHW is handled in S)
  2493. * S if -CI-, -CE- or -CY-
  2494. * dropped if -SCI-, SCE-, -SCY- (handed in S)
  2495. * else K
  2496. */
  2497. case 'C':
  2498. if (MAKESOFT(Next_Letter)) { /* C[IEY] */
  2499. if (After_Next_Letter == 'A' &&
  2500. Next_Letter == 'I') { /* CIA */
  2501. Phonize(SH);
  2502. }
  2503. /* SC[IEY] */
  2504. else if (Prev_Letter == 'S') {
  2505. /* Dropped */
  2506. } else {
  2507. Phonize('S');
  2508. }
  2509. } else if (Next_Letter == 'H') {
  2510. if ((!traditional) && (After_Next_Letter == 'R' ||
  2511. Prev_Letter == 'S')) { /* Christ, School */
  2512. Phonize('K');
  2513. } else {
  2514. Phonize(SH);
  2515. }
  2516. skip_letter++;
  2517. } else {
  2518. Phonize('K');
  2519. }
  2520. break;
  2521. /* J if in -DGE-, -DGI- or -DGY-
  2522. * else T
  2523. */
  2524. case 'D':
  2525. if (Next_Letter == 'G' && MAKESOFT(After_Next_Letter)) {
  2526. Phonize('J');
  2527. skip_letter++;
  2528. } else
  2529. Phonize('T');
  2530. break;
  2531. /* F if in -GH and not B--GH, D--GH, -H--GH, -H---GH
  2532. * else dropped if -GNED, -GN,
  2533. * else dropped if -DGE-, -DGI- or -DGY- (handled in D)
  2534. * else J if in -GE-, -GI, -GY and not GG
  2535. * else K
  2536. */
  2537. case 'G':
  2538. if (Next_Letter == 'H') {
  2539. if (!(NOGHTOF(Look_Back_Letter(3)) || Look_Back_Letter(4) == 'H')) {
  2540. Phonize('F');
  2541. skip_letter++;
  2542. } else {
  2543. /* silent */
  2544. }
  2545. } else if (Next_Letter == 'N') {
  2546. if (Isbreak(After_Next_Letter) ||
  2547. (After_Next_Letter == 'E' && Look_Ahead_Letter(3) == 'D')) {
  2548. /* dropped */
  2549. } else
  2550. Phonize('K');
  2551. } else if (MAKESOFT(Next_Letter) && Prev_Letter != 'G') {
  2552. Phonize('J');
  2553. } else {
  2554. Phonize('K');
  2555. }
  2556. break;
  2557. /* H if before a vowel and not after C,G,P,S,T */
  2558. case 'H':
  2559. if (isvowel(Next_Letter) && !AFFECTH(Prev_Letter))
  2560. Phonize('H');
  2561. break;
  2562. /* dropped if after C
  2563. * else K
  2564. */
  2565. case 'K':
  2566. if (Prev_Letter != 'C')
  2567. Phonize('K');
  2568. break;
  2569. /* F if before H
  2570. * else P
  2571. */
  2572. case 'P':
  2573. if (Next_Letter == 'H') {
  2574. Phonize('F');
  2575. } else {
  2576. Phonize('P');
  2577. }
  2578. break;
  2579. /* K
  2580. */
  2581. case 'Q':
  2582. Phonize('K');
  2583. break;
  2584. /* 'sh' in -SH-, -SIO- or -SIA- or -SCHW-
  2585. * else S
  2586. */
  2587. case 'S':
  2588. if (Next_Letter == 'I' &&
  2589. (After_Next_Letter == 'O' || After_Next_Letter == 'A')) {
  2590. Phonize(SH);
  2591. } else if (Next_Letter == 'H') {
  2592. Phonize(SH);
  2593. skip_letter++;
  2594. } else if ((!traditional) &&
  2595. (Next_Letter == 'C' && Look_Ahead_Letter(2) == 'H' &&
  2596. Look_Ahead_Letter(3) == 'W')) {
  2597. Phonize(SH);
  2598. skip_letter += 2;
  2599. } else {
  2600. Phonize('S');
  2601. }
  2602. break;
  2603. /* 'sh' in -TIA- or -TIO-
  2604. * else 'th' before H
  2605. * else T
  2606. */
  2607. case 'T':
  2608. if (Next_Letter == 'I' &&
  2609. (After_Next_Letter == 'O' || After_Next_Letter == 'A')) {
  2610. Phonize(SH);
  2611. } else if (Next_Letter == 'H') {
  2612. Phonize(TH);
  2613. skip_letter++;
  2614. } else {
  2615. Phonize('T');
  2616. }
  2617. break;
  2618. /* F */
  2619. case 'V':
  2620. Phonize('F');
  2621. break;
  2622. /* W before a vowel, else dropped */
  2623. case 'W':
  2624. if (isvowel(Next_Letter))
  2625. Phonize('W');
  2626. break;
  2627. /* KS */
  2628. case 'X':
  2629. Phonize('K');
  2630. Phonize('S');
  2631. break;
  2632. /* Y if followed by a vowel */
  2633. case 'Y':
  2634. if (isvowel(Next_Letter))
  2635. Phonize('Y');
  2636. break;
  2637. /* S */
  2638. case 'Z':
  2639. Phonize('S');
  2640. break;
  2641. /* No transformation */
  2642. case 'F':
  2643. case 'J':
  2644. case 'L':
  2645. case 'M':
  2646. case 'N':
  2647. case 'R':
  2648. Phonize(Curr_Letter);
  2649. break;
  2650. default:
  2651. /* nothing */
  2652. break;
  2653. } /* END SWITCH */
  2654. w_idx += skip_letter;
  2655. } /* END FOR */
  2656. End_Phoned_Word;
  2657. return phoned_word;
  2658. }
  2659. ///////////////////////////////////////////////////////////////////////////////
  2660. // Cyrillic
/**
 * Code tables for the different Cyrillic charsets, each expressed relative
 * to koi8-r.
 * Every table has 512 entries, i.e. two 256-entry byte maps. In both maps
 * the bytes 0-127 (ASCII) map to themselves; only 128-255 are translated.
 * The first 256 entries convert a byte from the table's charset to koi8-r,
 * the second 256 entries convert a byte from koi8-r back to that charset.
 *
 * Here we have the following tables:
 * _cyr_win1251 - for windows-1251 charset
 * _cyr_iso88595 - for iso8859-5 charset
 * _cyr_cp866 - for x-cp866 charset
 * _cyr_mac - for x-mac-cyrillic charset
 */
  2673. typedef unsigned char _cyr_charset_table[512];
  2674. static const _cyr_charset_table _cyr_win1251 = {
  2675. 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  2676. 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  2677. 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  2678. 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  2679. 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  2680. 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  2681. 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  2682. 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  2683. 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
  2684. 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
  2685. 154,174,190,46,159,189,46,46,179,191,180,157,46,46,156,183,
  2686. 46,46,182,166,173,46,46,158,163,152,164,155,46,46,46,167,
  2687. 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
  2688. 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
  2689. 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
  2690. 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
  2691. 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  2692. 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  2693. 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  2694. 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  2695. 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  2696. 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  2697. 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  2698. 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  2699. 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  2700. 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  2701. 32,32,32,184,186,32,179,191,32,32,32,32,32,180,162,32,
  2702. 32,32,32,168,170,32,178,175,32,32,32,32,32,165,161,169,
  2703. 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
  2704. 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
  2705. 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,
  2706. 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218,
  2707. };
  2708. static const _cyr_charset_table _cyr_cp866 = {
  2709. 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  2710. 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  2711. 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  2712. 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  2713. 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  2714. 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  2715. 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  2716. 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  2717. 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
  2718. 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
  2719. 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
  2720. 35,35,35,124,124,124,124,43,43,124,124,43,43,43,43,43,
  2721. 43,45,45,124,45,43,124,124,43,43,45,45,124,45,43,45,
  2722. 45,45,45,43,43,43,43,43,43,43,43,35,35,124,124,35,
  2723. 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
  2724. 179,163,180,164,183,167,190,174,32,149,158,32,152,159,148,154,
  2725. 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  2726. 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  2727. 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  2728. 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  2729. 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  2730. 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  2731. 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  2732. 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  2733. 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  2734. 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  2735. 205,186,213,241,243,201,32,245,187,212,211,200,190,32,247,198,
  2736. 199,204,181,240,242,185,32,244,203,207,208,202,216,32,246,32,
  2737. 238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174,
  2738. 175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234,
  2739. 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
  2740. 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
  2741. };
  2742. static const _cyr_charset_table _cyr_iso88595 = {
  2743. 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  2744. 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  2745. 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  2746. 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  2747. 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  2748. 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  2749. 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  2750. 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  2751. 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  2752. 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  2753. 32,179,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  2754. 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
  2755. 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
  2756. 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
  2757. 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
  2758. 32,163,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  2759. 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  2760. 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  2761. 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  2762. 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  2763. 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  2764. 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  2765. 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  2766. 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  2767. 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  2768. 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  2769. 32,32,32,241,32,32,32,32,32,32,32,32,32,32,32,32,
  2770. 32,32,32,161,32,32,32,32,32,32,32,32,32,32,32,32,
  2771. 238,208,209,230,212,213,228,211,229,216,217,218,219,220,221,222,
  2772. 223,239,224,225,226,227,214,210,236,235,215,232,237,233,231,234,
  2773. 206,176,177,198,180,181,196,179,197,184,185,186,187,188,189,190,
  2774. 191,207,192,193,194,195,182,178,204,203,183,200,205,201,199,202,
  2775. };
  2776. static const _cyr_charset_table _cyr_mac = {
  2777. 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  2778. 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  2779. 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  2780. 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  2781. 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  2782. 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  2783. 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  2784. 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  2785. 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
  2786. 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
  2787. 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  2788. 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  2789. 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  2790. 144,145,146,147,148,149,150,151,152,153,154,155,156,179,163,209,
  2791. 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
  2792. 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,255,
  2793. 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  2794. 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  2795. 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  2796. 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  2797. 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  2798. 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  2799. 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  2800. 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  2801. 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  2802. 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  2803. 160,161,162,222,164,165,166,167,168,169,170,171,172,173,174,175,
  2804. 176,177,178,221,180,181,182,183,184,185,186,187,188,189,190,191,
  2805. 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
  2806. 239,223,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
  2807. 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
  2808. 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
  2809. };
  2810. /**
  2811. * This is the function that performs real in-place conversion of the string
  2812. * between charsets.
  2813. * Parameters:
  2814. * str - string to be converted
  2815. * from,to - one-symbol label of source and destination charset
  2816. * The following symbols are used as labels:
  2817. * k - koi8-r
  2818. * w - windows-1251
  2819. * i - iso8859-5
  2820. * a - x-cp866
  2821. * d - x-cp866
  2822. * m - x-mac-cyrillic
  2823. */
  2824. char *string_convert_cyrillic_string(const char *input, int length,
  2825. char from, char to) {
  2826. ASSERT(input);
  2827. const unsigned char *from_table, *to_table;
  2828. unsigned char tmp;
  2829. unsigned char *str = (unsigned char *)string_duplicate(input, length);
  2830. from_table = NULL;
  2831. to_table = NULL;
  2832. switch (toupper((int)(unsigned char)from)) {
  2833. case 'W': from_table = _cyr_win1251; break;
  2834. case 'A':
  2835. case 'D': from_table = _cyr_cp866; break;
  2836. case 'I': from_table = _cyr_iso88595; break;
  2837. case 'M': from_table = _cyr_mac; break;
  2838. case 'K':
  2839. break;
  2840. default:
  2841. // throw_invalid_argument("Unknown source charset: %c", from);
  2842. break;
  2843. }
  2844. switch (toupper((int)(unsigned char)to)) {
  2845. case 'W': to_table = _cyr_win1251; break;
  2846. case 'A':
  2847. case 'D': to_table = _cyr_cp866; break;
  2848. case 'I': to_table = _cyr_iso88595; break;
  2849. case 'M': to_table = _cyr_mac; break;
  2850. case 'K':
  2851. break;
  2852. default:
  2853. // throw_invalid_argument("Unknown destination charset: %c", to);
  2854. break;
  2855. }
  2856. if (!str) {
  2857. return (char *)str;
  2858. }
  2859. for (int i = 0; i<length; i++) {
  2860. tmp = (from_table == NULL)? str[i] : from_table[ str[i] ];
  2861. str[i] = (to_table == NULL) ? tmp : to_table[tmp + 256];
  2862. }
  2863. return (char *)str;
  2864. }
  2865. ///////////////////////////////////////////////////////////////////////////////
  2866. // Hebrew
  // Kind of text run currently being collected by the Hebrew converter.
  #define _HEB_BLOCK_TYPE_ENG 1
  #define _HEB_BLOCK_TYPE_HEB 2

  // Byte classifiers; each evaluates to 1 or 0. The argument is parenthesized
  // before the cast so that compound expressions such as isheb(a + b) are
  // narrowed to unsigned char as a whole, not just their first operand.

  // 1 if c lies in the byte range 224..250, which this code treats as the
  // Hebrew letters of the single-byte Hebrew encoding it expects.
  #define isheb(c) \
    ((((unsigned char)(c)) >= 224 && ((unsigned char)(c)) <= 250) ? 1 : 0)

  // 1 if c is a space or a horizontal tab.
  #define _isblank(c) \
    ((((unsigned char)(c)) == ' ' || ((unsigned char)(c)) == '\t') ? 1 : 0)

  // 1 if c is a line feed or a carriage return.
  #define _isnewline(c) \
    ((((unsigned char)(c)) == '\n' || ((unsigned char)(c)) == '\r') ? 1 : 0)
  2875. /**
  2876. * Converts Logical Hebrew text (Hebrew Windows style) to Visual text
  2877. * Cheers/complaints/flames - Zeev Suraski <zeev@php.net>
  2878. */
  2879. char *string_convert_hebrew_string(const char *str, int &str_len,
  2880. int max_chars_per_line,
  2881. int convert_newlines) {
  2882. ASSERT(str);
  2883. const char *tmp;
  2884. char *heb_str, *broken_str;
  2885. char *target;
  2886. int block_start, block_end, block_type, block_length, i;
  2887. long max_chars=0;
  2888. int begin, end, char_count, orig_begin;
  2889. if (str_len == 0) {
  2890. return NULL;
  2891. }
  2892. tmp = str;
  2893. block_start=block_end=0;
  2894. heb_str = (char *) malloc(str_len + 1);
  2895. target = heb_str+str_len;
  2896. *target = 0;
  2897. target--;
  2898. block_length=0;
  2899. if (isheb(*tmp)) {
  2900. block_type = _HEB_BLOCK_TYPE_HEB;
  2901. } else {
  2902. block_type = _HEB_BLOCK_TYPE_ENG;
  2903. }
  2904. do {
  2905. if (block_type == _HEB_BLOCK_TYPE_HEB) {
  2906. while ((isheb((int)*(tmp+1)) ||
  2907. _isblank((int)*(tmp+1)) ||
  2908. ispunct((int)*(tmp+1)) ||
  2909. (int)*(tmp+1)=='\n' ) && block_end<str_len-1) {
  2910. tmp++;
  2911. block_end++;
  2912. block_length++;
  2913. }
  2914. for (i = block_start; i<= block_end; i++) {
  2915. *target = str[i];
  2916. switch (*target) {
  2917. case '(': *target = ')'; break;
  2918. case ')': *target = '('; break;
  2919. case '[': *target = ']'; break;
  2920. case ']': *target = '['; break;
  2921. case '{': *target = '}'; break;
  2922. case '}': *target = '{'; break;
  2923. case '<': *target = '>'; break;
  2924. case '>': *target = '<'; break;
  2925. case '\\': *target = '/'; break;
  2926. case '/': *target = '\\'; break;
  2927. default:
  2928. break;
  2929. }
  2930. target--;
  2931. }
  2932. block_type = _HEB_BLOCK_TYPE_ENG;
  2933. } else {
  2934. while (!isheb(*(tmp+1)) &&
  2935. (int)*(tmp+1)!='\n' && block_end < str_len-1) {
  2936. tmp++;
  2937. block_end++;
  2938. block_length++;
  2939. }
  2940. while ((_isblank((int)*tmp) ||
  2941. ispunct((int)*tmp)) && *tmp!='/' &&
  2942. *tmp!='-' && block_end > block_start) {
  2943. tmp--;
  2944. block_end--;
  2945. }
  2946. for (i = block_end; i >= block_start; i--) {
  2947. *target = str[i];
  2948. target--;
  2949. }
  2950. block_type = _HEB_BLOCK_TYPE_HEB;
  2951. }
  2952. block_start=block_end+1;
  2953. } while (block_end < str_len-1);
  2954. broken_str = (char *) malloc(str_len+1);
  2955. begin=end=str_len-1;
  2956. target = broken_str;
  2957. while (1) {
  2958. char_count=0;
  2959. while ((!max_chars || char_count < max_chars) && begin > 0) {
  2960. char_count++;
  2961. begin--;
  2962. if (begin <= 0 || _isnewline(heb_str[begin])) {
  2963. while (begin > 0 && _isnewline(heb_str[begin-1])) {
  2964. begin--;
  2965. char_count++;
  2966. }
  2967. break;
  2968. }
  2969. }
  2970. if (char_count == max_chars) { /* try to avoid breaking words */
  2971. int new_char_count=char_count, new_begin=begin;
  2972. while (new_char_count > 0) {
  2973. if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) {
  2974. break;
  2975. }
  2976. new_begin++;
  2977. new_char_count--;
  2978. }
  2979. if (new_char_count > 0) {
  2980. char_count=new_char_count;
  2981. begin=new_begin;
  2982. }
  2983. }
  2984. orig_begin=begin;
  2985. if (_isblank(heb_str[begin])) {
  2986. heb_str[begin]='\n';
  2987. }
  2988. while (begin <= end && _isnewline(heb_str[begin])) {
  2989. /* skip leading newlines */
  2990. begin++;
  2991. }
  2992. for (i = begin; i <= end; i++) { /* copy content */
  2993. *target = heb_str[i];
  2994. target++;
  2995. }
  2996. for (i = orig_begin; i <= end && _isnewline(heb_str[i]); i++) {
  2997. *target = heb_str[i];
  2998. target++;
  2999. }
  3000. begin=orig_begin;
  3001. if (begin <= 0) {
  3002. *target = 0;
  3003. break;
  3004. }
  3005. begin--;
  3006. end=begin;
  3007. }
  3008. free((void*)heb_str);
  3009. if (convert_newlines) {
  3010. int count;
  3011. char *ret = string_replace(broken_str, str_len, "\n", strlen("\n"),
  3012. "<br />\n", strlen("<br />\n"), count, true);
  3013. if (ret) {
  3014. free(broken_str);
  3015. return ret;
  3016. }
  3017. }
  3018. return broken_str;
  3019. }
  #if defined(__APPLE__)
  /**
   * Fallback memrchr(), compiled only on Apple platforms (presumably because
   * their C library does not provide one -- confirm).
   *
   * Scans the n bytes starting at s from the end towards the beginning and
   * returns a pointer to the last byte equal to c, or NULL if there is none.
   * Both c and the buffer contents are compared as unsigned char, so bytes
   * >= 0x80 match regardless of whether plain char is signed. The scan is
   * index-based, so no pointer below s is ever formed, including for n == 0.
   */
  void *memrchr(const void *s, int c, size_t n) {
    const unsigned char *bytes = (const unsigned char *)s;
    const unsigned char needle = (unsigned char)c;

    while (n > 0) {
      --n;
      if (bytes[n] == needle) {
        return (void *)(bytes + n);
      }
    }
    return NULL;
  }
  #endif
  3028. ///////////////////////////////////////////////////////////////////////////////
  3029. }