/contrib/tcsh/tc.str.c

https://bitbucket.org/freebsd/freebsd-head/ · C · 695 lines · 561 code · 69 blank · 65 comment · 176 complexity · c588345f024dcdcfa8e70582a81ceb4c MD5 · raw file

  1. /* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $ */
  2. /*
  3. * tc.str.c: Short string package
  4. * This has been a lesson of how to write buggy code!
  5. */
  6. /*-
  7. * Copyright (c) 1980, 1991 The Regents of the University of California.
  8. * All rights reserved.
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. * 1. Redistributions of source code must retain the above copyright
  14. * notice, this list of conditions and the following disclaimer.
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in the
  17. * documentation and/or other materials provided with the distribution.
  18. * 3. Neither the name of the University nor the names of its contributors
  19. * may be used to endorse or promote products derived from this software
  20. * without specific prior written permission.
  21. *
  22. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32. * SUCH DAMAGE.
  33. */
  34. #include "sh.h"
  35. #include <assert.h>
  36. #include <limits.h>
  37. RCSID("$tcsh: tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $")
  /* Number of bytes by which the static conversion buffers grow at a time. */
  #define MALLOC_INCR 128
  /*
   * Slack allocated beyond the nominal buffer size: space for one multibyte
   * character when WIDE_STRINGS is defined, nothing otherwise.
   */
  #ifndef WIDE_STRINGS
  # define MALLOC_SURPLUS 0
  #else
  # define MALLOC_SURPLUS MB_LEN_MAX
  #endif
  44. #ifdef WIDE_STRINGS
  45. size_t
  46. one_mbtowc(Char *pwc, const char *s, size_t n)
  47. {
  48. int len;
  49. len = rt_mbtowc(pwc, s, n);
  50. if (len == -1) {
  51. reset_mbtowc();
  52. *pwc = (unsigned char)*s | INVALID_BYTE;
  53. }
  54. if (len <= 0)
  55. len = 1;
  56. return len;
  57. }
  58. size_t
  59. one_wctomb(char *s, Char wchar)
  60. {
  61. int len;
  62. if (wchar & INVALID_BYTE) {
  63. s[0] = wchar & 0xFF;
  64. len = 1;
  65. } else {
  66. #ifdef UTF16_STRINGS
  67. if (wchar >= 0x10000) {
  68. /* UTF-16 systems can't handle these values directly in calls to
  69. wctomb. Convert value to UTF-16 surrogate and call wcstombs to
  70. convert the "string" to the correct multibyte representation,
  71. if any. */
  72. wchar_t ws[3];
  73. wchar -= 0x10000;
  74. ws[0] = 0xd800 | (wchar >> 10);
  75. ws[1] = 0xdc00 | (wchar & 0x3ff);
  76. ws[2] = 0;
  77. /* The return value of wcstombs excludes the trailing 0, so len is
  78. the correct number of multibytes for the Unicode char. */
  79. len = wcstombs (s, ws, MB_CUR_MAX + 1);
  80. } else
  81. #endif
  82. len = wctomb(s, (wchar_t) wchar);
  83. if (len == -1)
  84. s[0] = wchar;
  85. if (len <= 0)
  86. len = 1;
  87. }
  88. return len;
  89. }
  90. int
  91. rt_mbtowc(Char *pwc, const char *s, size_t n)
  92. {
  93. int ret;
  94. char back[MB_LEN_MAX];
  95. wchar_t tmp;
  96. #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
  97. # if defined(AUTOSET_KANJI)
  98. static mbstate_t mb_zero, mb;
  99. /*
  100. * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
  101. */
  102. if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
  103. !memcmp(&mb, &mb_zero, sizeof(mb)))
  104. {
  105. *pwc = *s;
  106. return 1;
  107. }
  108. # else
  109. mbstate_t mb;
  110. # endif
  111. memset (&mb, 0, sizeof mb);
  112. ret = mbrtowc(&tmp, s, n, &mb);
  113. #else
  114. ret = mbtowc(&tmp, s, n);
  115. #endif
  116. if (ret > 0) {
  117. *pwc = tmp;
  118. #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
  119. if (tmp >= 0xd800 && tmp <= 0xdbff) {
  120. /* UTF-16 surrogate pair. Fetch second half and compute
  121. UTF-32 value. Dispense with the inverse test in this case. */
  122. size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
  123. if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
  124. ret = -1;
  125. else {
  126. *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
  127. ret += n2;
  128. }
  129. } else
  130. #endif
  131. if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
  132. ret = -1;
  133. } else if (ret == -2)
  134. ret = -1;
  135. else if (ret == 0)
  136. *pwc = '\0';
  137. return ret;
  138. }
  139. #endif
  140. #ifdef SHORT_STRINGS
  141. Char **
  142. blk2short(char **src)
  143. {
  144. size_t n;
  145. Char **sdst, **dst;
  146. /*
  147. * Count
  148. */
  149. for (n = 0; src[n] != NULL; n++)
  150. continue;
  151. sdst = dst = xmalloc((n + 1) * sizeof(Char *));
  152. for (; *src != NULL; src++)
  153. *dst++ = SAVE(*src);
  154. *dst = NULL;
  155. return (sdst);
  156. }
  157. char **
  158. short2blk(Char **src)
  159. {
  160. size_t n;
  161. char **sdst, **dst;
  162. /*
  163. * Count
  164. */
  165. for (n = 0; src[n] != NULL; n++)
  166. continue;
  167. sdst = dst = xmalloc((n + 1) * sizeof(char *));
  168. for (; *src != NULL; src++)
  169. *dst++ = strsave(short2str(*src));
  170. *dst = NULL;
  171. return (sdst);
  172. }
  173. Char *
  174. str2short(const char *src)
  175. {
  176. static struct Strbuf buf; /* = Strbuf_INIT; */
  177. if (src == NULL)
  178. return (NULL);
  179. buf.len = 0;
  180. while (*src) {
  181. Char wc;
  182. src += one_mbtowc(&wc, src, MB_LEN_MAX);
  183. Strbuf_append1(&buf, wc);
  184. }
  185. Strbuf_terminate(&buf);
  186. return buf.s;
  187. }
  188. char *
  189. short2str(const Char *src)
  190. {
  191. static char *sdst = NULL;
  192. static size_t dstsize = 0;
  193. char *dst, *edst;
  194. if (src == NULL)
  195. return (NULL);
  196. if (sdst == NULL) {
  197. dstsize = MALLOC_INCR;
  198. sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
  199. }
  200. dst = sdst;
  201. edst = &dst[dstsize];
  202. while (*src) {
  203. dst += one_wctomb(dst, *src & CHAR);
  204. src++;
  205. if (dst >= edst) {
  206. char *wdst = dst;
  207. char *wedst = edst;
  208. dstsize += MALLOC_INCR;
  209. sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
  210. edst = &sdst[dstsize];
  211. dst = &edst[-MALLOC_INCR];
  212. while (wdst > wedst) {
  213. dst++;
  214. wdst--;
  215. }
  216. }
  217. }
  218. *dst = 0;
  219. return (sdst);
  220. }
  221. #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
  222. Char *
  223. s_strcpy(Char *dst, const Char *src)
  224. {
  225. Char *sdst;
  226. sdst = dst;
  227. while ((*dst++ = *src++) != '\0')
  228. continue;
  229. return (sdst);
  230. }
  231. Char *
  232. s_strncpy(Char *dst, const Char *src, size_t n)
  233. {
  234. Char *sdst;
  235. if (n == 0)
  236. return(dst);
  237. sdst = dst;
  238. do
  239. if ((*dst++ = *src++) == '\0') {
  240. while (--n != 0)
  241. *dst++ = '\0';
  242. return(sdst);
  243. }
  244. while (--n != 0);
  245. return (sdst);
  246. }
  247. Char *
  248. s_strcat(Char *dst, const Char *src)
  249. {
  250. Strcpy(Strend(dst), src);
  251. return dst;
  252. }
  253. #ifdef NOTUSED
  254. Char *
  255. s_strncat(Char *dst, const Char *src, size_t n)
  256. {
  257. Char *sdst;
  258. if (n == 0)
  259. return (dst);
  260. sdst = dst;
  261. while (*dst)
  262. dst++;
  263. do
  264. if ((*dst++ = *src++) == '\0')
  265. return(sdst);
  266. while (--n != 0)
  267. continue;
  268. *dst = '\0';
  269. return (sdst);
  270. }
  271. #endif
  272. Char *
  273. s_strchr(const Char *str, int ch)
  274. {
  275. do
  276. if (*str == ch)
  277. return ((Char *)(intptr_t)str);
  278. while (*str++);
  279. return (NULL);
  280. }
  281. Char *
  282. s_strrchr(const Char *str, int ch)
  283. {
  284. const Char *rstr;
  285. rstr = NULL;
  286. do
  287. if (*str == ch)
  288. rstr = str;
  289. while (*str++);
  290. return ((Char *)(intptr_t)rstr);
  291. }
  292. size_t
  293. s_strlen(const Char *str)
  294. {
  295. size_t n;
  296. for (n = 0; *str++; n++)
  297. continue;
  298. return (n);
  299. }
  300. int
  301. s_strcmp(const Char *str1, const Char *str2)
  302. {
  303. for (; *str1 && *str1 == *str2; str1++, str2++)
  304. continue;
  305. /*
  306. * The following case analysis is necessary so that characters which look
  307. * negative collate low against normal characters but high against the
  308. * end-of-string NUL.
  309. */
  310. if (*str1 == '\0' && *str2 == '\0')
  311. return (0);
  312. else if (*str1 == '\0')
  313. return (-1);
  314. else if (*str2 == '\0')
  315. return (1);
  316. else
  317. return (*str1 - *str2);
  318. }
  319. int
  320. s_strncmp(const Char *str1, const Char *str2, size_t n)
  321. {
  322. if (n == 0)
  323. return (0);
  324. do {
  325. if (*str1 != *str2) {
  326. /*
  327. * The following case analysis is necessary so that characters
  328. * which look negative collate low against normal characters
  329. * but high against the end-of-string NUL.
  330. */
  331. if (*str1 == '\0')
  332. return (-1);
  333. else if (*str2 == '\0')
  334. return (1);
  335. else
  336. return (*str1 - *str2);
  337. }
  338. if (*str1 == '\0')
  339. return(0);
  340. str1++, str2++;
  341. } while (--n != 0);
  342. return(0);
  343. }
  344. #endif /* not WIDE_STRINGS */
  345. int
  346. s_strcasecmp(const Char *str1, const Char *str2)
  347. {
  348. #ifdef WIDE_STRINGS
  349. wint_t l1 = 0, l2 = 0;
  350. for (; *str1; str1++, str2++)
  351. if (*str1 == *str2)
  352. l1 = l2 = 0;
  353. else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
  354. break;
  355. #else
  356. unsigned char l1 = 0, l2 = 0;
  357. for (; *str1; str1++, str2++)
  358. if (*str1 == *str2)
  359. l1 = l2 = 0;
  360. else if ((l1 = tolower((unsigned char)*str1)) !=
  361. (l2 = tolower((unsigned char)*str2)))
  362. break;
  363. #endif
  364. /*
  365. * The following case analysis is necessary so that characters which look
  366. * negative collate low against normal characters but high against the
  367. * end-of-string NUL.
  368. */
  369. if (*str1 == '\0' && *str2 == '\0')
  370. return (0);
  371. else if (*str1 == '\0')
  372. return (-1);
  373. else if (*str2 == '\0')
  374. return (1);
  375. else if (l1 == l2) /* They are zero when they are equal */
  376. return (*str1 - *str2);
  377. else
  378. return (l1 - l2);
  379. }
  380. Char *
  381. s_strnsave(const Char *s, size_t len)
  382. {
  383. Char *n;
  384. n = xmalloc((len + 1) * sizeof (*n));
  385. memcpy(n, s, len * sizeof (*n));
  386. n[len] = '\0';
  387. return n;
  388. }
  389. Char *
  390. s_strsave(const Char *s)
  391. {
  392. Char *n;
  393. size_t size;
  394. if (s == NULL)
  395. s = STRNULL;
  396. size = (Strlen(s) + 1) * sizeof(*n);
  397. n = xmalloc(size);
  398. memcpy(n, s, size);
  399. return (n);
  400. }
  401. Char *
  402. s_strspl(const Char *cp, const Char *dp)
  403. {
  404. Char *res, *ep;
  405. const Char *p, *q;
  406. if (!cp)
  407. cp = STRNULL;
  408. if (!dp)
  409. dp = STRNULL;
  410. for (p = cp; *p++;)
  411. continue;
  412. for (q = dp; *q++;)
  413. continue;
  414. res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
  415. for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
  416. continue;
  417. for (ep--, q = dp; (*ep++ = *q++) != '\0';)
  418. continue;
  419. return (res);
  420. }
  421. Char *
  422. s_strend(const Char *cp)
  423. {
  424. if (!cp)
  425. return ((Char *)(intptr_t) cp);
  426. while (*cp)
  427. cp++;
  428. return ((Char *)(intptr_t) cp);
  429. }
  430. Char *
  431. s_strstr(const Char *s, const Char *t)
  432. {
  433. do {
  434. const Char *ss = s;
  435. const Char *tt = t;
  436. do
  437. if (*tt == '\0')
  438. return ((Char *)(intptr_t) s);
  439. while (*ss++ == *tt++);
  440. } while (*s++ != '\0');
  441. return (NULL);
  442. }
  443. #else /* !SHORT_STRINGS */
  /*
   * Copy s into a static buffer, run strip() on the copy and return it.
   *
   * Returns NULL for a NULL s.  The buffer is grown with xrealloc() when s
   * does not fit and is reused by every call.
   */
  char *
  caching_strip(const char *s)
  {
      static char *cache = NULL;
      static size_t cache_cap = 0;
      size_t need;

      if (s == NULL)
          return NULL;

      need = strlen(s) + 1;		/* terminator included */
      if (need > cache_cap) {
          cache = xrealloc(cache, need);
          cache_cap = need;
      }
      memcpy(cache, s, need);
      strip(cache);
      return cache;
  }
  461. #endif
  462. char *
  463. short2qstr(const Char *src)
  464. {
  465. static char *sdst = NULL;
  466. static size_t dstsize = 0;
  467. char *dst, *edst;
  468. if (src == NULL)
  469. return (NULL);
  470. if (sdst == NULL) {
  471. dstsize = MALLOC_INCR;
  472. sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
  473. }
  474. dst = sdst;
  475. edst = &dst[dstsize];
  476. while (*src) {
  477. if (*src & QUOTE) {
  478. *dst++ = '\\';
  479. if (dst == edst) {
  480. dstsize += MALLOC_INCR;
  481. sdst = xrealloc(sdst,
  482. (dstsize + MALLOC_SURPLUS) * sizeof(char));
  483. edst = &sdst[dstsize];
  484. dst = &edst[-MALLOC_INCR];
  485. }
  486. }
  487. dst += one_wctomb(dst, *src & CHAR);
  488. src++;
  489. if (dst >= edst) {
  490. ptrdiff_t i = dst - edst;
  491. dstsize += MALLOC_INCR;
  492. sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
  493. edst = &sdst[dstsize];
  494. dst = &edst[-MALLOC_INCR + i];
  495. }
  496. }
  497. *dst = 0;
  498. return (sdst);
  499. }
  500. struct blk_buf *
  501. bb_alloc()
  502. {
  503. return xcalloc(1, sizeof(struct blk_buf));
  504. }
  505. static void
  506. bb_store(struct blk_buf *bb, Char *str)
  507. {
  508. if (bb->len == bb->size) { /* Keep space for terminating NULL */
  509. if (bb->size == 0)
  510. bb->size = 16; /* Arbitrary */
  511. else
  512. bb->size *= 2;
  513. bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
  514. }
  515. bb->vec[bb->len] = str;
  516. }
  517. void
  518. bb_append(struct blk_buf *bb, Char *str)
  519. {
  520. bb_store(bb, str);
  521. bb->len++;
  522. }
  523. void
  524. bb_cleanup(void *xbb)
  525. {
  526. struct blk_buf *bb;
  527. size_t i;
  528. bb = xbb;
  529. for (i = 0; i < bb->len; i++)
  530. xfree(bb->vec[i]);
  531. xfree(bb->vec);
  532. }
  /*
   * Release what bb holds with bb_cleanup() and then bb itself with xfree().
   */
  void
  bb_free(void *bb)
  {
      void *owned = bb;

      bb_cleanup(owned);
      xfree(owned);
  }
  539. Char **
  540. bb_finish(struct blk_buf *bb)
  541. {
  542. bb_store(bb, NULL);
  543. return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
  544. }
  545. #define DO_STRBUF(STRBUF, CHAR, STRLEN) \
  546. \
  547. struct STRBUF * \
  548. STRBUF##_alloc(void) \
  549. { \
  550. return xcalloc(1, sizeof(struct STRBUF)); \
  551. } \
  552. \
  553. static void \
  554. STRBUF##_store1(struct STRBUF *buf, CHAR c) \
  555. { \
  556. if (buf->size == buf->len) { \
  557. if (buf->size == 0) \
  558. buf->size = 64; /* Arbitrary */ \
  559. else \
  560. buf->size *= 2; \
  561. buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
  562. } \
  563. assert(buf->s); \
  564. buf->s[buf->len] = c; \
  565. } \
  566. \
  567. /* Like strbuf_append1(buf, '\0'), but don't advance len */ \
  568. void \
  569. STRBUF##_terminate(struct STRBUF *buf) \
  570. { \
  571. STRBUF##_store1(buf, '\0'); \
  572. } \
  573. \
  574. void \
  575. STRBUF##_append1(struct STRBUF *buf, CHAR c) \
  576. { \
  577. STRBUF##_store1(buf, c); \
  578. buf->len++; \
  579. } \
  580. \
  581. void \
  582. STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \
  583. { \
  584. if (buf->size < buf->len + len) { \
  585. if (buf->size == 0) \
  586. buf->size = 64; /* Arbitrary */ \
  587. while (buf->size < buf->len + len) \
  588. buf->size *= 2; \
  589. buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
  590. } \
  591. memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \
  592. buf->len += len; \
  593. } \
  594. \
  595. void \
  596. STRBUF##_append(struct STRBUF *buf, const CHAR *s) \
  597. { \
  598. STRBUF##_appendn(buf, s, STRLEN(s)); \
  599. } \
  600. \
  601. CHAR * \
  602. STRBUF##_finish(struct STRBUF *buf) \
  603. { \
  604. STRBUF##_append1(buf, 0); \
  605. return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \
  606. } \
  607. \
  608. void \
  609. STRBUF##_cleanup(void *xbuf) \
  610. { \
  611. struct STRBUF *buf; \
  612. \
  613. buf = xbuf; \
  614. xfree(buf->s); \
  615. } \
  616. \
  617. void \
  618. STRBUF##_free(void *xbuf) \
  619. { \
  620. STRBUF##_cleanup(xbuf); \
  621. xfree(xbuf); \
  622. } \
  623. \
  624. const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
  625. DO_STRBUF(strbuf, char, strlen);
  626. DO_STRBUF(Strbuf, Char, Strlen);