/contrib/tcsh/tc.str.c
https://bitbucket.org/freebsd/freebsd-head/ · C · 695 lines · 561 code · 69 blank · 65 comment · 176 complexity · c588345f024dcdcfa8e70582a81ceb4c MD5 · raw file
- /* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $ */
- /*
- * tc.str.c: Short string package
- * This has been a lesson of how to write buggy code!
- */
- /*-
- * Copyright (c) 1980, 1991 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- #include "sh.h"
- #include <assert.h>
- #include <limits.h>
- RCSID("$tcsh: tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $")
/* Number of bytes by which the static conversion buffers below are grown. */
#define MALLOC_INCR 128

/*
 * Extra bytes allocated past the nominal buffer size so that one more
 * character can always be stored before the size check runs.
 */
#ifndef WIDE_STRINGS
# define MALLOC_SURPLUS 0
#else
# define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */
#endif
- #ifdef WIDE_STRINGS
- size_t
- one_mbtowc(Char *pwc, const char *s, size_t n)
- {
- int len;
- len = rt_mbtowc(pwc, s, n);
- if (len == -1) {
- reset_mbtowc();
- *pwc = (unsigned char)*s | INVALID_BYTE;
- }
- if (len <= 0)
- len = 1;
- return len;
- }
- size_t
- one_wctomb(char *s, Char wchar)
- {
- int len;
- if (wchar & INVALID_BYTE) {
- s[0] = wchar & 0xFF;
- len = 1;
- } else {
- #ifdef UTF16_STRINGS
- if (wchar >= 0x10000) {
- /* UTF-16 systems can't handle these values directly in calls to
- wctomb. Convert value to UTF-16 surrogate and call wcstombs to
- convert the "string" to the correct multibyte representation,
- if any. */
- wchar_t ws[3];
- wchar -= 0x10000;
- ws[0] = 0xd800 | (wchar >> 10);
- ws[1] = 0xdc00 | (wchar & 0x3ff);
- ws[2] = 0;
- /* The return value of wcstombs excludes the trailing 0, so len is
- the correct number of multibytes for the Unicode char. */
- len = wcstombs (s, ws, MB_CUR_MAX + 1);
- } else
- #endif
- len = wctomb(s, (wchar_t) wchar);
- if (len == -1)
- s[0] = wchar;
- if (len <= 0)
- len = 1;
- }
- return len;
- }
- int
- rt_mbtowc(Char *pwc, const char *s, size_t n)
- {
- int ret;
- char back[MB_LEN_MAX];
- wchar_t tmp;
- #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
- # if defined(AUTOSET_KANJI)
- static mbstate_t mb_zero, mb;
- /*
- * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
- */
- if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
- !memcmp(&mb, &mb_zero, sizeof(mb)))
- {
- *pwc = *s;
- return 1;
- }
- # else
- mbstate_t mb;
- # endif
- memset (&mb, 0, sizeof mb);
- ret = mbrtowc(&tmp, s, n, &mb);
- #else
- ret = mbtowc(&tmp, s, n);
- #endif
- if (ret > 0) {
- *pwc = tmp;
- #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
- if (tmp >= 0xd800 && tmp <= 0xdbff) {
- /* UTF-16 surrogate pair. Fetch second half and compute
- UTF-32 value. Dispense with the inverse test in this case. */
- size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
- if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
- ret = -1;
- else {
- *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
- ret += n2;
- }
- } else
- #endif
- if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
- ret = -1;
- } else if (ret == -2)
- ret = -1;
- else if (ret == 0)
- *pwc = '\0';
- return ret;
- }
- #endif
- #ifdef SHORT_STRINGS
- Char **
- blk2short(char **src)
- {
- size_t n;
- Char **sdst, **dst;
- /*
- * Count
- */
- for (n = 0; src[n] != NULL; n++)
- continue;
- sdst = dst = xmalloc((n + 1) * sizeof(Char *));
- for (; *src != NULL; src++)
- *dst++ = SAVE(*src);
- *dst = NULL;
- return (sdst);
- }
- char **
- short2blk(Char **src)
- {
- size_t n;
- char **sdst, **dst;
- /*
- * Count
- */
- for (n = 0; src[n] != NULL; n++)
- continue;
- sdst = dst = xmalloc((n + 1) * sizeof(char *));
- for (; *src != NULL; src++)
- *dst++ = strsave(short2str(*src));
- *dst = NULL;
- return (sdst);
- }
- Char *
- str2short(const char *src)
- {
- static struct Strbuf buf; /* = Strbuf_INIT; */
- if (src == NULL)
- return (NULL);
- buf.len = 0;
- while (*src) {
- Char wc;
- src += one_mbtowc(&wc, src, MB_LEN_MAX);
- Strbuf_append1(&buf, wc);
- }
- Strbuf_terminate(&buf);
- return buf.s;
- }
- char *
- short2str(const Char *src)
- {
- static char *sdst = NULL;
- static size_t dstsize = 0;
- char *dst, *edst;
- if (src == NULL)
- return (NULL);
- if (sdst == NULL) {
- dstsize = MALLOC_INCR;
- sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
- }
- dst = sdst;
- edst = &dst[dstsize];
- while (*src) {
- dst += one_wctomb(dst, *src & CHAR);
- src++;
- if (dst >= edst) {
- char *wdst = dst;
- char *wedst = edst;
- dstsize += MALLOC_INCR;
- sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
- edst = &sdst[dstsize];
- dst = &edst[-MALLOC_INCR];
- while (wdst > wedst) {
- dst++;
- wdst--;
- }
- }
- }
- *dst = 0;
- return (sdst);
- }
- #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
- Char *
- s_strcpy(Char *dst, const Char *src)
- {
- Char *sdst;
- sdst = dst;
- while ((*dst++ = *src++) != '\0')
- continue;
- return (sdst);
- }
- Char *
- s_strncpy(Char *dst, const Char *src, size_t n)
- {
- Char *sdst;
- if (n == 0)
- return(dst);
- sdst = dst;
- do
- if ((*dst++ = *src++) == '\0') {
- while (--n != 0)
- *dst++ = '\0';
- return(sdst);
- }
- while (--n != 0);
- return (sdst);
- }
- Char *
- s_strcat(Char *dst, const Char *src)
- {
- Strcpy(Strend(dst), src);
- return dst;
- }
- #ifdef NOTUSED
- Char *
- s_strncat(Char *dst, const Char *src, size_t n)
- {
- Char *sdst;
- if (n == 0)
- return (dst);
- sdst = dst;
- while (*dst)
- dst++;
- do
- if ((*dst++ = *src++) == '\0')
- return(sdst);
- while (--n != 0)
- continue;
- *dst = '\0';
- return (sdst);
- }
- #endif
- Char *
- s_strchr(const Char *str, int ch)
- {
- do
- if (*str == ch)
- return ((Char *)(intptr_t)str);
- while (*str++);
- return (NULL);
- }
- Char *
- s_strrchr(const Char *str, int ch)
- {
- const Char *rstr;
- rstr = NULL;
- do
- if (*str == ch)
- rstr = str;
- while (*str++);
- return ((Char *)(intptr_t)rstr);
- }
- size_t
- s_strlen(const Char *str)
- {
- size_t n;
- for (n = 0; *str++; n++)
- continue;
- return (n);
- }
- int
- s_strcmp(const Char *str1, const Char *str2)
- {
- for (; *str1 && *str1 == *str2; str1++, str2++)
- continue;
- /*
- * The following case analysis is necessary so that characters which look
- * negative collate low against normal characters but high against the
- * end-of-string NUL.
- */
- if (*str1 == '\0' && *str2 == '\0')
- return (0);
- else if (*str1 == '\0')
- return (-1);
- else if (*str2 == '\0')
- return (1);
- else
- return (*str1 - *str2);
- }
- int
- s_strncmp(const Char *str1, const Char *str2, size_t n)
- {
- if (n == 0)
- return (0);
- do {
- if (*str1 != *str2) {
- /*
- * The following case analysis is necessary so that characters
- * which look negative collate low against normal characters
- * but high against the end-of-string NUL.
- */
- if (*str1 == '\0')
- return (-1);
- else if (*str2 == '\0')
- return (1);
- else
- return (*str1 - *str2);
- }
- if (*str1 == '\0')
- return(0);
- str1++, str2++;
- } while (--n != 0);
- return(0);
- }
#endif /* !WIDE_STRINGS || UTF16_STRINGS */
- int
- s_strcasecmp(const Char *str1, const Char *str2)
- {
- #ifdef WIDE_STRINGS
- wint_t l1 = 0, l2 = 0;
- for (; *str1; str1++, str2++)
- if (*str1 == *str2)
- l1 = l2 = 0;
- else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
- break;
- #else
- unsigned char l1 = 0, l2 = 0;
- for (; *str1; str1++, str2++)
- if (*str1 == *str2)
- l1 = l2 = 0;
- else if ((l1 = tolower((unsigned char)*str1)) !=
- (l2 = tolower((unsigned char)*str2)))
- break;
- #endif
- /*
- * The following case analysis is necessary so that characters which look
- * negative collate low against normal characters but high against the
- * end-of-string NUL.
- */
- if (*str1 == '\0' && *str2 == '\0')
- return (0);
- else if (*str1 == '\0')
- return (-1);
- else if (*str2 == '\0')
- return (1);
- else if (l1 == l2) /* They are zero when they are equal */
- return (*str1 - *str2);
- else
- return (l1 - l2);
- }
- Char *
- s_strnsave(const Char *s, size_t len)
- {
- Char *n;
- n = xmalloc((len + 1) * sizeof (*n));
- memcpy(n, s, len * sizeof (*n));
- n[len] = '\0';
- return n;
- }
- Char *
- s_strsave(const Char *s)
- {
- Char *n;
- size_t size;
- if (s == NULL)
- s = STRNULL;
- size = (Strlen(s) + 1) * sizeof(*n);
- n = xmalloc(size);
- memcpy(n, s, size);
- return (n);
- }
- Char *
- s_strspl(const Char *cp, const Char *dp)
- {
- Char *res, *ep;
- const Char *p, *q;
- if (!cp)
- cp = STRNULL;
- if (!dp)
- dp = STRNULL;
- for (p = cp; *p++;)
- continue;
- for (q = dp; *q++;)
- continue;
- res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
- for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
- continue;
- for (ep--, q = dp; (*ep++ = *q++) != '\0';)
- continue;
- return (res);
- }
- Char *
- s_strend(const Char *cp)
- {
- if (!cp)
- return ((Char *)(intptr_t) cp);
- while (*cp)
- cp++;
- return ((Char *)(intptr_t) cp);
- }
- Char *
- s_strstr(const Char *s, const Char *t)
- {
- do {
- const Char *ss = s;
- const Char *tt = t;
- do
- if (*tt == '\0')
- return ((Char *)(intptr_t) s);
- while (*ss++ == *tt++);
- } while (*s++ != '\0');
- return (NULL);
- }
- #else /* !SHORT_STRINGS */
/*
 * Copy s into a function static buffer, run strip() on the copy and return
 * it.  Returns NULL when s is NULL.  The buffer is grown on demand and
 * reused, so each call overwrites the previous result.
 */
char *
caching_strip(const char *s)
{
    static char *buf;
    static size_t buf_size;
    size_t need;

    if (s == NULL)
        return NULL;

    need = strlen(s) + 1;   /* terminating NUL included */
    if (need > buf_size) {
        buf = xrealloc(buf, need);
        buf_size = need;
    }
    memcpy(buf, s, need);
    strip(buf);
    return buf;
}
- #endif
- char *
- short2qstr(const Char *src)
- {
- static char *sdst = NULL;
- static size_t dstsize = 0;
- char *dst, *edst;
- if (src == NULL)
- return (NULL);
- if (sdst == NULL) {
- dstsize = MALLOC_INCR;
- sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
- }
- dst = sdst;
- edst = &dst[dstsize];
- while (*src) {
- if (*src & QUOTE) {
- *dst++ = '\\';
- if (dst == edst) {
- dstsize += MALLOC_INCR;
- sdst = xrealloc(sdst,
- (dstsize + MALLOC_SURPLUS) * sizeof(char));
- edst = &sdst[dstsize];
- dst = &edst[-MALLOC_INCR];
- }
- }
- dst += one_wctomb(dst, *src & CHAR);
- src++;
- if (dst >= edst) {
- ptrdiff_t i = dst - edst;
- dstsize += MALLOC_INCR;
- sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
- edst = &sdst[dstsize];
- dst = &edst[-MALLOC_INCR + i];
- }
- }
- *dst = 0;
- return (sdst);
- }
- struct blk_buf *
- bb_alloc()
- {
- return xcalloc(1, sizeof(struct blk_buf));
- }
- static void
- bb_store(struct blk_buf *bb, Char *str)
- {
- if (bb->len == bb->size) { /* Keep space for terminating NULL */
- if (bb->size == 0)
- bb->size = 16; /* Arbitrary */
- else
- bb->size *= 2;
- bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
- }
- bb->vec[bb->len] = str;
- }
- void
- bb_append(struct blk_buf *bb, Char *str)
- {
- bb_store(bb, str);
- bb->len++;
- }
- void
- bb_cleanup(void *xbb)
- {
- struct blk_buf *bb;
- size_t i;
- bb = xbb;
- for (i = 0; i < bb->len; i++)
- xfree(bb->vec[i]);
- xfree(bb->vec);
- }
/*
 * Release the contents of the blk_buf bb via bb_cleanup() and then the
 * struct blk_buf itself.
 */
void
bb_free(void *bb)
{
    /* Contents first, container second. */
    bb_cleanup(bb);
    xfree(bb);
}
- Char **
- bb_finish(struct blk_buf *bb)
- {
- bb_store(bb, NULL);
- return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
- }
- #define DO_STRBUF(STRBUF, CHAR, STRLEN) \
- \
- struct STRBUF * \
- STRBUF##_alloc(void) \
- { \
- return xcalloc(1, sizeof(struct STRBUF)); \
- } \
- \
- static void \
- STRBUF##_store1(struct STRBUF *buf, CHAR c) \
- { \
- if (buf->size == buf->len) { \
- if (buf->size == 0) \
- buf->size = 64; /* Arbitrary */ \
- else \
- buf->size *= 2; \
- buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
- } \
- assert(buf->s); \
- buf->s[buf->len] = c; \
- } \
- \
- /* Like strbuf_append1(buf, '\0'), but don't advance len */ \
- void \
- STRBUF##_terminate(struct STRBUF *buf) \
- { \
- STRBUF##_store1(buf, '\0'); \
- } \
- \
- void \
- STRBUF##_append1(struct STRBUF *buf, CHAR c) \
- { \
- STRBUF##_store1(buf, c); \
- buf->len++; \
- } \
- \
- void \
- STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \
- { \
- if (buf->size < buf->len + len) { \
- if (buf->size == 0) \
- buf->size = 64; /* Arbitrary */ \
- while (buf->size < buf->len + len) \
- buf->size *= 2; \
- buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
- } \
- memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \
- buf->len += len; \
- } \
- \
- void \
- STRBUF##_append(struct STRBUF *buf, const CHAR *s) \
- { \
- STRBUF##_appendn(buf, s, STRLEN(s)); \
- } \
- \
- CHAR * \
- STRBUF##_finish(struct STRBUF *buf) \
- { \
- STRBUF##_append1(buf, 0); \
- return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \
- } \
- \
- void \
- STRBUF##_cleanup(void *xbuf) \
- { \
- struct STRBUF *buf; \
- \
- buf = xbuf; \
- xfree(buf->s); \
- } \
- \
- void \
- STRBUF##_free(void *xbuf) \
- { \
- STRBUF##_cleanup(xbuf); \
- xfree(xbuf); \
- } \
- \
- const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
- DO_STRBUF(strbuf, char, strlen);
- DO_STRBUF(Strbuf, Char, Strlen);