/src/racket/src/string.c
C | 5657 lines | 4630 code | 683 blank | 344 comment | 1048 complexity | 0664fdaee9cf399f842278273762caf3 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
Large files files are truncated, but you can click here to view the full file
- /*
- Racket
- Copyright (c) 2004-2011 PLT Scheme Inc.
- Copyright (c) 1995-2001 Matthew Flatt
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301 USA.
- libscheme
- Copyright (c) 1994 Brent Benson
- All rights reserved.
- */
- #include "schpriv.h"
- #include "schvers.h"
- #include <string.h>
- #include <ctype.h>
- #ifndef DONT_USE_LOCALE
- # include <locale.h>
- # ifdef MZ_NO_ICONV
- # define USE_ICONV_DLL
- # endif
- # ifndef USE_ICONV_DLL
- # include <iconv.h>
- # include <langinfo.h>
- # endif
- # include <wchar.h>
- # include <wctype.h>
- # include <errno.h>
- # ifdef MACOS_UNICODE_SUPPORT
- # include <CoreFoundation/CFString.h>
- # include <CoreFoundation/CFLocale.h>
- # endif
- # ifdef WINDOWS_UNICODE_SUPPORT
- # include <windows.h>
- # endif
- #endif
- #ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH
- # include "schsys.h"
- #endif
- #include "schustr.inc"
- #ifdef USE_ICONV_DLL
- typedef intptr_t iconv_t;
- typedef int *(*errno_proc_t)();
- typedef size_t (*iconv_proc_t)(iconv_t cd,
- char **inbuf, size_t *inbytesleft,
- char **outbuf, size_t *outbytesleft);
- typedef iconv_t (*iconv_open_proc_t)(const char *tocode, const char *fromcode);
- typedef void (*iconv_close_proc_t)(iconv_t cd);
- typedef char *(*locale_charset_proc_t)();
- static errno_proc_t iconv_errno;
- static iconv_proc_t iconv;
- static iconv_open_proc_t iconv_open;
- static iconv_close_proc_t iconv_close;
- static locale_charset_proc_t locale_charset; /* Not used, currently */
- #define mzCHK_PROC(x) x
- static int get_iconv_errno(void)
- {
- int *a;
- a = iconv_errno();
- return *a;
- }
- # undef HAVE_CODESET
- # define HAVE_CODESET 1
- # define CODESET 0
- # define ICONV_errno get_iconv_errno()
- extern wchar_t *scheme_get_dll_path(wchar_t *s);
- static int iconv_ready = 0;
- static void init_iconv()
- {
- # ifdef MZ_NO_ICONV
- # else
- HMODULE m;
- m = LoadLibraryW(scheme_get_dll_path(L"iconv.dll"));
- if (!m)
- m = LoadLibraryW(scheme_get_dll_path(L"libiconv.dll"));
- if (!m)
- m = LoadLibraryW(scheme_get_dll_path(L"libiconv-2.dll"));
- if (!m)
- m = LoadLibrary("iconv.dll");
- if (!m)
- m = LoadLibrary("libiconv.dll");
- if (!m)
- m = LoadLibrary("libiconv-2.dll");
- if (m) {
- iconv = (iconv_proc_t)GetProcAddress(m, "libiconv");
- iconv_open = (iconv_open_proc_t)GetProcAddress(m, "libiconv_open");
- iconv_close = (iconv_close_proc_t)GetProcAddress(m, "libiconv_close");
- locale_charset = (locale_charset_proc_t)GetProcAddress(m, "locale_charset");
- /* Make sure we have all of them or none: */
- if (!iconv || !iconv_open || !iconv_close) {
- iconv = NULL;
- iconv_open = NULL;
- iconv_close = NULL;
- }
- }
- if (iconv) {
- iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno");
- if (!iconv_errno) {
- /* The iconv.dll distributed with PLT Scheme links to msvcrt.dll.
- It's a slighly dangerous assumption that whatever iconv we
- found also uses msvcrt.dll. */
- m = LoadLibrary("msvcrt.dll");
- if (m) {
- iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno");
- if (!iconv_errno) {
- iconv = NULL;
- iconv_open = NULL;
- iconv_close = NULL;
- }
- }
- }
- }
- # endif
- iconv_ready = 1;
- }
- #else
- # define ICONV_errno errno
- # define iconv_ready 1
- # define mzCHK_PROC(x) 1
- static void init_iconv() { }
- #endif
- #ifdef MACOS_UNICODE_SUPPORT
- # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
- #endif
- #ifdef WINDOWS_UNICODE_SUPPORT
- # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
- #endif
- #ifndef mzLOCALE_IS_UTF_8
- # define mzLOCALE_IS_UTF_8(s) !mzCHK_PROC(iconv_open)
- #endif
- #define mzICONV_KIND 0
- #define mzUTF8_KIND 1
- #define mzUTF8_TO_UTF16_KIND 2
- #define mzUTF16_TO_UTF8_KIND 3
- typedef struct Scheme_Converter {
- Scheme_Object so;
- short closed;
- short kind;
- iconv_t cd;
- int permissive;
- Scheme_Custodian_Reference *mref;
- } Scheme_Converter;
- /* locals */
- /* These two locale variables are only valid when reset_locale()
- is called after continuation marks (and hence parameterization)
- may have changed. Similarly, setlocale() is only up-to-date
- when reset_locale() has been called. */
- THREAD_LOCAL_DECL(static int locale_on);
- THREAD_LOCAL_DECL(static void *current_locale_name_ptr);
- static void reset_locale(void);
- #define current_locale_name ((const mzchar *)current_locale_name_ptr)
- #ifdef USE_ICONV_DLL
- static char *nl_langinfo(int which)
- {
- int i;
- reset_locale();
- if (!current_locale_name)
- current_locale_name_ptr = "\0\0\0\0";
- if ((current_locale_name[0] == 'C')
- && !current_locale_name[1])
- return "US-ASCII";
- for (i = 0; current_locale_name[i]; i++) {
- if (current_locale_name[i] == '.') {
- if (current_locale_name[i + 1]) {
- int len, j = 0;
- char *enc;
- i++;
- len = scheme_char_strlen(current_locale_name) - i;
- enc = (char *)scheme_malloc_atomic(len + 1);
- while (current_locale_name[i]) {
- if (current_locale_name[i] > 127)
- return "UTF-8";
- enc[j++] = current_locale_name[i++];
- }
- enc[j] = 0;
- return enc;
- }
- }
- }
- return "UTF-8";
- }
- #endif
- #ifdef DONT_USE_LOCALE
- # define mz_iconv_nl_langinfo() ""
- #else
- static char *mz_iconv_nl_langinfo(){
- char *s;
- # if HAVE_CODESET
- s = nl_langinfo(CODESET);
- # else
- s = NULL;
- # endif
- if (!s)
- return "";
- else
- return s;
- }
- #endif
- READ_ONLY static const char * const STRING_IS_NOT_UTF_8 = "string is not a well-formed UTF-8 encoding: ";
- static Scheme_Object *make_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_p (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_length (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_ci_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_ci_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_lt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_lt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_gt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_gt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_lt_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_gt_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_ci_lt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_ci_lt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_ci_gt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_ci_gt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_ci_lt_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_ci_gt_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_upcase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_downcase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_titlecase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_foldcase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_upcase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_downcase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *substring (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_append (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_to_list (int argc, Scheme_Object *argv[]);
- static Scheme_Object *list_to_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_copy (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_copy_bang (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_fill (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_to_immutable (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_normalize_c (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_normalize_kc (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_normalize_d (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_normalize_kd (int argc, Scheme_Object *argv[]);
- static Scheme_Object *make_shared_byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *shared_byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *make_byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_p (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_p (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_length (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_lt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_gt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_substring (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_append (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_to_list (int argc, Scheme_Object *argv[]);
- static Scheme_Object *list_to_byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_copy (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_copy_bang (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_fill (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_to_immutable (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_utf8_index (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_utf8_ref (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_utf8_length (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_to_char_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_to_char_string_locale (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[]);
- static Scheme_Object *char_string_to_byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *char_string_to_byte_string_locale (int argc, Scheme_Object *argv[]);
- static Scheme_Object *char_string_to_byte_string_latin1 (int argc, Scheme_Object *argv[]);
- static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[]);
- static Scheme_Object *version(int argc, Scheme_Object *argv[]);
- static Scheme_Object *format(int argc, Scheme_Object *argv[]);
- static Scheme_Object *sch_printf(int argc, Scheme_Object *argv[]);
- static Scheme_Object *sch_eprintf(int argc, Scheme_Object *argv[]);
- static Scheme_Object *sch_fprintf(int argc, Scheme_Object *argv[]);
- static Scheme_Object *banner(int argc, Scheme_Object *argv[]);
- static Scheme_Object *sch_getenv(int argc, Scheme_Object *argv[]);
- static Scheme_Object *sch_putenv(int argc, Scheme_Object *argv[]);
- static Scheme_Object *system_type(int argc, Scheme_Object *argv[]);
- static Scheme_Object *system_library_subpath(int argc, Scheme_Object *argv[]);
- static Scheme_Object *cmdline_args(int argc, Scheme_Object *argv[]);
- static Scheme_Object *current_locale(int argc, Scheme_Object *argv[]);
- static Scheme_Object *locale_string_encoding(int argc, Scheme_Object *argv[]);
- static Scheme_Object *system_language_country(int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_open_converter(int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_close_converter(int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_convert(int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_convert_end(int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_converter_p(int argc, Scheme_Object *argv[]);
- #ifdef MZ_PRECISE_GC
- static void register_traversers(void);
- #endif
- static int mz_char_strcmp(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
- static int mz_char_strcmp_ci(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
- static int mz_strcmp(const char *who, unsigned char *str1, intptr_t l1, unsigned char *str2, intptr_t l2);
- XFORM_NONGCING static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t end,
- unsigned int *us, intptr_t dstart, intptr_t dend,
- intptr_t *ipos, intptr_t *jpos,
- char compact, char utf16,
- int *state, int might_continue, int permissive);
- XFORM_NONGCING static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t end,
- unsigned char *s, intptr_t dstart, intptr_t dend,
- intptr_t *_ipos, intptr_t *_opos, char utf16);
- static char *string_to_from_locale(int to_bytes,
- char *in, intptr_t delta, intptr_t len,
- intptr_t *olen, int perm,
- int *no_cvt);
- #define portable_isspace(x) (((x) < 128) && isspace(x))
- ROSYM static Scheme_Object *sys_symbol;
- ROSYM static Scheme_Object *platform_3m_path, *platform_cgc_path;
- READ_ONLY static Scheme_Object *zero_length_char_string;
- READ_ONLY static Scheme_Object *zero_length_byte_string;
- SHARED_OK static Scheme_Hash_Table *putenv_str_table;
- SHARED_OK static char *embedding_banner;
- SHARED_OK static Scheme_Object *vers_str;
- SHARED_OK static Scheme_Object *banner_str;
- READ_ONLY static Scheme_Object *complete_symbol, *continues_symbol, *aborts_symbol, *error_symbol;
- void
- scheme_init_string (Scheme_Env *env)
- {
- Scheme_Object *p;
- REGISTER_SO(sys_symbol);
- sys_symbol = scheme_intern_symbol(SYSTEM_TYPE_NAME);
- REGISTER_SO(zero_length_char_string);
- REGISTER_SO(zero_length_byte_string);
- zero_length_char_string = scheme_alloc_char_string(0, 0);
- zero_length_byte_string = scheme_alloc_byte_string(0, 0);
- REGISTER_SO(complete_symbol);
- REGISTER_SO(continues_symbol);
- REGISTER_SO(aborts_symbol);
- REGISTER_SO(error_symbol);
- complete_symbol = scheme_intern_symbol("complete");
- continues_symbol = scheme_intern_symbol("continues");
- aborts_symbol = scheme_intern_symbol("aborts");
- error_symbol = scheme_intern_symbol("error");
- REGISTER_SO(platform_3m_path);
- #ifdef UNIX_FILE_SYSTEM
- # define MZ3M_SUBDIR "/3m"
- #else
- # ifdef DOS_FILE_SYSTEM
- # define MZ3M_SUBDIR "\\3m"
- # else
- # define MZ3M_SUBDIR ":3m"
- # endif
- #endif
- REGISTER_SO(platform_3m_path);
- REGISTER_SO(platform_cgc_path);
- platform_cgc_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH);
- platform_3m_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH MZ3M_SUBDIR);
- REGISTER_SO(putenv_str_table);
- REGISTER_SO(embedding_banner);
- REGISTER_SO(vers_str);
- REGISTER_SO(banner_str);
- vers_str = scheme_make_utf8_string(scheme_version());
- SCHEME_SET_CHAR_STRING_IMMUTABLE(vers_str);
- banner_str = scheme_make_utf8_string(scheme_banner());
- SCHEME_SET_CHAR_STRING_IMMUTABLE(banner_str);
- p = scheme_make_folding_prim(string_p, "string?", 1, 1, 1);
- SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
- scheme_add_global_constant("string?", p, env);
- scheme_add_global_constant("make-string",
- scheme_make_immed_prim(make_string,
- "make-string",
- 1, 2),
- env);
- scheme_add_global_constant("string",
- scheme_make_immed_prim(string,
- "string",
- 0, -1),
- env);
- scheme_add_global_constant("string-length",
- scheme_make_folding_prim(string_length,
- "string-length",
- 1, 1, 1),
- env);
- p = scheme_make_immed_prim(scheme_checked_string_ref, "string-ref", 2, 2);
- SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED;
- scheme_add_global_constant("string-ref", p, env);
- p = scheme_make_immed_prim(scheme_checked_string_set, "string-set!", 3, 3);
- SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
- scheme_add_global_constant("string-set!", p, env);
- scheme_add_global_constant("string=?",
- scheme_make_immed_prim(string_eq,
- "string=?",
- 2, -1),
- env);
- scheme_add_global_constant("string-locale=?",
- scheme_make_immed_prim(string_locale_eq,
- "string-locale=?",
- 2, -1),
- env);
- scheme_add_global_constant("string-ci=?",
- scheme_make_immed_prim(string_ci_eq,
- "string-ci=?",
- 2, -1),
- env);
- scheme_add_global_constant("string-locale-ci=?",
- scheme_make_immed_prim(string_locale_ci_eq,
- "string-locale-ci=?",
- 2, -1),
- env);
- scheme_add_global_constant("string<?",
- scheme_make_immed_prim(string_lt,
- "string<?",
- 2, -1),
- env);
- scheme_add_global_constant("string-locale<?",
- scheme_make_immed_prim(string_locale_lt,
- "string-locale<?",
- 2, -1),
- env);
- scheme_add_global_constant("string>?",
- scheme_make_immed_prim(string_gt,
- "string>?",
- 2, -1),
- env);
- scheme_add_global_constant("string-locale>?",
- scheme_make_immed_prim(string_locale_gt,
- "string-locale>?",
- 2, -1),
- env);
- scheme_add_global_constant("string<=?",
- scheme_make_immed_prim(string_lt_eq,
- "string<=?",
- 2, -1),
- env);
- scheme_add_global_constant("string>=?",
- scheme_make_immed_prim(string_gt_eq,
- "string>=?",
- 2, -1),
- env);
- scheme_add_global_constant("string-ci<?",
- scheme_make_immed_prim(string_ci_lt,
- "string-ci<?",
- 2, -1),
- env);
- scheme_add_global_constant("string-locale-ci<?",
- scheme_make_immed_prim(string_locale_ci_lt,
- "string-locale-ci<?",
- 2, -1),
- env);
- scheme_add_global_constant("string-ci>?",
- scheme_make_immed_prim(string_ci_gt,
- "string-ci>?",
- 2, -1),
- env);
- scheme_add_global_constant("string-locale-ci>?",
- scheme_make_immed_prim(string_locale_ci_gt,
- "string-locale-ci>?",
- 2, -1),
- env);
- scheme_add_global_constant("string-ci<=?",
- scheme_make_immed_prim(string_ci_lt_eq,
- "string-ci<=?",
- 2, -1),
- env);
- scheme_add_global_constant("string-ci>=?",
- scheme_make_immed_prim(string_ci_gt_eq,
- "string-ci>=?",
- 2, -1),
- env);
- scheme_add_global_constant("substring",
- scheme_make_immed_prim(substring,
- "substring",
- 2, 3),
- env);
- scheme_add_global_constant("string-append",
- scheme_make_immed_prim(string_append,
- "string-append",
- 0, -1),
- env);
- scheme_add_global_constant("string->list",
- scheme_make_immed_prim(string_to_list,
- "string->list",
- 1, 1),
- env);
- scheme_add_global_constant("list->string",
- scheme_make_immed_prim(list_to_string,
- "list->string",
- 1, 1),
- env);
- scheme_add_global_constant("string-copy",
- scheme_make_immed_prim(string_copy,
- "string-copy",
- 1, 1),
- env);
- scheme_add_global_constant("string-copy!",
- scheme_make_immed_prim(string_copy_bang,
- "string-copy!",
- 3, 5),
- env);
- scheme_add_global_constant("string-fill!",
- scheme_make_immed_prim(string_fill,
- "string-fill!",
- 2, 2),
- env);
- scheme_add_global_constant("string->immutable-string",
- scheme_make_immed_prim(string_to_immutable,
- "string->immutable-string",
- 1, 1),
- env);
- scheme_add_global_constant("string-normalize-nfc",
- scheme_make_immed_prim(string_normalize_c,
- "string-normalize-nfc",
- 1, 1),
- env);
- scheme_add_global_constant("string-normalize-nfkc",
- scheme_make_immed_prim(string_normalize_kc,
- "string-normalize-nfkc",
- 1, 1),
- env);
- scheme_add_global_constant("string-normalize-nfd",
- scheme_make_immed_prim(string_normalize_d,
- "string-normalize-nfd",
- 1, 1),
- env);
- scheme_add_global_constant("string-normalize-nfkd",
- scheme_make_immed_prim(string_normalize_kd,
- "string-normalize-nfkd",
- 1, 1),
- env);
- scheme_add_global_constant("string-upcase",
- scheme_make_immed_prim(string_upcase,
- "string-upcase",
- 1, 1),
- env);
- scheme_add_global_constant("string-downcase",
- scheme_make_immed_prim(string_downcase,
- "string-downcase",
- 1, 1),
- env);
- scheme_add_global_constant("string-titlecase",
- scheme_make_immed_prim(string_titlecase,
- "string-titlecase",
- 1, 1),
- env);
- scheme_add_global_constant("string-foldcase",
- scheme_make_immed_prim(string_foldcase,
- "string-foldcase",
- 1, 1),
- env);
- scheme_add_global_constant("string-locale-upcase",
- scheme_make_immed_prim(string_locale_upcase,
- "string-locale-upcase",
- 1, 1),
- env);
- scheme_add_global_constant("string-locale-downcase",
- scheme_make_immed_prim(string_locale_downcase,
- "string-locale-downcase",
- 1, 1),
- env);
- scheme_add_global_constant("current-locale",
- scheme_register_parameter(current_locale,
- "current-locale",
- MZCONFIG_LOCALE),
- env);
- scheme_add_global_constant("locale-string-encoding",
- scheme_make_immed_prim(locale_string_encoding,
- "locale-string-encoding",
- 0, 0),
- env);
- scheme_add_global_constant("system-language+country",
- scheme_make_immed_prim(system_language_country,
- "system-language+country",
- 0, 0),
- env);
- scheme_add_global_constant("bytes-converter?",
- scheme_make_immed_prim(byte_converter_p,
- "bytes-converter?",
- 1, 1),
- env);
- scheme_add_global_constant("bytes-convert",
- scheme_make_prim_w_arity2(byte_string_convert,
- "bytes-convert",
- 1, 7,
- 3, 3),
- env);
- scheme_add_global_constant("bytes-convert-end",
- scheme_make_prim_w_arity2(byte_string_convert_end,
- "bytes-convert-end",
- 0, 3,
- 2, 2),
- env);
- scheme_add_global_constant("bytes-open-converter",
- scheme_make_immed_prim(byte_string_open_converter,
- "bytes-open-converter",
- 2, 2),
- env);
- scheme_add_global_constant("bytes-close-converter",
- scheme_make_immed_prim(byte_string_close_converter,
- "bytes-close-converter",
- 1, 1),
- env);
- scheme_add_global_constant("format",
- scheme_make_noncm_prim(format,
- "format",
- 1, -1),
- env);
- scheme_add_global_constant("printf",
- scheme_make_noncm_prim(sch_printf,
- "printf",
- 1, -1),
- env);
- scheme_add_global_constant("eprintf",
- scheme_make_noncm_prim(sch_eprintf,
- "eprintf",
- 1, -1),
- env);
- scheme_add_global_constant("fprintf",
- scheme_make_noncm_prim(sch_fprintf,
- "fprintf",
- 2, -1),
- env);
- scheme_add_global_constant("byte?",
- scheme_make_folding_prim(byte_p,
- "byte?",
- 1, 1, 1),
- env);
- p = scheme_make_folding_prim(byte_string_p, "bytes?", 1, 1, 1);
- SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
- scheme_add_global_constant("bytes?", p, env);
- scheme_add_global_constant("make-bytes",
- scheme_make_immed_prim(make_byte_string,
- "make-bytes",
- 1, 2),
- env);
- scheme_add_global_constant("bytes",
- scheme_make_immed_prim(byte_string,
- "bytes",
- 0, -1),
- env);
- GLOBAL_PRIM_W_ARITY("make-shared-bytes", make_shared_byte_string, 1, 2, env);
- GLOBAL_PRIM_W_ARITY("shared-bytes", shared_byte_string, 0, -1, env);
- scheme_add_global_constant("bytes-length",
- scheme_make_folding_prim(byte_string_length,
- "bytes-length",
- 1, 1, 1),
- env);
- p = scheme_make_immed_prim(scheme_checked_byte_string_ref, "bytes-ref", 2, 2);
- SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED;
- scheme_add_global_constant("bytes-ref", p, env);
- p = scheme_make_immed_prim(scheme_checked_byte_string_set, "bytes-set!", 3, 3);
- SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
- scheme_add_global_constant("bytes-set!", p, env);
- scheme_add_global_constant("bytes=?",
- scheme_make_immed_prim(byte_string_eq,
- "bytes=?",
- 2, -1),
- env);
- scheme_add_global_constant("bytes<?",
- scheme_make_immed_prim(byte_string_lt,
- "bytes<?",
- 2, -1),
- env);
- scheme_add_global_constant("bytes>?",
- scheme_make_immed_prim(byte_string_gt,
- "bytes>?",
- 2, -1),
- env);
- scheme_add_global_constant("subbytes",
- scheme_make_immed_prim(byte_substring,
- "subbytes",
- 2, 3),
- env);
- scheme_add_global_constant("bytes-append",
- scheme_make_immed_prim(byte_string_append,
- "bytes-append",
- 0, -1),
- env);
- scheme_add_global_constant("bytes->list",
- scheme_make_immed_prim(byte_string_to_list,
- "bytes->list",
- 1, 1),
- env);
- scheme_add_global_constant("list->bytes",
- scheme_make_immed_prim(list_to_byte_string,
- "list->bytes",
- 1, 1),
- env);
- scheme_add_global_constant("bytes-copy",
- scheme_make_immed_prim(byte_string_copy,
- "bytes-copy",
- 1, 1),
- env);
- scheme_add_global_constant("bytes-copy!",
- scheme_make_immed_prim(byte_string_copy_bang,
- "bytes-copy!",
- 3, 5),
- env);
- scheme_add_global_constant("bytes-fill!",
- scheme_make_immed_prim(byte_string_fill,
- "bytes-fill!",
- 2, 2),
- env);
- scheme_add_global_constant("bytes->immutable-bytes",
- scheme_make_immed_prim(byte_string_to_immutable,
- "bytes->immutable-bytes",
- 1, 1),
- env);
- scheme_add_global_constant("bytes-utf-8-index",
- scheme_make_immed_prim(byte_string_utf8_index,
- "bytes-utf-8-index",
- 2, 4),
- env);
- scheme_add_global_constant("bytes-utf-8-length",
- scheme_make_immed_prim(byte_string_utf8_length,
- "bytes-utf-8-length",
- 1, 4),
- env);
- scheme_add_global_constant("bytes-utf-8-ref",
- scheme_make_immed_prim(byte_string_utf8_ref,
- "bytes-utf-8-ref",
- 2, 4),
- env);
- scheme_add_global_constant("bytes->string/utf-8",
- scheme_make_immed_prim(byte_string_to_char_string,
- "bytes->string/utf-8",
- 1, 4),
- env);
- scheme_add_global_constant("bytes->string/locale",
- scheme_make_immed_prim(byte_string_to_char_string_locale,
- "bytes->string/locale",
- 1, 4),
- env);
- scheme_add_global_constant("bytes->string/latin-1",
- scheme_make_immed_prim(byte_string_to_char_string_latin1,
- "bytes->string/latin-1",
- 1, 4),
- env);
- scheme_add_global_constant("string->bytes/utf-8",
- scheme_make_immed_prim(char_string_to_byte_string,
- "string->bytes/utf-8",
- 1, 4),
- env);
- scheme_add_global_constant("string->bytes/locale",
- scheme_make_immed_prim(char_string_to_byte_string_locale,
- "string->bytes/locale",
- 1, 4),
- env);
- scheme_add_global_constant("string->bytes/latin-1",
- scheme_make_immed_prim(char_string_to_byte_string_latin1,
- "string->bytes/latin-1",
- 1, 4),
- env);
- scheme_add_global_constant("string-utf-8-length",
- scheme_make_immed_prim(char_string_utf8_length,
- "string-utf-8-length",
- 1, 3),
- env);
- /* In principle, `version' could be foldable, but it invites
- more problems than it solves... */
- scheme_add_global_constant("version",
- scheme_make_immed_prim(version,
- "version",
- 0, 0),
- env);
- scheme_add_global_constant("banner",
- scheme_make_immed_prim(banner,
- "banner",
- 0, 0),
- env);
- scheme_add_global_constant("getenv",
- scheme_make_immed_prim(sch_getenv,
- "getenv",
- 1, 1),
- env);
- scheme_add_global_constant("putenv",
- scheme_make_immed_prim(sch_putenv,
- "putenv",
- 2, 2),
- env);
- /* Don't make these folding, since they're platform-specific: */
- scheme_add_global_constant("system-type",
- scheme_make_immed_prim(system_type,
- "system-type",
- 0, 1),
- env);
- scheme_add_global_constant("system-library-subpath",
- scheme_make_immed_prim(system_library_subpath,
- "system-library-subpath",
- 0, 1),
- env);
- scheme_add_global_constant("current-command-line-arguments",
- scheme_register_parameter(cmdline_args,
- "current-command-line-arguments",
- MZCONFIG_CMDLINE_ARGS),
- env);
- #ifdef MZ_PRECISE_GC
- register_traversers();
- #endif
- }
- void scheme_init_string_places(void) {
- REGISTER_SO(current_locale_name_ptr);
- current_locale_name_ptr = "xxxx\0\0\0\0";
- }
- /**********************************************************************/
- /* UTF-8 char constructors */
- /**********************************************************************/
- Scheme_Object *scheme_make_sized_offset_utf8_string(char *chars, intptr_t d, intptr_t len)
- {
- intptr_t ulen;
- mzchar *us;
- if (len) {
- ulen = scheme_utf8_decode((unsigned char *)chars, d, d + len,
- NULL, 0, -1,
- NULL, 0 /* not UTF-16 */, 0xFFFD);
- us = scheme_malloc_atomic(sizeof(mzchar) * (ulen + 1));
- scheme_utf8_decode((unsigned char *)chars, d, d + len,
- us, 0, -1,
- NULL, 0 /* not UTF-16 */, 0xFFFD);
- us[ulen] = 0;
- } else {
- us = (mzchar *)"\0\0\0";
- ulen = 0;
- }
- return scheme_make_sized_offset_char_string(us, 0, ulen, 0);
- }
- Scheme_Object *
- scheme_make_sized_utf8_string(char *chars, intptr_t len)
- {
- return scheme_make_sized_offset_utf8_string(chars, 0, len);
- }
- Scheme_Object *
- scheme_make_immutable_sized_utf8_string(char *chars, intptr_t len)
- {
- Scheme_Object *s;
- s = scheme_make_sized_offset_utf8_string(chars, 0, len);
- if (len)
- SCHEME_SET_CHAR_STRING_IMMUTABLE(s);
- return s;
- }
- Scheme_Object *
- scheme_make_utf8_string(const char *chars)
- {
- return scheme_make_sized_offset_utf8_string((char *)chars, 0, -1);
- }
- Scheme_Object *
- scheme_make_locale_string(const char *chars)
- {
- return scheme_byte_string_to_char_string_locale(scheme_make_byte_string((char *)chars));
- }
- /**********************************************************************/
- /* index helpers */
- /**********************************************************************/
- void scheme_out_of_string_range(const char *name, const char *which,
- Scheme_Object *i, Scheme_Object *s,
- intptr_t start, intptr_t len)
- {
- int is_byte;
- is_byte = SCHEME_BYTE_STRINGP(s);
- if (len) {
- char *sstr;
- intptr_t slen;
- sstr = scheme_make_provided_string(s, 2, &slen);
- scheme_raise_exn(MZEXN_FAIL_CONTRACT,
- "%s: %sindex %s out of range [%d, %d] for %s%s: %t",
- name, which,
- scheme_make_provided_string(i, 2, NULL),
- ((start < 0) ? 0 : start),
- ((start < 0) ? (len - 1) : len),
- is_byte ? "byte-" : "",
- SCHEME_CHAPERONE_VECTORP(s) ? "vector" : "string",
- sstr, slen);
- } else {
- scheme_raise_exn(MZEXN_FAIL_CONTRACT,
- "%s: %sindex %s out of range for empty %s%s",
- name, which,
- scheme_make_provided_string(i, 0, NULL),
- is_byte ? "byte-" : "",
- SCHEME_CHAPERONE_VECTORP(s) ? "vector" : "string");
- }
- }
- intptr_t scheme_extract_index(const char *name, int pos, int argc, Scheme_Object **argv, intptr_t top, int false_ok)
- {
- intptr_t i;
- int is_top = 0;
- if (SCHEME_INTP(argv[pos])) {
- i = SCHEME_INT_VAL(argv[pos]);
- } else if (SCHEME_BIGNUMP(argv[pos])) {
- if (SCHEME_BIGPOS(argv[pos])) {
- i = top; /* out-of-bounds */
- is_top = 1;
- } else
- i = -1; /* negative */
- } else
- i = -1;
- if (!is_top && (i < 0))
- scheme_wrong_type(name,
- (false_ok ? "non-negative exact integer or #f" : "non-negative exact integer"),
- pos, argc, argv);
- return i;
- }
- void scheme_get_substring_indices(const char *name, Scheme_Object *str,
- int argc, Scheme_Object **argv,
- int spos, int fpos, intptr_t *_start, intptr_t *_finish)
- {
- intptr_t len;
- intptr_t start, finish;
- if (SCHEME_CHAPERONE_VECTORP(str))
- len = SCHEME_VEC_SIZE(str);
- else if (SCHEME_CHAR_STRINGP(str))
- len = SCHEME_CHAR_STRTAG_VAL(str);
- else
- len = SCHEME_BYTE_STRTAG_VAL(str);
- if (argc > spos)
- start = scheme_extract_index(name, spos, argc, argv, len + 1, 0);
- else
- start = 0;
- if (argc > fpos)
- finish = scheme_extract_index(name, fpos, argc, argv, len + 1, 0);
- else
- finish = len;
- if (!(start <= len)) {
- scheme_out_of_string_range(name, (fpos < 100) ? "starting " : "", argv[spos], str, 0, len);
- }
- if (!(finish >= start && finish <= len)) {
- scheme_out_of_string_range(name, "ending ", argv[fpos], str, start, len);
- }
- *_start = start;
- *_finish = finish;
- }
- void scheme_do_get_substring_indices(const char *name, Scheme_Object *str,
- int argc, Scheme_Object **argv,
- int spos, int fpos, intptr_t *_start, intptr_t *_finish, intptr_t len)
- {
- if (argc > spos) {
- if (SCHEME_INTP(argv[spos])) {
- intptr_t start = SCHEME_INT_VAL(argv[spos]);
- if ((start >= 0) && (start < len)) {
- *_start = start;
- if (argc > fpos) {
- intptr_t finish = SCHEME_INT_VAL(argv[fpos]);
- if ((finish >= start) && (finish <= len)) {
- *_finish = finish;
- return;
- }
- } else {
- *_finish = len;
- return;
- }
- }
- }
- } else {
- *_start = 0;
- *_finish = len;
- return;
- }
- scheme_get_substring_indices(name, str, argc, argv, spos, fpos, _start, _finish);
- }
- /**********************************************************************/
- /* char strings */
- /**********************************************************************/
- #define SCHEME_X_STR_VAL(x) SCHEME_CHAR_STR_VAL(x)
- #define SCHEME_X_STRTAG_VAL(x) SCHEME_CHAR_STRTAG_VAL(x)
- #define SCHEME_X_STRINGP(x) SCHEME_CHAR_STRINGP(x)
- #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_CHAR_STRINGP(x)
- #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_CHAR_STRING_IMMUTABLE(x)
- #define scheme_x_string_type scheme_char_string_type
- #define X(a, b) a##_char##b
- #define X_(a, b) a##_##b
- #define X__(a) a
- #define EMPTY (mzchar *)"\0\0\0"
- #define Xchar mzchar
- #define uXchar mzchar
- #define XSTR ""
- #define XSTRINGSTR "string"
- #define SUBXSTR "substring"
- #define CHARP(x) SCHEME_CHARP(x)
- #define CHAR_VAL(x) SCHEME_CHAR_VAL(x)
- #define CHAR_STR "character"
- #define MAKE_CHAR(x) _scheme_make_char(x)
- #define xstrlen scheme_char_strlen
- #include "strops.inc"
- #define GEN_STRING_COMP(name, scheme_name, comp, op, ul, size_shortcut) \
- static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
- { mzchar *s, *prev; int i, sl, pl; int falz = 0;\
- if (!SCHEME_CHAR_STRINGP(argv[0])) \
- scheme_wrong_type(scheme_name, "string", 0, argc, argv); \
- prev = SCHEME_CHAR_STR_VAL(argv[0]); pl = SCHEME_CHAR_STRTAG_VAL(argv[0]); \
- for (i = 1; i < argc; i++) { \
- if (!SCHEME_CHAR_STRINGP(argv[i])) \
- scheme_wrong_type(scheme_name, "string", i, argc, argv); \
- s = SCHEME_CHAR_STR_VAL(argv[i]); sl = SCHEME_CHAR_STRTAG_VAL(argv[i]); \
- if (!falz) if (!(comp(scheme_name, \
- prev, pl, \
- s, sl, ul, size_shortcut) op 0)) falz = 1; \
- prev = s; pl = sl; \
- } \
- return falz ? scheme_false : scheme_true; \
- }
- GEN_STRING_COMP(string_eq, "string=?", mz_char_strcmp, ==, 0, 1)
- GEN_STRING_COMP(string_lt, "string<?", mz_char_strcmp, <, 0, 0)
- GEN_STRING_COMP(string_gt, "string>?", mz_char_strcmp, >, 0, 0)
- GEN_STRING_COMP(string_lt_eq, "string<=?", mz_char_strcmp, <=, 0, 0)
- GEN_STRING_COMP(string_gt_eq, "string>=?", mz_char_strcmp, >=, 0, 0)
- GEN_STRING_COMP(string_ci_eq, "string-ci=?", mz_char_strcmp_ci, ==, 0, 0)
- GEN_STRING_COMP(string_ci_lt, "string-ci<?", mz_char_strcmp_ci, <, 0, 0)
- GEN_STRING_COMP(string_ci_gt, "string-ci>?", mz_char_strcmp_ci, >, 0, 0)
- GEN_STRING_COMP(string_ci_lt_eq, "string-ci<=?", mz_char_strcmp_ci, <=, 0, 0)
- GEN_STRING_COMP(string_ci_gt_eq, "string-ci>=?", mz_char_strcmp_ci, >=, 0, 0)
- GEN_STRING_COMP(string_locale_eq, "string-locale=?", mz_char_strcmp, ==, 1, 0)
- GEN_STRING_COMP(string_locale_lt, "string-locale<?", mz_char_strcmp, <, 1, 0)
- GEN_STRING_COMP(string_locale_gt, "string-locale>?", mz_char_strcmp, >, 1, 0)
- GEN_STRING_COMP(string_locale_ci_eq, "string-locale-ci=?", mz_char_strcmp_ci, ==, 1, 0)
- GEN_STRING_COMP(string_locale_ci_lt, "string-locale-ci<?", mz_char_strcmp_ci, <, 1, 0)
- GEN_STRING_COMP(string_locale_ci_gt, "string-locale-ci>?", mz_char_strcmp_ci, >, 1, 0)
- /**********************************************************************/
- /* byte strings */
- /**********************************************************************/
- #define SCHEME_BYTEP(x) ((SCHEME_INTP(x)) && (SCHEME_INT_VAL(x) >= 0) && (SCHEME_INT_VAL(x) <= 255))
- #define BYTE_STR "exact integer in [0,255]"
- static Scheme_Object *
- byte_p(int argc, Scheme_Object *argv[])
- {
- return (SCHEME_BYTEP(argv[0]) ? scheme_true : scheme_false);
- }
- #define SCHEME_X_STR_VAL(x) SCHEME_BYTE_STR_VAL(x)
- #define SCHEME_X_STRTAG_VAL(x) SCHEME_BYTE_STRTAG_VAL(x)
- #define SCHEME_X_STRINGP(x) SCHEME_BYTE_STRINGP(x)
- #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_BYTE_STRINGP(x)
- #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_BYTE_STRING_IMMUTABLE(x)
- #define scheme_x_string_type scheme_byte_string_type
- #define X(a, b) a##_byte##b
- #define X_(a, b) a##_byte_##b
- #define X__(a) byte_##a
- #define EMPTY ""
- #define Xchar char
- #define uXchar unsigned char
- #define XSTR "byte "
- #define XSTRINGSTR "bytes"
- #define SUBXSTR "subbytes"
- #define CHARP(x) SCHEME_BYTEP(x)
- #define CHAR_VAL(x) SCHEME_INT_VAL(x)
- #define CHAR_STR BYTE_STR
- #define MAKE_CHAR(x) scheme_make_integer_value(x)
- #define xstrlen strlen
- #define GENERATING_BYTE
- #include "strops.inc"
- #undef GENERATING_BYTE
- /* comparisons */
- #define GEN_BYTE_STRING_COMP(name, scheme_name, comp, op) \
- static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
- { char *s, *prev; int i, sl, pl; int falz = 0;\
- if (!SCHEME_BYTE_STRINGP(argv[0])) \
- scheme_wrong_type(scheme_name, "byte string", 0, argc, argv); \
- prev = SCHEME_BYTE_STR_VAL(argv[0]); pl = SCHEME_BYTE_STRTAG_VAL(argv[0]); \
- for (i = 1; i < argc; i++) { \
- if (!SCHEME_BYTE_STRINGP(argv[i])) \
- scheme_wrong_type(scheme_name, "byte string", i, argc, argv); \
- s = SCHEME_BYTE_STR_VAL(argv[i]); sl = SCHEME_BYTE_STRTAG_VAL(argv[i]); \
- if (!falz) if (!(comp(scheme_name, \
- (unsigned char *)prev, pl, \
- (unsigned char *)s, sl) op 0)) falz = 1; \
- prev = s; pl = sl; \
- } \
- return falz ? scheme_false : scheme_true; \
- }
- GEN_BYTE_STRING_COMP(byte_string_eq, "bytes=?", mz_strcmp, ==)
- GEN_BYTE_STRING_COMP(byte_string_lt, "bytes<?", mz_strcmp, <)
- GEN_BYTE_STRING_COMP(byte_string_gt, "bytes>?", mz_strcmp, >)
- /**********************************************************************/
- /* byte string <-> char string */
- /**********************************************************************/
- /************************* bytes->string *************************/
- static Scheme_Object *
- do_byte_string_to_char_string(const char *who,
- Scheme_Object *bstr,
- intptr_t istart, intptr_t ifinish,
- int perm, int as_locale)
- {
- int i, ulen;
- char *chars;
- unsigned int *v;
- chars = SCHEME_BYTE_STR_VAL(bstr);
- ulen = utf8_decode_x((unsigned char *)chars, istart, ifinish,
- NULL, 0, -1,
- NULL, NULL, 0, 0,
- NULL, 0,
- (perm > -1) ? 0xD800 : 0);
- if (ulen < 0) {
- scheme_arg_mismatch(who,
- STRING_IS_NOT_UTF_8,
- bstr);
- }
- v = (unsigned int *)scheme_malloc_atomic((ulen + 1) * sizeof(unsigned int));
- utf8_decode_x((unsigned char *)chars, istart, ifinish,
- v, 0, -1,
- NULL, NULL, 0, 0,
- NULL, 0,
- (perm > -1) ? 0xD800 : 0);
-
- if (perm > -1) {
- for (i = 0; i < ulen; i++) {
- if (v[i] == 0xD800)
- v[i] = perm;
- }
- }
- v[ulen] = 0;
- return scheme_make_sized_char_string(v, ulen, 0);
- }
- static Scheme_Object *
- do_byte_string_to_char_string_locale(const char *who,
- Scheme_Object *bstr,
- intptr_t istart, intptr_t ifinish,
- int perm)
- {
- char *us;
- intptr_t olen;
- reset_locale();
- if (!iconv_ready) init_iconv();
- if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open))
- return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
- if (istart < ifinish) {
- int no_cvt;
- us = string_to_from_locale(0, SCHEME_BYTE_STR_VAL(bstr),
- istart, ifinish - istart,
- &olen, perm, &no_cvt);
- if (!us) {
- if (no_cvt) {
- return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
- } else {
- scheme_arg_mismatch(who,
- "byte string is not a valid encoding for the current locale: ",
- bstr);
- }
- }
- ((mzchar *)us)[olen] = 0;
- } else {
- us = "\0\0\0";
- olen = 0;
- }
- return scheme_make_sized_char_string((mzchar *)us, olen, 0);
- }
- static Scheme_Object *
- do_string_to_vector(const char *who, int mode, int argc, Scheme_Object *argv[])
- {
- int permc;
- intptr_t istart, ifinish;
- if (!SCHEME_BYTE_STRINGP(argv[0]))
- scheme_wrong_type(who, "byte string", 0, argc, argv);
- if ((argc < 2) || SCHEME_FALSEP(argv[1]))
- permc = -1;
- else {
- if (!SCHEME_CHARP(argv[1]))
- scheme_wrong_type(who, "character or #f", 1, argc, argv);
- permc = SCHEME_CHAR_VAL(argv[1]);
- }
- scheme_get_substring_indices(who, argv[0], argc, argv,
- 2, 3,
- &istart, &ifinish);
- if (mode == 0)
- return do_byte_string_to_char_string(who, argv[0], istart, ifinish, permc, 0);
- else if (mode == 1)
- return do_byte_string_to_char_string_locale(who, argv[0], istart, ifinish, permc);
- else {
- /* Latin-1 */
- mzchar *us;
- unsigned char *s;
- intptr_t i, len;
- len = ifinish - istart;
- s = (unsigned char *)SCHEME_BYTE_STR_VAL(argv[0]);
- us = (mzchar *)scheme_malloc_atomic((len + 1) * sizeof(mzchar));
- for (i = istart; i < ifinish; i++) {
- us[i - istart] = s[i];
- }
- us[len] = 0;
- return scheme_make_sized_char_string(us, len, 0);
- }
- }
- static Scheme_Object *
- byte_string_to_char_string (int argc, Scheme_Object *argv[])
- {
- return do_string_to_vector("bytes->string/utf-8", 0, argc, argv);
- }
- static Scheme_Object *
- byte_string_to_char_string_locale (int argc, Scheme_Object *argv[])
- {
- return do_string_to_vector("bytes->string/locale", 1, argc, argv);
- }
- static Scheme_Object *
- byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[])
- {
- return do_string_to_vector("bytes->string/latin-1", 2, argc, argv);
- }
- Scheme_Object *scheme_byte_string_to_char_string(Scheme_Object *o)
- {
- return do_byte_string_to_char_string("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD, 0);
- }
- Scheme_Object *scheme_byte_string_to_char_string_locale(Scheme_Object *o)
- {
- return do_byte_string_to_char_string_locale("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD);
- }
- /************************* string->bytes *************************/
- static Scheme_Object *do_char_string_to_byte_string(Scheme_Object *s, intptr_t istart, intptr_t ifinish,
- int as_locale)
- {
- char *bs;
- int slen;
- slen = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
- NULL, 0,
- 0 /* UTF-16 */);
- bs = (char *)scheme_malloc_atomic(slen + 1);
- scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
- (unsigned char *)bs, 0,
- 0 /* UTF-16 */);
- bs[slen] = 0;
- return scheme_make_sized_byte_string(bs, slen, 0);
- }
- static Scheme_Object *
- do_char_string_to_byte_string_locale(const char *who,
- Scheme_Object *cstr,
- intptr_t istart, intptr_t ifinish,
- int perm)
- {
- char *s;
- intptr_t olen;
- reset_locale();
- if (!iconv_ready) init_iconv();
- if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open))
- return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
- if (istart < ifinish) {
- int no_cvt;
- s = string_to_from_locale(1, (char *)SCHEME_CHAR_STR_VAL(cstr),
- istart, ifinish - istart,
- &olen, perm, &no_cvt);
- if (!s) {
- if (no_cvt) {
- return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
- } else {
- scheme_arg_mismatch(who,
- "string cannot be encoded for the current locale: ",
- cstr);
- }
- }
- s[olen] = 0;
- } else {
- s = "";
- olen = 0;
- }
- return scheme_make_sized_byte_string(s, olen, 0);
- }
- Scheme_Object *scheme_char_string_to_byte_string(Scheme_Object *s)
- {
- return do_char_string_to_byte_string(s, 0, SCHEME_CHAR_STRLEN_VAL(s), 0);
- }
- Scheme_Object *scheme_char_string_to_byte_string_locale(Scheme_Object *s)
- {
- return do_char_string_to_byte_string_locale("s->s", s, 0, SCHEME_CHAR_STRLEN_VAL(s), '?');
- }
- static Scheme_Object *do_chars_to_bytes(const char *who, int mode,
- int argc, Scheme_Object *argv[])
- {
- intptr_t istart, ifinish;
- int permc;
- if (!SCHEME_CHAR_STRINGP(argv[0]))
- scheme_wrong_type(who, "string", 0, argc, argv);
- if ((argc < 2) || SCHEME_FALSEP(argv[1]))
- permc = -1;
- else {
- if (!SCHEME_BYTEP(argv[1]))
- scheme_wrong_type(who, "byte or #f", 1, argc, argv);
- permc = SCHEME_INT_VAL(argv[1]);
- }
- scheme_get_substring_indices(who, argv[0], argc, argv,
- 2, 3, &istart, &ifinish);
- if (mode == 1)
- return do_char_string_to_byte_string_locale(who, argv[0], istart, ifinish, permc);
- else if (mode == 0)
- return do_char_string_to_byte_string(argv[0], istart, ifinish, 0);
- else {
- /* Latin-1 */
- mzchar *us;
- unsigned char *s;
- intptr_t i, len;
- len = ifinish - istart;
- us = SCHEME_CHAR_STR_VAL(argv[0]);
- s = (unsigned char *)scheme_malloc_atomic(len + 1);
- for (i = istart; i < ifinish; i++) {
- if (us[i] < 256)
- s[i - istart] = us[i];
- else if (permc >= 0) {
- s[i - istart] = permc;
- } else {
- scheme_arg_mismatch(who,
- "string cannot be encoded in Latin-1: ",
- argv[0]);
- }
- }
- s[len] = 0;
- return scheme_make_sized_byte_string((char *)s, len, 0);
- }
- }
- static Scheme_Object *char_string_to_byte_string(int argc, Scheme_Object *argv[])
- {
- return do_chars_to_bytes("string->bytes/utf-8", 0, argc, argv);
- }
- static Scheme_Object *char_string_to_byte_string_locale(int argc, Scheme_Object *argv[])
- {
- return do_chars_to_bytes("string->bytes/locale", 1, argc, argv);
- }
- static Scheme_Object *char_string_to_byte_string_latin1(int argc, Scheme_Object *argv[])
- {
- return do_chars_to_bytes("string->bytes/latin-1", 2, argc, argv);
- }
- /************************* Other *************************/
- static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[])
- {
- intptr_t istart, ifinish, len;
- if (!SCHEME_CHAR_STRINGP(argv[0]))
- scheme_wrong_type("string-utf-8-length", "string", 0, argc, argv);
- scheme_get_substring_indices("string-utf-8-length", argv[0], argc, argv,
- 1, 2, &istart, &ifinish);
- len = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(argv[0]), istart, ifinish,
- NULL, 0, 0);
- return scheme_make_integer(len);
- }
- static Scheme_Object *
- byte_string_utf8_length (int argc, Scheme_Object *argv[])
- {
- int len, perm;
- intptr_t istart, ifinish;
- char *chars;
- if (!SCHEME_BYTE_STRINGP(argv[0]))
- scheme_wrong_type("bytes-utf-8-length", "string", 0, argc, argv);
- chars = SCHEME_BYTE_STR_VAL(argv[0]);
- if ((argc > 1) && !SCHEME_FALSEP(argv[1])) {
- if (!SCHEME_CHARP(argv[1]))
- scheme_wrong_type("bytes-utf-8-length", "character or #f", 1, argc, argv);
- perm = 1;
- } else
- perm = 0;
- scheme_get_substring_indices("bytes-utf-8-length", argv[0], argc, argv,
- 2, 3,
- &istart, &ifinish);
- len = scheme_utf8_decode((unsigned char *)chars, istart, ifinish,
- NULL, 0, -1,
- NULL, 0, perm);
- if (len < 0)
- return scheme_false;
- else
- return scheme_make_integer(len);
- }
- static Scheme_Object *
- byte_string_utf8_index(int argc, Scheme_Object *argv[])
- {
- intptr_t istart, ifinish, pos = -1, opos, ipos;
- int result, perm;
- char *chars;
- if (!SCHEME_BYTE_STRINGP(argv[0]))
- scheme_wrong_type("bytes-utf-8-index", "byte string", 0, argc, argv);
- chars = SCHEME_BYTE_STR_VAL(argv[0]);
- if (SCHEME_INTP(argv[1])) {
- …
Large files files are truncated, but you can click here to view the full file