/racket/src/racket/src/string.c
C | 5470 lines | 4467 code | 688 blank | 315 comment | 1009 complexity | bf7442ff4502f7e2a6bbc6b06af41f7a MD5 | raw file
Possible License(s): LGPL-2.1, BSD-3-Clause, ISC, LGPL-2.0
Large files files are truncated, but you can click here to view the full file
- /*
- Racket
- Copyright (c) 2004-2017 PLT Design Inc.
- Copyright (c) 1995-2001 Matthew Flatt
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301 USA.
- libscheme
- Copyright (c) 1994 Brent Benson
- All rights reserved.
- */
- #include "schpriv.h"
- #include "schvers.h"
- #include "schrktio.h"
- #include <string.h>
- #include <ctype.h>
- #ifdef NO_ERRNO_GLOBAL
- # define errno -1
- #else
- # include <errno.h>
- #endif
- #ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH
- # include "schsys.h"
- #endif
- #ifndef SPLS_SUFFIX
- # define SPLS_SUFFIX ""
- #endif
- #include "schustr.inc"
- #ifdef MACOS_UNICODE_SUPPORT
- # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
- #endif
- #ifdef WINDOWS_UNICODE_SUPPORT
- # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
- #endif
- #ifndef mzLOCALE_IS_UTF_8
- # define mzLOCALE_IS_UTF_8(s) (!(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED))
- #endif
- #define mzICONV_KIND 0
- #define mzUTF8_KIND 1
- #define mzUTF8_TO_UTF16_KIND 2
- #define mzUTF16_TO_UTF8_KIND 3
- typedef struct Scheme_Converter {
- Scheme_Object so;
- short closed;
- short kind;
- rktio_converter_t *cd;
- int permissive;
- Scheme_Custodian_Reference *mref;
- } Scheme_Converter;
- /* locals */
- static Scheme_Object *make_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_p (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_length (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_ci_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_ci_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_lt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_lt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_gt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_gt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_lt_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_gt_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_ci_lt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_ci_lt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_ci_gt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_ci_gt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_ci_lt_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_ci_gt_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_upcase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_downcase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_titlecase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_foldcase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_upcase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_locale_downcase (int argc, Scheme_Object *argv[]);
- static Scheme_Object *substring (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_append (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_to_list (int argc, Scheme_Object *argv[]);
- static Scheme_Object *list_to_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_copy (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_copy_bang (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_fill (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_to_immutable (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_normalize_c (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_normalize_kc (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_normalize_d (int argc, Scheme_Object *argv[]);
- static Scheme_Object *string_normalize_kd (int argc, Scheme_Object *argv[]);
- static Scheme_Object *make_shared_byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *shared_byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *make_byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_p (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_p (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_length (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_eq (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_lt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_gt (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_substring (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_append (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_to_list (int argc, Scheme_Object *argv[]);
- static Scheme_Object *list_to_byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_copy (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_copy_bang (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_fill (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_to_immutable (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_utf8_index (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_utf8_ref (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_utf8_length (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_to_char_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_to_char_string_locale (int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[]);
- static Scheme_Object *char_string_to_byte_string (int argc, Scheme_Object *argv[]);
- static Scheme_Object *char_string_to_byte_string_locale (int argc, Scheme_Object *argv[]);
- static Scheme_Object *char_string_to_byte_string_latin1 (int argc, Scheme_Object *argv[]);
- static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[]);
- static Scheme_Object *version(int argc, Scheme_Object *argv[]);
- static Scheme_Object *format(int argc, Scheme_Object *argv[]);
- static Scheme_Object *sch_printf(int argc, Scheme_Object *argv[]);
- static Scheme_Object *sch_eprintf(int argc, Scheme_Object *argv[]);
- static Scheme_Object *sch_fprintf(int argc, Scheme_Object *argv[]);
- static Scheme_Object *banner(int argc, Scheme_Object *argv[]);
- static Scheme_Object *env_p(int argc, Scheme_Object *argv[]);
- static Scheme_Object *sch_getenv(int argc, Scheme_Object *argv[]);
- static Scheme_Object *sch_getenv_names(int argc, Scheme_Object *argv[]);
- static Scheme_Object *sch_putenv(int argc, Scheme_Object *argv[]);
- static Scheme_Object *env_copy(int argc, Scheme_Object *argv[]);
- static Scheme_Object *env_make(int argc, Scheme_Object *argv[]);
- static Scheme_Object *current_environment_variables(int argc, Scheme_Object *argv[]);
- static Scheme_Object *system_type(int argc, Scheme_Object *argv[]);
- static Scheme_Object *system_library_subpath(int argc, Scheme_Object *argv[]);
- static Scheme_Object *cmdline_args(int argc, Scheme_Object *argv[]);
- static Scheme_Object *current_locale(int argc, Scheme_Object *argv[]);
- static Scheme_Object *locale_string_encoding(int argc, Scheme_Object *argv[]);
- static Scheme_Object *system_language_country(int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_open_converter(int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_close_converter(int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_convert(int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_string_convert_end(int argc, Scheme_Object *argv[]);
- static Scheme_Object *byte_converter_p(int argc, Scheme_Object *argv[]);
- static Scheme_Object *path_lt (int argc, Scheme_Object *argv[]);
- #ifdef MZ_PRECISE_GC
- static void register_traversers(void);
- #endif
- static int mz_char_strcmp(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
- static int mz_char_strcmp_ci(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
- static int mz_strcmp(const char *who, unsigned char *str1, intptr_t l1, unsigned char *str2, intptr_t l2);
- XFORM_NONGCING static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t end,
- unsigned int *us, intptr_t dstart, intptr_t dend,
- intptr_t *ipos, intptr_t *jpos,
- char compact, char utf16,
- int *state, int might_continue, int permissive);
- XFORM_NONGCING static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t end,
- unsigned char *s, intptr_t dstart, intptr_t dend,
- intptr_t *_ipos, intptr_t *_opos, char utf16);
- static char *string_to_from_locale(int to_bytes,
- char *in, intptr_t delta, intptr_t len,
- intptr_t *olen, int perm,
- int *no_cvt);
- #define portable_isspace(x) (((x) < 128) && isspace(x))
- ROSYM static Scheme_Object *sys_symbol;
- ROSYM static Scheme_Object *link_symbol, *machine_symbol, *vm_symbol, *gc_symbol;
- ROSYM static Scheme_Object *so_suffix_symbol, *so_mode_symbol, *word_symbol;
- ROSYM static Scheme_Object *os_symbol, *fs_change_symbol, *cross_symbol;
- ROSYM static Scheme_Object *racket_symbol, *cgc_symbol, *_3m_symbol;
- ROSYM static Scheme_Object *force_symbol, *infer_symbol;
- ROSYM static Scheme_Object *platform_3m_path, *platform_cgc_path;
- READ_ONLY static Scheme_Object *zero_length_char_string;
- READ_ONLY static Scheme_Object *zero_length_byte_string;
- SHARED_OK static char *embedding_banner;
- SHARED_OK static Scheme_Object *vers_str;
- SHARED_OK static Scheme_Object *banner_str;
- THREAD_LOCAL_DECL(static Scheme_Object *fs_change_props);
- READ_ONLY static Scheme_Object *complete_symbol, *continues_symbol, *aborts_symbol, *error_symbol;
- READ_ONLY Scheme_Object *scheme_string_p_proc;
- READ_ONLY Scheme_Object *scheme_byte_string_p_proc;
- READ_ONLY static int cross_compile_mode;
- /* These two locale variables are only valid when reset_locale()
- is called after continuation marks (and hence parameterization)
- may have changed. Similarly, setlocale() is only up-to-date
- when reset_locale() has been called. */
- THREAD_LOCAL_DECL(static int locale_on);
- THREAD_LOCAL_DECL(static void *current_locale_name_ptr);
- static void reset_locale(void);
- #define current_locale_name ((const mzchar *)current_locale_name_ptr)
- static const mzchar empty_char_string[1] = { 0 };
- static const mzchar xes_char_string[2] = { 0x78787878, 0 };
- void
- scheme_init_string (Scheme_Env *env)
- {
- Scheme_Object *p;
- REGISTER_SO(sys_symbol);
- sys_symbol = scheme_intern_symbol(SYSTEM_TYPE_NAME);
- REGISTER_SO(link_symbol);
- REGISTER_SO(machine_symbol);
- REGISTER_SO(gc_symbol);
- REGISTER_SO(vm_symbol);
- REGISTER_SO(so_suffix_symbol);
- REGISTER_SO(so_mode_symbol);
- REGISTER_SO(word_symbol);
- REGISTER_SO(os_symbol);
- REGISTER_SO(fs_change_symbol);
- REGISTER_SO(cross_symbol);
- link_symbol = scheme_intern_symbol("link");
- machine_symbol = scheme_intern_symbol("machine");
- vm_symbol = scheme_intern_symbol("vm");
- gc_symbol = scheme_intern_symbol("gc");
- so_suffix_symbol = scheme_intern_symbol("so-suffix");
- so_mode_symbol = scheme_intern_symbol("so-mode");
- word_symbol = scheme_intern_symbol("word");
- os_symbol = scheme_intern_symbol("os");
- fs_change_symbol = scheme_intern_symbol("fs-change");
- cross_symbol = scheme_intern_symbol("cross");
- REGISTER_SO(racket_symbol);
- REGISTER_SO(cgc_symbol);
- REGISTER_SO(_3m_symbol);
- racket_symbol = scheme_intern_symbol("racket");
- cgc_symbol = scheme_intern_symbol("cgc");
- _3m_symbol = scheme_intern_symbol("3m");
- REGISTER_SO(force_symbol);
- REGISTER_SO(infer_symbol);
- force_symbol = scheme_intern_symbol("force");
- infer_symbol = scheme_intern_symbol("infer");
- REGISTER_SO(zero_length_char_string);
- REGISTER_SO(zero_length_byte_string);
- zero_length_char_string = scheme_alloc_char_string(0, 0);
- zero_length_byte_string = scheme_alloc_byte_string(0, 0);
- REGISTER_SO(complete_symbol);
- REGISTER_SO(continues_symbol);
- REGISTER_SO(aborts_symbol);
- REGISTER_SO(error_symbol);
- complete_symbol = scheme_intern_symbol("complete");
- continues_symbol = scheme_intern_symbol("continues");
- aborts_symbol = scheme_intern_symbol("aborts");
- error_symbol = scheme_intern_symbol("error");
- REGISTER_SO(platform_3m_path);
- #ifdef UNIX_FILE_SYSTEM
- # define MZ3M_SUBDIR "/3m"
- #else
- # ifdef DOS_FILE_SYSTEM
- # define MZ3M_SUBDIR "\\3m"
- # else
- # define MZ3M_SUBDIR ":3m"
- # endif
- #endif
- REGISTER_SO(platform_3m_path);
- REGISTER_SO(platform_cgc_path);
- platform_cgc_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH SPLS_SUFFIX);
- platform_3m_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH SPLS_SUFFIX MZ3M_SUBDIR);
- REGISTER_SO(embedding_banner);
- REGISTER_SO(vers_str);
- REGISTER_SO(banner_str);
- vers_str = scheme_make_utf8_string(scheme_version());
- SCHEME_SET_CHAR_STRING_IMMUTABLE(vers_str);
- banner_str = scheme_make_utf8_string(scheme_banner());
- SCHEME_SET_CHAR_STRING_IMMUTABLE(banner_str);
- REGISTER_SO(scheme_string_p_proc);
- p = scheme_make_folding_prim(string_p, "string?", 1, 1, 1);
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
- | SCHEME_PRIM_IS_OMITABLE);
- scheme_add_global_constant("string?", p, env);
- scheme_string_p_proc = p;
- scheme_add_global_constant("make-string",
- scheme_make_immed_prim(make_string,
- "make-string",
- 1, 2),
- env);
- scheme_add_global_constant("string",
- scheme_make_immed_prim(string,
- "string",
- 0, -1),
- env);
-
- p = scheme_make_folding_prim(string_length, "string-length", 1, 1, 1);
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
- |SCHEME_PRIM_PRODUCES_FIXNUM);
- scheme_add_global_constant("string-length", p,
- env);
- p = scheme_make_immed_prim(scheme_checked_string_ref, "string-ref", 2, 2);
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED);
- scheme_add_global_constant("string-ref", p, env);
- p = scheme_make_immed_prim(scheme_checked_string_set, "string-set!", 3, 3);
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_NARY_INLINED);
- scheme_add_global_constant("string-set!", p, env);
- p = scheme_make_immed_prim(string_eq, "string=?", 2, -1);
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED);
- scheme_add_global_constant("string=?", p, env);
- scheme_add_global_constant("string-locale=?",
- scheme_make_immed_prim(string_locale_eq,
- "string-locale=?",
- 2, -1),
- env);
- scheme_add_global_constant("string-ci=?",
- scheme_make_immed_prim(string_ci_eq,
- "string-ci=?",
- 2, -1),
- env);
- scheme_add_global_constant("string-locale-ci=?",
- scheme_make_immed_prim(string_locale_ci_eq,
- "string-locale-ci=?",
- 2, -1),
- env);
- scheme_add_global_constant("string<?",
- scheme_make_immed_prim(string_lt,
- "string<?",
- 2, -1),
- env);
- scheme_add_global_constant("string-locale<?",
- scheme_make_immed_prim(string_locale_lt,
- "string-locale<?",
- 2, -1),
- env);
- scheme_add_global_constant("string>?",
- scheme_make_immed_prim(string_gt,
- "string>?",
- 2, -1),
- env);
- scheme_add_global_constant("string-locale>?",
- scheme_make_immed_prim(string_locale_gt,
- "string-locale>?",
- 2, -1),
- env);
- scheme_add_global_constant("string<=?",
- scheme_make_immed_prim(string_lt_eq,
- "string<=?",
- 2, -1),
- env);
- scheme_add_global_constant("string>=?",
- scheme_make_immed_prim(string_gt_eq,
- "string>=?",
- 2, -1),
- env);
- scheme_add_global_constant("string-ci<?",
- scheme_make_immed_prim(string_ci_lt,
- "string-ci<?",
- 2, -1),
- env);
- scheme_add_global_constant("string-locale-ci<?",
- scheme_make_immed_prim(string_locale_ci_lt,
- "string-locale-ci<?",
- 2, -1),
- env);
- scheme_add_global_constant("string-ci>?",
- scheme_make_immed_prim(string_ci_gt,
- "string-ci>?",
- 2, -1),
- env);
- scheme_add_global_constant("string-locale-ci>?",
- scheme_make_immed_prim(string_locale_ci_gt,
- "string-locale-ci>?",
- 2, -1),
- env);
- scheme_add_global_constant("string-ci<=?",
- scheme_make_immed_prim(string_ci_lt_eq,
- "string-ci<=?",
- 2, -1),
- env);
- scheme_add_global_constant("string-ci>=?",
- scheme_make_immed_prim(string_ci_gt_eq,
- "string-ci>=?",
- 2, -1),
- env);
- scheme_add_global_constant("substring",
- scheme_make_immed_prim(substring,
- "substring",
- 2, 3),
- env);
- scheme_add_global_constant("string-append",
- scheme_make_immed_prim(string_append,
- "string-append",
- 0, -1),
- env);
- scheme_add_global_constant("string->list",
- scheme_make_immed_prim(string_to_list,
- "string->list",
- 1, 1),
- env);
- scheme_add_global_constant("list->string",
- scheme_make_immed_prim(list_to_string,
- "list->string",
- 1, 1),
- env);
- scheme_add_global_constant("string-copy",
- scheme_make_immed_prim(string_copy,
- "string-copy",
- 1, 1),
- env);
- scheme_add_global_constant("string-copy!",
- scheme_make_immed_prim(string_copy_bang,
- "string-copy!",
- 3, 5),
- env);
- scheme_add_global_constant("string-fill!",
- scheme_make_immed_prim(string_fill,
- "string-fill!",
- 2, 2),
- env);
- scheme_add_global_constant("string->immutable-string",
- scheme_make_immed_prim(string_to_immutable,
- "string->immutable-string",
- 1, 1),
- env);
- scheme_add_global_constant("string-normalize-nfc",
- scheme_make_immed_prim(string_normalize_c,
- "string-normalize-nfc",
- 1, 1),
- env);
- scheme_add_global_constant("string-normalize-nfkc",
- scheme_make_immed_prim(string_normalize_kc,
- "string-normalize-nfkc",
- 1, 1),
- env);
- scheme_add_global_constant("string-normalize-nfd",
- scheme_make_immed_prim(string_normalize_d,
- "string-normalize-nfd",
- 1, 1),
- env);
- scheme_add_global_constant("string-normalize-nfkd",
- scheme_make_immed_prim(string_normalize_kd,
- "string-normalize-nfkd",
- 1, 1),
- env);
- scheme_add_global_constant("string-upcase",
- scheme_make_immed_prim(string_upcase,
- "string-upcase",
- 1, 1),
- env);
- scheme_add_global_constant("string-downcase",
- scheme_make_immed_prim(string_downcase,
- "string-downcase",
- 1, 1),
- env);
- scheme_add_global_constant("string-titlecase",
- scheme_make_immed_prim(string_titlecase,
- "string-titlecase",
- 1, 1),
- env);
- scheme_add_global_constant("string-foldcase",
- scheme_make_immed_prim(string_foldcase,
- "string-foldcase",
- 1, 1),
- env);
- scheme_add_global_constant("string-locale-upcase",
- scheme_make_immed_prim(string_locale_upcase,
- "string-locale-upcase",
- 1, 1),
- env);
- scheme_add_global_constant("string-locale-downcase",
- scheme_make_immed_prim(string_locale_downcase,
- "string-locale-downcase",
- 1, 1),
- env);
- scheme_add_global_constant("current-locale",
- scheme_register_parameter(current_locale,
- "current-locale",
- MZCONFIG_LOCALE),
- env);
- scheme_add_global_constant("locale-string-encoding",
- scheme_make_immed_prim(locale_string_encoding,
- "locale-string-encoding",
- 0, 0),
- env);
- scheme_add_global_constant("system-language+country",
- scheme_make_immed_prim(system_language_country,
- "system-language+country",
- 0, 0),
- env);
- scheme_add_global_constant("bytes-converter?",
- scheme_make_immed_prim(byte_converter_p,
- "bytes-converter?",
- 1, 1),
- env);
- scheme_add_global_constant("bytes-convert",
- scheme_make_prim_w_arity2(byte_string_convert,
- "bytes-convert",
- 1, 7,
- 3, 3),
- env);
- scheme_add_global_constant("bytes-convert-end",
- scheme_make_prim_w_arity2(byte_string_convert_end,
- "bytes-convert-end",
- 0, 3,
- 2, 2),
- env);
- scheme_add_global_constant("bytes-open-converter",
- scheme_make_immed_prim(byte_string_open_converter,
- "bytes-open-converter",
- 2, 2),
- env);
- scheme_add_global_constant("bytes-close-converter",
- scheme_make_immed_prim(byte_string_close_converter,
- "bytes-close-converter",
- 1, 1),
- env);
- scheme_add_global_constant("format",
- scheme_make_noncm_prim(format,
- "format",
- 1, -1),
- env);
- scheme_add_global_constant("printf",
- scheme_make_noncm_prim(sch_printf,
- "printf",
- 1, -1),
- env);
- scheme_add_global_constant("eprintf",
- scheme_make_noncm_prim(sch_eprintf,
- "eprintf",
- 1, -1),
- env);
- scheme_add_global_constant("fprintf",
- scheme_make_noncm_prim(sch_fprintf,
- "fprintf",
- 2, -1),
- env);
- scheme_add_global_constant("byte?",
- scheme_make_folding_prim(byte_p,
- "byte?",
- 1, 1, 1),
- env);
- REGISTER_SO(scheme_byte_string_p_proc);
- p = scheme_make_folding_prim(byte_string_p, "bytes?", 1, 1, 1);
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
- | SCHEME_PRIM_IS_OMITABLE);
- scheme_add_global_constant("bytes?", p, env);
- scheme_byte_string_p_proc = p;
- scheme_add_global_constant("make-bytes",
- scheme_make_immed_prim(make_byte_string,
- "make-bytes",
- 1, 2),
- env);
- scheme_add_global_constant("bytes",
- scheme_make_immed_prim(byte_string,
- "bytes",
- 0, -1),
- env);
- GLOBAL_PRIM_W_ARITY("make-shared-bytes", make_shared_byte_string, 1, 2, env);
- GLOBAL_PRIM_W_ARITY("shared-bytes", shared_byte_string, 0, -1, env);
- p = scheme_make_folding_prim(byte_string_length, "bytes-length", 1, 1, 1);
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
- |SCHEME_PRIM_PRODUCES_FIXNUM);
- scheme_add_global_constant("bytes-length", p, env);
- p = scheme_make_immed_prim(scheme_checked_byte_string_ref, "bytes-ref", 2, 2);
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED
- | SCHEME_PRIM_PRODUCES_FIXNUM);
- scheme_add_global_constant("bytes-ref", p, env);
- p = scheme_make_immed_prim(scheme_checked_byte_string_set, "bytes-set!", 3, 3);
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_NARY_INLINED);
- scheme_add_global_constant("bytes-set!", p, env);
- p = scheme_make_immed_prim(byte_string_eq, "bytes=?", 2, -1);
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED);
- scheme_add_global_constant("bytes=?", p, env);
- scheme_add_global_constant("bytes<?",
- scheme_make_immed_prim(byte_string_lt,
- "bytes<?",
- 2, -1),
- env);
- scheme_add_global_constant("bytes>?",
- scheme_make_immed_prim(byte_string_gt,
- "bytes>?",
- 2, -1),
- env);
- scheme_add_global_constant("subbytes",
- scheme_make_immed_prim(byte_substring,
- "subbytes",
- 2, 3),
- env);
- scheme_add_global_constant("bytes-append",
- scheme_make_immed_prim(byte_string_append,
- "bytes-append",
- 0, -1),
- env);
- scheme_add_global_constant("bytes->list",
- scheme_make_immed_prim(byte_string_to_list,
- "bytes->list",
- 1, 1),
- env);
- scheme_add_global_constant("list->bytes",
- scheme_make_immed_prim(list_to_byte_string,
- "list->bytes",
- 1, 1),
- env);
- scheme_add_global_constant("bytes-copy",
- scheme_make_immed_prim(byte_string_copy,
- "bytes-copy",
- 1, 1),
- env);
- scheme_add_global_constant("bytes-copy!",
- scheme_make_immed_prim(byte_string_copy_bang,
- "bytes-copy!",
- 3, 5),
- env);
- scheme_add_global_constant("bytes-fill!",
- scheme_make_immed_prim(byte_string_fill,
- "bytes-fill!",
- 2, 2),
- env);
- scheme_add_global_constant("bytes->immutable-bytes",
- scheme_make_immed_prim(byte_string_to_immutable,
- "bytes->immutable-bytes",
- 1, 1),
- env);
- p = scheme_make_immed_prim(byte_string_utf8_index, "bytes-utf-8-index", 2, 4);
- /* Incorrect, since the result can be #f:
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM); */
- scheme_add_global_constant("bytes-utf-8-index", p, env);
- p = scheme_make_immed_prim(byte_string_utf8_length, "bytes-utf-8-length", 1, 4);
- /* Incorrect, since the result can be #f:
- SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM); */
- scheme_add_global_constant("bytes-utf-8-length", p, env);
- scheme_add_global_constant("bytes-utf-8-ref",
- scheme_make_immed_prim(byte_string_utf8_ref,
- "bytes-utf-8-ref",
- 2, 4),
- env);
- scheme_add_global_constant("bytes->string/utf-8",
- scheme_make_immed_prim(byte_string_to_char_string,
- "bytes->string/utf-8",
- 1, 4),
- env);
- scheme_add_global_constant("bytes->string/locale",
- scheme_make_immed_prim(byte_string_to_char_string_locale,
- "bytes->string/locale",
- 1, 4),
- env);
- scheme_add_global_constant("bytes->string/latin-1",
- scheme_make_immed_prim(byte_string_to_char_string_latin1,
- "bytes->string/latin-1",
- 1, 4),
- env);
- scheme_add_global_constant("string->bytes/utf-8",
- scheme_make_immed_prim(char_string_to_byte_string,
- "string->bytes/utf-8",
- 1, 4),
- env);
- scheme_add_global_constant("string->bytes/locale",
- scheme_make_immed_prim(char_string_to_byte_string_locale,
- "string->bytes/locale",
- 1, 4),
- env);
- scheme_add_global_constant("string->bytes/latin-1",
- scheme_make_immed_prim(char_string_to_byte_string_latin1,
- "string->bytes/latin-1",
- 1, 4),
- env);
- scheme_add_global_constant("string-utf-8-length",
- scheme_make_immed_prim(char_string_utf8_length,
- "string-utf-8-length",
- 1, 3),
- env);
- /* In principle, `version' could be foldable, but it invites
- more problems than it solves... */
- scheme_add_global_constant("version",
- scheme_make_immed_prim(version,
- "version",
- 0, 0),
- env);
- scheme_add_global_constant("banner",
- scheme_make_immed_prim(banner,
- "banner",
- 0, 0),
- env);
- /* Environment variables */
- scheme_add_global_constant("environment-variables?",
- scheme_make_folding_prim(env_p,
- "environment-variables?",
- 1, 1, 1),
- env);
- scheme_add_global_constant("current-environment-variables",
- scheme_register_parameter(current_environment_variables,
- "current-environment-variables",
- MZCONFIG_CURRENT_ENV_VARS),
- env);
- scheme_add_global_constant("environment-variables-ref",
- scheme_make_immed_prim(sch_getenv,
- "environment-variables-ref",
- 2, 2),
- env);
- scheme_add_global_constant("environment-variables-set!",
- scheme_make_prim_w_arity(sch_putenv,
- "environment-variables-set!",
- 3, 4),
- env);
- scheme_add_global_constant("environment-variables-names",
- scheme_make_immed_prim(sch_getenv_names,
- "environment-variables-names",
- 1, 1),
- env);
- scheme_add_global_constant("environment-variables-copy",
- scheme_make_immed_prim(env_copy,
- "environment-variables-copy",
- 1, 1),
- env);
- scheme_add_global_constant("make-environment-variables",
- scheme_make_immed_prim(env_make,
- "make-environment-variables",
- 0, -1),
- env);
- /* Don't make these folding, since they're platform-specific: */
- scheme_add_global_constant("system-type",
- scheme_make_immed_prim(system_type,
- "system-type",
- 0, 1),
- env);
- scheme_add_global_constant("system-library-subpath",
- scheme_make_immed_prim(system_library_subpath,
- "system-library-subpath",
- 0, 1),
- env);
- scheme_add_global_constant("current-command-line-arguments",
- scheme_register_parameter(cmdline_args,
- "current-command-line-arguments",
- MZCONFIG_CMDLINE_ARGS),
- env);
- scheme_add_global_constant("path<?",
- scheme_make_immed_prim(path_lt,
- "path<?",
- 2, -1),
- env);
- #ifdef MZ_PRECISE_GC
- register_traversers();
- #endif
- }
- void scheme_init_string_places(void) {
- REGISTER_SO(current_locale_name_ptr);
- current_locale_name_ptr = (void *)xes_char_string;
- REGISTER_SO(fs_change_props);
- {
- int supported, scalable, low_latency, file_level;
- Scheme_Object *s;
- scheme_fs_change_properties(&supported, &scalable, &low_latency, &file_level);
- fs_change_props = scheme_make_vector(4, scheme_false);
- if (supported) {
- s = scheme_intern_symbol("supported");
- SCHEME_VEC_ELS(fs_change_props)[0] = s;
- }
- if (scalable) {
- s = scheme_intern_symbol("scalable");
- SCHEME_VEC_ELS(fs_change_props)[1] = s;
- }
- if (low_latency) {
- s = scheme_intern_symbol("low-latency");
- SCHEME_VEC_ELS(fs_change_props)[2] = s;
- }
- if (file_level) {
- s = scheme_intern_symbol("file-level");
- SCHEME_VEC_ELS(fs_change_props)[3] = s;
- }
- SCHEME_SET_IMMUTABLE(fs_change_props);
- }
- }
- /**********************************************************************/
- /* UTF-8 char constructors */
- /**********************************************************************/
- Scheme_Object *scheme_make_sized_offset_utf8_string(char *chars, intptr_t d, intptr_t len)
- {
- intptr_t ulen;
- mzchar *us;
- if (len) {
- ulen = scheme_utf8_decode((unsigned char *)chars, d, d + len,
- NULL, 0, -1,
- NULL, 0 /* not UTF-16 */, 0xFFFD);
- us = scheme_malloc_atomic(sizeof(mzchar) * (ulen + 1));
- scheme_utf8_decode((unsigned char *)chars, d, d + len,
- us, 0, -1,
- NULL, 0 /* not UTF-16 */, 0xFFFD);
- us[ulen] = 0;
- } else {
- us = (mzchar *)empty_char_string;
- ulen = 0;
- }
- return scheme_make_sized_offset_char_string(us, 0, ulen, 0);
- }
- Scheme_Object *
- scheme_make_sized_utf8_string(char *chars, intptr_t len)
- {
- return scheme_make_sized_offset_utf8_string(chars, 0, len);
- }
- Scheme_Object *
- scheme_make_immutable_sized_utf8_string(char *chars, intptr_t len)
- {
- Scheme_Object *s;
- s = scheme_make_sized_offset_utf8_string(chars, 0, len);
- if (len)
- SCHEME_SET_CHAR_STRING_IMMUTABLE(s);
- return s;
- }
- Scheme_Object *
- scheme_make_utf8_string(const char *chars)
- {
- return scheme_make_sized_offset_utf8_string((char *)chars, 0, -1);
- }
- Scheme_Object *
- scheme_make_locale_string(const char *chars)
- {
- return scheme_byte_string_to_char_string_locale(scheme_make_byte_string((char *)chars));
- }
- Scheme_Object *scheme_append_strings(Scheme_Object *s1, Scheme_Object *s2)
- {
- Scheme_Object *a[2];
- a[0] = s1;
- a[1] = s2;
- return string_append(2, a);
- }
- /**********************************************************************/
- /* index helpers */
- /**********************************************************************/
- intptr_t scheme_extract_index(const char *name, int pos, int argc, Scheme_Object **argv, intptr_t top, int false_ok)
- {
- intptr_t i;
- int is_top = 0;
- if (SCHEME_INTP(argv[pos])) {
- i = SCHEME_INT_VAL(argv[pos]);
- } else if (SCHEME_BIGNUMP(argv[pos])) {
- if (SCHEME_BIGPOS(argv[pos])) {
- i = top; /* out-of-bounds */
- is_top = 1;
- } else
- i = -1; /* negative */
- } else
- i = -1;
- if (!is_top && (i < 0))
- scheme_wrong_contract(name,
- (false_ok ? "(or/c exact-nonnegative-integer? #f)" : "exact-nonnegative-integer?"),
- pos, argc, argv);
- return i;
- }
- void scheme_get_substring_indices(const char *name, Scheme_Object *str,
- int argc, Scheme_Object **argv,
- int spos, int fpos, intptr_t *_start, intptr_t *_finish)
- {
- intptr_t len;
- intptr_t start, finish;
- if (SCHEME_CHAPERONE_VECTORP(str))
- len = SCHEME_CHAPERONE_VEC_SIZE(str);
- else if (SCHEME_CHAR_STRINGP(str))
- len = SCHEME_CHAR_STRTAG_VAL(str);
- else
- len = SCHEME_BYTE_STRTAG_VAL(str);
- if (argc > spos)
- start = scheme_extract_index(name, spos, argc, argv, len + 1, 0);
- else
- start = 0;
- if (argc > fpos)
- finish = scheme_extract_index(name, fpos, argc, argv, len + 1, 0);
- else
- finish = len;
- if (!(start <= len)) {
- scheme_out_of_range(name, NULL, (fpos < 100) ? "starting " : "", argv[spos], str, 0, len);
- }
- if (!(finish >= start && finish <= len)) {
- scheme_out_of_range(name, NULL, "ending ", argv[fpos], str, start, len);
- }
- *_start = start;
- *_finish = finish;
- }
- void scheme_do_get_substring_indices(const char *name, Scheme_Object *str,
- int argc, Scheme_Object **argv,
- int spos, int fpos, intptr_t *_start, intptr_t *_finish, intptr_t len)
- {
- if (argc > spos) {
- if (SCHEME_INTP(argv[spos])) {
- intptr_t start = SCHEME_INT_VAL(argv[spos]);
- if ((start >= 0) && (start < len)) {
- *_start = start;
- if (argc > fpos) {
- intptr_t finish = SCHEME_INT_VAL(argv[fpos]);
- if ((finish >= start) && (finish <= len)) {
- *_finish = finish;
- return;
- }
- } else {
- *_finish = len;
- return;
- }
- }
- }
- } else {
- *_start = 0;
- *_finish = len;
- return;
- }
- scheme_get_substring_indices(name, str, argc, argv, spos, fpos, _start, _finish);
- }
- /**********************************************************************/
- /* char strings */
- /**********************************************************************/
- #define SCHEME_X_STR_VAL(x) SCHEME_CHAR_STR_VAL(x)
- #define SCHEME_X_STRTAG_VAL(x) SCHEME_CHAR_STRTAG_VAL(x)
- #define SCHEME_X_STRINGP(x) SCHEME_CHAR_STRINGP(x)
- #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_CHAR_STRINGP(x)
- #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_CHAR_STRING_IMMUTABLE(x)
- #define scheme_x_string_type scheme_char_string_type
- #define X(a, b) a##_char##b
- #define X_(a, b) a##_##b
- #define X__(a) a
- #define EMPTY (mzchar *)"\0\0\0"
- #define Xchar mzchar
- #define uXchar mzchar
- #define XSTR ""
- #define IS_STR "string?"
- #define XSTRINGSTR "string"
- #define SUBXSTR "substring"
- #define CHARP(x) SCHEME_CHARP(x)
- #define CHAR_VAL(x) SCHEME_CHAR_VAL(x)
- #define CHAR_STR "char?"
- #define MAKE_CHAR(x) _scheme_make_char(x)
- #define xstrlen scheme_char_strlen
- #include "strops.inc"
- #define GEN_STRING_COMP(name, scheme_name, comp, op, ul, size_shortcut) \
- static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
- { mzchar *s, *prev; int i, sl, pl; int falz = 0;\
- if (!SCHEME_CHAR_STRINGP(argv[0])) \
- scheme_wrong_contract(scheme_name, "string?", 0, argc, argv); \
- prev = SCHEME_CHAR_STR_VAL(argv[0]); pl = SCHEME_CHAR_STRTAG_VAL(argv[0]); \
- for (i = 1; i < argc; i++) { \
- if (!SCHEME_CHAR_STRINGP(argv[i])) \
- scheme_wrong_contract(scheme_name, "string?", i, argc, argv); \
- s = SCHEME_CHAR_STR_VAL(argv[i]); sl = SCHEME_CHAR_STRTAG_VAL(argv[i]); \
- if (!falz) if (!(comp(scheme_name, \
- prev, pl, \
- s, sl, ul, size_shortcut) op 0)) falz = 1; \
- prev = s; pl = sl; \
- } \
- return falz ? scheme_false : scheme_true; \
- }
- GEN_STRING_COMP(string_eq, "string=?", mz_char_strcmp, ==, 0, 1)
- GEN_STRING_COMP(string_lt, "string<?", mz_char_strcmp, <, 0, 0)
- GEN_STRING_COMP(string_gt, "string>?", mz_char_strcmp, >, 0, 0)
- GEN_STRING_COMP(string_lt_eq, "string<=?", mz_char_strcmp, <=, 0, 0)
- GEN_STRING_COMP(string_gt_eq, "string>=?", mz_char_strcmp, >=, 0, 0)
- GEN_STRING_COMP(string_ci_eq, "string-ci=?", mz_char_strcmp_ci, ==, 0, 0)
- GEN_STRING_COMP(string_ci_lt, "string-ci<?", mz_char_strcmp_ci, <, 0, 0)
- GEN_STRING_COMP(string_ci_gt, "string-ci>?", mz_char_strcmp_ci, >, 0, 0)
- GEN_STRING_COMP(string_ci_lt_eq, "string-ci<=?", mz_char_strcmp_ci, <=, 0, 0)
- GEN_STRING_COMP(string_ci_gt_eq, "string-ci>=?", mz_char_strcmp_ci, >=, 0, 0)
- GEN_STRING_COMP(string_locale_eq, "string-locale=?", mz_char_strcmp, ==, 1, 0)
- GEN_STRING_COMP(string_locale_lt, "string-locale<?", mz_char_strcmp, <, 1, 0)
- GEN_STRING_COMP(string_locale_gt, "string-locale>?", mz_char_strcmp, >, 1, 0)
- GEN_STRING_COMP(string_locale_ci_eq, "string-locale-ci=?", mz_char_strcmp_ci, ==, 1, 0)
- GEN_STRING_COMP(string_locale_ci_lt, "string-locale-ci<?", mz_char_strcmp_ci, <, 1, 0)
- GEN_STRING_COMP(string_locale_ci_gt, "string-locale-ci>?", mz_char_strcmp_ci, >, 1, 0)
- Scheme_Object *scheme_string_eq_2(Scheme_Object *str1, Scheme_Object *str2)
- {
- Scheme_Object *a[2];
- a[0] = str1;
- a[1] = str2;
- return string_eq(2, a);
- }
- /**********************************************************************/
- /* byte strings */
- /**********************************************************************/
- #define SCHEME_BYTEP(x) ((SCHEME_INTP(x)) && (SCHEME_INT_VAL(x) >= 0) && (SCHEME_INT_VAL(x) <= 255))
- static Scheme_Object *
- byte_p(int argc, Scheme_Object *argv[])
- {
- return (SCHEME_BYTEP(argv[0]) ? scheme_true : scheme_false);
- }
- #define SCHEME_X_STR_VAL(x) SCHEME_BYTE_STR_VAL(x)
- #define SCHEME_X_STRTAG_VAL(x) SCHEME_BYTE_STRTAG_VAL(x)
- #define SCHEME_X_STRINGP(x) SCHEME_BYTE_STRINGP(x)
- #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_BYTE_STRINGP(x)
- #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_BYTE_STRING_IMMUTABLE(x)
- #define scheme_x_string_type scheme_byte_string_type
- #define X(a, b) a##_byte##b
- #define X_(a, b) a##_byte_##b
- #define X__(a) byte_##a
- #define EMPTY ""
- #define Xchar char
- #define uXchar unsigned char
- #define XSTR "byte "
- #define IS_STR "bytes?"
- #define XSTRINGSTR "bytes"
- #define SUBXSTR "subbytes"
- #define CHARP(x) SCHEME_BYTEP(x)
- #define CHAR_VAL(x) SCHEME_INT_VAL(x)
- #define CHAR_STR "byte?"
- #define MAKE_CHAR(x) scheme_make_integer_value(x)
- #define xstrlen strlen
- #define GENERATING_BYTE
- #include "strops.inc"
- #undef GENERATING_BYTE
- /* comparisons */
- #define GEN_BYTE_STRING_PATH_COMP(name, scheme_name, comp, op, PRED, contract) \
- static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
- { char *s, *prev; int i, sl, pl; int falz = 0;\
- if (!PRED(argv[0])) \
- scheme_wrong_contract(scheme_name, contract, 0, argc, argv); \
- prev = SCHEME_BYTE_STR_VAL(argv[0]); pl = SCHEME_BYTE_STRTAG_VAL(argv[0]); \
- for (i = 1; i < argc; i++) { \
- if (!PRED(argv[i])) \
- scheme_wrong_contract(scheme_name, contract, i, argc, argv); \
- s = SCHEME_BYTE_STR_VAL(argv[i]); sl = SCHEME_BYTE_STRTAG_VAL(argv[i]); \
- if (!falz) if (!(comp(scheme_name, \
- (unsigned char *)prev, pl, \
- (unsigned char *)s, sl) op 0)) falz = 1; \
- prev = s; pl = sl; \
- } \
- return falz ? scheme_false : scheme_true; \
- }
- #define GEN_BYTE_STRING_COMP(name, scheme_name, comp, op) \
- GEN_BYTE_STRING_PATH_COMP(name, scheme_name, comp, op, SCHEME_BYTE_STRINGP, "bytes?") \
- GEN_BYTE_STRING_COMP(byte_string_eq, "bytes=?", mz_strcmp, ==)
- GEN_BYTE_STRING_COMP(byte_string_lt, "bytes<?", mz_strcmp, <)
- GEN_BYTE_STRING_COMP(byte_string_gt, "bytes>?", mz_strcmp, >)
- GEN_BYTE_STRING_PATH_COMP(path_lt, "path<?", mz_strcmp, <, SCHEME_PATHP, "path?")
- Scheme_Object *scheme_byte_string_eq_2(Scheme_Object *str1, Scheme_Object *str2)
- {
- Scheme_Object *a[2];
- a[0] = str1;
- a[1] = str2;
- return byte_string_eq(2, a);
- }
- /**********************************************************************/
- /* byte string <-> char string */
- /**********************************************************************/
- /************************* bytes->string *************************/
- static Scheme_Object *
- do_byte_string_to_char_string(const char *who,
- Scheme_Object *bstr,
- intptr_t istart, intptr_t ifinish,
- int perm, int as_locale)
- {
- int i, ulen;
- char *chars;
- unsigned int *v;
- chars = SCHEME_BYTE_STR_VAL(bstr);
- ulen = utf8_decode_x((unsigned char *)chars, istart, ifinish,
- NULL, 0, -1,
- NULL, NULL, 0, 0,
- NULL, 0,
- (perm > -1) ? 0xD800 : 0);
- if (ulen < 0) {
- scheme_contract_error(who,
- "string is not a well-formed UTF-8 encoding",
- "string", 1, bstr,
- NULL);
- }
- v = (unsigned int *)scheme_malloc_atomic((ulen + 1) * sizeof(unsigned int));
- utf8_decode_x((unsigned char *)chars, istart, ifinish,
- v, 0, -1,
- NULL, NULL, 0, 0,
- NULL, 0,
- (perm > -1) ? 0xD800 : 0);
-
- if (perm > -1) {
- for (i = 0; i < ulen; i++) {
- if (v[i] == 0xD800)
- v[i] = perm;
- }
- }
- v[ulen] = 0;
- return scheme_make_sized_char_string(v, ulen, 0);
- }
- static Scheme_Object *
- do_byte_string_to_char_string_locale(const char *who,
- Scheme_Object *bstr,
- intptr_t istart, intptr_t ifinish,
- int perm)
- {
- char *us;
- intptr_t olen;
- reset_locale();
- if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on
- || !(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED))
- return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
- if (istart < ifinish) {
- int no_cvt;
- us = string_to_from_locale(0, SCHEME_BYTE_STR_VAL(bstr),
- istart, ifinish - istart,
- &olen, perm, &no_cvt);
- if (!us) {
- if (no_cvt) {
- return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
- } else {
- scheme_contract_error(who,
- "byte string is not a valid encoding for the current locale",
- "byte string", 1, bstr,
- NULL);
- }
- }
- ((mzchar *)us)[olen] = 0;
- } else {
- us = "\0\0\0";
- olen = 0;
- }
- return scheme_make_sized_char_string((mzchar *)us, olen, 0);
- }
- static Scheme_Object *
- do_string_to_vector(const char *who, int mode, int argc, Scheme_Object *argv[])
- {
- int permc;
- intptr_t istart, ifinish;
- if (!SCHEME_BYTE_STRINGP(argv[0]))
- scheme_wrong_contract(who, "bytes?", 0, argc, argv);
- if ((argc < 2) || SCHEME_FALSEP(argv[1]))
- permc = -1;
- else {
- if (!SCHEME_CHARP(argv[1]))
- scheme_wrong_contract(who, "(or/c char? #f)", 1, argc, argv);
- permc = SCHEME_CHAR_VAL(argv[1]);
- }
- scheme_get_substring_indices(who, argv[0], argc, argv,
- 2, 3,
- &istart, &ifinish);
- if (mode == 0)
- return do_byte_string_to_char_string(who, argv[0], istart, ifinish, permc, 0);
- else if (mode == 1)
- return do_byte_string_to_char_string_locale(who, argv[0], istart, ifinish, permc);
- else {
- /* Latin-1 */
- mzchar *us;
- unsigned char *s;
- intptr_t i, len;
- len = ifinish - istart;
- s = (unsigned char *)SCHEME_BYTE_STR_VAL(argv[0]);
- us = (mzchar *)scheme_malloc_atomic((len + 1) * sizeof(mzchar));
- for (i = istart; i < ifinish; i++) {
- us[i - istart] = s[i];
- }
- us[len] = 0;
- return scheme_make_sized_char_string(us, len, 0);
- }
- }
- static Scheme_Object *
- byte_string_to_char_string (int argc, Scheme_Object *argv[])
- {
- return do_string_to_vector("bytes->string/utf-8", 0, argc, argv);
- }
- static Scheme_Object *
- byte_string_to_char_string_locale (int argc, Scheme_Object *argv[])
- {
- return do_string_to_vector("bytes->string/locale", 1, argc, argv);
- }
- static Scheme_Object *
- byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[])
- {
- return do_string_to_vector("bytes->string/latin-1", 2, argc, argv);
- }
- Scheme_Object *scheme_byte_string_to_char_string(Scheme_Object *o)
- {
- return do_byte_string_to_char_string("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD, 0);
- }
- Scheme_Object *scheme_byte_string_to_char_string_locale(Scheme_Object *o)
- {
- return do_byte_string_to_char_string_locale("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD);
- }
- /************************* string->bytes *************************/
- static Scheme_Object *do_char_string_to_byte_string(Scheme_Object *s, intptr_t istart, intptr_t ifinish,
- int as_locale)
- {
- char *bs;
- int slen;
- slen = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
- NULL, 0,
- 0 /* UTF-16 */);
- bs = (char *)scheme_malloc_atomic(slen + 1);
- scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
- (unsigned char *)bs, 0,
- 0 /* UTF-16 */);
- bs[slen] = 0;
- return scheme_make_sized_byte_string(bs, slen, 0);
- }
- static Scheme_Object *
- do_char_string_to_byte_string_locale(const char *who,
- Scheme_Object *cstr,
- intptr_t istart, intptr_t ifinish,
- int perm)
- {
- char *s;
- intptr_t olen;
- reset_locale();
- if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on
- || !(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED))
- return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
- if (istart < ifinish) {
- int no_cvt;
- s = string_to_from_locale(1, (char *)SCHEME_CHAR_STR_VAL(cstr),
- istart, ifinish - istart,
- &olen, perm, &no_cvt);
- if (!s) {
- if (no_cvt) {
- return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
- } else {
- scheme_contract_error(who,
- "string cannot be encoded for the current locale",
- "string", 1, cstr,
- NULL);
- }
- }
- s[olen] = 0;
- } else {
- s = "";
- olen = 0;
- }
- return scheme_make_sized_byte_string(s, olen, 0);
- }
- Scheme_Object *scheme_char_string_to_byte_string(Scheme_Object *s)
- {
- return do_char_string_to_byte_string(s, 0, SCHEME_CHAR_STRLEN_VAL(s), 0);
- }
- Scheme_Object *scheme_char_string_to_byte_string_locale(Scheme_Object *s)
- {
- return do_char_string_to_byte_string_locale("s->s", s, 0, SCHEME_CHAR_STRLEN_VAL(s), '?');
- }
- static Scheme_Object *do_chars_to_bytes(const char *who, int mode,
- int argc, Scheme_Object *argv[])
- {
- intptr_t istart, ifinish;
- int permc;
- if (!SCHEME_CHAR_STRINGP(argv[0]))
- scheme_wrong_contract(who, "string?", 0, argc, argv);
- if ((argc < 2) || SCHEME_FALSEP(argv[1]))
- permc = -1;
- else {
- if (!SCHEME_BYTEP(argv[1]))
- scheme_wrong_contract(who, "(or/c byte? #f)", 1, argc, argv);
- permc = SCHEME_INT_VAL(argv[1]);
- }
- scheme_get_substring_indices(who, argv[0], argc, argv,
- 2, 3, &istart, &ifinish);
- if (mode == 1)
- return do_char_string_to_byte_string_locale(who, argv[0], istart, ifinish, permc);
- else if (mode == 0)
- return do_char_string_to_byte_string(argv[0], istart, ifinish, 0);
- else {
- /* Latin-1 */
- mzchar *us;
- unsigned char *s;
- intptr_t i, len;
- len = ifinish - istart;
- us = SCHEME_CHAR_STR_VAL(argv[0]);
- s = (unsigned char *)scheme_malloc_atomic(len + 1);
- for (i = istart; i < ifinish; i++) {
- if (us[i] < 256)
- s[i - istart] = us[i];
- else if (permc >= 0) {
- s[i - istart] = permc;
- } else {
- scheme_contract_error(who,
- "string cannot be encoded in Latin-1",
- "string", 1, argv[0],
- NULL);
- }
- }
- s[len] = 0;
- return scheme_ma…
Large files files are truncated, but you can click here to view the full file