PageRenderTime 158ms CodeModel.GetById 80ms app.highlight 58ms RepoModel.GetById 1ms app.codeStats 1ms

/src/racket/src/string.c

http://github.com/4z3/racket
C | 5644 lines | 4619 code | 681 blank | 344 comment | 1048 complexity | fbd548973c5671534bdc00809fd405bb MD5 | raw file

Large files files are truncated, but you can click here to view the full file

   1/*
   2  Racket
   3  Copyright (c) 2004-2010 PLT Scheme Inc.
   4  Copyright (c) 1995-2001 Matthew Flatt
   5
   6    This library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Library General Public
   8    License as published by the Free Software Foundation; either
   9    version 2 of the License, or (at your option) any later version.
  10
  11    This library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Library General Public License for more details.
  15
  16    You should have received a copy of the GNU Library General Public
  17    License along with this library; if not, write to the Free
  18    Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19    Boston, MA 02110-1301 USA.
  20
  21  libscheme
  22  Copyright (c) 1994 Brent Benson
  23  All rights reserved.
  24*/
  25
  26#include "schpriv.h"
  27#include "schvers.h"
  28#include <string.h>
  29#include <ctype.h>
  30#ifndef DONT_USE_LOCALE
  31# include <locale.h>
  32# ifdef MZ_NO_ICONV
  33#  define USE_ICONV_DLL
  34# endif
  35# ifndef USE_ICONV_DLL
  36#  include <iconv.h>
  37#  include <langinfo.h>
  38# endif
  39# include <wchar.h>
  40# include <wctype.h>
  41# include <errno.h>
  42# ifdef MACOS_UNICODE_SUPPORT
  43#  include <CoreFoundation/CFString.h>
  44#  include <CoreFoundation/CFLocale.h>
  45# endif
  46# ifdef WINDOWS_UNICODE_SUPPORT
  47#  include <windows.h>
  48# endif
  49#endif
  50
  51#ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH
  52# include "schsys.h"
  53#endif
  54
  55#include "schustr.inc"
  56
  57#ifdef USE_ICONV_DLL
  58typedef intptr_t iconv_t;
  59typedef int *(*errno_proc_t)();
  60typedef size_t (*iconv_proc_t)(iconv_t cd,
  61			       char **inbuf, size_t *inbytesleft,
  62			       char **outbuf, size_t *outbytesleft);
  63typedef iconv_t (*iconv_open_proc_t)(const char *tocode, const char *fromcode);
  64typedef void (*iconv_close_proc_t)(iconv_t cd);
  65typedef char *(*locale_charset_proc_t)();
  66static errno_proc_t iconv_errno;
  67static iconv_proc_t iconv;
  68static iconv_open_proc_t iconv_open;
  69static iconv_close_proc_t iconv_close;
  70static locale_charset_proc_t locale_charset; /* Not used, currently */
  71#define mzCHK_PROC(x) x
  72static int get_iconv_errno(void)
  73{
  74  int *a;
  75  a = iconv_errno();
  76  return *a;
  77}
  78# undef HAVE_CODESET
  79# define HAVE_CODESET 1
  80# define CODESET 0
  81# define ICONV_errno get_iconv_errno()
  82extern wchar_t *scheme_get_dll_path(wchar_t *s);
  83static int iconv_ready = 0;
  84static void init_iconv()
  85{
  86# ifdef MZ_NO_ICONV
  87# else
  88  HMODULE m;
  89  m = LoadLibraryW(scheme_get_dll_path(L"iconv.dll"));
  90  if (!m)
  91    m = LoadLibraryW(scheme_get_dll_path(L"libiconv.dll"));
  92  if (!m)
  93    m = LoadLibraryW(scheme_get_dll_path(L"libiconv-2.dll"));
  94  if (!m)
  95    m = LoadLibrary("iconv.dll");
  96  if (!m)
  97    m = LoadLibrary("libiconv.dll");
  98  if (!m)
  99    m = LoadLibrary("libiconv-2.dll");
 100  if (m) {
 101    iconv = (iconv_proc_t)GetProcAddress(m, "libiconv");
 102    iconv_open = (iconv_open_proc_t)GetProcAddress(m, "libiconv_open");
 103    iconv_close = (iconv_close_proc_t)GetProcAddress(m, "libiconv_close");
 104    locale_charset = (locale_charset_proc_t)GetProcAddress(m, "locale_charset");
 105    /* Make sure we have all of them or none: */
 106    if (!iconv || !iconv_open || !iconv_close) {
 107      iconv = NULL;
 108      iconv_open = NULL;
 109      iconv_close = NULL;
 110    }
 111  }
 112  if (iconv) {
 113    iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno");
 114    if (!iconv_errno) {
 115      /* The iconv.dll distributed with PLT Scheme links to msvcrt.dll.
 116	 It's a slighly dangerous assumption that whaetever iconv we
 117	 found also uses msvcrt.dll. */
 118      m = LoadLibrary("msvcrt.dll");
 119      if (m) {
 120	iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno");
 121	if (!iconv_errno) {
 122	  iconv = NULL;
 123	  iconv_open = NULL;
 124	  iconv_close = NULL;
 125	}
 126      }
 127    }
 128  }
 129# endif
 130  iconv_ready = 1;
 131}
 132#else
 133# define ICONV_errno errno
 134# define iconv_ready 1
 135# define mzCHK_PROC(x) 1
 136static void init_iconv() { }
 137#endif
 138
 139#ifdef MACOS_UNICODE_SUPPORT
 140# define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
 141#endif
 142#ifdef WINDOWS_UNICODE_SUPPORT
 143# define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
 144#endif
 145#ifndef mzLOCALE_IS_UTF_8
 146# define mzLOCALE_IS_UTF_8(s) !mzCHK_PROC(iconv_open)
 147#endif
 148
 149#define mzICONV_KIND 0
 150#define mzUTF8_KIND 1
 151#define mzUTF8_TO_UTF16_KIND 2
 152#define mzUTF16_TO_UTF8_KIND 3
 153
 154typedef struct Scheme_Converter {
 155  Scheme_Object so;
 156  short closed;
 157  short kind;
 158  iconv_t cd;
 159  int permissive;
 160  Scheme_Custodian_Reference *mref;
 161} Scheme_Converter;
 162
 163/* locals */
 164
 165/* These two locale variables are only valid when reset_locale()
 166   is called after continuation marks (and hence parameterization)
 167   may have changed. Similarly, setlocale() is only up-to-date
 168   when reset_locale() has been called. */
 169THREAD_LOCAL_DECL(static int locale_on);
 170THREAD_LOCAL_DECL(static void *current_locale_name_ptr);
 171static void reset_locale(void);
 172
 173#define current_locale_name ((const mzchar *)current_locale_name_ptr)
 174
 175#ifdef USE_ICONV_DLL
 176static char *nl_langinfo(int which)
 177{
 178  int i;
 179
 180  reset_locale();
 181  if (!current_locale_name)
 182    current_locale_name_ptr = "\0\0\0\0";
 183
 184  if ((current_locale_name[0] == 'C')
 185      && !current_locale_name[1])
 186    return "US-ASCII";
 187
 188  for (i = 0; current_locale_name[i]; i++) {
 189    if (current_locale_name[i] == '.') {
 190      if (current_locale_name[i + 1]) {
 191	int len, j = 0;
 192	char *enc;
 193	i++;
 194	len = scheme_char_strlen(current_locale_name) - i;
 195	enc = (char *)scheme_malloc_atomic(len + 1);
 196	while (current_locale_name[i]) {
 197	  if (current_locale_name[i] > 127)
 198	    return "UTF-8";
 199	  enc[j++] = current_locale_name[i++];
 200	}
 201	enc[j] = 0;
 202	return enc;
 203      }
 204    }
 205  }
 206
 207  return "UTF-8";
 208}
 209#endif
 210
 211#ifdef DONT_USE_LOCALE
 212# define mz_iconv_nl_langinfo() ""
 213#else
 214static char *mz_iconv_nl_langinfo(){
 215  char *s;
 216# if HAVE_CODESET
 217  s = nl_langinfo(CODESET);
 218# else
 219  s = NULL;
 220# endif
 221  if (!s)
 222    return "";
 223  else
 224    return s;
 225}
 226#endif
 227
 228READ_ONLY static const char * const STRING_IS_NOT_UTF_8 = "string is not a well-formed UTF-8 encoding: ";
 229
 230static Scheme_Object *make_string (int argc, Scheme_Object *argv[]);
 231static Scheme_Object *string (int argc, Scheme_Object *argv[]);
 232static Scheme_Object *string_p (int argc, Scheme_Object *argv[]);
 233static Scheme_Object *string_length (int argc, Scheme_Object *argv[]);
 234static Scheme_Object *string_eq (int argc, Scheme_Object *argv[]);
 235static Scheme_Object *string_locale_eq (int argc, Scheme_Object *argv[]);
 236static Scheme_Object *string_ci_eq (int argc, Scheme_Object *argv[]);
 237static Scheme_Object *string_locale_ci_eq (int argc, Scheme_Object *argv[]);
 238static Scheme_Object *string_lt (int argc, Scheme_Object *argv[]);
 239static Scheme_Object *string_locale_lt (int argc, Scheme_Object *argv[]);
 240static Scheme_Object *string_gt (int argc, Scheme_Object *argv[]);
 241static Scheme_Object *string_locale_gt (int argc, Scheme_Object *argv[]);
 242static Scheme_Object *string_lt_eq (int argc, Scheme_Object *argv[]);
 243static Scheme_Object *string_gt_eq (int argc, Scheme_Object *argv[]);
 244static Scheme_Object *string_ci_lt (int argc, Scheme_Object *argv[]);
 245static Scheme_Object *string_locale_ci_lt (int argc, Scheme_Object *argv[]);
 246static Scheme_Object *string_ci_gt (int argc, Scheme_Object *argv[]);
 247static Scheme_Object *string_locale_ci_gt (int argc, Scheme_Object *argv[]);
 248static Scheme_Object *string_ci_lt_eq (int argc, Scheme_Object *argv[]);
 249static Scheme_Object *string_ci_gt_eq (int argc, Scheme_Object *argv[]);
 250static Scheme_Object *string_upcase (int argc, Scheme_Object *argv[]);
 251static Scheme_Object *string_downcase (int argc, Scheme_Object *argv[]);
 252static Scheme_Object *string_titlecase (int argc, Scheme_Object *argv[]);
 253static Scheme_Object *string_foldcase (int argc, Scheme_Object *argv[]);
 254static Scheme_Object *string_locale_upcase (int argc, Scheme_Object *argv[]);
 255static Scheme_Object *string_locale_downcase (int argc, Scheme_Object *argv[]);
 256static Scheme_Object *substring (int argc, Scheme_Object *argv[]);
 257static Scheme_Object *string_append (int argc, Scheme_Object *argv[]);
 258static Scheme_Object *string_to_list (int argc, Scheme_Object *argv[]);
 259static Scheme_Object *list_to_string (int argc, Scheme_Object *argv[]);
 260static Scheme_Object *string_copy (int argc, Scheme_Object *argv[]);
 261static Scheme_Object *string_copy_bang (int argc, Scheme_Object *argv[]);
 262static Scheme_Object *string_fill (int argc, Scheme_Object *argv[]);
 263static Scheme_Object *string_to_immutable (int argc, Scheme_Object *argv[]);
 264static Scheme_Object *string_normalize_c (int argc, Scheme_Object *argv[]);
 265static Scheme_Object *string_normalize_kc (int argc, Scheme_Object *argv[]);
 266static Scheme_Object *string_normalize_d (int argc, Scheme_Object *argv[]);
 267static Scheme_Object *string_normalize_kd (int argc, Scheme_Object *argv[]);
 268
 269#if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
 270static Scheme_Object *make_shared_byte_string (int argc, Scheme_Object *argv[]);
 271static Scheme_Object *shared_byte_string (int argc, Scheme_Object *argv[]);
 272#endif
 273
 274static Scheme_Object *make_byte_string (int argc, Scheme_Object *argv[]);
 275static Scheme_Object *byte_string (int argc, Scheme_Object *argv[]);
 276static Scheme_Object *byte_p (int argc, Scheme_Object *argv[]);
 277static Scheme_Object *byte_string_p (int argc, Scheme_Object *argv[]);
 278static Scheme_Object *byte_string_length (int argc, Scheme_Object *argv[]);
 279static Scheme_Object *byte_string_eq (int argc, Scheme_Object *argv[]);
 280static Scheme_Object *byte_string_lt (int argc, Scheme_Object *argv[]);
 281static Scheme_Object *byte_string_gt (int argc, Scheme_Object *argv[]);
 282static Scheme_Object *byte_substring (int argc, Scheme_Object *argv[]);
 283static Scheme_Object *byte_string_append (int argc, Scheme_Object *argv[]);
 284static Scheme_Object *byte_string_to_list (int argc, Scheme_Object *argv[]);
 285static Scheme_Object *list_to_byte_string (int argc, Scheme_Object *argv[]);
 286static Scheme_Object *byte_string_copy (int argc, Scheme_Object *argv[]);
 287static Scheme_Object *byte_string_copy_bang (int argc, Scheme_Object *argv[]);
 288static Scheme_Object *byte_string_fill (int argc, Scheme_Object *argv[]);
 289static Scheme_Object *byte_string_to_immutable (int argc, Scheme_Object *argv[]);
 290
 291static Scheme_Object *byte_string_utf8_index (int argc, Scheme_Object *argv[]);
 292static Scheme_Object *byte_string_utf8_ref (int argc, Scheme_Object *argv[]);
 293static Scheme_Object *byte_string_utf8_length (int argc, Scheme_Object *argv[]);
 294
 295static Scheme_Object *byte_string_to_char_string (int argc, Scheme_Object *argv[]);
 296static Scheme_Object *byte_string_to_char_string_locale (int argc, Scheme_Object *argv[]);
 297static Scheme_Object *byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[]);
 298static Scheme_Object *char_string_to_byte_string (int argc, Scheme_Object *argv[]);
 299static Scheme_Object *char_string_to_byte_string_locale (int argc, Scheme_Object *argv[]);
 300static Scheme_Object *char_string_to_byte_string_latin1 (int argc, Scheme_Object *argv[]);
 301static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[]);
 302
 303static Scheme_Object *version(int argc, Scheme_Object *argv[]);
 304static Scheme_Object *format(int argc, Scheme_Object *argv[]);
 305static Scheme_Object *sch_printf(int argc, Scheme_Object *argv[]);
 306static Scheme_Object *sch_eprintf(int argc, Scheme_Object *argv[]);
 307static Scheme_Object *sch_fprintf(int argc, Scheme_Object *argv[]);
 308static Scheme_Object *banner(int argc, Scheme_Object *argv[]);
 309static Scheme_Object *sch_getenv(int argc, Scheme_Object *argv[]);
 310static Scheme_Object *sch_putenv(int argc, Scheme_Object *argv[]);
 311static Scheme_Object *system_type(int argc, Scheme_Object *argv[]);
 312static Scheme_Object *system_library_subpath(int argc, Scheme_Object *argv[]);
 313static Scheme_Object *cmdline_args(int argc, Scheme_Object *argv[]);
 314static Scheme_Object *current_locale(int argc, Scheme_Object *argv[]);
 315static Scheme_Object *locale_string_encoding(int argc, Scheme_Object *argv[]);
 316static Scheme_Object *system_language_country(int argc, Scheme_Object *argv[]);
 317
 318static Scheme_Object *byte_string_open_converter(int argc, Scheme_Object *argv[]);
 319static Scheme_Object *byte_string_close_converter(int argc, Scheme_Object *argv[]);
 320static Scheme_Object *byte_string_convert(int argc, Scheme_Object *argv[]);
 321static Scheme_Object *byte_string_convert_end(int argc, Scheme_Object *argv[]);
 322static Scheme_Object *byte_converter_p(int argc, Scheme_Object *argv[]);
 323
 324#ifdef MZ_PRECISE_GC
 325static void register_traversers(void);
 326#endif
 327
 328static int mz_char_strcmp(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
 329static int mz_char_strcmp_ci(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
 330static int mz_strcmp(const char *who, unsigned char *str1, intptr_t l1, unsigned char *str2, intptr_t l2);
 331
 332XFORM_NONGCING static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t end,
 333					unsigned int *us, intptr_t dstart, intptr_t dend,
 334					intptr_t *ipos, intptr_t *jpos,
 335					char compact, char utf16,
 336					int *state, int might_continue, int permissive);
 337XFORM_NONGCING static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t end,
 338					unsigned char *s, intptr_t dstart, intptr_t dend,
 339					intptr_t *_ipos, intptr_t *_opos, char utf16);
 340
 341static char *string_to_from_locale(int to_bytes,
 342				   char *in, intptr_t delta, intptr_t len,
 343				   intptr_t *olen, int perm,
 344				   int *no_cvt);
 345
 346#define portable_isspace(x) (((x) < 128) && isspace(x))
 347
 348ROSYM static Scheme_Object *sys_symbol;
 349ROSYM static Scheme_Object *platform_3m_path, *platform_cgc_path;
 350READ_ONLY static Scheme_Object *zero_length_char_string;
 351READ_ONLY static Scheme_Object *zero_length_byte_string;
 352
 353SHARED_OK static Scheme_Hash_Table *putenv_str_table;
 354
 355SHARED_OK static char *embedding_banner;
 356SHARED_OK static Scheme_Object *vers_str;
 357SHARED_OK static Scheme_Object *banner_str;
 358
 359READ_ONLY static Scheme_Object *complete_symbol, *continues_symbol, *aborts_symbol, *error_symbol;
 360
 361void
 362scheme_init_string (Scheme_Env *env)
 363{
 364  Scheme_Object *p;
 365
 366  REGISTER_SO(sys_symbol);
 367  sys_symbol = scheme_intern_symbol(SYSTEM_TYPE_NAME);
 368
 369  REGISTER_SO(zero_length_char_string);
 370  REGISTER_SO(zero_length_byte_string);
 371  zero_length_char_string = scheme_alloc_char_string(0, 0);
 372  zero_length_byte_string = scheme_alloc_byte_string(0, 0);
 373
 374  REGISTER_SO(complete_symbol);
 375  REGISTER_SO(continues_symbol);
 376  REGISTER_SO(aborts_symbol);
 377  REGISTER_SO(error_symbol);
 378  complete_symbol = scheme_intern_symbol("complete");
 379  continues_symbol = scheme_intern_symbol("continues");
 380  aborts_symbol = scheme_intern_symbol("aborts");
 381  error_symbol = scheme_intern_symbol("error");
 382
 383  REGISTER_SO(platform_3m_path);
 384#ifdef UNIX_FILE_SYSTEM
 385# define MZ3M_SUBDIR "/3m"
 386#else
 387# ifdef DOS_FILE_SYSTEM
 388#  define MZ3M_SUBDIR "\\3m"
 389# else
 390#  define MZ3M_SUBDIR ":3m"
 391# endif
 392#endif
 393  REGISTER_SO(platform_3m_path);
 394  REGISTER_SO(platform_cgc_path);
 395  platform_cgc_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH);
 396  platform_3m_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH MZ3M_SUBDIR);
 397
 398  REGISTER_SO(putenv_str_table);
 399
 400  REGISTER_SO(embedding_banner);
 401  REGISTER_SO(vers_str);
 402  REGISTER_SO(banner_str);
 403
 404  vers_str = scheme_make_utf8_string(scheme_version());
 405  SCHEME_SET_CHAR_STRING_IMMUTABLE(vers_str);
 406  banner_str = scheme_make_utf8_string(scheme_banner());
 407  SCHEME_SET_CHAR_STRING_IMMUTABLE(banner_str);
 408
 409  p = scheme_make_folding_prim(string_p, "string?", 1, 1, 1);
 410  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
 411  scheme_add_global_constant("string?", p, env);
 412
 413  scheme_add_global_constant("make-string",
 414			     scheme_make_immed_prim(make_string,
 415						    "make-string",
 416						    1, 2),
 417			     env);
 418  scheme_add_global_constant("string",
 419			     scheme_make_immed_prim(string,
 420						    "string",
 421						    0, -1),
 422			     env);
 423  scheme_add_global_constant("string-length",
 424			     scheme_make_folding_prim(string_length,
 425						      "string-length",
 426						      1, 1, 1),
 427			     env);
 428
 429  p = scheme_make_immed_prim(scheme_checked_string_ref, "string-ref", 2, 2);
 430  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED;
 431  scheme_add_global_constant("string-ref", p, env);
 432
 433
 434  p = scheme_make_immed_prim(scheme_checked_string_set, "string-set!", 3, 3);
 435  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
 436  scheme_add_global_constant("string-set!", p, env);
 437
 438  scheme_add_global_constant("string=?",
 439			     scheme_make_immed_prim(string_eq,
 440						    "string=?",
 441						    2, -1),
 442			     env);
 443  scheme_add_global_constant("string-locale=?",
 444			     scheme_make_immed_prim(string_locale_eq,
 445						    "string-locale=?",
 446						    2, -1),
 447			     env);
 448  scheme_add_global_constant("string-ci=?",
 449			     scheme_make_immed_prim(string_ci_eq,
 450						    "string-ci=?",
 451						    2, -1),
 452			     env);
 453  scheme_add_global_constant("string-locale-ci=?",
 454			     scheme_make_immed_prim(string_locale_ci_eq,
 455						    "string-locale-ci=?",
 456						    2, -1),
 457			     env);
 458  scheme_add_global_constant("string<?",
 459			     scheme_make_immed_prim(string_lt,
 460						    "string<?",
 461						    2, -1),
 462			     env);
 463  scheme_add_global_constant("string-locale<?",
 464			     scheme_make_immed_prim(string_locale_lt,
 465						    "string-locale<?",
 466						    2, -1),
 467			     env);
 468  scheme_add_global_constant("string>?",
 469			     scheme_make_immed_prim(string_gt,
 470						    "string>?",
 471						    2, -1),
 472			     env);
 473  scheme_add_global_constant("string-locale>?",
 474			     scheme_make_immed_prim(string_locale_gt,
 475						    "string-locale>?",
 476						    2, -1),
 477			     env);
 478  scheme_add_global_constant("string<=?",
 479			     scheme_make_immed_prim(string_lt_eq,
 480						    "string<=?",
 481						    2, -1),
 482			     env);
 483  scheme_add_global_constant("string>=?",
 484			     scheme_make_immed_prim(string_gt_eq,
 485						    "string>=?",
 486						    2, -1),
 487			     env);
 488  scheme_add_global_constant("string-ci<?",
 489			     scheme_make_immed_prim(string_ci_lt,
 490						    "string-ci<?",
 491						    2, -1),
 492			     env);
 493  scheme_add_global_constant("string-locale-ci<?",
 494			     scheme_make_immed_prim(string_locale_ci_lt,
 495						    "string-locale-ci<?",
 496						    2, -1),
 497			     env);
 498  scheme_add_global_constant("string-ci>?",
 499			     scheme_make_immed_prim(string_ci_gt,
 500						    "string-ci>?",
 501						    2, -1),
 502			     env);
 503  scheme_add_global_constant("string-locale-ci>?",
 504			     scheme_make_immed_prim(string_locale_ci_gt,
 505						    "string-locale-ci>?",
 506						    2, -1),
 507			     env);
 508  scheme_add_global_constant("string-ci<=?",
 509			     scheme_make_immed_prim(string_ci_lt_eq,
 510						    "string-ci<=?",
 511						    2, -1),
 512			     env);
 513  scheme_add_global_constant("string-ci>=?",
 514			     scheme_make_immed_prim(string_ci_gt_eq,
 515						    "string-ci>=?",
 516						    2, -1),
 517			     env);
 518
 519  scheme_add_global_constant("substring",
 520			     scheme_make_immed_prim(substring,
 521						    "substring",
 522						    2, 3),
 523			     env);
 524  scheme_add_global_constant("string-append",
 525			     scheme_make_immed_prim(string_append,
 526						    "string-append",
 527						    0, -1),
 528			     env);
 529  scheme_add_global_constant("string->list",
 530			     scheme_make_immed_prim(string_to_list,
 531						    "string->list",
 532						    1, 1),
 533			     env);
 534  scheme_add_global_constant("list->string",
 535			     scheme_make_immed_prim(list_to_string,
 536						    "list->string",
 537						    1, 1),
 538			     env);
 539  scheme_add_global_constant("string-copy",
 540			     scheme_make_immed_prim(string_copy,
 541						    "string-copy",
 542						    1, 1),
 543			     env);
 544  scheme_add_global_constant("string-copy!",
 545			     scheme_make_immed_prim(string_copy_bang,
 546						    "string-copy!",
 547						    3, 5),
 548			     env);
 549  scheme_add_global_constant("string-fill!",
 550			     scheme_make_immed_prim(string_fill,
 551						    "string-fill!",
 552						    2, 2),
 553			     env);
 554  scheme_add_global_constant("string->immutable-string",
 555			     scheme_make_immed_prim(string_to_immutable,
 556						    "string->immutable-string",
 557						    1, 1),
 558			     env);
 559  scheme_add_global_constant("string-normalize-nfc",
 560			     scheme_make_immed_prim(string_normalize_c,
 561						    "string-normalize-nfc",
 562						    1, 1),
 563			     env);
 564  scheme_add_global_constant("string-normalize-nfkc",
 565			     scheme_make_immed_prim(string_normalize_kc,
 566						    "string-normalize-nfkc",
 567						    1, 1),
 568			     env);
 569  scheme_add_global_constant("string-normalize-nfd",
 570			     scheme_make_immed_prim(string_normalize_d,
 571						    "string-normalize-nfd",
 572						    1, 1),
 573			     env);
 574  scheme_add_global_constant("string-normalize-nfkd",
 575			     scheme_make_immed_prim(string_normalize_kd,
 576						    "string-normalize-nfkd",
 577						    1, 1),
 578			     env);
 579
 580  scheme_add_global_constant("string-upcase",
 581			     scheme_make_immed_prim(string_upcase,
 582						    "string-upcase",
 583						    1, 1),
 584			     env);
 585  scheme_add_global_constant("string-downcase",
 586			     scheme_make_immed_prim(string_downcase,
 587						    "string-downcase",
 588						    1, 1),
 589			     env);
 590  scheme_add_global_constant("string-titlecase",
 591			     scheme_make_immed_prim(string_titlecase,
 592						    "string-titlecase",
 593						    1, 1),
 594			     env);
 595  scheme_add_global_constant("string-foldcase",
 596			     scheme_make_immed_prim(string_foldcase,
 597						    "string-foldcase",
 598						    1, 1),
 599			     env);
 600
 601  scheme_add_global_constant("string-locale-upcase",
 602			     scheme_make_immed_prim(string_locale_upcase,
 603						    "string-locale-upcase",
 604						    1, 1),
 605			     env);
 606  scheme_add_global_constant("string-locale-downcase",
 607			     scheme_make_immed_prim(string_locale_downcase,
 608						    "string-locale-downcase",
 609						    1, 1),
 610			     env);
 611
 612  scheme_add_global_constant("current-locale",
 613			     scheme_register_parameter(current_locale,
 614						       "current-locale",
 615						       MZCONFIG_LOCALE),
 616			     env);
 617  scheme_add_global_constant("locale-string-encoding",
 618			     scheme_make_immed_prim(locale_string_encoding,
 619						    "locale-string-encoding",
 620						    0, 0),
 621			     env);
 622  scheme_add_global_constant("system-language+country",
 623			     scheme_make_immed_prim(system_language_country,
 624						    "system-language+country",
 625						    0, 0),
 626			     env);
 627
 628  scheme_add_global_constant("bytes-converter?",
 629			     scheme_make_immed_prim(byte_converter_p,
 630						    "bytes-converter?",
 631						    1, 1),
 632			     env);
 633  scheme_add_global_constant("bytes-convert",
 634			     scheme_make_prim_w_arity2(byte_string_convert,
 635						       "bytes-convert",
 636						       1, 7,
 637						       3, 3),
 638			     env);
 639  scheme_add_global_constant("bytes-convert-end",
 640			     scheme_make_prim_w_arity2(byte_string_convert_end,
 641						       "bytes-convert-end",
 642						       0, 3,
 643						       2, 2),
 644			     env);
 645  scheme_add_global_constant("bytes-open-converter",
 646			     scheme_make_immed_prim(byte_string_open_converter,
 647						    "bytes-open-converter",
 648						    2, 2),
 649			     env);
 650  scheme_add_global_constant("bytes-close-converter",
 651			     scheme_make_immed_prim(byte_string_close_converter,
 652						    "bytes-close-converter",
 653						    1, 1),
 654			     env);
 655
 656  scheme_add_global_constant("format",
 657			     scheme_make_noncm_prim(format,
 658                                                    "format",
 659                                                    1, -1),
 660			     env);
 661  scheme_add_global_constant("printf",
 662			     scheme_make_noncm_prim(sch_printf,
 663                                                    "printf",
 664                                                    1, -1),
 665			     env);
 666  scheme_add_global_constant("eprintf",
 667			     scheme_make_noncm_prim(sch_eprintf,
 668                                                    "eprintf",
 669                                                    1, -1),
 670			     env);
 671  scheme_add_global_constant("fprintf",
 672			     scheme_make_noncm_prim(sch_fprintf,
 673                                                    "fprintf",
 674                                                    2, -1),
 675			     env);
 676
 677  scheme_add_global_constant("byte?",
 678			     scheme_make_folding_prim(byte_p,
 679						      "byte?",
 680						      1, 1, 1),
 681			     env);
 682
 683  p = scheme_make_folding_prim(byte_string_p, "bytes?", 1, 1, 1);
 684  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
 685  scheme_add_global_constant("bytes?", p, env);
 686
 687  scheme_add_global_constant("make-bytes",
 688			     scheme_make_immed_prim(make_byte_string,
 689						    "make-bytes",
 690						    1, 2),
 691			     env);
 692  scheme_add_global_constant("bytes",
 693			     scheme_make_immed_prim(byte_string,
 694						    "bytes",
 695						    0, -1),
 696			     env);
 697
 698#if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
 699  GLOBAL_PRIM_W_ARITY("make-shared-bytes", make_shared_byte_string, 1, 2, env);
 700  GLOBAL_PRIM_W_ARITY("shared-bytes", shared_byte_string, 0, -1, env);
 701#else
 702  GLOBAL_PRIM_W_ARITY("make-shared-bytes", make_byte_string, 1, 2, env);
 703  GLOBAL_PRIM_W_ARITY("shared-bytes", byte_string, 0, -1, env);
 704#endif
 705
 706  scheme_add_global_constant("bytes-length",
 707			     scheme_make_folding_prim(byte_string_length,
 708						      "bytes-length",
 709						      1, 1, 1),
 710			     env);
 711
 712  p = scheme_make_immed_prim(scheme_checked_byte_string_ref, "bytes-ref", 2, 2);
 713  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED;
 714  scheme_add_global_constant("bytes-ref", p, env);
 715
 716  p = scheme_make_immed_prim(scheme_checked_byte_string_set, "bytes-set!", 3, 3);
 717  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
 718  scheme_add_global_constant("bytes-set!", p, env);
 719
 720  scheme_add_global_constant("bytes=?",
 721			     scheme_make_immed_prim(byte_string_eq,
 722						    "bytes=?",
 723						    2, -1),
 724			     env);
 725  scheme_add_global_constant("bytes<?",
 726			     scheme_make_immed_prim(byte_string_lt,
 727						    "bytes<?",
 728						    2, -1),
 729			     env);
 730  scheme_add_global_constant("bytes>?",
 731			     scheme_make_immed_prim(byte_string_gt,
 732						    "bytes>?",
 733						    2, -1),
 734			     env);
 735
 736  scheme_add_global_constant("subbytes",
 737			     scheme_make_immed_prim(byte_substring,
 738						    "subbytes",
 739						    2, 3),
 740			     env);
 741  scheme_add_global_constant("bytes-append",
 742			     scheme_make_immed_prim(byte_string_append,
 743						    "bytes-append",
 744						    0, -1),
 745			     env);
 746  scheme_add_global_constant("bytes->list",
 747			     scheme_make_immed_prim(byte_string_to_list,
 748						    "bytes->list",
 749						    1, 1),
 750			     env);
 751  scheme_add_global_constant("list->bytes",
 752			     scheme_make_immed_prim(list_to_byte_string,
 753						    "list->bytes",
 754						    1, 1),
 755			     env);
 756  scheme_add_global_constant("bytes-copy",
 757			     scheme_make_immed_prim(byte_string_copy,
 758						    "bytes-copy",
 759						    1, 1),
 760			     env);
 761  scheme_add_global_constant("bytes-copy!",
 762			     scheme_make_immed_prim(byte_string_copy_bang,
 763						    "bytes-copy!",
 764						    3, 5),
 765			     env);
 766  scheme_add_global_constant("bytes-fill!",
 767			     scheme_make_immed_prim(byte_string_fill,
 768						    "bytes-fill!",
 769						    2, 2),
 770			     env);
 771  scheme_add_global_constant("bytes->immutable-bytes",
 772			     scheme_make_immed_prim(byte_string_to_immutable,
 773						    "bytes->immutable-bytes",
 774						    1, 1),
 775			     env);
 776
 777
 778  scheme_add_global_constant("bytes-utf-8-index",
 779			     scheme_make_immed_prim(byte_string_utf8_index,
 780						    "bytes-utf-8-index",
 781						    2, 4),
 782			     env);
 783  scheme_add_global_constant("bytes-utf-8-length",
 784			     scheme_make_immed_prim(byte_string_utf8_length,
 785						    "bytes-utf-8-length",
 786						    1, 4),
 787			     env);
 788  scheme_add_global_constant("bytes-utf-8-ref",
 789			     scheme_make_immed_prim(byte_string_utf8_ref,
 790						    "bytes-utf-8-ref",
 791						    2, 4),
 792			     env);
 793
 794  scheme_add_global_constant("bytes->string/utf-8",
 795			     scheme_make_immed_prim(byte_string_to_char_string,
 796						    "bytes->string/utf-8",
 797						    1, 4),
 798			     env);
 799  scheme_add_global_constant("bytes->string/locale",
 800			     scheme_make_immed_prim(byte_string_to_char_string_locale,
 801						    "bytes->string/locale",
 802						    1, 4),
 803			     env);
 804  scheme_add_global_constant("bytes->string/latin-1",
 805			     scheme_make_immed_prim(byte_string_to_char_string_latin1,
 806						    "bytes->string/latin-1",
 807						    1, 4),
 808			     env);
 809  scheme_add_global_constant("string->bytes/utf-8",
 810			     scheme_make_immed_prim(char_string_to_byte_string,
 811						    "string->bytes/utf-8",
 812						    1, 4),
 813			     env);
 814  scheme_add_global_constant("string->bytes/locale",
 815			     scheme_make_immed_prim(char_string_to_byte_string_locale,
 816						    "string->bytes/locale",
 817						    1, 4),
 818			     env);
 819  scheme_add_global_constant("string->bytes/latin-1",
 820			     scheme_make_immed_prim(char_string_to_byte_string_latin1,
 821						    "string->bytes/latin-1",
 822						    1, 4),
 823			     env);
 824
 825  scheme_add_global_constant("string-utf-8-length",
 826			     scheme_make_immed_prim(char_string_utf8_length,
 827						    "string-utf-8-length",
 828						    1, 3),
 829			     env);
 830
 831
 832  /* In principle, `version' could be foldable, but it invites
 833     more problems than it solves... */
 834
 835  scheme_add_global_constant("version",
 836			     scheme_make_immed_prim(version,
 837						    "version",
 838						    0, 0),
 839			     env);
 840  scheme_add_global_constant("banner",
 841			     scheme_make_immed_prim(banner,
 842						    "banner",
 843						    0, 0),
 844			     env);
 845
 846  scheme_add_global_constant("getenv",
 847			     scheme_make_immed_prim(sch_getenv,
 848						    "getenv",
 849						    1, 1),
 850			     env);
 851  scheme_add_global_constant("putenv",
 852			     scheme_make_immed_prim(sch_putenv,
 853						    "putenv",
 854						    2, 2),
 855			     env);
 856
 857  /* Don't make these folding, since they're platform-specific: */
 858
 859  scheme_add_global_constant("system-type",
 860			     scheme_make_immed_prim(system_type,
 861						    "system-type",
 862						    0, 1),
 863			     env);
 864  scheme_add_global_constant("system-library-subpath",
 865			     scheme_make_immed_prim(system_library_subpath,
 866						    "system-library-subpath",
 867						    0, 1),
 868			     env);
 869
 870  scheme_add_global_constant("current-command-line-arguments",
 871			     scheme_register_parameter(cmdline_args,
 872						       "current-command-line-arguments",
 873						       MZCONFIG_CMDLINE_ARGS),
 874			     env);
 875
 876#ifdef MZ_PRECISE_GC
 877  register_traversers();
 878#endif
 879}
 880
 881void scheme_init_string_places(void) {
 882  REGISTER_SO(current_locale_name_ptr);
 883  current_locale_name_ptr = "xxxx\0\0\0\0";
 884}
 885
 886/**********************************************************************/
 887/*                     UTF-8 char constructors                        */
 888/**********************************************************************/
 889
 890Scheme_Object *scheme_make_sized_offset_utf8_string(char *chars, intptr_t d, intptr_t len)
 891{
 892  intptr_t ulen;
 893  mzchar *us;
 894
 895  if (len) {
 896    ulen = scheme_utf8_decode((unsigned char *)chars, d, d + len,
 897			      NULL, 0, -1,
 898			      NULL, 0 /* not UTF-16 */, 0xFFFD);
 899    us = scheme_malloc_atomic(sizeof(mzchar) * (ulen + 1));
 900    scheme_utf8_decode((unsigned char *)chars, d, d + len,
 901		       us, 0, -1,
 902		       NULL, 0 /* not UTF-16 */, 0xFFFD);
 903
 904    us[ulen] = 0;
 905  } else {
 906    us = (mzchar *)"\0\0\0";
 907    ulen = 0;
 908  }
 909  return scheme_make_sized_offset_char_string(us, 0, ulen, 0);
 910}
 911
 912Scheme_Object *
 913scheme_make_sized_utf8_string(char *chars, intptr_t len)
 914{
 915  return scheme_make_sized_offset_utf8_string(chars, 0, len);
 916}
 917
 918Scheme_Object *
 919scheme_make_immutable_sized_utf8_string(char *chars, intptr_t len)
 920{
 921  Scheme_Object *s;
 922
 923  s = scheme_make_sized_offset_utf8_string(chars, 0, len);
 924  if (len)
 925    SCHEME_SET_CHAR_STRING_IMMUTABLE(s);
 926
 927  return s;
 928}
 929
 930Scheme_Object *
 931scheme_make_utf8_string(const char *chars)
 932{
 933  return scheme_make_sized_offset_utf8_string((char *)chars, 0, -1);
 934}
 935
 936Scheme_Object *
 937scheme_make_locale_string(const char *chars)
 938{
 939  return scheme_byte_string_to_char_string_locale(scheme_make_byte_string((char *)chars));
 940}
 941
 942/**********************************************************************/
 943/*                         index helpers                              */
 944/**********************************************************************/
 945
 946void scheme_out_of_string_range(const char *name, const char *which,
 947				Scheme_Object *i, Scheme_Object *s,
 948				intptr_t start, intptr_t len)
 949{
 950  int is_byte;
 951
 952  is_byte = SCHEME_BYTE_STRINGP(s);
 953
 954  if (len) {
 955    char *sstr;
 956    intptr_t slen;
 957
 958    sstr = scheme_make_provided_string(s, 2, &slen);
 959    scheme_raise_exn(MZEXN_FAIL_CONTRACT,
 960		     "%s: %sindex %s out of range [%d, %d] for %s%s: %t",
 961		     name, which,
 962		     scheme_make_provided_string(i, 2, NULL),
 963		     start, len,
 964		     is_byte ? "byte-" : "",
 965                     SCHEME_CHAPERONE_VECTORP(s) ? "vector" : "string",
 966		     sstr, slen);
 967  } else {
 968    scheme_raise_exn(MZEXN_FAIL_CONTRACT,
 969		     "%s: %sindex %s out of range for empty %s%s",
 970		     name, which,
 971		     scheme_make_provided_string(i, 0, NULL),
 972		     is_byte ? "byte-" : "",
 973                     SCHEME_CHAPERONE_VECTORP(s) ? "vector" : "string");
 974  }
 975}
 976
 977intptr_t scheme_extract_index(const char *name, int pos, int argc, Scheme_Object **argv, intptr_t top, int false_ok)
 978{
 979  intptr_t i;
 980  int is_top = 0;
 981
 982  if (SCHEME_INTP(argv[pos])) {
 983    i = SCHEME_INT_VAL(argv[pos]);
 984  } else if (SCHEME_BIGNUMP(argv[pos])) {
 985    if (SCHEME_BIGPOS(argv[pos])) {
 986      i = top; /* out-of-bounds */
 987      is_top = 1;
 988    } else
 989      i = -1; /* negative */
 990  } else
 991    i = -1;
 992
 993  if (!is_top && (i < 0))
 994    scheme_wrong_type(name,
 995		      (false_ok ? "non-negative exact integer or #f" : "non-negative exact integer"),
 996		      pos, argc, argv);
 997
 998  return i;
 999}
1000
1001void scheme_get_substring_indices(const char *name, Scheme_Object *str,
1002                                  int argc, Scheme_Object **argv,
1003                                  int spos, int fpos, intptr_t *_start, intptr_t *_finish)
1004{
1005  intptr_t len;
1006  intptr_t start, finish;
1007
1008  if (SCHEME_CHAPERONE_VECTORP(str))
1009    len = SCHEME_VEC_SIZE(str);
1010  else if (SCHEME_CHAR_STRINGP(str))
1011    len = SCHEME_CHAR_STRTAG_VAL(str);
1012  else
1013    len = SCHEME_BYTE_STRTAG_VAL(str);
1014
1015  if (argc > spos)
1016    start = scheme_extract_index(name, spos, argc, argv, len + 1, 0);
1017  else
1018    start = 0;
1019  if (argc > fpos)
1020    finish = scheme_extract_index(name, fpos, argc, argv, len + 1, 0);
1021  else
1022    finish = len;
1023
1024  if (!(start <= len)) {
1025    scheme_out_of_string_range(name, (fpos < 100) ? "starting " : "", argv[spos], str, 0, len);
1026  }
1027  if (!(finish >= start && finish <= len)) {
1028    scheme_out_of_string_range(name, "ending ", argv[fpos], str, start, len);
1029  }
1030
1031  *_start = start;
1032  *_finish = finish;
1033}
1034
1035void scheme_do_get_substring_indices(const char *name, Scheme_Object *str,
1036                                     int argc, Scheme_Object **argv,
1037                                     int spos, int fpos, intptr_t *_start, intptr_t *_finish, intptr_t len)
1038{
1039  if (argc > spos) {
1040    if (SCHEME_INTP(argv[spos])) {
1041      intptr_t start = SCHEME_INT_VAL(argv[spos]);
1042      if ((start >= 0) && (start < len)) {
1043        *_start = start;
1044        if (argc > fpos) {
1045          intptr_t finish = SCHEME_INT_VAL(argv[fpos]);
1046          if ((finish >= start) && (finish <= len)) {
1047            *_finish = finish;
1048            return;
1049          }
1050        } else {
1051          *_finish = len;
1052          return;
1053        }
1054      }
1055    }
1056  } else {
1057    *_start = 0;
1058    *_finish = len;
1059    return;
1060  }
1061
1062  scheme_get_substring_indices(name, str, argc, argv, spos, fpos, _start, _finish);
1063}
1064
1065/**********************************************************************/
1066/*                          char strings                              */
1067/**********************************************************************/
1068
1069#define SCHEME_X_STR_VAL(x) SCHEME_CHAR_STR_VAL(x)
1070#define SCHEME_X_STRTAG_VAL(x) SCHEME_CHAR_STRTAG_VAL(x)
1071#define SCHEME_X_STRINGP(x) SCHEME_CHAR_STRINGP(x)
1072#define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_CHAR_STRINGP(x)
1073#define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_CHAR_STRING_IMMUTABLE(x)
1074#define scheme_x_string_type scheme_char_string_type
1075#define X(a, b) a##_char##b
1076#define X_(a, b) a##_##b
1077#define X__(a) a
1078#define EMPTY (mzchar *)"\0\0\0"
1079#define Xchar mzchar
1080#define uXchar mzchar
1081#define XSTR ""
1082#define XSTRINGSTR "string"
1083#define SUBXSTR "substring"
1084#define CHARP(x) SCHEME_CHARP(x)
1085#define CHAR_VAL(x) SCHEME_CHAR_VAL(x)
1086#define CHAR_STR "character"
1087#define MAKE_CHAR(x) _scheme_make_char(x)
1088#define xstrlen scheme_char_strlen
1089#include "strops.inc"
1090
1091#define GEN_STRING_COMP(name, scheme_name, comp, op, ul, size_shortcut)     \
1092static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
1093{  mzchar *s, *prev; int i, sl, pl; int falz = 0;\
1094   if (!SCHEME_CHAR_STRINGP(argv[0])) \
1095    scheme_wrong_type(scheme_name, "string", 0, argc, argv); \
1096   prev = SCHEME_CHAR_STR_VAL(argv[0]); pl = SCHEME_CHAR_STRTAG_VAL(argv[0]); \
1097   for (i = 1; i < argc; i++) { \
1098     if (!SCHEME_CHAR_STRINGP(argv[i])) \
1099      scheme_wrong_type(scheme_name, "string", i, argc, argv); \
1100     s = SCHEME_CHAR_STR_VAL(argv[i]); sl = SCHEME_CHAR_STRTAG_VAL(argv[i]); \
1101     if (!falz) if (!(comp(scheme_name, \
1102                           prev, pl, \
1103                           s, sl, ul, size_shortcut) op 0)) falz = 1; \
1104     prev = s; pl = sl; \
1105  } \
1106  return falz ? scheme_false : scheme_true; \
1107}
1108
1109GEN_STRING_COMP(string_eq, "string=?", mz_char_strcmp, ==, 0, 1)
1110GEN_STRING_COMP(string_lt, "string<?", mz_char_strcmp, <, 0, 0)
1111GEN_STRING_COMP(string_gt, "string>?", mz_char_strcmp, >, 0, 0)
1112GEN_STRING_COMP(string_lt_eq, "string<=?", mz_char_strcmp, <=, 0, 0)
1113GEN_STRING_COMP(string_gt_eq, "string>=?", mz_char_strcmp, >=, 0, 0)
1114
1115GEN_STRING_COMP(string_ci_eq, "string-ci=?", mz_char_strcmp_ci, ==, 0, 0)
1116GEN_STRING_COMP(string_ci_lt, "string-ci<?", mz_char_strcmp_ci, <, 0, 0)
1117GEN_STRING_COMP(string_ci_gt, "string-ci>?", mz_char_strcmp_ci, >, 0, 0)
1118GEN_STRING_COMP(string_ci_lt_eq, "string-ci<=?", mz_char_strcmp_ci, <=, 0, 0)
1119GEN_STRING_COMP(string_ci_gt_eq, "string-ci>=?", mz_char_strcmp_ci, >=, 0, 0)
1120
1121GEN_STRING_COMP(string_locale_eq, "string-locale=?", mz_char_strcmp, ==, 1, 0)
1122GEN_STRING_COMP(string_locale_lt, "string-locale<?", mz_char_strcmp, <, 1, 0)
1123GEN_STRING_COMP(string_locale_gt, "string-locale>?", mz_char_strcmp, >, 1, 0)
1124GEN_STRING_COMP(string_locale_ci_eq, "string-locale-ci=?", mz_char_strcmp_ci, ==, 1, 0)
1125GEN_STRING_COMP(string_locale_ci_lt, "string-locale-ci<?", mz_char_strcmp_ci, <, 1, 0)
1126GEN_STRING_COMP(string_locale_ci_gt, "string-locale-ci>?", mz_char_strcmp_ci, >, 1, 0)
1127
1128/**********************************************************************/
1129/*                         byte strings                               */
1130/**********************************************************************/
1131
1132#define SCHEME_BYTEP(x) ((SCHEME_INTP(x)) && (SCHEME_INT_VAL(x) >= 0) && (SCHEME_INT_VAL(x) <= 255))
1133#define BYTE_STR "exact integer in [0,255]"
1134
1135static Scheme_Object *
1136byte_p(int argc, Scheme_Object *argv[])
1137{
1138  return (SCHEME_BYTEP(argv[0]) ? scheme_true : scheme_false);
1139}
1140
1141#define SCHEME_X_STR_VAL(x) SCHEME_BYTE_STR_VAL(x)
1142#define SCHEME_X_STRTAG_VAL(x) SCHEME_BYTE_STRTAG_VAL(x)
1143#define SCHEME_X_STRINGP(x) SCHEME_BYTE_STRINGP(x)
1144#define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_BYTE_STRINGP(x)
1145#define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_BYTE_STRING_IMMUTABLE(x)
1146#define scheme_x_string_type scheme_byte_string_type
1147#define X(a, b) a##_byte##b
1148#define X_(a, b) a##_byte_##b
1149#define X__(a) byte_##a
1150#define EMPTY ""
1151#define Xchar char
1152#define uXchar unsigned char
1153#define XSTR "byte "
1154#define XSTRINGSTR "bytes"
1155#define SUBXSTR "subbytes"
1156#define CHARP(x) SCHEME_BYTEP(x)
1157#define CHAR_VAL(x) SCHEME_INT_VAL(x)
1158#define CHAR_STR BYTE_STR
1159#define MAKE_CHAR(x) scheme_make_integer_value(x)
1160#define xstrlen strlen
1161#define GENERATING_BYTE
1162#include "strops.inc"
1163#undef GENERATING_BYTE
1164
1165/* comparisons */
1166
1167#define GEN_BYTE_STRING_COMP(name, scheme_name, comp, op) \
1168static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
1169{  char *s, *prev; int i, sl, pl; int falz = 0;\
1170   if (!SCHEME_BYTE_STRINGP(argv[0])) \
1171    scheme_wrong_type(scheme_name, "byte string", 0, argc, argv); \
1172   prev = SCHEME_BYTE_STR_VAL(argv[0]); pl = SCHEME_BYTE_STRTAG_VAL(argv[0]); \
1173   for (i = 1; i < argc; i++) { \
1174     if (!SCHEME_BYTE_STRINGP(argv[i])) \
1175      scheme_wrong_type(scheme_name, "byte string", i, argc, argv); \
1176     s = SCHEME_BYTE_STR_VAL(argv[i]); sl = SCHEME_BYTE_STRTAG_VAL(argv[i]); \
1177     if (!falz) if (!(comp(scheme_name, \
1178                           (unsigned char *)prev, pl, \
1179                           (unsigned char *)s, sl) op 0)) falz = 1; \
1180     prev = s; pl = sl; \
1181  } \
1182  return falz ? scheme_false : scheme_true; \
1183}
1184
1185GEN_BYTE_STRING_COMP(byte_string_eq, "bytes=?", mz_strcmp, ==)
1186GEN_BYTE_STRING_COMP(byte_string_lt, "bytes<?", mz_strcmp, <)
1187GEN_BYTE_STRING_COMP(byte_string_gt, "bytes>?", mz_strcmp, >)
1188
1189/**********************************************************************/
1190/*                   byte string <-> char string                      */
1191/**********************************************************************/
1192
1193/************************* bytes->string *************************/
1194
1195static Scheme_Object *
1196do_byte_string_to_char_string(const char *who,
1197			      Scheme_Object *bstr,
1198			      intptr_t istart, intptr_t ifinish,
1199			      int perm, int as_locale)
1200{
1201  int i, ulen;
1202  char *chars;
1203  unsigned int *v;
1204
1205  chars = SCHEME_BYTE_STR_VAL(bstr);
1206
1207  ulen = utf8_decode_x((unsigned char *)chars, istart, ifinish,
1208		       NULL, 0, -1,
1209		       NULL, NULL, 0, 0,
1210		       NULL, 0, 
1211		       (perm > -1) ? 0xD800 : 0);
1212  if (ulen < 0) {
1213    scheme_arg_mismatch(who,
1214			STRING_IS_NOT_UTF_8,
1215			bstr);
1216  }
1217
1218  v = (unsigned int *)scheme_malloc_atomic((ulen + 1) * sizeof(unsigned int));
1219  utf8_decode_x((unsigned char *)chars, istart, ifinish,
1220		v, 0, -1,
1221		NULL, NULL, 0, 0,
1222		NULL, 0, 
1223		(perm > -1) ? 0xD800 : 0);
1224  
1225  if (perm > -1) {
1226    for (i = 0; i < ulen; i++) {
1227      if (v[i] == 0xD800)
1228	v[i] = perm;
1229    }
1230  }
1231  v[ulen] = 0;
1232
1233  return scheme_make_sized_char_string(v, ulen, 0);
1234}
1235
1236static Scheme_Object *
1237do_byte_string_to_char_string_locale(const char *who,
1238				     Scheme_Object *bstr,
1239				     intptr_t istart, intptr_t ifinish,
1240				     int perm)
1241{
1242  char *us;
1243  intptr_t olen;
1244
1245  reset_locale();
1246  if (!iconv_ready) init_iconv();
1247
1248  if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open))
1249    return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
1250
1251  if (istart < ifinish) {
1252    int no_cvt;
1253
1254    us = string_to_from_locale(0, SCHEME_BYTE_STR_VAL(bstr),
1255			       istart, ifinish - istart,
1256			       &olen, perm, &no_cvt);
1257
1258    if (!us) {
1259      if (no_cvt) {
1260	return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
1261      } else {
1262	scheme_arg_mismatch(who,
1263			    "byte string is not a valid encoding for the current locale: ",
1264			    bstr);
1265      }
1266    }
1267    ((mzchar *)us)[olen] = 0;
1268  } else {
1269    us = "\0\0\0";
1270    olen = 0;
1271  }
1272
1273  return scheme_make_sized_char_string((mzchar *)us, olen, 0);
1274}
1275
1276static Scheme_Object *
1277do_string_to_vector(const char *who, int mode, int argc, Scheme_Object *argv[])
1278{
1279  int permc;
1280  intptr_t istart, ifinish;
1281
1282  if (!SCHEME_BYTE_STRINGP(argv[0]))
1283    scheme_wrong_type(who, "byte string", 0, argc, argv);
1284
1285  if ((argc < 2) || SCHEME_FALSEP(argv[1]))
1286    permc = -1;
1287  else {
1288    if (!SCHEME_CHARP(argv[1]))
1289      scheme_wrong_type(who, "character or #f", 1, argc, argv);
1290    permc = SCHEME_CHAR_VAL(argv[1]);
1291  }
1292
1293  scheme_get_substring_indices(who, argv[0], argc, argv,
1294			       2, 3,
1295			       &istart, &ifinish);
1296
1297  if (mode == 0)
1298    return do_byte_string_to_char_string(who, argv[0], istart, ifinish, permc, 0);
1299  else if (mode == 1)
1300    return do_byte_string_to_char_string_locale(who, argv[0], istart, ifinish, permc);
1301  else {
1302    /* Latin-1 */
1303    mzchar *us;
1304    unsigned char *s;
1305    intptr_t i, len;
1306    len = ifinish - istart;
1307    s = (unsigned char *)SCHEME_BYTE_STR_VAL(argv[0]);
1308    us = (mzchar *)scheme_malloc_atomic((len + 1) * sizeof(mzchar));
1309    for (i = istart; i < ifinish; i++) {
1310      us[i - istart] = s[i];
1311    }
1312    us[len] = 0;
1313
1314    return scheme_make_sized_char_string(us, len, 0);
1315  }
1316}
1317
1318
1319static Scheme_Object *
1320byte_string_to_char_string (int argc, Scheme_Object *argv[])
1321{
1322  return do_string_to_vector("bytes->string/utf-8", 0, argc, argv);
1323}
1324
1325static Scheme_Object *
1326byte_string_to_char_string_locale (int argc, Scheme_Object *argv[])
1327{
1328  return do_string_to_vector("bytes->string/locale", 1, argc, argv);
1329}
1330
1331static Scheme_Object *
1332byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[])
1333{
1334  return do_string_to_vector("bytes->string/latin-1", 2, argc, argv);
1335}
1336
1337Scheme_Object *scheme_byte_string_to_char_string(Scheme_Object *o)
1338{
1339  return do_byte_string_to_char_string("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD, 0);
1340}
1341
1342Scheme_Object *scheme_byte_string_to_char_string_locale(Scheme_Object *o)
1343{
1344  return do_byte_string_to_char_string_locale("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD);
1345}
1346
1347/************************* string->bytes *************************/
1348
1349static Scheme_Object *do_char_string_to_byte_string(Scheme_Object *s, intptr_t istart, intptr_t ifinish, 
1350						    int as_locale)
1351{
1352  char *bs;
1353  int slen;
1354
1355  slen = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
1356			    NULL, 0,
1357			    0 /* UTF-16 */);
1358  bs = (char *)scheme_malloc_atomic(slen + 1);
1359  scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
1360		     (unsigned char *)bs, 0,
1361		     0 /* UTF-16 */);
1362  bs[slen] = 0;
1363
1364  return scheme_make_sized_byte_string(bs, slen, 0);
1365}
1366
1367static Scheme_Object *
1368do_char_string_to_byte_string_locale(const char *who,
1369				     Scheme_Object *cstr,
1370				     intptr_t istart, intptr_t ifinish,
1371				     int perm)
1372{
1373  char *s;
1374  intptr_t olen;
1375
1376  reset_locale();
1377  if (!iconv_ready) init_iconv();
1378
1379  if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open))
1380    return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
1381
1382  if (istart < ifinish) {
1383    int no_cvt;
1384
1385    s = string_to_from_locale(1, (char *)SCHEME_CHAR_STR_VAL(cstr),
1386			      istart, ifinish - istart,
1387			      &olen, perm, &no_cvt);
1388
1389    if (!s) {
1390      if (no_cvt) {
1391	return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
1392      } else {
1393	scheme_arg_mismatch(who,
1394			    "string cannot be encoded for the current locale: ",
1395			    cstr);
1396      }
1397    }
1398    s[olen] = 0;
1399  } else {
1400    s = "";
1401    olen = 0;
1402  }
1403
1404  return scheme_make_sized_byte_string(s, olen, 0);
1405}
1406
1407
1408Scheme_Object *scheme_char_string_to_byte_string(Scheme_Object *s)
1409{
1410  return do_char_string_to_byte_string(s, 0, SCHEME_CHAR_STRLEN_VAL(s), 0);
1411}
1412
1413Scheme_Object *scheme_char_string_to_byte_string_locale(Scheme_Object *s)
1414{
1415  return do_char_string_to_byte_string_locale("s->s", s, 0, SCHEME_CHAR_STRLEN_VAL(s), '?');
1416}
1417
1418static Scheme_Object *do_chars_to_bytes(const char *who, int mode,
1419					int argc, Scheme_Object *argv[])
1420{
1421  intptr_t istart, ifinish;
1422  int permc;
1423
1424  if (!SCHEME_CHAR_STRINGP(argv[0]))
1425    scheme_wrong_type(who, "string", 0, argc, argv);
1426
1427  if ((argc < 2) || SCHEME_FALSEP(argv[1]))
1428    permc = -1;
1429  else {
1430    if (!SCHEME_BYTEP(argv[1]))
1431      scheme_wrong_type(who, "byte or #f", 1, argc, argv);
1432    permc = SCHEME_INT_VAL(argv[1]);
1433  }
1434
1435  scheme_get_substring_indices(who, argv[0], argc, argv,
1436			       2, 3, &istart, &ifinish);
1437
1438  if (mode == 1)
1439    return do_char_string_to_byte_string_locale(who, argv[0], istart, ifinish, permc);
1440  else if (mode == 0)
1441    return do_char_string_to_byte_string(argv[0], istart, ifinish, 0);
1442  else {
1443    /* Latin-1 */
1444    mzchar *us;
1445    unsigned char *s;
1446    intptr_t i, len;
1447    len = ifinish - istart;
1448    us = SCHEME_CHAR_STR_VAL(argv[0]);
1449    s = (unsigned char *)scheme_malloc_atomic(len + 1);
1450    for (i = istart; i < ifinish; i++) {
1451      if (us[i] < 256)
1452	s[i - istart] = us[i];
1453      else if (permc >= 0) {
1454	s[i - istart] = permc;
1455      } else {
1456	scheme_arg_mismatch(who,
1457			    "string cannot be encoded in Latin-1: ",
1458			    argv[0]);
1459      }
1460    }
1461    s[len] = 0;
1462
1463    return scheme_make_sized_byte_string((char *)s, len, 0);
1464  }
1465}
1466
1467static Scheme_Object *char_string_to_byte_string(int argc, Scheme_Object *argv[])
1468{
1469  return do_chars_to_bytes("string->bytes/utf-8", 0, argc, argv);
1470}
1471
1472static Scheme_Object *char_string_to_byte_string_locale(int argc, Scheme_Object *argv[])
1473{
1474  return do_chars_to_bytes("string->bytes/locale", 1, argc, argv);
1475}
1476
1477static Scheme_Object *char_string_to_byte_string_latin1(int argc, Scheme_Object *argv[])
1478{
1479  return do_chars_to_bytes("string->bytes/latin-1", 2, argc, argv);
1480}
1481
1482/************************* Other *************************/
1483
1484static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[])
1485{
1486  intptr_t istart, ifinish, len;
1487
1488  if (!SCHEME_CHAR_STRINGP(argv[0]))
1489    scheme_wrong_type("string-utf-8-length", "string", 0, argc, argv);
1490
1491  scheme_get_substring_indices("string-utf-8-length", argv[0], argc, argv,
1492			       1, 2, &istart, &ifinish);
1493
1494  len = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(argv[0]), istart, ifinish,
1495			   NULL, 0, 0);
1496
1497  return scheme_make_integer(len);
1498}
1499
1500static Scheme_Object *
1501byte_string_utf8_length (int argc, Scheme_Object *argv[])
1502{
1503  int len, perm;
1504  intptr_t istart, ifinish;
1505  char *chars;
1506
1507  if (!SCHEME_BYTE_STRINGP(argv[0]))
1508    scheme_wrong_type("bytes-utf-8-length", "string", 0, argc, argv);
1509
1510  chars = SCHEME_BYTE_STR_VAL(argv[0]);
1511
1512  if ((argc > 1) && !SCHEME_FALSEP(argv[1])) {
1513    if (!SCHEME_CHARP(argv[1]))
1514      scheme_wrong_type("bytes-utf-8-length", "character or #f", 1, argc, argv);
1515    perm = 1;
1516  } else
1517    perm = 0;
1518
1519  scheme_get_substring_indices("bytes-utf-8-length", argv[0], argc, argv,
1520			       2, 3,
1521			       &istart, &ifinish);
1522
1523  len = scheme_utf8_decode((unsigned char *)chars, istart, ifinish,
1524			   NULL, 0, -1,
1525			   NULL, 0, perm);
1526
1527  if (len < 0)
1528    return scheme_false;
1529  else
1530    return scheme_make_integer(len);
1531}
1532
1533static Scheme_Object *
1534byte_string_utf8_index(int argc, Scheme_Object *argv[])
1535{
1536  intptr_t istart, ifinish, pos = -1, opos, ipos;
1537  int result, perm;
1538  cha

Large files files are truncated, but you can click here to view the full file