PageRenderTime 114ms CodeModel.GetById 37ms app.highlight 58ms RepoModel.GetById 1ms app.codeStats 1ms

/src/racket/src/string.c

http://github.com/gmarceau/PLT
C | 5657 lines | 4630 code | 683 blank | 344 comment | 1048 complexity | 0664fdaee9cf399f842278273762caf3 MD5 | raw file

Large files files are truncated, but you can click here to view the full file

   1/*
   2  Racket
   3  Copyright (c) 2004-2011 PLT Scheme Inc.
   4  Copyright (c) 1995-2001 Matthew Flatt
   5
   6    This library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Library General Public
   8    License as published by the Free Software Foundation; either
   9    version 2 of the License, or (at your option) any later version.
  10
  11    This library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Library General Public License for more details.
  15
  16    You should have received a copy of the GNU Library General Public
  17    License along with this library; if not, write to the Free
  18    Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19    Boston, MA 02110-1301 USA.
  20
  21  libscheme
  22  Copyright (c) 1994 Brent Benson
  23  All rights reserved.
  24*/
  25
  26#include "schpriv.h"
  27#include "schvers.h"
  28#include <string.h>
  29#include <ctype.h>
  30#ifndef DONT_USE_LOCALE
  31# include <locale.h>
  32# ifdef MZ_NO_ICONV
  33#  define USE_ICONV_DLL
  34# endif
  35# ifndef USE_ICONV_DLL
  36#  include <iconv.h>
  37#  include <langinfo.h>
  38# endif
  39# include <wchar.h>
  40# include <wctype.h>
  41# include <errno.h>
  42# ifdef MACOS_UNICODE_SUPPORT
  43#  include <CoreFoundation/CFString.h>
  44#  include <CoreFoundation/CFLocale.h>
  45# endif
  46# ifdef WINDOWS_UNICODE_SUPPORT
  47#  include <windows.h>
  48# endif
  49#endif
  50
  51#ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH
  52# include "schsys.h"
  53#endif
  54
  55#include "schustr.inc"
  56
  57#ifdef USE_ICONV_DLL
  58typedef intptr_t iconv_t;
  59typedef int *(*errno_proc_t)();
  60typedef size_t (*iconv_proc_t)(iconv_t cd,
  61			       char **inbuf, size_t *inbytesleft,
  62			       char **outbuf, size_t *outbytesleft);
  63typedef iconv_t (*iconv_open_proc_t)(const char *tocode, const char *fromcode);
  64typedef void (*iconv_close_proc_t)(iconv_t cd);
  65typedef char *(*locale_charset_proc_t)();
  66static errno_proc_t iconv_errno;
  67static iconv_proc_t iconv;
  68static iconv_open_proc_t iconv_open;
  69static iconv_close_proc_t iconv_close;
  70static locale_charset_proc_t locale_charset; /* Not used, currently */
  71#define mzCHK_PROC(x) x
  72static int get_iconv_errno(void)
  73{
  74  int *a;
  75  a = iconv_errno();
  76  return *a;
  77}
  78# undef HAVE_CODESET
  79# define HAVE_CODESET 1
  80# define CODESET 0
  81# define ICONV_errno get_iconv_errno()
  82extern wchar_t *scheme_get_dll_path(wchar_t *s);
  83static int iconv_ready = 0;
  84static void init_iconv()
  85{
  86# ifdef MZ_NO_ICONV
  87# else
  88  HMODULE m;
  89  m = LoadLibraryW(scheme_get_dll_path(L"iconv.dll"));
  90  if (!m)
  91    m = LoadLibraryW(scheme_get_dll_path(L"libiconv.dll"));
  92  if (!m)
  93    m = LoadLibraryW(scheme_get_dll_path(L"libiconv-2.dll"));
  94  if (!m)
  95    m = LoadLibrary("iconv.dll");
  96  if (!m)
  97    m = LoadLibrary("libiconv.dll");
  98  if (!m)
  99    m = LoadLibrary("libiconv-2.dll");
 100  if (m) {
 101    iconv = (iconv_proc_t)GetProcAddress(m, "libiconv");
 102    iconv_open = (iconv_open_proc_t)GetProcAddress(m, "libiconv_open");
 103    iconv_close = (iconv_close_proc_t)GetProcAddress(m, "libiconv_close");
 104    locale_charset = (locale_charset_proc_t)GetProcAddress(m, "locale_charset");
 105    /* Make sure we have all of them or none: */
 106    if (!iconv || !iconv_open || !iconv_close) {
 107      iconv = NULL;
 108      iconv_open = NULL;
 109      iconv_close = NULL;
 110    }
 111  }
 112  if (iconv) {
 113    iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno");
 114    if (!iconv_errno) {
 115      /* The iconv.dll distributed with PLT Scheme links to msvcrt.dll.
 116	 It's a slighly dangerous assumption that whatever iconv we
 117	 found also uses msvcrt.dll. */
 118      m = LoadLibrary("msvcrt.dll");
 119      if (m) {
 120	iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno");
 121	if (!iconv_errno) {
 122	  iconv = NULL;
 123	  iconv_open = NULL;
 124	  iconv_close = NULL;
 125	}
 126      }
 127    }
 128  }
 129# endif
 130  iconv_ready = 1;
 131}
 132#else
 133# define ICONV_errno errno
 134# define iconv_ready 1
 135# define mzCHK_PROC(x) 1
 136static void init_iconv() { }
 137#endif
 138
 139#ifdef MACOS_UNICODE_SUPPORT
 140# define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
 141#endif
 142#ifdef WINDOWS_UNICODE_SUPPORT
 143# define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
 144#endif
 145#ifndef mzLOCALE_IS_UTF_8
 146# define mzLOCALE_IS_UTF_8(s) !mzCHK_PROC(iconv_open)
 147#endif
 148
 149#define mzICONV_KIND 0
 150#define mzUTF8_KIND 1
 151#define mzUTF8_TO_UTF16_KIND 2
 152#define mzUTF16_TO_UTF8_KIND 3
 153
 154typedef struct Scheme_Converter {
 155  Scheme_Object so;
 156  short closed;
 157  short kind;
 158  iconv_t cd;
 159  int permissive;
 160  Scheme_Custodian_Reference *mref;
 161} Scheme_Converter;
 162
 163/* locals */
 164
 165/* These two locale variables are only valid when reset_locale()
 166   is called after continuation marks (and hence parameterization)
 167   may have changed. Similarly, setlocale() is only up-to-date
 168   when reset_locale() has been called. */
 169THREAD_LOCAL_DECL(static int locale_on);
 170THREAD_LOCAL_DECL(static void *current_locale_name_ptr);
 171static void reset_locale(void);
 172
 173#define current_locale_name ((const mzchar *)current_locale_name_ptr)
 174
 175#ifdef USE_ICONV_DLL
 176static char *nl_langinfo(int which)
 177{
 178  int i;
 179
 180  reset_locale();
 181  if (!current_locale_name)
 182    current_locale_name_ptr = "\0\0\0\0";
 183
 184  if ((current_locale_name[0] == 'C')
 185      && !current_locale_name[1])
 186    return "US-ASCII";
 187
 188  for (i = 0; current_locale_name[i]; i++) {
 189    if (current_locale_name[i] == '.') {
 190      if (current_locale_name[i + 1]) {
 191	int len, j = 0;
 192	char *enc;
 193	i++;
 194	len = scheme_char_strlen(current_locale_name) - i;
 195	enc = (char *)scheme_malloc_atomic(len + 1);
 196	while (current_locale_name[i]) {
 197	  if (current_locale_name[i] > 127)
 198	    return "UTF-8";
 199	  enc[j++] = current_locale_name[i++];
 200	}
 201	enc[j] = 0;
 202	return enc;
 203      }
 204    }
 205  }
 206
 207  return "UTF-8";
 208}
 209#endif
 210
 211#ifdef DONT_USE_LOCALE
 212# define mz_iconv_nl_langinfo() ""
 213#else
 214static char *mz_iconv_nl_langinfo(){
 215  char *s;
 216# if HAVE_CODESET
 217  s = nl_langinfo(CODESET);
 218# else
 219  s = NULL;
 220# endif
 221  if (!s)
 222    return "";
 223  else
 224    return s;
 225}
 226#endif
 227
 228READ_ONLY static const char * const STRING_IS_NOT_UTF_8 = "string is not a well-formed UTF-8 encoding: ";
 229
 230static Scheme_Object *make_string (int argc, Scheme_Object *argv[]);
 231static Scheme_Object *string (int argc, Scheme_Object *argv[]);
 232static Scheme_Object *string_p (int argc, Scheme_Object *argv[]);
 233static Scheme_Object *string_length (int argc, Scheme_Object *argv[]);
 234static Scheme_Object *string_eq (int argc, Scheme_Object *argv[]);
 235static Scheme_Object *string_locale_eq (int argc, Scheme_Object *argv[]);
 236static Scheme_Object *string_ci_eq (int argc, Scheme_Object *argv[]);
 237static Scheme_Object *string_locale_ci_eq (int argc, Scheme_Object *argv[]);
 238static Scheme_Object *string_lt (int argc, Scheme_Object *argv[]);
 239static Scheme_Object *string_locale_lt (int argc, Scheme_Object *argv[]);
 240static Scheme_Object *string_gt (int argc, Scheme_Object *argv[]);
 241static Scheme_Object *string_locale_gt (int argc, Scheme_Object *argv[]);
 242static Scheme_Object *string_lt_eq (int argc, Scheme_Object *argv[]);
 243static Scheme_Object *string_gt_eq (int argc, Scheme_Object *argv[]);
 244static Scheme_Object *string_ci_lt (int argc, Scheme_Object *argv[]);
 245static Scheme_Object *string_locale_ci_lt (int argc, Scheme_Object *argv[]);
 246static Scheme_Object *string_ci_gt (int argc, Scheme_Object *argv[]);
 247static Scheme_Object *string_locale_ci_gt (int argc, Scheme_Object *argv[]);
 248static Scheme_Object *string_ci_lt_eq (int argc, Scheme_Object *argv[]);
 249static Scheme_Object *string_ci_gt_eq (int argc, Scheme_Object *argv[]);
 250static Scheme_Object *string_upcase (int argc, Scheme_Object *argv[]);
 251static Scheme_Object *string_downcase (int argc, Scheme_Object *argv[]);
 252static Scheme_Object *string_titlecase (int argc, Scheme_Object *argv[]);
 253static Scheme_Object *string_foldcase (int argc, Scheme_Object *argv[]);
 254static Scheme_Object *string_locale_upcase (int argc, Scheme_Object *argv[]);
 255static Scheme_Object *string_locale_downcase (int argc, Scheme_Object *argv[]);
 256static Scheme_Object *substring (int argc, Scheme_Object *argv[]);
 257static Scheme_Object *string_append (int argc, Scheme_Object *argv[]);
 258static Scheme_Object *string_to_list (int argc, Scheme_Object *argv[]);
 259static Scheme_Object *list_to_string (int argc, Scheme_Object *argv[]);
 260static Scheme_Object *string_copy (int argc, Scheme_Object *argv[]);
 261static Scheme_Object *string_copy_bang (int argc, Scheme_Object *argv[]);
 262static Scheme_Object *string_fill (int argc, Scheme_Object *argv[]);
 263static Scheme_Object *string_to_immutable (int argc, Scheme_Object *argv[]);
 264static Scheme_Object *string_normalize_c (int argc, Scheme_Object *argv[]);
 265static Scheme_Object *string_normalize_kc (int argc, Scheme_Object *argv[]);
 266static Scheme_Object *string_normalize_d (int argc, Scheme_Object *argv[]);
 267static Scheme_Object *string_normalize_kd (int argc, Scheme_Object *argv[]);
 268
 269static Scheme_Object *make_shared_byte_string (int argc, Scheme_Object *argv[]);
 270static Scheme_Object *shared_byte_string (int argc, Scheme_Object *argv[]);
 271
 272static Scheme_Object *make_byte_string (int argc, Scheme_Object *argv[]);
 273static Scheme_Object *byte_string (int argc, Scheme_Object *argv[]);
 274static Scheme_Object *byte_p (int argc, Scheme_Object *argv[]);
 275static Scheme_Object *byte_string_p (int argc, Scheme_Object *argv[]);
 276static Scheme_Object *byte_string_length (int argc, Scheme_Object *argv[]);
 277static Scheme_Object *byte_string_eq (int argc, Scheme_Object *argv[]);
 278static Scheme_Object *byte_string_lt (int argc, Scheme_Object *argv[]);
 279static Scheme_Object *byte_string_gt (int argc, Scheme_Object *argv[]);
 280static Scheme_Object *byte_substring (int argc, Scheme_Object *argv[]);
 281static Scheme_Object *byte_string_append (int argc, Scheme_Object *argv[]);
 282static Scheme_Object *byte_string_to_list (int argc, Scheme_Object *argv[]);
 283static Scheme_Object *list_to_byte_string (int argc, Scheme_Object *argv[]);
 284static Scheme_Object *byte_string_copy (int argc, Scheme_Object *argv[]);
 285static Scheme_Object *byte_string_copy_bang (int argc, Scheme_Object *argv[]);
 286static Scheme_Object *byte_string_fill (int argc, Scheme_Object *argv[]);
 287static Scheme_Object *byte_string_to_immutable (int argc, Scheme_Object *argv[]);
 288
 289static Scheme_Object *byte_string_utf8_index (int argc, Scheme_Object *argv[]);
 290static Scheme_Object *byte_string_utf8_ref (int argc, Scheme_Object *argv[]);
 291static Scheme_Object *byte_string_utf8_length (int argc, Scheme_Object *argv[]);
 292
 293static Scheme_Object *byte_string_to_char_string (int argc, Scheme_Object *argv[]);
 294static Scheme_Object *byte_string_to_char_string_locale (int argc, Scheme_Object *argv[]);
 295static Scheme_Object *byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[]);
 296static Scheme_Object *char_string_to_byte_string (int argc, Scheme_Object *argv[]);
 297static Scheme_Object *char_string_to_byte_string_locale (int argc, Scheme_Object *argv[]);
 298static Scheme_Object *char_string_to_byte_string_latin1 (int argc, Scheme_Object *argv[]);
 299static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[]);
 300
 301static Scheme_Object *version(int argc, Scheme_Object *argv[]);
 302static Scheme_Object *format(int argc, Scheme_Object *argv[]);
 303static Scheme_Object *sch_printf(int argc, Scheme_Object *argv[]);
 304static Scheme_Object *sch_eprintf(int argc, Scheme_Object *argv[]);
 305static Scheme_Object *sch_fprintf(int argc, Scheme_Object *argv[]);
 306static Scheme_Object *banner(int argc, Scheme_Object *argv[]);
 307static Scheme_Object *sch_getenv(int argc, Scheme_Object *argv[]);
 308static Scheme_Object *sch_putenv(int argc, Scheme_Object *argv[]);
 309static Scheme_Object *system_type(int argc, Scheme_Object *argv[]);
 310static Scheme_Object *system_library_subpath(int argc, Scheme_Object *argv[]);
 311static Scheme_Object *cmdline_args(int argc, Scheme_Object *argv[]);
 312static Scheme_Object *current_locale(int argc, Scheme_Object *argv[]);
 313static Scheme_Object *locale_string_encoding(int argc, Scheme_Object *argv[]);
 314static Scheme_Object *system_language_country(int argc, Scheme_Object *argv[]);
 315
 316static Scheme_Object *byte_string_open_converter(int argc, Scheme_Object *argv[]);
 317static Scheme_Object *byte_string_close_converter(int argc, Scheme_Object *argv[]);
 318static Scheme_Object *byte_string_convert(int argc, Scheme_Object *argv[]);
 319static Scheme_Object *byte_string_convert_end(int argc, Scheme_Object *argv[]);
 320static Scheme_Object *byte_converter_p(int argc, Scheme_Object *argv[]);
 321
 322#ifdef MZ_PRECISE_GC
 323static void register_traversers(void);
 324#endif
 325
 326static int mz_char_strcmp(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
 327static int mz_char_strcmp_ci(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
 328static int mz_strcmp(const char *who, unsigned char *str1, intptr_t l1, unsigned char *str2, intptr_t l2);
 329
 330XFORM_NONGCING static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t end,
 331					unsigned int *us, intptr_t dstart, intptr_t dend,
 332					intptr_t *ipos, intptr_t *jpos,
 333					char compact, char utf16,
 334					int *state, int might_continue, int permissive);
 335XFORM_NONGCING static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t end,
 336					unsigned char *s, intptr_t dstart, intptr_t dend,
 337					intptr_t *_ipos, intptr_t *_opos, char utf16);
 338
 339static char *string_to_from_locale(int to_bytes,
 340				   char *in, intptr_t delta, intptr_t len,
 341				   intptr_t *olen, int perm,
 342				   int *no_cvt);
 343
 344#define portable_isspace(x) (((x) < 128) && isspace(x))
 345
 346ROSYM static Scheme_Object *sys_symbol;
 347ROSYM static Scheme_Object *platform_3m_path, *platform_cgc_path;
 348READ_ONLY static Scheme_Object *zero_length_char_string;
 349READ_ONLY static Scheme_Object *zero_length_byte_string;
 350
 351SHARED_OK static Scheme_Hash_Table *putenv_str_table;
 352
 353SHARED_OK static char *embedding_banner;
 354SHARED_OK static Scheme_Object *vers_str;
 355SHARED_OK static Scheme_Object *banner_str;
 356
 357READ_ONLY static Scheme_Object *complete_symbol, *continues_symbol, *aborts_symbol, *error_symbol;
 358
 359void
 360scheme_init_string (Scheme_Env *env)
 361{
 362  Scheme_Object *p;
 363
 364  REGISTER_SO(sys_symbol);
 365  sys_symbol = scheme_intern_symbol(SYSTEM_TYPE_NAME);
 366
 367  REGISTER_SO(zero_length_char_string);
 368  REGISTER_SO(zero_length_byte_string);
 369  zero_length_char_string = scheme_alloc_char_string(0, 0);
 370  zero_length_byte_string = scheme_alloc_byte_string(0, 0);
 371
 372  REGISTER_SO(complete_symbol);
 373  REGISTER_SO(continues_symbol);
 374  REGISTER_SO(aborts_symbol);
 375  REGISTER_SO(error_symbol);
 376  complete_symbol = scheme_intern_symbol("complete");
 377  continues_symbol = scheme_intern_symbol("continues");
 378  aborts_symbol = scheme_intern_symbol("aborts");
 379  error_symbol = scheme_intern_symbol("error");
 380
 381  REGISTER_SO(platform_3m_path);
 382#ifdef UNIX_FILE_SYSTEM
 383# define MZ3M_SUBDIR "/3m"
 384#else
 385# ifdef DOS_FILE_SYSTEM
 386#  define MZ3M_SUBDIR "\\3m"
 387# else
 388#  define MZ3M_SUBDIR ":3m"
 389# endif
 390#endif
 391  REGISTER_SO(platform_3m_path);
 392  REGISTER_SO(platform_cgc_path);
 393  platform_cgc_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH);
 394  platform_3m_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH MZ3M_SUBDIR);
 395
 396  REGISTER_SO(putenv_str_table);
 397
 398  REGISTER_SO(embedding_banner);
 399  REGISTER_SO(vers_str);
 400  REGISTER_SO(banner_str);
 401
 402  vers_str = scheme_make_utf8_string(scheme_version());
 403  SCHEME_SET_CHAR_STRING_IMMUTABLE(vers_str);
 404  banner_str = scheme_make_utf8_string(scheme_banner());
 405  SCHEME_SET_CHAR_STRING_IMMUTABLE(banner_str);
 406
 407  p = scheme_make_folding_prim(string_p, "string?", 1, 1, 1);
 408  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
 409  scheme_add_global_constant("string?", p, env);
 410
 411  scheme_add_global_constant("make-string",
 412			     scheme_make_immed_prim(make_string,
 413						    "make-string",
 414						    1, 2),
 415			     env);
 416  scheme_add_global_constant("string",
 417			     scheme_make_immed_prim(string,
 418						    "string",
 419						    0, -1),
 420			     env);
 421  scheme_add_global_constant("string-length",
 422			     scheme_make_folding_prim(string_length,
 423						      "string-length",
 424						      1, 1, 1),
 425			     env);
 426
 427  p = scheme_make_immed_prim(scheme_checked_string_ref, "string-ref", 2, 2);
 428  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED;
 429  scheme_add_global_constant("string-ref", p, env);
 430
 431
 432  p = scheme_make_immed_prim(scheme_checked_string_set, "string-set!", 3, 3);
 433  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
 434  scheme_add_global_constant("string-set!", p, env);
 435
 436  scheme_add_global_constant("string=?",
 437			     scheme_make_immed_prim(string_eq,
 438						    "string=?",
 439						    2, -1),
 440			     env);
 441  scheme_add_global_constant("string-locale=?",
 442			     scheme_make_immed_prim(string_locale_eq,
 443						    "string-locale=?",
 444						    2, -1),
 445			     env);
 446  scheme_add_global_constant("string-ci=?",
 447			     scheme_make_immed_prim(string_ci_eq,
 448						    "string-ci=?",
 449						    2, -1),
 450			     env);
 451  scheme_add_global_constant("string-locale-ci=?",
 452			     scheme_make_immed_prim(string_locale_ci_eq,
 453						    "string-locale-ci=?",
 454						    2, -1),
 455			     env);
 456  scheme_add_global_constant("string<?",
 457			     scheme_make_immed_prim(string_lt,
 458						    "string<?",
 459						    2, -1),
 460			     env);
 461  scheme_add_global_constant("string-locale<?",
 462			     scheme_make_immed_prim(string_locale_lt,
 463						    "string-locale<?",
 464						    2, -1),
 465			     env);
 466  scheme_add_global_constant("string>?",
 467			     scheme_make_immed_prim(string_gt,
 468						    "string>?",
 469						    2, -1),
 470			     env);
 471  scheme_add_global_constant("string-locale>?",
 472			     scheme_make_immed_prim(string_locale_gt,
 473						    "string-locale>?",
 474						    2, -1),
 475			     env);
 476  scheme_add_global_constant("string<=?",
 477			     scheme_make_immed_prim(string_lt_eq,
 478						    "string<=?",
 479						    2, -1),
 480			     env);
 481  scheme_add_global_constant("string>=?",
 482			     scheme_make_immed_prim(string_gt_eq,
 483						    "string>=?",
 484						    2, -1),
 485			     env);
 486  scheme_add_global_constant("string-ci<?",
 487			     scheme_make_immed_prim(string_ci_lt,
 488						    "string-ci<?",
 489						    2, -1),
 490			     env);
 491  scheme_add_global_constant("string-locale-ci<?",
 492			     scheme_make_immed_prim(string_locale_ci_lt,
 493						    "string-locale-ci<?",
 494						    2, -1),
 495			     env);
 496  scheme_add_global_constant("string-ci>?",
 497			     scheme_make_immed_prim(string_ci_gt,
 498						    "string-ci>?",
 499						    2, -1),
 500			     env);
 501  scheme_add_global_constant("string-locale-ci>?",
 502			     scheme_make_immed_prim(string_locale_ci_gt,
 503						    "string-locale-ci>?",
 504						    2, -1),
 505			     env);
 506  scheme_add_global_constant("string-ci<=?",
 507			     scheme_make_immed_prim(string_ci_lt_eq,
 508						    "string-ci<=?",
 509						    2, -1),
 510			     env);
 511  scheme_add_global_constant("string-ci>=?",
 512			     scheme_make_immed_prim(string_ci_gt_eq,
 513						    "string-ci>=?",
 514						    2, -1),
 515			     env);
 516
 517  scheme_add_global_constant("substring",
 518			     scheme_make_immed_prim(substring,
 519						    "substring",
 520						    2, 3),
 521			     env);
 522  scheme_add_global_constant("string-append",
 523			     scheme_make_immed_prim(string_append,
 524						    "string-append",
 525						    0, -1),
 526			     env);
 527  scheme_add_global_constant("string->list",
 528			     scheme_make_immed_prim(string_to_list,
 529						    "string->list",
 530						    1, 1),
 531			     env);
 532  scheme_add_global_constant("list->string",
 533			     scheme_make_immed_prim(list_to_string,
 534						    "list->string",
 535						    1, 1),
 536			     env);
 537  scheme_add_global_constant("string-copy",
 538			     scheme_make_immed_prim(string_copy,
 539						    "string-copy",
 540						    1, 1),
 541			     env);
 542  scheme_add_global_constant("string-copy!",
 543			     scheme_make_immed_prim(string_copy_bang,
 544						    "string-copy!",
 545						    3, 5),
 546			     env);
 547  scheme_add_global_constant("string-fill!",
 548			     scheme_make_immed_prim(string_fill,
 549						    "string-fill!",
 550						    2, 2),
 551			     env);
 552  scheme_add_global_constant("string->immutable-string",
 553			     scheme_make_immed_prim(string_to_immutable,
 554						    "string->immutable-string",
 555						    1, 1),
 556			     env);
 557  scheme_add_global_constant("string-normalize-nfc",
 558			     scheme_make_immed_prim(string_normalize_c,
 559						    "string-normalize-nfc",
 560						    1, 1),
 561			     env);
 562  scheme_add_global_constant("string-normalize-nfkc",
 563			     scheme_make_immed_prim(string_normalize_kc,
 564						    "string-normalize-nfkc",
 565						    1, 1),
 566			     env);
 567  scheme_add_global_constant("string-normalize-nfd",
 568			     scheme_make_immed_prim(string_normalize_d,
 569						    "string-normalize-nfd",
 570						    1, 1),
 571			     env);
 572  scheme_add_global_constant("string-normalize-nfkd",
 573			     scheme_make_immed_prim(string_normalize_kd,
 574						    "string-normalize-nfkd",
 575						    1, 1),
 576			     env);
 577
 578  scheme_add_global_constant("string-upcase",
 579			     scheme_make_immed_prim(string_upcase,
 580						    "string-upcase",
 581						    1, 1),
 582			     env);
 583  scheme_add_global_constant("string-downcase",
 584			     scheme_make_immed_prim(string_downcase,
 585						    "string-downcase",
 586						    1, 1),
 587			     env);
 588  scheme_add_global_constant("string-titlecase",
 589			     scheme_make_immed_prim(string_titlecase,
 590						    "string-titlecase",
 591						    1, 1),
 592			     env);
 593  scheme_add_global_constant("string-foldcase",
 594			     scheme_make_immed_prim(string_foldcase,
 595						    "string-foldcase",
 596						    1, 1),
 597			     env);
 598
 599  scheme_add_global_constant("string-locale-upcase",
 600			     scheme_make_immed_prim(string_locale_upcase,
 601						    "string-locale-upcase",
 602						    1, 1),
 603			     env);
 604  scheme_add_global_constant("string-locale-downcase",
 605			     scheme_make_immed_prim(string_locale_downcase,
 606						    "string-locale-downcase",
 607						    1, 1),
 608			     env);
 609
 610  scheme_add_global_constant("current-locale",
 611			     scheme_register_parameter(current_locale,
 612						       "current-locale",
 613						       MZCONFIG_LOCALE),
 614			     env);
 615  scheme_add_global_constant("locale-string-encoding",
 616			     scheme_make_immed_prim(locale_string_encoding,
 617						    "locale-string-encoding",
 618						    0, 0),
 619			     env);
 620  scheme_add_global_constant("system-language+country",
 621			     scheme_make_immed_prim(system_language_country,
 622						    "system-language+country",
 623						    0, 0),
 624			     env);
 625
 626  scheme_add_global_constant("bytes-converter?",
 627			     scheme_make_immed_prim(byte_converter_p,
 628						    "bytes-converter?",
 629						    1, 1),
 630			     env);
 631  scheme_add_global_constant("bytes-convert",
 632			     scheme_make_prim_w_arity2(byte_string_convert,
 633						       "bytes-convert",
 634						       1, 7,
 635						       3, 3),
 636			     env);
 637  scheme_add_global_constant("bytes-convert-end",
 638			     scheme_make_prim_w_arity2(byte_string_convert_end,
 639						       "bytes-convert-end",
 640						       0, 3,
 641						       2, 2),
 642			     env);
 643  scheme_add_global_constant("bytes-open-converter",
 644			     scheme_make_immed_prim(byte_string_open_converter,
 645						    "bytes-open-converter",
 646						    2, 2),
 647			     env);
 648  scheme_add_global_constant("bytes-close-converter",
 649			     scheme_make_immed_prim(byte_string_close_converter,
 650						    "bytes-close-converter",
 651						    1, 1),
 652			     env);
 653
 654  scheme_add_global_constant("format",
 655			     scheme_make_noncm_prim(format,
 656                                                    "format",
 657                                                    1, -1),
 658			     env);
 659  scheme_add_global_constant("printf",
 660			     scheme_make_noncm_prim(sch_printf,
 661                                                    "printf",
 662                                                    1, -1),
 663			     env);
 664  scheme_add_global_constant("eprintf",
 665			     scheme_make_noncm_prim(sch_eprintf,
 666                                                    "eprintf",
 667                                                    1, -1),
 668			     env);
 669  scheme_add_global_constant("fprintf",
 670			     scheme_make_noncm_prim(sch_fprintf,
 671                                                    "fprintf",
 672                                                    2, -1),
 673			     env);
 674
 675  scheme_add_global_constant("byte?",
 676			     scheme_make_folding_prim(byte_p,
 677						      "byte?",
 678						      1, 1, 1),
 679			     env);
 680
 681  p = scheme_make_folding_prim(byte_string_p, "bytes?", 1, 1, 1);
 682  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
 683  scheme_add_global_constant("bytes?", p, env);
 684
 685  scheme_add_global_constant("make-bytes",
 686			     scheme_make_immed_prim(make_byte_string,
 687						    "make-bytes",
 688						    1, 2),
 689			     env);
 690  scheme_add_global_constant("bytes",
 691			     scheme_make_immed_prim(byte_string,
 692						    "bytes",
 693						    0, -1),
 694			     env);
 695
 696  GLOBAL_PRIM_W_ARITY("make-shared-bytes", make_shared_byte_string, 1, 2, env);
 697  GLOBAL_PRIM_W_ARITY("shared-bytes", shared_byte_string, 0, -1, env);
 698
 699  scheme_add_global_constant("bytes-length",
 700			     scheme_make_folding_prim(byte_string_length,
 701						      "bytes-length",
 702						      1, 1, 1),
 703			     env);
 704
 705  p = scheme_make_immed_prim(scheme_checked_byte_string_ref, "bytes-ref", 2, 2);
 706  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED;
 707  scheme_add_global_constant("bytes-ref", p, env);
 708
 709  p = scheme_make_immed_prim(scheme_checked_byte_string_set, "bytes-set!", 3, 3);
 710  SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
 711  scheme_add_global_constant("bytes-set!", p, env);
 712
 713  scheme_add_global_constant("bytes=?",
 714			     scheme_make_immed_prim(byte_string_eq,
 715						    "bytes=?",
 716						    2, -1),
 717			     env);
 718  scheme_add_global_constant("bytes<?",
 719			     scheme_make_immed_prim(byte_string_lt,
 720						    "bytes<?",
 721						    2, -1),
 722			     env);
 723  scheme_add_global_constant("bytes>?",
 724			     scheme_make_immed_prim(byte_string_gt,
 725						    "bytes>?",
 726						    2, -1),
 727			     env);
 728
 729  scheme_add_global_constant("subbytes",
 730			     scheme_make_immed_prim(byte_substring,
 731						    "subbytes",
 732						    2, 3),
 733			     env);
 734  scheme_add_global_constant("bytes-append",
 735			     scheme_make_immed_prim(byte_string_append,
 736						    "bytes-append",
 737						    0, -1),
 738			     env);
 739  scheme_add_global_constant("bytes->list",
 740			     scheme_make_immed_prim(byte_string_to_list,
 741						    "bytes->list",
 742						    1, 1),
 743			     env);
 744  scheme_add_global_constant("list->bytes",
 745			     scheme_make_immed_prim(list_to_byte_string,
 746						    "list->bytes",
 747						    1, 1),
 748			     env);
 749  scheme_add_global_constant("bytes-copy",
 750			     scheme_make_immed_prim(byte_string_copy,
 751						    "bytes-copy",
 752						    1, 1),
 753			     env);
 754  scheme_add_global_constant("bytes-copy!",
 755			     scheme_make_immed_prim(byte_string_copy_bang,
 756						    "bytes-copy!",
 757						    3, 5),
 758			     env);
 759  scheme_add_global_constant("bytes-fill!",
 760			     scheme_make_immed_prim(byte_string_fill,
 761						    "bytes-fill!",
 762						    2, 2),
 763			     env);
 764  scheme_add_global_constant("bytes->immutable-bytes",
 765			     scheme_make_immed_prim(byte_string_to_immutable,
 766						    "bytes->immutable-bytes",
 767						    1, 1),
 768			     env);
 769
 770
 771  scheme_add_global_constant("bytes-utf-8-index",
 772			     scheme_make_immed_prim(byte_string_utf8_index,
 773						    "bytes-utf-8-index",
 774						    2, 4),
 775			     env);
 776  scheme_add_global_constant("bytes-utf-8-length",
 777			     scheme_make_immed_prim(byte_string_utf8_length,
 778						    "bytes-utf-8-length",
 779						    1, 4),
 780			     env);
 781  scheme_add_global_constant("bytes-utf-8-ref",
 782			     scheme_make_immed_prim(byte_string_utf8_ref,
 783						    "bytes-utf-8-ref",
 784						    2, 4),
 785			     env);
 786
 787  scheme_add_global_constant("bytes->string/utf-8",
 788			     scheme_make_immed_prim(byte_string_to_char_string,
 789						    "bytes->string/utf-8",
 790						    1, 4),
 791			     env);
 792  scheme_add_global_constant("bytes->string/locale",
 793			     scheme_make_immed_prim(byte_string_to_char_string_locale,
 794						    "bytes->string/locale",
 795						    1, 4),
 796			     env);
 797  scheme_add_global_constant("bytes->string/latin-1",
 798			     scheme_make_immed_prim(byte_string_to_char_string_latin1,
 799						    "bytes->string/latin-1",
 800						    1, 4),
 801			     env);
 802  scheme_add_global_constant("string->bytes/utf-8",
 803			     scheme_make_immed_prim(char_string_to_byte_string,
 804						    "string->bytes/utf-8",
 805						    1, 4),
 806			     env);
 807  scheme_add_global_constant("string->bytes/locale",
 808			     scheme_make_immed_prim(char_string_to_byte_string_locale,
 809						    "string->bytes/locale",
 810						    1, 4),
 811			     env);
 812  scheme_add_global_constant("string->bytes/latin-1",
 813			     scheme_make_immed_prim(char_string_to_byte_string_latin1,
 814						    "string->bytes/latin-1",
 815						    1, 4),
 816			     env);
 817
 818  scheme_add_global_constant("string-utf-8-length",
 819			     scheme_make_immed_prim(char_string_utf8_length,
 820						    "string-utf-8-length",
 821						    1, 3),
 822			     env);
 823
 824
 825  /* In principle, `version' could be foldable, but it invites
 826     more problems than it solves... */
 827
 828  scheme_add_global_constant("version",
 829			     scheme_make_immed_prim(version,
 830						    "version",
 831						    0, 0),
 832			     env);
 833  scheme_add_global_constant("banner",
 834			     scheme_make_immed_prim(banner,
 835						    "banner",
 836						    0, 0),
 837			     env);
 838
 839  scheme_add_global_constant("getenv",
 840			     scheme_make_immed_prim(sch_getenv,
 841						    "getenv",
 842						    1, 1),
 843			     env);
 844  scheme_add_global_constant("putenv",
 845			     scheme_make_immed_prim(sch_putenv,
 846						    "putenv",
 847						    2, 2),
 848			     env);
 849
 850  /* Don't make these folding, since they're platform-specific: */
 851
 852  scheme_add_global_constant("system-type",
 853			     scheme_make_immed_prim(system_type,
 854						    "system-type",
 855						    0, 1),
 856			     env);
 857  scheme_add_global_constant("system-library-subpath",
 858			     scheme_make_immed_prim(system_library_subpath,
 859						    "system-library-subpath",
 860						    0, 1),
 861			     env);
 862
 863  scheme_add_global_constant("current-command-line-arguments",
 864			     scheme_register_parameter(cmdline_args,
 865						       "current-command-line-arguments",
 866						       MZCONFIG_CMDLINE_ARGS),
 867			     env);
 868
 869#ifdef MZ_PRECISE_GC
 870  register_traversers();
 871#endif
 872}
 873
 874void scheme_init_string_places(void) {
 875  REGISTER_SO(current_locale_name_ptr);
 876  current_locale_name_ptr = "xxxx\0\0\0\0";
 877}
 878
 879/**********************************************************************/
 880/*                     UTF-8 char constructors                        */
 881/**********************************************************************/
 882
 883Scheme_Object *scheme_make_sized_offset_utf8_string(char *chars, intptr_t d, intptr_t len)
 884{
 885  intptr_t ulen;
 886  mzchar *us;
 887
 888  if (len) {
 889    ulen = scheme_utf8_decode((unsigned char *)chars, d, d + len,
 890			      NULL, 0, -1,
 891			      NULL, 0 /* not UTF-16 */, 0xFFFD);
 892    us = scheme_malloc_atomic(sizeof(mzchar) * (ulen + 1));
 893    scheme_utf8_decode((unsigned char *)chars, d, d + len,
 894		       us, 0, -1,
 895		       NULL, 0 /* not UTF-16 */, 0xFFFD);
 896
 897    us[ulen] = 0;
 898  } else {
 899    us = (mzchar *)"\0\0\0";
 900    ulen = 0;
 901  }
 902  return scheme_make_sized_offset_char_string(us, 0, ulen, 0);
 903}
 904
 905Scheme_Object *
 906scheme_make_sized_utf8_string(char *chars, intptr_t len)
 907{
 908  return scheme_make_sized_offset_utf8_string(chars, 0, len);
 909}
 910
 911Scheme_Object *
 912scheme_make_immutable_sized_utf8_string(char *chars, intptr_t len)
 913{
 914  Scheme_Object *s;
 915
 916  s = scheme_make_sized_offset_utf8_string(chars, 0, len);
 917  if (len)
 918    SCHEME_SET_CHAR_STRING_IMMUTABLE(s);
 919
 920  return s;
 921}
 922
 923Scheme_Object *
 924scheme_make_utf8_string(const char *chars)
 925{
 926  return scheme_make_sized_offset_utf8_string((char *)chars, 0, -1);
 927}
 928
 929Scheme_Object *
 930scheme_make_locale_string(const char *chars)
 931{
 932  return scheme_byte_string_to_char_string_locale(scheme_make_byte_string((char *)chars));
 933}
 934
 935/**********************************************************************/
 936/*                         index helpers                              */
 937/**********************************************************************/
 938
 939void scheme_out_of_string_range(const char *name, const char *which,
 940				Scheme_Object *i, Scheme_Object *s,
 941				intptr_t start, intptr_t len)
 942{
 943  int is_byte;
 944
 945  is_byte = SCHEME_BYTE_STRINGP(s);
 946
 947  if (len) {
 948    char *sstr;
 949    intptr_t slen;
 950
 951    sstr = scheme_make_provided_string(s, 2, &slen);
 952    scheme_raise_exn(MZEXN_FAIL_CONTRACT,
 953		     "%s: %sindex %s out of range [%d, %d] for %s%s: %t",
 954		     name, which,
 955		     scheme_make_provided_string(i, 2, NULL),
 956		     ((start < 0) ? 0 : start), 
 957                     ((start < 0) ? (len - 1) : len),
 958		     is_byte ? "byte-" : "",
 959                     SCHEME_CHAPERONE_VECTORP(s) ? "vector" : "string",
 960		     sstr, slen);
 961  } else {
 962    scheme_raise_exn(MZEXN_FAIL_CONTRACT,
 963		     "%s: %sindex %s out of range for empty %s%s",
 964		     name, which,
 965		     scheme_make_provided_string(i, 0, NULL),
 966		     is_byte ? "byte-" : "",
 967                     SCHEME_CHAPERONE_VECTORP(s) ? "vector" : "string");
 968  }
 969}
 970
 971intptr_t scheme_extract_index(const char *name, int pos, int argc, Scheme_Object **argv, intptr_t top, int false_ok)
 972{
 973  intptr_t i;
 974  int is_top = 0;
 975
 976  if (SCHEME_INTP(argv[pos])) {
 977    i = SCHEME_INT_VAL(argv[pos]);
 978  } else if (SCHEME_BIGNUMP(argv[pos])) {
 979    if (SCHEME_BIGPOS(argv[pos])) {
 980      i = top; /* out-of-bounds */
 981      is_top = 1;
 982    } else
 983      i = -1; /* negative */
 984  } else
 985    i = -1;
 986
 987  if (!is_top && (i < 0))
 988    scheme_wrong_type(name,
 989		      (false_ok ? "non-negative exact integer or #f" : "non-negative exact integer"),
 990		      pos, argc, argv);
 991
 992  return i;
 993}
 994
 995void scheme_get_substring_indices(const char *name, Scheme_Object *str,
 996                                  int argc, Scheme_Object **argv,
 997                                  int spos, int fpos, intptr_t *_start, intptr_t *_finish)
 998{
 999  intptr_t len;
1000  intptr_t start, finish;
1001
1002  if (SCHEME_CHAPERONE_VECTORP(str))
1003    len = SCHEME_VEC_SIZE(str);
1004  else if (SCHEME_CHAR_STRINGP(str))
1005    len = SCHEME_CHAR_STRTAG_VAL(str);
1006  else
1007    len = SCHEME_BYTE_STRTAG_VAL(str);
1008
1009  if (argc > spos)
1010    start = scheme_extract_index(name, spos, argc, argv, len + 1, 0);
1011  else
1012    start = 0;
1013  if (argc > fpos)
1014    finish = scheme_extract_index(name, fpos, argc, argv, len + 1, 0);
1015  else
1016    finish = len;
1017
1018  if (!(start <= len)) {
1019    scheme_out_of_string_range(name, (fpos < 100) ? "starting " : "", argv[spos], str, 0, len);
1020  }
1021  if (!(finish >= start && finish <= len)) {
1022    scheme_out_of_string_range(name, "ending ", argv[fpos], str, start, len);
1023  }
1024
1025  *_start = start;
1026  *_finish = finish;
1027}
1028
1029void scheme_do_get_substring_indices(const char *name, Scheme_Object *str,
1030                                     int argc, Scheme_Object **argv,
1031                                     int spos, int fpos, intptr_t *_start, intptr_t *_finish, intptr_t len)
1032{
1033  if (argc > spos) {
1034    if (SCHEME_INTP(argv[spos])) {
1035      intptr_t start = SCHEME_INT_VAL(argv[spos]);
1036      if ((start >= 0) && (start < len)) {
1037        *_start = start;
1038        if (argc > fpos) {
1039          intptr_t finish = SCHEME_INT_VAL(argv[fpos]);
1040          if ((finish >= start) && (finish <= len)) {
1041            *_finish = finish;
1042            return;
1043          }
1044        } else {
1045          *_finish = len;
1046          return;
1047        }
1048      }
1049    }
1050  } else {
1051    *_start = 0;
1052    *_finish = len;
1053    return;
1054  }
1055
1056  scheme_get_substring_indices(name, str, argc, argv, spos, fpos, _start, _finish);
1057}
1058
1059/**********************************************************************/
1060/*                          char strings                              */
1061/**********************************************************************/
1062
1063#define SCHEME_X_STR_VAL(x) SCHEME_CHAR_STR_VAL(x)
1064#define SCHEME_X_STRTAG_VAL(x) SCHEME_CHAR_STRTAG_VAL(x)
1065#define SCHEME_X_STRINGP(x) SCHEME_CHAR_STRINGP(x)
1066#define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_CHAR_STRINGP(x)
1067#define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_CHAR_STRING_IMMUTABLE(x)
1068#define scheme_x_string_type scheme_char_string_type
1069#define X(a, b) a##_char##b
1070#define X_(a, b) a##_##b
1071#define X__(a) a
1072#define EMPTY (mzchar *)"\0\0\0"
1073#define Xchar mzchar
1074#define uXchar mzchar
1075#define XSTR ""
1076#define XSTRINGSTR "string"
1077#define SUBXSTR "substring"
1078#define CHARP(x) SCHEME_CHARP(x)
1079#define CHAR_VAL(x) SCHEME_CHAR_VAL(x)
1080#define CHAR_STR "character"
1081#define MAKE_CHAR(x) _scheme_make_char(x)
1082#define xstrlen scheme_char_strlen
1083#include "strops.inc"
1084
1085#define GEN_STRING_COMP(name, scheme_name, comp, op, ul, size_shortcut)     \
1086static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
1087{  mzchar *s, *prev; int i, sl, pl; int falz = 0;\
1088   if (!SCHEME_CHAR_STRINGP(argv[0])) \
1089    scheme_wrong_type(scheme_name, "string", 0, argc, argv); \
1090   prev = SCHEME_CHAR_STR_VAL(argv[0]); pl = SCHEME_CHAR_STRTAG_VAL(argv[0]); \
1091   for (i = 1; i < argc; i++) { \
1092     if (!SCHEME_CHAR_STRINGP(argv[i])) \
1093      scheme_wrong_type(scheme_name, "string", i, argc, argv); \
1094     s = SCHEME_CHAR_STR_VAL(argv[i]); sl = SCHEME_CHAR_STRTAG_VAL(argv[i]); \
1095     if (!falz) if (!(comp(scheme_name, \
1096                           prev, pl, \
1097                           s, sl, ul, size_shortcut) op 0)) falz = 1; \
1098     prev = s; pl = sl; \
1099  } \
1100  return falz ? scheme_false : scheme_true; \
1101}
1102
1103GEN_STRING_COMP(string_eq, "string=?", mz_char_strcmp, ==, 0, 1)
1104GEN_STRING_COMP(string_lt, "string<?", mz_char_strcmp, <, 0, 0)
1105GEN_STRING_COMP(string_gt, "string>?", mz_char_strcmp, >, 0, 0)
1106GEN_STRING_COMP(string_lt_eq, "string<=?", mz_char_strcmp, <=, 0, 0)
1107GEN_STRING_COMP(string_gt_eq, "string>=?", mz_char_strcmp, >=, 0, 0)
1108
1109GEN_STRING_COMP(string_ci_eq, "string-ci=?", mz_char_strcmp_ci, ==, 0, 0)
1110GEN_STRING_COMP(string_ci_lt, "string-ci<?", mz_char_strcmp_ci, <, 0, 0)
1111GEN_STRING_COMP(string_ci_gt, "string-ci>?", mz_char_strcmp_ci, >, 0, 0)
1112GEN_STRING_COMP(string_ci_lt_eq, "string-ci<=?", mz_char_strcmp_ci, <=, 0, 0)
1113GEN_STRING_COMP(string_ci_gt_eq, "string-ci>=?", mz_char_strcmp_ci, >=, 0, 0)
1114
1115GEN_STRING_COMP(string_locale_eq, "string-locale=?", mz_char_strcmp, ==, 1, 0)
1116GEN_STRING_COMP(string_locale_lt, "string-locale<?", mz_char_strcmp, <, 1, 0)
1117GEN_STRING_COMP(string_locale_gt, "string-locale>?", mz_char_strcmp, >, 1, 0)
1118GEN_STRING_COMP(string_locale_ci_eq, "string-locale-ci=?", mz_char_strcmp_ci, ==, 1, 0)
1119GEN_STRING_COMP(string_locale_ci_lt, "string-locale-ci<?", mz_char_strcmp_ci, <, 1, 0)
1120GEN_STRING_COMP(string_locale_ci_gt, "string-locale-ci>?", mz_char_strcmp_ci, >, 1, 0)
1121
1122/**********************************************************************/
1123/*                         byte strings                               */
1124/**********************************************************************/
1125
1126#define SCHEME_BYTEP(x) ((SCHEME_INTP(x)) && (SCHEME_INT_VAL(x) >= 0) && (SCHEME_INT_VAL(x) <= 255))
1127#define BYTE_STR "exact integer in [0,255]"
1128
1129static Scheme_Object *
1130byte_p(int argc, Scheme_Object *argv[])
1131{
1132  return (SCHEME_BYTEP(argv[0]) ? scheme_true : scheme_false);
1133}
1134
1135#define SCHEME_X_STR_VAL(x) SCHEME_BYTE_STR_VAL(x)
1136#define SCHEME_X_STRTAG_VAL(x) SCHEME_BYTE_STRTAG_VAL(x)
1137#define SCHEME_X_STRINGP(x) SCHEME_BYTE_STRINGP(x)
1138#define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_BYTE_STRINGP(x)
1139#define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_BYTE_STRING_IMMUTABLE(x)
1140#define scheme_x_string_type scheme_byte_string_type
1141#define X(a, b) a##_byte##b
1142#define X_(a, b) a##_byte_##b
1143#define X__(a) byte_##a
1144#define EMPTY ""
1145#define Xchar char
1146#define uXchar unsigned char
1147#define XSTR "byte "
1148#define XSTRINGSTR "bytes"
1149#define SUBXSTR "subbytes"
1150#define CHARP(x) SCHEME_BYTEP(x)
1151#define CHAR_VAL(x) SCHEME_INT_VAL(x)
1152#define CHAR_STR BYTE_STR
1153#define MAKE_CHAR(x) scheme_make_integer_value(x)
1154#define xstrlen strlen
1155#define GENERATING_BYTE
1156#include "strops.inc"
1157#undef GENERATING_BYTE
1158
1159/* comparisons */
1160
1161#define GEN_BYTE_STRING_COMP(name, scheme_name, comp, op) \
1162static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
1163{  char *s, *prev; int i, sl, pl; int falz = 0;\
1164   if (!SCHEME_BYTE_STRINGP(argv[0])) \
1165    scheme_wrong_type(scheme_name, "byte string", 0, argc, argv); \
1166   prev = SCHEME_BYTE_STR_VAL(argv[0]); pl = SCHEME_BYTE_STRTAG_VAL(argv[0]); \
1167   for (i = 1; i < argc; i++) { \
1168     if (!SCHEME_BYTE_STRINGP(argv[i])) \
1169      scheme_wrong_type(scheme_name, "byte string", i, argc, argv); \
1170     s = SCHEME_BYTE_STR_VAL(argv[i]); sl = SCHEME_BYTE_STRTAG_VAL(argv[i]); \
1171     if (!falz) if (!(comp(scheme_name, \
1172                           (unsigned char *)prev, pl, \
1173                           (unsigned char *)s, sl) op 0)) falz = 1; \
1174     prev = s; pl = sl; \
1175  } \
1176  return falz ? scheme_false : scheme_true; \
1177}
1178
1179GEN_BYTE_STRING_COMP(byte_string_eq, "bytes=?", mz_strcmp, ==)
1180GEN_BYTE_STRING_COMP(byte_string_lt, "bytes<?", mz_strcmp, <)
1181GEN_BYTE_STRING_COMP(byte_string_gt, "bytes>?", mz_strcmp, >)
1182
1183/**********************************************************************/
1184/*                   byte string <-> char string                      */
1185/**********************************************************************/
1186
1187/************************* bytes->string *************************/
1188
1189static Scheme_Object *
1190do_byte_string_to_char_string(const char *who,
1191			      Scheme_Object *bstr,
1192			      intptr_t istart, intptr_t ifinish,
1193			      int perm, int as_locale)
1194{
1195  int i, ulen;
1196  char *chars;
1197  unsigned int *v;
1198
1199  chars = SCHEME_BYTE_STR_VAL(bstr);
1200
1201  ulen = utf8_decode_x((unsigned char *)chars, istart, ifinish,
1202		       NULL, 0, -1,
1203		       NULL, NULL, 0, 0,
1204		       NULL, 0, 
1205		       (perm > -1) ? 0xD800 : 0);
1206  if (ulen < 0) {
1207    scheme_arg_mismatch(who,
1208			STRING_IS_NOT_UTF_8,
1209			bstr);
1210  }
1211
1212  v = (unsigned int *)scheme_malloc_atomic((ulen + 1) * sizeof(unsigned int));
1213  utf8_decode_x((unsigned char *)chars, istart, ifinish,
1214		v, 0, -1,
1215		NULL, NULL, 0, 0,
1216		NULL, 0, 
1217		(perm > -1) ? 0xD800 : 0);
1218  
1219  if (perm > -1) {
1220    for (i = 0; i < ulen; i++) {
1221      if (v[i] == 0xD800)
1222	v[i] = perm;
1223    }
1224  }
1225  v[ulen] = 0;
1226
1227  return scheme_make_sized_char_string(v, ulen, 0);
1228}
1229
1230static Scheme_Object *
1231do_byte_string_to_char_string_locale(const char *who,
1232				     Scheme_Object *bstr,
1233				     intptr_t istart, intptr_t ifinish,
1234				     int perm)
1235{
1236  char *us;
1237  intptr_t olen;
1238
1239  reset_locale();
1240  if (!iconv_ready) init_iconv();
1241
1242  if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open))
1243    return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
1244
1245  if (istart < ifinish) {
1246    int no_cvt;
1247
1248    us = string_to_from_locale(0, SCHEME_BYTE_STR_VAL(bstr),
1249			       istart, ifinish - istart,
1250			       &olen, perm, &no_cvt);
1251
1252    if (!us) {
1253      if (no_cvt) {
1254	return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
1255      } else {
1256	scheme_arg_mismatch(who,
1257			    "byte string is not a valid encoding for the current locale: ",
1258			    bstr);
1259      }
1260    }
1261    ((mzchar *)us)[olen] = 0;
1262  } else {
1263    us = "\0\0\0";
1264    olen = 0;
1265  }
1266
1267  return scheme_make_sized_char_string((mzchar *)us, olen, 0);
1268}
1269
1270static Scheme_Object *
1271do_string_to_vector(const char *who, int mode, int argc, Scheme_Object *argv[])
1272{
1273  int permc;
1274  intptr_t istart, ifinish;
1275
1276  if (!SCHEME_BYTE_STRINGP(argv[0]))
1277    scheme_wrong_type(who, "byte string", 0, argc, argv);
1278
1279  if ((argc < 2) || SCHEME_FALSEP(argv[1]))
1280    permc = -1;
1281  else {
1282    if (!SCHEME_CHARP(argv[1]))
1283      scheme_wrong_type(who, "character or #f", 1, argc, argv);
1284    permc = SCHEME_CHAR_VAL(argv[1]);
1285  }
1286
1287  scheme_get_substring_indices(who, argv[0], argc, argv,
1288			       2, 3,
1289			       &istart, &ifinish);
1290
1291  if (mode == 0)
1292    return do_byte_string_to_char_string(who, argv[0], istart, ifinish, permc, 0);
1293  else if (mode == 1)
1294    return do_byte_string_to_char_string_locale(who, argv[0], istart, ifinish, permc);
1295  else {
1296    /* Latin-1 */
1297    mzchar *us;
1298    unsigned char *s;
1299    intptr_t i, len;
1300    len = ifinish - istart;
1301    s = (unsigned char *)SCHEME_BYTE_STR_VAL(argv[0]);
1302    us = (mzchar *)scheme_malloc_atomic((len + 1) * sizeof(mzchar));
1303    for (i = istart; i < ifinish; i++) {
1304      us[i - istart] = s[i];
1305    }
1306    us[len] = 0;
1307
1308    return scheme_make_sized_char_string(us, len, 0);
1309  }
1310}
1311
1312
1313static Scheme_Object *
1314byte_string_to_char_string (int argc, Scheme_Object *argv[])
1315{
1316  return do_string_to_vector("bytes->string/utf-8", 0, argc, argv);
1317}
1318
1319static Scheme_Object *
1320byte_string_to_char_string_locale (int argc, Scheme_Object *argv[])
1321{
1322  return do_string_to_vector("bytes->string/locale", 1, argc, argv);
1323}
1324
1325static Scheme_Object *
1326byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[])
1327{
1328  return do_string_to_vector("bytes->string/latin-1", 2, argc, argv);
1329}
1330
1331Scheme_Object *scheme_byte_string_to_char_string(Scheme_Object *o)
1332{
1333  return do_byte_string_to_char_string("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD, 0);
1334}
1335
1336Scheme_Object *scheme_byte_string_to_char_string_locale(Scheme_Object *o)
1337{
1338  return do_byte_string_to_char_string_locale("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD);
1339}
1340
1341/************************* string->bytes *************************/
1342
1343static Scheme_Object *do_char_string_to_byte_string(Scheme_Object *s, intptr_t istart, intptr_t ifinish, 
1344						    int as_locale)
1345{
1346  char *bs;
1347  int slen;
1348
1349  slen = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
1350			    NULL, 0,
1351			    0 /* UTF-16 */);
1352  bs = (char *)scheme_malloc_atomic(slen + 1);
1353  scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
1354		     (unsigned char *)bs, 0,
1355		     0 /* UTF-16 */);
1356  bs[slen] = 0;
1357
1358  return scheme_make_sized_byte_string(bs, slen, 0);
1359}
1360
1361static Scheme_Object *
1362do_char_string_to_byte_string_locale(const char *who,
1363				     Scheme_Object *cstr,
1364				     intptr_t istart, intptr_t ifinish,
1365				     int perm)
1366{
1367  char *s;
1368  intptr_t olen;
1369
1370  reset_locale();
1371  if (!iconv_ready) init_iconv();
1372
1373  if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open))
1374    return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
1375
1376  if (istart < ifinish) {
1377    int no_cvt;
1378
1379    s = string_to_from_locale(1, (char *)SCHEME_CHAR_STR_VAL(cstr),
1380			      istart, ifinish - istart,
1381			      &olen, perm, &no_cvt);
1382
1383    if (!s) {
1384      if (no_cvt) {
1385	return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
1386      } else {
1387	scheme_arg_mismatch(who,
1388			    "string cannot be encoded for the current locale: ",
1389			    cstr);
1390      }
1391    }
1392    s[olen] = 0;
1393  } else {
1394    s = "";
1395    olen = 0;
1396  }
1397
1398  return scheme_make_sized_byte_string(s, olen, 0);
1399}
1400
1401
1402Scheme_Object *scheme_char_string_to_byte_string(Scheme_Object *s)
1403{
1404  return do_char_string_to_byte_string(s, 0, SCHEME_CHAR_STRLEN_VAL(s), 0);
1405}
1406
1407Scheme_Object *scheme_char_string_to_byte_string_locale(Scheme_Object *s)
1408{
1409  return do_char_string_to_byte_string_locale("s->s", s, 0, SCHEME_CHAR_STRLEN_VAL(s), '?');
1410}
1411
1412static Scheme_Object *do_chars_to_bytes(const char *who, int mode,
1413					int argc, Scheme_Object *argv[])
1414{
1415  intptr_t istart, ifinish;
1416  int permc;
1417
1418  if (!SCHEME_CHAR_STRINGP(argv[0]))
1419    scheme_wrong_type(who, "string", 0, argc, argv);
1420
1421  if ((argc < 2) || SCHEME_FALSEP(argv[1]))
1422    permc = -1;
1423  else {
1424    if (!SCHEME_BYTEP(argv[1]))
1425      scheme_wrong_type(who, "byte or #f", 1, argc, argv);
1426    permc = SCHEME_INT_VAL(argv[1]);
1427  }
1428
1429  scheme_get_substring_indices(who, argv[0], argc, argv,
1430			       2, 3, &istart, &ifinish);
1431
1432  if (mode == 1)
1433    return do_char_string_to_byte_string_locale(who, argv[0], istart, ifinish, permc);
1434  else if (mode == 0)
1435    return do_char_string_to_byte_string(argv[0], istart, ifinish, 0);
1436  else {
1437    /* Latin-1 */
1438    mzchar *us;
1439    unsigned char *s;
1440    intptr_t i, len;
1441    len = ifinish - istart;
1442    us = SCHEME_CHAR_STR_VAL(argv[0]);
1443    s = (unsigned char *)scheme_malloc_atomic(len + 1);
1444    for (i = istart; i < ifinish; i++) {
1445      if (us[i] < 256)
1446	s[i - istart] = us[i];
1447      else if (permc >= 0) {
1448	s[i - istart] = permc;
1449      } else {
1450	scheme_arg_mismatch(who,
1451			    "string cannot be encoded in Latin-1: ",
1452			    argv[0]);
1453      }
1454    }
1455    s[len] = 0;
1456
1457    return scheme_make_sized_byte_string((char *)s, len, 0);
1458  }
1459}
1460
1461static Scheme_Object *char_string_to_byte_string(int argc, Scheme_Object *argv[])
1462{
1463  return do_chars_to_bytes("string->bytes/utf-8", 0, argc, argv);
1464}
1465
1466static Scheme_Object *char_string_to_byte_string_locale(int argc, Scheme_Object *argv[])
1467{
1468  return do_chars_to_bytes("string->bytes/locale", 1, argc, argv);
1469}
1470
1471static Scheme_Object *char_string_to_byte_string_latin1(int argc, Scheme_Object *argv[])
1472{
1473  return do_chars_to_bytes("string->bytes/latin-1", 2, argc, argv);
1474}
1475
1476/************************* Other *************************/
1477
1478static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[])
1479{
1480  intptr_t istart, ifinish, len;
1481
1482  if (!SCHEME_CHAR_STRINGP(argv[0]))
1483    scheme_wrong_type("string-utf-8-length", "string", 0, argc, argv);
1484
1485  scheme_get_substring_indices("string-utf-8-length", argv[0], argc, argv,
1486			       1, 2, &istart, &ifinish);
1487
1488  len = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(argv[0]), istart, ifinish,
1489			   NULL, 0, 0);
1490
1491  return scheme_make_integer(len);
1492}
1493
1494static Scheme_Object *
1495byte_string_utf8_length (int argc, Scheme_Object *argv[])
1496{
1497  int len, perm;
1498  intptr_t istart, ifinish;
1499  char *chars;
1500
1501  if (!SCHEME_BYTE_STRINGP(argv[0]))
1502    scheme_wrong_type("bytes-utf-8-length", "string", 0, argc, argv);
1503
1504  chars = SCHEME_BYTE_STR_VAL(argv[0]);
1505
1506  if ((argc > 1) && !SCHEME_FALSEP(argv[1])) {
1507    if (!SCHEME_CHARP(argv[1]))
1508      scheme_wrong_type("bytes-utf-8-length", "character or #f", 1, argc, argv);
1509    perm = 1;
1510  } else
1511    perm = 0;
1512
1513  scheme_get_substring_indices("bytes-utf-8-length", argv[0], argc, argv,
1514			       2, 3,
1515			       &istart, &ifinish);
1516
1517  len = scheme_utf8_decode((unsigned char *)chars, istart, ifinish,
1518			   NULL, 0, -1,
1519			   NULL, 0, perm);
1520
1521  if (len < 0)
1522    return scheme_false;
1523  else
1524    return scheme_make_integer(len);
1525}
1526
1527static Scheme_Object *
1528byte_string_utf8_index(int argc, Scheme_Object *argv[])
1529{
1530  intptr_t istart, ifinish, pos = -1, opos, ipos;
1531  int result, perm;
1532  char *chars;
1533
1534  if (!SCHEME_BYTE_STRINGP(argv[0]))
1535    scheme_wrong_type("bytes-utf-8-index", "byte string", 0, argc, argv);
1536
1537  chars = SCHEME_BYTE_STR_VAL(argv[0]);
1538
1539  if (SCHEME_INTP(argv[1])) {
1540

Large files files are truncated, but you can click here to view the full file