/src/racket/src/string.c

http://github.com/4z3/racket · C · 5644 lines · 4619 code · 681 blank · 344 comment · 1049 complexity · fbd548973c5671534bdc00809fd405bb MD5 · raw file

Large files are truncated click here to view the full file

  1. /*
  2. Racket
  3. Copyright (c) 2004-2010 PLT Scheme Inc.
  4. Copyright (c) 1995-2001 Matthew Flatt
  5. This library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Library General Public
  7. License as published by the Free Software Foundation; either
  8. version 2 of the License, or (at your option) any later version.
  9. This library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Library General Public License for more details.
  13. You should have received a copy of the GNU Library General Public
  14. License along with this library; if not, write to the Free
  15. Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  16. Boston, MA 02110-1301 USA.
  17. libscheme
  18. Copyright (c) 1994 Brent Benson
  19. All rights reserved.
  20. */
  21. #include "schpriv.h"
  22. #include "schvers.h"
  23. #include <string.h>
  24. #include <ctype.h>
  25. #ifndef DONT_USE_LOCALE
  26. # include <locale.h>
  27. # ifdef MZ_NO_ICONV
  28. # define USE_ICONV_DLL
  29. # endif
  30. # ifndef USE_ICONV_DLL
  31. # include <iconv.h>
  32. # include <langinfo.h>
  33. # endif
  34. # include <wchar.h>
  35. # include <wctype.h>
  36. # include <errno.h>
  37. # ifdef MACOS_UNICODE_SUPPORT
  38. # include <CoreFoundation/CFString.h>
  39. # include <CoreFoundation/CFLocale.h>
  40. # endif
  41. # ifdef WINDOWS_UNICODE_SUPPORT
  42. # include <windows.h>
  43. # endif
  44. #endif
  45. #ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH
  46. # include "schsys.h"
  47. #endif
  48. #include "schustr.inc"
  49. #ifdef USE_ICONV_DLL
  50. typedef intptr_t iconv_t;
  51. typedef int *(*errno_proc_t)();
  52. typedef size_t (*iconv_proc_t)(iconv_t cd,
  53. char **inbuf, size_t *inbytesleft,
  54. char **outbuf, size_t *outbytesleft);
  55. typedef iconv_t (*iconv_open_proc_t)(const char *tocode, const char *fromcode);
  56. typedef void (*iconv_close_proc_t)(iconv_t cd);
  57. typedef char *(*locale_charset_proc_t)();
  58. static errno_proc_t iconv_errno;
  59. static iconv_proc_t iconv;
  60. static iconv_open_proc_t iconv_open;
  61. static iconv_close_proc_t iconv_close;
  62. static locale_charset_proc_t locale_charset; /* Not used, currently */
  63. #define mzCHK_PROC(x) x
  64. static int get_iconv_errno(void)
  65. {
  66. int *a;
  67. a = iconv_errno();
  68. return *a;
  69. }
  70. # undef HAVE_CODESET
  71. # define HAVE_CODESET 1
  72. # define CODESET 0
  73. # define ICONV_errno get_iconv_errno()
  74. extern wchar_t *scheme_get_dll_path(wchar_t *s);
  75. static int iconv_ready = 0;
  76. static void init_iconv()
  77. {
  78. # ifdef MZ_NO_ICONV
  79. # else
  80. HMODULE m;
  81. m = LoadLibraryW(scheme_get_dll_path(L"iconv.dll"));
  82. if (!m)
  83. m = LoadLibraryW(scheme_get_dll_path(L"libiconv.dll"));
  84. if (!m)
  85. m = LoadLibraryW(scheme_get_dll_path(L"libiconv-2.dll"));
  86. if (!m)
  87. m = LoadLibrary("iconv.dll");
  88. if (!m)
  89. m = LoadLibrary("libiconv.dll");
  90. if (!m)
  91. m = LoadLibrary("libiconv-2.dll");
  92. if (m) {
  93. iconv = (iconv_proc_t)GetProcAddress(m, "libiconv");
  94. iconv_open = (iconv_open_proc_t)GetProcAddress(m, "libiconv_open");
  95. iconv_close = (iconv_close_proc_t)GetProcAddress(m, "libiconv_close");
  96. locale_charset = (locale_charset_proc_t)GetProcAddress(m, "locale_charset");
  97. /* Make sure we have all of them or none: */
  98. if (!iconv || !iconv_open || !iconv_close) {
  99. iconv = NULL;
  100. iconv_open = NULL;
  101. iconv_close = NULL;
  102. }
  103. }
  104. if (iconv) {
  105. iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno");
  106. if (!iconv_errno) {
  107. /* The iconv.dll distributed with PLT Scheme links to msvcrt.dll.
  108. It's a slighly dangerous assumption that whaetever iconv we
  109. found also uses msvcrt.dll. */
  110. m = LoadLibrary("msvcrt.dll");
  111. if (m) {
  112. iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno");
  113. if (!iconv_errno) {
  114. iconv = NULL;
  115. iconv_open = NULL;
  116. iconv_close = NULL;
  117. }
  118. }
  119. }
  120. }
  121. # endif
  122. iconv_ready = 1;
  123. }
  124. #else
  125. # define ICONV_errno errno
  126. # define iconv_ready 1
  127. # define mzCHK_PROC(x) 1
  128. static void init_iconv() { }
  129. #endif
  130. #ifdef MACOS_UNICODE_SUPPORT
  131. # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
  132. #endif
  133. #ifdef WINDOWS_UNICODE_SUPPORT
  134. # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
  135. #endif
  136. #ifndef mzLOCALE_IS_UTF_8
  137. # define mzLOCALE_IS_UTF_8(s) !mzCHK_PROC(iconv_open)
  138. #endif
  139. #define mzICONV_KIND 0
  140. #define mzUTF8_KIND 1
  141. #define mzUTF8_TO_UTF16_KIND 2
  142. #define mzUTF16_TO_UTF8_KIND 3
  143. typedef struct Scheme_Converter {
  144. Scheme_Object so;
  145. short closed;
  146. short kind;
  147. iconv_t cd;
  148. int permissive;
  149. Scheme_Custodian_Reference *mref;
  150. } Scheme_Converter;
  151. /* locals */
  152. /* These two locale variables are only valid when reset_locale()
  153. is called after continuation marks (and hence parameterization)
  154. may have changed. Similarly, setlocale() is only up-to-date
  155. when reset_locale() has been called. */
  156. THREAD_LOCAL_DECL(static int locale_on);
  157. THREAD_LOCAL_DECL(static void *current_locale_name_ptr);
  158. static void reset_locale(void);
  159. #define current_locale_name ((const mzchar *)current_locale_name_ptr)
  160. #ifdef USE_ICONV_DLL
  161. static char *nl_langinfo(int which)
  162. {
  163. int i;
  164. reset_locale();
  165. if (!current_locale_name)
  166. current_locale_name_ptr = "\0\0\0\0";
  167. if ((current_locale_name[0] == 'C')
  168. && !current_locale_name[1])
  169. return "US-ASCII";
  170. for (i = 0; current_locale_name[i]; i++) {
  171. if (current_locale_name[i] == '.') {
  172. if (current_locale_name[i + 1]) {
  173. int len, j = 0;
  174. char *enc;
  175. i++;
  176. len = scheme_char_strlen(current_locale_name) - i;
  177. enc = (char *)scheme_malloc_atomic(len + 1);
  178. while (current_locale_name[i]) {
  179. if (current_locale_name[i] > 127)
  180. return "UTF-8";
  181. enc[j++] = current_locale_name[i++];
  182. }
  183. enc[j] = 0;
  184. return enc;
  185. }
  186. }
  187. }
  188. return "UTF-8";
  189. }
  190. #endif
  191. #ifdef DONT_USE_LOCALE
  192. # define mz_iconv_nl_langinfo() ""
  193. #else
  194. static char *mz_iconv_nl_langinfo(){
  195. char *s;
  196. # if HAVE_CODESET
  197. s = nl_langinfo(CODESET);
  198. # else
  199. s = NULL;
  200. # endif
  201. if (!s)
  202. return "";
  203. else
  204. return s;
  205. }
  206. #endif
  207. READ_ONLY static const char * const STRING_IS_NOT_UTF_8 = "string is not a well-formed UTF-8 encoding: ";
  208. static Scheme_Object *make_string (int argc, Scheme_Object *argv[]);
  209. static Scheme_Object *string (int argc, Scheme_Object *argv[]);
  210. static Scheme_Object *string_p (int argc, Scheme_Object *argv[]);
  211. static Scheme_Object *string_length (int argc, Scheme_Object *argv[]);
  212. static Scheme_Object *string_eq (int argc, Scheme_Object *argv[]);
  213. static Scheme_Object *string_locale_eq (int argc, Scheme_Object *argv[]);
  214. static Scheme_Object *string_ci_eq (int argc, Scheme_Object *argv[]);
  215. static Scheme_Object *string_locale_ci_eq (int argc, Scheme_Object *argv[]);
  216. static Scheme_Object *string_lt (int argc, Scheme_Object *argv[]);
  217. static Scheme_Object *string_locale_lt (int argc, Scheme_Object *argv[]);
  218. static Scheme_Object *string_gt (int argc, Scheme_Object *argv[]);
  219. static Scheme_Object *string_locale_gt (int argc, Scheme_Object *argv[]);
  220. static Scheme_Object *string_lt_eq (int argc, Scheme_Object *argv[]);
  221. static Scheme_Object *string_gt_eq (int argc, Scheme_Object *argv[]);
  222. static Scheme_Object *string_ci_lt (int argc, Scheme_Object *argv[]);
  223. static Scheme_Object *string_locale_ci_lt (int argc, Scheme_Object *argv[]);
  224. static Scheme_Object *string_ci_gt (int argc, Scheme_Object *argv[]);
  225. static Scheme_Object *string_locale_ci_gt (int argc, Scheme_Object *argv[]);
  226. static Scheme_Object *string_ci_lt_eq (int argc, Scheme_Object *argv[]);
  227. static Scheme_Object *string_ci_gt_eq (int argc, Scheme_Object *argv[]);
  228. static Scheme_Object *string_upcase (int argc, Scheme_Object *argv[]);
  229. static Scheme_Object *string_downcase (int argc, Scheme_Object *argv[]);
  230. static Scheme_Object *string_titlecase (int argc, Scheme_Object *argv[]);
  231. static Scheme_Object *string_foldcase (int argc, Scheme_Object *argv[]);
  232. static Scheme_Object *string_locale_upcase (int argc, Scheme_Object *argv[]);
  233. static Scheme_Object *string_locale_downcase (int argc, Scheme_Object *argv[]);
  234. static Scheme_Object *substring (int argc, Scheme_Object *argv[]);
  235. static Scheme_Object *string_append (int argc, Scheme_Object *argv[]);
  236. static Scheme_Object *string_to_list (int argc, Scheme_Object *argv[]);
  237. static Scheme_Object *list_to_string (int argc, Scheme_Object *argv[]);
  238. static Scheme_Object *string_copy (int argc, Scheme_Object *argv[]);
  239. static Scheme_Object *string_copy_bang (int argc, Scheme_Object *argv[]);
  240. static Scheme_Object *string_fill (int argc, Scheme_Object *argv[]);
  241. static Scheme_Object *string_to_immutable (int argc, Scheme_Object *argv[]);
  242. static Scheme_Object *string_normalize_c (int argc, Scheme_Object *argv[]);
  243. static Scheme_Object *string_normalize_kc (int argc, Scheme_Object *argv[]);
  244. static Scheme_Object *string_normalize_d (int argc, Scheme_Object *argv[]);
  245. static Scheme_Object *string_normalize_kd (int argc, Scheme_Object *argv[]);
  246. #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
  247. static Scheme_Object *make_shared_byte_string (int argc, Scheme_Object *argv[]);
  248. static Scheme_Object *shared_byte_string (int argc, Scheme_Object *argv[]);
  249. #endif
  250. static Scheme_Object *make_byte_string (int argc, Scheme_Object *argv[]);
  251. static Scheme_Object *byte_string (int argc, Scheme_Object *argv[]);
  252. static Scheme_Object *byte_p (int argc, Scheme_Object *argv[]);
  253. static Scheme_Object *byte_string_p (int argc, Scheme_Object *argv[]);
  254. static Scheme_Object *byte_string_length (int argc, Scheme_Object *argv[]);
  255. static Scheme_Object *byte_string_eq (int argc, Scheme_Object *argv[]);
  256. static Scheme_Object *byte_string_lt (int argc, Scheme_Object *argv[]);
  257. static Scheme_Object *byte_string_gt (int argc, Scheme_Object *argv[]);
  258. static Scheme_Object *byte_substring (int argc, Scheme_Object *argv[]);
  259. static Scheme_Object *byte_string_append (int argc, Scheme_Object *argv[]);
  260. static Scheme_Object *byte_string_to_list (int argc, Scheme_Object *argv[]);
  261. static Scheme_Object *list_to_byte_string (int argc, Scheme_Object *argv[]);
  262. static Scheme_Object *byte_string_copy (int argc, Scheme_Object *argv[]);
  263. static Scheme_Object *byte_string_copy_bang (int argc, Scheme_Object *argv[]);
  264. static Scheme_Object *byte_string_fill (int argc, Scheme_Object *argv[]);
  265. static Scheme_Object *byte_string_to_immutable (int argc, Scheme_Object *argv[]);
  266. static Scheme_Object *byte_string_utf8_index (int argc, Scheme_Object *argv[]);
  267. static Scheme_Object *byte_string_utf8_ref (int argc, Scheme_Object *argv[]);
  268. static Scheme_Object *byte_string_utf8_length (int argc, Scheme_Object *argv[]);
  269. static Scheme_Object *byte_string_to_char_string (int argc, Scheme_Object *argv[]);
  270. static Scheme_Object *byte_string_to_char_string_locale (int argc, Scheme_Object *argv[]);
  271. static Scheme_Object *byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[]);
  272. static Scheme_Object *char_string_to_byte_string (int argc, Scheme_Object *argv[]);
  273. static Scheme_Object *char_string_to_byte_string_locale (int argc, Scheme_Object *argv[]);
  274. static Scheme_Object *char_string_to_byte_string_latin1 (int argc, Scheme_Object *argv[]);
  275. static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[]);
  276. static Scheme_Object *version(int argc, Scheme_Object *argv[]);
  277. static Scheme_Object *format(int argc, Scheme_Object *argv[]);
  278. static Scheme_Object *sch_printf(int argc, Scheme_Object *argv[]);
  279. static Scheme_Object *sch_eprintf(int argc, Scheme_Object *argv[]);
  280. static Scheme_Object *sch_fprintf(int argc, Scheme_Object *argv[]);
  281. static Scheme_Object *banner(int argc, Scheme_Object *argv[]);
  282. static Scheme_Object *sch_getenv(int argc, Scheme_Object *argv[]);
  283. static Scheme_Object *sch_putenv(int argc, Scheme_Object *argv[]);
  284. static Scheme_Object *system_type(int argc, Scheme_Object *argv[]);
  285. static Scheme_Object *system_library_subpath(int argc, Scheme_Object *argv[]);
  286. static Scheme_Object *cmdline_args(int argc, Scheme_Object *argv[]);
  287. static Scheme_Object *current_locale(int argc, Scheme_Object *argv[]);
  288. static Scheme_Object *locale_string_encoding(int argc, Scheme_Object *argv[]);
  289. static Scheme_Object *system_language_country(int argc, Scheme_Object *argv[]);
  290. static Scheme_Object *byte_string_open_converter(int argc, Scheme_Object *argv[]);
  291. static Scheme_Object *byte_string_close_converter(int argc, Scheme_Object *argv[]);
  292. static Scheme_Object *byte_string_convert(int argc, Scheme_Object *argv[]);
  293. static Scheme_Object *byte_string_convert_end(int argc, Scheme_Object *argv[]);
  294. static Scheme_Object *byte_converter_p(int argc, Scheme_Object *argv[]);
  295. #ifdef MZ_PRECISE_GC
  296. static void register_traversers(void);
  297. #endif
  298. static int mz_char_strcmp(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
  299. static int mz_char_strcmp_ci(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
  300. static int mz_strcmp(const char *who, unsigned char *str1, intptr_t l1, unsigned char *str2, intptr_t l2);
  301. XFORM_NONGCING static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t end,
  302. unsigned int *us, intptr_t dstart, intptr_t dend,
  303. intptr_t *ipos, intptr_t *jpos,
  304. char compact, char utf16,
  305. int *state, int might_continue, int permissive);
  306. XFORM_NONGCING static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t end,
  307. unsigned char *s, intptr_t dstart, intptr_t dend,
  308. intptr_t *_ipos, intptr_t *_opos, char utf16);
  309. static char *string_to_from_locale(int to_bytes,
  310. char *in, intptr_t delta, intptr_t len,
  311. intptr_t *olen, int perm,
  312. int *no_cvt);
  313. #define portable_isspace(x) (((x) < 128) && isspace(x))
  314. ROSYM static Scheme_Object *sys_symbol;
  315. ROSYM static Scheme_Object *platform_3m_path, *platform_cgc_path;
  316. READ_ONLY static Scheme_Object *zero_length_char_string;
  317. READ_ONLY static Scheme_Object *zero_length_byte_string;
  318. SHARED_OK static Scheme_Hash_Table *putenv_str_table;
  319. SHARED_OK static char *embedding_banner;
  320. SHARED_OK static Scheme_Object *vers_str;
  321. SHARED_OK static Scheme_Object *banner_str;
  322. READ_ONLY static Scheme_Object *complete_symbol, *continues_symbol, *aborts_symbol, *error_symbol;
  323. void
  324. scheme_init_string (Scheme_Env *env)
  325. {
  326. Scheme_Object *p;
  327. REGISTER_SO(sys_symbol);
  328. sys_symbol = scheme_intern_symbol(SYSTEM_TYPE_NAME);
  329. REGISTER_SO(zero_length_char_string);
  330. REGISTER_SO(zero_length_byte_string);
  331. zero_length_char_string = scheme_alloc_char_string(0, 0);
  332. zero_length_byte_string = scheme_alloc_byte_string(0, 0);
  333. REGISTER_SO(complete_symbol);
  334. REGISTER_SO(continues_symbol);
  335. REGISTER_SO(aborts_symbol);
  336. REGISTER_SO(error_symbol);
  337. complete_symbol = scheme_intern_symbol("complete");
  338. continues_symbol = scheme_intern_symbol("continues");
  339. aborts_symbol = scheme_intern_symbol("aborts");
  340. error_symbol = scheme_intern_symbol("error");
  341. REGISTER_SO(platform_3m_path);
  342. #ifdef UNIX_FILE_SYSTEM
  343. # define MZ3M_SUBDIR "/3m"
  344. #else
  345. # ifdef DOS_FILE_SYSTEM
  346. # define MZ3M_SUBDIR "\\3m"
  347. # else
  348. # define MZ3M_SUBDIR ":3m"
  349. # endif
  350. #endif
  351. REGISTER_SO(platform_3m_path);
  352. REGISTER_SO(platform_cgc_path);
  353. platform_cgc_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH);
  354. platform_3m_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH MZ3M_SUBDIR);
  355. REGISTER_SO(putenv_str_table);
  356. REGISTER_SO(embedding_banner);
  357. REGISTER_SO(vers_str);
  358. REGISTER_SO(banner_str);
  359. vers_str = scheme_make_utf8_string(scheme_version());
  360. SCHEME_SET_CHAR_STRING_IMMUTABLE(vers_str);
  361. banner_str = scheme_make_utf8_string(scheme_banner());
  362. SCHEME_SET_CHAR_STRING_IMMUTABLE(banner_str);
  363. p = scheme_make_folding_prim(string_p, "string?", 1, 1, 1);
  364. SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
  365. scheme_add_global_constant("string?", p, env);
  366. scheme_add_global_constant("make-string",
  367. scheme_make_immed_prim(make_string,
  368. "make-string",
  369. 1, 2),
  370. env);
  371. scheme_add_global_constant("string",
  372. scheme_make_immed_prim(string,
  373. "string",
  374. 0, -1),
  375. env);
  376. scheme_add_global_constant("string-length",
  377. scheme_make_folding_prim(string_length,
  378. "string-length",
  379. 1, 1, 1),
  380. env);
  381. p = scheme_make_immed_prim(scheme_checked_string_ref, "string-ref", 2, 2);
  382. SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED;
  383. scheme_add_global_constant("string-ref", p, env);
  384. p = scheme_make_immed_prim(scheme_checked_string_set, "string-set!", 3, 3);
  385. SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
  386. scheme_add_global_constant("string-set!", p, env);
  387. scheme_add_global_constant("string=?",
  388. scheme_make_immed_prim(string_eq,
  389. "string=?",
  390. 2, -1),
  391. env);
  392. scheme_add_global_constant("string-locale=?",
  393. scheme_make_immed_prim(string_locale_eq,
  394. "string-locale=?",
  395. 2, -1),
  396. env);
  397. scheme_add_global_constant("string-ci=?",
  398. scheme_make_immed_prim(string_ci_eq,
  399. "string-ci=?",
  400. 2, -1),
  401. env);
  402. scheme_add_global_constant("string-locale-ci=?",
  403. scheme_make_immed_prim(string_locale_ci_eq,
  404. "string-locale-ci=?",
  405. 2, -1),
  406. env);
  407. scheme_add_global_constant("string<?",
  408. scheme_make_immed_prim(string_lt,
  409. "string<?",
  410. 2, -1),
  411. env);
  412. scheme_add_global_constant("string-locale<?",
  413. scheme_make_immed_prim(string_locale_lt,
  414. "string-locale<?",
  415. 2, -1),
  416. env);
  417. scheme_add_global_constant("string>?",
  418. scheme_make_immed_prim(string_gt,
  419. "string>?",
  420. 2, -1),
  421. env);
  422. scheme_add_global_constant("string-locale>?",
  423. scheme_make_immed_prim(string_locale_gt,
  424. "string-locale>?",
  425. 2, -1),
  426. env);
  427. scheme_add_global_constant("string<=?",
  428. scheme_make_immed_prim(string_lt_eq,
  429. "string<=?",
  430. 2, -1),
  431. env);
  432. scheme_add_global_constant("string>=?",
  433. scheme_make_immed_prim(string_gt_eq,
  434. "string>=?",
  435. 2, -1),
  436. env);
  437. scheme_add_global_constant("string-ci<?",
  438. scheme_make_immed_prim(string_ci_lt,
  439. "string-ci<?",
  440. 2, -1),
  441. env);
  442. scheme_add_global_constant("string-locale-ci<?",
  443. scheme_make_immed_prim(string_locale_ci_lt,
  444. "string-locale-ci<?",
  445. 2, -1),
  446. env);
  447. scheme_add_global_constant("string-ci>?",
  448. scheme_make_immed_prim(string_ci_gt,
  449. "string-ci>?",
  450. 2, -1),
  451. env);
  452. scheme_add_global_constant("string-locale-ci>?",
  453. scheme_make_immed_prim(string_locale_ci_gt,
  454. "string-locale-ci>?",
  455. 2, -1),
  456. env);
  457. scheme_add_global_constant("string-ci<=?",
  458. scheme_make_immed_prim(string_ci_lt_eq,
  459. "string-ci<=?",
  460. 2, -1),
  461. env);
  462. scheme_add_global_constant("string-ci>=?",
  463. scheme_make_immed_prim(string_ci_gt_eq,
  464. "string-ci>=?",
  465. 2, -1),
  466. env);
  467. scheme_add_global_constant("substring",
  468. scheme_make_immed_prim(substring,
  469. "substring",
  470. 2, 3),
  471. env);
  472. scheme_add_global_constant("string-append",
  473. scheme_make_immed_prim(string_append,
  474. "string-append",
  475. 0, -1),
  476. env);
  477. scheme_add_global_constant("string->list",
  478. scheme_make_immed_prim(string_to_list,
  479. "string->list",
  480. 1, 1),
  481. env);
  482. scheme_add_global_constant("list->string",
  483. scheme_make_immed_prim(list_to_string,
  484. "list->string",
  485. 1, 1),
  486. env);
  487. scheme_add_global_constant("string-copy",
  488. scheme_make_immed_prim(string_copy,
  489. "string-copy",
  490. 1, 1),
  491. env);
  492. scheme_add_global_constant("string-copy!",
  493. scheme_make_immed_prim(string_copy_bang,
  494. "string-copy!",
  495. 3, 5),
  496. env);
  497. scheme_add_global_constant("string-fill!",
  498. scheme_make_immed_prim(string_fill,
  499. "string-fill!",
  500. 2, 2),
  501. env);
  502. scheme_add_global_constant("string->immutable-string",
  503. scheme_make_immed_prim(string_to_immutable,
  504. "string->immutable-string",
  505. 1, 1),
  506. env);
  507. scheme_add_global_constant("string-normalize-nfc",
  508. scheme_make_immed_prim(string_normalize_c,
  509. "string-normalize-nfc",
  510. 1, 1),
  511. env);
  512. scheme_add_global_constant("string-normalize-nfkc",
  513. scheme_make_immed_prim(string_normalize_kc,
  514. "string-normalize-nfkc",
  515. 1, 1),
  516. env);
  517. scheme_add_global_constant("string-normalize-nfd",
  518. scheme_make_immed_prim(string_normalize_d,
  519. "string-normalize-nfd",
  520. 1, 1),
  521. env);
  522. scheme_add_global_constant("string-normalize-nfkd",
  523. scheme_make_immed_prim(string_normalize_kd,
  524. "string-normalize-nfkd",
  525. 1, 1),
  526. env);
  527. scheme_add_global_constant("string-upcase",
  528. scheme_make_immed_prim(string_upcase,
  529. "string-upcase",
  530. 1, 1),
  531. env);
  532. scheme_add_global_constant("string-downcase",
  533. scheme_make_immed_prim(string_downcase,
  534. "string-downcase",
  535. 1, 1),
  536. env);
  537. scheme_add_global_constant("string-titlecase",
  538. scheme_make_immed_prim(string_titlecase,
  539. "string-titlecase",
  540. 1, 1),
  541. env);
  542. scheme_add_global_constant("string-foldcase",
  543. scheme_make_immed_prim(string_foldcase,
  544. "string-foldcase",
  545. 1, 1),
  546. env);
  547. scheme_add_global_constant("string-locale-upcase",
  548. scheme_make_immed_prim(string_locale_upcase,
  549. "string-locale-upcase",
  550. 1, 1),
  551. env);
  552. scheme_add_global_constant("string-locale-downcase",
  553. scheme_make_immed_prim(string_locale_downcase,
  554. "string-locale-downcase",
  555. 1, 1),
  556. env);
  557. scheme_add_global_constant("current-locale",
  558. scheme_register_parameter(current_locale,
  559. "current-locale",
  560. MZCONFIG_LOCALE),
  561. env);
  562. scheme_add_global_constant("locale-string-encoding",
  563. scheme_make_immed_prim(locale_string_encoding,
  564. "locale-string-encoding",
  565. 0, 0),
  566. env);
  567. scheme_add_global_constant("system-language+country",
  568. scheme_make_immed_prim(system_language_country,
  569. "system-language+country",
  570. 0, 0),
  571. env);
  572. scheme_add_global_constant("bytes-converter?",
  573. scheme_make_immed_prim(byte_converter_p,
  574. "bytes-converter?",
  575. 1, 1),
  576. env);
  577. scheme_add_global_constant("bytes-convert",
  578. scheme_make_prim_w_arity2(byte_string_convert,
  579. "bytes-convert",
  580. 1, 7,
  581. 3, 3),
  582. env);
  583. scheme_add_global_constant("bytes-convert-end",
  584. scheme_make_prim_w_arity2(byte_string_convert_end,
  585. "bytes-convert-end",
  586. 0, 3,
  587. 2, 2),
  588. env);
  589. scheme_add_global_constant("bytes-open-converter",
  590. scheme_make_immed_prim(byte_string_open_converter,
  591. "bytes-open-converter",
  592. 2, 2),
  593. env);
  594. scheme_add_global_constant("bytes-close-converter",
  595. scheme_make_immed_prim(byte_string_close_converter,
  596. "bytes-close-converter",
  597. 1, 1),
  598. env);
  599. scheme_add_global_constant("format",
  600. scheme_make_noncm_prim(format,
  601. "format",
  602. 1, -1),
  603. env);
  604. scheme_add_global_constant("printf",
  605. scheme_make_noncm_prim(sch_printf,
  606. "printf",
  607. 1, -1),
  608. env);
  609. scheme_add_global_constant("eprintf",
  610. scheme_make_noncm_prim(sch_eprintf,
  611. "eprintf",
  612. 1, -1),
  613. env);
  614. scheme_add_global_constant("fprintf",
  615. scheme_make_noncm_prim(sch_fprintf,
  616. "fprintf",
  617. 2, -1),
  618. env);
  619. scheme_add_global_constant("byte?",
  620. scheme_make_folding_prim(byte_p,
  621. "byte?",
  622. 1, 1, 1),
  623. env);
  624. p = scheme_make_folding_prim(byte_string_p, "bytes?", 1, 1, 1);
  625. SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
  626. scheme_add_global_constant("bytes?", p, env);
  627. scheme_add_global_constant("make-bytes",
  628. scheme_make_immed_prim(make_byte_string,
  629. "make-bytes",
  630. 1, 2),
  631. env);
  632. scheme_add_global_constant("bytes",
  633. scheme_make_immed_prim(byte_string,
  634. "bytes",
  635. 0, -1),
  636. env);
  637. #if defined(MZ_USE_PLACES) && defined(MZ_PRECISE_GC)
  638. GLOBAL_PRIM_W_ARITY("make-shared-bytes", make_shared_byte_string, 1, 2, env);
  639. GLOBAL_PRIM_W_ARITY("shared-bytes", shared_byte_string, 0, -1, env);
  640. #else
  641. GLOBAL_PRIM_W_ARITY("make-shared-bytes", make_byte_string, 1, 2, env);
  642. GLOBAL_PRIM_W_ARITY("shared-bytes", byte_string, 0, -1, env);
  643. #endif
  644. scheme_add_global_constant("bytes-length",
  645. scheme_make_folding_prim(byte_string_length,
  646. "bytes-length",
  647. 1, 1, 1),
  648. env);
  649. p = scheme_make_immed_prim(scheme_checked_byte_string_ref, "bytes-ref", 2, 2);
  650. SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED;
  651. scheme_add_global_constant("bytes-ref", p, env);
  652. p = scheme_make_immed_prim(scheme_checked_byte_string_set, "bytes-set!", 3, 3);
  653. SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED;
  654. scheme_add_global_constant("bytes-set!", p, env);
  655. scheme_add_global_constant("bytes=?",
  656. scheme_make_immed_prim(byte_string_eq,
  657. "bytes=?",
  658. 2, -1),
  659. env);
  660. scheme_add_global_constant("bytes<?",
  661. scheme_make_immed_prim(byte_string_lt,
  662. "bytes<?",
  663. 2, -1),
  664. env);
  665. scheme_add_global_constant("bytes>?",
  666. scheme_make_immed_prim(byte_string_gt,
  667. "bytes>?",
  668. 2, -1),
  669. env);
  670. scheme_add_global_constant("subbytes",
  671. scheme_make_immed_prim(byte_substring,
  672. "subbytes",
  673. 2, 3),
  674. env);
  675. scheme_add_global_constant("bytes-append",
  676. scheme_make_immed_prim(byte_string_append,
  677. "bytes-append",
  678. 0, -1),
  679. env);
  680. scheme_add_global_constant("bytes->list",
  681. scheme_make_immed_prim(byte_string_to_list,
  682. "bytes->list",
  683. 1, 1),
  684. env);
  685. scheme_add_global_constant("list->bytes",
  686. scheme_make_immed_prim(list_to_byte_string,
  687. "list->bytes",
  688. 1, 1),
  689. env);
  690. scheme_add_global_constant("bytes-copy",
  691. scheme_make_immed_prim(byte_string_copy,
  692. "bytes-copy",
  693. 1, 1),
  694. env);
  695. scheme_add_global_constant("bytes-copy!",
  696. scheme_make_immed_prim(byte_string_copy_bang,
  697. "bytes-copy!",
  698. 3, 5),
  699. env);
  700. scheme_add_global_constant("bytes-fill!",
  701. scheme_make_immed_prim(byte_string_fill,
  702. "bytes-fill!",
  703. 2, 2),
  704. env);
  705. scheme_add_global_constant("bytes->immutable-bytes",
  706. scheme_make_immed_prim(byte_string_to_immutable,
  707. "bytes->immutable-bytes",
  708. 1, 1),
  709. env);
  710. scheme_add_global_constant("bytes-utf-8-index",
  711. scheme_make_immed_prim(byte_string_utf8_index,
  712. "bytes-utf-8-index",
  713. 2, 4),
  714. env);
  715. scheme_add_global_constant("bytes-utf-8-length",
  716. scheme_make_immed_prim(byte_string_utf8_length,
  717. "bytes-utf-8-length",
  718. 1, 4),
  719. env);
  720. scheme_add_global_constant("bytes-utf-8-ref",
  721. scheme_make_immed_prim(byte_string_utf8_ref,
  722. "bytes-utf-8-ref",
  723. 2, 4),
  724. env);
  725. scheme_add_global_constant("bytes->string/utf-8",
  726. scheme_make_immed_prim(byte_string_to_char_string,
  727. "bytes->string/utf-8",
  728. 1, 4),
  729. env);
  730. scheme_add_global_constant("bytes->string/locale",
  731. scheme_make_immed_prim(byte_string_to_char_string_locale,
  732. "bytes->string/locale",
  733. 1, 4),
  734. env);
  735. scheme_add_global_constant("bytes->string/latin-1",
  736. scheme_make_immed_prim(byte_string_to_char_string_latin1,
  737. "bytes->string/latin-1",
  738. 1, 4),
  739. env);
  740. scheme_add_global_constant("string->bytes/utf-8",
  741. scheme_make_immed_prim(char_string_to_byte_string,
  742. "string->bytes/utf-8",
  743. 1, 4),
  744. env);
  745. scheme_add_global_constant("string->bytes/locale",
  746. scheme_make_immed_prim(char_string_to_byte_string_locale,
  747. "string->bytes/locale",
  748. 1, 4),
  749. env);
  750. scheme_add_global_constant("string->bytes/latin-1",
  751. scheme_make_immed_prim(char_string_to_byte_string_latin1,
  752. "string->bytes/latin-1",
  753. 1, 4),
  754. env);
  755. scheme_add_global_constant("string-utf-8-length",
  756. scheme_make_immed_prim(char_string_utf8_length,
  757. "string-utf-8-length",
  758. 1, 3),
  759. env);
  760. /* In principle, `version' could be foldable, but it invites
  761. more problems than it solves... */
  762. scheme_add_global_constant("version",
  763. scheme_make_immed_prim(version,
  764. "version",
  765. 0, 0),
  766. env);
  767. scheme_add_global_constant("banner",
  768. scheme_make_immed_prim(banner,
  769. "banner",
  770. 0, 0),
  771. env);
  772. scheme_add_global_constant("getenv",
  773. scheme_make_immed_prim(sch_getenv,
  774. "getenv",
  775. 1, 1),
  776. env);
  777. scheme_add_global_constant("putenv",
  778. scheme_make_immed_prim(sch_putenv,
  779. "putenv",
  780. 2, 2),
  781. env);
  782. /* Don't make these folding, since they're platform-specific: */
  783. scheme_add_global_constant("system-type",
  784. scheme_make_immed_prim(system_type,
  785. "system-type",
  786. 0, 1),
  787. env);
  788. scheme_add_global_constant("system-library-subpath",
  789. scheme_make_immed_prim(system_library_subpath,
  790. "system-library-subpath",
  791. 0, 1),
  792. env);
  793. scheme_add_global_constant("current-command-line-arguments",
  794. scheme_register_parameter(cmdline_args,
  795. "current-command-line-arguments",
  796. MZCONFIG_CMDLINE_ARGS),
  797. env);
  798. #ifdef MZ_PRECISE_GC
  799. register_traversers();
  800. #endif
  801. }
  802. void scheme_init_string_places(void) {
  803. REGISTER_SO(current_locale_name_ptr);
  804. current_locale_name_ptr = "xxxx\0\0\0\0";
  805. }
  806. /**********************************************************************/
  807. /* UTF-8 char constructors */
  808. /**********************************************************************/
  809. Scheme_Object *scheme_make_sized_offset_utf8_string(char *chars, intptr_t d, intptr_t len)
  810. {
  811. intptr_t ulen;
  812. mzchar *us;
  813. if (len) {
  814. ulen = scheme_utf8_decode((unsigned char *)chars, d, d + len,
  815. NULL, 0, -1,
  816. NULL, 0 /* not UTF-16 */, 0xFFFD);
  817. us = scheme_malloc_atomic(sizeof(mzchar) * (ulen + 1));
  818. scheme_utf8_decode((unsigned char *)chars, d, d + len,
  819. us, 0, -1,
  820. NULL, 0 /* not UTF-16 */, 0xFFFD);
  821. us[ulen] = 0;
  822. } else {
  823. us = (mzchar *)"\0\0\0";
  824. ulen = 0;
  825. }
  826. return scheme_make_sized_offset_char_string(us, 0, ulen, 0);
  827. }
  828. Scheme_Object *
  829. scheme_make_sized_utf8_string(char *chars, intptr_t len)
  830. {
  831. return scheme_make_sized_offset_utf8_string(chars, 0, len);
  832. }
  833. Scheme_Object *
  834. scheme_make_immutable_sized_utf8_string(char *chars, intptr_t len)
  835. {
  836. Scheme_Object *s;
  837. s = scheme_make_sized_offset_utf8_string(chars, 0, len);
  838. if (len)
  839. SCHEME_SET_CHAR_STRING_IMMUTABLE(s);
  840. return s;
  841. }
  842. Scheme_Object *
  843. scheme_make_utf8_string(const char *chars)
  844. {
  845. return scheme_make_sized_offset_utf8_string((char *)chars, 0, -1);
  846. }
  847. Scheme_Object *
  848. scheme_make_locale_string(const char *chars)
  849. {
  850. return scheme_byte_string_to_char_string_locale(scheme_make_byte_string((char *)chars));
  851. }
  852. /**********************************************************************/
  853. /* index helpers */
  854. /**********************************************************************/
  855. void scheme_out_of_string_range(const char *name, const char *which,
  856. Scheme_Object *i, Scheme_Object *s,
  857. intptr_t start, intptr_t len)
  858. {
  859. int is_byte;
  860. is_byte = SCHEME_BYTE_STRINGP(s);
  861. if (len) {
  862. char *sstr;
  863. intptr_t slen;
  864. sstr = scheme_make_provided_string(s, 2, &slen);
  865. scheme_raise_exn(MZEXN_FAIL_CONTRACT,
  866. "%s: %sindex %s out of range [%d, %d] for %s%s: %t",
  867. name, which,
  868. scheme_make_provided_string(i, 2, NULL),
  869. start, len,
  870. is_byte ? "byte-" : "",
  871. SCHEME_CHAPERONE_VECTORP(s) ? "vector" : "string",
  872. sstr, slen);
  873. } else {
  874. scheme_raise_exn(MZEXN_FAIL_CONTRACT,
  875. "%s: %sindex %s out of range for empty %s%s",
  876. name, which,
  877. scheme_make_provided_string(i, 0, NULL),
  878. is_byte ? "byte-" : "",
  879. SCHEME_CHAPERONE_VECTORP(s) ? "vector" : "string");
  880. }
  881. }
  882. intptr_t scheme_extract_index(const char *name, int pos, int argc, Scheme_Object **argv, intptr_t top, int false_ok)
  883. {
  884. intptr_t i;
  885. int is_top = 0;
  886. if (SCHEME_INTP(argv[pos])) {
  887. i = SCHEME_INT_VAL(argv[pos]);
  888. } else if (SCHEME_BIGNUMP(argv[pos])) {
  889. if (SCHEME_BIGPOS(argv[pos])) {
  890. i = top; /* out-of-bounds */
  891. is_top = 1;
  892. } else
  893. i = -1; /* negative */
  894. } else
  895. i = -1;
  896. if (!is_top && (i < 0))
  897. scheme_wrong_type(name,
  898. (false_ok ? "non-negative exact integer or #f" : "non-negative exact integer"),
  899. pos, argc, argv);
  900. return i;
  901. }
  902. void scheme_get_substring_indices(const char *name, Scheme_Object *str,
  903. int argc, Scheme_Object **argv,
  904. int spos, int fpos, intptr_t *_start, intptr_t *_finish)
  905. {
  906. intptr_t len;
  907. intptr_t start, finish;
  908. if (SCHEME_CHAPERONE_VECTORP(str))
  909. len = SCHEME_VEC_SIZE(str);
  910. else if (SCHEME_CHAR_STRINGP(str))
  911. len = SCHEME_CHAR_STRTAG_VAL(str);
  912. else
  913. len = SCHEME_BYTE_STRTAG_VAL(str);
  914. if (argc > spos)
  915. start = scheme_extract_index(name, spos, argc, argv, len + 1, 0);
  916. else
  917. start = 0;
  918. if (argc > fpos)
  919. finish = scheme_extract_index(name, fpos, argc, argv, len + 1, 0);
  920. else
  921. finish = len;
  922. if (!(start <= len)) {
  923. scheme_out_of_string_range(name, (fpos < 100) ? "starting " : "", argv[spos], str, 0, len);
  924. }
  925. if (!(finish >= start && finish <= len)) {
  926. scheme_out_of_string_range(name, "ending ", argv[fpos], str, start, len);
  927. }
  928. *_start = start;
  929. *_finish = finish;
  930. }
  931. void scheme_do_get_substring_indices(const char *name, Scheme_Object *str,
  932. int argc, Scheme_Object **argv,
  933. int spos, int fpos, intptr_t *_start, intptr_t *_finish, intptr_t len)
  934. {
  935. if (argc > spos) {
  936. if (SCHEME_INTP(argv[spos])) {
  937. intptr_t start = SCHEME_INT_VAL(argv[spos]);
  938. if ((start >= 0) && (start < len)) {
  939. *_start = start;
  940. if (argc > fpos) {
  941. intptr_t finish = SCHEME_INT_VAL(argv[fpos]);
  942. if ((finish >= start) && (finish <= len)) {
  943. *_finish = finish;
  944. return;
  945. }
  946. } else {
  947. *_finish = len;
  948. return;
  949. }
  950. }
  951. }
  952. } else {
  953. *_start = 0;
  954. *_finish = len;
  955. return;
  956. }
  957. scheme_get_substring_indices(name, str, argc, argv, spos, fpos, _start, _finish);
  958. }
  959. /**********************************************************************/
  960. /* char strings */
  961. /**********************************************************************/
  962. #define SCHEME_X_STR_VAL(x) SCHEME_CHAR_STR_VAL(x)
  963. #define SCHEME_X_STRTAG_VAL(x) SCHEME_CHAR_STRTAG_VAL(x)
  964. #define SCHEME_X_STRINGP(x) SCHEME_CHAR_STRINGP(x)
  965. #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_CHAR_STRINGP(x)
  966. #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_CHAR_STRING_IMMUTABLE(x)
  967. #define scheme_x_string_type scheme_char_string_type
  968. #define X(a, b) a##_char##b
  969. #define X_(a, b) a##_##b
  970. #define X__(a) a
  971. #define EMPTY (mzchar *)"\0\0\0"
  972. #define Xchar mzchar
  973. #define uXchar mzchar
  974. #define XSTR ""
  975. #define XSTRINGSTR "string"
  976. #define SUBXSTR "substring"
  977. #define CHARP(x) SCHEME_CHARP(x)
  978. #define CHAR_VAL(x) SCHEME_CHAR_VAL(x)
  979. #define CHAR_STR "character"
  980. #define MAKE_CHAR(x) _scheme_make_char(x)
  981. #define xstrlen scheme_char_strlen
  982. #include "strops.inc"
  983. #define GEN_STRING_COMP(name, scheme_name, comp, op, ul, size_shortcut) \
  984. static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
  985. { mzchar *s, *prev; int i, sl, pl; int falz = 0;\
  986. if (!SCHEME_CHAR_STRINGP(argv[0])) \
  987. scheme_wrong_type(scheme_name, "string", 0, argc, argv); \
  988. prev = SCHEME_CHAR_STR_VAL(argv[0]); pl = SCHEME_CHAR_STRTAG_VAL(argv[0]); \
  989. for (i = 1; i < argc; i++) { \
  990. if (!SCHEME_CHAR_STRINGP(argv[i])) \
  991. scheme_wrong_type(scheme_name, "string", i, argc, argv); \
  992. s = SCHEME_CHAR_STR_VAL(argv[i]); sl = SCHEME_CHAR_STRTAG_VAL(argv[i]); \
  993. if (!falz) if (!(comp(scheme_name, \
  994. prev, pl, \
  995. s, sl, ul, size_shortcut) op 0)) falz = 1; \
  996. prev = s; pl = sl; \
  997. } \
  998. return falz ? scheme_false : scheme_true; \
  999. }
  1000. GEN_STRING_COMP(string_eq, "string=?", mz_char_strcmp, ==, 0, 1)
  1001. GEN_STRING_COMP(string_lt, "string<?", mz_char_strcmp, <, 0, 0)
  1002. GEN_STRING_COMP(string_gt, "string>?", mz_char_strcmp, >, 0, 0)
  1003. GEN_STRING_COMP(string_lt_eq, "string<=?", mz_char_strcmp, <=, 0, 0)
  1004. GEN_STRING_COMP(string_gt_eq, "string>=?", mz_char_strcmp, >=, 0, 0)
  1005. GEN_STRING_COMP(string_ci_eq, "string-ci=?", mz_char_strcmp_ci, ==, 0, 0)
  1006. GEN_STRING_COMP(string_ci_lt, "string-ci<?", mz_char_strcmp_ci, <, 0, 0)
  1007. GEN_STRING_COMP(string_ci_gt, "string-ci>?", mz_char_strcmp_ci, >, 0, 0)
  1008. GEN_STRING_COMP(string_ci_lt_eq, "string-ci<=?", mz_char_strcmp_ci, <=, 0, 0)
  1009. GEN_STRING_COMP(string_ci_gt_eq, "string-ci>=?", mz_char_strcmp_ci, >=, 0, 0)
  1010. GEN_STRING_COMP(string_locale_eq, "string-locale=?", mz_char_strcmp, ==, 1, 0)
  1011. GEN_STRING_COMP(string_locale_lt, "string-locale<?", mz_char_strcmp, <, 1, 0)
  1012. GEN_STRING_COMP(string_locale_gt, "string-locale>?", mz_char_strcmp, >, 1, 0)
  1013. GEN_STRING_COMP(string_locale_ci_eq, "string-locale-ci=?", mz_char_strcmp_ci, ==, 1, 0)
  1014. GEN_STRING_COMP(string_locale_ci_lt, "string-locale-ci<?", mz_char_strcmp_ci, <, 1, 0)
  1015. GEN_STRING_COMP(string_locale_ci_gt, "string-locale-ci>?", mz_char_strcmp_ci, >, 1, 0)
  1016. /**********************************************************************/
  1017. /* byte strings */
  1018. /**********************************************************************/
  1019. #define SCHEME_BYTEP(x) ((SCHEME_INTP(x)) && (SCHEME_INT_VAL(x) >= 0) && (SCHEME_INT_VAL(x) <= 255))
  1020. #define BYTE_STR "exact integer in [0,255]"
  1021. static Scheme_Object *
  1022. byte_p(int argc, Scheme_Object *argv[])
  1023. {
  1024. return (SCHEME_BYTEP(argv[0]) ? scheme_true : scheme_false);
  1025. }
  1026. #define SCHEME_X_STR_VAL(x) SCHEME_BYTE_STR_VAL(x)
  1027. #define SCHEME_X_STRTAG_VAL(x) SCHEME_BYTE_STRTAG_VAL(x)
  1028. #define SCHEME_X_STRINGP(x) SCHEME_BYTE_STRINGP(x)
  1029. #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_BYTE_STRINGP(x)
  1030. #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_BYTE_STRING_IMMUTABLE(x)
  1031. #define scheme_x_string_type scheme_byte_string_type
  1032. #define X(a, b) a##_byte##b
  1033. #define X_(a, b) a##_byte_##b
  1034. #define X__(a) byte_##a
  1035. #define EMPTY ""
  1036. #define Xchar char
  1037. #define uXchar unsigned char
  1038. #define XSTR "byte "
  1039. #define XSTRINGSTR "bytes"
  1040. #define SUBXSTR "subbytes"
  1041. #define CHARP(x) SCHEME_BYTEP(x)
  1042. #define CHAR_VAL(x) SCHEME_INT_VAL(x)
  1043. #define CHAR_STR BYTE_STR
  1044. #define MAKE_CHAR(x) scheme_make_integer_value(x)
  1045. #define xstrlen strlen
  1046. #define GENERATING_BYTE
  1047. #include "strops.inc"
  1048. #undef GENERATING_BYTE
  1049. /* comparisons */
  1050. #define GEN_BYTE_STRING_COMP(name, scheme_name, comp, op) \
  1051. static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
  1052. { char *s, *prev; int i, sl, pl; int falz = 0;\
  1053. if (!SCHEME_BYTE_STRINGP(argv[0])) \
  1054. scheme_wrong_type(scheme_name, "byte string", 0, argc, argv); \
  1055. prev = SCHEME_BYTE_STR_VAL(argv[0]); pl = SCHEME_BYTE_STRTAG_VAL(argv[0]); \
  1056. for (i = 1; i < argc; i++) { \
  1057. if (!SCHEME_BYTE_STRINGP(argv[i])) \
  1058. scheme_wrong_type(scheme_name, "byte string", i, argc, argv); \
  1059. s = SCHEME_BYTE_STR_VAL(argv[i]); sl = SCHEME_BYTE_STRTAG_VAL(argv[i]); \
  1060. if (!falz) if (!(comp(scheme_name, \
  1061. (unsigned char *)prev, pl, \
  1062. (unsigned char *)s, sl) op 0)) falz = 1; \
  1063. prev = s; pl = sl; \
  1064. } \
  1065. return falz ? scheme_false : scheme_true; \
  1066. }
  1067. GEN_BYTE_STRING_COMP(byte_string_eq, "bytes=?", mz_strcmp, ==)
  1068. GEN_BYTE_STRING_COMP(byte_string_lt, "bytes<?", mz_strcmp, <)
  1069. GEN_BYTE_STRING_COMP(byte_string_gt, "bytes>?", mz_strcmp, >)
  1070. /**********************************************************************/
  1071. /* byte string <-> char string */
  1072. /**********************************************************************/
  1073. /************************* bytes->string *************************/
  1074. static Scheme_Object *
  1075. do_byte_string_to_char_string(const char *who,
  1076. Scheme_Object *bstr,
  1077. intptr_t istart, intptr_t ifinish,
  1078. int perm, int as_locale)
  1079. {
  1080. int i, ulen;
  1081. char *chars;
  1082. unsigned int *v;
  1083. chars = SCHEME_BYTE_STR_VAL(bstr);
  1084. ulen = utf8_decode_x((unsigned char *)chars, istart, ifinish,
  1085. NULL, 0, -1,
  1086. NULL, NULL, 0, 0,
  1087. NULL, 0,
  1088. (perm > -1) ? 0xD800 : 0);
  1089. if (ulen < 0) {
  1090. scheme_arg_mismatch(who,
  1091. STRING_IS_NOT_UTF_8,
  1092. bstr);
  1093. }
  1094. v = (unsigned int *)scheme_malloc_atomic((ulen + 1) * sizeof(unsigned int));
  1095. utf8_decode_x((unsigned char *)chars, istart, ifinish,
  1096. v, 0, -1,
  1097. NULL, NULL, 0, 0,
  1098. NULL, 0,
  1099. (perm > -1) ? 0xD800 : 0);
  1100. if (perm > -1) {
  1101. for (i = 0; i < ulen; i++) {
  1102. if (v[i] == 0xD800)
  1103. v[i] = perm;
  1104. }
  1105. }
  1106. v[ulen] = 0;
  1107. return scheme_make_sized_char_string(v, ulen, 0);
  1108. }
  1109. static Scheme_Object *
  1110. do_byte_string_to_char_string_locale(const char *who,
  1111. Scheme_Object *bstr,
  1112. intptr_t istart, intptr_t ifinish,
  1113. int perm)
  1114. {
  1115. char *us;
  1116. intptr_t olen;
  1117. reset_locale();
  1118. if (!iconv_ready) init_iconv();
  1119. if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open))
  1120. return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
  1121. if (istart < ifinish) {
  1122. int no_cvt;
  1123. us = string_to_from_locale(0, SCHEME_BYTE_STR_VAL(bstr),
  1124. istart, ifinish - istart,
  1125. &olen, perm, &no_cvt);
  1126. if (!us) {
  1127. if (no_cvt) {
  1128. return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
  1129. } else {
  1130. scheme_arg_mismatch(who,
  1131. "byte string is not a valid encoding for the current locale: ",
  1132. bstr);
  1133. }
  1134. }
  1135. ((mzchar *)us)[olen] = 0;
  1136. } else {
  1137. us = "\0\0\0";
  1138. olen = 0;
  1139. }
  1140. return scheme_make_sized_char_string((mzchar *)us, olen, 0);
  1141. }
  1142. static Scheme_Object *
  1143. do_string_to_vector(const char *who, int mode, int argc, Scheme_Object *argv[])
  1144. {
  1145. int permc;
  1146. intptr_t istart, ifinish;
  1147. if (!SCHEME_BYTE_STRINGP(argv[0]))
  1148. scheme_wrong_type(who, "byte string", 0, argc, argv);
  1149. if ((argc < 2) || SCHEME_FALSEP(argv[1]))
  1150. permc = -1;
  1151. else {
  1152. if (!SCHEME_CHARP(argv[1]))
  1153. scheme_wrong_type(who, "character or #f", 1, argc, argv);
  1154. permc = SCHEME_CHAR_VAL(argv[1]);
  1155. }
  1156. scheme_get_substring_indices(who, argv[0], argc, argv,
  1157. 2, 3,
  1158. &istart, &ifinish);
  1159. if (mode == 0)
  1160. return do_byte_string_to_char_string(who, argv[0], istart, ifinish, permc, 0);
  1161. else if (mode == 1)
  1162. return do_byte_string_to_char_string_locale(who, argv[0], istart, ifinish, permc);
  1163. else {
  1164. /* Latin-1 */
  1165. mzchar *us;
  1166. unsigned char *s;
  1167. intptr_t i, len;
  1168. len = ifinish - istart;
  1169. s = (unsigned char *)SCHEME_BYTE_STR_VAL(argv[0]);
  1170. us = (mzchar *)scheme_malloc_atomic((len + 1) * sizeof(mzchar));
  1171. for (i = istart; i < ifinish; i++) {
  1172. us[i - istart] = s[i];
  1173. }
  1174. us[len] = 0;
  1175. return scheme_make_sized_char_string(us, len, 0);
  1176. }
  1177. }
  1178. static Scheme_Object *
  1179. byte_string_to_char_string (int argc, Scheme_Object *argv[])
  1180. {
  1181. return do_string_to_vector("bytes->string/utf-8", 0, argc, argv);
  1182. }
  1183. static Scheme_Object *
  1184. byte_string_to_char_string_locale (int argc, Scheme_Object *argv[])
  1185. {
  1186. return do_string_to_vector("bytes->string/locale", 1, argc, argv);
  1187. }
  1188. static Scheme_Object *
  1189. byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[])
  1190. {
  1191. return do_string_to_vector("bytes->string/latin-1", 2, argc, argv);
  1192. }
  1193. Scheme_Object *scheme_byte_string_to_char_string(Scheme_Object *o)
  1194. {
  1195. return do_byte_string_to_char_string("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD, 0);
  1196. }
  1197. Scheme_Object *scheme_byte_string_to_char_string_locale(Scheme_Object *o)
  1198. {
  1199. return do_byte_string_to_char_string_locale("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD);
  1200. }
  1201. /************************* string->bytes *************************/
  1202. static Scheme_Object *do_char_string_to_byte_string(Scheme_Object *s, intptr_t istart, intptr_t ifinish,
  1203. int as_locale)
  1204. {
  1205. char *bs;
  1206. int slen;
  1207. slen = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
  1208. NULL, 0,
  1209. 0 /* UTF-16 */);
  1210. bs = (char *)scheme_malloc_atomic(slen + 1);
  1211. scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
  1212. (unsigned char *)bs, 0,
  1213. 0 /* UTF-16 */);
  1214. bs[slen] = 0;
  1215. return scheme_make_sized_byte_string(bs, slen, 0);
  1216. }
  1217. static Scheme_Object *
  1218. do_char_string_to_byte_string_locale(const char *who,
  1219. Scheme_Object *cstr,
  1220. intptr_t istart, intptr_t ifinish,
  1221. int perm)
  1222. {
  1223. char *s;
  1224. intptr_t olen;
  1225. reset_locale();
  1226. if (!iconv_ready) init_iconv();
  1227. if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open))
  1228. return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
  1229. if (istart < ifinish) {
  1230. int no_cvt;
  1231. s = string_to_from_locale(1, (char *)SCHEME_CHAR_STR_VAL(cstr),
  1232. istart, ifinish - istart,
  1233. &olen, perm, &no_cvt);
  1234. if (!s) {
  1235. if (no_cvt) {
  1236. return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
  1237. } else {
  1238. scheme_arg_mismatch(who,
  1239. "string cannot be encoded for the current locale: ",
  1240. cstr);
  1241. }
  1242. }
  1243. s[olen] = 0;
  1244. } else {
  1245. s = "";
  1246. olen = 0;
  1247. }
  1248. return scheme_make_sized_byte_string(s, olen, 0);
  1249. }
  1250. Scheme_Object *scheme_char_string_to_byte_string(Scheme_Object *s)
  1251. {
  1252. return do_char_string_to_byte_string(s, 0, SCHEME_CHAR_STRLEN_VAL(s), 0);
  1253. }
  1254. Scheme_Object *scheme_char_string_to_byte_string_locale(Scheme_Object *s)
  1255. {
  1256. return do_char_string_to_byte_string_locale("s->s", s, 0, SCHEME_CHAR_STRLEN_VAL(s), '?');
  1257. }
  1258. static Scheme_Object *do_chars_to_bytes(const char *who, int mode,
  1259. int argc, Scheme_Object *argv[])
  1260. {
  1261. intptr_t istart, ifinish;
  1262. int permc;
  1263. if (!SCHEME_CHAR_STRINGP(argv[0]))
  1264. scheme_wrong_type(who, "string", 0, argc, argv);
  1265. if ((argc < 2) || SCHEME_FALSEP(argv[1]))
  1266. permc = -1;
  1267. else {
  1268. if (!SCHEME_BYTEP(argv[1]))
  1269. scheme_wrong_type(who, "byte or #f", 1, argc, argv);
  1270. permc = SCHEME_INT_VAL(argv[1]);
  1271. }
  1272. scheme_get_substring_indices(who, argv[0], argc, argv,
  1273. 2, 3, &istart, &ifinish);
  1274. if (mode == 1)
  1275. return do_char_string_to_byte_string_locale(who, argv[0], istart, ifinish, permc);
  1276. else if (mode == 0)
  1277. return do_char_string_to_byte_string(argv[0], istart, ifinish, 0);
  1278. else {
  1279. /* Latin-1 */
  1280. mzchar *us;
  1281. unsigned char *s;
  1282. intptr_t i, len;
  1283. len = ifinish - istart;
  1284. us = SCHEME_CHAR_STR_VAL(argv[0]);
  1285. s = (unsigned char *)scheme_malloc_atomic(len + 1);
  1286. for (i = istart; i < ifinish; i++) {
  1287. if (us[i] < 256)
  1288. s[i - istart] = us[i];
  1289. else if (permc >= 0) {
  1290. s[i - istart] = permc;
  1291. } else {
  1292. scheme_arg_mismatch(who,
  1293. "string cannot be encoded in Latin-1: ",
  1294. argv[0]);
  1295. }
  1296. }
  1297. s[len] = 0;
  1298. return scheme_make_sized_byte_string((char *)s, len, 0);
  1299. }
  1300. }
  1301. static Scheme_Object *char_string_to_byte_string(int argc, Scheme_Object *argv[])
  1302. {
  1303. return do_chars_to_bytes("string->bytes/utf-8", 0, argc, argv);
  1304. }
  1305. static Scheme_Object *char_string_to_byte_string_locale(int argc, Scheme_Object *argv[])
  1306. {
  1307. return do_chars_to_bytes("string->bytes/locale", 1, argc, argv);
  1308. }
  1309. static Scheme_Object *char_string_to_byte_string_latin1(int argc, Scheme_Object *argv[])
  1310. {
  1311. return do_chars_to_bytes("string->bytes/latin-1", 2, argc, argv);
  1312. }
  1313. /************************* Other *************************/
  1314. static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[])
  1315. {
  1316. intptr_t istart, ifinish, len;
  1317. if (!SCHEME_CHAR_STRINGP(argv[0]))
  1318. scheme_wrong_type("string-utf-8-length", "string", 0, argc, argv);
  1319. scheme_get_substring_indices("string-utf-8-length", argv[0], argc, argv,
  1320. 1, 2, &istart, &ifinish);
  1321. len = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(argv[0]), istart, ifinish,
  1322. NULL, 0, 0);
  1323. return scheme_make_integer(len);
  1324. }
  1325. static Scheme_Object *
  1326. byte_string_utf8_length (int argc, Scheme_Object *argv[])
  1327. {
  1328. int len, perm;
  1329. intptr_t istart, ifinish;
  1330. char *chars;
  1331. if (!SCHEME_BYTE_STRINGP(argv[0]))
  1332. scheme_wrong_type("bytes-utf-8-length", "string", 0, argc, argv);
  1333. chars = SCHEME_BYTE_STR_VAL(argv[0]);
  1334. if ((argc > 1) && !SCHEME_FALSEP(argv[1])) {
  1335. if (!SCHEME_CHARP(argv[1]))
  1336. scheme_wrong_type("bytes-utf-8-length", "character or #f", 1, argc, argv);
  1337. perm = 1;
  1338. } else
  1339. perm = 0;
  1340. scheme_get_substring_indices("bytes-utf-8-length", argv[0], argc, argv,
  1341. 2, 3,
  1342. &istart, &ifinish);
  1343. len = scheme_utf8_decode((unsigned char *)chars, istart, ifinish,
  1344. NULL, 0, -1,
  1345. NULL, 0, perm);
  1346. if (len < 0)
  1347. return scheme_false;
  1348. else
  1349. return scheme_make_integer(len);
  1350. }
  1351. static Scheme_Object *
  1352. byte_string_utf8_index(int argc, Scheme_Object *argv[])
  1353. {
  1354. intptr_t istart, ifinish, pos = -1, opos, ipos;
  1355. int result, perm;
  1356. cha