/racket/src/racket/src/string.c

http://github.com/plt/racket · C · 5610 lines · 4599 code · 714 blank · 297 comment · 1059 complexity · 5a88f05530bc3f678e0a97f47d3bb508 MD5 · raw file

Large files are truncated click here to view the full file

  1. #include "schpriv.h"
  2. #include "schvers.h"
  3. #include "schrktio.h"
  4. #include <string.h>
  5. #include <ctype.h>
  6. #ifdef NO_ERRNO_GLOBAL
  7. # define errno -1
  8. #else
  9. # include <errno.h>
  10. #endif
  11. #ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH
  12. # include "schsys.h"
  13. #endif
  14. #ifndef SPLS_SUFFIX
  15. # define SPLS_SUFFIX ""
  16. #endif
  17. #include "schustr.inc"
  18. #ifdef MACOS_UNICODE_SUPPORT
  19. # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
  20. #endif
  21. #ifdef WINDOWS_UNICODE_SUPPORT
  22. # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
  23. #endif
  24. #ifndef mzLOCALE_IS_UTF_8
  25. # define mzLOCALE_IS_UTF_8(s) (!(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED))
  26. #endif
  27. #define mzICONV_KIND 0
  28. #define mzUTF8_KIND 1
  29. #define mzUTF8_TO_UTF16_KIND 2
  30. #define mzUTF16_TO_UTF8_KIND 3
  31. typedef struct Scheme_Converter {
  32. Scheme_Object so;
  33. short closed;
  34. short kind;
  35. rktio_converter_t *cd;
  36. int permissive;
  37. Scheme_Custodian_Reference *mref;
  38. } Scheme_Converter;
  39. Scheme_Object *scheme_system_type_proc;
  40. static Scheme_Object *make_string (int argc, Scheme_Object *argv[]);
  41. static Scheme_Object *string (int argc, Scheme_Object *argv[]);
  42. static Scheme_Object *string_p (int argc, Scheme_Object *argv[]);
  43. static Scheme_Object *string_length (int argc, Scheme_Object *argv[]);
  44. static Scheme_Object *string_eq (int argc, Scheme_Object *argv[]);
  45. static Scheme_Object *string_locale_eq (int argc, Scheme_Object *argv[]);
  46. static Scheme_Object *string_ci_eq (int argc, Scheme_Object *argv[]);
  47. static Scheme_Object *string_locale_ci_eq (int argc, Scheme_Object *argv[]);
  48. static Scheme_Object *string_lt (int argc, Scheme_Object *argv[]);
  49. static Scheme_Object *string_locale_lt (int argc, Scheme_Object *argv[]);
  50. static Scheme_Object *string_gt (int argc, Scheme_Object *argv[]);
  51. static Scheme_Object *string_locale_gt (int argc, Scheme_Object *argv[]);
  52. static Scheme_Object *string_lt_eq (int argc, Scheme_Object *argv[]);
  53. static Scheme_Object *string_gt_eq (int argc, Scheme_Object *argv[]);
  54. static Scheme_Object *string_ci_lt (int argc, Scheme_Object *argv[]);
  55. static Scheme_Object *string_locale_ci_lt (int argc, Scheme_Object *argv[]);
  56. static Scheme_Object *string_ci_gt (int argc, Scheme_Object *argv[]);
  57. static Scheme_Object *string_locale_ci_gt (int argc, Scheme_Object *argv[]);
  58. static Scheme_Object *string_ci_lt_eq (int argc, Scheme_Object *argv[]);
  59. static Scheme_Object *string_ci_gt_eq (int argc, Scheme_Object *argv[]);
  60. static Scheme_Object *string_upcase (int argc, Scheme_Object *argv[]);
  61. static Scheme_Object *string_downcase (int argc, Scheme_Object *argv[]);
  62. static Scheme_Object *string_titlecase (int argc, Scheme_Object *argv[]);
  63. static Scheme_Object *string_foldcase (int argc, Scheme_Object *argv[]);
  64. static Scheme_Object *string_locale_upcase (int argc, Scheme_Object *argv[]);
  65. static Scheme_Object *string_locale_downcase (int argc, Scheme_Object *argv[]);
  66. static Scheme_Object *substring (int argc, Scheme_Object *argv[]);
  67. static Scheme_Object *string_append (int argc, Scheme_Object *argv[]);
  68. static Scheme_Object *string_append_immutable (int argc, Scheme_Object *argv[]);
  69. static Scheme_Object *string_to_list (int argc, Scheme_Object *argv[]);
  70. static Scheme_Object *list_to_string (int argc, Scheme_Object *argv[]);
  71. static Scheme_Object *string_copy (int argc, Scheme_Object *argv[]);
  72. static Scheme_Object *string_copy_bang (int argc, Scheme_Object *argv[]);
  73. static Scheme_Object *string_fill (int argc, Scheme_Object *argv[]);
  74. static Scheme_Object *string_to_immutable (int argc, Scheme_Object *argv[]);
  75. static Scheme_Object *string_normalize_c (int argc, Scheme_Object *argv[]);
  76. static Scheme_Object *string_normalize_kc (int argc, Scheme_Object *argv[]);
  77. static Scheme_Object *string_normalize_d (int argc, Scheme_Object *argv[]);
  78. static Scheme_Object *string_normalize_kd (int argc, Scheme_Object *argv[]);
  79. static Scheme_Object *make_shared_byte_string (int argc, Scheme_Object *argv[]);
  80. static Scheme_Object *shared_byte_string (int argc, Scheme_Object *argv[]);
  81. static Scheme_Object *make_byte_string (int argc, Scheme_Object *argv[]);
  82. static Scheme_Object *byte_string (int argc, Scheme_Object *argv[]);
  83. static Scheme_Object *byte_p (int argc, Scheme_Object *argv[]);
  84. static Scheme_Object *byte_string_p (int argc, Scheme_Object *argv[]);
  85. static Scheme_Object *byte_string_length (int argc, Scheme_Object *argv[]);
  86. static Scheme_Object *byte_string_eq (int argc, Scheme_Object *argv[]);
  87. static Scheme_Object *byte_string_lt (int argc, Scheme_Object *argv[]);
  88. static Scheme_Object *byte_string_gt (int argc, Scheme_Object *argv[]);
  89. static Scheme_Object *byte_substring (int argc, Scheme_Object *argv[]);
  90. static Scheme_Object *byte_string_append (int argc, Scheme_Object *argv[]);
  91. static Scheme_Object *byte_string_to_list (int argc, Scheme_Object *argv[]);
  92. static Scheme_Object *list_to_byte_string (int argc, Scheme_Object *argv[]);
  93. static Scheme_Object *byte_string_copy (int argc, Scheme_Object *argv[]);
  94. static Scheme_Object *byte_string_copy_bang (int argc, Scheme_Object *argv[]);
  95. static Scheme_Object *byte_string_fill (int argc, Scheme_Object *argv[]);
  96. static Scheme_Object *byte_string_to_immutable (int argc, Scheme_Object *argv[]);
  97. static Scheme_Object *byte_string_utf8_index (int argc, Scheme_Object *argv[]);
  98. static Scheme_Object *byte_string_utf8_ref (int argc, Scheme_Object *argv[]);
  99. static Scheme_Object *byte_string_utf8_length (int argc, Scheme_Object *argv[]);
  100. static Scheme_Object *byte_string_to_char_string (int argc, Scheme_Object *argv[]);
  101. static Scheme_Object *byte_string_to_char_string_locale (int argc, Scheme_Object *argv[]);
  102. static Scheme_Object *byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[]);
  103. static Scheme_Object *char_string_to_byte_string (int argc, Scheme_Object *argv[]);
  104. static Scheme_Object *char_string_to_byte_string_locale (int argc, Scheme_Object *argv[]);
  105. static Scheme_Object *char_string_to_byte_string_latin1 (int argc, Scheme_Object *argv[]);
  106. static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[]);
  107. static Scheme_Object *version(int argc, Scheme_Object *argv[]);
  108. static Scheme_Object *format(int argc, Scheme_Object *argv[]);
  109. static Scheme_Object *sch_printf(int argc, Scheme_Object *argv[]);
  110. static Scheme_Object *sch_eprintf(int argc, Scheme_Object *argv[]);
  111. static Scheme_Object *sch_fprintf(int argc, Scheme_Object *argv[]);
  112. static Scheme_Object *banner(int argc, Scheme_Object *argv[]);
  113. static Scheme_Object *env_p(int argc, Scheme_Object *argv[]);
  114. static Scheme_Object *sch_getenv(int argc, Scheme_Object *argv[]);
  115. static Scheme_Object *sch_getenv_names(int argc, Scheme_Object *argv[]);
  116. static Scheme_Object *sch_putenv(int argc, Scheme_Object *argv[]);
  117. static Scheme_Object *env_copy(int argc, Scheme_Object *argv[]);
  118. static Scheme_Object *env_make(int argc, Scheme_Object *argv[]);
  119. static Scheme_Object *current_environment_variables(int argc, Scheme_Object *argv[]);
  120. static Scheme_Object *system_type(int argc, Scheme_Object *argv[]);
  121. static Scheme_Object *system_library_subpath(int argc, Scheme_Object *argv[]);
  122. static Scheme_Object *cmdline_args(int argc, Scheme_Object *argv[]);
  123. static Scheme_Object *current_locale(int argc, Scheme_Object *argv[]);
  124. static Scheme_Object *locale_string_encoding(int argc, Scheme_Object *argv[]);
  125. static Scheme_Object *system_language_country(int argc, Scheme_Object *argv[]);
  126. static Scheme_Object *byte_string_open_converter(int argc, Scheme_Object *argv[]);
  127. static Scheme_Object *byte_string_close_converter(int argc, Scheme_Object *argv[]);
  128. static Scheme_Object *byte_string_convert(int argc, Scheme_Object *argv[]);
  129. static Scheme_Object *byte_string_convert_end(int argc, Scheme_Object *argv[]);
  130. static Scheme_Object *byte_converter_p(int argc, Scheme_Object *argv[]);
  131. static Scheme_Object *path_lt (int argc, Scheme_Object *argv[]);
  132. #ifdef MZ_PRECISE_GC
  133. static void register_traversers(void);
  134. #endif
  135. static int mz_char_strcmp(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
  136. static int mz_char_strcmp_ci(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
  137. static int mz_strcmp(const char *who, unsigned char *str1, intptr_t l1, unsigned char *str2, intptr_t l2);
  138. XFORM_NONGCING static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t end,
  139. unsigned int *us, intptr_t dstart, intptr_t dend,
  140. intptr_t *ipos, intptr_t *jpos,
  141. char compact, char utf16,
  142. int *state, int might_continue, int permissive);
  143. XFORM_NONGCING static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t end,
  144. unsigned char *s, intptr_t dstart, intptr_t dend,
  145. intptr_t *_ipos, intptr_t *_opos, char utf16);
  146. static char *string_to_from_locale(int to_bytes,
  147. char *in, intptr_t delta, intptr_t len,
  148. intptr_t *olen, int perm,
  149. int *no_cvt);
  150. static void cache_locale_or_close(int to_bytes, rktio_converter_t *cd, char *le);
  151. #define portable_isspace(x) (((x) < 128) && isspace(x))
  152. ROSYM static Scheme_Object *sys_symbol;
  153. ROSYM static Scheme_Object *link_symbol, *machine_symbol, *vm_symbol, *gc_symbol;
  154. ROSYM static Scheme_Object *so_suffix_symbol, *so_mode_symbol, *word_symbol;
  155. ROSYM static Scheme_Object *os_symbol, *fs_change_symbol, *target_machine_symbol, *cross_symbol;
  156. ROSYM static Scheme_Object *racket_symbol, *cgc_symbol, *_3m_symbol, *cs_symbol;
  157. ROSYM static Scheme_Object *force_symbol, *infer_symbol;
  158. ROSYM static Scheme_Object *platform_3m_path, *platform_cgc_path, *platform_cs_path;
  159. READ_ONLY static Scheme_Object *zero_length_char_string;
  160. READ_ONLY static Scheme_Object *zero_length_char_immutable_string;
  161. READ_ONLY static Scheme_Object *zero_length_byte_string;
  162. SHARED_OK static char *embedding_banner;
  163. SHARED_OK static Scheme_Object *vers_str;
  164. SHARED_OK static Scheme_Object *banner_str;
  165. THREAD_LOCAL_DECL(static Scheme_Object *fs_change_props);
  166. THREAD_LOCAL_DECL(static char *cached_locale_encoding_name);
  167. THREAD_LOCAL_DECL(struct rktio_converter_t *cached_locale_to_converter);
  168. THREAD_LOCAL_DECL(struct rktio_converter_t *cached_locale_from_converter);
  169. READ_ONLY static Scheme_Object *complete_symbol, *continues_symbol, *aborts_symbol, *error_symbol;
  170. READ_ONLY Scheme_Object *scheme_string_p_proc;
  171. READ_ONLY Scheme_Object *scheme_byte_string_p_proc;
  172. READ_ONLY static int cross_compile_mode;
  173. /* These two locale variables are only valid when reset_locale()
  174. is called after continuation marks (and hence parameterization)
  175. may have changed. Similarly, setlocale() is only up-to-date
  176. when reset_locale() has been called. */
  177. THREAD_LOCAL_DECL(static int locale_on);
  178. THREAD_LOCAL_DECL(static void *current_locale_name_ptr);
  179. static void reset_locale(void);
  180. #define current_locale_name ((const mzchar *)current_locale_name_ptr)
  181. static const mzchar empty_char_string[1] = { 0 };
  182. static const mzchar xes_char_string[2] = { 0x78787878, 0 };
  183. void
  184. scheme_init_string (Scheme_Startup_Env *env)
  185. {
  186. Scheme_Object *p;
  187. REGISTER_SO(sys_symbol);
  188. sys_symbol = scheme_intern_symbol(SYSTEM_TYPE_NAME);
  189. REGISTER_SO(link_symbol);
  190. REGISTER_SO(machine_symbol);
  191. REGISTER_SO(gc_symbol);
  192. REGISTER_SO(vm_symbol);
  193. REGISTER_SO(so_suffix_symbol);
  194. REGISTER_SO(so_mode_symbol);
  195. REGISTER_SO(word_symbol);
  196. REGISTER_SO(os_symbol);
  197. REGISTER_SO(fs_change_symbol);
  198. REGISTER_SO(target_machine_symbol);
  199. REGISTER_SO(cross_symbol);
  200. link_symbol = scheme_intern_symbol("link");
  201. machine_symbol = scheme_intern_symbol("machine");
  202. vm_symbol = scheme_intern_symbol("vm");
  203. gc_symbol = scheme_intern_symbol("gc");
  204. so_suffix_symbol = scheme_intern_symbol("so-suffix");
  205. so_mode_symbol = scheme_intern_symbol("so-mode");
  206. word_symbol = scheme_intern_symbol("word");
  207. os_symbol = scheme_intern_symbol("os");
  208. fs_change_symbol = scheme_intern_symbol("fs-change");
  209. target_machine_symbol = scheme_intern_symbol("target-machine");
  210. cross_symbol = scheme_intern_symbol("cross");
  211. REGISTER_SO(racket_symbol);
  212. REGISTER_SO(cgc_symbol);
  213. REGISTER_SO(_3m_symbol);
  214. REGISTER_SO(cs_symbol);
  215. racket_symbol = scheme_intern_symbol("racket");
  216. cgc_symbol = scheme_intern_symbol("cgc");
  217. _3m_symbol = scheme_intern_symbol("3m");
  218. cs_symbol = scheme_intern_symbol("cs");
  219. REGISTER_SO(force_symbol);
  220. REGISTER_SO(infer_symbol);
  221. force_symbol = scheme_intern_symbol("force");
  222. infer_symbol = scheme_intern_symbol("infer");
  223. REGISTER_SO(zero_length_char_string);
  224. REGISTER_SO(zero_length_char_immutable_string);
  225. REGISTER_SO(zero_length_byte_string);
  226. zero_length_char_string = scheme_alloc_char_string(0, 0);
  227. zero_length_char_immutable_string = scheme_alloc_char_string(0, 0);
  228. SCHEME_SET_CHAR_STRING_IMMUTABLE(zero_length_char_immutable_string);
  229. zero_length_byte_string = scheme_alloc_byte_string(0, 0);
  230. REGISTER_SO(complete_symbol);
  231. REGISTER_SO(continues_symbol);
  232. REGISTER_SO(aborts_symbol);
  233. REGISTER_SO(error_symbol);
  234. complete_symbol = scheme_intern_symbol("complete");
  235. continues_symbol = scheme_intern_symbol("continues");
  236. aborts_symbol = scheme_intern_symbol("aborts");
  237. error_symbol = scheme_intern_symbol("error");
  238. REGISTER_SO(platform_3m_path);
  239. # ifdef DOS_FILE_SYSTEM
  240. # define MZ3M_SUBDIR "\\3m"
  241. # define MZCS_SUBDIR "\\cs"
  242. # else
  243. # define MZ3M_SUBDIR "/3m"
  244. # define MZCS_SUBDIR "/cs"
  245. #endif
  246. REGISTER_SO(platform_3m_path);
  247. REGISTER_SO(platform_cgc_path);
  248. REGISTER_SO(platform_cs_path);
  249. platform_cgc_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH SPLS_SUFFIX);
  250. platform_3m_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH SPLS_SUFFIX MZ3M_SUBDIR);
  251. platform_cs_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH SPLS_SUFFIX MZCS_SUBDIR);
  252. REGISTER_SO(embedding_banner);
  253. REGISTER_SO(vers_str);
  254. REGISTER_SO(banner_str);
  255. vers_str = scheme_make_utf8_string(scheme_version());
  256. SCHEME_SET_CHAR_STRING_IMMUTABLE(vers_str);
  257. banner_str = scheme_make_utf8_string(scheme_banner());
  258. SCHEME_SET_CHAR_STRING_IMMUTABLE(banner_str);
  259. REGISTER_SO(scheme_string_p_proc);
  260. p = scheme_make_folding_prim(string_p, "string?", 1, 1, 1);
  261. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
  262. | SCHEME_PRIM_IS_OMITABLE
  263. | SCHEME_PRIM_PRODUCES_BOOL);
  264. scheme_addto_prim_instance("string?", p, env);
  265. scheme_string_p_proc = p;
  266. scheme_addto_prim_instance("make-string",
  267. scheme_make_immed_prim(make_string,
  268. "make-string",
  269. 1, 2),
  270. env);
  271. scheme_addto_prim_instance("string",
  272. scheme_make_immed_prim(string,
  273. "string",
  274. 0, -1),
  275. env);
  276. p = scheme_make_folding_prim(string_length, "string-length", 1, 1, 1);
  277. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
  278. |SCHEME_PRIM_PRODUCES_FIXNUM
  279. | SCHEME_PRIM_AD_HOC_OPT);
  280. scheme_addto_prim_instance("string-length", p,
  281. env);
  282. p = scheme_make_immed_prim(scheme_checked_string_ref, "string-ref", 2, 2);
  283. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED
  284. | SCHEME_PRIM_AD_HOC_OPT);
  285. scheme_addto_prim_instance("string-ref", p, env);
  286. p = scheme_make_immed_prim(scheme_checked_string_set, "string-set!", 3, 3);
  287. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_NARY_INLINED
  288. | SCHEME_PRIM_AD_HOC_OPT);
  289. scheme_addto_prim_instance("string-set!", p, env);
  290. p = scheme_make_immed_prim(string_eq, "string=?", 1, -1);
  291. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED
  292. | SCHEME_PRIM_PRODUCES_BOOL
  293. | SCHEME_PRIM_PRODUCES_BOOL);
  294. scheme_addto_prim_instance("string=?", p, env);
  295. scheme_addto_prim_instance("string-locale=?",
  296. scheme_make_immed_prim(string_locale_eq,
  297. "string-locale=?",
  298. 1, -1),
  299. env);
  300. scheme_addto_prim_instance("string-ci=?",
  301. scheme_make_immed_prim(string_ci_eq,
  302. "string-ci=?",
  303. 1, -1),
  304. env);
  305. scheme_addto_prim_instance("string-locale-ci=?",
  306. scheme_make_immed_prim(string_locale_ci_eq,
  307. "string-locale-ci=?",
  308. 1, -1),
  309. env);
  310. scheme_addto_prim_instance("string<?",
  311. scheme_make_immed_prim(string_lt,
  312. "string<?",
  313. 1, -1),
  314. env);
  315. scheme_addto_prim_instance("string-locale<?",
  316. scheme_make_immed_prim(string_locale_lt,
  317. "string-locale<?",
  318. 1, -1),
  319. env);
  320. scheme_addto_prim_instance("string>?",
  321. scheme_make_immed_prim(string_gt,
  322. "string>?",
  323. 1, -1),
  324. env);
  325. scheme_addto_prim_instance("string-locale>?",
  326. scheme_make_immed_prim(string_locale_gt,
  327. "string-locale>?",
  328. 1, -1),
  329. env);
  330. scheme_addto_prim_instance("string<=?",
  331. scheme_make_immed_prim(string_lt_eq,
  332. "string<=?",
  333. 1, -1),
  334. env);
  335. scheme_addto_prim_instance("string>=?",
  336. scheme_make_immed_prim(string_gt_eq,
  337. "string>=?",
  338. 1, -1),
  339. env);
  340. scheme_addto_prim_instance("string-ci<?",
  341. scheme_make_immed_prim(string_ci_lt,
  342. "string-ci<?",
  343. 1, -1),
  344. env);
  345. scheme_addto_prim_instance("string-locale-ci<?",
  346. scheme_make_immed_prim(string_locale_ci_lt,
  347. "string-locale-ci<?",
  348. 1, -1),
  349. env);
  350. scheme_addto_prim_instance("string-ci>?",
  351. scheme_make_immed_prim(string_ci_gt,
  352. "string-ci>?",
  353. 1, -1),
  354. env);
  355. scheme_addto_prim_instance("string-locale-ci>?",
  356. scheme_make_immed_prim(string_locale_ci_gt,
  357. "string-locale-ci>?",
  358. 1, -1),
  359. env);
  360. scheme_addto_prim_instance("string-ci<=?",
  361. scheme_make_immed_prim(string_ci_lt_eq,
  362. "string-ci<=?",
  363. 1, -1),
  364. env);
  365. scheme_addto_prim_instance("string-ci>=?",
  366. scheme_make_immed_prim(string_ci_gt_eq,
  367. "string-ci>=?",
  368. 1, -1),
  369. env);
  370. scheme_addto_prim_instance("substring",
  371. scheme_make_immed_prim(substring,
  372. "substring",
  373. 2, 3),
  374. env);
  375. p = scheme_make_immed_prim(string_append, "string-append", 0, -1);
  376. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
  377. scheme_addto_prim_instance("string-append", p, env);
  378. p = scheme_make_immed_prim(string_append_immutable, "string-append-immutable", 0, -1);
  379. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
  380. scheme_addto_prim_instance("string-append-immutable", p, env);
  381. scheme_addto_prim_instance("string->list",
  382. scheme_make_immed_prim(string_to_list,
  383. "string->list",
  384. 1, 1),
  385. env);
  386. scheme_addto_prim_instance("list->string",
  387. scheme_make_immed_prim(list_to_string,
  388. "list->string",
  389. 1, 1),
  390. env);
  391. scheme_addto_prim_instance("string-copy",
  392. scheme_make_immed_prim(string_copy,
  393. "string-copy",
  394. 1, 1),
  395. env);
  396. scheme_addto_prim_instance("string-copy!",
  397. scheme_make_immed_prim(string_copy_bang,
  398. "string-copy!",
  399. 3, 5),
  400. env);
  401. scheme_addto_prim_instance("string-fill!",
  402. scheme_make_immed_prim(string_fill,
  403. "string-fill!",
  404. 2, 2),
  405. env);
  406. p = scheme_make_immed_prim(string_to_immutable, "string->immutable-string", 1, 1);
  407. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
  408. scheme_addto_prim_instance("string->immutable-string", p, env);
  409. scheme_addto_prim_instance("string-normalize-nfc",
  410. scheme_make_immed_prim(string_normalize_c,
  411. "string-normalize-nfc",
  412. 1, 1),
  413. env);
  414. scheme_addto_prim_instance("string-normalize-nfkc",
  415. scheme_make_immed_prim(string_normalize_kc,
  416. "string-normalize-nfkc",
  417. 1, 1),
  418. env);
  419. scheme_addto_prim_instance("string-normalize-nfd",
  420. scheme_make_immed_prim(string_normalize_d,
  421. "string-normalize-nfd",
  422. 1, 1),
  423. env);
  424. scheme_addto_prim_instance("string-normalize-nfkd",
  425. scheme_make_immed_prim(string_normalize_kd,
  426. "string-normalize-nfkd",
  427. 1, 1),
  428. env);
  429. scheme_addto_prim_instance("string-upcase",
  430. scheme_make_immed_prim(string_upcase,
  431. "string-upcase",
  432. 1, 1),
  433. env);
  434. scheme_addto_prim_instance("string-downcase",
  435. scheme_make_immed_prim(string_downcase,
  436. "string-downcase",
  437. 1, 1),
  438. env);
  439. scheme_addto_prim_instance("string-titlecase",
  440. scheme_make_immed_prim(string_titlecase,
  441. "string-titlecase",
  442. 1, 1),
  443. env);
  444. scheme_addto_prim_instance("string-foldcase",
  445. scheme_make_immed_prim(string_foldcase,
  446. "string-foldcase",
  447. 1, 1),
  448. env);
  449. scheme_addto_prim_instance("string-locale-upcase",
  450. scheme_make_immed_prim(string_locale_upcase,
  451. "string-locale-upcase",
  452. 1, 1),
  453. env);
  454. scheme_addto_prim_instance("string-locale-downcase",
  455. scheme_make_immed_prim(string_locale_downcase,
  456. "string-locale-downcase",
  457. 1, 1),
  458. env);
  459. scheme_addto_prim_instance("current-locale",
  460. scheme_register_parameter(current_locale,
  461. "current-locale",
  462. MZCONFIG_LOCALE),
  463. env);
  464. scheme_addto_prim_instance("locale-string-encoding",
  465. scheme_make_immed_prim(locale_string_encoding,
  466. "locale-string-encoding",
  467. 0, 0),
  468. env);
  469. scheme_addto_prim_instance("system-language+country",
  470. scheme_make_immed_prim(system_language_country,
  471. "system-language+country",
  472. 0, 0),
  473. env);
  474. scheme_addto_prim_instance("bytes-converter?",
  475. scheme_make_immed_prim(byte_converter_p,
  476. "bytes-converter?",
  477. 1, 1),
  478. env);
  479. scheme_addto_prim_instance("bytes-convert",
  480. scheme_make_prim_w_arity2(byte_string_convert,
  481. "bytes-convert",
  482. 2, 7,
  483. 3, 3),
  484. env);
  485. scheme_addto_prim_instance("bytes-convert-end",
  486. scheme_make_prim_w_arity2(byte_string_convert_end,
  487. "bytes-convert-end",
  488. 1, 4,
  489. 2, 2),
  490. env);
  491. scheme_addto_prim_instance("bytes-open-converter",
  492. scheme_make_immed_prim(byte_string_open_converter,
  493. "bytes-open-converter",
  494. 2, 2),
  495. env);
  496. scheme_addto_prim_instance("bytes-close-converter",
  497. scheme_make_immed_prim(byte_string_close_converter,
  498. "bytes-close-converter",
  499. 1, 1),
  500. env);
  501. scheme_addto_prim_instance("format",
  502. scheme_make_noncm_prim(format,
  503. "format",
  504. 1, -1),
  505. env);
  506. scheme_addto_prim_instance("printf",
  507. scheme_make_noncm_prim(sch_printf,
  508. "printf",
  509. 1, -1),
  510. env);
  511. scheme_addto_prim_instance("eprintf",
  512. scheme_make_noncm_prim(sch_eprintf,
  513. "eprintf",
  514. 1, -1),
  515. env);
  516. scheme_addto_prim_instance("fprintf",
  517. scheme_make_noncm_prim(sch_fprintf,
  518. "fprintf",
  519. 2, -1),
  520. env);
  521. scheme_addto_prim_instance("byte?",
  522. scheme_make_folding_prim(byte_p,
  523. "byte?",
  524. 1, 1, 1),
  525. env);
  526. REGISTER_SO(scheme_byte_string_p_proc);
  527. p = scheme_make_folding_prim(byte_string_p, "bytes?", 1, 1, 1);
  528. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
  529. | SCHEME_PRIM_IS_OMITABLE
  530. | SCHEME_PRIM_PRODUCES_BOOL);
  531. scheme_addto_prim_instance("bytes?", p, env);
  532. scheme_byte_string_p_proc = p;
  533. scheme_addto_prim_instance("make-bytes",
  534. scheme_make_immed_prim(make_byte_string,
  535. "make-bytes",
  536. 1, 2),
  537. env);
  538. scheme_addto_prim_instance("bytes",
  539. scheme_make_immed_prim(byte_string,
  540. "bytes",
  541. 0, -1),
  542. env);
  543. ADD_PRIM_W_ARITY("make-shared-bytes", make_shared_byte_string, 1, 2, env);
  544. ADD_PRIM_W_ARITY("shared-bytes", shared_byte_string, 0, -1, env);
  545. p = scheme_make_folding_prim(byte_string_length, "bytes-length", 1, 1, 1);
  546. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
  547. | SCHEME_PRIM_PRODUCES_FIXNUM
  548. | SCHEME_PRIM_AD_HOC_OPT);
  549. scheme_addto_prim_instance("bytes-length", p, env);
  550. p = scheme_make_immed_prim(scheme_checked_byte_string_ref, "bytes-ref", 2, 2);
  551. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED
  552. | SCHEME_PRIM_PRODUCES_FIXNUM
  553. | SCHEME_PRIM_AD_HOC_OPT);
  554. scheme_addto_prim_instance("bytes-ref", p, env);
  555. p = scheme_make_immed_prim(scheme_checked_byte_string_set, "bytes-set!", 3, 3);
  556. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_NARY_INLINED
  557. | SCHEME_PRIM_AD_HOC_OPT);
  558. scheme_addto_prim_instance("bytes-set!", p, env);
  559. p = scheme_make_immed_prim(byte_string_eq, "bytes=?", 1, -1);
  560. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED
  561. | SCHEME_PRIM_PRODUCES_BOOL);
  562. scheme_addto_prim_instance("bytes=?", p, env);
  563. scheme_addto_prim_instance("bytes<?",
  564. scheme_make_immed_prim(byte_string_lt,
  565. "bytes<?",
  566. 1, -1),
  567. env);
  568. scheme_addto_prim_instance("bytes>?",
  569. scheme_make_immed_prim(byte_string_gt,
  570. "bytes>?",
  571. 1, -1),
  572. env);
  573. scheme_addto_prim_instance("subbytes",
  574. scheme_make_immed_prim(byte_substring,
  575. "subbytes",
  576. 2, 3),
  577. env);
  578. p = scheme_make_immed_prim(byte_string_append, "bytes-append", 0, -1);
  579. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
  580. scheme_addto_prim_instance("bytes-append", p, env);
  581. scheme_addto_prim_instance("bytes->list",
  582. scheme_make_immed_prim(byte_string_to_list,
  583. "bytes->list",
  584. 1, 1),
  585. env);
  586. scheme_addto_prim_instance("list->bytes",
  587. scheme_make_immed_prim(list_to_byte_string,
  588. "list->bytes",
  589. 1, 1),
  590. env);
  591. scheme_addto_prim_instance("bytes-copy",
  592. scheme_make_immed_prim(byte_string_copy,
  593. "bytes-copy",
  594. 1, 1),
  595. env);
  596. scheme_addto_prim_instance("bytes-copy!",
  597. scheme_make_immed_prim(byte_string_copy_bang,
  598. "bytes-copy!",
  599. 3, 5),
  600. env);
  601. scheme_addto_prim_instance("bytes-fill!",
  602. scheme_make_immed_prim(byte_string_fill,
  603. "bytes-fill!",
  604. 2, 2),
  605. env);
  606. p = scheme_make_immed_prim(byte_string_to_immutable, "bytes->immutable-bytes", 1, 1);
  607. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_AD_HOC_OPT);
  608. scheme_addto_prim_instance("bytes->immutable-bytes", p, env);
  609. p = scheme_make_immed_prim(byte_string_utf8_index, "bytes-utf-8-index", 2, 5);
  610. /* Incorrect, since the result can be #f:
  611. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM); */
  612. scheme_addto_prim_instance("bytes-utf-8-index", p, env);
  613. p = scheme_make_immed_prim(byte_string_utf8_length, "bytes-utf-8-length", 1, 4);
  614. /* Incorrect, since the result can be #f:
  615. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM); */
  616. scheme_addto_prim_instance("bytes-utf-8-length", p, env);
  617. scheme_addto_prim_instance("bytes-utf-8-ref",
  618. scheme_make_immed_prim(byte_string_utf8_ref,
  619. "bytes-utf-8-ref",
  620. 2, 5),
  621. env);
  622. scheme_addto_prim_instance("bytes->string/utf-8",
  623. scheme_make_immed_prim(byte_string_to_char_string,
  624. "bytes->string/utf-8",
  625. 1, 4),
  626. env);
  627. scheme_addto_prim_instance("bytes->string/locale",
  628. scheme_make_immed_prim(byte_string_to_char_string_locale,
  629. "bytes->string/locale",
  630. 1, 4),
  631. env);
  632. scheme_addto_prim_instance("bytes->string/latin-1",
  633. scheme_make_immed_prim(byte_string_to_char_string_latin1,
  634. "bytes->string/latin-1",
  635. 1, 4),
  636. env);
  637. scheme_addto_prim_instance("string->bytes/utf-8",
  638. scheme_make_immed_prim(char_string_to_byte_string,
  639. "string->bytes/utf-8",
  640. 1, 4),
  641. env);
  642. scheme_addto_prim_instance("string->bytes/locale",
  643. scheme_make_immed_prim(char_string_to_byte_string_locale,
  644. "string->bytes/locale",
  645. 1, 4),
  646. env);
  647. scheme_addto_prim_instance("string->bytes/latin-1",
  648. scheme_make_immed_prim(char_string_to_byte_string_latin1,
  649. "string->bytes/latin-1",
  650. 1, 4),
  651. env);
  652. scheme_addto_prim_instance("string-utf-8-length",
  653. scheme_make_immed_prim(char_string_utf8_length,
  654. "string-utf-8-length",
  655. 1, 3),
  656. env);
  657. /* In principle, `version' could be foldable, but it invites
  658. more problems than it solves... */
  659. scheme_addto_prim_instance("version",
  660. scheme_make_immed_prim(version,
  661. "version",
  662. 0, 0),
  663. env);
  664. scheme_addto_prim_instance("banner",
  665. scheme_make_immed_prim(banner,
  666. "banner",
  667. 0, 0),
  668. env);
  669. /* Environment variables */
  670. scheme_addto_prim_instance("environment-variables?",
  671. scheme_make_folding_prim(env_p,
  672. "environment-variables?",
  673. 1, 1, 1),
  674. env);
  675. scheme_addto_prim_instance("current-environment-variables",
  676. scheme_register_parameter(current_environment_variables,
  677. "current-environment-variables",
  678. MZCONFIG_CURRENT_ENV_VARS),
  679. env);
  680. scheme_addto_prim_instance("environment-variables-ref",
  681. scheme_make_immed_prim(sch_getenv,
  682. "environment-variables-ref",
  683. 2, 2),
  684. env);
  685. scheme_addto_prim_instance("environment-variables-set!",
  686. scheme_make_prim_w_arity(sch_putenv,
  687. "environment-variables-set!",
  688. 3, 4),
  689. env);
  690. scheme_addto_prim_instance("environment-variables-names",
  691. scheme_make_immed_prim(sch_getenv_names,
  692. "environment-variables-names",
  693. 1, 1),
  694. env);
  695. scheme_addto_prim_instance("environment-variables-copy",
  696. scheme_make_immed_prim(env_copy,
  697. "environment-variables-copy",
  698. 1, 1),
  699. env);
  700. scheme_addto_prim_instance("make-environment-variables",
  701. scheme_make_immed_prim(env_make,
  702. "make-environment-variables",
  703. 0, -1),
  704. env);
  705. /* Don't make these folding, since they're platform-specific: */
  706. REGISTER_SO(scheme_system_type_proc);
  707. scheme_system_type_proc = scheme_make_immed_prim(system_type,
  708. "system-type",
  709. 0, 1);
  710. scheme_addto_prim_instance("system-type", scheme_system_type_proc, env);
  711. scheme_addto_prim_instance("system-library-subpath",
  712. scheme_make_immed_prim(system_library_subpath,
  713. "system-library-subpath",
  714. 0, 1),
  715. env);
  716. scheme_addto_prim_instance("current-command-line-arguments",
  717. scheme_register_parameter(cmdline_args,
  718. "current-command-line-arguments",
  719. MZCONFIG_CMDLINE_ARGS),
  720. env);
  721. scheme_addto_prim_instance("path<?",
  722. scheme_make_immed_prim(path_lt,
  723. "path<?",
  724. 1, -1),
  725. env);
  726. #ifdef MZ_PRECISE_GC
  727. register_traversers();
  728. #endif
  729. }
  730. void scheme_init_string_places(void) {
  731. REGISTER_SO(current_locale_name_ptr);
  732. current_locale_name_ptr = (void *)xes_char_string;
  733. REGISTER_SO(fs_change_props);
  734. {
  735. int supported, scalable, low_latency, file_level;
  736. Scheme_Object *s;
  737. scheme_fs_change_properties(&supported, &scalable, &low_latency, &file_level);
  738. fs_change_props = scheme_make_vector(4, scheme_false);
  739. if (supported) {
  740. s = scheme_intern_symbol("supported");
  741. SCHEME_VEC_ELS(fs_change_props)[0] = s;
  742. }
  743. if (scalable) {
  744. s = scheme_intern_symbol("scalable");
  745. SCHEME_VEC_ELS(fs_change_props)[1] = s;
  746. }
  747. if (low_latency) {
  748. s = scheme_intern_symbol("low-latency");
  749. SCHEME_VEC_ELS(fs_change_props)[2] = s;
  750. }
  751. if (file_level) {
  752. s = scheme_intern_symbol("file-level");
  753. SCHEME_VEC_ELS(fs_change_props)[3] = s;
  754. }
  755. SCHEME_SET_IMMUTABLE(fs_change_props);
  756. }
  757. }
  758. /**********************************************************************/
  759. /* UTF-8 char constructors */
  760. /**********************************************************************/
  761. Scheme_Object *scheme_make_sized_offset_utf8_string(char *chars, intptr_t d, intptr_t len)
  762. {
  763. intptr_t ulen;
  764. mzchar *us;
  765. if (len) {
  766. ulen = scheme_utf8_decode((unsigned char *)chars, d, d + len,
  767. NULL, 0, -1,
  768. NULL, 0 /* not UTF-16 */, 0xFFFD);
  769. us = scheme_malloc_atomic(sizeof(mzchar) * (ulen + 1));
  770. scheme_utf8_decode((unsigned char *)chars, d, d + len,
  771. us, 0, -1,
  772. NULL, 0 /* not UTF-16 */, 0xFFFD);
  773. us[ulen] = 0;
  774. } else {
  775. us = (mzchar *)empty_char_string;
  776. ulen = 0;
  777. }
  778. return scheme_make_sized_offset_char_string(us, 0, ulen, 0);
  779. }
  780. Scheme_Object *
  781. scheme_make_sized_utf8_string(char *chars, intptr_t len)
  782. {
  783. return scheme_make_sized_offset_utf8_string(chars, 0, len);
  784. }
  785. Scheme_Object *
  786. scheme_make_immutable_sized_utf8_string(char *chars, intptr_t len)
  787. {
  788. Scheme_Object *s;
  789. s = scheme_make_sized_offset_utf8_string(chars, 0, len);
  790. if (len)
  791. SCHEME_SET_CHAR_STRING_IMMUTABLE(s);
  792. return s;
  793. }
  794. Scheme_Object *
  795. scheme_make_utf8_string(const char *chars)
  796. {
  797. return scheme_make_sized_offset_utf8_string((char *)chars, 0, -1);
  798. }
  799. Scheme_Object *
  800. scheme_make_locale_string(const char *chars)
  801. {
  802. return scheme_byte_string_to_char_string_locale(scheme_make_byte_string((char *)chars));
  803. }
  804. Scheme_Object *scheme_append_strings(Scheme_Object *s1, Scheme_Object *s2)
  805. {
  806. Scheme_Object *a[2];
  807. a[0] = s1;
  808. a[1] = s2;
  809. return string_append(2, a);
  810. }
  811. /**********************************************************************/
  812. /* index helpers */
  813. /**********************************************************************/
  814. intptr_t scheme_extract_index(const char *name, int pos, int argc, Scheme_Object **argv, intptr_t top, int false_ok)
  815. {
  816. intptr_t i;
  817. int is_top = 0;
  818. if (SCHEME_INTP(argv[pos])) {
  819. i = SCHEME_INT_VAL(argv[pos]);
  820. } else if (SCHEME_BIGNUMP(argv[pos])) {
  821. if (SCHEME_BIGPOS(argv[pos])) {
  822. i = top; /* out-of-bounds */
  823. is_top = 1;
  824. } else
  825. i = -1; /* negative */
  826. } else
  827. i = -1;
  828. if (!is_top && (i < 0))
  829. scheme_wrong_contract(name,
  830. (false_ok ? "(or/c exact-nonnegative-integer? #f)" : "exact-nonnegative-integer?"),
  831. pos, argc, argv);
  832. return i;
  833. }
  834. void scheme_get_substring_indices(const char *name, Scheme_Object *str,
  835. int argc, Scheme_Object **argv,
  836. int spos, int fpos, intptr_t *_start, intptr_t *_finish)
  837. {
  838. intptr_t len;
  839. intptr_t start, finish;
  840. if (SCHEME_CHAPERONE_VECTORP(str))
  841. len = SCHEME_CHAPERONE_VEC_SIZE(str);
  842. else if (SCHEME_CHAR_STRINGP(str))
  843. len = SCHEME_CHAR_STRTAG_VAL(str);
  844. else
  845. len = SCHEME_BYTE_STRTAG_VAL(str);
  846. if (argc > spos)
  847. start = scheme_extract_index(name, spos, argc, argv, len + 1, 0);
  848. else
  849. start = 0;
  850. if (argc > fpos)
  851. finish = scheme_extract_index(name, fpos, argc, argv, len + 1, 0);
  852. else
  853. finish = len;
  854. if (!(start <= len)) {
  855. scheme_out_of_range(name, NULL, (fpos < 100) ? "starting " : "", argv[spos], str, 0, len);
  856. }
  857. if (!(finish >= start && finish <= len)) {
  858. scheme_out_of_range(name, NULL, "ending ", argv[fpos], str, start, len);
  859. }
  860. *_start = start;
  861. *_finish = finish;
  862. }
  863. void scheme_do_get_substring_indices(const char *name, Scheme_Object *str,
  864. int argc, Scheme_Object **argv,
  865. int spos, int fpos, intptr_t *_start, intptr_t *_finish, intptr_t len)
  866. {
  867. if (argc > spos) {
  868. if (SCHEME_INTP(argv[spos])) {
  869. intptr_t start = SCHEME_INT_VAL(argv[spos]);
  870. if ((start >= 0) && (start < len)) {
  871. *_start = start;
  872. if (argc > fpos) {
  873. intptr_t finish = SCHEME_INT_VAL(argv[fpos]);
  874. if ((finish >= start) && (finish <= len)) {
  875. *_finish = finish;
  876. return;
  877. }
  878. } else {
  879. *_finish = len;
  880. return;
  881. }
  882. }
  883. }
  884. } else {
  885. *_start = 0;
  886. *_finish = len;
  887. return;
  888. }
  889. scheme_get_substring_indices(name, str, argc, argv, spos, fpos, _start, _finish);
  890. }
  891. /**********************************************************************/
  892. /* char strings */
  893. /**********************************************************************/
  894. #define SCHEME_X_STR_VAL(x) SCHEME_CHAR_STR_VAL(x)
  895. #define SCHEME_X_STRTAG_VAL(x) SCHEME_CHAR_STRTAG_VAL(x)
  896. #define SCHEME_X_STRINGP(x) SCHEME_CHAR_STRINGP(x)
  897. #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_CHAR_STRINGP(x)
  898. #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_CHAR_STRING_IMMUTABLE(x)
  899. #define scheme_x_string_type scheme_char_string_type
  900. #define X(a, b) a##_char##b
  901. #define X_(a, b) a##_##b
  902. #define X__(a) a
  903. #define EMPTY (mzchar *)"\0\0\0"
  904. #define Xchar mzchar
  905. #define uXchar mzchar
  906. #define XSTR ""
  907. #define IS_STR "string?"
  908. #define XSTRINGSTR "string"
  909. #define SUBXSTR "substring"
  910. #define CHARP(x) SCHEME_CHARP(x)
  911. #define CHAR_VAL(x) SCHEME_CHAR_VAL(x)
  912. #define CHAR_STR "char?"
  913. #define MAKE_CHAR(x) _scheme_make_char(x)
  914. #define xstrlen scheme_char_strlen
  915. #include "strops.inc"
  916. #define GEN_STRING_COMP(name, scheme_name, comp, op, ul, size_shortcut) \
  917. static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
  918. { mzchar *s, *prev; int i, sl, pl; int falz = 0;\
  919. if (!SCHEME_CHAR_STRINGP(argv[0])) \
  920. scheme_wrong_contract(scheme_name, "string?", 0, argc, argv); \
  921. prev = SCHEME_CHAR_STR_VAL(argv[0]); pl = SCHEME_CHAR_STRTAG_VAL(argv[0]); \
  922. for (i = 1; i < argc; i++) { \
  923. if (!SCHEME_CHAR_STRINGP(argv[i])) \
  924. scheme_wrong_contract(scheme_name, "string?", i, argc, argv); \
  925. s = SCHEME_CHAR_STR_VAL(argv[i]); sl = SCHEME_CHAR_STRTAG_VAL(argv[i]); \
  926. if (!falz) if (!(comp(scheme_name, \
  927. prev, pl, \
  928. s, sl, ul, size_shortcut) op 0)) falz = 1; \
  929. prev = s; pl = sl; \
  930. } \
  931. return falz ? scheme_false : scheme_true; \
  932. }
  933. GEN_STRING_COMP(string_eq, "string=?", mz_char_strcmp, ==, 0, 1)
  934. GEN_STRING_COMP(string_lt, "string<?", mz_char_strcmp, <, 0, 0)
  935. GEN_STRING_COMP(string_gt, "string>?", mz_char_strcmp, >, 0, 0)
  936. GEN_STRING_COMP(string_lt_eq, "string<=?", mz_char_strcmp, <=, 0, 0)
  937. GEN_STRING_COMP(string_gt_eq, "string>=?", mz_char_strcmp, >=, 0, 0)
  938. GEN_STRING_COMP(string_ci_eq, "string-ci=?", mz_char_strcmp_ci, ==, 0, 0)
  939. GEN_STRING_COMP(string_ci_lt, "string-ci<?", mz_char_strcmp_ci, <, 0, 0)
  940. GEN_STRING_COMP(string_ci_gt, "string-ci>?", mz_char_strcmp_ci, >, 0, 0)
  941. GEN_STRING_COMP(string_ci_lt_eq, "string-ci<=?", mz_char_strcmp_ci, <=, 0, 0)
  942. GEN_STRING_COMP(string_ci_gt_eq, "string-ci>=?", mz_char_strcmp_ci, >=, 0, 0)
  943. GEN_STRING_COMP(string_locale_eq, "string-locale=?", mz_char_strcmp, ==, 1, 0)
  944. GEN_STRING_COMP(string_locale_lt, "string-locale<?", mz_char_strcmp, <, 1, 0)
  945. GEN_STRING_COMP(string_locale_gt, "string-locale>?", mz_char_strcmp, >, 1, 0)
  946. GEN_STRING_COMP(string_locale_ci_eq, "string-locale-ci=?", mz_char_strcmp_ci, ==, 1, 0)
  947. GEN_STRING_COMP(string_locale_ci_lt, "string-locale-ci<?", mz_char_strcmp_ci, <, 1, 0)
  948. GEN_STRING_COMP(string_locale_ci_gt, "string-locale-ci>?", mz_char_strcmp_ci, >, 1, 0)
  949. Scheme_Object *scheme_string_eq_2(Scheme_Object *str1, Scheme_Object *str2)
  950. {
  951. Scheme_Object *a[2];
  952. a[0] = str1;
  953. a[1] = str2;
  954. return string_eq(2, a);
  955. }
  956. Scheme_Object *string_append_immutable(int argc, Scheme_Object *argv[])
  957. {
  958. Scheme_Object *r;
  959. r = do_string_append("string-append-immutable", argc, argv);
  960. if (r == zero_length_char_string)
  961. return zero_length_char_immutable_string;
  962. SCHEME_SET_CHAR_STRING_IMMUTABLE(r);
  963. return r;
  964. }
  965. /**********************************************************************/
  966. /* byte strings */
  967. /**********************************************************************/
  968. #define SCHEME_BYTEP(x) ((SCHEME_INTP(x)) && (SCHEME_INT_VAL(x) >= 0) && (SCHEME_INT_VAL(x) <= 255))
  969. static Scheme_Object *
  970. byte_p(int argc, Scheme_Object *argv[])
  971. {
  972. return (SCHEME_BYTEP(argv[0]) ? scheme_true : scheme_false);
  973. }
  974. #define SCHEME_X_STR_VAL(x) SCHEME_BYTE_STR_VAL(x)
  975. #define SCHEME_X_STRTAG_VAL(x) SCHEME_BYTE_STRTAG_VAL(x)
  976. #define SCHEME_X_STRINGP(x) SCHEME_BYTE_STRINGP(x)
  977. #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_BYTE_STRINGP(x)
  978. #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_BYTE_STRING_IMMUTABLE(x)
  979. #define scheme_x_string_type scheme_byte_string_type
  980. #define X(a, b) a##_byte##b
  981. #define X_(a, b) a##_byte_##b
  982. #define X__(a) byte_##a
  983. #define EMPTY ""
  984. #define Xchar char
  985. #define uXchar unsigned char
  986. #define XSTR "byte "
  987. #define IS_STR "bytes?"
  988. #define XSTRINGSTR "bytes"
  989. #define SUBXSTR "subbytes"
  990. #define CHARP(x) SCHEME_BYTEP(x)
  991. #define CHAR_VAL(x) SCHEME_INT_VAL(x)
  992. #define CHAR_STR "byte?"
  993. #define MAKE_CHAR(x) scheme_make_integer_value(x)
  994. #define xstrlen strlen
  995. #define GENERATING_BYTE
  996. #include "strops.inc"
  997. #undef GENERATING_BYTE
  998. /* comparisons */
  999. #define GEN_BYTE_STRING_PATH_COMP(name, scheme_name, comp, op, PRED, contract) \
  1000. static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
  1001. { char *s, *prev; int i, sl, pl; int falz = 0;\
  1002. if (!PRED(argv[0])) \
  1003. scheme_wrong_contract(scheme_name, contract, 0, argc, argv); \
  1004. prev = SCHEME_BYTE_STR_VAL(argv[0]); pl = SCHEME_BYTE_STRTAG_VAL(argv[0]); \
  1005. for (i = 1; i < argc; i++) { \
  1006. if (!PRED(argv[i])) \
  1007. scheme_wrong_contract(scheme_name, contract, i, argc, argv); \
  1008. s = SCHEME_BYTE_STR_VAL(argv[i]); sl = SCHEME_BYTE_STRTAG_VAL(argv[i]); \
  1009. if (!falz) if (!(comp(scheme_name, \
  1010. (unsigned char *)prev, pl, \
  1011. (unsigned char *)s, sl) op 0)) falz = 1; \
  1012. prev = s; pl = sl; \
  1013. } \
  1014. return falz ? scheme_false : scheme_true; \
  1015. }
  1016. #define GEN_BYTE_STRING_COMP(name, scheme_name, comp, op) \
  1017. GEN_BYTE_STRING_PATH_COMP(name, scheme_name, comp, op, SCHEME_BYTE_STRINGP, "bytes?") \
  1018. GEN_BYTE_STRING_COMP(byte_string_eq, "bytes=?", mz_strcmp, ==)
  1019. GEN_BYTE_STRING_COMP(byte_string_lt, "bytes<?", mz_strcmp, <)
  1020. GEN_BYTE_STRING_COMP(byte_string_gt, "bytes>?", mz_strcmp, >)
  1021. GEN_BYTE_STRING_PATH_COMP(path_lt, "path<?", mz_strcmp, <, SCHEME_PATHP, "path?")
  1022. Scheme_Object *scheme_byte_string_eq_2(Scheme_Object *str1, Scheme_Object *str2)
  1023. {
  1024. Scheme_Object *a[2];
  1025. a[0] = str1;
  1026. a[1] = str2;
  1027. return byte_string_eq(2, a);
  1028. }
  1029. /**********************************************************************/
  1030. /* byte string <-> char string */
  1031. /**********************************************************************/
  1032. /************************* bytes->string *************************/
  1033. static Scheme_Object *
  1034. do_byte_string_to_char_string(const char *who,
  1035. Scheme_Object *bstr,
  1036. intptr_t istart, intptr_t ifinish,
  1037. int perm, int as_locale)
  1038. {
  1039. int i, ulen;
  1040. char *chars;
  1041. unsigned int *v;
  1042. chars = SCHEME_BYTE_STR_VAL(bstr);
  1043. ulen = utf8_decode_x((unsigned char *)chars, istart, ifinish,
  1044. NULL, 0, -1,
  1045. NULL, NULL, 0, 0,
  1046. NULL, 0,
  1047. (perm > -1) ? 0xD800 : 0);
  1048. if (ulen < 0) {
  1049. scheme_contract_error(who,
  1050. "string is not a well-formed UTF-8 encoding",
  1051. "string", 1, bstr,
  1052. NULL);
  1053. }
  1054. v = (unsigned int *)scheme_malloc_atomic((ulen + 1) * sizeof(unsigned int));
  1055. utf8_decode_x((unsigned char *)chars, istart, ifinish,
  1056. v, 0, -1,
  1057. NULL, NULL, 0, 0,
  1058. NULL, 0,
  1059. (perm > -1) ? 0xD800 : 0);
  1060. if (perm > -1) {
  1061. for (i = 0; i < ulen; i++) {
  1062. if (v[i] == 0xD800)
  1063. v[i] = perm;
  1064. }
  1065. }
  1066. v[ulen] = 0;
  1067. return scheme_make_sized_char_string(v, ulen, 0);
  1068. }
  1069. static Scheme_Object *
  1070. do_byte_string_to_char_string_locale(const char *who,
  1071. Scheme_Object *bstr,
  1072. intptr_t istart, intptr_t ifinish,
  1073. int perm)
  1074. {
  1075. char *us;
  1076. intptr_t olen;
  1077. reset_locale();
  1078. if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on
  1079. || !(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED))
  1080. return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
  1081. if (istart < ifinish) {
  1082. int no_cvt;
  1083. us = string_to_from_locale(0, SCHEME_BYTE_STR_VAL(bstr),
  1084. istart, ifinish - istart,
  1085. &olen, perm, &no_cvt);
  1086. if (!us) {
  1087. if (no_cvt) {
  1088. return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
  1089. } else {
  1090. scheme_contract_error(who,
  1091. "byte string is not a valid encoding for the current locale",
  1092. "byte string", 1, bstr,
  1093. NULL);
  1094. }
  1095. }
  1096. ((mzchar *)us)[olen] = 0;
  1097. } else {
  1098. us = "\0\0\0";
  1099. olen = 0;
  1100. }
  1101. return scheme_make_sized_char_string((mzchar *)us, olen, 0);
  1102. }
  1103. static Scheme_Object *
  1104. do_string_to_vector(const char *who, int mode, int argc, Scheme_Object *argv[])
  1105. {
  1106. int permc;
  1107. intptr_t istart, ifinish;
  1108. if (!SCHEME_BYTE_STRINGP(argv[0]))
  1109. scheme_wrong_contract(who, "bytes?", 0, argc, argv);
  1110. if ((argc < 2) || SCHEME_FALSEP(argv[1]))
  1111. permc = -1;
  1112. else {
  1113. if (!SCHEME_CHARP(argv[1]))
  1114. scheme_wrong_contract(who, "(or/c char? #f)", 1, argc, argv);
  1115. permc = SCHEME_CHAR_VAL(argv[1]);
  1116. }
  1117. scheme_get_substring_indices(who, argv[0], argc, argv,
  1118. 2, 3,
  1119. &istart, &ifinish);
  1120. if (mode == 0)
  1121. return do_byte_string_to_char_string(who, argv[0], istart, ifinish, permc, 0);
  1122. else if (mode == 1)
  1123. return do_byte_string_to_char_string_locale(who, argv[0], istart, ifinish, permc);
  1124. else {
  1125. /* Latin-1 */
  1126. mzchar *us;
  1127. unsigned char *s;
  1128. intptr_t i, len;
  1129. len = ifinish - istart;
  1130. s = (unsigned char *)SCHEME_BYTE_STR_VAL(argv[0]);
  1131. us = (mzchar *)scheme_malloc_atomic((len + 1) * sizeof(mzchar));
  1132. for (i = istart; i < ifinish; i++) {
  1133. us[i - istart] = s[i];
  1134. }
  1135. us[len] = 0;
  1136. return scheme_make_sized_char_string(us, len, 0);
  1137. }
  1138. }
  1139. static Scheme_Object *
  1140. byte_string_to_char_string (int argc, Scheme_Object *argv[])
  1141. {
  1142. return do_string_to_vector("bytes->string/utf-8", 0, argc, argv);
  1143. }
  1144. static Scheme_Object *
  1145. byte_string_to_char_string_locale (int argc, Scheme_Object *argv[])
  1146. {
  1147. return do_string_to_vector("bytes->string/locale", 1, argc, argv);
  1148. }
  1149. static Scheme_Object *
  1150. byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[])
  1151. {
  1152. return do_string_to_vector("bytes->string/latin-1", 2, argc, argv);
  1153. }
  1154. Scheme_Object *scheme_byte_string_to_char_string(Scheme_Object *o)
  1155. {
  1156. return do_byte_string_to_char_string("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD, 0);
  1157. }
  1158. Scheme_Object *scheme_byte_string_to_char_string_locale(Scheme_Object *o)
  1159. {
  1160. return do_byte_string_to_char_string_locale("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD);
  1161. }
  1162. /************************* string->bytes *************************/
  1163. static Scheme_Object *do_char_string_to_byte_string(Scheme_Object *s, intptr_t istart, intptr_t ifinish,
  1164. int as_locale)
  1165. {
  1166. char *bs;
  1167. int slen;
  1168. slen = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
  1169. NULL, 0,
  1170. 0 /* UTF-16 */);
  1171. bs = (char *)scheme_malloc_atomic(slen + 1);
  1172. scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
  1173. (unsigned char *)bs, 0,
  1174. 0 /* UTF-16 */);
  1175. bs[slen] = 0;
  1176. return scheme_make_sized_byte_string(bs, slen, 0);
  1177. }
  1178. static Scheme_Object *
  1179. do_char_string_to_byte_string_locale(const char *who,
  1180. Scheme_Object *cstr,
  1181. intptr_t istart, intptr_t ifinish,
  1182. int perm)
  1183. {
  1184. char *s;
  1185. intptr_t olen;
  1186. reset_locale();
  1187. if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on
  1188. || !(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED))
  1189. return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
  1190. if (istart < ifinish) {
  1191. int no_cvt;
  1192. s = string_to_from_locale(1, (char *)SCHEME_CHAR_STR_VAL(cstr),
  1193. istart, ifinish - istart,
  1194. &olen, perm, &no_cvt);
  1195. if (!s) {
  1196. if (no_cvt) {
  1197. return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
  1198. } else {
  1199. scheme_contract