PageRenderTime 75ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 1ms

/racket/src/racket/src/string.c

http://github.com/shekari/racket
C | 5470 lines | 4467 code | 688 blank | 315 comment | 1009 complexity | bf7442ff4502f7e2a6bbc6b06af41f7a MD5 | raw file
Possible License(s): LGPL-2.1, BSD-3-Clause, ISC, LGPL-2.0

Large files files are truncated, but you can click here to view the full file

  1. /*
  2. Racket
  3. Copyright (c) 2004-2017 PLT Design Inc.
  4. Copyright (c) 1995-2001 Matthew Flatt
  5. This library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Library General Public
  7. License as published by the Free Software Foundation; either
  8. version 2 of the License, or (at your option) any later version.
  9. This library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Library General Public License for more details.
  13. You should have received a copy of the GNU Library General Public
  14. License along with this library; if not, write to the Free
  15. Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  16. Boston, MA 02110-1301 USA.
  17. libscheme
  18. Copyright (c) 1994 Brent Benson
  19. All rights reserved.
  20. */
  21. #include "schpriv.h"
  22. #include "schvers.h"
  23. #include "schrktio.h"
  24. #include <string.h>
  25. #include <ctype.h>
  26. #ifdef NO_ERRNO_GLOBAL
  27. # define errno -1
  28. #else
  29. # include <errno.h>
  30. #endif
  31. #ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH
  32. # include "schsys.h"
  33. #endif
  34. #ifndef SPLS_SUFFIX
  35. # define SPLS_SUFFIX ""
  36. #endif
  37. #include "schustr.inc"
  38. #ifdef MACOS_UNICODE_SUPPORT
  39. # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
  40. #endif
  41. #ifdef WINDOWS_UNICODE_SUPPORT
  42. # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
  43. #endif
  44. #ifndef mzLOCALE_IS_UTF_8
  45. # define mzLOCALE_IS_UTF_8(s) (!(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED))
  46. #endif
  47. #define mzICONV_KIND 0
  48. #define mzUTF8_KIND 1
  49. #define mzUTF8_TO_UTF16_KIND 2
  50. #define mzUTF16_TO_UTF8_KIND 3
  51. typedef struct Scheme_Converter {
  52. Scheme_Object so;
  53. short closed;
  54. short kind;
  55. rktio_converter_t *cd;
  56. int permissive;
  57. Scheme_Custodian_Reference *mref;
  58. } Scheme_Converter;
  59. /* locals */
  60. static Scheme_Object *make_string (int argc, Scheme_Object *argv[]);
  61. static Scheme_Object *string (int argc, Scheme_Object *argv[]);
  62. static Scheme_Object *string_p (int argc, Scheme_Object *argv[]);
  63. static Scheme_Object *string_length (int argc, Scheme_Object *argv[]);
  64. static Scheme_Object *string_eq (int argc, Scheme_Object *argv[]);
  65. static Scheme_Object *string_locale_eq (int argc, Scheme_Object *argv[]);
  66. static Scheme_Object *string_ci_eq (int argc, Scheme_Object *argv[]);
  67. static Scheme_Object *string_locale_ci_eq (int argc, Scheme_Object *argv[]);
  68. static Scheme_Object *string_lt (int argc, Scheme_Object *argv[]);
  69. static Scheme_Object *string_locale_lt (int argc, Scheme_Object *argv[]);
  70. static Scheme_Object *string_gt (int argc, Scheme_Object *argv[]);
  71. static Scheme_Object *string_locale_gt (int argc, Scheme_Object *argv[]);
  72. static Scheme_Object *string_lt_eq (int argc, Scheme_Object *argv[]);
  73. static Scheme_Object *string_gt_eq (int argc, Scheme_Object *argv[]);
  74. static Scheme_Object *string_ci_lt (int argc, Scheme_Object *argv[]);
  75. static Scheme_Object *string_locale_ci_lt (int argc, Scheme_Object *argv[]);
  76. static Scheme_Object *string_ci_gt (int argc, Scheme_Object *argv[]);
  77. static Scheme_Object *string_locale_ci_gt (int argc, Scheme_Object *argv[]);
  78. static Scheme_Object *string_ci_lt_eq (int argc, Scheme_Object *argv[]);
  79. static Scheme_Object *string_ci_gt_eq (int argc, Scheme_Object *argv[]);
  80. static Scheme_Object *string_upcase (int argc, Scheme_Object *argv[]);
  81. static Scheme_Object *string_downcase (int argc, Scheme_Object *argv[]);
  82. static Scheme_Object *string_titlecase (int argc, Scheme_Object *argv[]);
  83. static Scheme_Object *string_foldcase (int argc, Scheme_Object *argv[]);
  84. static Scheme_Object *string_locale_upcase (int argc, Scheme_Object *argv[]);
  85. static Scheme_Object *string_locale_downcase (int argc, Scheme_Object *argv[]);
  86. static Scheme_Object *substring (int argc, Scheme_Object *argv[]);
  87. static Scheme_Object *string_append (int argc, Scheme_Object *argv[]);
  88. static Scheme_Object *string_to_list (int argc, Scheme_Object *argv[]);
  89. static Scheme_Object *list_to_string (int argc, Scheme_Object *argv[]);
  90. static Scheme_Object *string_copy (int argc, Scheme_Object *argv[]);
  91. static Scheme_Object *string_copy_bang (int argc, Scheme_Object *argv[]);
  92. static Scheme_Object *string_fill (int argc, Scheme_Object *argv[]);
  93. static Scheme_Object *string_to_immutable (int argc, Scheme_Object *argv[]);
  94. static Scheme_Object *string_normalize_c (int argc, Scheme_Object *argv[]);
  95. static Scheme_Object *string_normalize_kc (int argc, Scheme_Object *argv[]);
  96. static Scheme_Object *string_normalize_d (int argc, Scheme_Object *argv[]);
  97. static Scheme_Object *string_normalize_kd (int argc, Scheme_Object *argv[]);
  98. static Scheme_Object *make_shared_byte_string (int argc, Scheme_Object *argv[]);
  99. static Scheme_Object *shared_byte_string (int argc, Scheme_Object *argv[]);
  100. static Scheme_Object *make_byte_string (int argc, Scheme_Object *argv[]);
  101. static Scheme_Object *byte_string (int argc, Scheme_Object *argv[]);
  102. static Scheme_Object *byte_p (int argc, Scheme_Object *argv[]);
  103. static Scheme_Object *byte_string_p (int argc, Scheme_Object *argv[]);
  104. static Scheme_Object *byte_string_length (int argc, Scheme_Object *argv[]);
  105. static Scheme_Object *byte_string_eq (int argc, Scheme_Object *argv[]);
  106. static Scheme_Object *byte_string_lt (int argc, Scheme_Object *argv[]);
  107. static Scheme_Object *byte_string_gt (int argc, Scheme_Object *argv[]);
  108. static Scheme_Object *byte_substring (int argc, Scheme_Object *argv[]);
  109. static Scheme_Object *byte_string_append (int argc, Scheme_Object *argv[]);
  110. static Scheme_Object *byte_string_to_list (int argc, Scheme_Object *argv[]);
  111. static Scheme_Object *list_to_byte_string (int argc, Scheme_Object *argv[]);
  112. static Scheme_Object *byte_string_copy (int argc, Scheme_Object *argv[]);
  113. static Scheme_Object *byte_string_copy_bang (int argc, Scheme_Object *argv[]);
  114. static Scheme_Object *byte_string_fill (int argc, Scheme_Object *argv[]);
  115. static Scheme_Object *byte_string_to_immutable (int argc, Scheme_Object *argv[]);
  116. static Scheme_Object *byte_string_utf8_index (int argc, Scheme_Object *argv[]);
  117. static Scheme_Object *byte_string_utf8_ref (int argc, Scheme_Object *argv[]);
  118. static Scheme_Object *byte_string_utf8_length (int argc, Scheme_Object *argv[]);
  119. static Scheme_Object *byte_string_to_char_string (int argc, Scheme_Object *argv[]);
  120. static Scheme_Object *byte_string_to_char_string_locale (int argc, Scheme_Object *argv[]);
  121. static Scheme_Object *byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[]);
  122. static Scheme_Object *char_string_to_byte_string (int argc, Scheme_Object *argv[]);
  123. static Scheme_Object *char_string_to_byte_string_locale (int argc, Scheme_Object *argv[]);
  124. static Scheme_Object *char_string_to_byte_string_latin1 (int argc, Scheme_Object *argv[]);
  125. static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[]);
  126. static Scheme_Object *version(int argc, Scheme_Object *argv[]);
  127. static Scheme_Object *format(int argc, Scheme_Object *argv[]);
  128. static Scheme_Object *sch_printf(int argc, Scheme_Object *argv[]);
  129. static Scheme_Object *sch_eprintf(int argc, Scheme_Object *argv[]);
  130. static Scheme_Object *sch_fprintf(int argc, Scheme_Object *argv[]);
  131. static Scheme_Object *banner(int argc, Scheme_Object *argv[]);
  132. static Scheme_Object *env_p(int argc, Scheme_Object *argv[]);
  133. static Scheme_Object *sch_getenv(int argc, Scheme_Object *argv[]);
  134. static Scheme_Object *sch_getenv_names(int argc, Scheme_Object *argv[]);
  135. static Scheme_Object *sch_putenv(int argc, Scheme_Object *argv[]);
  136. static Scheme_Object *env_copy(int argc, Scheme_Object *argv[]);
  137. static Scheme_Object *env_make(int argc, Scheme_Object *argv[]);
  138. static Scheme_Object *current_environment_variables(int argc, Scheme_Object *argv[]);
  139. static Scheme_Object *system_type(int argc, Scheme_Object *argv[]);
  140. static Scheme_Object *system_library_subpath(int argc, Scheme_Object *argv[]);
  141. static Scheme_Object *cmdline_args(int argc, Scheme_Object *argv[]);
  142. static Scheme_Object *current_locale(int argc, Scheme_Object *argv[]);
  143. static Scheme_Object *locale_string_encoding(int argc, Scheme_Object *argv[]);
  144. static Scheme_Object *system_language_country(int argc, Scheme_Object *argv[]);
  145. static Scheme_Object *byte_string_open_converter(int argc, Scheme_Object *argv[]);
  146. static Scheme_Object *byte_string_close_converter(int argc, Scheme_Object *argv[]);
  147. static Scheme_Object *byte_string_convert(int argc, Scheme_Object *argv[]);
  148. static Scheme_Object *byte_string_convert_end(int argc, Scheme_Object *argv[]);
  149. static Scheme_Object *byte_converter_p(int argc, Scheme_Object *argv[]);
  150. static Scheme_Object *path_lt (int argc, Scheme_Object *argv[]);
  151. #ifdef MZ_PRECISE_GC
  152. static void register_traversers(void);
  153. #endif
  154. static int mz_char_strcmp(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
  155. static int mz_char_strcmp_ci(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
  156. static int mz_strcmp(const char *who, unsigned char *str1, intptr_t l1, unsigned char *str2, intptr_t l2);
  157. XFORM_NONGCING static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t end,
  158. unsigned int *us, intptr_t dstart, intptr_t dend,
  159. intptr_t *ipos, intptr_t *jpos,
  160. char compact, char utf16,
  161. int *state, int might_continue, int permissive);
  162. XFORM_NONGCING static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t end,
  163. unsigned char *s, intptr_t dstart, intptr_t dend,
  164. intptr_t *_ipos, intptr_t *_opos, char utf16);
  165. static char *string_to_from_locale(int to_bytes,
  166. char *in, intptr_t delta, intptr_t len,
  167. intptr_t *olen, int perm,
  168. int *no_cvt);
  169. #define portable_isspace(x) (((x) < 128) && isspace(x))
  170. ROSYM static Scheme_Object *sys_symbol;
  171. ROSYM static Scheme_Object *link_symbol, *machine_symbol, *vm_symbol, *gc_symbol;
  172. ROSYM static Scheme_Object *so_suffix_symbol, *so_mode_symbol, *word_symbol;
  173. ROSYM static Scheme_Object *os_symbol, *fs_change_symbol, *cross_symbol;
  174. ROSYM static Scheme_Object *racket_symbol, *cgc_symbol, *_3m_symbol;
  175. ROSYM static Scheme_Object *force_symbol, *infer_symbol;
  176. ROSYM static Scheme_Object *platform_3m_path, *platform_cgc_path;
  177. READ_ONLY static Scheme_Object *zero_length_char_string;
  178. READ_ONLY static Scheme_Object *zero_length_byte_string;
  179. SHARED_OK static char *embedding_banner;
  180. SHARED_OK static Scheme_Object *vers_str;
  181. SHARED_OK static Scheme_Object *banner_str;
  182. THREAD_LOCAL_DECL(static Scheme_Object *fs_change_props);
  183. READ_ONLY static Scheme_Object *complete_symbol, *continues_symbol, *aborts_symbol, *error_symbol;
  184. READ_ONLY Scheme_Object *scheme_string_p_proc;
  185. READ_ONLY Scheme_Object *scheme_byte_string_p_proc;
  186. READ_ONLY static int cross_compile_mode;
  187. /* These two locale variables are only valid when reset_locale()
  188. is called after continuation marks (and hence parameterization)
  189. may have changed. Similarly, setlocale() is only up-to-date
  190. when reset_locale() has been called. */
  191. THREAD_LOCAL_DECL(static int locale_on);
  192. THREAD_LOCAL_DECL(static void *current_locale_name_ptr);
  193. static void reset_locale(void);
  194. #define current_locale_name ((const mzchar *)current_locale_name_ptr)
  195. static const mzchar empty_char_string[1] = { 0 };
  196. static const mzchar xes_char_string[2] = { 0x78787878, 0 };
  197. void
  198. scheme_init_string (Scheme_Env *env)
  199. {
  200. Scheme_Object *p;
  201. REGISTER_SO(sys_symbol);
  202. sys_symbol = scheme_intern_symbol(SYSTEM_TYPE_NAME);
  203. REGISTER_SO(link_symbol);
  204. REGISTER_SO(machine_symbol);
  205. REGISTER_SO(gc_symbol);
  206. REGISTER_SO(vm_symbol);
  207. REGISTER_SO(so_suffix_symbol);
  208. REGISTER_SO(so_mode_symbol);
  209. REGISTER_SO(word_symbol);
  210. REGISTER_SO(os_symbol);
  211. REGISTER_SO(fs_change_symbol);
  212. REGISTER_SO(cross_symbol);
  213. link_symbol = scheme_intern_symbol("link");
  214. machine_symbol = scheme_intern_symbol("machine");
  215. vm_symbol = scheme_intern_symbol("vm");
  216. gc_symbol = scheme_intern_symbol("gc");
  217. so_suffix_symbol = scheme_intern_symbol("so-suffix");
  218. so_mode_symbol = scheme_intern_symbol("so-mode");
  219. word_symbol = scheme_intern_symbol("word");
  220. os_symbol = scheme_intern_symbol("os");
  221. fs_change_symbol = scheme_intern_symbol("fs-change");
  222. cross_symbol = scheme_intern_symbol("cross");
  223. REGISTER_SO(racket_symbol);
  224. REGISTER_SO(cgc_symbol);
  225. REGISTER_SO(_3m_symbol);
  226. racket_symbol = scheme_intern_symbol("racket");
  227. cgc_symbol = scheme_intern_symbol("cgc");
  228. _3m_symbol = scheme_intern_symbol("3m");
  229. REGISTER_SO(force_symbol);
  230. REGISTER_SO(infer_symbol);
  231. force_symbol = scheme_intern_symbol("force");
  232. infer_symbol = scheme_intern_symbol("infer");
  233. REGISTER_SO(zero_length_char_string);
  234. REGISTER_SO(zero_length_byte_string);
  235. zero_length_char_string = scheme_alloc_char_string(0, 0);
  236. zero_length_byte_string = scheme_alloc_byte_string(0, 0);
  237. REGISTER_SO(complete_symbol);
  238. REGISTER_SO(continues_symbol);
  239. REGISTER_SO(aborts_symbol);
  240. REGISTER_SO(error_symbol);
  241. complete_symbol = scheme_intern_symbol("complete");
  242. continues_symbol = scheme_intern_symbol("continues");
  243. aborts_symbol = scheme_intern_symbol("aborts");
  244. error_symbol = scheme_intern_symbol("error");
  245. REGISTER_SO(platform_3m_path);
  246. #ifdef UNIX_FILE_SYSTEM
  247. # define MZ3M_SUBDIR "/3m"
  248. #else
  249. # ifdef DOS_FILE_SYSTEM
  250. # define MZ3M_SUBDIR "\\3m"
  251. # else
  252. # define MZ3M_SUBDIR ":3m"
  253. # endif
  254. #endif
  255. REGISTER_SO(platform_3m_path);
  256. REGISTER_SO(platform_cgc_path);
  257. platform_cgc_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH SPLS_SUFFIX);
  258. platform_3m_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH SPLS_SUFFIX MZ3M_SUBDIR);
  259. REGISTER_SO(embedding_banner);
  260. REGISTER_SO(vers_str);
  261. REGISTER_SO(banner_str);
  262. vers_str = scheme_make_utf8_string(scheme_version());
  263. SCHEME_SET_CHAR_STRING_IMMUTABLE(vers_str);
  264. banner_str = scheme_make_utf8_string(scheme_banner());
  265. SCHEME_SET_CHAR_STRING_IMMUTABLE(banner_str);
  266. REGISTER_SO(scheme_string_p_proc);
  267. p = scheme_make_folding_prim(string_p, "string?", 1, 1, 1);
  268. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
  269. | SCHEME_PRIM_IS_OMITABLE);
  270. scheme_add_global_constant("string?", p, env);
  271. scheme_string_p_proc = p;
  272. scheme_add_global_constant("make-string",
  273. scheme_make_immed_prim(make_string,
  274. "make-string",
  275. 1, 2),
  276. env);
  277. scheme_add_global_constant("string",
  278. scheme_make_immed_prim(string,
  279. "string",
  280. 0, -1),
  281. env);
  282. p = scheme_make_folding_prim(string_length, "string-length", 1, 1, 1);
  283. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
  284. |SCHEME_PRIM_PRODUCES_FIXNUM);
  285. scheme_add_global_constant("string-length", p,
  286. env);
  287. p = scheme_make_immed_prim(scheme_checked_string_ref, "string-ref", 2, 2);
  288. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED);
  289. scheme_add_global_constant("string-ref", p, env);
  290. p = scheme_make_immed_prim(scheme_checked_string_set, "string-set!", 3, 3);
  291. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_NARY_INLINED);
  292. scheme_add_global_constant("string-set!", p, env);
  293. p = scheme_make_immed_prim(string_eq, "string=?", 2, -1);
  294. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED);
  295. scheme_add_global_constant("string=?", p, env);
  296. scheme_add_global_constant("string-locale=?",
  297. scheme_make_immed_prim(string_locale_eq,
  298. "string-locale=?",
  299. 2, -1),
  300. env);
  301. scheme_add_global_constant("string-ci=?",
  302. scheme_make_immed_prim(string_ci_eq,
  303. "string-ci=?",
  304. 2, -1),
  305. env);
  306. scheme_add_global_constant("string-locale-ci=?",
  307. scheme_make_immed_prim(string_locale_ci_eq,
  308. "string-locale-ci=?",
  309. 2, -1),
  310. env);
  311. scheme_add_global_constant("string<?",
  312. scheme_make_immed_prim(string_lt,
  313. "string<?",
  314. 2, -1),
  315. env);
  316. scheme_add_global_constant("string-locale<?",
  317. scheme_make_immed_prim(string_locale_lt,
  318. "string-locale<?",
  319. 2, -1),
  320. env);
  321. scheme_add_global_constant("string>?",
  322. scheme_make_immed_prim(string_gt,
  323. "string>?",
  324. 2, -1),
  325. env);
  326. scheme_add_global_constant("string-locale>?",
  327. scheme_make_immed_prim(string_locale_gt,
  328. "string-locale>?",
  329. 2, -1),
  330. env);
  331. scheme_add_global_constant("string<=?",
  332. scheme_make_immed_prim(string_lt_eq,
  333. "string<=?",
  334. 2, -1),
  335. env);
  336. scheme_add_global_constant("string>=?",
  337. scheme_make_immed_prim(string_gt_eq,
  338. "string>=?",
  339. 2, -1),
  340. env);
  341. scheme_add_global_constant("string-ci<?",
  342. scheme_make_immed_prim(string_ci_lt,
  343. "string-ci<?",
  344. 2, -1),
  345. env);
  346. scheme_add_global_constant("string-locale-ci<?",
  347. scheme_make_immed_prim(string_locale_ci_lt,
  348. "string-locale-ci<?",
  349. 2, -1),
  350. env);
  351. scheme_add_global_constant("string-ci>?",
  352. scheme_make_immed_prim(string_ci_gt,
  353. "string-ci>?",
  354. 2, -1),
  355. env);
  356. scheme_add_global_constant("string-locale-ci>?",
  357. scheme_make_immed_prim(string_locale_ci_gt,
  358. "string-locale-ci>?",
  359. 2, -1),
  360. env);
  361. scheme_add_global_constant("string-ci<=?",
  362. scheme_make_immed_prim(string_ci_lt_eq,
  363. "string-ci<=?",
  364. 2, -1),
  365. env);
  366. scheme_add_global_constant("string-ci>=?",
  367. scheme_make_immed_prim(string_ci_gt_eq,
  368. "string-ci>=?",
  369. 2, -1),
  370. env);
  371. scheme_add_global_constant("substring",
  372. scheme_make_immed_prim(substring,
  373. "substring",
  374. 2, 3),
  375. env);
  376. scheme_add_global_constant("string-append",
  377. scheme_make_immed_prim(string_append,
  378. "string-append",
  379. 0, -1),
  380. env);
  381. scheme_add_global_constant("string->list",
  382. scheme_make_immed_prim(string_to_list,
  383. "string->list",
  384. 1, 1),
  385. env);
  386. scheme_add_global_constant("list->string",
  387. scheme_make_immed_prim(list_to_string,
  388. "list->string",
  389. 1, 1),
  390. env);
  391. scheme_add_global_constant("string-copy",
  392. scheme_make_immed_prim(string_copy,
  393. "string-copy",
  394. 1, 1),
  395. env);
  396. scheme_add_global_constant("string-copy!",
  397. scheme_make_immed_prim(string_copy_bang,
  398. "string-copy!",
  399. 3, 5),
  400. env);
  401. scheme_add_global_constant("string-fill!",
  402. scheme_make_immed_prim(string_fill,
  403. "string-fill!",
  404. 2, 2),
  405. env);
  406. scheme_add_global_constant("string->immutable-string",
  407. scheme_make_immed_prim(string_to_immutable,
  408. "string->immutable-string",
  409. 1, 1),
  410. env);
  411. scheme_add_global_constant("string-normalize-nfc",
  412. scheme_make_immed_prim(string_normalize_c,
  413. "string-normalize-nfc",
  414. 1, 1),
  415. env);
  416. scheme_add_global_constant("string-normalize-nfkc",
  417. scheme_make_immed_prim(string_normalize_kc,
  418. "string-normalize-nfkc",
  419. 1, 1),
  420. env);
  421. scheme_add_global_constant("string-normalize-nfd",
  422. scheme_make_immed_prim(string_normalize_d,
  423. "string-normalize-nfd",
  424. 1, 1),
  425. env);
  426. scheme_add_global_constant("string-normalize-nfkd",
  427. scheme_make_immed_prim(string_normalize_kd,
  428. "string-normalize-nfkd",
  429. 1, 1),
  430. env);
  431. scheme_add_global_constant("string-upcase",
  432. scheme_make_immed_prim(string_upcase,
  433. "string-upcase",
  434. 1, 1),
  435. env);
  436. scheme_add_global_constant("string-downcase",
  437. scheme_make_immed_prim(string_downcase,
  438. "string-downcase",
  439. 1, 1),
  440. env);
  441. scheme_add_global_constant("string-titlecase",
  442. scheme_make_immed_prim(string_titlecase,
  443. "string-titlecase",
  444. 1, 1),
  445. env);
  446. scheme_add_global_constant("string-foldcase",
  447. scheme_make_immed_prim(string_foldcase,
  448. "string-foldcase",
  449. 1, 1),
  450. env);
  451. scheme_add_global_constant("string-locale-upcase",
  452. scheme_make_immed_prim(string_locale_upcase,
  453. "string-locale-upcase",
  454. 1, 1),
  455. env);
  456. scheme_add_global_constant("string-locale-downcase",
  457. scheme_make_immed_prim(string_locale_downcase,
  458. "string-locale-downcase",
  459. 1, 1),
  460. env);
  461. scheme_add_global_constant("current-locale",
  462. scheme_register_parameter(current_locale,
  463. "current-locale",
  464. MZCONFIG_LOCALE),
  465. env);
  466. scheme_add_global_constant("locale-string-encoding",
  467. scheme_make_immed_prim(locale_string_encoding,
  468. "locale-string-encoding",
  469. 0, 0),
  470. env);
  471. scheme_add_global_constant("system-language+country",
  472. scheme_make_immed_prim(system_language_country,
  473. "system-language+country",
  474. 0, 0),
  475. env);
  476. scheme_add_global_constant("bytes-converter?",
  477. scheme_make_immed_prim(byte_converter_p,
  478. "bytes-converter?",
  479. 1, 1),
  480. env);
  481. scheme_add_global_constant("bytes-convert",
  482. scheme_make_prim_w_arity2(byte_string_convert,
  483. "bytes-convert",
  484. 1, 7,
  485. 3, 3),
  486. env);
  487. scheme_add_global_constant("bytes-convert-end",
  488. scheme_make_prim_w_arity2(byte_string_convert_end,
  489. "bytes-convert-end",
  490. 0, 3,
  491. 2, 2),
  492. env);
  493. scheme_add_global_constant("bytes-open-converter",
  494. scheme_make_immed_prim(byte_string_open_converter,
  495. "bytes-open-converter",
  496. 2, 2),
  497. env);
  498. scheme_add_global_constant("bytes-close-converter",
  499. scheme_make_immed_prim(byte_string_close_converter,
  500. "bytes-close-converter",
  501. 1, 1),
  502. env);
  503. scheme_add_global_constant("format",
  504. scheme_make_noncm_prim(format,
  505. "format",
  506. 1, -1),
  507. env);
  508. scheme_add_global_constant("printf",
  509. scheme_make_noncm_prim(sch_printf,
  510. "printf",
  511. 1, -1),
  512. env);
  513. scheme_add_global_constant("eprintf",
  514. scheme_make_noncm_prim(sch_eprintf,
  515. "eprintf",
  516. 1, -1),
  517. env);
  518. scheme_add_global_constant("fprintf",
  519. scheme_make_noncm_prim(sch_fprintf,
  520. "fprintf",
  521. 2, -1),
  522. env);
  523. scheme_add_global_constant("byte?",
  524. scheme_make_folding_prim(byte_p,
  525. "byte?",
  526. 1, 1, 1),
  527. env);
  528. REGISTER_SO(scheme_byte_string_p_proc);
  529. p = scheme_make_folding_prim(byte_string_p, "bytes?", 1, 1, 1);
  530. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
  531. | SCHEME_PRIM_IS_OMITABLE);
  532. scheme_add_global_constant("bytes?", p, env);
  533. scheme_byte_string_p_proc = p;
  534. scheme_add_global_constant("make-bytes",
  535. scheme_make_immed_prim(make_byte_string,
  536. "make-bytes",
  537. 1, 2),
  538. env);
  539. scheme_add_global_constant("bytes",
  540. scheme_make_immed_prim(byte_string,
  541. "bytes",
  542. 0, -1),
  543. env);
  544. GLOBAL_PRIM_W_ARITY("make-shared-bytes", make_shared_byte_string, 1, 2, env);
  545. GLOBAL_PRIM_W_ARITY("shared-bytes", shared_byte_string, 0, -1, env);
  546. p = scheme_make_folding_prim(byte_string_length, "bytes-length", 1, 1, 1);
  547. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
  548. |SCHEME_PRIM_PRODUCES_FIXNUM);
  549. scheme_add_global_constant("bytes-length", p, env);
  550. p = scheme_make_immed_prim(scheme_checked_byte_string_ref, "bytes-ref", 2, 2);
  551. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED
  552. | SCHEME_PRIM_PRODUCES_FIXNUM);
  553. scheme_add_global_constant("bytes-ref", p, env);
  554. p = scheme_make_immed_prim(scheme_checked_byte_string_set, "bytes-set!", 3, 3);
  555. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_NARY_INLINED);
  556. scheme_add_global_constant("bytes-set!", p, env);
  557. p = scheme_make_immed_prim(byte_string_eq, "bytes=?", 2, -1);
  558. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED);
  559. scheme_add_global_constant("bytes=?", p, env);
  560. scheme_add_global_constant("bytes<?",
  561. scheme_make_immed_prim(byte_string_lt,
  562. "bytes<?",
  563. 2, -1),
  564. env);
  565. scheme_add_global_constant("bytes>?",
  566. scheme_make_immed_prim(byte_string_gt,
  567. "bytes>?",
  568. 2, -1),
  569. env);
  570. scheme_add_global_constant("subbytes",
  571. scheme_make_immed_prim(byte_substring,
  572. "subbytes",
  573. 2, 3),
  574. env);
  575. scheme_add_global_constant("bytes-append",
  576. scheme_make_immed_prim(byte_string_append,
  577. "bytes-append",
  578. 0, -1),
  579. env);
  580. scheme_add_global_constant("bytes->list",
  581. scheme_make_immed_prim(byte_string_to_list,
  582. "bytes->list",
  583. 1, 1),
  584. env);
  585. scheme_add_global_constant("list->bytes",
  586. scheme_make_immed_prim(list_to_byte_string,
  587. "list->bytes",
  588. 1, 1),
  589. env);
  590. scheme_add_global_constant("bytes-copy",
  591. scheme_make_immed_prim(byte_string_copy,
  592. "bytes-copy",
  593. 1, 1),
  594. env);
  595. scheme_add_global_constant("bytes-copy!",
  596. scheme_make_immed_prim(byte_string_copy_bang,
  597. "bytes-copy!",
  598. 3, 5),
  599. env);
  600. scheme_add_global_constant("bytes-fill!",
  601. scheme_make_immed_prim(byte_string_fill,
  602. "bytes-fill!",
  603. 2, 2),
  604. env);
  605. scheme_add_global_constant("bytes->immutable-bytes",
  606. scheme_make_immed_prim(byte_string_to_immutable,
  607. "bytes->immutable-bytes",
  608. 1, 1),
  609. env);
  610. p = scheme_make_immed_prim(byte_string_utf8_index, "bytes-utf-8-index", 2, 4);
  611. /* Incorrect, since the result can be #f:
  612. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM); */
  613. scheme_add_global_constant("bytes-utf-8-index", p, env);
  614. p = scheme_make_immed_prim(byte_string_utf8_length, "bytes-utf-8-length", 1, 4);
  615. /* Incorrect, since the result can be #f:
  616. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM); */
  617. scheme_add_global_constant("bytes-utf-8-length", p, env);
  618. scheme_add_global_constant("bytes-utf-8-ref",
  619. scheme_make_immed_prim(byte_string_utf8_ref,
  620. "bytes-utf-8-ref",
  621. 2, 4),
  622. env);
  623. scheme_add_global_constant("bytes->string/utf-8",
  624. scheme_make_immed_prim(byte_string_to_char_string,
  625. "bytes->string/utf-8",
  626. 1, 4),
  627. env);
  628. scheme_add_global_constant("bytes->string/locale",
  629. scheme_make_immed_prim(byte_string_to_char_string_locale,
  630. "bytes->string/locale",
  631. 1, 4),
  632. env);
  633. scheme_add_global_constant("bytes->string/latin-1",
  634. scheme_make_immed_prim(byte_string_to_char_string_latin1,
  635. "bytes->string/latin-1",
  636. 1, 4),
  637. env);
  638. scheme_add_global_constant("string->bytes/utf-8",
  639. scheme_make_immed_prim(char_string_to_byte_string,
  640. "string->bytes/utf-8",
  641. 1, 4),
  642. env);
  643. scheme_add_global_constant("string->bytes/locale",
  644. scheme_make_immed_prim(char_string_to_byte_string_locale,
  645. "string->bytes/locale",
  646. 1, 4),
  647. env);
  648. scheme_add_global_constant("string->bytes/latin-1",
  649. scheme_make_immed_prim(char_string_to_byte_string_latin1,
  650. "string->bytes/latin-1",
  651. 1, 4),
  652. env);
  653. scheme_add_global_constant("string-utf-8-length",
  654. scheme_make_immed_prim(char_string_utf8_length,
  655. "string-utf-8-length",
  656. 1, 3),
  657. env);
  658. /* In principle, `version' could be foldable, but it invites
  659. more problems than it solves... */
  660. scheme_add_global_constant("version",
  661. scheme_make_immed_prim(version,
  662. "version",
  663. 0, 0),
  664. env);
  665. scheme_add_global_constant("banner",
  666. scheme_make_immed_prim(banner,
  667. "banner",
  668. 0, 0),
  669. env);
  670. /* Environment variables */
  671. scheme_add_global_constant("environment-variables?",
  672. scheme_make_folding_prim(env_p,
  673. "environment-variables?",
  674. 1, 1, 1),
  675. env);
  676. scheme_add_global_constant("current-environment-variables",
  677. scheme_register_parameter(current_environment_variables,
  678. "current-environment-variables",
  679. MZCONFIG_CURRENT_ENV_VARS),
  680. env);
  681. scheme_add_global_constant("environment-variables-ref",
  682. scheme_make_immed_prim(sch_getenv,
  683. "environment-variables-ref",
  684. 2, 2),
  685. env);
  686. scheme_add_global_constant("environment-variables-set!",
  687. scheme_make_prim_w_arity(sch_putenv,
  688. "environment-variables-set!",
  689. 3, 4),
  690. env);
  691. scheme_add_global_constant("environment-variables-names",
  692. scheme_make_immed_prim(sch_getenv_names,
  693. "environment-variables-names",
  694. 1, 1),
  695. env);
  696. scheme_add_global_constant("environment-variables-copy",
  697. scheme_make_immed_prim(env_copy,
  698. "environment-variables-copy",
  699. 1, 1),
  700. env);
  701. scheme_add_global_constant("make-environment-variables",
  702. scheme_make_immed_prim(env_make,
  703. "make-environment-variables",
  704. 0, -1),
  705. env);
  706. /* Don't make these folding, since they're platform-specific: */
  707. scheme_add_global_constant("system-type",
  708. scheme_make_immed_prim(system_type,
  709. "system-type",
  710. 0, 1),
  711. env);
  712. scheme_add_global_constant("system-library-subpath",
  713. scheme_make_immed_prim(system_library_subpath,
  714. "system-library-subpath",
  715. 0, 1),
  716. env);
  717. scheme_add_global_constant("current-command-line-arguments",
  718. scheme_register_parameter(cmdline_args,
  719. "current-command-line-arguments",
  720. MZCONFIG_CMDLINE_ARGS),
  721. env);
  722. scheme_add_global_constant("path<?",
  723. scheme_make_immed_prim(path_lt,
  724. "path<?",
  725. 2, -1),
  726. env);
  727. #ifdef MZ_PRECISE_GC
  728. register_traversers();
  729. #endif
  730. }
  731. void scheme_init_string_places(void) {
  732. REGISTER_SO(current_locale_name_ptr);
  733. current_locale_name_ptr = (void *)xes_char_string;
  734. REGISTER_SO(fs_change_props);
  735. {
  736. int supported, scalable, low_latency, file_level;
  737. Scheme_Object *s;
  738. scheme_fs_change_properties(&supported, &scalable, &low_latency, &file_level);
  739. fs_change_props = scheme_make_vector(4, scheme_false);
  740. if (supported) {
  741. s = scheme_intern_symbol("supported");
  742. SCHEME_VEC_ELS(fs_change_props)[0] = s;
  743. }
  744. if (scalable) {
  745. s = scheme_intern_symbol("scalable");
  746. SCHEME_VEC_ELS(fs_change_props)[1] = s;
  747. }
  748. if (low_latency) {
  749. s = scheme_intern_symbol("low-latency");
  750. SCHEME_VEC_ELS(fs_change_props)[2] = s;
  751. }
  752. if (file_level) {
  753. s = scheme_intern_symbol("file-level");
  754. SCHEME_VEC_ELS(fs_change_props)[3] = s;
  755. }
  756. SCHEME_SET_IMMUTABLE(fs_change_props);
  757. }
  758. }
  759. /**********************************************************************/
  760. /* UTF-8 char constructors */
  761. /**********************************************************************/
  762. Scheme_Object *scheme_make_sized_offset_utf8_string(char *chars, intptr_t d, intptr_t len)
  763. {
  764. intptr_t ulen;
  765. mzchar *us;
  766. if (len) {
  767. ulen = scheme_utf8_decode((unsigned char *)chars, d, d + len,
  768. NULL, 0, -1,
  769. NULL, 0 /* not UTF-16 */, 0xFFFD);
  770. us = scheme_malloc_atomic(sizeof(mzchar) * (ulen + 1));
  771. scheme_utf8_decode((unsigned char *)chars, d, d + len,
  772. us, 0, -1,
  773. NULL, 0 /* not UTF-16 */, 0xFFFD);
  774. us[ulen] = 0;
  775. } else {
  776. us = (mzchar *)empty_char_string;
  777. ulen = 0;
  778. }
  779. return scheme_make_sized_offset_char_string(us, 0, ulen, 0);
  780. }
  781. Scheme_Object *
  782. scheme_make_sized_utf8_string(char *chars, intptr_t len)
  783. {
  784. return scheme_make_sized_offset_utf8_string(chars, 0, len);
  785. }
  786. Scheme_Object *
  787. scheme_make_immutable_sized_utf8_string(char *chars, intptr_t len)
  788. {
  789. Scheme_Object *s;
  790. s = scheme_make_sized_offset_utf8_string(chars, 0, len);
  791. if (len)
  792. SCHEME_SET_CHAR_STRING_IMMUTABLE(s);
  793. return s;
  794. }
  795. Scheme_Object *
  796. scheme_make_utf8_string(const char *chars)
  797. {
  798. return scheme_make_sized_offset_utf8_string((char *)chars, 0, -1);
  799. }
  800. Scheme_Object *
  801. scheme_make_locale_string(const char *chars)
  802. {
  803. return scheme_byte_string_to_char_string_locale(scheme_make_byte_string((char *)chars));
  804. }
  805. Scheme_Object *scheme_append_strings(Scheme_Object *s1, Scheme_Object *s2)
  806. {
  807. Scheme_Object *a[2];
  808. a[0] = s1;
  809. a[1] = s2;
  810. return string_append(2, a);
  811. }
  812. /**********************************************************************/
  813. /* index helpers */
  814. /**********************************************************************/
  815. intptr_t scheme_extract_index(const char *name, int pos, int argc, Scheme_Object **argv, intptr_t top, int false_ok)
  816. {
  817. intptr_t i;
  818. int is_top = 0;
  819. if (SCHEME_INTP(argv[pos])) {
  820. i = SCHEME_INT_VAL(argv[pos]);
  821. } else if (SCHEME_BIGNUMP(argv[pos])) {
  822. if (SCHEME_BIGPOS(argv[pos])) {
  823. i = top; /* out-of-bounds */
  824. is_top = 1;
  825. } else
  826. i = -1; /* negative */
  827. } else
  828. i = -1;
  829. if (!is_top && (i < 0))
  830. scheme_wrong_contract(name,
  831. (false_ok ? "(or/c exact-nonnegative-integer? #f)" : "exact-nonnegative-integer?"),
  832. pos, argc, argv);
  833. return i;
  834. }
  835. void scheme_get_substring_indices(const char *name, Scheme_Object *str,
  836. int argc, Scheme_Object **argv,
  837. int spos, int fpos, intptr_t *_start, intptr_t *_finish)
  838. {
  839. intptr_t len;
  840. intptr_t start, finish;
  841. if (SCHEME_CHAPERONE_VECTORP(str))
  842. len = SCHEME_CHAPERONE_VEC_SIZE(str);
  843. else if (SCHEME_CHAR_STRINGP(str))
  844. len = SCHEME_CHAR_STRTAG_VAL(str);
  845. else
  846. len = SCHEME_BYTE_STRTAG_VAL(str);
  847. if (argc > spos)
  848. start = scheme_extract_index(name, spos, argc, argv, len + 1, 0);
  849. else
  850. start = 0;
  851. if (argc > fpos)
  852. finish = scheme_extract_index(name, fpos, argc, argv, len + 1, 0);
  853. else
  854. finish = len;
  855. if (!(start <= len)) {
  856. scheme_out_of_range(name, NULL, (fpos < 100) ? "starting " : "", argv[spos], str, 0, len);
  857. }
  858. if (!(finish >= start && finish <= len)) {
  859. scheme_out_of_range(name, NULL, "ending ", argv[fpos], str, start, len);
  860. }
  861. *_start = start;
  862. *_finish = finish;
  863. }
  864. void scheme_do_get_substring_indices(const char *name, Scheme_Object *str,
  865. int argc, Scheme_Object **argv,
  866. int spos, int fpos, intptr_t *_start, intptr_t *_finish, intptr_t len)
  867. {
  868. if (argc > spos) {
  869. if (SCHEME_INTP(argv[spos])) {
  870. intptr_t start = SCHEME_INT_VAL(argv[spos]);
  871. if ((start >= 0) && (start < len)) {
  872. *_start = start;
  873. if (argc > fpos) {
  874. intptr_t finish = SCHEME_INT_VAL(argv[fpos]);
  875. if ((finish >= start) && (finish <= len)) {
  876. *_finish = finish;
  877. return;
  878. }
  879. } else {
  880. *_finish = len;
  881. return;
  882. }
  883. }
  884. }
  885. } else {
  886. *_start = 0;
  887. *_finish = len;
  888. return;
  889. }
  890. scheme_get_substring_indices(name, str, argc, argv, spos, fpos, _start, _finish);
  891. }
  892. /**********************************************************************/
  893. /* char strings */
  894. /**********************************************************************/
  895. #define SCHEME_X_STR_VAL(x) SCHEME_CHAR_STR_VAL(x)
  896. #define SCHEME_X_STRTAG_VAL(x) SCHEME_CHAR_STRTAG_VAL(x)
  897. #define SCHEME_X_STRINGP(x) SCHEME_CHAR_STRINGP(x)
  898. #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_CHAR_STRINGP(x)
  899. #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_CHAR_STRING_IMMUTABLE(x)
  900. #define scheme_x_string_type scheme_char_string_type
  901. #define X(a, b) a##_char##b
  902. #define X_(a, b) a##_##b
  903. #define X__(a) a
  904. #define EMPTY (mzchar *)"\0\0\0"
  905. #define Xchar mzchar
  906. #define uXchar mzchar
  907. #define XSTR ""
  908. #define IS_STR "string?"
  909. #define XSTRINGSTR "string"
  910. #define SUBXSTR "substring"
  911. #define CHARP(x) SCHEME_CHARP(x)
  912. #define CHAR_VAL(x) SCHEME_CHAR_VAL(x)
  913. #define CHAR_STR "char?"
  914. #define MAKE_CHAR(x) _scheme_make_char(x)
  915. #define xstrlen scheme_char_strlen
  916. #include "strops.inc"
  917. #define GEN_STRING_COMP(name, scheme_name, comp, op, ul, size_shortcut) \
  918. static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
  919. { mzchar *s, *prev; int i, sl, pl; int falz = 0;\
  920. if (!SCHEME_CHAR_STRINGP(argv[0])) \
  921. scheme_wrong_contract(scheme_name, "string?", 0, argc, argv); \
  922. prev = SCHEME_CHAR_STR_VAL(argv[0]); pl = SCHEME_CHAR_STRTAG_VAL(argv[0]); \
  923. for (i = 1; i < argc; i++) { \
  924. if (!SCHEME_CHAR_STRINGP(argv[i])) \
  925. scheme_wrong_contract(scheme_name, "string?", i, argc, argv); \
  926. s = SCHEME_CHAR_STR_VAL(argv[i]); sl = SCHEME_CHAR_STRTAG_VAL(argv[i]); \
  927. if (!falz) if (!(comp(scheme_name, \
  928. prev, pl, \
  929. s, sl, ul, size_shortcut) op 0)) falz = 1; \
  930. prev = s; pl = sl; \
  931. } \
  932. return falz ? scheme_false : scheme_true; \
  933. }
  934. GEN_STRING_COMP(string_eq, "string=?", mz_char_strcmp, ==, 0, 1)
  935. GEN_STRING_COMP(string_lt, "string<?", mz_char_strcmp, <, 0, 0)
  936. GEN_STRING_COMP(string_gt, "string>?", mz_char_strcmp, >, 0, 0)
  937. GEN_STRING_COMP(string_lt_eq, "string<=?", mz_char_strcmp, <=, 0, 0)
  938. GEN_STRING_COMP(string_gt_eq, "string>=?", mz_char_strcmp, >=, 0, 0)
  939. GEN_STRING_COMP(string_ci_eq, "string-ci=?", mz_char_strcmp_ci, ==, 0, 0)
  940. GEN_STRING_COMP(string_ci_lt, "string-ci<?", mz_char_strcmp_ci, <, 0, 0)
  941. GEN_STRING_COMP(string_ci_gt, "string-ci>?", mz_char_strcmp_ci, >, 0, 0)
  942. GEN_STRING_COMP(string_ci_lt_eq, "string-ci<=?", mz_char_strcmp_ci, <=, 0, 0)
  943. GEN_STRING_COMP(string_ci_gt_eq, "string-ci>=?", mz_char_strcmp_ci, >=, 0, 0)
  944. GEN_STRING_COMP(string_locale_eq, "string-locale=?", mz_char_strcmp, ==, 1, 0)
  945. GEN_STRING_COMP(string_locale_lt, "string-locale<?", mz_char_strcmp, <, 1, 0)
  946. GEN_STRING_COMP(string_locale_gt, "string-locale>?", mz_char_strcmp, >, 1, 0)
  947. GEN_STRING_COMP(string_locale_ci_eq, "string-locale-ci=?", mz_char_strcmp_ci, ==, 1, 0)
  948. GEN_STRING_COMP(string_locale_ci_lt, "string-locale-ci<?", mz_char_strcmp_ci, <, 1, 0)
  949. GEN_STRING_COMP(string_locale_ci_gt, "string-locale-ci>?", mz_char_strcmp_ci, >, 1, 0)
  950. Scheme_Object *scheme_string_eq_2(Scheme_Object *str1, Scheme_Object *str2)
  951. {
  952. Scheme_Object *a[2];
  953. a[0] = str1;
  954. a[1] = str2;
  955. return string_eq(2, a);
  956. }
  957. /**********************************************************************/
  958. /* byte strings */
  959. /**********************************************************************/
  960. #define SCHEME_BYTEP(x) ((SCHEME_INTP(x)) && (SCHEME_INT_VAL(x) >= 0) && (SCHEME_INT_VAL(x) <= 255))
  961. static Scheme_Object *
  962. byte_p(int argc, Scheme_Object *argv[])
  963. {
  964. return (SCHEME_BYTEP(argv[0]) ? scheme_true : scheme_false);
  965. }
  966. #define SCHEME_X_STR_VAL(x) SCHEME_BYTE_STR_VAL(x)
  967. #define SCHEME_X_STRTAG_VAL(x) SCHEME_BYTE_STRTAG_VAL(x)
  968. #define SCHEME_X_STRINGP(x) SCHEME_BYTE_STRINGP(x)
  969. #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_BYTE_STRINGP(x)
  970. #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_BYTE_STRING_IMMUTABLE(x)
  971. #define scheme_x_string_type scheme_byte_string_type
  972. #define X(a, b) a##_byte##b
  973. #define X_(a, b) a##_byte_##b
  974. #define X__(a) byte_##a
  975. #define EMPTY ""
  976. #define Xchar char
  977. #define uXchar unsigned char
  978. #define XSTR "byte "
  979. #define IS_STR "bytes?"
  980. #define XSTRINGSTR "bytes"
  981. #define SUBXSTR "subbytes"
  982. #define CHARP(x) SCHEME_BYTEP(x)
  983. #define CHAR_VAL(x) SCHEME_INT_VAL(x)
  984. #define CHAR_STR "byte?"
  985. #define MAKE_CHAR(x) scheme_make_integer_value(x)
  986. #define xstrlen strlen
  987. #define GENERATING_BYTE
  988. #include "strops.inc"
  989. #undef GENERATING_BYTE
  990. /* comparisons */
  991. #define GEN_BYTE_STRING_PATH_COMP(name, scheme_name, comp, op, PRED, contract) \
  992. static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
  993. { char *s, *prev; int i, sl, pl; int falz = 0;\
  994. if (!PRED(argv[0])) \
  995. scheme_wrong_contract(scheme_name, contract, 0, argc, argv); \
  996. prev = SCHEME_BYTE_STR_VAL(argv[0]); pl = SCHEME_BYTE_STRTAG_VAL(argv[0]); \
  997. for (i = 1; i < argc; i++) { \
  998. if (!PRED(argv[i])) \
  999. scheme_wrong_contract(scheme_name, contract, i, argc, argv); \
  1000. s = SCHEME_BYTE_STR_VAL(argv[i]); sl = SCHEME_BYTE_STRTAG_VAL(argv[i]); \
  1001. if (!falz) if (!(comp(scheme_name, \
  1002. (unsigned char *)prev, pl, \
  1003. (unsigned char *)s, sl) op 0)) falz = 1; \
  1004. prev = s; pl = sl; \
  1005. } \
  1006. return falz ? scheme_false : scheme_true; \
  1007. }
  1008. #define GEN_BYTE_STRING_COMP(name, scheme_name, comp, op) \
  1009. GEN_BYTE_STRING_PATH_COMP(name, scheme_name, comp, op, SCHEME_BYTE_STRINGP, "bytes?") \
  1010. GEN_BYTE_STRING_COMP(byte_string_eq, "bytes=?", mz_strcmp, ==)
  1011. GEN_BYTE_STRING_COMP(byte_string_lt, "bytes<?", mz_strcmp, <)
  1012. GEN_BYTE_STRING_COMP(byte_string_gt, "bytes>?", mz_strcmp, >)
  1013. GEN_BYTE_STRING_PATH_COMP(path_lt, "path<?", mz_strcmp, <, SCHEME_PATHP, "path?")
  1014. Scheme_Object *scheme_byte_string_eq_2(Scheme_Object *str1, Scheme_Object *str2)
  1015. {
  1016. Scheme_Object *a[2];
  1017. a[0] = str1;
  1018. a[1] = str2;
  1019. return byte_string_eq(2, a);
  1020. }
  1021. /**********************************************************************/
  1022. /* byte string <-> char string */
  1023. /**********************************************************************/
  1024. /************************* bytes->string *************************/
  1025. static Scheme_Object *
  1026. do_byte_string_to_char_string(const char *who,
  1027. Scheme_Object *bstr,
  1028. intptr_t istart, intptr_t ifinish,
  1029. int perm, int as_locale)
  1030. {
  1031. int i, ulen;
  1032. char *chars;
  1033. unsigned int *v;
  1034. chars = SCHEME_BYTE_STR_VAL(bstr);
  1035. ulen = utf8_decode_x((unsigned char *)chars, istart, ifinish,
  1036. NULL, 0, -1,
  1037. NULL, NULL, 0, 0,
  1038. NULL, 0,
  1039. (perm > -1) ? 0xD800 : 0);
  1040. if (ulen < 0) {
  1041. scheme_contract_error(who,
  1042. "string is not a well-formed UTF-8 encoding",
  1043. "string", 1, bstr,
  1044. NULL);
  1045. }
  1046. v = (unsigned int *)scheme_malloc_atomic((ulen + 1) * sizeof(unsigned int));
  1047. utf8_decode_x((unsigned char *)chars, istart, ifinish,
  1048. v, 0, -1,
  1049. NULL, NULL, 0, 0,
  1050. NULL, 0,
  1051. (perm > -1) ? 0xD800 : 0);
  1052. if (perm > -1) {
  1053. for (i = 0; i < ulen; i++) {
  1054. if (v[i] == 0xD800)
  1055. v[i] = perm;
  1056. }
  1057. }
  1058. v[ulen] = 0;
  1059. return scheme_make_sized_char_string(v, ulen, 0);
  1060. }
  1061. static Scheme_Object *
  1062. do_byte_string_to_char_string_locale(const char *who,
  1063. Scheme_Object *bstr,
  1064. intptr_t istart, intptr_t ifinish,
  1065. int perm)
  1066. {
  1067. char *us;
  1068. intptr_t olen;
  1069. reset_locale();
  1070. if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on
  1071. || !(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED))
  1072. return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
  1073. if (istart < ifinish) {
  1074. int no_cvt;
  1075. us = string_to_from_locale(0, SCHEME_BYTE_STR_VAL(bstr),
  1076. istart, ifinish - istart,
  1077. &olen, perm, &no_cvt);
  1078. if (!us) {
  1079. if (no_cvt) {
  1080. return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
  1081. } else {
  1082. scheme_contract_error(who,
  1083. "byte string is not a valid encoding for the current locale",
  1084. "byte string", 1, bstr,
  1085. NULL);
  1086. }
  1087. }
  1088. ((mzchar *)us)[olen] = 0;
  1089. } else {
  1090. us = "\0\0\0";
  1091. olen = 0;
  1092. }
  1093. return scheme_make_sized_char_string((mzchar *)us, olen, 0);
  1094. }
  1095. static Scheme_Object *
  1096. do_string_to_vector(const char *who, int mode, int argc, Scheme_Object *argv[])
  1097. {
  1098. int permc;
  1099. intptr_t istart, ifinish;
  1100. if (!SCHEME_BYTE_STRINGP(argv[0]))
  1101. scheme_wrong_contract(who, "bytes?", 0, argc, argv);
  1102. if ((argc < 2) || SCHEME_FALSEP(argv[1]))
  1103. permc = -1;
  1104. else {
  1105. if (!SCHEME_CHARP(argv[1]))
  1106. scheme_wrong_contract(who, "(or/c char? #f)", 1, argc, argv);
  1107. permc = SCHEME_CHAR_VAL(argv[1]);
  1108. }
  1109. scheme_get_substring_indices(who, argv[0], argc, argv,
  1110. 2, 3,
  1111. &istart, &ifinish);
  1112. if (mode == 0)
  1113. return do_byte_string_to_char_string(who, argv[0], istart, ifinish, permc, 0);
  1114. else if (mode == 1)
  1115. return do_byte_string_to_char_string_locale(who, argv[0], istart, ifinish, permc);
  1116. else {
  1117. /* Latin-1 */
  1118. mzchar *us;
  1119. unsigned char *s;
  1120. intptr_t i, len;
  1121. len = ifinish - istart;
  1122. s = (unsigned char *)SCHEME_BYTE_STR_VAL(argv[0]);
  1123. us = (mzchar *)scheme_malloc_atomic((len + 1) * sizeof(mzchar));
  1124. for (i = istart; i < ifinish; i++) {
  1125. us[i - istart] = s[i];
  1126. }
  1127. us[len] = 0;
  1128. return scheme_make_sized_char_string(us, len, 0);
  1129. }
  1130. }
  1131. static Scheme_Object *
  1132. byte_string_to_char_string (int argc, Scheme_Object *argv[])
  1133. {
  1134. return do_string_to_vector("bytes->string/utf-8", 0, argc, argv);
  1135. }
  1136. static Scheme_Object *
  1137. byte_string_to_char_string_locale (int argc, Scheme_Object *argv[])
  1138. {
  1139. return do_string_to_vector("bytes->string/locale", 1, argc, argv);
  1140. }
  1141. static Scheme_Object *
  1142. byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[])
  1143. {
  1144. return do_string_to_vector("bytes->string/latin-1", 2, argc, argv);
  1145. }
  1146. Scheme_Object *scheme_byte_string_to_char_string(Scheme_Object *o)
  1147. {
  1148. return do_byte_string_to_char_string("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD, 0);
  1149. }
  1150. Scheme_Object *scheme_byte_string_to_char_string_locale(Scheme_Object *o)
  1151. {
  1152. return do_byte_string_to_char_string_locale("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD);
  1153. }
  1154. /************************* string->bytes *************************/
  1155. static Scheme_Object *do_char_string_to_byte_string(Scheme_Object *s, intptr_t istart, intptr_t ifinish,
  1156. int as_locale)
  1157. {
  1158. char *bs;
  1159. int slen;
  1160. slen = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
  1161. NULL, 0,
  1162. 0 /* UTF-16 */);
  1163. bs = (char *)scheme_malloc_atomic(slen + 1);
  1164. scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
  1165. (unsigned char *)bs, 0,
  1166. 0 /* UTF-16 */);
  1167. bs[slen] = 0;
  1168. return scheme_make_sized_byte_string(bs, slen, 0);
  1169. }
  1170. static Scheme_Object *
  1171. do_char_string_to_byte_string_locale(const char *who,
  1172. Scheme_Object *cstr,
  1173. intptr_t istart, intptr_t ifinish,
  1174. int perm)
  1175. {
  1176. char *s;
  1177. intptr_t olen;
  1178. reset_locale();
  1179. if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on
  1180. || !(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED))
  1181. return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
  1182. if (istart < ifinish) {
  1183. int no_cvt;
  1184. s = string_to_from_locale(1, (char *)SCHEME_CHAR_STR_VAL(cstr),
  1185. istart, ifinish - istart,
  1186. &olen, perm, &no_cvt);
  1187. if (!s) {
  1188. if (no_cvt) {
  1189. return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
  1190. } else {
  1191. scheme_contract_error(who,
  1192. "string cannot be encoded for the current locale",
  1193. "string", 1, cstr,
  1194. NULL);
  1195. }
  1196. }
  1197. s[olen] = 0;
  1198. } else {
  1199. s = "";
  1200. olen = 0;
  1201. }
  1202. return scheme_make_sized_byte_string(s, olen, 0);
  1203. }
  1204. Scheme_Object *scheme_char_string_to_byte_string(Scheme_Object *s)
  1205. {
  1206. return do_char_string_to_byte_string(s, 0, SCHEME_CHAR_STRLEN_VAL(s), 0);
  1207. }
  1208. Scheme_Object *scheme_char_string_to_byte_string_locale(Scheme_Object *s)
  1209. {
  1210. return do_char_string_to_byte_string_locale("s->s", s, 0, SCHEME_CHAR_STRLEN_VAL(s), '?');
  1211. }
  1212. static Scheme_Object *do_chars_to_bytes(const char *who, int mode,
  1213. int argc, Scheme_Object *argv[])
  1214. {
  1215. intptr_t istart, ifinish;
  1216. int permc;
  1217. if (!SCHEME_CHAR_STRINGP(argv[0]))
  1218. scheme_wrong_contract(who, "string?", 0, argc, argv);
  1219. if ((argc < 2) || SCHEME_FALSEP(argv[1]))
  1220. permc = -1;
  1221. else {
  1222. if (!SCHEME_BYTEP(argv[1]))
  1223. scheme_wrong_contract(who, "(or/c byte? #f)", 1, argc, argv);
  1224. permc = SCHEME_INT_VAL(argv[1]);
  1225. }
  1226. scheme_get_substring_indices(who, argv[0], argc, argv,
  1227. 2, 3, &istart, &ifinish);
  1228. if (mode == 1)
  1229. return do_char_string_to_byte_string_locale(who, argv[0], istart, ifinish, permc);
  1230. else if (mode == 0)
  1231. return do_char_string_to_byte_string(argv[0], istart, ifinish, 0);
  1232. else {
  1233. /* Latin-1 */
  1234. mzchar *us;
  1235. unsigned char *s;
  1236. intptr_t i, len;
  1237. len = ifinish - istart;
  1238. us = SCHEME_CHAR_STR_VAL(argv[0]);
  1239. s = (unsigned char *)scheme_malloc_atomic(len + 1);
  1240. for (i = istart; i < ifinish; i++) {
  1241. if (us[i] < 256)
  1242. s[i - istart] = us[i];
  1243. else if (permc >= 0) {
  1244. s[i - istart] = permc;
  1245. } else {
  1246. scheme_contract_error(who,
  1247. "string cannot be encoded in Latin-1",
  1248. "string", 1, argv[0],
  1249. NULL);
  1250. }
  1251. }
  1252. s[len] = 0;
  1253. return scheme_ma

Large files files are truncated, but you can click here to view the full file