PageRenderTime 73ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 1ms

/src/racket/src/string.c

http://github.com/dyoo/racket
C | 5990 lines | 4895 code | 748 blank | 347 comment | 1095 complexity | b7e0ccf9aaca8c8d36abccaac11d9421 MD5 | raw file
Possible License(s): BSD-3-Clause

Large files files are truncated, but you can click here to view the full file

  1. /*
  2. Racket
  3. Copyright (c) 2004-2013 PLT Design Inc.
  4. Copyright (c) 1995-2001 Matthew Flatt
  5. This library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Library General Public
  7. License as published by the Free Software Foundation; either
  8. version 2 of the License, or (at your option) any later version.
  9. This library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Library General Public License for more details.
  13. You should have received a copy of the GNU Library General Public
  14. License along with this library; if not, write to the Free
  15. Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  16. Boston, MA 02110-1301 USA.
  17. libscheme
  18. Copyright (c) 1994 Brent Benson
  19. All rights reserved.
  20. */
  21. #include "schpriv.h"
  22. #include "schvers.h"
  23. #include <string.h>
  24. #include <ctype.h>
  25. #ifndef DONT_USE_LOCALE
  26. # include <locale.h>
  27. # ifdef MZ_NO_ICONV
  28. # define USE_ICONV_DLL
  29. # endif
  30. # ifndef USE_ICONV_DLL
  31. # include <iconv.h>
  32. # include <langinfo.h>
  33. # endif
  34. # include <wchar.h>
  35. # include <wctype.h>
  36. # include <errno.h>
  37. # ifdef MACOS_UNICODE_SUPPORT
  38. # include <CoreFoundation/CFString.h>
  39. # include <CoreFoundation/CFLocale.h>
  40. # endif
  41. # ifdef WINDOWS_UNICODE_SUPPORT
  42. # include <windows.h>
  43. # endif
  44. #endif
  45. #ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH
  46. # include "schsys.h"
  47. #endif
  48. #include "schustr.inc"
  49. #ifdef USE_ICONV_DLL
  50. typedef intptr_t iconv_t;
  51. typedef int *(*errno_proc_t)();
  52. typedef size_t (*iconv_proc_t)(iconv_t cd,
  53. char **inbuf, size_t *inbytesleft,
  54. char **outbuf, size_t *outbytesleft);
  55. typedef iconv_t (*iconv_open_proc_t)(const char *tocode, const char *fromcode);
  56. typedef void (*iconv_close_proc_t)(iconv_t cd);
  57. typedef char *(*locale_charset_proc_t)();
  58. static errno_proc_t iconv_errno;
  59. static iconv_proc_t iconv;
  60. static iconv_open_proc_t iconv_open;
  61. static iconv_close_proc_t iconv_close;
  62. static locale_charset_proc_t locale_charset; /* Not used, currently */
  63. #define mzCHK_PROC(x) x
  64. static int get_iconv_errno(void)
  65. {
  66. int *a;
  67. a = iconv_errno();
  68. return *a;
  69. }
  70. # undef HAVE_CODESET
  71. # define HAVE_CODESET 1
  72. # define CODESET 0
  73. # define ICONV_errno get_iconv_errno()
  74. static int iconv_ready = 0;
  75. static void init_iconv()
  76. {
  77. # ifdef MZ_NO_ICONV
  78. # else
  79. HMODULE m;
  80. m = LoadLibraryW(scheme_get_dll_path(L"iconv.dll"));
  81. if (!m)
  82. m = LoadLibraryW(scheme_get_dll_path(L"libiconv.dll"));
  83. if (!m)
  84. m = LoadLibraryW(scheme_get_dll_path(L"libiconv-2.dll"));
  85. if (!m)
  86. m = LoadLibrary("iconv.dll");
  87. if (!m)
  88. m = LoadLibrary("libiconv.dll");
  89. if (!m)
  90. m = LoadLibrary("libiconv-2.dll");
  91. if (m) {
  92. iconv = (iconv_proc_t)GetProcAddress(m, "libiconv");
  93. iconv_open = (iconv_open_proc_t)GetProcAddress(m, "libiconv_open");
  94. iconv_close = (iconv_close_proc_t)GetProcAddress(m, "libiconv_close");
  95. locale_charset = (locale_charset_proc_t)GetProcAddress(m, "locale_charset");
  96. /* Make sure we have all of them or none: */
  97. if (!iconv || !iconv_open || !iconv_close) {
  98. iconv = NULL;
  99. iconv_open = NULL;
  100. iconv_close = NULL;
  101. }
  102. }
  103. if (iconv) {
  104. iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno");
  105. if (!iconv_errno) {
  106. /* The iconv.dll distributed with Racket links to msvcrt.dll.
  107. It's a slighly dangerous assumption that whatever iconv we
  108. found also uses msvcrt.dll. */
  109. m = LoadLibrary("msvcrt.dll");
  110. if (m) {
  111. iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno");
  112. if (!iconv_errno) {
  113. iconv = NULL;
  114. iconv_open = NULL;
  115. iconv_close = NULL;
  116. }
  117. }
  118. }
  119. }
  120. # endif
  121. iconv_ready = 1;
  122. }
  123. #else
  124. # define ICONV_errno errno
  125. # define iconv_ready 1
  126. # define mzCHK_PROC(x) 1
  127. static void init_iconv() { }
  128. #endif
  129. #ifdef MACOS_UNICODE_SUPPORT
  130. # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
  131. #endif
  132. #ifdef WINDOWS_UNICODE_SUPPORT
  133. # define mzLOCALE_IS_UTF_8(s) (!s || !(*s))
  134. #endif
  135. #ifndef mzLOCALE_IS_UTF_8
  136. # define mzLOCALE_IS_UTF_8(s) !mzCHK_PROC(iconv_open)
  137. #endif
  138. #define mzICONV_KIND 0
  139. #define mzUTF8_KIND 1
  140. #define mzUTF8_TO_UTF16_KIND 2
  141. #define mzUTF16_TO_UTF8_KIND 3
  142. typedef struct Scheme_Converter {
  143. Scheme_Object so;
  144. short closed;
  145. short kind;
  146. iconv_t cd;
  147. int permissive;
  148. Scheme_Custodian_Reference *mref;
  149. } Scheme_Converter;
  150. /* locals */
  151. /* These two locale variables are only valid when reset_locale()
  152. is called after continuation marks (and hence parameterization)
  153. may have changed. Similarly, setlocale() is only up-to-date
  154. when reset_locale() has been called. */
  155. THREAD_LOCAL_DECL(static int locale_on);
  156. THREAD_LOCAL_DECL(static void *current_locale_name_ptr);
  157. static void reset_locale(void);
  158. #define current_locale_name ((const mzchar *)current_locale_name_ptr)
  159. #ifdef USE_ICONV_DLL
  160. static char *nl_langinfo(int which)
  161. {
  162. int i;
  163. reset_locale();
  164. if (!current_locale_name)
  165. current_locale_name_ptr = "\0\0\0\0";
  166. if ((current_locale_name[0] == 'C')
  167. && !current_locale_name[1])
  168. return "US-ASCII";
  169. for (i = 0; current_locale_name[i]; i++) {
  170. if (current_locale_name[i] == '.') {
  171. if (current_locale_name[i + 1]) {
  172. int len, j = 0;
  173. char *enc;
  174. i++;
  175. len = scheme_char_strlen(current_locale_name) - i;
  176. enc = (char *)scheme_malloc_atomic(len + 1);
  177. while (current_locale_name[i]) {
  178. if (current_locale_name[i] > 127)
  179. return "UTF-8";
  180. enc[j++] = current_locale_name[i++];
  181. }
  182. enc[j] = 0;
  183. return enc;
  184. }
  185. }
  186. }
  187. return "UTF-8";
  188. }
  189. #endif
  190. #ifdef DONT_USE_LOCALE
  191. # define mz_iconv_nl_langinfo() ""
  192. #else
  193. static char *mz_iconv_nl_langinfo(){
  194. char *s;
  195. # if HAVE_CODESET
  196. s = nl_langinfo(CODESET);
  197. # else
  198. s = NULL;
  199. # endif
  200. if (!s)
  201. return "";
  202. else
  203. return s;
  204. }
  205. #endif
  206. static Scheme_Object *make_string (int argc, Scheme_Object *argv[]);
  207. static Scheme_Object *string (int argc, Scheme_Object *argv[]);
  208. static Scheme_Object *string_p (int argc, Scheme_Object *argv[]);
  209. static Scheme_Object *string_length (int argc, Scheme_Object *argv[]);
  210. static Scheme_Object *string_eq (int argc, Scheme_Object *argv[]);
  211. static Scheme_Object *string_locale_eq (int argc, Scheme_Object *argv[]);
  212. static Scheme_Object *string_ci_eq (int argc, Scheme_Object *argv[]);
  213. static Scheme_Object *string_locale_ci_eq (int argc, Scheme_Object *argv[]);
  214. static Scheme_Object *string_lt (int argc, Scheme_Object *argv[]);
  215. static Scheme_Object *string_locale_lt (int argc, Scheme_Object *argv[]);
  216. static Scheme_Object *string_gt (int argc, Scheme_Object *argv[]);
  217. static Scheme_Object *string_locale_gt (int argc, Scheme_Object *argv[]);
  218. static Scheme_Object *string_lt_eq (int argc, Scheme_Object *argv[]);
  219. static Scheme_Object *string_gt_eq (int argc, Scheme_Object *argv[]);
  220. static Scheme_Object *string_ci_lt (int argc, Scheme_Object *argv[]);
  221. static Scheme_Object *string_locale_ci_lt (int argc, Scheme_Object *argv[]);
  222. static Scheme_Object *string_ci_gt (int argc, Scheme_Object *argv[]);
  223. static Scheme_Object *string_locale_ci_gt (int argc, Scheme_Object *argv[]);
  224. static Scheme_Object *string_ci_lt_eq (int argc, Scheme_Object *argv[]);
  225. static Scheme_Object *string_ci_gt_eq (int argc, Scheme_Object *argv[]);
  226. static Scheme_Object *string_upcase (int argc, Scheme_Object *argv[]);
  227. static Scheme_Object *string_downcase (int argc, Scheme_Object *argv[]);
  228. static Scheme_Object *string_titlecase (int argc, Scheme_Object *argv[]);
  229. static Scheme_Object *string_foldcase (int argc, Scheme_Object *argv[]);
  230. static Scheme_Object *string_locale_upcase (int argc, Scheme_Object *argv[]);
  231. static Scheme_Object *string_locale_downcase (int argc, Scheme_Object *argv[]);
  232. static Scheme_Object *substring (int argc, Scheme_Object *argv[]);
  233. static Scheme_Object *string_append (int argc, Scheme_Object *argv[]);
  234. static Scheme_Object *string_to_list (int argc, Scheme_Object *argv[]);
  235. static Scheme_Object *list_to_string (int argc, Scheme_Object *argv[]);
  236. static Scheme_Object *string_copy (int argc, Scheme_Object *argv[]);
  237. static Scheme_Object *string_copy_bang (int argc, Scheme_Object *argv[]);
  238. static Scheme_Object *string_fill (int argc, Scheme_Object *argv[]);
  239. static Scheme_Object *string_to_immutable (int argc, Scheme_Object *argv[]);
  240. static Scheme_Object *string_normalize_c (int argc, Scheme_Object *argv[]);
  241. static Scheme_Object *string_normalize_kc (int argc, Scheme_Object *argv[]);
  242. static Scheme_Object *string_normalize_d (int argc, Scheme_Object *argv[]);
  243. static Scheme_Object *string_normalize_kd (int argc, Scheme_Object *argv[]);
  244. static Scheme_Object *make_shared_byte_string (int argc, Scheme_Object *argv[]);
  245. static Scheme_Object *shared_byte_string (int argc, Scheme_Object *argv[]);
  246. static Scheme_Object *make_byte_string (int argc, Scheme_Object *argv[]);
  247. static Scheme_Object *byte_string (int argc, Scheme_Object *argv[]);
  248. static Scheme_Object *byte_p (int argc, Scheme_Object *argv[]);
  249. static Scheme_Object *byte_string_p (int argc, Scheme_Object *argv[]);
  250. static Scheme_Object *byte_string_length (int argc, Scheme_Object *argv[]);
  251. static Scheme_Object *byte_string_eq (int argc, Scheme_Object *argv[]);
  252. static Scheme_Object *byte_string_lt (int argc, Scheme_Object *argv[]);
  253. static Scheme_Object *byte_string_gt (int argc, Scheme_Object *argv[]);
  254. static Scheme_Object *byte_substring (int argc, Scheme_Object *argv[]);
  255. static Scheme_Object *byte_string_append (int argc, Scheme_Object *argv[]);
  256. static Scheme_Object *byte_string_to_list (int argc, Scheme_Object *argv[]);
  257. static Scheme_Object *list_to_byte_string (int argc, Scheme_Object *argv[]);
  258. static Scheme_Object *byte_string_copy (int argc, Scheme_Object *argv[]);
  259. static Scheme_Object *byte_string_copy_bang (int argc, Scheme_Object *argv[]);
  260. static Scheme_Object *byte_string_fill (int argc, Scheme_Object *argv[]);
  261. static Scheme_Object *byte_string_to_immutable (int argc, Scheme_Object *argv[]);
  262. static Scheme_Object *byte_string_utf8_index (int argc, Scheme_Object *argv[]);
  263. static Scheme_Object *byte_string_utf8_ref (int argc, Scheme_Object *argv[]);
  264. static Scheme_Object *byte_string_utf8_length (int argc, Scheme_Object *argv[]);
  265. static Scheme_Object *byte_string_to_char_string (int argc, Scheme_Object *argv[]);
  266. static Scheme_Object *byte_string_to_char_string_locale (int argc, Scheme_Object *argv[]);
  267. static Scheme_Object *byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[]);
  268. static Scheme_Object *char_string_to_byte_string (int argc, Scheme_Object *argv[]);
  269. static Scheme_Object *char_string_to_byte_string_locale (int argc, Scheme_Object *argv[]);
  270. static Scheme_Object *char_string_to_byte_string_latin1 (int argc, Scheme_Object *argv[]);
  271. static Scheme_Object *char_string_utf8_length (int argc, Scheme_Object *argv[]);
  272. static Scheme_Object *version(int argc, Scheme_Object *argv[]);
  273. static Scheme_Object *format(int argc, Scheme_Object *argv[]);
  274. static Scheme_Object *sch_printf(int argc, Scheme_Object *argv[]);
  275. static Scheme_Object *sch_eprintf(int argc, Scheme_Object *argv[]);
  276. static Scheme_Object *sch_fprintf(int argc, Scheme_Object *argv[]);
  277. static Scheme_Object *banner(int argc, Scheme_Object *argv[]);
  278. static Scheme_Object *env_p(int argc, Scheme_Object *argv[]);
  279. static Scheme_Object *sch_getenv(int argc, Scheme_Object *argv[]);
  280. static Scheme_Object *sch_getenv_names(int argc, Scheme_Object *argv[]);
  281. static Scheme_Object *sch_putenv(int argc, Scheme_Object *argv[]);
  282. static Scheme_Object *env_copy(int argc, Scheme_Object *argv[]);
  283. static Scheme_Object *env_make(int argc, Scheme_Object *argv[]);
  284. static Scheme_Object *current_environment_variables(int argc, Scheme_Object *argv[]);
  285. static Scheme_Object *system_type(int argc, Scheme_Object *argv[]);
  286. static Scheme_Object *system_library_subpath(int argc, Scheme_Object *argv[]);
  287. static Scheme_Object *cmdline_args(int argc, Scheme_Object *argv[]);
  288. static Scheme_Object *current_locale(int argc, Scheme_Object *argv[]);
  289. static Scheme_Object *locale_string_encoding(int argc, Scheme_Object *argv[]);
  290. static Scheme_Object *system_language_country(int argc, Scheme_Object *argv[]);
  291. static Scheme_Object *byte_string_open_converter(int argc, Scheme_Object *argv[]);
  292. static Scheme_Object *byte_string_close_converter(int argc, Scheme_Object *argv[]);
  293. static Scheme_Object *byte_string_convert(int argc, Scheme_Object *argv[]);
  294. static Scheme_Object *byte_string_convert_end(int argc, Scheme_Object *argv[]);
  295. static Scheme_Object *byte_converter_p(int argc, Scheme_Object *argv[]);
  296. #ifdef MZ_PRECISE_GC
  297. static void register_traversers(void);
  298. #endif
  299. static int mz_char_strcmp(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
  300. static int mz_char_strcmp_ci(const char *who, const mzchar *str1, intptr_t l1, const mzchar *str2, intptr_t l2, int locale, int size_shortcut);
  301. static int mz_strcmp(const char *who, unsigned char *str1, intptr_t l1, unsigned char *str2, intptr_t l2);
  302. XFORM_NONGCING static intptr_t utf8_decode_x(const unsigned char *s, intptr_t start, intptr_t end,
  303. unsigned int *us, intptr_t dstart, intptr_t dend,
  304. intptr_t *ipos, intptr_t *jpos,
  305. char compact, char utf16,
  306. int *state, int might_continue, int permissive);
  307. XFORM_NONGCING static intptr_t utf8_encode_x(const unsigned int *us, intptr_t start, intptr_t end,
  308. unsigned char *s, intptr_t dstart, intptr_t dend,
  309. intptr_t *_ipos, intptr_t *_opos, char utf16);
  310. static char *string_to_from_locale(int to_bytes,
  311. char *in, intptr_t delta, intptr_t len,
  312. intptr_t *olen, int perm,
  313. int *no_cvt);
  314. #define portable_isspace(x) (((x) < 128) && isspace(x))
  315. ROSYM static Scheme_Object *sys_symbol;
  316. ROSYM static Scheme_Object *link_symbol, *machine_symbol, *gc_symbol;
  317. ROSYM static Scheme_Object *so_suffix_symbol, *so_mode_symbol, *word_symbol;
  318. ROSYM static Scheme_Object *os_symbol;
  319. ROSYM static Scheme_Object *platform_3m_path, *platform_cgc_path;
  320. READ_ONLY static Scheme_Object *zero_length_char_string;
  321. READ_ONLY static Scheme_Object *zero_length_byte_string;
  322. SHARED_OK static Scheme_Hash_Table *putenv_str_table;
  323. SHARED_OK static char *embedding_banner;
  324. SHARED_OK static Scheme_Object *vers_str;
  325. SHARED_OK static Scheme_Object *banner_str;
  326. READ_ONLY static Scheme_Object *complete_symbol, *continues_symbol, *aborts_symbol, *error_symbol;
  327. void
  328. scheme_init_string (Scheme_Env *env)
  329. {
  330. Scheme_Object *p;
  331. REGISTER_SO(sys_symbol);
  332. sys_symbol = scheme_intern_symbol(SYSTEM_TYPE_NAME);
  333. REGISTER_SO(link_symbol);
  334. REGISTER_SO(machine_symbol);
  335. REGISTER_SO(gc_symbol);
  336. REGISTER_SO(so_suffix_symbol);
  337. REGISTER_SO(so_mode_symbol);
  338. REGISTER_SO(word_symbol);
  339. REGISTER_SO(os_symbol);
  340. link_symbol = scheme_intern_symbol("link");
  341. machine_symbol = scheme_intern_symbol("machine");
  342. gc_symbol = scheme_intern_symbol("gc");
  343. so_suffix_symbol = scheme_intern_symbol("so-suffix");
  344. so_mode_symbol = scheme_intern_symbol("so-mode");
  345. word_symbol = scheme_intern_symbol("word");
  346. os_symbol = scheme_intern_symbol("os");
  347. REGISTER_SO(zero_length_char_string);
  348. REGISTER_SO(zero_length_byte_string);
  349. zero_length_char_string = scheme_alloc_char_string(0, 0);
  350. zero_length_byte_string = scheme_alloc_byte_string(0, 0);
  351. REGISTER_SO(complete_symbol);
  352. REGISTER_SO(continues_symbol);
  353. REGISTER_SO(aborts_symbol);
  354. REGISTER_SO(error_symbol);
  355. complete_symbol = scheme_intern_symbol("complete");
  356. continues_symbol = scheme_intern_symbol("continues");
  357. aborts_symbol = scheme_intern_symbol("aborts");
  358. error_symbol = scheme_intern_symbol("error");
  359. REGISTER_SO(platform_3m_path);
  360. #ifdef UNIX_FILE_SYSTEM
  361. # define MZ3M_SUBDIR "/3m"
  362. #else
  363. # ifdef DOS_FILE_SYSTEM
  364. # define MZ3M_SUBDIR "\\3m"
  365. # else
  366. # define MZ3M_SUBDIR ":3m"
  367. # endif
  368. #endif
  369. REGISTER_SO(platform_3m_path);
  370. REGISTER_SO(platform_cgc_path);
  371. platform_cgc_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH);
  372. platform_3m_path = scheme_make_path(SCHEME_PLATFORM_LIBRARY_SUBPATH MZ3M_SUBDIR);
  373. REGISTER_SO(putenv_str_table);
  374. REGISTER_SO(embedding_banner);
  375. REGISTER_SO(vers_str);
  376. REGISTER_SO(banner_str);
  377. vers_str = scheme_make_utf8_string(scheme_version());
  378. SCHEME_SET_CHAR_STRING_IMMUTABLE(vers_str);
  379. banner_str = scheme_make_utf8_string(scheme_banner());
  380. SCHEME_SET_CHAR_STRING_IMMUTABLE(banner_str);
  381. p = scheme_make_folding_prim(string_p, "string?", 1, 1, 1);
  382. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
  383. | SCHEME_PRIM_IS_OMITABLE);
  384. scheme_add_global_constant("string?", p, env);
  385. scheme_add_global_constant("make-string",
  386. scheme_make_immed_prim(make_string,
  387. "make-string",
  388. 1, 2),
  389. env);
  390. scheme_add_global_constant("string",
  391. scheme_make_immed_prim(string,
  392. "string",
  393. 0, -1),
  394. env);
  395. p = scheme_make_folding_prim(string_length, "string-length", 1, 1, 1);
  396. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM);
  397. scheme_add_global_constant("string-length", p,
  398. env);
  399. p = scheme_make_immed_prim(scheme_checked_string_ref, "string-ref", 2, 2);
  400. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED);
  401. scheme_add_global_constant("string-ref", p, env);
  402. p = scheme_make_immed_prim(scheme_checked_string_set, "string-set!", 3, 3);
  403. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_NARY_INLINED);
  404. scheme_add_global_constant("string-set!", p, env);
  405. scheme_add_global_constant("string=?",
  406. scheme_make_immed_prim(string_eq,
  407. "string=?",
  408. 2, -1),
  409. env);
  410. scheme_add_global_constant("string-locale=?",
  411. scheme_make_immed_prim(string_locale_eq,
  412. "string-locale=?",
  413. 2, -1),
  414. env);
  415. scheme_add_global_constant("string-ci=?",
  416. scheme_make_immed_prim(string_ci_eq,
  417. "string-ci=?",
  418. 2, -1),
  419. env);
  420. scheme_add_global_constant("string-locale-ci=?",
  421. scheme_make_immed_prim(string_locale_ci_eq,
  422. "string-locale-ci=?",
  423. 2, -1),
  424. env);
  425. scheme_add_global_constant("string<?",
  426. scheme_make_immed_prim(string_lt,
  427. "string<?",
  428. 2, -1),
  429. env);
  430. scheme_add_global_constant("string-locale<?",
  431. scheme_make_immed_prim(string_locale_lt,
  432. "string-locale<?",
  433. 2, -1),
  434. env);
  435. scheme_add_global_constant("string>?",
  436. scheme_make_immed_prim(string_gt,
  437. "string>?",
  438. 2, -1),
  439. env);
  440. scheme_add_global_constant("string-locale>?",
  441. scheme_make_immed_prim(string_locale_gt,
  442. "string-locale>?",
  443. 2, -1),
  444. env);
  445. scheme_add_global_constant("string<=?",
  446. scheme_make_immed_prim(string_lt_eq,
  447. "string<=?",
  448. 2, -1),
  449. env);
  450. scheme_add_global_constant("string>=?",
  451. scheme_make_immed_prim(string_gt_eq,
  452. "string>=?",
  453. 2, -1),
  454. env);
  455. scheme_add_global_constant("string-ci<?",
  456. scheme_make_immed_prim(string_ci_lt,
  457. "string-ci<?",
  458. 2, -1),
  459. env);
  460. scheme_add_global_constant("string-locale-ci<?",
  461. scheme_make_immed_prim(string_locale_ci_lt,
  462. "string-locale-ci<?",
  463. 2, -1),
  464. env);
  465. scheme_add_global_constant("string-ci>?",
  466. scheme_make_immed_prim(string_ci_gt,
  467. "string-ci>?",
  468. 2, -1),
  469. env);
  470. scheme_add_global_constant("string-locale-ci>?",
  471. scheme_make_immed_prim(string_locale_ci_gt,
  472. "string-locale-ci>?",
  473. 2, -1),
  474. env);
  475. scheme_add_global_constant("string-ci<=?",
  476. scheme_make_immed_prim(string_ci_lt_eq,
  477. "string-ci<=?",
  478. 2, -1),
  479. env);
  480. scheme_add_global_constant("string-ci>=?",
  481. scheme_make_immed_prim(string_ci_gt_eq,
  482. "string-ci>=?",
  483. 2, -1),
  484. env);
  485. scheme_add_global_constant("substring",
  486. scheme_make_immed_prim(substring,
  487. "substring",
  488. 2, 3),
  489. env);
  490. scheme_add_global_constant("string-append",
  491. scheme_make_immed_prim(string_append,
  492. "string-append",
  493. 0, -1),
  494. env);
  495. scheme_add_global_constant("string->list",
  496. scheme_make_immed_prim(string_to_list,
  497. "string->list",
  498. 1, 1),
  499. env);
  500. scheme_add_global_constant("list->string",
  501. scheme_make_immed_prim(list_to_string,
  502. "list->string",
  503. 1, 1),
  504. env);
  505. scheme_add_global_constant("string-copy",
  506. scheme_make_immed_prim(string_copy,
  507. "string-copy",
  508. 1, 1),
  509. env);
  510. scheme_add_global_constant("string-copy!",
  511. scheme_make_immed_prim(string_copy_bang,
  512. "string-copy!",
  513. 3, 5),
  514. env);
  515. scheme_add_global_constant("string-fill!",
  516. scheme_make_immed_prim(string_fill,
  517. "string-fill!",
  518. 2, 2),
  519. env);
  520. scheme_add_global_constant("string->immutable-string",
  521. scheme_make_immed_prim(string_to_immutable,
  522. "string->immutable-string",
  523. 1, 1),
  524. env);
  525. scheme_add_global_constant("string-normalize-nfc",
  526. scheme_make_immed_prim(string_normalize_c,
  527. "string-normalize-nfc",
  528. 1, 1),
  529. env);
  530. scheme_add_global_constant("string-normalize-nfkc",
  531. scheme_make_immed_prim(string_normalize_kc,
  532. "string-normalize-nfkc",
  533. 1, 1),
  534. env);
  535. scheme_add_global_constant("string-normalize-nfd",
  536. scheme_make_immed_prim(string_normalize_d,
  537. "string-normalize-nfd",
  538. 1, 1),
  539. env);
  540. scheme_add_global_constant("string-normalize-nfkd",
  541. scheme_make_immed_prim(string_normalize_kd,
  542. "string-normalize-nfkd",
  543. 1, 1),
  544. env);
  545. scheme_add_global_constant("string-upcase",
  546. scheme_make_immed_prim(string_upcase,
  547. "string-upcase",
  548. 1, 1),
  549. env);
  550. scheme_add_global_constant("string-downcase",
  551. scheme_make_immed_prim(string_downcase,
  552. "string-downcase",
  553. 1, 1),
  554. env);
  555. scheme_add_global_constant("string-titlecase",
  556. scheme_make_immed_prim(string_titlecase,
  557. "string-titlecase",
  558. 1, 1),
  559. env);
  560. scheme_add_global_constant("string-foldcase",
  561. scheme_make_immed_prim(string_foldcase,
  562. "string-foldcase",
  563. 1, 1),
  564. env);
  565. scheme_add_global_constant("string-locale-upcase",
  566. scheme_make_immed_prim(string_locale_upcase,
  567. "string-locale-upcase",
  568. 1, 1),
  569. env);
  570. scheme_add_global_constant("string-locale-downcase",
  571. scheme_make_immed_prim(string_locale_downcase,
  572. "string-locale-downcase",
  573. 1, 1),
  574. env);
  575. scheme_add_global_constant("current-locale",
  576. scheme_register_parameter(current_locale,
  577. "current-locale",
  578. MZCONFIG_LOCALE),
  579. env);
  580. scheme_add_global_constant("locale-string-encoding",
  581. scheme_make_immed_prim(locale_string_encoding,
  582. "locale-string-encoding",
  583. 0, 0),
  584. env);
  585. scheme_add_global_constant("system-language+country",
  586. scheme_make_immed_prim(system_language_country,
  587. "system-language+country",
  588. 0, 0),
  589. env);
  590. scheme_add_global_constant("bytes-converter?",
  591. scheme_make_immed_prim(byte_converter_p,
  592. "bytes-converter?",
  593. 1, 1),
  594. env);
  595. scheme_add_global_constant("bytes-convert",
  596. scheme_make_prim_w_arity2(byte_string_convert,
  597. "bytes-convert",
  598. 1, 7,
  599. 3, 3),
  600. env);
  601. scheme_add_global_constant("bytes-convert-end",
  602. scheme_make_prim_w_arity2(byte_string_convert_end,
  603. "bytes-convert-end",
  604. 0, 3,
  605. 2, 2),
  606. env);
  607. scheme_add_global_constant("bytes-open-converter",
  608. scheme_make_immed_prim(byte_string_open_converter,
  609. "bytes-open-converter",
  610. 2, 2),
  611. env);
  612. scheme_add_global_constant("bytes-close-converter",
  613. scheme_make_immed_prim(byte_string_close_converter,
  614. "bytes-close-converter",
  615. 1, 1),
  616. env);
  617. scheme_add_global_constant("format",
  618. scheme_make_noncm_prim(format,
  619. "format",
  620. 1, -1),
  621. env);
  622. scheme_add_global_constant("printf",
  623. scheme_make_noncm_prim(sch_printf,
  624. "printf",
  625. 1, -1),
  626. env);
  627. scheme_add_global_constant("eprintf",
  628. scheme_make_noncm_prim(sch_eprintf,
  629. "eprintf",
  630. 1, -1),
  631. env);
  632. scheme_add_global_constant("fprintf",
  633. scheme_make_noncm_prim(sch_fprintf,
  634. "fprintf",
  635. 2, -1),
  636. env);
  637. scheme_add_global_constant("byte?",
  638. scheme_make_folding_prim(byte_p,
  639. "byte?",
  640. 1, 1, 1),
  641. env);
  642. p = scheme_make_folding_prim(byte_string_p, "bytes?", 1, 1, 1);
  643. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_NARY_INLINED
  644. | SCHEME_PRIM_IS_OMITABLE);
  645. scheme_add_global_constant("bytes?", p, env);
  646. scheme_add_global_constant("make-bytes",
  647. scheme_make_immed_prim(make_byte_string,
  648. "make-bytes",
  649. 1, 2),
  650. env);
  651. scheme_add_global_constant("bytes",
  652. scheme_make_immed_prim(byte_string,
  653. "bytes",
  654. 0, -1),
  655. env);
  656. GLOBAL_PRIM_W_ARITY("make-shared-bytes", make_shared_byte_string, 1, 2, env);
  657. GLOBAL_PRIM_W_ARITY("shared-bytes", shared_byte_string, 0, -1, env);
  658. p = scheme_make_folding_prim(byte_string_length, "bytes-length", 1, 1, 1);
  659. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM);
  660. scheme_add_global_constant("bytes-length", p, env);
  661. p = scheme_make_immed_prim(scheme_checked_byte_string_ref, "bytes-ref", 2, 2);
  662. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED
  663. | SCHEME_PRIM_PRODUCES_FIXNUM);
  664. scheme_add_global_constant("bytes-ref", p, env);
  665. p = scheme_make_immed_prim(scheme_checked_byte_string_set, "bytes-set!", 3, 3);
  666. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_NARY_INLINED);
  667. scheme_add_global_constant("bytes-set!", p, env);
  668. scheme_add_global_constant("bytes=?",
  669. scheme_make_immed_prim(byte_string_eq,
  670. "bytes=?",
  671. 2, -1),
  672. env);
  673. scheme_add_global_constant("bytes<?",
  674. scheme_make_immed_prim(byte_string_lt,
  675. "bytes<?",
  676. 2, -1),
  677. env);
  678. scheme_add_global_constant("bytes>?",
  679. scheme_make_immed_prim(byte_string_gt,
  680. "bytes>?",
  681. 2, -1),
  682. env);
  683. scheme_add_global_constant("subbytes",
  684. scheme_make_immed_prim(byte_substring,
  685. "subbytes",
  686. 2, 3),
  687. env);
  688. scheme_add_global_constant("bytes-append",
  689. scheme_make_immed_prim(byte_string_append,
  690. "bytes-append",
  691. 0, -1),
  692. env);
  693. scheme_add_global_constant("bytes->list",
  694. scheme_make_immed_prim(byte_string_to_list,
  695. "bytes->list",
  696. 1, 1),
  697. env);
  698. scheme_add_global_constant("list->bytes",
  699. scheme_make_immed_prim(list_to_byte_string,
  700. "list->bytes",
  701. 1, 1),
  702. env);
  703. scheme_add_global_constant("bytes-copy",
  704. scheme_make_immed_prim(byte_string_copy,
  705. "bytes-copy",
  706. 1, 1),
  707. env);
  708. scheme_add_global_constant("bytes-copy!",
  709. scheme_make_immed_prim(byte_string_copy_bang,
  710. "bytes-copy!",
  711. 3, 5),
  712. env);
  713. scheme_add_global_constant("bytes-fill!",
  714. scheme_make_immed_prim(byte_string_fill,
  715. "bytes-fill!",
  716. 2, 2),
  717. env);
  718. scheme_add_global_constant("bytes->immutable-bytes",
  719. scheme_make_immed_prim(byte_string_to_immutable,
  720. "bytes->immutable-bytes",
  721. 1, 1),
  722. env);
  723. p = scheme_make_immed_prim(byte_string_utf8_index, "bytes-utf-8-index", 2, 4);
  724. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM);
  725. scheme_add_global_constant("bytes-utf-8-index", p, env);
  726. p = scheme_make_immed_prim(byte_string_utf8_length, "bytes-utf-8-length", 1, 4);
  727. SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_PRODUCES_FIXNUM);
  728. scheme_add_global_constant("bytes-utf-8-length", p, env);
  729. scheme_add_global_constant("bytes-utf-8-ref",
  730. scheme_make_immed_prim(byte_string_utf8_ref,
  731. "bytes-utf-8-ref",
  732. 2, 4),
  733. env);
  734. scheme_add_global_constant("bytes->string/utf-8",
  735. scheme_make_immed_prim(byte_string_to_char_string,
  736. "bytes->string/utf-8",
  737. 1, 4),
  738. env);
  739. scheme_add_global_constant("bytes->string/locale",
  740. scheme_make_immed_prim(byte_string_to_char_string_locale,
  741. "bytes->string/locale",
  742. 1, 4),
  743. env);
  744. scheme_add_global_constant("bytes->string/latin-1",
  745. scheme_make_immed_prim(byte_string_to_char_string_latin1,
  746. "bytes->string/latin-1",
  747. 1, 4),
  748. env);
  749. scheme_add_global_constant("string->bytes/utf-8",
  750. scheme_make_immed_prim(char_string_to_byte_string,
  751. "string->bytes/utf-8",
  752. 1, 4),
  753. env);
  754. scheme_add_global_constant("string->bytes/locale",
  755. scheme_make_immed_prim(char_string_to_byte_string_locale,
  756. "string->bytes/locale",
  757. 1, 4),
  758. env);
  759. scheme_add_global_constant("string->bytes/latin-1",
  760. scheme_make_immed_prim(char_string_to_byte_string_latin1,
  761. "string->bytes/latin-1",
  762. 1, 4),
  763. env);
  764. scheme_add_global_constant("string-utf-8-length",
  765. scheme_make_immed_prim(char_string_utf8_length,
  766. "string-utf-8-length",
  767. 1, 3),
  768. env);
  769. /* In principle, `version' could be foldable, but it invites
  770. more problems than it solves... */
  771. scheme_add_global_constant("version",
  772. scheme_make_immed_prim(version,
  773. "version",
  774. 0, 0),
  775. env);
  776. scheme_add_global_constant("banner",
  777. scheme_make_immed_prim(banner,
  778. "banner",
  779. 0, 0),
  780. env);
  781. /* Environment variables */
  782. scheme_add_global_constant("environment-variables?",
  783. scheme_make_folding_prim(env_p,
  784. "environment-variables?",
  785. 1, 1, 1),
  786. env);
  787. scheme_add_global_constant("current-environment-variables",
  788. scheme_register_parameter(current_environment_variables,
  789. "current-environment-variables",
  790. MZCONFIG_CURRENT_ENV_VARS),
  791. env);
  792. scheme_add_global_constant("environment-variables-ref",
  793. scheme_make_immed_prim(sch_getenv,
  794. "environment-variables-ref",
  795. 2, 2),
  796. env);
  797. scheme_add_global_constant("environment-variables-set!",
  798. scheme_make_prim_w_arity(sch_putenv,
  799. "environment-variables-set!",
  800. 3, 4),
  801. env);
  802. scheme_add_global_constant("environment-variables-names",
  803. scheme_make_immed_prim(sch_getenv_names,
  804. "environment-variables-names",
  805. 1, 1),
  806. env);
  807. scheme_add_global_constant("environment-variables-copy",
  808. scheme_make_immed_prim(env_copy,
  809. "environment-variables-copy",
  810. 1, 1),
  811. env);
  812. scheme_add_global_constant("make-environment-variables",
  813. scheme_make_immed_prim(env_make,
  814. "make-environment-variables",
  815. 0, -1),
  816. env);
  817. /* Don't make these folding, since they're platform-specific: */
  818. scheme_add_global_constant("system-type",
  819. scheme_make_immed_prim(system_type,
  820. "system-type",
  821. 0, 1),
  822. env);
  823. scheme_add_global_constant("system-library-subpath",
  824. scheme_make_immed_prim(system_library_subpath,
  825. "system-library-subpath",
  826. 0, 1),
  827. env);
  828. scheme_add_global_constant("current-command-line-arguments",
  829. scheme_register_parameter(cmdline_args,
  830. "current-command-line-arguments",
  831. MZCONFIG_CMDLINE_ARGS),
  832. env);
  833. #ifdef MZ_PRECISE_GC
  834. register_traversers();
  835. #endif
  836. }
  837. void scheme_init_string_places(void) {
  838. REGISTER_SO(current_locale_name_ptr);
  839. current_locale_name_ptr = "xxxx\0\0\0\0";
  840. }
  841. /**********************************************************************/
  842. /* UTF-8 char constructors */
  843. /**********************************************************************/
  844. Scheme_Object *scheme_make_sized_offset_utf8_string(char *chars, intptr_t d, intptr_t len)
  845. {
  846. intptr_t ulen;
  847. mzchar *us;
  848. if (len) {
  849. ulen = scheme_utf8_decode((unsigned char *)chars, d, d + len,
  850. NULL, 0, -1,
  851. NULL, 0 /* not UTF-16 */, 0xFFFD);
  852. us = scheme_malloc_atomic(sizeof(mzchar) * (ulen + 1));
  853. scheme_utf8_decode((unsigned char *)chars, d, d + len,
  854. us, 0, -1,
  855. NULL, 0 /* not UTF-16 */, 0xFFFD);
  856. us[ulen] = 0;
  857. } else {
  858. us = (mzchar *)"\0\0\0";
  859. ulen = 0;
  860. }
  861. return scheme_make_sized_offset_char_string(us, 0, ulen, 0);
  862. }
  863. Scheme_Object *
  864. scheme_make_sized_utf8_string(char *chars, intptr_t len)
  865. {
  866. return scheme_make_sized_offset_utf8_string(chars, 0, len);
  867. }
  868. Scheme_Object *
  869. scheme_make_immutable_sized_utf8_string(char *chars, intptr_t len)
  870. {
  871. Scheme_Object *s;
  872. s = scheme_make_sized_offset_utf8_string(chars, 0, len);
  873. if (len)
  874. SCHEME_SET_CHAR_STRING_IMMUTABLE(s);
  875. return s;
  876. }
  877. Scheme_Object *
  878. scheme_make_utf8_string(const char *chars)
  879. {
  880. return scheme_make_sized_offset_utf8_string((char *)chars, 0, -1);
  881. }
  882. Scheme_Object *
  883. scheme_make_locale_string(const char *chars)
  884. {
  885. return scheme_byte_string_to_char_string_locale(scheme_make_byte_string((char *)chars));
  886. }
  887. /**********************************************************************/
  888. /* index helpers */
  889. /**********************************************************************/
  890. intptr_t scheme_extract_index(const char *name, int pos, int argc, Scheme_Object **argv, intptr_t top, int false_ok)
  891. {
  892. intptr_t i;
  893. int is_top = 0;
  894. if (SCHEME_INTP(argv[pos])) {
  895. i = SCHEME_INT_VAL(argv[pos]);
  896. } else if (SCHEME_BIGNUMP(argv[pos])) {
  897. if (SCHEME_BIGPOS(argv[pos])) {
  898. i = top; /* out-of-bounds */
  899. is_top = 1;
  900. } else
  901. i = -1; /* negative */
  902. } else
  903. i = -1;
  904. if (!is_top && (i < 0))
  905. scheme_wrong_contract(name,
  906. (false_ok ? "(or/c exact-nonnegative-integer? #f)" : "exact-nonnegative-integer?"),
  907. pos, argc, argv);
  908. return i;
  909. }
  910. void scheme_get_substring_indices(const char *name, Scheme_Object *str,
  911. int argc, Scheme_Object **argv,
  912. int spos, int fpos, intptr_t *_start, intptr_t *_finish)
  913. {
  914. intptr_t len;
  915. intptr_t start, finish;
  916. if (SCHEME_CHAPERONE_VECTORP(str))
  917. len = SCHEME_VEC_SIZE(str);
  918. else if (SCHEME_CHAR_STRINGP(str))
  919. len = SCHEME_CHAR_STRTAG_VAL(str);
  920. else
  921. len = SCHEME_BYTE_STRTAG_VAL(str);
  922. if (argc > spos)
  923. start = scheme_extract_index(name, spos, argc, argv, len + 1, 0);
  924. else
  925. start = 0;
  926. if (argc > fpos)
  927. finish = scheme_extract_index(name, fpos, argc, argv, len + 1, 0);
  928. else
  929. finish = len;
  930. if (!(start <= len)) {
  931. scheme_out_of_range(name, NULL, (fpos < 100) ? "starting " : "", argv[spos], str, 0, len);
  932. }
  933. if (!(finish >= start && finish <= len)) {
  934. scheme_out_of_range(name, NULL, "ending ", argv[fpos], str, start, len);
  935. }
  936. *_start = start;
  937. *_finish = finish;
  938. }
  939. void scheme_do_get_substring_indices(const char *name, Scheme_Object *str,
  940. int argc, Scheme_Object **argv,
  941. int spos, int fpos, intptr_t *_start, intptr_t *_finish, intptr_t len)
  942. {
  943. if (argc > spos) {
  944. if (SCHEME_INTP(argv[spos])) {
  945. intptr_t start = SCHEME_INT_VAL(argv[spos]);
  946. if ((start >= 0) && (start < len)) {
  947. *_start = start;
  948. if (argc > fpos) {
  949. intptr_t finish = SCHEME_INT_VAL(argv[fpos]);
  950. if ((finish >= start) && (finish <= len)) {
  951. *_finish = finish;
  952. return;
  953. }
  954. } else {
  955. *_finish = len;
  956. return;
  957. }
  958. }
  959. }
  960. } else {
  961. *_start = 0;
  962. *_finish = len;
  963. return;
  964. }
  965. scheme_get_substring_indices(name, str, argc, argv, spos, fpos, _start, _finish);
  966. }
  967. /**********************************************************************/
  968. /* char strings */
  969. /**********************************************************************/
  970. #define SCHEME_X_STR_VAL(x) SCHEME_CHAR_STR_VAL(x)
  971. #define SCHEME_X_STRTAG_VAL(x) SCHEME_CHAR_STRTAG_VAL(x)
  972. #define SCHEME_X_STRINGP(x) SCHEME_CHAR_STRINGP(x)
  973. #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_CHAR_STRINGP(x)
  974. #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_CHAR_STRING_IMMUTABLE(x)
  975. #define scheme_x_string_type scheme_char_string_type
  976. #define X(a, b) a##_char##b
  977. #define X_(a, b) a##_##b
  978. #define X__(a) a
  979. #define EMPTY (mzchar *)"\0\0\0"
  980. #define Xchar mzchar
  981. #define uXchar mzchar
  982. #define XSTR ""
  983. #define IS_STR "string?"
  984. #define XSTRINGSTR "string"
  985. #define SUBXSTR "substring"
  986. #define CHARP(x) SCHEME_CHARP(x)
  987. #define CHAR_VAL(x) SCHEME_CHAR_VAL(x)
  988. #define CHAR_STR "char?"
  989. #define MAKE_CHAR(x) _scheme_make_char(x)
  990. #define xstrlen scheme_char_strlen
  991. #include "strops.inc"
  992. #define GEN_STRING_COMP(name, scheme_name, comp, op, ul, size_shortcut) \
  993. static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
  994. { mzchar *s, *prev; int i, sl, pl; int falz = 0;\
  995. if (!SCHEME_CHAR_STRINGP(argv[0])) \
  996. scheme_wrong_contract(scheme_name, "string?", 0, argc, argv); \
  997. prev = SCHEME_CHAR_STR_VAL(argv[0]); pl = SCHEME_CHAR_STRTAG_VAL(argv[0]); \
  998. for (i = 1; i < argc; i++) { \
  999. if (!SCHEME_CHAR_STRINGP(argv[i])) \
  1000. scheme_wrong_contract(scheme_name, "string?", i, argc, argv); \
  1001. s = SCHEME_CHAR_STR_VAL(argv[i]); sl = SCHEME_CHAR_STRTAG_VAL(argv[i]); \
  1002. if (!falz) if (!(comp(scheme_name, \
  1003. prev, pl, \
  1004. s, sl, ul, size_shortcut) op 0)) falz = 1; \
  1005. prev = s; pl = sl; \
  1006. } \
  1007. return falz ? scheme_false : scheme_true; \
  1008. }
  1009. GEN_STRING_COMP(string_eq, "string=?", mz_char_strcmp, ==, 0, 1)
  1010. GEN_STRING_COMP(string_lt, "string<?", mz_char_strcmp, <, 0, 0)
  1011. GEN_STRING_COMP(string_gt, "string>?", mz_char_strcmp, >, 0, 0)
  1012. GEN_STRING_COMP(string_lt_eq, "string<=?", mz_char_strcmp, <=, 0, 0)
  1013. GEN_STRING_COMP(string_gt_eq, "string>=?", mz_char_strcmp, >=, 0, 0)
  1014. GEN_STRING_COMP(string_ci_eq, "string-ci=?", mz_char_strcmp_ci, ==, 0, 0)
  1015. GEN_STRING_COMP(string_ci_lt, "string-ci<?", mz_char_strcmp_ci, <, 0, 0)
  1016. GEN_STRING_COMP(string_ci_gt, "string-ci>?", mz_char_strcmp_ci, >, 0, 0)
  1017. GEN_STRING_COMP(string_ci_lt_eq, "string-ci<=?", mz_char_strcmp_ci, <=, 0, 0)
  1018. GEN_STRING_COMP(string_ci_gt_eq, "string-ci>=?", mz_char_strcmp_ci, >=, 0, 0)
  1019. GEN_STRING_COMP(string_locale_eq, "string-locale=?", mz_char_strcmp, ==, 1, 0)
  1020. GEN_STRING_COMP(string_locale_lt, "string-locale<?", mz_char_strcmp, <, 1, 0)
  1021. GEN_STRING_COMP(string_locale_gt, "string-locale>?", mz_char_strcmp, >, 1, 0)
  1022. GEN_STRING_COMP(string_locale_ci_eq, "string-locale-ci=?", mz_char_strcmp_ci, ==, 1, 0)
  1023. GEN_STRING_COMP(string_locale_ci_lt, "string-locale-ci<?", mz_char_strcmp_ci, <, 1, 0)
  1024. GEN_STRING_COMP(string_locale_ci_gt, "string-locale-ci>?", mz_char_strcmp_ci, >, 1, 0)
  1025. /**********************************************************************/
  1026. /* byte strings */
  1027. /**********************************************************************/
  1028. #define SCHEME_BYTEP(x) ((SCHEME_INTP(x)) && (SCHEME_INT_VAL(x) >= 0) && (SCHEME_INT_VAL(x) <= 255))
  1029. static Scheme_Object *
  1030. byte_p(int argc, Scheme_Object *argv[])
  1031. {
  1032. return (SCHEME_BYTEP(argv[0]) ? scheme_true : scheme_false);
  1033. }
  1034. #define SCHEME_X_STR_VAL(x) SCHEME_BYTE_STR_VAL(x)
  1035. #define SCHEME_X_STRTAG_VAL(x) SCHEME_BYTE_STRTAG_VAL(x)
  1036. #define SCHEME_X_STRINGP(x) SCHEME_BYTE_STRINGP(x)
  1037. #define SCHEME_MUTABLE_X_STRINGP(x) SCHEME_MUTABLE_BYTE_STRINGP(x)
  1038. #define SCHEME_SET_X_STRING_IMMUTABLE(x) SCHEME_SET_BYTE_STRING_IMMUTABLE(x)
  1039. #define scheme_x_string_type scheme_byte_string_type
  1040. #define X(a, b) a##_byte##b
  1041. #define X_(a, b) a##_byte_##b
  1042. #define X__(a) byte_##a
  1043. #define EMPTY ""
  1044. #define Xchar char
  1045. #define uXchar unsigned char
  1046. #define XSTR "byte "
  1047. #define IS_STR "bytes?"
  1048. #define XSTRINGSTR "bytes"
  1049. #define SUBXSTR "subbytes"
  1050. #define CHARP(x) SCHEME_BYTEP(x)
  1051. #define CHAR_VAL(x) SCHEME_INT_VAL(x)
  1052. #define CHAR_STR "byte?"
  1053. #define MAKE_CHAR(x) scheme_make_integer_value(x)
  1054. #define xstrlen strlen
  1055. #define GENERATING_BYTE
  1056. #include "strops.inc"
  1057. #undef GENERATING_BYTE
  1058. /* comparisons */
  1059. #define GEN_BYTE_STRING_COMP(name, scheme_name, comp, op) \
  1060. static Scheme_Object * name (int argc, Scheme_Object *argv[]) \
  1061. { char *s, *prev; int i, sl, pl; int falz = 0;\
  1062. if (!SCHEME_BYTE_STRINGP(argv[0])) \
  1063. scheme_wrong_contract(scheme_name, "bytes?", 0, argc, argv); \
  1064. prev = SCHEME_BYTE_STR_VAL(argv[0]); pl = SCHEME_BYTE_STRTAG_VAL(argv[0]); \
  1065. for (i = 1; i < argc; i++) { \
  1066. if (!SCHEME_BYTE_STRINGP(argv[i])) \
  1067. scheme_wrong_contract(scheme_name, "bytes?", i, argc, argv); \
  1068. s = SCHEME_BYTE_STR_VAL(argv[i]); sl = SCHEME_BYTE_STRTAG_VAL(argv[i]); \
  1069. if (!falz) if (!(comp(scheme_name, \
  1070. (unsigned char *)prev, pl, \
  1071. (unsigned char *)s, sl) op 0)) falz = 1; \
  1072. prev = s; pl = sl; \
  1073. } \
  1074. return falz ? scheme_false : scheme_true; \
  1075. }
  1076. GEN_BYTE_STRING_COMP(byte_string_eq, "bytes=?", mz_strcmp, ==)
  1077. GEN_BYTE_STRING_COMP(byte_string_lt, "bytes<?", mz_strcmp, <)
  1078. GEN_BYTE_STRING_COMP(byte_string_gt, "bytes>?", mz_strcmp, >)
  1079. /**********************************************************************/
  1080. /* byte string <-> char string */
  1081. /**********************************************************************/
  1082. /************************* bytes->string *************************/
  1083. static Scheme_Object *
  1084. do_byte_string_to_char_string(const char *who,
  1085. Scheme_Object *bstr,
  1086. intptr_t istart, intptr_t ifinish,
  1087. int perm, int as_locale)
  1088. {
  1089. int i, ulen;
  1090. char *chars;
  1091. unsigned int *v;
  1092. chars = SCHEME_BYTE_STR_VAL(bstr);
  1093. ulen = utf8_decode_x((unsigned char *)chars, istart, ifinish,
  1094. NULL, 0, -1,
  1095. NULL, NULL, 0, 0,
  1096. NULL, 0,
  1097. (perm > -1) ? 0xD800 : 0);
  1098. if (ulen < 0) {
  1099. scheme_contract_error(who,
  1100. "string is not a well-formed UTF-8 encoding",
  1101. "string", 1, bstr,
  1102. NULL);
  1103. }
  1104. v = (unsigned int *)scheme_malloc_atomic((ulen + 1) * sizeof(unsigned int));
  1105. utf8_decode_x((unsigned char *)chars, istart, ifinish,
  1106. v, 0, -1,
  1107. NULL, NULL, 0, 0,
  1108. NULL, 0,
  1109. (perm > -1) ? 0xD800 : 0);
  1110. if (perm > -1) {
  1111. for (i = 0; i < ulen; i++) {
  1112. if (v[i] == 0xD800)
  1113. v[i] = perm;
  1114. }
  1115. }
  1116. v[ulen] = 0;
  1117. return scheme_make_sized_char_string(v, ulen, 0);
  1118. }
  1119. static Scheme_Object *
  1120. do_byte_string_to_char_string_locale(const char *who,
  1121. Scheme_Object *bstr,
  1122. intptr_t istart, intptr_t ifinish,
  1123. int perm)
  1124. {
  1125. char *us;
  1126. intptr_t olen;
  1127. reset_locale();
  1128. if (!iconv_ready) init_iconv();
  1129. if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open))
  1130. return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
  1131. if (istart < ifinish) {
  1132. int no_cvt;
  1133. us = string_to_from_locale(0, SCHEME_BYTE_STR_VAL(bstr),
  1134. istart, ifinish - istart,
  1135. &olen, perm, &no_cvt);
  1136. if (!us) {
  1137. if (no_cvt) {
  1138. return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1);
  1139. } else {
  1140. scheme_contract_error(who,
  1141. "byte string is not a valid encoding for the current locale",
  1142. "byte string", 1, bstr,
  1143. NULL);
  1144. }
  1145. }
  1146. ((mzchar *)us)[olen] = 0;
  1147. } else {
  1148. us = "\0\0\0";
  1149. olen = 0;
  1150. }
  1151. return scheme_make_sized_char_string((mzchar *)us, olen, 0);
  1152. }
  1153. static Scheme_Object *
  1154. do_string_to_vector(const char *who, int mode, int argc, Scheme_Object *argv[])
  1155. {
  1156. int permc;
  1157. intptr_t istart, ifinish;
  1158. if (!SCHEME_BYTE_STRINGP(argv[0]))
  1159. scheme_wrong_contract(who, "bytes?", 0, argc, argv);
  1160. if ((argc < 2) || SCHEME_FALSEP(argv[1]))
  1161. permc = -1;
  1162. else {
  1163. if (!SCHEME_CHARP(argv[1]))
  1164. scheme_wrong_contract(who, "(or/c char? #f)", 1, argc, argv);
  1165. permc = SCHEME_CHAR_VAL(argv[1]);
  1166. }
  1167. scheme_get_substring_indices(who, argv[0], argc, argv,
  1168. 2, 3,
  1169. &istart, &ifinish);
  1170. if (mode == 0)
  1171. return do_byte_string_to_char_string(who, argv[0], istart, ifinish, permc, 0);
  1172. else if (mode == 1)
  1173. return do_byte_string_to_char_string_locale(who, argv[0], istart, ifinish, permc);
  1174. else {
  1175. /* Latin-1 */
  1176. mzchar *us;
  1177. unsigned char *s;
  1178. intptr_t i, len;
  1179. len = ifinish - istart;
  1180. s = (unsigned char *)SCHEME_BYTE_STR_VAL(argv[0]);
  1181. us = (mzchar *)scheme_malloc_atomic((len + 1) * sizeof(mzchar));
  1182. for (i = istart; i < ifinish; i++) {
  1183. us[i - istart] = s[i];
  1184. }
  1185. us[len] = 0;
  1186. return scheme_make_sized_char_string(us, len, 0);
  1187. }
  1188. }
  1189. static Scheme_Object *
  1190. byte_string_to_char_string (int argc, Scheme_Object *argv[])
  1191. {
  1192. return do_string_to_vector("bytes->string/utf-8", 0, argc, argv);
  1193. }
  1194. static Scheme_Object *
  1195. byte_string_to_char_string_locale (int argc, Scheme_Object *argv[])
  1196. {
  1197. return do_string_to_vector("bytes->string/locale", 1, argc, argv);
  1198. }
  1199. static Scheme_Object *
  1200. byte_string_to_char_string_latin1 (int argc, Scheme_Object *argv[])
  1201. {
  1202. return do_string_to_vector("bytes->string/latin-1", 2, argc, argv);
  1203. }
  1204. Scheme_Object *scheme_byte_string_to_char_string(Scheme_Object *o)
  1205. {
  1206. return do_byte_string_to_char_string("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD, 0);
  1207. }
  1208. Scheme_Object *scheme_byte_string_to_char_string_locale(Scheme_Object *o)
  1209. {
  1210. return do_byte_string_to_char_string_locale("s->s", o, 0, SCHEME_BYTE_STRLEN_VAL(o), 0xFFFD);
  1211. }
  1212. /************************* string->bytes *************************/
  1213. static Scheme_Object *do_char_string_to_byte_string(Scheme_Object *s, intptr_t istart, intptr_t ifinish,
  1214. int as_locale)
  1215. {
  1216. char *bs;
  1217. int slen;
  1218. slen = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
  1219. NULL, 0,
  1220. 0 /* UTF-16 */);
  1221. bs = (char *)scheme_malloc_atomic(slen + 1);
  1222. scheme_utf8_encode(SCHEME_CHAR_STR_VAL(s), istart, ifinish,
  1223. (unsigned char *)bs, 0,
  1224. 0 /* UTF-16 */);
  1225. bs[slen] = 0;
  1226. return scheme_make_sized_byte_string(bs, slen, 0);
  1227. }
  1228. static Scheme_Object *
  1229. do_char_string_to_byte_string_locale(const char *who,
  1230. Scheme_Object *cstr,
  1231. intptr_t istart, intptr_t ifinish,
  1232. int perm)
  1233. {
  1234. char *s;
  1235. intptr_t olen;
  1236. reset_locale();
  1237. if (!iconv_ready) init_iconv();
  1238. if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open))
  1239. return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
  1240. if (istart < ifinish) {
  1241. int no_cvt;
  1242. s = string_to_from_locale(1, (char *)SCHEME_CHAR_STR_VAL(cstr),
  1243. istart, ifinish - istart,
  1244. &olen, perm, &no_cvt);
  1245. if (!s) {
  1246. if (no_cvt) {
  1247. return do_char_string_to_byte_string(cstr, istart, ifinish, 1);
  1248. } else {
  1249. scheme_contract_error(who,
  1250. "string cannot be encoded for the current locale",
  1251. "string", 1, cstr,
  1252. NULL);
  1253. }
  1254. }
  1255. s[olen] = 0;
  1256. } else {
  1257. s = "";
  1258. olen = 0;
  1259. }
  1260. return scheme_make_sized_byte_string(s, olen, 0);
  1261. }
  1262. Scheme_Object *scheme_char_string_to_byte_string(Scheme_Object *s)
  1263. {
  1264. return do_char_string_to_byte_string(s, 0, SCHEME_CHAR_STRLEN_VAL(s), 0);
  1265. }
  1266. Scheme_Object *scheme_char_string_to_byte_string_locale(Scheme_Object *s)
  1267. {
  1268. return do_char_string_to_byte_string_locale("s->s", s, 0, SCHEME_CHAR_STRLEN_VAL(s), '?');
  1269. }
  1270. static Scheme_Object *do_chars_to_bytes(const char *who, int mode,
  1271. int argc, Scheme_Object *argv[])
  1272. {
  1273. intptr_t istart, ifinish;
  1274. int permc;
  1275. if (!SCHEME_CHAR_STRINGP(argv[0]))
  1276. scheme_wrong_contract(who, "string?", 0, argc, argv);
  1277. if ((argc < 2) || SCHEME_FALSEP(argv[1]))
  1278. permc = -1;
  1279. else {
  1280. if (!SCHEME_BYTEP(argv[1]))
  1281. scheme_wrong_contract(who, "(or/c byte? #f)", 1, argc, argv);
  1282. permc = SCHEME_INT_VAL(argv[1]);
  1283. }
  1284. scheme_get_substring_indices(who, argv[0], argc, argv,
  1285. 2, 3, &istart, &ifinish);
  1286. if (mode == 1)
  1287. return do_char_string_to_byte_string_locale(who, argv[0], istart, ifinish, permc);
  1288. else if (mode == 0)
  1289. return do_char_string_to_byte_string(argv[0], istart, ifinish, 0);
  1290. else {
  1291. /* Latin-1 */
  1292. mzchar *us;
  1293. unsigned char *s;
  1294. intptr_t i, len;
  1295. len = ifinish - istart;
  1296. us = SCHEME_CHAR_STR_VAL(argv[0]);
  1297. s = (unsigned char *)scheme_malloc_atomic(len + 1);
  1298. for (i = istart; i < ifinish; i++) {
  1299. if (us[i] < 256)
  1300. s[i - istart] = us[i];
  1301. else if (permc >= 0) {
  1302. s[i - istart] = permc;
  1303. } else {
  1304. scheme_contract_error(who,
  1305. "string cannot be encoded in …

Large files files are truncated, but you can click here to view the full file