PageRenderTime 60ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/libguile/symbols.c

https://gitlab.com/janneke/guile
C | 563 lines | 452 code | 85 blank | 26 comment | 46 complexity | c847c81a348eadbdff79a4da36849776 MD5 | raw file
  1. /* Copyright 1995-1998,2000-2001,2003-2004,2006,2009,2011,2013,2015,2018
  2. Free Software Foundation, Inc.
  3. This file is part of Guile.
  4. Guile is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Lesser General Public License as published
  6. by the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. Guile is distributed in the hope that it will be useful, but WITHOUT
  9. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10. FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  11. License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with Guile. If not, see
  14. <https://www.gnu.org/licenses/>. */
  15. #ifdef HAVE_CONFIG_H
  16. # include <config.h>
  17. #endif
  18. #include <string.h>
  19. #include <unistr.h>
  20. #include "alist.h"
  21. #include "boolean.h"
  22. #include "chars.h"
  23. #include "eval.h"
  24. #include "fluids.h"
  25. #include "gsubr.h"
  26. #include "hash.h"
  27. #include "list.h"
  28. #include "modules.h"
  29. #include "numbers.h"
  30. #include "pairs.h"
  31. #include "private-options.h"
  32. #include "read.h"
  33. #include "smob.h"
  34. #include "srfi-13.h"
  35. #include "strings.h"
  36. #include "strorder.h"
  37. #include "threads.h"
  38. #include "variable.h"
  39. #include "vectors.h"
  40. #include "weak-set.h"
  41. #include "symbols.h"
  42. static SCM symbols;
  43. #ifdef GUILE_DEBUG
  44. SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
  45. (),
  46. "Return the system symbol obarray.")
  47. #define FUNC_NAME s_scm_sys_symbols
  48. {
  49. return symbols;
  50. }
  51. #undef FUNC_NAME
  52. #endif
  53. /* {Symbols}
  54. */
  55. unsigned long
  56. scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
  57. {
  58. return scm_i_symbol_hash (obj) % n;
  59. }
  60. struct string_lookup_data
  61. {
  62. SCM string;
  63. unsigned long string_hash;
  64. };
  65. static int
  66. string_lookup_predicate_fn (SCM sym, void *closure)
  67. {
  68. struct string_lookup_data *data = closure;
  69. if (scm_i_symbol_hash (sym) == data->string_hash
  70. && scm_i_symbol_length (sym) == scm_i_string_length (data->string))
  71. {
  72. size_t n = scm_i_symbol_length (sym);
  73. while (n--)
  74. if (scm_i_symbol_ref (sym, n) != scm_i_string_ref (data->string, n))
  75. return 0;
  76. return 1;
  77. }
  78. else
  79. return 0;
  80. }
  81. static SCM
  82. lookup_interned_symbol (SCM name, unsigned long raw_hash)
  83. {
  84. struct string_lookup_data data;
  85. data.string = name;
  86. data.string_hash = raw_hash;
  87. return scm_c_weak_set_lookup (symbols, raw_hash,
  88. string_lookup_predicate_fn,
  89. &data, SCM_BOOL_F);
  90. }
  /* Closure handed to latin1_lookup_predicate_fn when searching the
     symbol table for a name given as a Latin-1 byte buffer.  */
  struct latin1_lookup_data
  {
    const char *str;              /* Start of the name (length-delimited).  */
    size_t len;                   /* Number of bytes in STR.  */
    unsigned long string_hash;    /* Raw hash of the name.  */
  };
  97. static int
  98. latin1_lookup_predicate_fn (SCM sym, void *closure)
  99. {
  100. struct latin1_lookup_data *data = closure;
  101. return scm_i_symbol_hash (sym) == data->string_hash
  102. && scm_i_is_narrow_symbol (sym)
  103. && scm_i_symbol_length (sym) == data->len
  104. && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0;
  105. }
  106. static SCM
  107. lookup_interned_latin1_symbol (const char *str, size_t len,
  108. unsigned long raw_hash)
  109. {
  110. struct latin1_lookup_data data;
  111. data.str = str;
  112. data.len = len;
  113. data.string_hash = raw_hash;
  114. return scm_c_weak_set_lookup (symbols, raw_hash,
  115. latin1_lookup_predicate_fn,
  116. &data, SCM_BOOL_F);
  117. }
  /* Closure handed to utf8_lookup_predicate_fn when searching the symbol
     table for a name given as a UTF-8 byte buffer.  */
  struct utf8_lookup_data
  {
    const char *str;              /* Start of the UTF-8 encoded name.  */
    size_t len;                   /* Number of bytes (not characters) in STR.  */
    unsigned long string_hash;    /* Raw hash of the name.  */
  };
  124. static int
  125. utf8_string_equals_wide_string (const uint8_t *narrow, size_t nlen,
  126. const scm_t_wchar *wide, size_t wlen)
  127. {
  128. size_t byte_idx = 0, char_idx = 0;
  129. while (byte_idx < nlen && char_idx < wlen)
  130. {
  131. ucs4_t c;
  132. int nbytes;
  133. nbytes = u8_mbtoucr (&c, narrow + byte_idx, nlen - byte_idx);
  134. if (nbytes == 0)
  135. break;
  136. else if (nbytes < 0)
  137. /* Bad UTF-8. */
  138. return 0;
  139. else if (c != wide[char_idx])
  140. return 0;
  141. byte_idx += nbytes;
  142. char_idx++;
  143. }
  144. return byte_idx == nlen && char_idx == wlen;
  145. }
  146. static int
  147. utf8_lookup_predicate_fn (SCM sym, void *closure)
  148. {
  149. struct utf8_lookup_data *data = closure;
  150. if (scm_i_symbol_hash (sym) != data->string_hash)
  151. return 0;
  152. if (scm_i_is_narrow_symbol (sym))
  153. return (scm_i_symbol_length (sym) == data->len
  154. && strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
  155. else
  156. return utf8_string_equals_wide_string ((const uint8_t *) data->str,
  157. data->len,
  158. scm_i_symbol_wide_chars (sym),
  159. scm_i_symbol_length (sym));
  160. }
  161. static SCM
  162. lookup_interned_utf8_symbol (const char *str, size_t len,
  163. unsigned long raw_hash)
  164. {
  165. struct utf8_lookup_data data;
  166. data.str = str;
  167. data.len = len;
  168. data.string_hash = raw_hash;
  169. return scm_c_weak_set_lookup (symbols, raw_hash,
  170. utf8_lookup_predicate_fn,
  171. &data, SCM_BOOL_F);
  172. }
  173. static int
  174. symbol_lookup_predicate_fn (SCM sym, void *closure)
  175. {
  176. SCM other = SCM_PACK_POINTER (closure);
  177. if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (other)
  178. && scm_i_symbol_length (sym) == scm_i_symbol_length (other))
  179. {
  180. if (scm_i_is_narrow_symbol (sym))
  181. return scm_i_is_narrow_symbol (other)
  182. && (strncmp (scm_i_symbol_chars (sym),
  183. scm_i_symbol_chars (other),
  184. scm_i_symbol_length (other)) == 0);
  185. else
  186. return scm_is_true
  187. (scm_string_equal_p (scm_symbol_to_string (sym),
  188. scm_symbol_to_string (other)));
  189. }
  190. return 0;
  191. }
  192. static SCM
  193. scm_i_str2symbol (SCM str)
  194. {
  195. SCM symbol;
  196. size_t raw_hash = scm_i_string_hash (str);
  197. symbol = lookup_interned_symbol (str, raw_hash);
  198. if (scm_is_true (symbol))
  199. return symbol;
  200. else
  201. {
  202. /* The symbol was not found, create it. */
  203. symbol = scm_i_make_symbol (str, 0, raw_hash,
  204. scm_cons (SCM_BOOL_F, SCM_EOL));
  205. /* Might return a different symbol, if another one was interned at
  206. the same time. */
  207. return scm_c_weak_set_add_x (symbols, raw_hash,
  208. symbol_lookup_predicate_fn,
  209. SCM_UNPACK_POINTER (symbol), symbol);
  210. }
  211. }
  212. static SCM
  213. scm_i_str2uninterned_symbol (SCM str)
  214. {
  215. size_t raw_hash = scm_i_string_hash (str);
  216. return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED,
  217. raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
  218. }
  219. SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
  220. (SCM obj),
  221. "Return @code{#t} if @var{obj} is a symbol, otherwise return\n"
  222. "@code{#f}.")
  223. #define FUNC_NAME s_scm_symbol_p
  224. {
  225. return scm_from_bool (scm_is_symbol (obj));
  226. }
  227. #undef FUNC_NAME
  228. SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
  229. (SCM symbol),
  230. "Return @code{#t} if @var{symbol} is interned, otherwise return\n"
  231. "@code{#f}.")
  232. #define FUNC_NAME s_scm_symbol_interned_p
  233. {
  234. SCM_VALIDATE_SYMBOL (1, symbol);
  235. return scm_from_bool (scm_i_symbol_is_interned (symbol));
  236. }
  237. #undef FUNC_NAME
  238. SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
  239. (SCM name),
  240. "Return a new uninterned symbol with the name @var{name}. "
  241. "The returned symbol is guaranteed to be unique and future "
  242. "calls to @code{string->symbol} will not return it.")
  243. #define FUNC_NAME s_scm_make_symbol
  244. {
  245. SCM_VALIDATE_STRING (1, name);
  246. return scm_i_str2uninterned_symbol (name);
  247. }
  248. #undef FUNC_NAME
  249. SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
  250. (SCM s),
  251. "Return the name of @var{symbol} as a string. If the symbol was\n"
  252. "part of an object returned as the value of a literal expression\n"
  253. "(section @pxref{Literal expressions,,,r5rs, The Revised^5\n"
  254. "Report on Scheme}) or by a call to the @code{read} procedure,\n"
  255. "and its name contains alphabetic characters, then the string\n"
  256. "returned will contain characters in the implementation's\n"
  257. "preferred standard case---some implementations will prefer\n"
  258. "upper case, others lower case. If the symbol was returned by\n"
  259. "@code{string->symbol}, the case of characters in the string\n"
  260. "returned will be the same as the case in the string that was\n"
  261. "passed to @code{string->symbol}. It is an error to apply\n"
  262. "mutation procedures like @code{string-set!} to strings returned\n"
  263. "by this procedure.\n"
  264. "\n"
  265. "The following examples assume that the implementation's\n"
  266. "standard case is lower case:\n"
  267. "\n"
  268. "@lisp\n"
  269. "(symbol->string 'flying-fish) @result{} \"flying-fish\"\n"
  270. "(symbol->string 'Martin) @result{} \"martin\"\n"
  271. "(symbol->string\n"
  272. " (string->symbol \"Malvina\")) @result{} \"Malvina\"\n"
  273. "@end lisp")
  274. #define FUNC_NAME s_scm_symbol_to_string
  275. {
  276. SCM_VALIDATE_SYMBOL (1, s);
  277. return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
  278. }
  279. #undef FUNC_NAME
  280. SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
  281. (SCM string),
  282. "Return the symbol whose name is @var{string}. This procedure\n"
  283. "can create symbols with names containing special characters or\n"
  284. "letters in the non-standard case, but it is usually a bad idea\n"
  285. "to create such symbols because in some implementations of\n"
  286. "Scheme they cannot be read as themselves. See\n"
  287. "@code{symbol->string}.\n"
  288. "\n"
  289. "The following examples assume that the implementation's\n"
  290. "standard case is lower case:\n"
  291. "\n"
  292. "@lisp\n"
  293. "(eq? 'mISSISSIppi 'mississippi) @result{} #t\n"
  294. "(string->symbol \"mISSISSIppi\") @result{} @r{the symbol with name \"mISSISSIppi\"}\n"
  295. "(eq? 'bitBlt (string->symbol \"bitBlt\")) @result{} #f\n"
  296. "(eq? 'JollyWog\n"
  297. " (string->symbol (symbol->string 'JollyWog))) @result{} #t\n"
  298. "(string=? \"K. Harper, M.D.\"\n"
  299. " (symbol->string\n"
  300. " (string->symbol \"K. Harper, M.D.\"))) @result{}#t\n"
  301. "@end lisp")
  302. #define FUNC_NAME s_scm_string_to_symbol
  303. {
  304. SCM_VALIDATE_STRING (1, string);
  305. return scm_i_str2symbol (string);
  306. }
  307. #undef FUNC_NAME
  308. SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
  309. (SCM str),
  310. "Return the symbol whose name is @var{str}. @var{str} is\n"
  311. "converted to lowercase before the conversion is done, if Guile\n"
  312. "is currently reading symbols case-insensitively.")
  313. #define FUNC_NAME s_scm_string_ci_to_symbol
  314. {
  315. return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
  316. ? scm_string_downcase(str)
  317. : str);
  318. }
  319. #undef FUNC_NAME
  320. /* The default prefix for `gensym'd symbols. */
  321. static SCM default_gensym_prefix;
  322. #define MAX_PREFIX_LENGTH 30
  323. SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
  324. (SCM prefix),
  325. "Create a new symbol with a name constructed from a prefix and\n"
  326. "a counter value. The string @var{prefix} can be specified as\n"
  327. "an optional argument. Default prefix is @code{ g}. The counter\n"
  328. "is increased by 1 at each call. There is no provision for\n"
  329. "resetting the counter.")
  330. #define FUNC_NAME s_scm_gensym
  331. {
  332. static int gensym_counter = 0;
  333. SCM suffix, name;
  334. int n, n_digits;
  335. char buf[SCM_INTBUFLEN];
  336. if (SCM_UNBNDP (prefix))
  337. prefix = default_gensym_prefix;
  338. /* mutex in case another thread looks and incs at the exact same moment */
  339. scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
  340. n = gensym_counter++;
  341. scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
  342. n_digits = scm_iint2str (n, 10, buf);
  343. suffix = scm_from_latin1_stringn (buf, n_digits);
  344. name = scm_string_append (scm_list_2 (prefix, suffix));
  345. return scm_string_to_symbol (name);
  346. }
  347. #undef FUNC_NAME
  348. SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
  349. (SCM symbol),
  350. "Return a hash value for @var{symbol}.")
  351. #define FUNC_NAME s_scm_symbol_hash
  352. {
  353. SCM_VALIDATE_SYMBOL (1, symbol);
  354. return scm_from_ulong (scm_i_symbol_hash (symbol));
  355. }
  356. #undef FUNC_NAME
  357. SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
  358. (SCM s),
  359. "Return the contents of the symbol @var{s}'s @dfn{function slot}.")
  360. #define FUNC_NAME s_scm_symbol_fref
  361. {
  362. SCM_VALIDATE_SYMBOL (1, s);
  363. return SCM_CAR (SCM_CELL_OBJECT_3 (s));
  364. }
  365. #undef FUNC_NAME
  366. SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
  367. (SCM s),
  368. "Return the @dfn{property list} currently associated with the\n"
  369. "symbol @var{s}.")
  370. #define FUNC_NAME s_scm_symbol_pref
  371. {
  372. SCM_VALIDATE_SYMBOL (1, s);
  373. return SCM_CDR (SCM_CELL_OBJECT_3 (s));
  374. }
  375. #undef FUNC_NAME
  376. SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
  377. (SCM s, SCM val),
  378. "Change the binding of the symbol @var{s}'s function slot.")
  379. #define FUNC_NAME s_scm_symbol_fset_x
  380. {
  381. SCM_VALIDATE_SYMBOL (1, s);
  382. scm_set_car_x (SCM_CELL_OBJECT_3 (s), val);
  383. return SCM_UNSPECIFIED;
  384. }
  385. #undef FUNC_NAME
  386. SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
  387. (SCM s, SCM val),
  388. "Change the binding of the symbol @var{s}'s property slot.")
  389. #define FUNC_NAME s_scm_symbol_pset_x
  390. {
  391. SCM_VALIDATE_SYMBOL (1, s);
  392. scm_set_cdr_x (SCM_CELL_OBJECT_3 (s), val);
  393. return SCM_UNSPECIFIED;
  394. }
  395. #undef FUNC_NAME
  396. SCM
  397. scm_from_locale_symbol (const char *sym)
  398. {
  399. return scm_from_locale_symboln (sym, -1);
  400. }
  401. SCM
  402. scm_from_locale_symboln (const char *sym, size_t len)
  403. {
  404. SCM str = scm_from_locale_stringn (sym, len);
  405. return scm_i_str2symbol (str);
  406. }
  407. SCM
  408. scm_take_locale_symboln (char *sym, size_t len)
  409. {
  410. SCM str;
  411. str = scm_take_locale_stringn (sym, len);
  412. return scm_i_str2symbol (str);
  413. }
  414. SCM
  415. scm_take_locale_symbol (char *sym)
  416. {
  417. return scm_take_locale_symboln (sym, (size_t)-1);
  418. }
  419. SCM
  420. scm_from_latin1_symbol (const char *sym)
  421. {
  422. return scm_from_latin1_symboln (sym, -1);
  423. }
  424. SCM
  425. scm_from_latin1_symboln (const char *sym, size_t len)
  426. {
  427. unsigned long hash;
  428. SCM ret;
  429. if (len == (size_t) -1)
  430. len = strlen (sym);
  431. hash = scm_i_latin1_string_hash (sym, len);
  432. ret = lookup_interned_latin1_symbol (sym, len, hash);
  433. if (scm_is_false (ret))
  434. {
  435. SCM str = scm_from_latin1_stringn (sym, len);
  436. ret = scm_i_str2symbol (str);
  437. }
  438. return ret;
  439. }
  440. SCM
  441. scm_from_utf8_symbol (const char *sym)
  442. {
  443. return scm_from_utf8_symboln (sym, -1);
  444. }
  445. SCM
  446. scm_from_utf8_symboln (const char *sym, size_t len)
  447. {
  448. unsigned long hash;
  449. SCM ret;
  450. if (len == (size_t) -1)
  451. len = strlen (sym);
  452. hash = scm_i_utf8_string_hash (sym, len);
  453. ret = lookup_interned_utf8_symbol (sym, len, hash);
  454. if (scm_is_false (ret))
  455. {
  456. SCM str = scm_from_utf8_stringn (sym, len);
  457. ret = scm_i_str2symbol (str);
  458. }
  459. return ret;
  460. }
  461. void
  462. scm_symbols_prehistory ()
  463. {
  464. symbols = scm_c_make_weak_set (5000);
  465. }
  466. void
  467. scm_init_symbols ()
  468. {
  469. #include "symbols.x"
  470. default_gensym_prefix = scm_from_latin1_string (" g");
  471. }