PageRenderTime 3394ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/erts/emulator/beam/atom.c

http://github.com/erlang/otp
C | 535 lines | 398 code | 72 blank | 65 comment | 60 complexity | 797a65f003bd652c897c5cd83ec66744 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0, Unlicense, LGPL-2.1, MPL-2.0-no-copyleft-exception
  1. /*
  2. * %CopyrightBegin%
  3. *
  4. * Copyright Ericsson AB 1996-2020. All Rights Reserved.
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. *
  18. * %CopyrightEnd%
  19. */
  20. #ifdef HAVE_CONFIG_H
  21. # include "config.h"
  22. #endif
  23. #include "sys.h"
  24. #include "erl_sys_driver.h"
  25. #include "erl_vm.h"
  26. #include "global.h"
  27. #include "hash.h"
  28. #include "atom.h"
  29. #define ATOM_SIZE 3000
  30. IndexTable erts_atom_table; /* The index table */
  31. static erts_rwmtx_t atom_table_lock;
  32. #define atom_read_lock() erts_rwmtx_rlock(&atom_table_lock)
  33. #define atom_read_unlock() erts_rwmtx_runlock(&atom_table_lock)
  34. #define atom_write_lock() erts_rwmtx_rwlock(&atom_table_lock)
  35. #define atom_write_unlock() erts_rwmtx_rwunlock(&atom_table_lock)
  36. #if 0
  37. #define ERTS_ATOM_PUT_OPS_STAT
  38. #endif
  39. #ifdef ERTS_ATOM_PUT_OPS_STAT
  40. static erts_atomic_t atom_put_ops;
  41. #endif
  42. /* Functions for allocating space for the text of atoms. We do not
  43. * use malloc for each atom to prevent excessive memory fragmentation
  44. */
  45. typedef struct _atom_text {
  46. struct _atom_text* next;
  47. unsigned char text[ATOM_TEXT_SIZE];
  48. } AtomText;
  49. static AtomText* text_list; /* List of text buffers */
  50. static byte *atom_text_pos;
  51. static byte *atom_text_end;
  52. static Uint reserved_atom_space; /* Total amount of atom text space */
  53. static Uint atom_space; /* Amount of atom text space used */
  54. /*
  55. * Print info about atom tables
  56. */
  57. void atom_info(fmtfn_t to, void *to_arg)
  58. {
  59. int lock = !ERTS_IS_CRASH_DUMPING;
  60. if (lock)
  61. atom_read_lock();
  62. index_info(to, to_arg, &erts_atom_table);
  63. #ifdef ERTS_ATOM_PUT_OPS_STAT
  64. erts_print(to, to_arg, "atom_put_ops: %ld\n",
  65. erts_atomic_read_nob(&atom_put_ops));
  66. #endif
  67. if (lock)
  68. atom_read_unlock();
  69. }
  70. /*
  71. * Allocate an atom text segment.
  72. */
  73. static void
  74. more_atom_space(void)
  75. {
  76. AtomText* ptr;
  77. ptr = (AtomText*) erts_alloc(ERTS_ALC_T_ATOM_TXT, sizeof(AtomText));
  78. ptr->next = text_list;
  79. text_list = ptr;
  80. atom_text_pos = ptr->text;
  81. atom_text_end = atom_text_pos + ATOM_TEXT_SIZE;
  82. reserved_atom_space += sizeof(AtomText);
  83. VERBOSE(DEBUG_SYSTEM,("Allocated %d atom space\n",ATOM_TEXT_SIZE));
  84. }
  85. /*
  86. * Allocate string space within an atom text segment.
  87. */
  88. static byte*
  89. atom_text_alloc(int bytes)
  90. {
  91. byte *res;
  92. ASSERT(bytes <= MAX_ATOM_SZ_LIMIT);
  93. if (atom_text_pos + bytes >= atom_text_end) {
  94. more_atom_space();
  95. }
  96. res = atom_text_pos;
  97. atom_text_pos += bytes;
  98. atom_space += bytes;
  99. return res;
  100. }
  101. /*
  102. * Calculate atom hash value (using the hash algorithm
  103. * hashpjw from the Dragon Book).
  104. */
  105. static HashValue
  106. atom_hash(Atom* obj)
  107. {
  108. byte* p = obj->name;
  109. int len = obj->len;
  110. HashValue h = 0, g;
  111. byte v;
  112. while(len--) {
  113. v = *p++;
  114. /* latin1 clutch for r16 */
  115. if (len && (v & 0xFE) == 0xC2 && (*p & 0xC0) == 0x80) {
  116. v = (v << 6) | (*p & 0x3F);
  117. p++; len--;
  118. }
  119. /* normal hashpjw follows for v */
  120. h = (h << 4) + v;
  121. if ((g = h & 0xf0000000)) {
  122. h ^= (g >> 24);
  123. h ^= g;
  124. }
  125. }
  126. return h;
  127. }
  128. static int
  129. atom_cmp(Atom* tmpl, Atom* obj)
  130. {
  131. if (tmpl->len == obj->len &&
  132. sys_memcmp(tmpl->name, obj->name, tmpl->len) == 0)
  133. return 0;
  134. return 1;
  135. }
  136. static Atom*
  137. atom_alloc(Atom* tmpl)
  138. {
  139. Atom* obj = (Atom*) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom));
  140. obj->name = atom_text_alloc(tmpl->len);
  141. sys_memcpy(obj->name, tmpl->name, tmpl->len);
  142. obj->len = tmpl->len;
  143. obj->latin1_chars = tmpl->latin1_chars;
  144. obj->slot.index = -1;
  145. /*
  146. * Precompute ordinal value of first 3 bytes + 7 bits.
  147. * This is used by erl_utils.h:erts_cmp_atoms().
  148. * We cannot use the full 32 bits of the first 4 bytes,
  149. * since we use the sign of the difference between two
  150. * ordinal values to represent their relative order.
  151. */
  152. {
  153. unsigned char c[4];
  154. int i;
  155. int j;
  156. j = (tmpl->len < 4) ? tmpl->len : 4;
  157. for(i = 0; i < j; ++i)
  158. c[i] = tmpl->name[i];
  159. for(; i < 4; ++i)
  160. c[i] = '\0';
  161. obj->ord0 = (c[0] << 23) + (c[1] << 15) + (c[2] << 7) + (c[3] >> 1);
  162. }
  163. return obj;
  164. }
  165. static void
  166. atom_free(Atom* obj)
  167. {
  168. ASSERT(obj->slot.index == atom_val(am_ErtsSecretAtom));
  169. }
  170. static void latin1_to_utf8(byte* conv_buf, Uint buf_sz,
  171. const byte** srcp, Uint* lenp)
  172. {
  173. byte* dst;
  174. const byte* src = *srcp;
  175. Uint i, len = *lenp;
  176. ASSERT(len <= MAX_ATOM_CHARACTERS);
  177. ASSERT(buf_sz >= MAX_ATOM_SZ_FROM_LATIN1);
  178. for (i=0 ; i < len; ++i) {
  179. if (src[i] & 0x80) {
  180. goto need_convertion;
  181. }
  182. }
  183. return;
  184. need_convertion:
  185. sys_memcpy(conv_buf, src, i);
  186. dst = conv_buf + i;
  187. for ( ; i < len; ++i) {
  188. unsigned char chr = src[i];
  189. if (!(chr & 0x80)) {
  190. *dst++ = chr;
  191. }
  192. else {
  193. *dst++ = 0xC0 | (chr >> 6);
  194. *dst++ = 0x80 | (chr & 0x3F);
  195. }
  196. }
  197. *srcp = conv_buf;
  198. *lenp = dst - conv_buf;
  199. }
  200. /*
  201. * erts_atom_put_index() may fail. Returns negative indexes for errors.
  202. */
  203. int
  204. erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)
  205. {
  206. byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
  207. const byte *text = name;
  208. Uint tlen;
  209. Sint no_latin1_chars;
  210. Atom a;
  211. int aix;
  212. #ifdef ERTS_ATOM_PUT_OPS_STAT
  213. erts_atomic_inc_nob(&atom_put_ops);
  214. #endif
  215. if (len < 0) {
  216. if (trunc) {
  217. len = 0;
  218. } else {
  219. return ATOM_MAX_CHARS_ERROR;
  220. }
  221. }
  222. tlen = len;
  223. switch (enc) {
  224. case ERTS_ATOM_ENC_7BIT_ASCII:
  225. if (tlen > MAX_ATOM_CHARACTERS) {
  226. if (trunc)
  227. tlen = MAX_ATOM_CHARACTERS;
  228. else
  229. return ATOM_MAX_CHARS_ERROR;
  230. }
  231. #ifdef DEBUG
  232. for (aix = 0; aix < len; aix++) {
  233. ASSERT((name[aix] & 0x80) == 0);
  234. }
  235. #endif
  236. no_latin1_chars = tlen;
  237. break;
  238. case ERTS_ATOM_ENC_LATIN1:
  239. if (tlen > MAX_ATOM_CHARACTERS) {
  240. if (trunc)
  241. tlen = MAX_ATOM_CHARACTERS;
  242. else
  243. return ATOM_MAX_CHARS_ERROR;
  244. }
  245. no_latin1_chars = tlen;
  246. latin1_to_utf8(utf8_copy, sizeof(utf8_copy), &text, &tlen);
  247. break;
  248. case ERTS_ATOM_ENC_UTF8:
  249. /* First sanity check; need to verify later */
  250. if (tlen > MAX_ATOM_SZ_LIMIT && !trunc)
  251. return ATOM_MAX_CHARS_ERROR;
  252. break;
  253. }
  254. a.len = tlen;
  255. a.name = (byte *) text;
  256. atom_read_lock();
  257. aix = index_get(&erts_atom_table, (void*) &a);
  258. atom_read_unlock();
  259. if (aix >= 0) {
  260. /* Already in table no need to verify it */
  261. return aix;
  262. }
  263. if (enc == ERTS_ATOM_ENC_UTF8) {
  264. /* Need to verify encoding and length */
  265. byte *err_pos;
  266. Uint no_chars;
  267. switch (erts_analyze_utf8_x((byte *) text,
  268. (Uint) tlen,
  269. &err_pos,
  270. &no_chars, NULL,
  271. &no_latin1_chars,
  272. MAX_ATOM_CHARACTERS)) {
  273. case ERTS_UTF8_OK:
  274. ASSERT(no_chars <= MAX_ATOM_CHARACTERS);
  275. break;
  276. case ERTS_UTF8_OK_MAX_CHARS:
  277. /* Truncated... */
  278. if (!trunc)
  279. return ATOM_MAX_CHARS_ERROR;
  280. ASSERT(no_chars == MAX_ATOM_CHARACTERS);
  281. tlen = err_pos - text;
  282. break;
  283. default:
  284. /* Bad utf8... */
  285. return ATOM_BAD_ENCODING_ERROR;
  286. }
  287. }
  288. ASSERT(tlen <= MAX_ATOM_SZ_LIMIT);
  289. ASSERT(-1 <= no_latin1_chars && no_latin1_chars <= MAX_ATOM_CHARACTERS);
  290. a.len = tlen;
  291. a.latin1_chars = (Sint16) no_latin1_chars;
  292. a.name = (byte *) text;
  293. atom_write_lock();
  294. aix = index_put(&erts_atom_table, (void*) &a);
  295. atom_write_unlock();
  296. return aix;
  297. }
  298. /*
  299. * erts_atom_put() may fail. If it fails THE_NON_VALUE is returned!
  300. */
  301. Eterm
  302. erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)
  303. {
  304. int aix = erts_atom_put_index(name, len, enc, trunc);
  305. if (aix >= 0)
  306. return make_atom(aix);
  307. else
  308. return THE_NON_VALUE;
  309. }
  310. Eterm
  311. am_atom_put(const char* name, Sint len)
  312. {
  313. /* Assumes 7-bit ascii; use erts_atom_put() for other encodings... */
  314. return erts_atom_put((byte *) name, len, ERTS_ATOM_ENC_7BIT_ASCII, 1);
  315. }
  316. int atom_table_size(void)
  317. {
  318. int ret;
  319. int lock = !ERTS_IS_CRASH_DUMPING;
  320. if (lock)
  321. atom_read_lock();
  322. ret = erts_atom_table.entries;
  323. if (lock)
  324. atom_read_unlock();
  325. return ret;
  326. }
  327. int atom_table_sz(void)
  328. {
  329. int ret;
  330. int lock = !ERTS_IS_CRASH_DUMPING;
  331. if (lock)
  332. atom_read_lock();
  333. ret = index_table_sz(&erts_atom_table);
  334. if (lock)
  335. atom_read_unlock();
  336. return ret;
  337. }
  338. int
  339. erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc)
  340. {
  341. byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
  342. Atom a;
  343. int i;
  344. int res;
  345. switch (enc) {
  346. case ERTS_ATOM_ENC_LATIN1:
  347. if (len > MAX_ATOM_CHARACTERS) {
  348. return 0;
  349. }
  350. latin1_to_utf8(utf8_copy, sizeof(utf8_copy), (const byte**)&name, &len);
  351. a.name = (byte*)name;
  352. a.len = (Sint16)len;
  353. break;
  354. case ERTS_ATOM_ENC_7BIT_ASCII:
  355. if (len > MAX_ATOM_CHARACTERS) {
  356. return 0;
  357. }
  358. for (i = 0; i < len; i++) {
  359. if (name[i] & 0x80) {
  360. return 0;
  361. }
  362. }
  363. a.len = (Sint16)len;
  364. a.name = (byte*)name;
  365. break;
  366. case ERTS_ATOM_ENC_UTF8:
  367. if (len > MAX_ATOM_SZ_LIMIT) {
  368. return 0;
  369. }
  370. /* We don't need to check whether the encoding is legal as all atom
  371. * names are stored as UTF-8 and we know a lookup with a badly encoded
  372. * name will fail. */
  373. a.len = (Sint16)len;
  374. a.name = (byte*)name;
  375. break;
  376. }
  377. atom_read_lock();
  378. i = index_get(&erts_atom_table, (void*) &a);
  379. res = i < 0 ? 0 : (*ap = make_atom(i), 1);
  380. atom_read_unlock();
  381. return res;
  382. }
  383. void
  384. erts_atom_get_text_space_sizes(Uint *reserved, Uint *used)
  385. {
  386. int lock = !ERTS_IS_CRASH_DUMPING;
  387. if (lock)
  388. atom_read_lock();
  389. if (reserved)
  390. *reserved = reserved_atom_space;
  391. if (used)
  392. *used = atom_space;
  393. if (lock)
  394. atom_read_unlock();
  395. }
  396. void
  397. init_atom_table(void)
  398. {
  399. HashFunctions f;
  400. int i;
  401. Atom a;
  402. erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER;
  403. rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ;
  404. rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED;
  405. #ifdef ERTS_ATOM_PUT_OPS_STAT
  406. erts_atomic_init_nob(&atom_put_ops, 0);
  407. #endif
  408. erts_rwmtx_init_opt(&atom_table_lock, &rwmtx_opt, "atom_tab", NIL,
  409. ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
  410. f.hash = (H_FUN) atom_hash;
  411. f.cmp = (HCMP_FUN) atom_cmp;
  412. f.alloc = (HALLOC_FUN) atom_alloc;
  413. f.free = (HFREE_FUN) atom_free;
  414. f.meta_alloc = (HMALLOC_FUN) erts_alloc;
  415. f.meta_free = (HMFREE_FUN) erts_free;
  416. f.meta_print = (HMPRINT_FUN) erts_print;
  417. atom_text_pos = NULL;
  418. atom_text_end = NULL;
  419. reserved_atom_space = 0;
  420. atom_space = 0;
  421. text_list = NULL;
  422. erts_index_init(ERTS_ALC_T_ATOM_TABLE, &erts_atom_table,
  423. "atom_tab", ATOM_SIZE, erts_atom_table_size, f);
  424. more_atom_space();
  425. /* Ordinary atoms */
  426. for (i = 0; erl_atom_names[i] != 0; i++) {
  427. int ix;
  428. a.len = sys_strlen(erl_atom_names[i]);
  429. a.latin1_chars = a.len;
  430. a.name = (byte*)erl_atom_names[i];
  431. a.slot.index = i;
  432. #ifdef DEBUG
  433. /* Verify 7-bit ascii */
  434. for (ix = 0; ix < a.len; ix++) {
  435. ASSERT((a.name[ix] & 0x80) == 0);
  436. }
  437. #endif
  438. ix = index_put(&erts_atom_table, (void*) &a);
  439. atom_text_pos -= a.len;
  440. atom_space -= a.len;
  441. atom_tab(ix)->name = (byte*)erl_atom_names[i];
  442. }
  443. /* Hide am_ErtsSecretAtom */
  444. hash_erase(&erts_atom_table.htable, atom_tab(atom_val(am_ErtsSecretAtom)));
  445. }
  446. void
  447. dump_atoms(fmtfn_t to, void *to_arg)
  448. {
  449. int i = erts_atom_table.entries;
  450. /*
  451. * Print out the atom table starting from the end.
  452. */
  453. while (--i >= 0) {
  454. if (erts_index_lookup(&erts_atom_table, i)) {
  455. erts_print(to, to_arg, "%T\n", make_atom(i));
  456. }
  457. }
  458. }
  459. Uint
  460. erts_get_atom_limit(void)
  461. {
  462. return erts_atom_table.limit;
  463. }