PageRenderTime 51ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms

/erts/emulator/beam/utils.c

https://github.com/bsmr-erlang/otp
C | 4791 lines | 3943 code | 486 blank | 362 comment | 788 complexity | c4cdd7b93ea09f08d9dfeb5b2b4c4923 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, MPL-2.0-no-copyleft-exception, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. /*
  2. * %CopyrightBegin%
  3. *
  4. * Copyright Ericsson AB 1996-2018. All Rights Reserved.
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. *
  18. * %CopyrightEnd%
  19. */
  20. #ifdef HAVE_CONFIG_H
  21. # include "config.h"
  22. #endif
  23. #define ERTS_DO_INCL_GLB_INLINE_FUNC_DEF
  24. #include "sys.h"
  25. #include "erl_vm.h"
  26. #include "global.h"
  27. #include "erl_process.h"
  28. #include "big.h"
  29. #include "bif.h"
  30. #include "erl_binary.h"
  31. #include "erl_bits.h"
  32. #include "erl_map.h"
  33. #include "packet_parser.h"
  34. #include "erl_gc.h"
  35. #define ERTS_WANT_DB_INTERNAL__
  36. #include "erl_db.h"
  37. #include "erl_threads.h"
  38. #include "register.h"
  39. #include "dist.h"
  40. #include "erl_printf.h"
  41. #include "erl_threads.h"
  42. #include "erl_lock_count.h"
  43. #include "erl_time.h"
  44. #include "erl_thr_progress.h"
  45. #include "erl_thr_queue.h"
  46. #include "erl_sched_spec_pre_alloc.h"
  47. #include "beam_bp.h"
  48. #include "erl_ptab.h"
  49. #include "erl_check_io.h"
  50. #include "erl_bif_unique.h"
  51. #include "erl_io_queue.h"
  52. #define ERTS_WANT_TIMER_WHEEL_API
  53. #include "erl_time.h"
  54. #ifdef HIPE
  55. # include "hipe_mode_switch.h"
  56. #endif
  57. #define ERTS_WANT_NFUNC_SCHED_INTERNALS__
  58. #include "erl_nfunc_sched.h"
  59. #include "erl_proc_sig_queue.h"
  60. #undef M_TRIM_THRESHOLD
  61. #undef M_TOP_PAD
  62. #undef M_MMAP_THRESHOLD
  63. #undef M_MMAP_MAX
  64. #if defined(__GLIBC__) && defined(HAVE_MALLOC_H)
  65. #include <malloc.h>
  66. #endif
  67. #if !defined(HAVE_MALLOPT)
  68. #undef HAVE_MALLOPT
  69. #define HAVE_MALLOPT 0
  70. #endif
  71. Eterm*
  72. erts_heap_alloc(Process* p, Uint need, Uint xtra)
  73. {
  74. ErlHeapFragment* bp;
  75. Uint n;
  76. #if defined(DEBUG) || defined(CHECK_FOR_HOLES)
  77. Uint i;
  78. #endif
  79. #ifdef FORCE_HEAP_FRAGS
  80. if (p->space_verified && p->space_verified_from!=NULL
  81. && HEAP_TOP(p) >= p->space_verified_from
  82. && HEAP_TOP(p) + need <= p->space_verified_from + p->space_verified
  83. && HEAP_LIMIT(p) - HEAP_TOP(p) >= need) {
  84. Uint consumed = need + (HEAP_TOP(p) - p->space_verified_from);
  85. ASSERT(consumed <= p->space_verified);
  86. p->space_verified -= consumed;
  87. p->space_verified_from += consumed;
  88. HEAP_TOP(p) = p->space_verified_from;
  89. return HEAP_TOP(p) - need;
  90. }
  91. p->space_verified = 0;
  92. p->space_verified_from = NULL;
  93. #endif /* FORCE_HEAP_FRAGS */
  94. n = need + xtra;
  95. bp = MBUF(p);
  96. if (bp != NULL && need <= (bp->alloc_size - bp->used_size)) {
  97. Eterm* ret = bp->mem + bp->used_size;
  98. bp->used_size += need;
  99. p->mbuf_sz += need;
  100. return ret;
  101. }
  102. #ifdef DEBUG
  103. n++;
  104. #endif
  105. bp = (ErlHeapFragment*)
  106. ERTS_HEAP_ALLOC(ERTS_ALC_T_HEAP_FRAG, ERTS_HEAP_FRAG_SIZE(n));
  107. #if defined(DEBUG) || defined(CHECK_FOR_HOLES)
  108. for (i = 0; i < n; i++) {
  109. bp->mem[i] = ERTS_HOLE_MARKER;
  110. }
  111. #endif
  112. #ifdef DEBUG
  113. n--;
  114. #endif
  115. bp->next = MBUF(p);
  116. MBUF(p) = bp;
  117. bp->alloc_size = n;
  118. bp->used_size = need;
  119. MBUF_SIZE(p) += need;
  120. bp->off_heap.first = NULL;
  121. bp->off_heap.overhead = 0;
  122. return bp->mem;
  123. }
  #ifdef CHECK_FOR_HOLES
  /*
   * Write ERTS_HOLE_MARKER into each of the 'sz' words starting at 'ptr'.
   * Returns 'ptr' unchanged.
   */
  Eterm*
  erts_set_hole_marker(Eterm* ptr, Uint sz)
  {
      Eterm* const stop = ptr + sz;
      Eterm* cur;

      for (cur = ptr; cur != stop; cur++) {
          *cur = ERTS_HOLE_MARKER;
      }
      return ptr;
  }
  #endif
  136. /*
  137. * Helper function for the ESTACK macros defined in global.h.
  138. */
  139. void
  140. erl_grow_estack(ErtsEStack* s, Uint need)
  141. {
  142. Uint old_size = (s->end - s->start);
  143. Uint new_size;
  144. Uint sp_offs = s->sp - s->start;
  145. if (need < old_size)
  146. new_size = 2*old_size;
  147. else
  148. new_size = ((need / old_size) + 2) * old_size;
  149. if (s->start != s->edefault) {
  150. s->start = erts_realloc(s->alloc_type, s->start,
  151. new_size*sizeof(Eterm));
  152. } else {
  153. Eterm* new_ptr = erts_alloc(s->alloc_type, new_size*sizeof(Eterm));
  154. sys_memcpy(new_ptr, s->start, old_size*sizeof(Eterm));
  155. s->start = new_ptr;
  156. }
  157. s->end = s->start + new_size;
  158. s->sp = s->start + sp_offs;
  159. }
  160. /*
  161. * Helper function for the WSTACK macros defined in global.h.
  162. */
  163. void
  164. erl_grow_wstack(ErtsWStack* s, Uint need)
  165. {
  166. Uint old_size = (s->wend - s->wstart);
  167. Uint new_size;
  168. Uint sp_offs = s->wsp - s->wstart;
  169. if (need < old_size)
  170. new_size = 2 * old_size;
  171. else
  172. new_size = ((need / old_size) + 2) * old_size;
  173. if (s->wstart != s->wdefault) {
  174. s->wstart = erts_realloc(s->alloc_type, s->wstart,
  175. new_size*sizeof(UWord));
  176. } else {
  177. UWord* new_ptr = erts_alloc(s->alloc_type, new_size*sizeof(UWord));
  178. sys_memcpy(new_ptr, s->wstart, old_size*sizeof(UWord));
  179. s->wstart = new_ptr;
  180. }
  181. s->wend = s->wstart + new_size;
  182. s->wsp = s->wstart + sp_offs;
  183. }
  184. /*
  185. * Helper function for the PSTACK macros defined in global.h.
  186. */
  187. void
  188. erl_grow_pstack(ErtsPStack* s, void* default_pstack, unsigned need_bytes)
  189. {
  190. Uint old_size = s->size;
  191. Uint new_size;
  192. if (need_bytes < old_size)
  193. new_size = 2 * old_size;
  194. else
  195. new_size = ((need_bytes / old_size) + 2) * old_size;
  196. if (s->pstart != default_pstack) {
  197. s->pstart = erts_realloc(s->alloc_type, s->pstart, new_size);
  198. } else {
  199. byte* new_ptr = erts_alloc(s->alloc_type, new_size);
  200. sys_memcpy(new_ptr, s->pstart, old_size);
  201. s->pstart = new_ptr;
  202. }
  203. s->size = new_size;
  204. }
  205. /*
  206. * Helper function for the EQUEUE macros defined in global.h.
  207. */
  208. void
  209. erl_grow_equeue(ErtsEQueue* q, Eterm* default_equeue)
  210. {
  211. Uint old_size = (q->end - q->start);
  212. Uint new_size = old_size * 2;
  213. Uint first_part = (q->end - q->front);
  214. Uint second_part = (q->back - q->start);
  215. Eterm* new_ptr = erts_alloc(q->alloc_type, new_size*sizeof(Eterm));
  216. ASSERT(q->back == q->front); // of course the queue is full now!
  217. if (first_part > 0)
  218. sys_memcpy(new_ptr, q->front, first_part*sizeof(Eterm));
  219. if (second_part > 0)
  220. sys_memcpy(new_ptr+first_part, q->start, second_part*sizeof(Eterm));
  221. if (q->start != default_equeue)
  222. erts_free(q->alloc_type, q->start);
  223. q->start = new_ptr;
  224. q->end = q->start + new_size;
  225. q->front = q->start;
  226. q->back = q->start + old_size;
  227. }
  /*
   * CTYPE macros.
   *
   * Character classification on integer character codes. With LATIN1
   * defined (as it is below), the upper half of ISO 8859-1 is classified
   * too: 192..222 except 215 count as upper case, 223..255 except 247 as
   * lower case, and 128..159 as control characters.
   *
   * Note: the macros evaluate their argument more than once, so do not
   * pass expressions with side effects.
   */
  #define LATIN1

  #define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
  #ifdef LATIN1
  #define IS_LOWER(c) (((c) >= 'a' && (c) <= 'z') \
                       || ((c) >= 128+95 && (c) <= 255 && (c) != 247))
  #define IS_UPPER(c) (((c) >= 'A' && (c) <= 'Z') \
                       || ((c) >= 128+64 && (c) <= 128+94 && (c) != 247-32))
  #else
  #define IS_LOWER(c) ((c) >= 'a' && (c) <= 'z')
  #define IS_UPPER(c) ((c) >= 'A' && (c) <= 'Z')
  #endif

  #define IS_ALNUM(c) (IS_DIGIT(c) || IS_LOWER(c) || IS_UPPER(c))

  /* We don't include 160 (non-breaking space). */
  /* The argument is parenthesized so that expression arguments expand correctly. */
  #define IS_SPACE(c) ((c) == ' ' || (c) == '\n' || (c) == '\t' || (c) == '\r')

  #ifdef LATIN1
  #define IS_CNTRL(c) ((c) < ' ' || (c) == 127 \
                       || ((c) >= 128 && (c) < 128+32))
  #else
  /* Treat all non-ASCII as control characters */
  #define IS_CNTRL(c) ((c) < ' ' || (c) >= 127)
  #endif

  #define IS_PRINT(c) (!IS_CNTRL(c))
  251. /*
  252. * Calculate length of a list.
  253. * Returns -1 if not a proper list (i.e. not terminated with NIL)
  254. */
  255. Sint
  256. erts_list_length(Eterm list)
  257. {
  258. Sint i = 0;
  259. while(is_list(list)) {
  260. i++;
  261. list = CDR(list_val(list));
  262. }
  263. if (is_not_nil(list)) {
  264. return -1;
  265. }
  266. return i;
  267. }
  268. static const struct {
  269. Sint64 mask;
  270. int bits;
  271. } fib_data[] = {{ERTS_I64_LITERAL(0x2), 1},
  272. {ERTS_I64_LITERAL(0xc), 2},
  273. {ERTS_I64_LITERAL(0xf0), 4},
  274. {ERTS_I64_LITERAL(0xff00), 8},
  275. {ERTS_I64_LITERAL(0xffff0000), 16},
  276. {ERTS_I64_LITERAL(0xffffffff00000000), 32}};
  277. static ERTS_INLINE int
  278. fit_in_bits(Sint64 value, int start)
  279. {
  280. int bits = 0;
  281. int i;
  282. for (i = start; i >= 0; i--) {
  283. if (value & fib_data[i].mask) {
  284. value >>= fib_data[i].bits;
  285. bits |= fib_data[i].bits;
  286. }
  287. }
  288. bits++;
  289. return bits;
  290. }
  291. int erts_fit_in_bits_int64(Sint64 value)
  292. {
  293. return fit_in_bits(value, 5);
  294. }
  295. int erts_fit_in_bits_int32(Sint32 value)
  296. {
  297. return fit_in_bits((Sint64) (Uint32) value, 4);
  298. }
  299. int erts_fit_in_bits_uint(Uint value)
  300. {
  301. #if ERTS_SIZEOF_ETERM == 4
  302. return fit_in_bits((Sint64) (Uint32) value, 4);
  303. #elif ERTS_SIZEOF_ETERM == 8
  304. return fit_in_bits(value, 5);
  305. #else
  306. # error "No way, Jose"
  307. #endif
  308. }
  309. int
  310. erts_print(fmtfn_t to, void *arg, char *format, ...)
  311. {
  312. int res;
  313. va_list arg_list;
  314. va_start(arg_list, format);
  315. {
  316. switch ((UWord)to) {
  317. case (UWord)ERTS_PRINT_STDOUT:
  318. res = erts_vprintf(format, arg_list);
  319. break;
  320. case (UWord)ERTS_PRINT_STDERR:
  321. res = erts_vfprintf(stderr, format, arg_list);
  322. break;
  323. case (UWord)ERTS_PRINT_FILE:
  324. res = erts_vfprintf((FILE *) arg, format, arg_list);
  325. break;
  326. case (UWord)ERTS_PRINT_SBUF:
  327. res = erts_vsprintf((char *) arg, format, arg_list);
  328. break;
  329. case (UWord)ERTS_PRINT_SNBUF:
  330. res = erts_vsnprintf(((erts_print_sn_buf *) arg)->buf,
  331. ((erts_print_sn_buf *) arg)->size,
  332. format,
  333. arg_list);
  334. break;
  335. case (UWord)ERTS_PRINT_DSBUF:
  336. res = erts_vdsprintf((erts_dsprintf_buf_t *) arg, format, arg_list);
  337. break;
  338. case (UWord)ERTS_PRINT_FD:
  339. res = erts_vfdprintf((int)(SWord) arg, format, arg_list);
  340. break;
  341. default:
  342. res = erts_vcbprintf(to, arg, format, arg_list);
  343. break;
  344. }
  345. }
  346. va_end(arg_list);
  347. return res;
  348. }
  349. int
  350. erts_putc(fmtfn_t to, void *arg, char c)
  351. {
  352. return erts_print(to, arg, "%c", c);
  353. }
  354. /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
  355. * Some Erlang term building utility functions (to be used when performance *
  356. * isn't critical). *
  357. * *
  358. * Add more functions like these here (and function prototypes in global.h) *
  359. * when needed. *
  360. * *
  361. \* */
  362. Eterm
  363. erts_bld_atom(Uint **hpp, Uint *szp, char *str)
  364. {
  365. if (hpp)
  366. return erts_atom_put((byte *) str, sys_strlen(str), ERTS_ATOM_ENC_LATIN1, 1);
  367. else
  368. return THE_NON_VALUE;
  369. }
  370. Eterm
  371. erts_bld_uint(Uint **hpp, Uint *szp, Uint ui)
  372. {
  373. Eterm res = THE_NON_VALUE;
  374. if (IS_USMALL(0, ui)) {
  375. if (hpp)
  376. res = make_small(ui);
  377. }
  378. else {
  379. if (szp)
  380. *szp += BIG_UINT_HEAP_SIZE;
  381. if (hpp) {
  382. res = uint_to_big(ui, *hpp);
  383. *hpp += BIG_UINT_HEAP_SIZE;
  384. }
  385. }
  386. return res;
  387. }
  388. /*
  389. * Erts_bld_uword is more or less similar to erts_bld_uint, but a pointer
  390. * can safely be passed.
  391. */
  392. Eterm
  393. erts_bld_uword(Uint **hpp, Uint *szp, UWord uw)
  394. {
  395. Eterm res = THE_NON_VALUE;
  396. if (IS_USMALL(0, uw)) {
  397. if (hpp)
  398. res = make_small((Uint) uw);
  399. }
  400. else {
  401. if (szp)
  402. *szp += BIG_UWORD_HEAP_SIZE(uw);
  403. if (hpp) {
  404. res = uword_to_big(uw, *hpp);
  405. *hpp += BIG_UWORD_HEAP_SIZE(uw);
  406. }
  407. }
  408. return res;
  409. }
  410. Eterm
  411. erts_bld_uint64(Uint **hpp, Uint *szp, Uint64 ui64)
  412. {
  413. Eterm res = THE_NON_VALUE;
  414. if (IS_USMALL(0, ui64)) {
  415. if (hpp)
  416. res = make_small((Uint) ui64);
  417. }
  418. else {
  419. if (szp)
  420. *szp += ERTS_UINT64_HEAP_SIZE(ui64);
  421. if (hpp)
  422. res = erts_uint64_to_big(ui64, hpp);
  423. }
  424. return res;
  425. }
  426. Eterm
  427. erts_bld_sint64(Uint **hpp, Uint *szp, Sint64 si64)
  428. {
  429. Eterm res = THE_NON_VALUE;
  430. if (IS_SSMALL(si64)) {
  431. if (hpp)
  432. res = make_small((Sint) si64);
  433. }
  434. else {
  435. if (szp)
  436. *szp += ERTS_SINT64_HEAP_SIZE(si64);
  437. if (hpp)
  438. res = erts_sint64_to_big(si64, hpp);
  439. }
  440. return res;
  441. }
  442. Eterm
  443. erts_bld_cons(Uint **hpp, Uint *szp, Eterm car, Eterm cdr)
  444. {
  445. Eterm res = THE_NON_VALUE;
  446. if (szp)
  447. *szp += 2;
  448. if (hpp) {
  449. res = CONS(*hpp, car, cdr);
  450. *hpp += 2;
  451. }
  452. return res;
  453. }
  454. Eterm
  455. erts_bld_tuple(Uint **hpp, Uint *szp, Uint arity, ...)
  456. {
  457. Eterm res = THE_NON_VALUE;
  458. ASSERT(arity < (((Uint)1) << (sizeof(Uint)*8 - _HEADER_ARITY_OFFS)));
  459. if (szp)
  460. *szp += arity + 1;
  461. if (hpp) {
  462. res = make_tuple(*hpp);
  463. *((*hpp)++) = make_arityval(arity);
  464. if (arity > 0) {
  465. Uint i;
  466. va_list argp;
  467. va_start(argp, arity);
  468. for (i = 0; i < arity; i++) {
  469. *((*hpp)++) = va_arg(argp, Eterm);
  470. }
  471. va_end(argp);
  472. }
  473. }
  474. return res;
  475. }
  476. Eterm erts_bld_tuplev(Uint **hpp, Uint *szp, Uint arity, Eterm terms[])
  477. {
  478. Eterm res = THE_NON_VALUE;
  479. /*
  480. * Note callers expect that 'terms' is *not* accessed if hpp == NULL.
  481. */
  482. ASSERT(arity < (((Uint)1) << (sizeof(Uint)*8 - _HEADER_ARITY_OFFS)));
  483. if (szp)
  484. *szp += arity + 1;
  485. if (hpp) {
  486. res = make_tuple(*hpp);
  487. *((*hpp)++) = make_arityval(arity);
  488. if (arity > 0) {
  489. Uint i;
  490. for (i = 0; i < arity; i++)
  491. *((*hpp)++) = terms[i];
  492. }
  493. }
  494. return res;
  495. }
  496. Eterm
  497. erts_bld_string_n(Uint **hpp, Uint *szp, const char *str, Sint len)
  498. {
  499. Eterm res = THE_NON_VALUE;
  500. Sint i = len;
  501. if (szp)
  502. *szp += len*2;
  503. if (hpp) {
  504. res = NIL;
  505. while (--i >= 0) {
  506. res = CONS(*hpp, make_small((byte) str[i]), res);
  507. *hpp += 2;
  508. }
  509. }
  510. return res;
  511. }
  512. Eterm
  513. erts_bld_list(Uint **hpp, Uint *szp, Sint length, Eterm terms[])
  514. {
  515. Eterm list = THE_NON_VALUE;
  516. if (szp)
  517. *szp += 2*length;
  518. if (hpp) {
  519. Sint i = length;
  520. list = NIL;
  521. while (--i >= 0) {
  522. list = CONS(*hpp, terms[i], list);
  523. *hpp += 2;
  524. }
  525. }
  526. return list;
  527. }
  528. Eterm
  529. erts_bld_2tup_list(Uint **hpp, Uint *szp,
  530. Sint length, Eterm terms1[], Uint terms2[])
  531. {
  532. Eterm res = THE_NON_VALUE;
  533. if (szp)
  534. *szp += 5*length;
  535. if (hpp) {
  536. Sint i = length;
  537. res = NIL;
  538. while (--i >= 0) {
  539. res = CONS(*hpp+3, TUPLE2(*hpp, terms1[i], terms2[i]), res);
  540. *hpp += 5;
  541. }
  542. }
  543. return res;
  544. }
  545. Eterm
  546. erts_bld_atom_uword_2tup_list(Uint **hpp, Uint *szp,
  547. Sint length, Eterm atoms[], UWord uints[])
  548. {
  549. Sint i;
  550. Eterm res = THE_NON_VALUE;
  551. if (szp) {
  552. *szp += 5*length;
  553. i = length;
  554. while (--i >= 0) {
  555. if (!IS_USMALL(0, uints[i]))
  556. *szp += BIG_UINT_HEAP_SIZE;
  557. }
  558. }
  559. if (hpp) {
  560. i = length;
  561. res = NIL;
  562. while (--i >= 0) {
  563. Eterm ui;
  564. if (IS_USMALL(0, uints[i]))
  565. ui = make_small(uints[i]);
  566. else {
  567. ui = uint_to_big(uints[i], *hpp);
  568. *hpp += BIG_UINT_HEAP_SIZE;
  569. }
  570. res = CONS(*hpp+3, TUPLE2(*hpp, atoms[i], ui), res);
  571. *hpp += 5;
  572. }
  573. }
  574. return res;
  575. }
  576. Eterm
  577. erts_bld_atom_2uint_3tup_list(Uint **hpp, Uint *szp, Sint length,
  578. Eterm atoms[], Uint uints1[], Uint uints2[])
  579. {
  580. Sint i;
  581. Eterm res = THE_NON_VALUE;
  582. if (szp) {
  583. *szp += 6*length;
  584. i = length;
  585. while (--i >= 0) {
  586. if (!IS_USMALL(0, uints1[i]))
  587. *szp += BIG_UINT_HEAP_SIZE;
  588. if (!IS_USMALL(0, uints2[i]))
  589. *szp += BIG_UINT_HEAP_SIZE;
  590. }
  591. }
  592. if (hpp) {
  593. i = length;
  594. res = NIL;
  595. while (--i >= 0) {
  596. Eterm ui1;
  597. Eterm ui2;
  598. if (IS_USMALL(0, uints1[i]))
  599. ui1 = make_small(uints1[i]);
  600. else {
  601. ui1 = uint_to_big(uints1[i], *hpp);
  602. *hpp += BIG_UINT_HEAP_SIZE;
  603. }
  604. if (IS_USMALL(0, uints2[i]))
  605. ui2 = make_small(uints2[i]);
  606. else {
  607. ui2 = uint_to_big(uints2[i], *hpp);
  608. *hpp += BIG_UINT_HEAP_SIZE;
  609. }
  610. res = CONS(*hpp+4, TUPLE3(*hpp, atoms[i], ui1, ui2), res);
  611. *hpp += 6;
  612. }
  613. }
  614. return res;
  615. }
  616. /* *\
  617. * *
  618. \* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
  619. /* make a hash index from an erlang term */
  620. /*
  621. ** There are two hash functions.
  622. **
  623. ** make_hash: A hash function that will give the same values for the same
  624. ** terms regardless of the internal representation. Small integers are
  625. ** hashed using the same algorithm as bignums and bignums are hashed
  626. ** independent of the CPU endianness.
  627. ** Make_hash also hashes pids, ports and references like 32 bit numbers
  628. ** (but with different constants).
  629. ** make_hash() is called from the bif erlang:phash/2
  630. **
  631. ** The idea behind the hash algorithm is to produce values suitable for
  632. ** linear dynamic hashing. We cannot choose the range at all while hashing
  633. ** (it's not even supplied to the hashing functions). The good old algorithm
  634. ** [H = H*C+X mod M, where H is the hash value, C is a "random" constant(or M),
  635. ** M is the range, preferably a prime, and X is each byte value] is therefore
  636. ** modified to:
  637. ** H = H*C+X mod 2^32, where C is a large prime. This gives acceptable
  638. ** "spreading" of the hashes, so that later modulo calculations also will give
  639. ** acceptable "spreading" in the range.
  640. ** We really need to hash on bytes, otherwise the
  641. ** upper bytes of a word will be less significant than the lower ones. That's
  642. ** not acceptable at all. For internal use one could maybe optimize by using
  643. ** another hash function, that is less strict but faster. That is, however, not
  644. ** implemented.
  645. **
  646. ** Short semi-formal description of make_hash:
  647. **
  648. ** In make_hash, the number N is treated like this:
  649. ** Abs(N) is hashed bytewise with the least significant byte, B(0), first.
  650. ** The number of bytes (J) to calculate hash on in N is
  651. ** (the number of _32_ bit words needed to store the unsigned
  652. ** value of abs(N)) * 4.
  653. ** X = FUNNY_NUMBER2
  654. ** If N < 0, Y = FUNNY_NUMBER4 else Y = FUNNY_NUMBER3.
  655. ** The hash value is Y*h(J) mod 2^32 where h(J) is calculated like
  656. ** h(0) = <initial hash>
  657. ** h(i) = h(i-1)*X + B(i-1)
  658. ** The above should hold regardless of internal representation.
  659. ** Pids are hashed like small numbers but with different constants, as are
  660. ** ports.
  661. ** References are hashed like ports but only on the least significant byte.
  662. ** Binaries are hashed on all bytes (not on the 15 first as in
  663. ** make_broken_hash()).
  664. ** Bytes in lists (possibly text strings) use a simpler multiplication inlined
  665. ** in the handling of lists, that is an optimization.
  666. ** Everything else is like in the old hash (make_broken_hash()).
  667. **
  668. ** make_hash2() is faster than make_hash, in particular for bignums
  669. ** and binaries, and produces better hash values.
  670. */
  /*
   * some prime numbers just above 2 ^ 28
   *
   * Used as multipliers by hash_binary_bytes() and make_hash() below; the
   * different term types use different constants.
   */
  #define FUNNY_NUMBER1 268440163
  #define FUNNY_NUMBER2 268439161
  #define FUNNY_NUMBER3 268435459
  #define FUNNY_NUMBER4 268436141
  #define FUNNY_NUMBER5 268438633
  #define FUNNY_NUMBER6 268437017
  #define FUNNY_NUMBER7 268438039
  #define FUNNY_NUMBER8 268437511
  #define FUNNY_NUMBER9 268439627
  #define FUNNY_NUMBER10 268440479
  #define FUNNY_NUMBER11 268440577
  #define FUNNY_NUMBER12 268440581
  #define FUNNY_NUMBER13 268440593
  #define FUNNY_NUMBER14 268440611
  686. static Uint32
  687. hash_binary_bytes(Eterm bin, Uint sz, Uint32 hash)
  688. {
  689. byte* ptr;
  690. Uint bitoffs;
  691. Uint bitsize;
  692. ERTS_GET_BINARY_BYTES(bin, ptr, bitoffs, bitsize);
  693. if (bitoffs == 0) {
  694. while (sz--) {
  695. hash = hash*FUNNY_NUMBER1 + *ptr++;
  696. }
  697. if (bitsize > 0) {
  698. byte b = *ptr;
  699. b >>= 8 - bitsize;
  700. hash = (hash*FUNNY_NUMBER1 + b) * FUNNY_NUMBER12 + bitsize;
  701. }
  702. } else {
  703. Uint previous = *ptr++;
  704. Uint b;
  705. Uint lshift = bitoffs;
  706. Uint rshift = 8 - lshift;
  707. while (sz--) {
  708. b = (previous << lshift) & 0xFF;
  709. previous = *ptr++;
  710. b |= previous >> rshift;
  711. hash = hash*FUNNY_NUMBER1 + b;
  712. }
  713. if (bitsize > 0) {
  714. b = (previous << lshift) & 0xFF;
  715. previous = *ptr++;
  716. b |= previous >> rshift;
  717. b >>= 8 - bitsize;
  718. hash = (hash*FUNNY_NUMBER1 + b) * FUNNY_NUMBER12 + bitsize;
  719. }
  720. }
  721. return hash;
  722. }
  /*
   * Compute the hash of 'term_arg' as described in the comment on the hash
   * functions above.
   *
   * The term is traversed iteratively. Sub-terms that remain to be hashed
   * are kept on an explicit work stack (WSTACK) together with an operation
   * code; 'op' is either the tag of the current term (tag_val_def) or one of
   * the MAKE_HASH_*_OP codes defined below. The work stack is destroyed
   * before returning.
   *
   * An unknown tag terminates the emulator through erts_exit().
   */
  Uint32 make_hash(Eterm term_arg)
  {
      DECLARE_WSTACK(stack);
      Eterm term = term_arg;
      Eterm hash = 0;
      unsigned op;

      /* Work stack operation codes; numbered after the ordinary term tags. */
  #define MAKE_HASH_TUPLE_OP (FIRST_VACANT_TAG_DEF)
  #define MAKE_HASH_TERM_ARRAY_OP (FIRST_VACANT_TAG_DEF+1)
  #define MAKE_HASH_CDR_PRE_OP (FIRST_VACANT_TAG_DEF+2)
  #define MAKE_HASH_CDR_POST_OP (FIRST_VACANT_TAG_DEF+3)

      /*
      ** Convenience macro for calculating a bytewise hash on an unsigned 32 bit
      ** integer.
      ** If the endianness is known, we could be smarter here,
      ** but that gives no significant speedup (on a sparc at least)
      */
  #define UINT32_HASH_STEP(Expr, Prime1) \
      do { \
          Uint32 x = (Uint32) (Expr); \
          hash = \
              (((((hash)*(Prime1) + (x & 0xFF)) * (Prime1) + \
              ((x >> 8) & 0xFF)) * (Prime1) + \
              ((x >> 16) & 0xFF)) * (Prime1) + \
              (x >> 24)); \
      } while(0)

      /* Hash step followed by a final multiplication; ends the current case. */
  #define UINT32_HASH_RET(Expr, Prime1, Prime2) \
      UINT32_HASH_STEP(Expr, Prime1); \
      hash = hash * (Prime2); \
      break

      /*
       * Significant additions needed for real 64 bit port with larger fixnums.
       */
      /*
       * Note, for the simple 64bit port, not utilizing the
       * larger word size this function will work without modification.
       */
  tail_recur:
      /* Entered for every new (sub-)term: dispatch on the term's own tag. */
      op = tag_val_def(term);
      for (;;) {
          switch (op) {
          case NIL_DEF:
              hash = hash*FUNNY_NUMBER3 + 1;
              break;
          case ATOM_DEF:
              /* Atoms contribute the hash value stored in the atom table. */
              hash = hash*FUNNY_NUMBER1 +
                  (atom_tab(atom_val(term))->slot.bucket.hvalue);
              break;
          case SMALL_DEF:
          {
              /* Hash the absolute value bytewise; the sign picks the final prime. */
              Sint y1 = signed_val(term);
              Uint y2 = y1 < 0 ? -(Uint)y1 : y1;
              UINT32_HASH_STEP(y2, FUNNY_NUMBER2);
  #if defined(ARCH_64)
              if (y2 >> 32)
                  UINT32_HASH_STEP(y2 >> 32, FUNNY_NUMBER2);
  #endif
              hash *= (y1 < 0 ? FUNNY_NUMBER4 : FUNNY_NUMBER3);
              break;
          }
          case BINARY_DEF:
          {
              Uint sz = binary_size(term);
              hash = hash_binary_bytes(term, sz, hash);
              hash = hash*FUNNY_NUMBER4 + sz;
              break;
          }
          case EXPORT_DEF:
          {
              /* Hash on arity, module atom and function atom of the export entry. */
              Export* ep = *((Export **) (export_val(term) + 1));
              hash = hash * FUNNY_NUMBER11 + ep->info.mfa.arity;
              hash = hash*FUNNY_NUMBER1 +
                  (atom_tab(atom_val(ep->info.mfa.module))->slot.bucket.hvalue);
              hash = hash*FUNNY_NUMBER1 +
                  (atom_tab(atom_val(ep->info.mfa.function))->slot.bucket.hvalue);
              break;
          }
          case FUN_DEF:
          {
              ErlFunThing* funp = (ErlFunThing *) fun_val(term);
              Uint num_free = funp->num_free;
              hash = hash * FUNNY_NUMBER10 + num_free;
              hash = hash*FUNNY_NUMBER1 +
                  (atom_tab(atom_val(funp->fe->module))->slot.bucket.hvalue);
              hash = hash*FUNNY_NUMBER2 + funp->fe->old_index;
              hash = hash*FUNNY_NUMBER2 + funp->fe->old_uniq;
              if (num_free > 0) {
                  /*
                   * Hash env[0] next; the remaining free variables (if any)
                   * are queued as a term array.
                   */
                  if (num_free > 1) {
                      WSTACK_PUSH3(stack, (UWord) &funp->env[1], (num_free-1), MAKE_HASH_TERM_ARRAY_OP);
                  }
                  term = funp->env[0];
                  goto tail_recur;
              }
              break;
          }
          /* Each UINT32_HASH_RET below ends with 'break'; no fall through. */
          case PID_DEF:
              UINT32_HASH_RET(internal_pid_number(term),FUNNY_NUMBER5,FUNNY_NUMBER6);
          case EXTERNAL_PID_DEF:
              UINT32_HASH_RET(external_pid_number(term),FUNNY_NUMBER5,FUNNY_NUMBER6);
          case PORT_DEF:
              UINT32_HASH_RET(internal_port_number(term),FUNNY_NUMBER9,FUNNY_NUMBER10);
          case EXTERNAL_PORT_DEF:
              UINT32_HASH_RET(external_port_number(term),FUNNY_NUMBER9,FUNNY_NUMBER10);
          case REF_DEF:
              UINT32_HASH_RET(internal_ref_numbers(term)[0],FUNNY_NUMBER9,FUNNY_NUMBER10);
          case EXTERNAL_REF_DEF:
              UINT32_HASH_RET(external_ref_numbers(term)[0],FUNNY_NUMBER9,FUNNY_NUMBER10);
          case FLOAT_DEF:
          {
              FloatDef ff;
              GET_DOUBLE(term, ff);
              if (ff.fd == 0.0f) {
                  /* ensure positive 0.0 */
                  ff.fd = erts_get_positive_zero_float();
              }
              hash = hash*FUNNY_NUMBER6 + (ff.fw[0] ^ ff.fw[1]);
              break;
          }
          case MAKE_HASH_CDR_PRE_OP:
              /*
               * The CAR of a cell has been hashed; continue with its CDR.
               * A non-list tail is hashed as a term of its own and followed
               * by the CDR_POST step; a list tail is handled by LIST_DEF.
               */
              term = (Eterm) WSTACK_POP(stack);
              if (is_not_list(term)) {
                  WSTACK_PUSH(stack, (UWord) MAKE_HASH_CDR_POST_OP);
                  goto tail_recur;
              }
              /* fall through */
          case LIST_DEF:
          {
              Eterm* list = list_val(term);
              while(is_byte(*list)) {
                  /* Optimization for strings.
                  ** Note that this hash is different from a 'small' hash,
                  ** as multiplications on a Sparc is so slow.
                  */
                  hash = hash*FUNNY_NUMBER2 + unsigned_val(*list);

                  if (is_not_list(CDR(list))) {
                      /* End of the list: hash the tail, then apply CDR_POST. */
                      WSTACK_PUSH(stack, MAKE_HASH_CDR_POST_OP);
                      term = CDR(list);
                      goto tail_recur;
                  }
                  list = list_val(CDR(list));
              }
              /* Element is not a byte: hash it as a term and come back for the CDR. */
              WSTACK_PUSH2(stack, CDR(list), MAKE_HASH_CDR_PRE_OP);
              term = CAR(list);
              goto tail_recur;
          }
          case MAKE_HASH_CDR_POST_OP:
              hash *= FUNNY_NUMBER8;
              break;

          case BIG_DEF:
              /* Note that this is the exact same thing as the hashing of smalls.*/
          {
              Eterm* ptr = big_val(term);
              Uint n = BIG_SIZE(ptr);
              Uint k = n-1;
              ErtsDigit d;
              int is_neg = BIG_SIGN(ptr);
              Uint i;
              int j;

              /* All digits but the most significant one: hash every byte. */
              for (i = 0; i < k; i++) {
                  d = BIG_DIGIT(ptr, i);
                  for(j = 0; j < sizeof(ErtsDigit); ++j) {
                      hash = (hash*FUNNY_NUMBER2) + (d & 0xff);
                      d >>= 8;
                  }
              }
              /*
               * Most significant digit: on ARCH_64 only half of its bytes
               * are hashed when the upper 32 bits are zero.
               */
              d = BIG_DIGIT(ptr, k);
              k = sizeof(ErtsDigit);
  #if defined(ARCH_64)
              if (!(d >> 32))
                  k /= 2;
  #endif
              for(j = 0; j < (int)k; ++j) {
                  hash = (hash*FUNNY_NUMBER2) + (d & 0xff);
                  d >>= 8;
              }
              hash *= is_neg ? FUNNY_NUMBER4 : FUNNY_NUMBER3;
              break;
          }
          case MAP_DEF:
              /* Maps are delegated to make_hash2(). */
              hash = hash*FUNNY_NUMBER13 + FUNNY_NUMBER14 + make_hash2(term);
              break;
          case TUPLE_DEF:
          {
              /*
               * Push the arity twice: the top copy is the count of elements
               * left to hash, the bottom copy is mixed in when all elements
               * have been hashed.
               */
              Eterm* ptr = tuple_val(term);
              Uint arity = arityval(*ptr);

              WSTACK_PUSH3(stack, (UWord) arity, (UWord)(ptr+1), (UWord) arity);
              op = MAKE_HASH_TUPLE_OP;
          }/*fall through*/
          case MAKE_HASH_TUPLE_OP:
          case MAKE_HASH_TERM_ARRAY_OP:
          {
              /* Stack top: number of elements left, then pointer to the next one. */
              Uint i = (Uint) WSTACK_POP(stack);
              Eterm* ptr = (Eterm*) WSTACK_POP(stack);
              if (i != 0) {
                  /* Hash the next element and re-queue the rest of the array. */
                  term = *ptr;
                  WSTACK_PUSH3(stack, (UWord)(ptr+1), (UWord) i-1, (UWord) op);
                  goto tail_recur;
              }
              if (op == MAKE_HASH_TUPLE_OP) {
                  Uint32 arity = (Uint32) WSTACK_POP(stack);
                  hash = hash*FUNNY_NUMBER9 + arity;
              }
              break;
          }

          default:
              erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_hash(0x%X,0x%X)\n", term, op);
              return 0;
          }
          /* Current term done: finish if no work is queued, else run the next op. */
          if (WSTACK_ISEMPTY(stack)) break;
          op = WSTACK_POP(stack);
      }
      DESTROY_WSTACK(stack);
      return hash;

  #undef MAKE_HASH_TUPLE_OP
  #undef MAKE_HASH_TERM_ARRAY_OP
  #undef MAKE_HASH_CDR_PRE_OP
  #undef MAKE_HASH_CDR_POST_OP
  #undef UINT32_HASH_STEP
  #undef UINT32_HASH_RET
  }
  /* Hash function suggested by Bob Jenkins. */

  /*
   * One mixing step: subtract the two other words from x, then xor in z
   * shifted right (SHR) or left (SHL) by n bits.
   */
  #define MIX_STEP_SHR(x,y,z,n) x -= y; x -= z; x ^= (z>>(n))
  #define MIX_STEP_SHL(x,y,z,n) x -= y; x -= z; x ^= (z<<(n))

  /*
   * Mix the three words a, b and c in place using nine steps in three
   * rounds. The arguments must be modifiable lvalues.
   */
  #define MIX(a,b,c)                  \
      do {                            \
          MIX_STEP_SHR(a,b,c,13);     \
          MIX_STEP_SHL(b,c,a,8);      \
          MIX_STEP_SHR(c,a,b,13);     \
          MIX_STEP_SHR(a,b,c,12);     \
          MIX_STEP_SHL(b,c,a,16);     \
          MIX_STEP_SHR(c,a,b,5);      \
          MIX_STEP_SHR(a,b,c,3);      \
          MIX_STEP_SHL(b,c,a,10);     \
          MIX_STEP_SHR(c,a,b,15);     \
      } while(0)

  #define HCONST 0x9e3779b9UL /* the golden ratio; an arbitrary value */
  956. static Uint32
  957. block_hash(byte *k, Uint length, Uint32 initval)
  958. {
  959. Uint32 a,b,c;
  960. Uint len;
  961. /* Set up the internal state */
  962. len = length;
  963. a = b = HCONST;
  964. c = initval; /* the previous hash value */
  965. while (len >= 12)
  966. {
  967. a += (k[0] +((Uint32)k[1]<<8) +((Uint32)k[2]<<16) +((Uint32)k[3]<<24));
  968. b += (k[4] +((Uint32)k[5]<<8) +((Uint32)k[6]<<16) +((Uint32)k[7]<<24));
  969. c += (k[8] +((Uint32)k[9]<<8) +((Uint32)k[10]<<16)+((Uint32)k[11]<<24));
  970. MIX(a,b,c);
  971. k += 12; len -= 12;
  972. }
  973. c += length;
  974. switch(len) /* all the case statements fall through */
  975. {
  976. case 11: c+=((Uint32)k[10]<<24);
  977. case 10: c+=((Uint32)k[9]<<16);
  978. case 9 : c+=((Uint32)k[8]<<8);
  979. /* the first byte of c is reserved for the length */
  980. case 8 : b+=((Uint32)k[7]<<24);
  981. case 7 : b+=((Uint32)k[6]<<16);
  982. case 6 : b+=((Uint32)k[5]<<8);
  983. case 5 : b+=k[4];
  984. case 4 : a+=((Uint32)k[3]<<24);
  985. case 3 : a+=((Uint32)k[2]<<16);
  986. case 2 : a+=((Uint32)k[1]<<8);
  987. case 1 : a+=k[0];
  988. /* case 0: nothing left to add */
  989. }
  990. MIX(a,b,c);
  991. return c;
  992. }
  993. Uint32
  994. make_hash2(Eterm term)
  995. {
  996. Uint32 hash;
  997. Uint32 hash_xor_pairs;
  998. DeclareTmpHeapNoproc(tmp_big,2);
  999. ERTS_UNDEF(hash_xor_pairs, 0);
  1000. /* (HCONST * {2, ..., 22}) mod 2^32 */
  1001. #define HCONST_2 0x3c6ef372UL
  1002. #define HCONST_3 0xdaa66d2bUL
  1003. #define HCONST_4 0x78dde6e4UL
  1004. #define HCONST_5 0x1715609dUL
  1005. #define HCONST_6 0xb54cda56UL
  1006. #define HCONST_7 0x5384540fUL
  1007. #define HCONST_8 0xf1bbcdc8UL
  1008. #define HCONST_9 0x8ff34781UL
  1009. #define HCONST_10 0x2e2ac13aUL
  1010. #define HCONST_11 0xcc623af3UL
  1011. #define HCONST_12 0x6a99b4acUL
  1012. #define HCONST_13 0x08d12e65UL
  1013. #define HCONST_14 0xa708a81eUL
  1014. #define HCONST_15 0x454021d7UL
  1015. #define HCONST_16 0xe3779b90UL
  1016. #define HCONST_17 0x81af1549UL
  1017. #define HCONST_18 0x1fe68f02UL
  1018. #define HCONST_19 0xbe1e08bbUL
  1019. #define HCONST_20 0x5c558274UL
  1020. #define HCONST_21 0xfa8cfc2dUL
  1021. #define HCONST_22 0x98c475e6UL
  1022. #define HASH_MAP_TAIL (_make_header(1,_TAG_HEADER_REF))
  1023. #define HASH_MAP_PAIR (_make_header(2,_TAG_HEADER_REF))
  1024. #define HASH_CDR (_make_header(3,_TAG_HEADER_REF))
  1025. #define UINT32_HASH_2(Expr1, Expr2, AConst) \
  1026. do { \
  1027. Uint32 a,b; \
  1028. a = AConst + (Uint32) (Expr1); \
  1029. b = AConst + (Uint32) (Expr2); \
  1030. MIX(a,b,hash); \
  1031. } while(0)
  1032. #define UINT32_HASH(Expr, AConst) UINT32_HASH_2(Expr, 0, AConst)
  1033. #define SINT32_HASH(Expr, AConst) \
  1034. do { \
  1035. Sint32 y = (Sint32) (Expr); \
  1036. if (y < 0) { \
  1037. UINT32_HASH(-y, AConst); \
  1038. /* Negative numbers are unnecessarily mixed twice. */ \
  1039. } \
  1040. UINT32_HASH(y, AConst); \
  1041. } while(0)
  1042. #define IS_SSMALL28(x) (((Uint) (((x) >> (28-1)) + 1)) < 2)
  1043. #ifdef ARCH_64
  1044. # define POINTER_HASH(Ptr, AConst) UINT32_HASH_2((Uint32)(UWord)(Ptr), (((UWord)(Ptr)) >> 32), AConst)
  1045. #else
  1046. # define POINTER_HASH(Ptr, AConst) UINT32_HASH(Ptr, AConst)
  1047. #endif
  1048. /* Optimization. Simple cases before declaration of estack. */
  1049. if (primary_tag(term) == TAG_PRIMARY_IMMED1) {
  1050. switch (term & _TAG_IMMED1_MASK) {
  1051. case _TAG_IMMED1_IMMED2:
  1052. switch (term & _TAG_IMMED2_MASK) {
  1053. case _TAG_IMMED2_ATOM:
  1054. /* Fast, but the poor hash value should be mixed. */
  1055. return atom_tab(atom_val(term))->slot.bucket.hvalue;
  1056. }
  1057. break;
  1058. case _TAG_IMMED1_SMALL:
  1059. {
  1060. Sint x = signed_val(term);
  1061. if (SMALL_BITS > 28 && !IS_SSMALL28(x)) {
  1062. term = small_to_big(x, tmp_big);
  1063. break;
  1064. }
  1065. hash = 0;
  1066. SINT32_HASH(x, HCONST);
  1067. return hash;
  1068. }
  1069. }
  1070. };
  1071. {
  1072. Eterm tmp;
  1073. DECLARE_ESTACK(s);
  1074. UseTmpHeapNoproc(2);
  1075. hash = 0;
  1076. for (;;) {
  1077. switch (primary_tag(term)) {
  1078. case TAG_PRIMARY_LIST:
  1079. {
  1080. int c = 0;
  1081. Uint32 sh = 0;
  1082. Eterm* ptr = list_val(term);
  1083. while (is_byte(*ptr)) {
  1084. /* Optimization for strings. */
  1085. sh = (sh << 8) + unsigned_val(*ptr);
  1086. if (c == 3) {
  1087. UINT32_HASH(sh, HCONST_4);
  1088. c = sh = 0;
  1089. } else {
  1090. c++;
  1091. }
  1092. term = CDR(ptr);
  1093. if (is_not_list(term))
  1094. break;
  1095. ptr = list_val(term);
  1096. }
  1097. if (c > 0)
  1098. UINT32_HASH(sh, HCONST_4);
  1099. if (is_list(term)) {
  1100. tmp = CDR(ptr);
  1101. ESTACK_PUSH(s, tmp);
  1102. term = CAR(ptr);
  1103. }
  1104. }
  1105. break;
  1106. case TAG_PRIMARY_BOXED:
  1107. {
  1108. Eterm hdr = *boxed_val(term);
  1109. ASSERT(is_header(hdr));
  1110. switch (hdr & _TAG_HEADER_MASK) {
  1111. case ARITYVAL_SUBTAG:
  1112. {
  1113. int i;
  1114. int arity = header_arity(hdr);
  1115. Eterm* elem = tuple_val(term);
  1116. UINT32_HASH(arity, HCONST_9);
  1117. if (arity == 0) /* Empty tuple */
  1118. goto hash2_common;
  1119. for (i = arity; ; i--) {
  1120. term = elem[i];
  1121. if (i == 1)
  1122. break;
  1123. ESTACK_PUSH(s, term);
  1124. }
  1125. }
  1126. break;
  1127. case MAP_SUBTAG:
  1128. {
  1129. Eterm* ptr = boxed_val(term) + 1;
  1130. Uint size;
  1131. int i;
  1132. switch (hdr & _HEADER_MAP_SUBTAG_MASK) {
  1133. case HAMT_SUBTAG_HEAD_FLATMAP:
  1134. {
  1135. flatmap_t *mp = (flatmap_t *)flatmap_val(term);
  1136. Eterm *ks = flatmap_get_keys(mp);
  1137. Eterm *vs = flatmap_get_values(mp);
  1138. size = flatmap_get_size(mp);
  1139. UINT32_HASH(size, HCONST_16);
  1140. if (size == 0)
  1141. goto hash2_common;
  1142. /* We want a portable hash function that is *independent* of
  1143. * the order in which keys and values are encountered.
  1144. * We therefore calculate context independent hashes for all .
  1145. * key-value pairs and then xor them together.
  1146. */
  1147. ESTACK_PUSH(s, hash_xor_pairs);
  1148. ESTACK_PUSH(s, hash);
  1149. ESTACK_PUSH(s, HASH_MAP_TAIL);
  1150. hash = 0;
  1151. hash_xor_pairs = 0;
  1152. for (i = size - 1; i >= 0; i--) {
  1153. ESTACK_PUSH(s, HASH_MAP_PAIR);
  1154. ESTACK_PUSH(s, vs[i]);
  1155. ESTACK_PUSH(s, ks[i]);
  1156. }
  1157. goto hash2_common;
  1158. }
  1159. case HAMT_SUBTAG_HEAD_ARRAY:
  1160. case HAMT_SUBTAG_HEAD_BITMAP:
  1161. size = *ptr++;
  1162. UINT32_HASH(size, HCONST_16);
  1163. if (size == 0)
  1164. goto hash2_common;
  1165. ESTACK_PUSH(s, hash_xor_pairs);
  1166. ESTACK_PUSH(s, hash);
  1167. ESTACK_PUSH(s, HASH_MAP_TAIL);
  1168. hash = 0;
  1169. hash_xor_pairs = 0;
  1170. }
  1171. switch (hdr & _HEADER_MAP_SUBTAG_MASK) {
  1172. case HAMT_SUBTAG_HEAD_ARRAY:
  1173. i = 16;
  1174. break;
  1175. case HAMT_SUBTAG_HEAD_BITMAP:
  1176. case HAMT_SUBTAG_NODE_BITMAP:
  1177. i = hashmap_bitcount(MAP_HEADER_VAL(hdr));
  1178. break;
  1179. default:
  1180. erts_exit(ERTS_ERROR_EXIT, "bad header");
  1181. }
  1182. while (i) {
  1183. if (is_list(*ptr)) {
  1184. Eterm* cons = list_val(*ptr);
  1185. ESTACK_PUSH(s, HASH_MAP_PAIR);
  1186. ESTACK_PUSH(s, CDR(cons));
  1187. ESTACK_PUSH(s, CAR(cons));
  1188. }
  1189. else {
  1190. ASSERT(is_boxed(*ptr));
  1191. ESTACK_PUSH(s, *ptr);
  1192. }
  1193. i--; ptr++;
  1194. }
  1195. goto hash2_common;
  1196. }
  1197. break;
  1198. case EXPORT_SUBTAG:
  1199. {
  1200. Export* ep = *((Export **) (export_val(term) + 1));
  1201. UINT32_HASH_2
  1202. (ep->info.mfa.arity,
  1203. atom_tab(atom_val(ep->info.mfa.module))->slot.bucket.hvalue,
  1204. HCONST);
  1205. UINT32_HASH
  1206. (atom_tab(atom_val(ep->info.mfa.function))->slot.bucket.hvalue,
  1207. HCONST_14);
  1208. goto hash2_common;
  1209. }
  1210. case FUN_SUBTAG:
  1211. {
  1212. ErlFunThing* funp = (ErlFunThing *) fun_val(term);
  1213. Uint num_free = funp->num_free;
  1214. UINT32_HASH_2
  1215. (num_free,
  1216. atom_tab(atom_val(funp->fe->module))->slot.bucket.hvalue,
  1217. HCONST);
  1218. UINT32_HASH_2
  1219. (funp->fe->old_index, funp->fe->old_uniq, HCONST);
  1220. if (num_free == 0) {
  1221. goto hash2_common;
  1222. } else {
  1223. Eterm* bptr = funp->env + num_free - 1;
  1224. while (num_free-- > 1) {
  1225. term = *bptr--;
  1226. ESTACK_PUSH(s, term);
  1227. }
  1228. term = *bptr;
  1229. }
  1230. }
  1231. break;
  1232. case REFC_BINARY_SUBTAG:
  1233. case HEAP_BINARY_SUBTAG:
  1234. case SUB_BINARY_SUBTAG:
  1235. {
  1236. byte* bptr;
  1237. unsigned sz = binary_size(term);
  1238. Uint32 con = HCONST_13 + hash;
  1239. Uint bitoffs;
  1240. Uint bitsize;
  1241. ERTS_GET_BINARY_BYTES(term, bptr, bitoffs, bitsize);
  1242. if (sz == 0 && bitsize == 0) {
  1243. hash = con;
  1244. } else {
  1245. if (bitoffs == 0) {
  1246. hash = block_hash(bptr, sz, con);
  1247. if (bitsize > 0) {
  1248. UINT32_HASH_2(bitsize, (bptr[sz] >> (8 - bitsize)),
  1249. HCONST_15);
  1250. }
  1251. } else {
  1252. byte* buf = (byte *) erts_alloc(ERTS_ALC_T_TMP,
  1253. sz + (bitsize != 0));
  1254. erts_copy_bits(bptr, bitoffs, 1, buf, 0, 1, sz*8+bitsize);
  1255. hash = block_hash(buf, sz, con);
  1256. if (bitsize > 0) {
  1257. UINT32_HASH_2(bitsize, (buf[sz] >> (8 - bitsize)),
  1258. HCONST_15);
  1259. }
  1260. erts_free(ERTS_ALC_T_TMP, (void *) buf);
  1261. }
  1262. }
  1263. goto hash2_common;
  1264. }
  1265. break;
  1266. case POS_BIG_SUBTAG:
  1267. case NEG_BIG_SUBTAG:
  1268. {
  1269. Eterm* ptr = big_val(term);
  1270. Uint i = 0;
  1271. Uint n = BIG_SIZE(ptr);
  1272. Uint32 con = BIG_SIGN(ptr) ? HCONST_10 : HCONST_11;
  1273. #if D_EXP == 16
  1274. do {
  1275. Uint32 x, y;
  1276. x = i < n ? BIG_DIGIT(ptr, i++) : 0;
  1277. x += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16;
  1278. y = i < n ? BIG_DIGIT(ptr, i++) : 0;
  1279. y += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16;
  1280. UINT32_HASH_2(x, y, con);
  1281. } while (i < n);
  1282. #elif D_EXP == 32
  1283. do {
  1284. Uint32 x, y;
  1285. x = i < n ? BIG_DIGIT(ptr, i++) : 0;
  1286. y = i < n ? BIG_DIGIT(ptr, i++) : 0;
  1287. UINT32_HASH_2(x, y, con);
  1288. } while (i < n);
  1289. #elif D_EXP == 64
  1290. do {
  1291. Uint t;
  1292. Uint32 x, y;
  1293. ASSERT(i < n);
  1294. t = BIG_DIGIT(ptr, i++);
  1295. x = t & 0xffffffff;
  1296. y = t >> 32;
  1297. UINT32_HASH_2(x, y, con);
  1298. } while (i < n);
  1299. #else
  1300. #error "unsupported D_EXP size"
  1301. #endif
  1302. goto hash2_common;
  1303. }
  1304. break;
  1305. case REF_SUBTAG:
  1306. /* All parts of the ref should be hashed. */
  1307. UINT32_HASH(internal_ref_numbers(term)[0], HCONST_7);
  1308. goto hash2_common;
  1309. break;
  1310. case EXTERNAL_REF_SUBTAG:
  1311. /* All parts of the ref should be hashed. */
  1312. UINT32_HASH(external_ref_numbers(term)[0], HCONST_7);
  1313. goto hash2_common;
  1314. break;
  1315. case EXTERNAL_PID_SUBTAG:
  1316. /* Only 15 bits are hashed. */
  1317. UINT32_HASH(external_pid_number(term), HCONST_5);
  1318. goto hash2_common;
  1319. case EXTERNAL_PORT_SUBTAG:
  1320. /* Only 15 bits are hashed. */
  1321. UINT32_HASH(external_port_number(term), HCONST_6);
  1322. goto hash2_common;
  1323. case FLOAT_SUBTAG:
  1324. {
  1325. FloatDef ff;
  1326. GET_DOUBLE(term, ff);
  1327. if (ff.fd == 0.0f) {
  1328. /* ensure positive 0.0 */
  1329. ff.fd = erts_get_positive_zero_float();
  1330. }
  1331. #if defined(WORDS_BIGENDIAN) || defined(DOUBLE_MIDDLE_ENDIAN)
  1332. UINT32_HASH_2(ff.fw[0], ff.fw[1], HCONST_12);
  1333. #else
  1334. UINT32_HASH_2(ff.fw[1], ff.fw[0], HCONST_12);
  1335. #endif
  1336. goto hash2_common;
  1337. }
  1338. break;
  1339. default:
  1340. erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_hash2(0x%X)\n", term);
  1341. }
  1342. }
  1343. break;
  1344. case TAG_PRIMARY_IMMED1:
  1345. switch (term & _TAG_IMMED1_MASK) {
  1346. case _TAG_IMMED1_PID:
  1347. /* Only 15 bits are hashed. */
  1348. UINT32_HASH(internal_pid_number(term), HCONST_5);
  1349. goto hash2_common;
  1350. case _TAG_IMMED1_PORT:
  1351. /* Only 15 bits are hashed. */
  1352. UINT32_HASH(internal_port_number(term), HCONST_6);
  1353. goto hash2_common;
  1354. case _TAG_IMMED1_IMMED2:
  1355. switch (term & _TAG_IMMED2_MASK) {
  1356. case _TAG_IMMED2_ATOM:
  1357. if (hash == 0)
  1358. /* Fast, but the poor hash value should be mixed. */
  1359. hash = atom_tab(atom_val(term))->slot.bucket.hvalue;
  1360. else
  1361. UINT32_HASH(atom_tab(atom_val(term))->slot.bucket.hvalue,
  1362. HCONST_3);
  1363. goto hash2_common;
  1364. case _TAG_IMMED2_NIL:
  1365. if (hash == 0)
  1366. hash = 3468870702UL;
  1367. else
  1368. UINT32_HASH(NIL_DEF, HCONST_2);
  1369. goto hash2_common;
  1370. default:
  1371. erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_hash2(0x%X)\n", term);
  1372. }
  1373. case _TAG_IMMED1_SMALL:
  1374. {
  1375. Sint x = signed_val(term);
  1376. if (SMALL_BITS > 28 && !IS_SSMALL28(x)) {
  1377. term = small_to_big(x, tmp_big);
  1378. break;
  1379. }
  1380. SINT32_HASH(x, HCONST);
  1381. goto hash2_common;
  1382. }
  1383. }
  1384. break;
  1385. default:
  1386. erts_exit(ERTS_ERROR_EXIT, "Invalid tag in make_hash2(0x%X)\n", term);
  1387. hash2_common:
  1388. /* Uint32 hash always has the hash value of the previous term,
  1389. * compounded or otherwise.
  1390. */
  1391. if (ESTACK_ISEMPTY(s)) {
  1392. DESTROY_ESTACK(s);
  1393. UnUseTmpHeapNoproc(2);
  1394. return hash;
  1395. }
  1396. term = ESTACK_POP(s);
  1397. switch (term) {
  1398. case HASH_MAP_TAIL: {
  1399. hash = (Uint32) ESTACK_POP(s);
  1400. UINT32_HASH(hash_xor_pairs, HCONST_19);
  1401. hash_xor_pairs = (Uint32) ESTACK_POP(s);
  1402. goto hash2_common;
  1403. }
  1404. case HASH_MAP_PAIR:
  1405. hash_xor_pairs ^= hash;
  1406. hash = 0;
  1407. goto hash2_common;
  1408. default:
  1409. break;
  1410. }
  1411. }
  1412. }
  1413. }
  1414. }
  1415. /* Term hash function for internal use.
  1416. *
  1417. * Limitation #1: Is not "portable" in any way between different VM instances.
  1418. *
  1419. * Limitation #2: The hash value is only valid as long as the term exists
  1420. * somewhere in the VM. Why? Because external pids, ports and refs are hashed
  1421. * by mixing the node *pointer* value. If a node disappears and later reappears
  1422. * with a new ErlNode struct, externals from that node will hash differently than
  1423. * before.
  1424. *
  1425. * One IMPORTANT property must hold (for hamt).
  1426. * EVERY BIT of the term that is significant for equality (see EQ)
  1427. * MUST BE USED AS INPUT FOR THE HASH. Two different terms must always have a
  1428. * chance of hashing different when salted: hash([Salt|A]) vs hash([Salt|B]).
  1429. *
  1430. * This is why we cannot use cached hash values for atoms for example.
  1431. *
  1432. */
  1433. #define CONST_HASH(AConst) \
  1434. do { /* Lightweight mixing of constant (type info) */ \
  1435. hash ^= AConst; \
  1436. hash = (hash << 17) ^ (hash >> (32-17)); \
  1437. } while (0)
  1438. Uint32
  1439. make_internal_hash(Eterm term, Uint32 salt)
  1440. {
  1441. Uint32 hash;
  1442. /* Optimization. Simple cases before declaration of estack. */
  1443. if (primary_tag(term) == TAG_PRIMARY_IMMED1) {
  1444. hash = salt;
  1445. #if ERTS_SIZEOF_ETERM == 8
  1446. UINT32_HASH_2((Uint32)term, (Uint32)(term >> 32), HCONST);
  1447. #elif ERTS_SIZEOF_ETERM == 4
  1448. UINT32_HASH(term, HCONST);
  1449. #else
  1450. # error "No you don't"
  1451. #endif
  1452. return hash;
  1453. }
  1454. {
  1455. Eterm tmp;
  1456. DECLARE_ESTACK(s);
  1457. hash = salt;
  1458. for (;;) {
  1459. switch (primary_tag(term)) {
  1460. case TAG_PRIMARY_LIST:
  1461. {
  1462. int c = 0;
  1463. Uint32 sh = 0;
  1464. Eterm* ptr = list_val(term);
  1465. while (is_byte(*ptr)) {
  1466. /* Optimization for strings. */
  1467. sh = (sh << 8) + unsigned_val(*ptr);
  1468. if (c == 3) {
  1469. UINT32_HASH(sh, HCONST_4);
  1470. c = sh = 0;
  1471. } else {
  1472. c++;
  1473. }
  1474. term = CDR(ptr);
  1475. if (is_not_list(term))
  1476. break;
  1477. ptr = list_val(term);
  1478. }
  1479. if (c > 0)
  1480. UINT32_HASH_2(sh, (Uint32)c, HCONST_22);
  1481. if (is_list(term)) {
  1482. tmp = CDR(ptr);
  1483. CONST_HASH(HCONST_17); /* Hash CAR in cons cell */
  1484. ESTACK_PUSH(s, tmp);
  1485. if (is_not_list(tmp)) {
  1486. ESTACK_PUSH(s, HASH_CDR);
  1487. }
  1488. term = CAR(ptr);
  1489. }
  1490. }
  1491. break;
  1492. case TAG_PRIMARY_BOXED:
  1493. {
  1494. Eterm hdr = *boxed_val(term);
  1495. ASSERT(is_header(hdr));
  1496. switch (hdr & _TAG_HEADER_MASK) {
  1497. case ARITYVAL_SUBTAG:
  1498. {
  1499. int i;
  1500. int arity = header_arity(hdr);
  1501. Eterm* elem = tuple_val(term);
  1502. UINT32_HASH(arity, HCONST_9);
  1503. if (arity == 0) /* Empty tuple */
  1504. goto pop_next;
  1505. for (i = arity; ; i--) {
  1506. term = elem[i];
  1507. if (i == 1)
  1508. break;
  1509. ESTACK_PUSH(s, term);
  1510. }
  1511. }
  1512. break;
  1513. case MAP_SUBTAG:
  1514. {
  1515. Eterm* ptr = boxed_val(term) + 1;
  1516. Uint size;
  1517. int i;
  1518. /*
  1519. * We rely on key-value iteration order being constant
  1520. * for identical maps (in this VM instance).
  1521. */
  1522. switch (hdr & _HEADER_MAP_SUBTAG_MASK) {
  1523. case HAMT_SUBTAG_HEAD_FLATMAP:
  1524. {
  1525. flatmap_t *mp = (flatmap_t *)flatmap_val(term);
  1526. Eterm *ks = flatmap_get_keys(mp);
  1527. Eterm *vs = flatmap_get_values(mp);
  1528. size = flatmap_get_size(mp);
  1529. UINT32_HASH(size, HCONST_16);
  1530. if (size == 0)
  1531. goto pop_next;
  1532. for (i = size - 1; i >= 0; i--) {
  1533. ESTACK_PUSH(s, vs[i]);
  1534. ESTACK_PUSH(s, ks[i]);
  1535. }
  1536. goto pop_next;
  1537. }
  1538. case HAMT_SUBTAG_HEAD_ARRAY:
  1539. case HAMT_SUBTAG_HEAD_BITMAP:
  1540. size = *ptr++;
  1541. UINT32_HASH(size, HCONST_16);
  1542. if (size == 0)
  1543. goto pop_next;
  1544. }
  1545. switch (hdr & _HEADER_MAP_SUBTAG_MASK) {
  1546. case HAMT_SUBTAG_HEAD_ARRAY:
  1547. i = 16;
  1548. break;
  1549. case HAMT_SUBTAG_HEAD_BITMAP:
  1550. case HAMT_SUBTAG_NODE_BITMAP:
  1551. i = hashmap_bitcount(MAP_HEADER_VAL(hdr));
  1552. break;
  1553. default:
  1554. erts_exit(ERTS_ERROR_EXIT, "bad header");
  1555. }
  1556. while (i) {
  1557. if (is_list(*ptr)) {
  1558. Eterm* cons = list_val(*ptr);
  1559. ESTACK_PUSH(s, CDR(cons));
  1560. ESTACK_PUSH(s, CAR(cons));
  1561. }
  1562. else {
  1563. ASSERT(is_boxed(*ptr));
  1564. ESTACK_PUSH(s, *ptr);
  1565. }
  1566. i--; ptr++;
  1567. }
  1568. goto pop_next;
  1569. }
  1570. break;
  1571. case EXPORT_SUBTAG:
  1572. {
  1573. Export* ep = *((Export **) (export_val(term) + 1));
  1574. /* Assumes Export entries never move */
  1575. POINTER_HASH(ep, HCONST_14);
  1576. goto pop_next;
  1577. }
  1578. case FUN_SUBTAG:
  1579. {
  1580. ErlFunThing* funp = (ErlFunThing *) fun_val(term);
  1581. Uint num_free = funp->num_free;
  1582. UINT32_HASH_2(num_free, funp->fe->module, HCONST_20);
  1583. UINT32_HASH_2(funp->fe->old_index, funp->fe->old_uniq, HCONST_21);
  1584. if (num_free == 0) {
  1585. goto pop_next;
  1586. } else {
  1587. Eterm* bptr = funp->env + num_free - 1;
  1588. while (num_free-- > 1) {
  1589. term = *bptr--;
  1590. ESTACK_PUSH(s, term);
  1591. }
  1592. term = *bptr;
  1593. }
  1594. }
  1595. break;
  1596. case REFC_BINARY_SUBTAG:
  1597. case HEAP_BINARY_SUBTAG:
  1598. case SUB_BINARY_SUBTAG:
  1599. {
  1600. byte* bptr;
  1601. Uint sz = binary_size(term);
  1602. Uint32 con = HCONST_13 + hash;
  1603. Uint bitoffs;
  1604. Uint bitsize;
  1605. ERTS_GET_BINARY_BYTES(term, bptr, bitoffs, bitsize);
  1606. if (sz == 0 && bitsize == 0) {
  1607. hash = con;
  1608. } else {
  1609. if (bitoffs == 0) {
  1610. hash = block_hash(bptr, sz, con);
  1611. if (bitsize > 0) {
  1612. UINT32_HASH_2(bitsize, (bptr[sz] >> (8 - bitsize)),
  1613. HCONST_15);
  1614. }
  1615. } else {
  1616. byte* buf = (byte *) erts_alloc(ERTS_ALC_T_TMP,
  1617. sz + (bitsize != 0));
  1618. erts_copy_bits(bptr, bitoffs, 1, buf, 0, 1, sz*8+bitsize);
  1619. hash = block_hash(buf, sz, con);
  1620. if (bitsize > 0) {
  1621. UINT32_HASH_2(bitsize, (buf[sz] >> (8 - bitsize)),
  1622. HCONST_15);
  1623. }
  1624. erts_free(ERTS_ALC_T_TMP, (void *) buf);
  1625. }
  1626. }
  1627. goto pop_next;
  1628. }
  1629. break;
  1630. case POS_BIG_SUBTAG:
  1631. case NEG_BIG_SUBTAG:
  1632. {
  1633. Eterm* ptr = big_val(term);
  1634. Uint i = 0;
  1635. Uint n = BIG_SIZE(ptr);
  1636. Uint32 con = BIG_SIGN(ptr) ? HCONST_10 : HCONST_11;
  1637. #if D_EXP == 16
  1638. do {
  1639. Uint32 x, y;
  1640. x = i < n ? BIG_DIGIT(ptr, i++) : 0;
  1641. x += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16;
  1642. y = i < n ? BIG_DIGIT(ptr, i++) : 0;
  1643. y += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16;
  1644. UINT32_HASH_2(x, y, con);
  1645. } while (i < n);
  1646. #elif D_EXP == 32
  1647. do {
  1648. Uint32 x, y;
  1649. x = i < n ? BIG_DIGIT(ptr, i++) : 0;
  1650. y = i < n ? BIG_DIGIT(ptr, i++) : 0;
  1651. UINT32_HASH_2(x, y, con);
  1652. } while (i < n);
  1653. #elif D_EXP == 64
  1654. do {
  1655. Uint t;
  1656. Uint32 x, y;
  1657. ASSERT(i < n);
  1658. t = BIG_DIGIT(ptr, i++);
  1659. x = t & 0xffffffff;
  1660. y = t >> 32;
  1661. UINT32_HASH_2(x, y, con);
  1662. } while (i < n);
  1663. #else
  1664. #error "unsupported D_EXP size"
  1665. #endif
  1666. goto pop_next;
  1667. }
  1668. break;
  1669. case REF_SUBTAG:
  1670. UINT32_HASH(internal_ref_numbers(term)[0], HCONST_7);
  1671. ASSERT(internal_ref_no_numbers(term) == 3);
  1672. UINT32_HASH_2(internal_ref_numbers(term)[1],
  1673. internal_ref_numbers(term)[2], HCONST_8);
  1674. goto pop_next;
  1675. case EXTERNAL_REF_SUBTAG:
  1676. {
  1677. ExternalThing* thing = external_thing_ptr(term);
  1678. ASSERT(external_thing_ref_no_numbers(thing) == 3);
  1679. /* See limitation #2 */
  1680. #ifdef ARCH_64
  1681. POINTER_HASH(thing->node, HCONST_7);
  1682. UINT32_HASH(external_thing_ref_numbers(thing)[0], HCONST_7);
  1683. #else
  1684. UINT32_HASH_2(thing->node,
  1685. external_thing_ref_numbers(thing)[0], HCONST_7);
  1686. #endif
  1687. UINT32_HASH_2(external_thing_ref_numbers(thing)[1],
  1688. external_thing_ref_numbers(thing)[2], HCONST_8);
  1689. goto pop_next;
  1690. }
  1691. case EXTERNAL_PID_SUBTAG: {
  1692. ExternalThing* thing = external_thing_ptr(term);
  1693. /* See limitation #2 */
  1694. #ifdef ARCH_64
  1695. POINTER_HASH(thing->node, HCONST_5);
  1696. UINT32_HASH(thing->data.ui[0], HCONST_5);
  1697. #else
  1698. UINT32_HASH_2(thing->node, thing->data.ui[0], HCONST_5);
  1699. #endif
  1700. goto pop_next;
  1701. }
  1702. case EXTERNAL_PORT_SUBTAG: {
  1703. ExternalThing* thing = external_thing_ptr(term);
  1704. /* See limitation #2 */
  1705. #ifdef ARCH_64
  1706. POINTER_HASH(thing->node, HCONST_6);
  1707. UINT32_HASH(thing->data.ui[0], HCONST_6);
  1708. #else
  1709. UINT32_HASH_2(thing->node, thing->data.ui[0], HCONST_6);
  1710. #endif
  1711. goto pop_next;
  1712. }
  1713. case FLOAT_SUBTAG:
  1714. {
  1715. FloatDef ff;
  1716. GET_DOUBLE(term, ff);
  1717. if (ff.fd == 0.0f) {
  1718. /* ensure positive 0.0 */
  1719. ff.fd = erts_get_positive_zero_float();
  1720. }
  1721. U

Large files files are truncated, but you can click here to view the full file