PageRenderTime 49ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/src/hash_table.c

https://bitbucket.org/swhalen/rsl
C | 257 lines | 165 code | 49 blank | 43 comment | 42 complexity | 0f2fbee38a7cdbac67e3034a84f076c5 MD5 | raw file
Possible License(s): GPL-3.0
  1. /*
  2. * This file incorporates code by James Routley, under the following
  3. * license.
  4. *
  5. * MIT License
  6. *
  7. * Copyright (c) 2017 James Routley
  8. *
  9. * Permission is hereby granted, free of charge, to any person
  10. * obtaining a copy of this software and associated documentation
  11. * files (the "Software"), to deal in the Software without
  12. * restriction, including without limitation the rights to use, copy,
  13. * modify, merge, publish, distribute, sublicense, and/or sell copies
  14. * of the Software, and to permit persons to whom the Software is
  15. * furnished to do so, subject to the following conditions:
  16. *
  17. * The above copyright notice and this permission notice shall be
  18. * included in all copies or substantial portions of the Software.
  19. *
  20. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  24. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  26. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  27. * SOFTWARE.
  28. */
  29. #include <assert.h>
  30. #include <gc/gc.h>
  31. #include <string.h>
  32. #include <math.h>
  33. #include "hash_table.h"
  34. /* Definitions */
  35. typedef struct {
  36. char* key;
  37. void* value;
  38. } ht_item;
  39. struct _HashTable {
  40. int base_size;
  41. int size;
  42. int count;
  43. ht_item** items;
  44. };
  45. /* Constants */
  46. static const int HT_INITIAL_BASE_SIZE = 2;
  47. static const int HT_MIN_LOAD = 30;
  48. static const int HT_MAX_LOAD = 70;
  49. static ht_item HT_DELETED_ITEM = {NULL, NULL};
  50. /* Utilities */
  51. static int is_prime(const int x) {
  52. if (x < 2) { return -1; }
  53. if (x < 4) { return 1; }
  54. if ((x % 2) == 0) { return 0; }
  55. for (int i = 3; i <= floor(sqrt((double) x)); i += 2) {
  56. if ((x % i) == 0) {
  57. return 0;
  58. }
  59. }
  60. return 1;
  61. }
  62. static int next_prime(int x) {
  63. while (is_prime(x) != 1) {
  64. x++;
  65. }
  66. return x;
  67. }
  68. /* Static functions */
  69. static ht_item* ht_new_item(const char* k, void* v) {
  70. ht_item* i = GC_MALLOC(sizeof(*i));
  71. assert(i != NULL);
  72. size_t k_len = strlen(k);
  73. /* Copy the key string */
  74. i->key = GC_MALLOC(k_len + 1);
  75. assert(i->key != NULL);
  76. memcpy(i->key, k, k_len + 1);
  77. i->value = v;
  78. return i;
  79. }
  80. static unsigned int ht_hash(const char *str) {
  81. /* djb2 */
  82. unsigned int hash = 5381;
  83. int c;
  84. while ((c = *str++)) {
  85. hash = ((hash << 5) + hash) + c;
  86. }
  87. return hash;
  88. }
  89. static unsigned int ht_get_hash(const char* s, const int m,
  90. const int attempt) {
  91. const unsigned int hash_a = ht_hash(s);
  92. return (hash_a + attempt) % m;
  93. }
  94. static bool ht_under_load(HashTable ht) {
  95. const int load = ht->count * 100 / ht->size;
  96. return load < HT_MIN_LOAD;
  97. }
  98. static bool ht_over_load(HashTable ht) {
  99. const int load = (ht->count + 1) * 100 / ht->size;
  100. return load > HT_MAX_LOAD;
  101. }
  102. static HashTable ht_new_sized(const int base_size) {
  103. HashTable ht = GC_MALLOC(sizeof(*ht));
  104. assert(ht != NULL);
  105. ht->base_size = base_size;
  106. ht->size = next_prime(ht->base_size);
  107. ht->count = 0;
  108. ht->items = GC_MALLOC(((size_t)ht->size) * sizeof(ht_item*));
  109. assert(ht->items != NULL);
  110. return ht;
  111. }
  112. static void ht_resize(HashTable ht, const int base_size) {
  113. if (base_size < HT_INITIAL_BASE_SIZE) {
  114. /* Don't resize below the minimum */
  115. return;
  116. }
  117. /* Create a temporary hash table */
  118. HashTable tmp_ht = ht_new_sized(base_size);
  119. ht_item* item;
  120. /* Copy over everything from the old hash table into the temp one */
  121. for (int jj = 0; jj < ht->size; ++jj) {
  122. item = ht->items[jj];
  123. if (item != NULL && item != &HT_DELETED_ITEM) {
  124. hash_table_insert(tmp_ht, item->key, item->value);
  125. }
  126. }
  127. /* Update the old hash table with the temp one's properties */
  128. ht->base_size = tmp_ht->base_size;
  129. ht->size = tmp_ht->size;
  130. ht->count = tmp_ht->count;
  131. ht->items = tmp_ht->items;
  132. }
  133. static void ht_resize_up(HashTable ht) {
  134. const int new_size = ht->base_size * 2;
  135. ht_resize(ht, new_size);
  136. }
  137. static void ht_resize_down(HashTable ht) {
  138. const int new_size = ht->base_size / 2;
  139. assert(new_size >= ht->count);
  140. ht_resize(ht, new_size);
  141. }
  142. /* Interface functions */
  143. HashTable make_hash_table() {
  144. return ht_new_sized(HT_INITIAL_BASE_SIZE);
  145. }
  146. void ht_display(HashTable);
  147. void hash_table_insert(HashTable ht, const char* key, void* value) {
  148. if (ht_over_load(ht)) {
  149. ht_resize_up(ht);
  150. }
  151. ht_item* item = ht_new_item(key, value);
  152. unsigned int index = ht_get_hash(item->key, ht->size, 0);
  153. ht_item* cur_item = ht->items[index];
  154. int attempt = 1;
  155. while (cur_item != NULL) {
  156. if (cur_item == &HT_DELETED_ITEM) {
  157. /* We can overwrite this "bucket" */
  158. ht->items[index] = item;
  159. ht->count++;
  160. return;
  161. } else if (strcmp(cur_item->key, key) == 0) {
  162. /* Existing entry with same key, so we overwrite again but
  163. * count stays the same */
  164. ht->items[index] = item;
  165. return;
  166. } else {
  167. index = ht_get_hash(item->key, ht->size, attempt);
  168. cur_item = ht->items[index];
  169. attempt++;
  170. }
  171. }
  172. /* New entry */
  173. ht->items[index] = item;
  174. ht->count++;
  175. }
  176. void* hash_table_search(HashTable ht, const char* key) {
  177. unsigned int index = ht_get_hash(key, ht->size, 0);
  178. ht_item* item = ht->items[index];
  179. int i = 1;
  180. while (item != NULL) {
  181. if (item != &HT_DELETED_ITEM && strcmp(item->key, key) == 0) {
  182. return item->value;
  183. }
  184. index = ht_get_hash(key, ht->size, i);
  185. item = ht->items[index];
  186. i++;
  187. }
  188. return NULL;
  189. }
  190. bool hash_table_delete(HashTable ht, const char* key) {
  191. if (ht_under_load(ht)) {
  192. ht_resize_down(ht);
  193. }
  194. unsigned int index = ht_get_hash(key, ht->size, 0);
  195. ht_item* item = ht->items[index];
  196. int i = 1;
  197. while (item != NULL) {
  198. if (item != &HT_DELETED_ITEM && strcmp(item->key, key) == 0) {
  199. ht->items[index] = &HT_DELETED_ITEM;
  200. ht->count--;
  201. return true;
  202. }
  203. index = ht_get_hash(key, ht->size, i);
  204. item = ht->items[index];
  205. i++;
  206. }
  207. return false;
  208. }