hash.c

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#include "MurmurHash3.h"
#include "hash.h"

#define MURMUR_SEED 718281828

static uint64_t hash_key(char* key, size_t len);
static int64_t find_bucket(HashTable* obj, uint64_t hash, char* key);
HashTable* HT_create(int allocPOT) {
    HashTable* obj;

    obj = malloc(sizeof(*obj));
    if(!obj) return NULL;

    if(HT_init(obj, allocPOT)) {
        fprintf(stderr, "Failed to initialize hash table\n");
        free(obj);
        return NULL;
    }

    return obj;
}
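/*
   Usage sketch (illustrative, not part of the original file): creating a
   table with roughly 16 buckets and tearing it down again. HT_destroy()
   only releases the buckets, so the caller frees the handle itself.

       HashTable* ht = HT_create(4);      // 1 << 4 = 16 initial buckets
       if(!ht) { ... handle allocation failure ... }

       // ... use the table ...

       HT_destroy(ht, 0);                 // 0: stored values are not free()'d
       free(ht);                          // owner cleans up the struct
*/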
int HT_init(HashTable* obj, int allocPOT) {
    int pot;

    pot = allocPOT < 4 ? 4 : allocPOT;

    obj->fill = 0;
    obj->alloc_size = 1 << pot;
    obj->grow_ratio = 0.75f;
    obj->shrink_ratio = 99.0f; // set greater than 1.0 to entirely disable

    obj->buckets = calloc(1, sizeof(*obj->buckets) * obj->alloc_size);
    if(!obj->buckets) {
        return 1;
    }

    return 0;
}
void HT_destroy(HashTable* obj, int free_values_too) {
    int i, n;

    if(free_values_too) {
        for(i = 0, n = 0; i < obj->alloc_size && n < obj->fill; i++) {
            // only free valid pointers that also have a key
            // deleted items are assumed to be cleaned up by the user
            if(obj->buckets[i].key) {
                if(obj->buckets[i].value) free(obj->buckets[i].value);
                n++;
            }
        }
    }

    if(obj->buckets) free(obj->buckets);
    // free(obj); owner has to clean up
}
// uses a truncated 128bit murmur3 hash
static uint64_t hash_key(char* key, size_t len) {
    uint64_t hash[2];

    // len is optional; pass -1 to have it computed
    if(len == (size_t)-1) len = strlen(key);

    MurmurHash3_x64_128(key, len, MURMUR_SEED, hash);

    return hash[0];
}
static int64_t find_bucket(HashTable* obj, uint64_t hash, char* key) {
    int64_t startBucket, bi;

    bi = startBucket = hash % obj->alloc_size;

    do {
        struct hash_bucket* bucket;

        bucket = &obj->buckets[bi];

        // empty bucket
        if(bucket->key == NULL) {
            return bi;
        }

        if(bucket->hash == hash) {
            if(!strcmp(key, bucket->key)) {
                // bucket is the right one and contains a value already
                return bi;
            }

            // collision, probe next bucket
        }

        bi = (bi + 1) % obj->alloc_size;
    } while(bi != startBucket);

    // should never reach here if the table is maintained properly
    return -1;
}
// should always be called with a power of two
int HT_resize(HashTable* obj, int newSize) {
    struct hash_bucket* old, *op;
    size_t oldlen = obj->alloc_size;
    int64_t i, n, bi;

    old = op = obj->buckets;

    obj->alloc_size = newSize;
    obj->buckets = calloc(1, sizeof(*obj->buckets) * newSize);
    if(!obj->buckets) return 1;

    // op has to advance past empty buckets too, so it is incremented in the
    // loop header rather than only after a successful rehash
    for(i = 0, n = 0; i < oldlen && n < obj->fill; i++, op++) {
        if(op->key == NULL) continue;

        bi = find_bucket(obj, op->hash, op->key);
        obj->buckets[bi].value = op->value;
        obj->buckets[bi].hash = op->hash;
        obj->buckets[bi].key = op->key;

        n++;
    }

    free(old);

    return 0;
}
// TODO: better return values and missing key handling
// returns 0 if val is set to the value
// *val == NULL && return > 0 means the key was not found
int HT_get(HashTable* obj, char* key, void* val) {
    uint64_t hash;
    int64_t bi;

    hash = hash_key(key, -1);
    bi = find_bucket(obj, hash, key);

    if(bi < 0 || obj->buckets[bi].key == NULL) {
        *(void**)val = NULL;
        return 1;
    }

    *(void**)val = obj->buckets[bi].value;
    return 0;
}
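/*
   Usage sketch (illustrative, assumes an initialized table ht and a
   hypothetical key string): a non-zero return means the key was not present
   and the output pointer is set to NULL; otherwise the stored value is
   written through val.

       void* v;
       if(HT_get(ht, "some-key", &v) == 0) {
           // v holds whatever pointer was stored with HT_set()
       }
*/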
int HT_getInt(HashTable* obj, char* key, int64_t* val) {
    return HT_get(obj, key, (void*)val);
}
// zero for success
int HT_set(HashTable* obj, char* key, void* val) {
    uint64_t hash;
    int64_t bi;

    // check size and grow if necessary
    // (the division must be floating point; integer division would never
    //  reach grow_ratio until the table was completely full)
    if(obj->fill / (float)obj->alloc_size >= obj->grow_ratio) {
        HT_resize(obj, obj->alloc_size * 2);
    }

    hash = hash_key(key, -1);
    bi = find_bucket(obj, hash, key);
    if(bi < 0) return 1;

    if(obj->buckets[bi].key == NULL) {
        // new bucket
        obj->fill++;
    }

    obj->buckets[bi].value = val;
    obj->buckets[bi].key = key;
    obj->buckets[bi].hash = hash;

    return 0;
}
// zero for success
int HT_setInt(HashTable* obj, char* key, int64_t val) {
    return HT_set(obj, key, (void*)val);
}
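/*
   Usage sketch (illustrative, assumes an initialized table ht): the table
   stores the key pointer as-is, so the string must outlive the entry, and
   HT_setInt() smuggles the integer through the void* value slot.

       static char name[] = "count";
       HT_setInt(ht, name, 42);

       int64_t n;
       if(HT_getInt(ht, name, &n) == 0) printf("count = %lld\n", (long long)n);
*/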
// zero for success
int HT_delete(HashTable* obj, char* key) {
    uint64_t hash;
    int64_t bi, empty_bi, nat_bi;
    size_t alloc_size = obj->alloc_size;

    /* do this instead of the deletion algorithm
    // check size and shrink if necessary
    if(obj->fill / alloc_size <= obj->shrink_ratio) {
        HT_resize(obj, alloc_size > 32 ? alloc_size / 2 : 16);
        alloc_size = obj->alloc_size;
    }
    */

    hash = hash_key(key, -1);
    bi = find_bucket(obj, hash, key);

    // nothing to delete, bail early
    if(bi < 0 || obj->buckets[bi].key == NULL) return 0;

    // backward-shift deletion:
    // starting at the deleted bucket, check successive buckets for keys that
    // were displaced by collisions, walk forward until the furthest such key
    // is found, and move it back into the working (empty) bucket
    empty_bi = bi;

    do {
        bi = (bi + 1) % alloc_size;

        if(obj->buckets[bi].key == NULL) {
            // empty bucket, the cluster ends here
            break;
        }

        // bucket the hash at the current index naturally would be in
        nat_bi = obj->buckets[bi].hash % alloc_size;

        if((bi > empty_bi && // after the start
            (nat_bi <= empty_bi /* in a sequence of probed misses */ || nat_bi > bi /* wrapped all the way around */))
            ||
           (bi < empty_bi && // wrapped around
            (nat_bi <= empty_bi /* in a sequence of probed misses... */ && nat_bi > bi /* ...from before the wrap */))) {

            // move this one back
            obj->buckets[empty_bi].key = obj->buckets[bi].key;
            obj->buckets[empty_bi].hash = obj->buckets[bi].hash;
            obj->buckets[empty_bi].value = obj->buckets[bi].value;

            empty_bi = bi;
        }
    } while(1);

    obj->buckets[empty_bi].key = NULL;
    obj->fill--; // one fewer live entry

    return 0;
}
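/*
   Usage sketch (illustrative, assumes an initialized table ht): deleting is
   safe to call on keys that were never inserted, and the backward shift
   above keeps later probe sequences intact, so no tombstones are needed.

       HT_delete(ht, "count");
*/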
// iteration. no order. results undefined if modified while iterating
// returns 0 when there is none left
// set iter to NULL to start
int HT_next(HashTable* obj, void* iter, char** key, void* value) {
    struct hash_bucket* b = *(void**)iter;

    // a tiny bit of idiot-proofing
    if(b == NULL) b = &obj->buckets[-1];

    do {
        b++;

        if(b >= obj->buckets + obj->alloc_size) {
            // end of the list
            *(void**)value = NULL;
            *key = NULL;
            return 0;
        }
    } while(!b->key);

    *key = b->key;
    *(void**)value = b->value;
    *(void**)iter = b;

    return 1;
}
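/*
   Usage sketch (illustrative, assumes an initialized table ht): iterating
   every live entry. The iterator is just a bucket pointer, started at NULL.

       void* iter = NULL;
       char* key;
       void* value;

       while(HT_next(ht, &iter, &key, &value)) {
           printf("%s -> %p\n", key, value);
       }
*/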