hash.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. // Public Domain.
  2. #include <stdlib.h>
  3. #include <stdio.h>
  4. #include <string.h>
  5. #include <stdint.h>
  6. #include <assert.h>
  7. #include "hash_fns/MurmurHash3.h"
  8. #include "hash.h"
  9. #include <stdlib.h>
  10. #define MURMUR_SEED 718281828
  11. static uint64_t hash_key(char* key, int64_t len);
  12. //static uint64_t hash_int64_key(uint64_t key);
  13. #ifndef STI_HAS_STATIC_nextPOT
  14. #define STI_HAS_STATIC_nextPOT
  15. // super nifty site:
  16. // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
  17. inline static size_t nextPOT(size_t in) {
  18. in--;
  19. in |= in >> 1;
  20. in |= in >> 2;
  21. in |= in >> 4;
  22. in |= in >> 8;
  23. in |= in >> 16;
  24. in++;
  25. return in;
  26. }
  27. #endif
  28. static ptrdiff_t oaht_find_bucket(struct HT_base_layout* ht, uint64_t hash, void* key) {
  29. int64_t startBucket, bi;
  30. bi = hash % ht->alloc_size;
  31. startBucket = bi;
  32. struct bucket {
  33. uint64_t hash;
  34. void* key;
  35. char value[];
  36. };
  37. do {
  38. // struct hash_bucket* bucket;
  39. struct bucket* bucket = (struct bucket*)((char*)ht->buckets + (bi * ht->stride));
  40. // empty bucket
  41. if(bucket->key == NULL) {
  42. return bi;
  43. }
  44. if(bucket->hash == hash) {
  45. switch(ht->key_mode) {
  46. case 's':
  47. if(!strcmp(key, bucket->key)) {
  48. return bi;
  49. }
  50. break;
  51. case 'p':
  52. if(!memcmp(key, bucket->key, ht->key_len)) {
  53. // bucket is the right one and contains a value already
  54. return bi;
  55. }
  56. break;
  57. case 'i':
  58. if(!memcmp(key, &bucket->key, ht->key_len)) {
  59. // bucket is the right one and contains a value already
  60. return bi;
  61. }
  62. break;
  63. }
  64. // collision, probe next bucket
  65. }
  66. bi = (bi + 1) % ht->alloc_size;
  67. } while(bi != startBucket);
  68. // should never reach here if the table is maintained properly
  69. assert(0);
  70. return -1;
  71. }
  72. // TODO: better return values and missing key handling
  73. // returns 0 if val is set to the value
  74. // return > 0 means the key was not found;
  75. int oaht_getp_kptr(struct HT_base_layout* ht, void* key, void** valp) {
  76. uint64_t hash;
  77. int64_t bi;
  78. // memory layout:
  79. //
  80. // uint64_t hash;
  81. // keyType key;
  82. // valType val;
  83. if(key == NULL) {
  84. // if(valp) *valp = NULL;
  85. return 2;
  86. }
  87. size_t key_len = ht->key_mode == 's' ? strlen(key) : ht->key_len;
  88. hash = hash_key(key, key_len);
  89. bi = oaht_find_bucket(ht, hash, key);
  90. // printf("\nkeymode: %c, keylen %ld, hash %lx, bi: %ld \n", ht->key_mode, key_len, hash, bi);
  91. if(bi < 0) {// || *(char**)(ht->buckets + (bi * ht->stride) + sizeof(uint64_t)) == NULL) {
  92. // *valp = NULL;
  93. // printf("bail #1 (negative bucket index)\n");
  94. return 1;
  95. }
  96. char* b = (char*)ht->buckets + (ht->stride * bi);
  97. size_t key_width = ht->key_mode == 'i' ? ht->key_len : sizeof(char*);
  98. uint64_t* b_hash = (uint64_t*)b;
  99. char* b_key = b + sizeof(uint64_t);
  100. char* b_val = b + sizeof(uint64_t) + key_width;
  101. if(*b_hash != hash) {
  102. // printf("bail #2 (wrong hash)\n");
  103. return 1;
  104. }
  105. if(ht->key_mode != 'i') {
  106. if(memcmp(*(char**)b_key, key, key_len)) {
  107. // printf("bail #3 (hash collision, wrong key)\n");
  108. return 1;
  109. }
  110. }
  111. else {
  112. if(memcmp(b_key, key, key_width)) {
  113. // printf("bail #4 (hash collision, wrong key)\n");
  114. return 1;
  115. }
  116. }
  117. // printf("oaht get - bi: %ld (alloc %ld, stride %ld)\n", bi, ht->alloc_size, ht->stride);
  118. // index hash key
  119. *valp = b_val; //(char*)ht->buckets + (bi * ht->stride) + sizeof(uint64_t) + key_width;
  120. //printf("memcpy len: %ld\n", ht->stride - key_width - 8);
  121. //memcpy(*valp, (char*)ht->buckets + (bi * ht->stride) + sizeof(uint64_t) + key_width, ht->stride - key_width - 8);
  122. return 0;
  123. }
  124. // zero for success
  125. int oaht_get_klit(struct HT_base_layout* ht, uint64_t key, void* val) {
  126. return oaht_get_kptr(ht, &key, val);
  127. }
  128. int oaht_get_kptr(struct HT_base_layout* ht, void* key, void* val) {
  129. void* p = NULL;
  130. int ret = oaht_getp_kptr(ht, key, &p);
  131. if(ret == 0) {
  132. size_t key_width = ht->key_mode == 'i' ? ht->key_len : sizeof(char*);
  133. memcpy(
  134. val,
  135. p,
  136. ht->stride - sizeof(uint64_t) - key_width
  137. );
  138. }
  139. return ret;
  140. }
  141. // zero for success
  142. int oaht_set_kptr(struct HT_base_layout* ht, void* key, void* val) {
  143. uint64_t hash;
  144. int64_t bi;
  145. // memory layout:
  146. //
  147. // uint64_t hash;
  148. // keyType key;
  149. // valType val;
  150. // check size and grow if necessary
  151. if((float)ht->fill / (float)ht->alloc_size >= 0.75) {
  152. oaht_resize(ht, ht->alloc_size * 2);
  153. }
  154. if(ht->key_mode == 's') {
  155. hash = hash_key(key, strlen(key));
  156. }
  157. else { /* if(ht->key_mode == 'p') { */
  158. hash = hash_key(key, ht->key_len);
  159. } /*
  160. else {
  161. hash = hash_key((char*)&key, ht->key_len);
  162. } */
  163. bi = oaht_find_bucket(ht, hash, key);
  164. if(bi < 0) return 1;
  165. // printf("oaht set - bi: %ld (alloc %ld, stride %ld)\n", bi, ht->alloc_size, ht->stride);
  166. char* b = (char*)ht->buckets + (ht->stride * bi);
  167. size_t key_width = ht->key_mode == 'i' ? ht->key_len : sizeof(char*);
  168. uint64_t* b_hash = (uint64_t*)b;
  169. char* b_key = b + sizeof(uint64_t);
  170. char* b_val = b + sizeof(uint64_t) + key_width;
  171. if(*b_hash == 0) {
  172. // new bucket
  173. ht->fill++;
  174. }
  175. size_t val_width = ht->stride - sizeof(uint64_t) - key_width;
  176. // copy the value in
  177. memcpy(b_val, val, val_width);
  178. // copy the key in
  179. if(ht->key_mode == 'i') {
  180. memcpy(b_key, key, key_width);
  181. }
  182. else {
  183. *(uint64_t*)b_key = (uint64_t)key;
  184. }
  185. // finally the hash value
  186. *b_hash = hash;
  187. return 0;
  188. }
  189. // zero for success
  190. int oaht_set_klit(struct HT_base_layout* ht, uint64_t key, void* val) {
  191. return oaht_set_kptr(ht, &key, val);
  192. }
  193. // should always be called with a power of two
  194. int oaht_resize(struct HT_base_layout* ht, size_t newSize) {
  195. struct bucket {
  196. uint64_t hash;
  197. char* key;
  198. char value;
  199. };
  200. char* old, *op;
  201. int64_t oldlen = ht->alloc_size;
  202. int64_t i, n, bi;
  203. old = op = ht->buckets;
  204. ht->alloc_size = newSize;
  205. ht->buckets = calloc(1, ht->stride * newSize);
  206. if(!ht->buckets) return 1;
  207. for(i = 0, n = 0; i < oldlen && n < (int64_t)ht->fill; i++) {
  208. #define OP ((struct bucket*)(op + (ht->stride * i)))
  209. if(OP->hash == 0) {
  210. continue;
  211. }
  212. #define BK ((struct bucket*)((char*)ht->buckets + (ht->stride * bi)))
  213. // size_t key_width = ht->key_mode == 'i' ? ht->key_len : sizeof(char*);
  214. bi = oaht_find_bucket(ht, OP->hash, ht->key_mode == 'i' ? (char*)&OP->key : (char*)OP->key);
  215. memcpy(BK, OP, ht->stride);
  216. n++;
  217. }
  218. #undef BK
  219. #undef OP
  220. free(old);
  221. return 0;
  222. }
  223. // zero for success
  224. int oaht_delete(struct HT_base_layout* ht, void* key) {
  225. uint64_t hash;
  226. int64_t bi, empty_bi, nat_bi;
  227. struct bucket {
  228. uint64_t hash;
  229. char* key;
  230. char value;
  231. };
  232. /* do this instead of the deletion algorithm
  233. // check size and shrink if necessary
  234. if(obj->fill / alloc_size <= obj->shrink_ratio) {
  235. HT_resize(obj, alloc_size > 32 ? alloc_size / 2 : 16);
  236. alloc_size = obj->alloc_size;
  237. }
  238. */
  239. size_t key_len = ht->key_mode == 's' ? strlen(key) : ht->key_len;
  240. hash = hash_key(key, key_len);
  241. bi = oaht_find_bucket(ht, hash, key);
  242. // if there's a key, work until an empty bucket is found
  243. // check successive buckets for colliding keys
  244. // walk forward until the furthest colliding key is found
  245. // move it to the working bucket.
  246. //
  247. #define BK ((struct bucket*)((char*)ht->buckets + (ht->stride * bi)))
  248. #define E_BK ((struct bucket*)((char*)ht->buckets + (ht->stride * empty_bi)))
  249. // nothing to delete, bail early
  250. if(BK->hash == 0) return 0;
  251. //
  252. empty_bi = bi;
  253. // size_t key_width = ht->key_mode == 'i' ? ht->key_len : sizeof(char*);
  254. do {
  255. bi = (bi + 1) % ht->alloc_size;
  256. if(BK->hash == 0) {
  257. //empty bucket
  258. break;
  259. }
  260. // bucket the hash at the current index naturally would be in
  261. nat_bi = BK->hash % ht->alloc_size;
  262. if((bi > empty_bi && // after the start
  263. (nat_bi <= empty_bi /* in a sequence of probed misses */ || nat_bi > bi /* wrapped all the way around */))
  264. ||
  265. (bi < empty_bi && // wrapped around
  266. (nat_bi <= empty_bi /* in a sequence of probed misses... */ && nat_bi > bi /* ..from before the wrap */))) {
  267. // move this one back
  268. memcpy(E_BK, BK, ht->stride);
  269. empty_bi = bi;
  270. }
  271. } while(1);
  272. E_BK->hash = 0;
  273. E_BK->key = 0;
  274. ht->fill--;
  275. return 0;
  276. }
  277. // iteration. no order. results undefined if modified while iterating
  278. // returns 0 when there is none left
  279. // set iter to NULL to start
  280. int oaht_nextp(struct HT_base_layout* ht, void** iter, void** key, void** valp) {
  281. // memory layout:
  282. //
  283. // uint64_t hash;
  284. // keyType key;
  285. // valType val;
  286. char* b = *iter;
  287. // for starting the loop
  288. if(b == NULL) b = (char*)ht->buckets - ht->stride;
  289. // TODO: make sure strings and pointers and such are all reasonably handled for the key
  290. do {
  291. b += ht->stride;
  292. if(b >= (char*)ht->buckets + (ht->alloc_size * ht->stride)) {
  293. // end of the list
  294. *valp = NULL;
  295. return 0;
  296. }
  297. } while(!*(uint64_t*)b);
  298. // *key = B->key;
  299. size_t key_width = ht->key_mode == 'i' ? ht->key_len : sizeof(char*);
  300. size_t val_width = ht->stride - sizeof(uint64_t) - key_width;
  301. uint64_t* b_hash = (uint64_t*)b;
  302. char* b_key = b + sizeof(uint64_t);
  303. char* b_val = b + sizeof(uint64_t) + key_width;
  304. // copy the value pointer
  305. *valp = b_val;
  306. // copy the key out
  307. if(ht->key_mode == 'i') {
  308. memcpy(key, b_key, key_width);
  309. }
  310. else {
  311. *(uint64_t*)key = *(uint64_t*)b_key;
  312. }
  313. *iter = b;
  314. return 1;
  315. }
  316. // iteration. no order. results undefined if modified while iterating
  317. // returns 0 when there is none left
  318. // set iter to NULL to start
  319. int oaht_next(struct HT_base_layout* ht, void** iter, void** key, void* val) {
  320. // memory layout:
  321. //
  322. // uint64_t hash;
  323. // keyType key;
  324. // valType val;
  325. char* b = *iter;
  326. // a tiny bit of idiot-proofing
  327. if(b == NULL) b = (char*)ht->buckets - ht->stride;
  328. do {
  329. b += ht->stride;
  330. if(b >= (char*)ht->buckets + (ht->alloc_size * ht->stride)) {
  331. // end of the list
  332. return 0;
  333. }
  334. } while(0 == (*(uint64_t*)b));
  335. size_t key_width = ht->key_mode == 'i' ? ht->key_len : sizeof(char*);
  336. size_t val_width = ht->stride - sizeof(uint64_t) - key_width;
  337. uint64_t* b_hash = (uint64_t*)b;
  338. char* b_key = b + sizeof(uint64_t);
  339. char* b_val = b + sizeof(uint64_t) + key_width;
  340. // copy the value out
  341. memcpy(val, b_val, val_width);
  342. // copy the key out
  343. if(ht->key_mode == 'i') {
  344. memcpy(key, b_key, key_width);
  345. }
  346. else {
  347. *(uint64_t*)key = *(uint64_t*)b_key;
  348. }
  349. *iter = b;
  350. return 1;
  351. }
  352. // uses a truncated 128bit murmur3 hash, except never returns 0
  353. static uint64_t hash_key(char* key, int64_t len) {
  354. uint64_t hash[2];
  355. // len is optional
  356. // if(len <= 0) len = strlen(key);
  357. MurmurHash3_x64_128(key, len, MURMUR_SEED, hash);
  358. return hash[0] == 0 ? 1 : hash[0];
  359. }
  360. /*
  361. static uint64_t hash_int64_key(uint64_t key) {
  362. uint64_t hash[2];
  363. MurmurHash3_x64_128(&key, 8, MURMUR_SEED, hash);
  364. return hash[0] == 0 ? 1 : hash[0];
  365. }
  366. */