hash_map.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. /**************************************************************************/
  2. /* hash_map.h */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #ifndef HASH_MAP_H
  31. #define HASH_MAP_H
  32. #include "core/math/math_funcs.h"
  33. #include "core/os/memory.h"
  34. #include "core/templates/hashfuncs.h"
  35. #include "core/templates/paged_allocator.h"
  36. #include "core/templates/pair.h"
  37. /**
 * A HashMap implementation that uses open addressing with Robin Hood hashing.
 * Robin Hood hashing swaps out entries that have a smaller probing distance
 * than the to-be-inserted entry, which evens out the average probing distance
 * and enables faster lookups. Backward shift deletion is employed to further
 * improve the performance and to avoid infinite loops in rare cases.
  43. *
 * Keys and values are stored in a doubly linked list in insertion order. This
 * has a slight performance overhead on lookup, which can be mostly compensated
 * for by using a paged allocator if required.
  47. *
 * The assignment operator copies the pairs from one map to the other.
  49. */
  50. template <class TKey, class TValue>
  51. struct HashMapElement {
  52. HashMapElement *next = nullptr;
  53. HashMapElement *prev = nullptr;
  54. KeyValue<TKey, TValue> data;
  55. HashMapElement() {}
  56. HashMapElement(const TKey &p_key, const TValue &p_value) :
  57. data(p_key, p_value) {}
  58. };
  59. template <class TKey, class TValue,
  60. class Hasher = HashMapHasherDefault,
  61. class Comparator = HashMapComparatorDefault<TKey>,
  62. class Allocator = DefaultTypedAllocator<HashMapElement<TKey, TValue>>>
  63. class HashMap {
  64. public:
  65. static constexpr uint32_t MIN_CAPACITY_INDEX = 2; // Use a prime.
  66. static constexpr float MAX_OCCUPANCY = 0.75;
  67. static constexpr uint32_t EMPTY_HASH = 0;
  68. private:
  69. Allocator element_alloc;
  70. HashMapElement<TKey, TValue> **elements = nullptr;
  71. uint32_t *hashes = nullptr;
  72. HashMapElement<TKey, TValue> *head_element = nullptr;
  73. HashMapElement<TKey, TValue> *tail_element = nullptr;
  74. uint32_t capacity_index = 0;
  75. uint32_t num_elements = 0;
  76. _FORCE_INLINE_ uint32_t _hash(const TKey &p_key) const {
  77. uint32_t hash = Hasher::hash(p_key);
  78. if (unlikely(hash == EMPTY_HASH)) {
  79. hash = EMPTY_HASH + 1;
  80. }
  81. return hash;
  82. }
  83. static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) {
  84. const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity);
  85. return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity);
  86. }
  87. bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
  88. if (elements == nullptr || num_elements == 0) {
  89. return false; // Failed lookups, no elements
  90. }
  91. const uint32_t capacity = hash_table_size_primes[capacity_index];
  92. const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
  93. uint32_t hash = _hash(p_key);
  94. uint32_t pos = fastmod(hash, capacity_inv, capacity);
  95. uint32_t distance = 0;
  96. while (true) {
  97. if (hashes[pos] == EMPTY_HASH) {
  98. return false;
  99. }
  100. if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) {
  101. return false;
  102. }
  103. if (hashes[pos] == hash && Comparator::compare(elements[pos]->data.key, p_key)) {
  104. r_pos = pos;
  105. return true;
  106. }
  107. pos = fastmod((pos + 1), capacity_inv, capacity);
  108. distance++;
  109. }
  110. }
  111. void _insert_with_hash(uint32_t p_hash, HashMapElement<TKey, TValue> *p_value) {
  112. const uint32_t capacity = hash_table_size_primes[capacity_index];
  113. const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
  114. uint32_t hash = p_hash;
  115. HashMapElement<TKey, TValue> *value = p_value;
  116. uint32_t distance = 0;
  117. uint32_t pos = fastmod(hash, capacity_inv, capacity);
  118. while (true) {
  119. if (hashes[pos] == EMPTY_HASH) {
  120. elements[pos] = value;
  121. hashes[pos] = hash;
  122. num_elements++;
  123. return;
  124. }
  125. // Not an empty slot, let's check the probing length of the existing one.
  126. uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv);
  127. if (existing_probe_len < distance) {
  128. SWAP(hash, hashes[pos]);
  129. SWAP(value, elements[pos]);
  130. distance = existing_probe_len;
  131. }
  132. pos = fastmod((pos + 1), capacity_inv, capacity);
  133. distance++;
  134. }
  135. }
  136. void _resize_and_rehash(uint32_t p_new_capacity_index) {
  137. uint32_t old_capacity = hash_table_size_primes[capacity_index];
  138. // Capacity can't be 0.
  139. capacity_index = MAX((uint32_t)MIN_CAPACITY_INDEX, p_new_capacity_index);
  140. uint32_t capacity = hash_table_size_primes[capacity_index];
  141. HashMapElement<TKey, TValue> **old_elements = elements;
  142. uint32_t *old_hashes = hashes;
  143. num_elements = 0;
  144. hashes = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
  145. elements = reinterpret_cast<HashMapElement<TKey, TValue> **>(Memory::alloc_static(sizeof(HashMapElement<TKey, TValue> *) * capacity));
  146. for (uint32_t i = 0; i < capacity; i++) {
  147. hashes[i] = 0;
  148. elements[i] = nullptr;
  149. }
  150. if (old_capacity == 0) {
  151. // Nothing to do.
  152. return;
  153. }
  154. for (uint32_t i = 0; i < old_capacity; i++) {
  155. if (old_hashes[i] == EMPTY_HASH) {
  156. continue;
  157. }
  158. _insert_with_hash(old_hashes[i], old_elements[i]);
  159. }
  160. Memory::free_static(old_elements);
  161. Memory::free_static(old_hashes);
  162. }
  163. _FORCE_INLINE_ HashMapElement<TKey, TValue> *_insert(const TKey &p_key, const TValue &p_value, bool p_front_insert = false) {
  164. uint32_t capacity = hash_table_size_primes[capacity_index];
  165. if (unlikely(elements == nullptr)) {
  166. // Allocate on demand to save memory.
  167. hashes = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
  168. elements = reinterpret_cast<HashMapElement<TKey, TValue> **>(Memory::alloc_static(sizeof(HashMapElement<TKey, TValue> *) * capacity));
  169. for (uint32_t i = 0; i < capacity; i++) {
  170. hashes[i] = EMPTY_HASH;
  171. elements[i] = nullptr;
  172. }
  173. }
  174. uint32_t pos = 0;
  175. bool exists = _lookup_pos(p_key, pos);
  176. if (exists) {
  177. elements[pos]->data.value = p_value;
  178. return elements[pos];
  179. } else {
  180. if (num_elements + 1 > MAX_OCCUPANCY * capacity) {
  181. ERR_FAIL_COND_V_MSG(capacity_index + 1 == HASH_TABLE_SIZE_MAX, nullptr, "Hash table maximum capacity reached, aborting insertion.");
  182. _resize_and_rehash(capacity_index + 1);
  183. }
  184. HashMapElement<TKey, TValue> *elem = element_alloc.new_allocation(HashMapElement<TKey, TValue>(p_key, p_value));
  185. if (tail_element == nullptr) {
  186. head_element = elem;
  187. tail_element = elem;
  188. } else if (p_front_insert) {
  189. head_element->prev = elem;
  190. elem->next = head_element;
  191. head_element = elem;
  192. } else {
  193. tail_element->next = elem;
  194. elem->prev = tail_element;
  195. tail_element = elem;
  196. }
  197. uint32_t hash = _hash(p_key);
  198. _insert_with_hash(hash, elem);
  199. return elem;
  200. }
  201. }
  202. public:
  203. _FORCE_INLINE_ uint32_t get_capacity() const { return hash_table_size_primes[capacity_index]; }
  204. _FORCE_INLINE_ uint32_t size() const { return num_elements; }
  205. /* Standard Godot Container API */
  206. bool is_empty() const {
  207. return num_elements == 0;
  208. }
  209. void clear() {
  210. if (elements == nullptr || num_elements == 0) {
  211. return;
  212. }
  213. uint32_t capacity = hash_table_size_primes[capacity_index];
  214. for (uint32_t i = 0; i < capacity; i++) {
  215. if (hashes[i] == EMPTY_HASH) {
  216. continue;
  217. }
  218. hashes[i] = EMPTY_HASH;
  219. element_alloc.delete_allocation(elements[i]);
  220. elements[i] = nullptr;
  221. }
  222. tail_element = nullptr;
  223. head_element = nullptr;
  224. num_elements = 0;
  225. }
  226. TValue &get(const TKey &p_key) {
  227. uint32_t pos = 0;
  228. bool exists = _lookup_pos(p_key, pos);
  229. CRASH_COND_MSG(!exists, "HashMap key not found.");
  230. return elements[pos]->data.value;
  231. }
  232. const TValue &get(const TKey &p_key) const {
  233. uint32_t pos = 0;
  234. bool exists = _lookup_pos(p_key, pos);
  235. CRASH_COND_MSG(!exists, "HashMap key not found.");
  236. return elements[pos]->data.value;
  237. }
  238. const TValue *getptr(const TKey &p_key) const {
  239. uint32_t pos = 0;
  240. bool exists = _lookup_pos(p_key, pos);
  241. if (exists) {
  242. return &elements[pos]->data.value;
  243. }
  244. return nullptr;
  245. }
  246. TValue *getptr(const TKey &p_key) {
  247. uint32_t pos = 0;
  248. bool exists = _lookup_pos(p_key, pos);
  249. if (exists) {
  250. return &elements[pos]->data.value;
  251. }
  252. return nullptr;
  253. }
  254. _FORCE_INLINE_ bool has(const TKey &p_key) const {
  255. uint32_t _pos = 0;
  256. return _lookup_pos(p_key, _pos);
  257. }
  258. bool erase(const TKey &p_key) {
  259. uint32_t pos = 0;
  260. bool exists = _lookup_pos(p_key, pos);
  261. if (!exists) {
  262. return false;
  263. }
  264. const uint32_t capacity = hash_table_size_primes[capacity_index];
  265. const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
  266. uint32_t next_pos = fastmod((pos + 1), capacity_inv, capacity);
  267. while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) {
  268. SWAP(hashes[next_pos], hashes[pos]);
  269. SWAP(elements[next_pos], elements[pos]);
  270. pos = next_pos;
  271. next_pos = fastmod((pos + 1), capacity_inv, capacity);
  272. }
  273. hashes[pos] = EMPTY_HASH;
  274. if (head_element == elements[pos]) {
  275. head_element = elements[pos]->next;
  276. }
  277. if (tail_element == elements[pos]) {
  278. tail_element = elements[pos]->prev;
  279. }
  280. if (elements[pos]->prev) {
  281. elements[pos]->prev->next = elements[pos]->next;
  282. }
  283. if (elements[pos]->next) {
  284. elements[pos]->next->prev = elements[pos]->prev;
  285. }
  286. element_alloc.delete_allocation(elements[pos]);
  287. elements[pos] = nullptr;
  288. num_elements--;
  289. return true;
  290. }
  291. // Reserves space for a number of elements, useful to avoid many resizes and rehashes.
  292. // If adding a known (possibly large) number of elements at once, must be larger than old capacity.
  293. void reserve(uint32_t p_new_capacity) {
  294. uint32_t new_index = capacity_index;
  295. while (hash_table_size_primes[new_index] < p_new_capacity) {
  296. ERR_FAIL_COND_MSG(new_index + 1 == (uint32_t)HASH_TABLE_SIZE_MAX, nullptr);
  297. new_index++;
  298. }
  299. if (new_index == capacity_index) {
  300. return;
  301. }
  302. if (elements == nullptr) {
  303. capacity_index = new_index;
  304. return; // Unallocated yet.
  305. }
  306. _resize_and_rehash(new_index);
  307. }
  308. /** Iterator API **/
  309. struct ConstIterator {
  310. _FORCE_INLINE_ const KeyValue<TKey, TValue> &operator*() const {
  311. return E->data;
  312. }
  313. _FORCE_INLINE_ const KeyValue<TKey, TValue> *operator->() const { return &E->data; }
  314. _FORCE_INLINE_ ConstIterator &operator++() {
  315. if (E) {
  316. E = E->next;
  317. }
  318. return *this;
  319. }
  320. _FORCE_INLINE_ ConstIterator &operator--() {
  321. if (E) {
  322. E = E->prev;
  323. }
  324. return *this;
  325. }
  326. _FORCE_INLINE_ bool operator==(const ConstIterator &b) const { return E == b.E; }
  327. _FORCE_INLINE_ bool operator!=(const ConstIterator &b) const { return E != b.E; }
  328. _FORCE_INLINE_ explicit operator bool() const {
  329. return E != nullptr;
  330. }
  331. _FORCE_INLINE_ ConstIterator(const HashMapElement<TKey, TValue> *p_E) { E = p_E; }
  332. _FORCE_INLINE_ ConstIterator() {}
  333. _FORCE_INLINE_ ConstIterator(const ConstIterator &p_it) { E = p_it.E; }
  334. _FORCE_INLINE_ void operator=(const ConstIterator &p_it) {
  335. E = p_it.E;
  336. }
  337. private:
  338. const HashMapElement<TKey, TValue> *E = nullptr;
  339. };
  340. struct Iterator {
  341. _FORCE_INLINE_ KeyValue<TKey, TValue> &operator*() const {
  342. return E->data;
  343. }
  344. _FORCE_INLINE_ KeyValue<TKey, TValue> *operator->() const { return &E->data; }
  345. _FORCE_INLINE_ Iterator &operator++() {
  346. if (E) {
  347. E = E->next;
  348. }
  349. return *this;
  350. }
  351. _FORCE_INLINE_ Iterator &operator--() {
  352. if (E) {
  353. E = E->prev;
  354. }
  355. return *this;
  356. }
  357. _FORCE_INLINE_ bool operator==(const Iterator &b) const { return E == b.E; }
  358. _FORCE_INLINE_ bool operator!=(const Iterator &b) const { return E != b.E; }
  359. _FORCE_INLINE_ explicit operator bool() const {
  360. return E != nullptr;
  361. }
  362. _FORCE_INLINE_ Iterator(HashMapElement<TKey, TValue> *p_E) { E = p_E; }
  363. _FORCE_INLINE_ Iterator() {}
  364. _FORCE_INLINE_ Iterator(const Iterator &p_it) { E = p_it.E; }
  365. _FORCE_INLINE_ void operator=(const Iterator &p_it) {
  366. E = p_it.E;
  367. }
  368. operator ConstIterator() const {
  369. return ConstIterator(E);
  370. }
  371. private:
  372. HashMapElement<TKey, TValue> *E = nullptr;
  373. };
  374. _FORCE_INLINE_ Iterator begin() {
  375. return Iterator(head_element);
  376. }
  377. _FORCE_INLINE_ Iterator end() {
  378. return Iterator(nullptr);
  379. }
  380. _FORCE_INLINE_ Iterator last() {
  381. return Iterator(tail_element);
  382. }
  383. _FORCE_INLINE_ Iterator find(const TKey &p_key) {
  384. uint32_t pos = 0;
  385. bool exists = _lookup_pos(p_key, pos);
  386. if (!exists) {
  387. return end();
  388. }
  389. return Iterator(elements[pos]);
  390. }
  391. _FORCE_INLINE_ void remove(const Iterator &p_iter) {
  392. if (p_iter) {
  393. erase(p_iter->key);
  394. }
  395. }
  396. _FORCE_INLINE_ ConstIterator begin() const {
  397. return ConstIterator(head_element);
  398. }
  399. _FORCE_INLINE_ ConstIterator end() const {
  400. return ConstIterator(nullptr);
  401. }
  402. _FORCE_INLINE_ ConstIterator last() const {
  403. return ConstIterator(tail_element);
  404. }
  405. _FORCE_INLINE_ ConstIterator find(const TKey &p_key) const {
  406. uint32_t pos = 0;
  407. bool exists = _lookup_pos(p_key, pos);
  408. if (!exists) {
  409. return end();
  410. }
  411. return ConstIterator(elements[pos]);
  412. }
  413. /* Indexing */
  414. const TValue &operator[](const TKey &p_key) const {
  415. uint32_t pos = 0;
  416. bool exists = _lookup_pos(p_key, pos);
  417. CRASH_COND(!exists);
  418. return elements[pos]->data.value;
  419. }
  420. TValue &operator[](const TKey &p_key) {
  421. uint32_t pos = 0;
  422. bool exists = _lookup_pos(p_key, pos);
  423. if (!exists) {
  424. return _insert(p_key, TValue())->data.value;
  425. } else {
  426. return elements[pos]->data.value;
  427. }
  428. }
  429. /* Insert */
  430. Iterator insert(const TKey &p_key, const TValue &p_value, bool p_front_insert = false) {
  431. return Iterator(_insert(p_key, p_value, p_front_insert));
  432. }
  433. /* Constructors */
  434. HashMap(const HashMap &p_other) {
  435. reserve(hash_table_size_primes[p_other.capacity_index]);
  436. if (p_other.num_elements == 0) {
  437. return;
  438. }
  439. for (const KeyValue<TKey, TValue> &E : p_other) {
  440. insert(E.key, E.value);
  441. }
  442. }
  443. void operator=(const HashMap &p_other) {
  444. if (this == &p_other) {
  445. return; // Ignore self assignment.
  446. }
  447. if (num_elements != 0) {
  448. clear();
  449. }
  450. reserve(hash_table_size_primes[p_other.capacity_index]);
  451. if (p_other.elements == nullptr) {
  452. return; // Nothing to copy.
  453. }
  454. for (const KeyValue<TKey, TValue> &E : p_other) {
  455. insert(E.key, E.value);
  456. }
  457. }
  458. HashMap(uint32_t p_initial_capacity) {
  459. // Capacity can't be 0.
  460. capacity_index = 0;
  461. reserve(p_initial_capacity);
  462. }
  463. HashMap() {
  464. capacity_index = MIN_CAPACITY_INDEX;
  465. }
  466. uint32_t debug_get_hash(uint32_t p_index) {
  467. if (num_elements == 0) {
  468. return 0;
  469. }
  470. ERR_FAIL_INDEX_V(p_index, get_capacity(), 0);
  471. return hashes[p_index];
  472. }
  473. Iterator debug_get_element(uint32_t p_index) {
  474. if (num_elements == 0) {
  475. return Iterator();
  476. }
  477. ERR_FAIL_INDEX_V(p_index, get_capacity(), Iterator());
  478. return Iterator(elements[p_index]);
  479. }
  480. ~HashMap() {
  481. clear();
  482. if (elements != nullptr) {
  483. Memory::free_static(elements);
  484. Memory::free_static(hashes);
  485. }
  486. }
  487. };
  488. #endif // HASH_MAP_H