uhash.cpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. * Copyright (C) 1997-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. ******************************************************************************
  8. * Date Name Description
  9. * 03/22/00 aliu Adapted from original C++ ICU Hashtable.
  10. * 07/06/01 aliu Modified to support int32_t keys on
  11. * platforms with sizeof(void*) < 32.
  12. ******************************************************************************
  13. */
  14. #include "uhash.h"
  15. #include "unicode/ustring.h"
  16. #include "cstring.h"
  17. #include "cmemory.h"
  18. #include "uassert.h"
  19. #include "ustr_imp.h"
  20. /* This hashtable is implemented as a double hash. All elements are
  21. * stored in a single array with no secondary storage for collision
  22. * resolution (no linked list, etc.). When there is a hash collision
  23. * (when two unequal keys have the same hashcode) we resolve this by
  24. * using a secondary hash. The secondary hash is an increment
  25. * computed as a hash function (a different one) of the primary
  26. * hashcode. This increment is added to the initial hash value to
  27. * obtain further slots assigned to the same hash code. For this to
  28. * work, the length of the array and the increment must be relatively
  29. * prime. The easiest way to achieve this is to have the length of
  30. * the array be prime, and the increment be any value from
  31. * 1..length-1.
  32. *
  33. * Hashcodes are 32-bit integers. We make sure all hashcodes are
  34. * non-negative by masking off the top bit. This has two effects: (1)
  35. * modulo arithmetic is simplified. If we allowed negative hashcodes,
  36. * then when we computed hashcode % length, we could get a negative
  37. * result, which we would then have to adjust back into range. It's
  38. * simpler to just make hashcodes non-negative. (2) It makes it easy
  39. * to check for empty vs. occupied slots in the table. We just mark
  40. * empty or deleted slots with a negative hashcode.
  41. *
  42. * The central function is _uhash_find(). This function looks for a
  43. * slot matching the given key and hashcode. If one is found, it
  44. * returns a pointer to that slot. If the table is full, and no match
  45. * is found, it returns nullptr -- in theory. This would make the code
  46. * more complicated, since all callers of _uhash_find() would then
  47. * have to check for a nullptr result. To keep this from happening, we
  48. * don't allow the table to fill. When there is only one
  49. * empty/deleted slot left, uhash_put() will refuse to increase the
  50. * count, and fail. This simplifies the code. In practice, one will
  51. * seldom encounter this using default UHashtables. However, if a
  52. * hashtable is set to a U_FIXED resize policy, or if memory is
  53. * exhausted, then the table may fill.
  54. *
  55. * High and low water ratios control rehashing. They establish levels
  56. * of fullness (from 0 to 1) outside of which the data array is
  57. * reallocated and repopulated. Setting the low water ratio to zero
  58. * means the table will never shrink. Setting the high water ratio to
  59. * one means the table will never grow. The ratios should be
  60. * coordinated with the ratio between successive elements of the
  61. * PRIMES table, so that when the primeIndex is incremented or
  62. * decremented during rehashing, it brings the ratio of count / length
  63. * back into the desired range (between low and high water ratios).
  64. */
  65. /********************************************************************
  66. * PRIVATE Constants, Macros
  67. ********************************************************************/
  68. /* This is a list of non-consecutive primes chosen such that
  69. * PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81
  70. * to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this
  71. * ratio is changed, the low and high water ratios should also be
  72. * adjusted to suit.
  73. *
  74. * These prime numbers were also chosen so that they are the largest
  75. * prime number while being less than a power of two.
  76. */
  77. static const int32_t PRIMES[] = {
  78. 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
  79. 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
  80. 16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
  81. 1073741789, 2147483647 /*, 4294967291 */
  82. };
  83. #define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
  84. #define DEFAULT_PRIME_INDEX 4
  85. /* These ratios are tuned to the PRIMES array such that a resize
  86. * places the table back into the zone of non-resizing. That is,
  87. * after a call to _uhash_rehash(), a subsequent call to
  88. * _uhash_rehash() should do nothing (should not churn). This is only
  89. * a potential problem with U_GROW_AND_SHRINK.
  90. */
  91. static const float RESIZE_POLICY_RATIO_TABLE[6] = {
  92. /* low, high water ratio */
  93. 0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */
  94. 0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */
  95. 0.0F, 1.0F /* U_FIXED: Never change size */
  96. };
  97. /*
  98. Invariants for hashcode values:
  99. * DELETED < 0
  100. * EMPTY < 0
  101. * Real hashes >= 0
  102. Hashcodes may not start out this way, but internally they are
  103. adjusted so that they are always positive. We assume 32-bit
  104. hashcodes; adjust these constants for other hashcode sizes.
  105. */
  106. #define HASH_DELETED ((int32_t) 0x80000000)
  107. #define HASH_EMPTY ((int32_t) HASH_DELETED + 1)
  108. #define IS_EMPTY_OR_DELETED(x) ((x) < 0)
  109. /* This macro expects a UHashTok.pointer as its keypointer and
  110. valuepointer parameters */
  111. #define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) UPRV_BLOCK_MACRO_BEGIN { \
  112. if (hash->keyDeleter != nullptr && keypointer != nullptr) { \
  113. (*hash->keyDeleter)(keypointer); \
  114. } \
  115. if (hash->valueDeleter != nullptr && valuepointer != nullptr) { \
  116. (*hash->valueDeleter)(valuepointer); \
  117. } \
  118. } UPRV_BLOCK_MACRO_END
  119. /*
  120. * Constants for hinting whether a key or value is an integer
  121. * or a pointer. If a hint bit is zero, then the associated
  122. * token is assumed to be an integer.
  123. */
  124. #define HINT_BOTH_INTEGERS (0)
  125. #define HINT_KEY_POINTER (1)
  126. #define HINT_VALUE_POINTER (2)
  127. #define HINT_ALLOW_ZERO (4)
  128. /********************************************************************
  129. * PRIVATE Implementation
  130. ********************************************************************/
  131. static UHashTok
  132. _uhash_setElement(UHashtable *hash, UHashElement* e,
  133. int32_t hashcode,
  134. UHashTok key, UHashTok value, int8_t hint) {
  135. UHashTok oldValue = e->value;
  136. if (hash->keyDeleter != nullptr && e->key.pointer != nullptr &&
  137. e->key.pointer != key.pointer) { /* Avoid double deletion */
  138. (*hash->keyDeleter)(e->key.pointer);
  139. }
  140. if (hash->valueDeleter != nullptr) {
  141. if (oldValue.pointer != nullptr &&
  142. oldValue.pointer != value.pointer) { /* Avoid double deletion */
  143. (*hash->valueDeleter)(oldValue.pointer);
  144. }
  145. oldValue.pointer = nullptr;
  146. }
  147. /* Compilers should copy the UHashTok union correctly, but even if
  148. * they do, memory heap tools (e.g. BoundsChecker) can get
  149. * confused when a pointer is cloaked in a union and then copied.
  150. * TO ALLEVIATE THIS, we use hints (based on what API the user is
  151. * calling) to copy pointers when we know the user thinks
  152. * something is a pointer. */
  153. if (hint & HINT_KEY_POINTER) {
  154. e->key.pointer = key.pointer;
  155. } else {
  156. e->key = key;
  157. }
  158. if (hint & HINT_VALUE_POINTER) {
  159. e->value.pointer = value.pointer;
  160. } else {
  161. e->value = value;
  162. }
  163. e->hashcode = hashcode;
  164. return oldValue;
  165. }
  166. /**
  167. * Assumes that the given element is not empty or deleted.
  168. */
  169. static UHashTok
  170. _uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) {
  171. UHashTok empty;
  172. U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode));
  173. --hash->count;
  174. empty.pointer = nullptr; empty.integer = 0;
  175. return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0);
  176. }
  177. static void
  178. _uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
  179. U_ASSERT(hash != nullptr);
  180. U_ASSERT(((int32_t)policy) >= 0);
  181. U_ASSERT(((int32_t)policy) < 3);
  182. hash->lowWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2];
  183. hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1];
  184. }
  185. /**
  186. * Allocate internal data array of a size determined by the given
  187. * prime index. If the index is out of range it is pinned into range.
  188. * If the allocation fails the status is set to
  189. * U_MEMORY_ALLOCATION_ERROR and all array storage is freed. In
  190. * either case the previous array pointer is overwritten.
  191. *
  192. * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1.
  193. */
  194. static void
  195. _uhash_allocate(UHashtable *hash,
  196. int32_t primeIndex,
  197. UErrorCode *status) {
  198. UHashElement *p, *limit;
  199. UHashTok emptytok;
  200. if (U_FAILURE(*status)) return;
  201. U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH);
  202. hash->primeIndex = static_cast<int8_t>(primeIndex);
  203. hash->length = PRIMES[primeIndex];
  204. p = hash->elements = (UHashElement*)
  205. uprv_malloc(sizeof(UHashElement) * hash->length);
  206. if (hash->elements == nullptr) {
  207. *status = U_MEMORY_ALLOCATION_ERROR;
  208. return;
  209. }
  210. emptytok.pointer = nullptr; /* Only one of these two is needed */
  211. emptytok.integer = 0; /* but we don't know which one. */
  212. limit = p + hash->length;
  213. while (p < limit) {
  214. p->key = emptytok;
  215. p->value = emptytok;
  216. p->hashcode = HASH_EMPTY;
  217. ++p;
  218. }
  219. hash->count = 0;
  220. hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
  221. hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
  222. }
  223. static UHashtable*
  224. _uhash_init(UHashtable *result,
  225. UHashFunction *keyHash,
  226. UKeyComparator *keyComp,
  227. UValueComparator *valueComp,
  228. int32_t primeIndex,
  229. UErrorCode *status)
  230. {
  231. if (U_FAILURE(*status)) return nullptr;
  232. U_ASSERT(keyHash != nullptr);
  233. U_ASSERT(keyComp != nullptr);
  234. result->keyHasher = keyHash;
  235. result->keyComparator = keyComp;
  236. result->valueComparator = valueComp;
  237. result->keyDeleter = nullptr;
  238. result->valueDeleter = nullptr;
  239. result->allocated = false;
  240. _uhash_internalSetResizePolicy(result, U_GROW);
  241. _uhash_allocate(result, primeIndex, status);
  242. if (U_FAILURE(*status)) {
  243. return nullptr;
  244. }
  245. return result;
  246. }
  247. static UHashtable*
  248. _uhash_create(UHashFunction *keyHash,
  249. UKeyComparator *keyComp,
  250. UValueComparator *valueComp,
  251. int32_t primeIndex,
  252. UErrorCode *status) {
  253. UHashtable *result;
  254. if (U_FAILURE(*status)) return nullptr;
  255. result = (UHashtable*) uprv_malloc(sizeof(UHashtable));
  256. if (result == nullptr) {
  257. *status = U_MEMORY_ALLOCATION_ERROR;
  258. return nullptr;
  259. }
  260. _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status);
  261. result->allocated = true;
  262. if (U_FAILURE(*status)) {
  263. uprv_free(result);
  264. return nullptr;
  265. }
  266. return result;
  267. }
  268. /**
  269. * Look for a key in the table, or if no such key exists, the first
  270. * empty slot matching the given hashcode. Keys are compared using
  271. * the keyComparator function.
  272. *
  273. * First find the start position, which is the hashcode modulo
  274. * the length. Test it to see if it is:
  275. *
  276. * a. identical: First check the hash values for a quick check,
  277. * then compare keys for equality using keyComparator.
  278. * b. deleted
  279. * c. empty
  280. *
  281. * Stop if it is identical or empty, otherwise continue by adding a
  282. * "jump" value (moduloing by the length again to keep it within
  283. * range) and retesting. For efficiency, there need enough empty
  284. * values so that the searches stop within a reasonable amount of time.
  285. * This can be changed by changing the high/low water marks.
  286. *
  287. * In theory, this function can return nullptr, if it is full (no empty
  288. * or deleted slots) and if no matching key is found. In practice, we
  289. * prevent this elsewhere (in uhash_put) by making sure the last slot
  290. * in the table is never filled.
  291. *
  292. * The size of the table should be prime for this algorithm to work;
  293. * otherwise we are not guaranteed that the jump value (the secondary
  294. * hash) is relatively prime to the table length.
  295. */
  296. static UHashElement*
  297. _uhash_find(const UHashtable *hash, UHashTok key,
  298. int32_t hashcode) {
  299. int32_t firstDeleted = -1; /* assume invalid index */
  300. int32_t theIndex, startIndex;
  301. int32_t jump = 0; /* lazy evaluate */
  302. int32_t tableHash;
  303. UHashElement *elements = hash->elements;
  304. hashcode &= 0x7FFFFFFF; /* must be positive */
  305. startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length;
  306. do {
  307. tableHash = elements[theIndex].hashcode;
  308. if (tableHash == hashcode) { /* quick check */
  309. if ((*hash->keyComparator)(key, elements[theIndex].key)) {
  310. return &(elements[theIndex]);
  311. }
  312. } else if (!IS_EMPTY_OR_DELETED(tableHash)) {
  313. /* We have hit a slot which contains a key-value pair,
  314. * but for which the hash code does not match. Keep
  315. * looking.
  316. */
  317. } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */
  318. break;
  319. } else if (firstDeleted < 0) { /* remember first deleted */
  320. firstDeleted = theIndex;
  321. }
  322. if (jump == 0) { /* lazy compute jump */
  323. /* The jump value must be relatively prime to the table
  324. * length. As long as the length is prime, then any value
  325. * 1..length-1 will be relatively prime to it.
  326. */
  327. jump = (hashcode % (hash->length - 1)) + 1;
  328. }
  329. theIndex = (theIndex + jump) % hash->length;
  330. } while (theIndex != startIndex);
  331. if (firstDeleted >= 0) {
  332. theIndex = firstDeleted; /* reset if had deleted slot */
  333. } else if (tableHash != HASH_EMPTY) {
  334. /* We get to this point if the hashtable is full (no empty or
  335. * deleted slots), and we've failed to find a match. THIS
  336. * WILL NEVER HAPPEN as long as uhash_put() makes sure that
  337. * count is always < length.
  338. */
  339. UPRV_UNREACHABLE_EXIT;
  340. }
  341. return &(elements[theIndex]);
  342. }
  343. /**
  344. * Attempt to grow or shrink the data arrays in order to make the
  345. * count fit between the high and low water marks. hash_put() and
  346. * hash_remove() call this method when the count exceeds the high or
  347. * low water marks. This method may do nothing, if memory allocation
  348. * fails, or if the count is already in range, or if the length is
  349. * already at the low or high limit. In any case, upon return the
  350. * arrays will be valid.
  351. */
  352. static void
  353. _uhash_rehash(UHashtable *hash, UErrorCode *status) {
  354. UHashElement *old = hash->elements;
  355. int32_t oldLength = hash->length;
  356. int32_t newPrimeIndex = hash->primeIndex;
  357. int32_t i;
  358. if (hash->count > hash->highWaterMark) {
  359. if (++newPrimeIndex >= PRIMES_LENGTH) {
  360. return;
  361. }
  362. } else if (hash->count < hash->lowWaterMark) {
  363. if (--newPrimeIndex < 0) {
  364. return;
  365. }
  366. } else {
  367. return;
  368. }
  369. _uhash_allocate(hash, newPrimeIndex, status);
  370. if (U_FAILURE(*status)) {
  371. hash->elements = old;
  372. hash->length = oldLength;
  373. return;
  374. }
  375. for (i = oldLength - 1; i >= 0; --i) {
  376. if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) {
  377. UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode);
  378. U_ASSERT(e != nullptr);
  379. U_ASSERT(e->hashcode == HASH_EMPTY);
  380. e->key = old[i].key;
  381. e->value = old[i].value;
  382. e->hashcode = old[i].hashcode;
  383. ++hash->count;
  384. }
  385. }
  386. uprv_free(old);
  387. }
  388. static UHashTok
  389. _uhash_remove(UHashtable *hash,
  390. UHashTok key) {
  391. /* First find the position of the key in the table. If the object
  392. * has not been removed already, remove it. If the user wanted
  393. * keys deleted, then delete it also. We have to put a special
  394. * hashcode in that position that means that something has been
  395. * deleted, since when we do a find, we have to continue PAST any
  396. * deleted values.
  397. */
  398. UHashTok result;
  399. UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key));
  400. U_ASSERT(e != nullptr);
  401. result.pointer = nullptr;
  402. result.integer = 0;
  403. if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
  404. result = _uhash_internalRemoveElement(hash, e);
  405. if (hash->count < hash->lowWaterMark) {
  406. UErrorCode status = U_ZERO_ERROR;
  407. _uhash_rehash(hash, &status);
  408. }
  409. }
  410. return result;
  411. }
  412. static UHashTok
  413. _uhash_put(UHashtable *hash,
  414. UHashTok key,
  415. UHashTok value,
  416. int8_t hint,
  417. UErrorCode *status) {
  418. /* Put finds the position in the table for the new value. If the
  419. * key is already in the table, it is deleted, if there is a
  420. * non-nullptr keyDeleter. Then the key, the hash and the value are
  421. * all put at the position in their respective arrays.
  422. */
  423. int32_t hashcode;
  424. UHashElement* e;
  425. UHashTok emptytok;
  426. if (U_FAILURE(*status)) {
  427. goto err;
  428. }
  429. U_ASSERT(hash != nullptr);
  430. if ((hint & HINT_VALUE_POINTER) ?
  431. value.pointer == nullptr :
  432. value.integer == 0 && (hint & HINT_ALLOW_ZERO) == 0) {
  433. /* Disallow storage of nullptr values, since nullptr is returned by
  434. * get() to indicate an absent key. Storing nullptr == removing.
  435. */
  436. return _uhash_remove(hash, key);
  437. }
  438. if (hash->count > hash->highWaterMark) {
  439. _uhash_rehash(hash, status);
  440. if (U_FAILURE(*status)) {
  441. goto err;
  442. }
  443. }
  444. hashcode = (*hash->keyHasher)(key);
  445. e = _uhash_find(hash, key, hashcode);
  446. U_ASSERT(e != nullptr);
  447. if (IS_EMPTY_OR_DELETED(e->hashcode)) {
  448. /* Important: We must never actually fill the table up. If we
  449. * do so, then _uhash_find() will return nullptr, and we'll have
  450. * to check for nullptr after every call to _uhash_find(). To
  451. * avoid this we make sure there is always at least one empty
  452. * or deleted slot in the table. This only is a problem if we
  453. * are out of memory and rehash isn't working.
  454. */
  455. ++hash->count;
  456. if (hash->count == hash->length) {
  457. /* Don't allow count to reach length */
  458. --hash->count;
  459. *status = U_MEMORY_ALLOCATION_ERROR;
  460. goto err;
  461. }
  462. }
  463. /* We must in all cases handle storage properly. If there was an
  464. * old key, then it must be deleted (if the deleter != nullptr).
  465. * Make hashcodes stored in table positive.
  466. */
  467. return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint);
  468. err:
  469. /* If the deleters are non-nullptr, this method adopts its key and/or
  470. * value arguments, and we must be sure to delete the key and/or
  471. * value in all cases, even upon failure.
  472. */
  473. HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer);
  474. emptytok.pointer = nullptr; emptytok.integer = 0;
  475. return emptytok;
  476. }
  477. /********************************************************************
  478. * PUBLIC API
  479. ********************************************************************/
  480. U_CAPI UHashtable* U_EXPORT2
  481. uhash_open(UHashFunction *keyHash,
  482. UKeyComparator *keyComp,
  483. UValueComparator *valueComp,
  484. UErrorCode *status) {
  485. return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
  486. }
  487. U_CAPI UHashtable* U_EXPORT2
  488. uhash_openSize(UHashFunction *keyHash,
  489. UKeyComparator *keyComp,
  490. UValueComparator *valueComp,
  491. int32_t size,
  492. UErrorCode *status) {
  493. /* Find the smallest index i for which PRIMES[i] >= size. */
  494. int32_t i = 0;
  495. while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
  496. ++i;
  497. }
  498. return _uhash_create(keyHash, keyComp, valueComp, i, status);
  499. }
  500. U_CAPI UHashtable* U_EXPORT2
  501. uhash_init(UHashtable *fillinResult,
  502. UHashFunction *keyHash,
  503. UKeyComparator *keyComp,
  504. UValueComparator *valueComp,
  505. UErrorCode *status) {
  506. return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
  507. }
  508. U_CAPI UHashtable* U_EXPORT2
  509. uhash_initSize(UHashtable *fillinResult,
  510. UHashFunction *keyHash,
  511. UKeyComparator *keyComp,
  512. UValueComparator *valueComp,
  513. int32_t size,
  514. UErrorCode *status) {
  515. // Find the smallest index i for which PRIMES[i] >= size.
  516. int32_t i = 0;
  517. while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
  518. ++i;
  519. }
  520. return _uhash_init(fillinResult, keyHash, keyComp, valueComp, i, status);
  521. }
  522. U_CAPI void U_EXPORT2
  523. uhash_close(UHashtable *hash) {
  524. if (hash == nullptr) {
  525. return;
  526. }
  527. if (hash->elements != nullptr) {
  528. if (hash->keyDeleter != nullptr || hash->valueDeleter != nullptr) {
  529. int32_t pos=UHASH_FIRST;
  530. UHashElement *e;
  531. while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != nullptr) {
  532. HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer);
  533. }
  534. }
  535. uprv_free(hash->elements);
  536. hash->elements = nullptr;
  537. }
  538. if (hash->allocated) {
  539. uprv_free(hash);
  540. }
  541. }
  542. U_CAPI UHashFunction *U_EXPORT2
  543. uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) {
  544. UHashFunction *result = hash->keyHasher;
  545. hash->keyHasher = fn;
  546. return result;
  547. }
  548. U_CAPI UKeyComparator *U_EXPORT2
  549. uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
  550. UKeyComparator *result = hash->keyComparator;
  551. hash->keyComparator = fn;
  552. return result;
  553. }
  554. U_CAPI UValueComparator *U_EXPORT2
  555. uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
  556. UValueComparator *result = hash->valueComparator;
  557. hash->valueComparator = fn;
  558. return result;
  559. }
  560. U_CAPI UObjectDeleter *U_EXPORT2
  561. uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) {
  562. UObjectDeleter *result = hash->keyDeleter;
  563. hash->keyDeleter = fn;
  564. return result;
  565. }
  566. U_CAPI UObjectDeleter *U_EXPORT2
  567. uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) {
  568. UObjectDeleter *result = hash->valueDeleter;
  569. hash->valueDeleter = fn;
  570. return result;
  571. }
  572. U_CAPI void U_EXPORT2
  573. uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
  574. UErrorCode status = U_ZERO_ERROR;
  575. _uhash_internalSetResizePolicy(hash, policy);
  576. hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
  577. hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
  578. _uhash_rehash(hash, &status);
  579. }
  580. U_CAPI int32_t U_EXPORT2
  581. uhash_count(const UHashtable *hash) {
  582. return hash->count;
  583. }
  584. U_CAPI void* U_EXPORT2
  585. uhash_get(const UHashtable *hash,
  586. const void* key) {
  587. UHashTok keyholder;
  588. keyholder.pointer = (void*) key;
  589. return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
  590. }
  591. U_CAPI void* U_EXPORT2
  592. uhash_iget(const UHashtable *hash,
  593. int32_t key) {
  594. UHashTok keyholder;
  595. keyholder.integer = key;
  596. return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
  597. }
  598. U_CAPI int32_t U_EXPORT2
  599. uhash_geti(const UHashtable *hash,
  600. const void* key) {
  601. UHashTok keyholder;
  602. keyholder.pointer = (void*) key;
  603. return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
  604. }
  605. U_CAPI int32_t U_EXPORT2
  606. uhash_igeti(const UHashtable *hash,
  607. int32_t key) {
  608. UHashTok keyholder;
  609. keyholder.integer = key;
  610. return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
  611. }
  612. U_CAPI int32_t U_EXPORT2
  613. uhash_getiAndFound(const UHashtable *hash,
  614. const void *key,
  615. UBool *found) {
  616. UHashTok keyholder;
  617. keyholder.pointer = (void *)key;
  618. const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
  619. *found = !IS_EMPTY_OR_DELETED(e->hashcode);
  620. return e->value.integer;
  621. }
  622. U_CAPI int32_t U_EXPORT2
  623. uhash_igetiAndFound(const UHashtable *hash,
  624. int32_t key,
  625. UBool *found) {
  626. UHashTok keyholder;
  627. keyholder.integer = key;
  628. const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
  629. *found = !IS_EMPTY_OR_DELETED(e->hashcode);
  630. return e->value.integer;
  631. }
  632. U_CAPI void* U_EXPORT2
  633. uhash_put(UHashtable *hash,
  634. void* key,
  635. void* value,
  636. UErrorCode *status) {
  637. UHashTok keyholder, valueholder;
  638. keyholder.pointer = key;
  639. valueholder.pointer = value;
  640. return _uhash_put(hash, keyholder, valueholder,
  641. HINT_KEY_POINTER | HINT_VALUE_POINTER,
  642. status).pointer;
  643. }
  644. U_CAPI void* U_EXPORT2
  645. uhash_iput(UHashtable *hash,
  646. int32_t key,
  647. void* value,
  648. UErrorCode *status) {
  649. UHashTok keyholder, valueholder;
  650. keyholder.integer = key;
  651. valueholder.pointer = value;
  652. return _uhash_put(hash, keyholder, valueholder,
  653. HINT_VALUE_POINTER,
  654. status).pointer;
  655. }
  656. U_CAPI int32_t U_EXPORT2
  657. uhash_puti(UHashtable *hash,
  658. void* key,
  659. int32_t value,
  660. UErrorCode *status) {
  661. UHashTok keyholder, valueholder;
  662. keyholder.pointer = key;
  663. valueholder.integer = value;
  664. return _uhash_put(hash, keyholder, valueholder,
  665. HINT_KEY_POINTER,
  666. status).integer;
  667. }
  668. U_CAPI int32_t U_EXPORT2
  669. uhash_iputi(UHashtable *hash,
  670. int32_t key,
  671. int32_t value,
  672. UErrorCode *status) {
  673. UHashTok keyholder, valueholder;
  674. keyholder.integer = key;
  675. valueholder.integer = value;
  676. return _uhash_put(hash, keyholder, valueholder,
  677. HINT_BOTH_INTEGERS,
  678. status).integer;
  679. }
  680. U_CAPI int32_t U_EXPORT2
  681. uhash_putiAllowZero(UHashtable *hash,
  682. void *key,
  683. int32_t value,
  684. UErrorCode *status) {
  685. UHashTok keyholder, valueholder;
  686. keyholder.pointer = key;
  687. valueholder.integer = value;
  688. return _uhash_put(hash, keyholder, valueholder,
  689. HINT_KEY_POINTER | HINT_ALLOW_ZERO,
  690. status).integer;
  691. }
  692. U_CAPI int32_t U_EXPORT2
  693. uhash_iputiAllowZero(UHashtable *hash,
  694. int32_t key,
  695. int32_t value,
  696. UErrorCode *status) {
  697. UHashTok keyholder, valueholder;
  698. keyholder.integer = key;
  699. valueholder.integer = value;
  700. return _uhash_put(hash, keyholder, valueholder,
  701. HINT_BOTH_INTEGERS | HINT_ALLOW_ZERO,
  702. status).integer;
  703. }
  704. U_CAPI void* U_EXPORT2
  705. uhash_remove(UHashtable *hash,
  706. const void* key) {
  707. UHashTok keyholder;
  708. keyholder.pointer = (void*) key;
  709. return _uhash_remove(hash, keyholder).pointer;
  710. }
  711. U_CAPI void* U_EXPORT2
  712. uhash_iremove(UHashtable *hash,
  713. int32_t key) {
  714. UHashTok keyholder;
  715. keyholder.integer = key;
  716. return _uhash_remove(hash, keyholder).pointer;
  717. }
  718. U_CAPI int32_t U_EXPORT2
  719. uhash_removei(UHashtable *hash,
  720. const void* key) {
  721. UHashTok keyholder;
  722. keyholder.pointer = (void*) key;
  723. return _uhash_remove(hash, keyholder).integer;
  724. }
  725. U_CAPI int32_t U_EXPORT2
  726. uhash_iremovei(UHashtable *hash,
  727. int32_t key) {
  728. UHashTok keyholder;
  729. keyholder.integer = key;
  730. return _uhash_remove(hash, keyholder).integer;
  731. }
  732. U_CAPI void U_EXPORT2
  733. uhash_removeAll(UHashtable *hash) {
  734. int32_t pos = UHASH_FIRST;
  735. const UHashElement *e;
  736. U_ASSERT(hash != nullptr);
  737. if (hash->count != 0) {
  738. while ((e = uhash_nextElement(hash, &pos)) != nullptr) {
  739. uhash_removeElement(hash, e);
  740. }
  741. }
  742. U_ASSERT(hash->count == 0);
  743. }
  744. U_CAPI UBool U_EXPORT2
  745. uhash_containsKey(const UHashtable *hash, const void *key) {
  746. UHashTok keyholder;
  747. keyholder.pointer = (void *)key;
  748. const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
  749. return !IS_EMPTY_OR_DELETED(e->hashcode);
  750. }
  751. /**
  752. * Returns true if the UHashtable contains an item with this integer key.
  753. *
  754. * @param hash The target UHashtable.
  755. * @param key An integer key stored in a hashtable
  756. * @return true if the key is found.
  757. */
  758. U_CAPI UBool U_EXPORT2
  759. uhash_icontainsKey(const UHashtable *hash, int32_t key) {
  760. UHashTok keyholder;
  761. keyholder.integer = key;
  762. const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
  763. return !IS_EMPTY_OR_DELETED(e->hashcode);
  764. }
  765. U_CAPI const UHashElement* U_EXPORT2
  766. uhash_find(const UHashtable *hash, const void* key) {
  767. UHashTok keyholder;
  768. const UHashElement *e;
  769. keyholder.pointer = (void*) key;
  770. e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
  771. return IS_EMPTY_OR_DELETED(e->hashcode) ? nullptr : e;
  772. }
  773. U_CAPI const UHashElement* U_EXPORT2
  774. uhash_nextElement(const UHashtable *hash, int32_t *pos) {
  775. /* Walk through the array until we find an element that is not
  776. * EMPTY and not DELETED.
  777. */
  778. int32_t i;
  779. U_ASSERT(hash != nullptr);
  780. for (i = *pos + 1; i < hash->length; ++i) {
  781. if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) {
  782. *pos = i;
  783. return &(hash->elements[i]);
  784. }
  785. }
  786. /* No more elements */
  787. return nullptr;
  788. }
  789. U_CAPI void* U_EXPORT2
  790. uhash_removeElement(UHashtable *hash, const UHashElement* e) {
  791. U_ASSERT(hash != nullptr);
  792. U_ASSERT(e != nullptr);
  793. if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
  794. UHashElement *nce = (UHashElement *)e;
  795. return _uhash_internalRemoveElement(hash, nce).pointer;
  796. }
  797. return nullptr;
  798. }
  799. /********************************************************************
  800. * UHashTok convenience
  801. ********************************************************************/
  802. /**
  803. * Return a UHashTok for an integer.
  804. */
  805. /*U_CAPI UHashTok U_EXPORT2
  806. uhash_toki(int32_t i) {
  807. UHashTok tok;
  808. tok.integer = i;
  809. return tok;
  810. }*/
  811. /**
  812. * Return a UHashTok for a pointer.
  813. */
  814. /*U_CAPI UHashTok U_EXPORT2
  815. uhash_tokp(void* p) {
  816. UHashTok tok;
  817. tok.pointer = p;
  818. return tok;
  819. }*/
  820. /********************************************************************
  821. * PUBLIC Key Hash Functions
  822. ********************************************************************/
  823. U_CAPI int32_t U_EXPORT2
  824. uhash_hashUChars(const UHashTok key) {
  825. const char16_t *s = (const char16_t *)key.pointer;
  826. return s == nullptr ? 0 : ustr_hashUCharsN(s, u_strlen(s));
  827. }
  828. U_CAPI int32_t U_EXPORT2
  829. uhash_hashChars(const UHashTok key) {
  830. const char *s = (const char *)key.pointer;
  831. return s == nullptr ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s))));
  832. }
  833. U_CAPI int32_t U_EXPORT2
  834. uhash_hashIChars(const UHashTok key) {
  835. const char *s = (const char *)key.pointer;
  836. return s == nullptr ? 0 : ustr_hashICharsN(s, static_cast<int32_t>(uprv_strlen(s)));
  837. }
  838. U_CAPI UBool U_EXPORT2
  839. uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
  840. int32_t count1, count2, pos, i;
  841. if(hash1==hash2){
  842. return true;
  843. }
  844. /*
  845. * Make sure that we are comparing 2 valid hashes of the same type
  846. * with valid comparison functions.
  847. * Without valid comparison functions, a binary comparison
  848. * of the hash values will yield random results on machines
  849. * with 64-bit pointers and 32-bit integer hashes.
  850. * A valueComparator is normally optional.
  851. */
  852. if (hash1==nullptr || hash2==nullptr ||
  853. hash1->keyComparator != hash2->keyComparator ||
  854. hash1->valueComparator != hash2->valueComparator ||
  855. hash1->valueComparator == nullptr)
  856. {
  857. /*
  858. Normally we would return an error here about incompatible hash tables,
  859. but we return false instead.
  860. */
  861. return false;
  862. }
  863. count1 = uhash_count(hash1);
  864. count2 = uhash_count(hash2);
  865. if(count1!=count2){
  866. return false;
  867. }
  868. pos=UHASH_FIRST;
  869. for(i=0; i<count1; i++){
  870. const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
  871. const UHashTok key1 = elem1->key;
  872. const UHashTok val1 = elem1->value;
  873. /* here the keys are not compared, instead the key form hash1 is used to fetch
  874. * value from hash2. If the hashes are equal then then both hashes should
  875. * contain equal values for the same key!
  876. */
  877. const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
  878. const UHashTok val2 = elem2->value;
  879. if(hash1->valueComparator(val1, val2)==false){
  880. return false;
  881. }
  882. }
  883. return true;
  884. }
  885. /********************************************************************
  886. * PUBLIC Comparator Functions
  887. ********************************************************************/
  888. U_CAPI UBool U_EXPORT2
  889. uhash_compareUChars(const UHashTok key1, const UHashTok key2) {
  890. const char16_t *p1 = (const char16_t*) key1.pointer;
  891. const char16_t *p2 = (const char16_t*) key2.pointer;
  892. if (p1 == p2) {
  893. return true;
  894. }
  895. if (p1 == nullptr || p2 == nullptr) {
  896. return false;
  897. }
  898. while (*p1 != 0 && *p1 == *p2) {
  899. ++p1;
  900. ++p2;
  901. }
  902. return (UBool)(*p1 == *p2);
  903. }
  904. U_CAPI UBool U_EXPORT2
  905. uhash_compareChars(const UHashTok key1, const UHashTok key2) {
  906. const char *p1 = (const char*) key1.pointer;
  907. const char *p2 = (const char*) key2.pointer;
  908. if (p1 == p2) {
  909. return true;
  910. }
  911. if (p1 == nullptr || p2 == nullptr) {
  912. return false;
  913. }
  914. while (*p1 != 0 && *p1 == *p2) {
  915. ++p1;
  916. ++p2;
  917. }
  918. return (UBool)(*p1 == *p2);
  919. }
  920. U_CAPI UBool U_EXPORT2
  921. uhash_compareIChars(const UHashTok key1, const UHashTok key2) {
  922. const char *p1 = (const char*) key1.pointer;
  923. const char *p2 = (const char*) key2.pointer;
  924. if (p1 == p2) {
  925. return true;
  926. }
  927. if (p1 == nullptr || p2 == nullptr) {
  928. return false;
  929. }
  930. while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) {
  931. ++p1;
  932. ++p2;
  933. }
  934. return (UBool)(*p1 == *p2);
  935. }
  936. /********************************************************************
  937. * PUBLIC int32_t Support Functions
  938. ********************************************************************/
  939. U_CAPI int32_t U_EXPORT2
  940. uhash_hashLong(const UHashTok key) {
  941. return key.integer;
  942. }
  943. U_CAPI UBool U_EXPORT2
  944. uhash_compareLong(const UHashTok key1, const UHashTok key2) {
  945. return (UBool)(key1.integer == key2.integer);
  946. }