tableimpl.nim 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2015 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. # An `include` file for the different table implementations.
  10. include hashcommon
  11. const
  12. defaultInitialSize* = 32
  13. template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add
  14. genHashImpl(key, hc)
  15. var h: Hash = hc and maxHash(t)
  16. while isFilled(t.data[h].hcode):
  17. h = nextTry(h, maxHash(t))
  18. result = h
  19. template rawInsertImpl() {.dirty.} =
  20. data[h].key = key
  21. data[h].val = val
  22. data[h].hcode = hc
  23. proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline, outParamsAt: [3].} =
  24. rawGetDeepImpl()
  25. proc rawInsert[X, A, B](t: var X, data: var KeyValuePairSeq[A, B],
  26. key: A, val: sink B, hc: Hash, h: Hash) =
  27. rawInsertImpl()
  28. template checkIfInitialized() =
  29. if t.dataLen == 0:
  30. initImpl(t, defaultInitialSize)
  31. template addImpl(enlarge) {.dirty.} =
  32. checkIfInitialized()
  33. if mustRehash(t): enlarge(t)
  34. var hc: Hash
  35. var j = rawGetDeep(t, key, hc)
  36. rawInsert(t, t.data, key, val, hc, j)
  37. inc(t.counter)
  38. template maybeRehashPutImpl(enlarge) {.dirty.} =
  39. checkIfInitialized()
  40. if mustRehash(t):
  41. enlarge(t)
  42. index = rawGetKnownHC(t, key, hc)
  43. index = -1 - index # important to transform for mgetOrPutImpl
  44. rawInsert(t, t.data, key, val, hc, index)
  45. inc(t.counter)
  46. template putImpl(enlarge) {.dirty.} =
  47. checkIfInitialized()
  48. var hc: Hash = default(Hash)
  49. var index = rawGet(t, key, hc)
  50. if index >= 0: t.data[index].val = val
  51. else: maybeRehashPutImpl(enlarge)
  52. template mgetOrPutImpl(enlarge) {.dirty.} =
  53. checkIfInitialized()
  54. var hc: Hash = default(Hash)
  55. var index = rawGet(t, key, hc)
  56. if index < 0:
  57. # not present: insert (flipping index)
  58. maybeRehashPutImpl(enlarge)
  59. # either way return modifiable val
  60. result = t.data[index].val
  61. template hasKeyOrPutImpl(enlarge) {.dirty.} =
  62. checkIfInitialized()
  63. var hc: Hash = default(Hash)
  64. var index = rawGet(t, key, hc)
  65. if index < 0:
  66. result = false
  67. maybeRehashPutImpl(enlarge)
  68. else: result = true
  69. # delImplIdx is KnuthV3 Algo6.4R adapted to i=i+1 (from i=i-1) which has come to
  70. # be called "back shift delete". It shifts elements in the collision cluster of
  71. # a victim backward to make things as-if the victim were never inserted in the
  72. # first place. This is desirable to keep things "ageless" after many deletes.
  73. # It is trickier than you might guess since initial probe (aka "home") locations
  74. # of keys in a cluster may collide and since table addresses wrap around.
  75. #
  76. # A before-after diagram might look like ('.' means empty):
  77. # slot: 0 1 2 3 4 5 6 7
  78. # before(1)
  79. # hash1: 6 7 . 3 . 5 5 6 ; Really hash() and msk
  80. # data1: E F . A . B C D ; About to delete C @index 6
  81. # after(2)
  82. # hash2: 7 . . 3 . 5 6 6 ; Really hash() and msk
  83. # data2: F . . A . B D E ; After deletion of C
  84. #
  85. # This lowers total search depth over the whole table from 1+1+2+2+2+2=10 to 7.
  86. # Had the victim been B@5, C would need back shifting to slot 5. Total depth is
  87. # always lowered by at least 1, e.g. victim A@3. This is all quite fast when
  88. # empty slots are frequent (also needed to keep insert/miss searches fast) and
  89. # hash() is either fast or avoided (via `.hcode`). It need not compare keys.
  90. #
  91. # delImplIdx realizes the above transformation, but only works for dense Linear
  92. # Probing, nextTry(h)=h+1. This is not an important limitation since that's the
  93. # fastest sequence on any CPU made since the 1980s. { Performance analysis often
  94. # overweights "key cmp" neglecting cache behavior, giving bad ideas how big/slow
  95. # tables behave (when perf matters most!). Comparing hcode first means usually
  96. # only 1 key cmp is needed for *any* seq. Timing only predictable activity,
  97. # small tables, and/or integer keys often perpetuates such bad ideas. }
  98. template delImplIdx(t, i, makeEmpty, cellEmpty, cellHash) =
  99. let msk = maxHash(t)
  100. if i >= 0:
  101. dec(t.counter)
  102. block outer:
  103. while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
  104. var j = i # The correctness of this depends on (h+1) in nextTry
  105. var r = j # though may be adaptable to other simple sequences.
  106. makeEmpty(i) # mark current EMPTY
  107. t.data[i].key = default(typeof(t.data[i].key))
  108. t.data[i].val = default(typeof(t.data[i].val))
  109. while true:
  110. i = (i + 1) and msk # increment mod table size
  111. if cellEmpty(i): # end of collision cluster; So all done
  112. break outer
  113. r = cellHash(i) and msk # initial probe index for key@slot i
  114. if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
  115. break
  116. when defined(js):
  117. t.data[j] = t.data[i]
  118. else:
  119. t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop
  120. template delImpl(makeEmpty, cellEmpty, cellHash) {.dirty.} =
  121. var hc: Hash
  122. var i = rawGet(t, key, hc)
  123. delImplIdx(t, i, makeEmpty, cellEmpty, cellHash)
  124. template delImplNoHCode(makeEmpty, cellEmpty, cellHash) {.dirty.} =
  125. if t.dataLen > 0:
  126. var i: Hash = hash(key) and maxHash(t)
  127. while not cellEmpty(i):
  128. if t.data[i].key == key:
  129. delImplIdx(t, i, makeEmpty, cellEmpty, cellHash)
  130. break
  131. i = nextTry(i, maxHash(t))
  132. template clearImpl() {.dirty.} =
  133. for i in 0 ..< t.dataLen:
  134. when compiles(t.data[i].hcode): # CountTable records don't contain a hcode
  135. t.data[i].hcode = 0
  136. t.data[i].key = default(typeof(t.data[i].key))
  137. t.data[i].val = default(typeof(t.data[i].val))
  138. t.counter = 0
  139. template ctAnd(a, b): bool =
  140. when a:
  141. when b: true
  142. else: false
  143. else: false
  144. template initImpl(result: typed, size: int) =
  145. let correctSize = slotsNeeded(size)
  146. when ctAnd(declared(SharedTable), typeof(result) is SharedTable):
  147. init(result, correctSize)
  148. else:
  149. result.counter = 0
  150. newSeq(result.data, correctSize)
  151. when compiles(result.first):
  152. result.first = -1
  153. result.last = -1
  154. template insertImpl() = # for CountTable
  155. if t.dataLen == 0: initImpl(t, defaultInitialSize)
  156. if mustRehash(t): enlarge(t)
  157. ctRawInsert(t, t.data, key, val)
  158. inc(t.counter)
  159. template getOrDefaultImpl(t, key): untyped =
  160. mixin rawGet
  161. var hc: Hash
  162. var index = rawGet(t, key, hc)
  163. if index >= 0: result = t.data[index].val
  164. template getOrDefaultImpl(t, key, default: untyped): untyped =
  165. mixin rawGet
  166. var hc: Hash
  167. var index = rawGet(t, key, hc)
  168. result = if index >= 0: t.data[index].val else: default
  169. template dollarImpl(): untyped {.dirty.} =
  170. if t.len == 0:
  171. result = "{:}"
  172. else:
  173. result = "{"
  174. for key, val in pairs(t):
  175. if result.len > 1: result.add(", ")
  176. result.addQuoted(key)
  177. result.add(": ")
  178. result.addQuoted(val)
  179. result.add("}")
  180. template equalsImpl(s, t: typed) =
  181. if s.counter == t.counter:
  182. # different insertion orders mean different 'data' seqs, so we have
  183. # to use the slow route here:
  184. for key, val in s:
  185. if not t.hasKey(key): return false
  186. if t.getOrDefault(key) != val: return false
  187. return true
  188. else:
  189. return false