bitabs.nim 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. ## A BiTable is a table that can be seen as an optimized pair
  2. ## of `(Table[LitId, Val], Table[Val, LitId])`.
  3. import hashes, rodfiles
  4. when defined(nimPreviewSlimSystem):
  5. import std/assertions
  6. type
  7. LitId* = distinct uint32
  8. BiTable*[T] = object
  9. vals: seq[T] # indexed by LitId
  10. keys: seq[LitId] # indexed by hash(val)
  11. proc initBiTable*[T](): BiTable[T] = BiTable[T](vals: @[], keys: @[])
  12. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  13. result = (h + 1) and maxHash
  14. template maxHash(t): untyped = high(t.keys)
  15. template isFilled(x: LitId): bool = x.uint32 > 0'u32
  16. proc `$`*(x: LitId): string {.borrow.}
  17. proc `<`*(x, y: LitId): bool {.borrow.}
  18. proc `<=`*(x, y: LitId): bool {.borrow.}
  19. proc `==`*(x, y: LitId): bool {.borrow.}
  20. proc hash*(x: LitId): Hash {.borrow.}
  21. proc len*[T](t: BiTable[T]): int = t.vals.len
  22. proc mustRehash(length, counter: int): bool {.inline.} =
  23. assert(length > counter)
  24. result = (length * 2 < counter * 3) or (length - counter < 4)
  25. const
  26. idStart = 256 ##
  27. ## Ids do not start with 0 but with this value. The IR needs it.
  28. ## TODO: explain why
  29. template idToIdx(x: LitId): int = x.int - idStart
  30. proc hasLitId*[T](t: BiTable[T]; x: LitId): bool =
  31. let idx = idToIdx(x)
  32. result = idx >= 0 and idx < t.vals.len
  33. proc enlarge[T](t: var BiTable[T]) =
  34. var n: seq[LitId]
  35. newSeq(n, len(t.keys) * 2)
  36. swap(t.keys, n)
  37. for i in 0..high(n):
  38. let eh = n[i]
  39. if isFilled(eh):
  40. var j = hash(t.vals[idToIdx eh]) and maxHash(t)
  41. while isFilled(t.keys[j]):
  42. j = nextTry(j, maxHash(t))
  43. t.keys[j] = move n[i]
  44. proc getKeyId*[T](t: BiTable[T]; v: T): LitId =
  45. let origH = hash(v)
  46. var h = origH and maxHash(t)
  47. if t.keys.len != 0:
  48. while true:
  49. let litId = t.keys[h]
  50. if not isFilled(litId): break
  51. if t.vals[idToIdx t.keys[h]] == v: return litId
  52. h = nextTry(h, maxHash(t))
  53. return LitId(0)
  54. proc getOrIncl*[T](t: var BiTable[T]; v: T): LitId =
  55. let origH = hash(v)
  56. var h = origH and maxHash(t)
  57. if t.keys.len != 0:
  58. while true:
  59. let litId = t.keys[h]
  60. if not isFilled(litId): break
  61. if t.vals[idToIdx t.keys[h]] == v: return litId
  62. h = nextTry(h, maxHash(t))
  63. # not found, we need to insert it:
  64. if mustRehash(t.keys.len, t.vals.len):
  65. enlarge(t)
  66. # recompute where to insert:
  67. h = origH and maxHash(t)
  68. while true:
  69. let litId = t.keys[h]
  70. if not isFilled(litId): break
  71. h = nextTry(h, maxHash(t))
  72. else:
  73. setLen(t.keys, 16)
  74. h = origH and maxHash(t)
  75. result = LitId(t.vals.len + idStart)
  76. t.keys[h] = result
  77. t.vals.add v
  78. proc `[]`*[T](t: var BiTable[T]; litId: LitId): var T {.inline.} =
  79. let idx = idToIdx litId
  80. assert idx < t.vals.len
  81. result = t.vals[idx]
  82. proc `[]`*[T](t: BiTable[T]; litId: LitId): lent T {.inline.} =
  83. let idx = idToIdx litId
  84. assert idx < t.vals.len
  85. result = t.vals[idx]
  86. proc hash*[T](t: BiTable[T]): Hash =
  87. ## as the keys are hashes of the values, we simply use them instead
  88. var h: Hash = 0
  89. for i, n in pairs t.keys:
  90. h = h !& hash((i, n))
  91. result = !$h
  92. proc store*[T](f: var RodFile; t: BiTable[T]) =
  93. storeSeq(f, t.vals)
  94. storeSeq(f, t.keys)
  95. proc load*[T](f: var RodFile; t: var BiTable[T]) =
  96. loadSeq(f, t.vals)
  97. loadSeq(f, t.keys)
  98. when isMainModule:
  99. var t: BiTable[string]
  100. echo getOrIncl(t, "hello")
  101. echo getOrIncl(t, "hello")
  102. echo getOrIncl(t, "hello3")
  103. echo getOrIncl(t, "hello4")
  104. echo getOrIncl(t, "helloasfasdfdsa")
  105. echo getOrIncl(t, "hello")
  106. echo getKeyId(t, "hello")
  107. echo getKeyId(t, "none")
  108. for i in 0 ..< 100_000:
  109. discard t.getOrIncl($i & "___" & $i)
  110. for i in 0 ..< 100_000:
  111. assert t.getOrIncl($i & "___" & $i).idToIdx == i + 4
  112. echo "begin"
  113. echo t.vals.len
  114. echo t.vals[0]
  115. echo t.vals[1004]
  116. echo "middle"
  117. var tf: BiTable[float]
  118. discard tf.getOrIncl(0.4)
  119. discard tf.getOrIncl(16.4)
  120. discard tf.getOrIncl(32.4)
  121. echo getKeyId(tf, 32.4)
  122. var f2 = open("testblah.bin", fmWrite)
  123. echo store(f2, tf)
  124. f2.close
  125. var f1 = open("testblah.bin", fmRead)
  126. var t2: BiTable[float]
  127. echo f1.load(t2)
  128. echo t2.vals.len
  129. echo getKeyId(t2, 32.4)
  130. echo "end"
  131. f1.close