btrees.nim 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2018 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## BTree implementation with few features, but good enough for the
  10. ## Nim compiler's needs.
  11. const
  12. M = 512 # max children per B-tree node = M-1
  13. # (must be even and greater than 2)
  14. Mhalf = M div 2
  15. type
  16. Node[Key, Val] = ref object
  17. entries: int
  18. keys: array[M, Key]
  19. case isInternal: bool
  20. of false:
  21. vals: array[M, Val]
  22. of true:
  23. links: array[M, Node[Key, Val]]
  24. BTree*[Key, Val] = object
  25. root: Node[Key, Val]
  26. entries: int ## number of key-value pairs
  27. proc initBTree*[Key, Val](): BTree[Key, Val] =
  28. BTree[Key, Val](root: Node[Key, Val](entries: 0, isInternal: false))
  29. template less(a, b): bool = cmp(a, b) < 0
  30. template eq(a, b): bool = cmp(a, b) == 0
  31. proc getOrDefault*[Key, Val](b: BTree[Key, Val], key: Key): Val =
  32. var x = b.root
  33. while x.isInternal:
  34. for j in 0..<x.entries:
  35. if j+1 == x.entries or less(key, x.keys[j+1]):
  36. x = x.links[j]
  37. break
  38. assert(not x.isInternal)
  39. for j in 0..<x.entries:
  40. if eq(key, x.keys[j]): return x.vals[j]
  41. proc contains*[Key, Val](b: BTree[Key, Val], key: Key): bool =
  42. var x = b.root
  43. while x.isInternal:
  44. for j in 0..<x.entries:
  45. if j+1 == x.entries or less(key, x.keys[j+1]):
  46. x = x.links[j]
  47. break
  48. assert(not x.isInternal)
  49. for j in 0..<x.entries:
  50. if eq(key, x.keys[j]): return true
  51. return false
  52. proc copyHalf[Key, Val](h, result: Node[Key, Val]) =
  53. for j in 0..<Mhalf:
  54. result.keys[j] = h.keys[Mhalf + j]
  55. if h.isInternal:
  56. for j in 0..<Mhalf:
  57. result.links[j] = h.links[Mhalf + j]
  58. else:
  59. for j in 0..<Mhalf:
  60. shallowCopy(result.vals[j], h.vals[Mhalf + j])
  61. proc split[Key, Val](h: Node[Key, Val]): Node[Key, Val] =
  62. ## split node in half
  63. result = Node[Key, Val](entries: Mhalf, isInternal: h.isInternal)
  64. h.entries = Mhalf
  65. copyHalf(h, result)
  66. proc insert[Key, Val](h: Node[Key, Val], key: Key, val: Val): Node[Key, Val] =
  67. #var t = Entry(key: key, val: val, next: nil)
  68. var newKey = key
  69. var j = 0
  70. if not h.isInternal:
  71. while j < h.entries:
  72. if eq(key, h.keys[j]):
  73. h.vals[j] = val
  74. return
  75. if less(key, h.keys[j]): break
  76. inc j
  77. for i in countdown(h.entries, j+1):
  78. shallowCopy(h.vals[i], h.vals[i-1])
  79. h.vals[j] = val
  80. else:
  81. var newLink: Node[Key, Val] = nil
  82. while j < h.entries:
  83. if j+1 == h.entries or less(key, h.keys[j+1]):
  84. let u = insert(h.links[j], key, val)
  85. inc j
  86. if u == nil: return nil
  87. newKey = u.keys[0]
  88. newLink = u
  89. break
  90. inc j
  91. for i in countdown(h.entries, j+1):
  92. h.links[i] = h.links[i-1]
  93. h.links[j] = newLink
  94. for i in countdown(h.entries, j+1):
  95. h.keys[i] = h.keys[i-1]
  96. h.keys[j] = newKey
  97. inc h.entries
  98. return if h.entries < M: nil else: split(h)
  99. proc add*[Key, Val](b: var BTree[Key, Val]; key: Key; val: Val) =
  100. let u = insert(b.root, key, val)
  101. inc b.entries
  102. if u == nil: return
  103. # need to split root
  104. let t = Node[Key, Val](entries: 2, isInternal: true)
  105. t.keys[0] = b.root.keys[0]
  106. t.links[0] = b.root
  107. t.keys[1] = u.keys[0]
  108. t.links[1] = u
  109. b.root = t
  110. proc toString[Key, Val](h: Node[Key, Val], indent: string; result: var string) =
  111. if not h.isInternal:
  112. for j in 0..<h.entries:
  113. result.add(indent)
  114. result.add($h.keys[j] & " " & $h.vals[j] & "\n")
  115. else:
  116. for j in 0..<h.entries:
  117. if j > 0: result.add(indent & "(" & $h.keys[j] & ")\n")
  118. toString(h.links[j], indent & " ", result)
  119. proc `$`[Key, Val](b: BTree[Key, Val]): string =
  120. result = ""
  121. toString(b.root, "", result)
  122. proc hasNext*[Key, Val](b: BTree[Key, Val]; index: int): bool = index < b.entries
  123. proc countSubTree[Key, Val](it: Node[Key, Val]): int =
  124. if it.isInternal:
  125. result = 0
  126. for k in 0..<it.entries:
  127. inc result, countSubTree(it.links[k])
  128. else:
  129. result = it.entries
  130. proc next*[Key, Val](b: BTree[Key, Val]; index: int): (Key, Val, int) =
  131. var it = b.root
  132. var i = index
  133. # navigate to the right leaf:
  134. while it.isInternal:
  135. var sum = 0
  136. for k in 0..<it.entries:
  137. let c = countSubTree(it.links[k])
  138. inc sum, c
  139. if sum > i:
  140. it = it.links[k]
  141. dec i, (sum - c)
  142. break
  143. result = (it.keys[i], it.vals[i], index+1)
  144. iterator pairs*[Key, Val](b: BTree[Key, Val]): (Key, Val) =
  145. var i = 0
  146. while hasNext(b, i):
  147. let (k, v, i2) = next(b, i)
  148. i = i2
  149. yield (k, v)
  150. proc len*[Key, Val](b: BTree[Key, Val]): int {.inline.} = b.entries
  151. when isMainModule:
  152. import random, tables
  153. proc main =
  154. var st = initBTree[string, string]()
  155. st.add("www.cs.princeton.edu", "abc")
  156. st.add("www.princeton.edu", "128.112.128.15")
  157. st.add("www.yale.edu", "130.132.143.21")
  158. st.add("www.simpsons.com", "209.052.165.60")
  159. st.add("www.apple.com", "17.112.152.32")
  160. st.add("www.amazon.com", "207.171.182.16")
  161. st.add("www.ebay.com", "66.135.192.87")
  162. st.add("www.cnn.com", "64.236.16.20")
  163. st.add("www.google.com", "216.239.41.99")
  164. st.add("www.nytimes.com", "199.239.136.200")
  165. st.add("www.microsoft.com", "207.126.99.140")
  166. st.add("www.dell.com", "143.166.224.230")
  167. st.add("www.slashdot.org", "66.35.250.151")
  168. st.add("www.espn.com", "199.181.135.201")
  169. st.add("www.weather.com", "63.111.66.11")
  170. st.add("www.yahoo.com", "216.109.118.65")
  171. assert st.getOrDefault("www.cs.princeton.edu") == "abc"
  172. assert st.getOrDefault("www.harvardsucks.com") == ""
  173. assert st.getOrDefault("www.simpsons.com") == "209.052.165.60"
  174. assert st.getOrDefault("www.apple.com") == "17.112.152.32"
  175. assert st.getOrDefault("www.ebay.com") == "66.135.192.87"
  176. assert st.getOrDefault("www.dell.com") == "143.166.224.230"
  177. assert(st.entries == 16)
  178. for k, v in st:
  179. echo k, ": ", v
  180. when false:
  181. var b2 = initBTree[string, string]()
  182. const iters = 10_000
  183. for i in 1..iters:
  184. b2.add($i, $(iters - i))
  185. for i in 1..iters:
  186. let x = b2.getOrDefault($i)
  187. if x != $(iters - i):
  188. echo "got ", x, ", but expected ", iters - i
  189. echo b2.entries
  190. when true:
  191. var b2 = initBTree[int, string]()
  192. var t2 = initTable[int, string]()
  193. const iters = 100_000
  194. for i in 1..iters:
  195. let x = rand(high(int))
  196. if not t2.hasKey(x):
  197. doAssert b2.getOrDefault(x).len == 0, " what, tree has this element " & $x
  198. t2[x] = $x
  199. b2.add(x, $x)
  200. doAssert b2.entries == t2.len
  201. echo "unique entries ", b2.entries
  202. for k, v in t2:
  203. doAssert $k == v
  204. doAssert b2.getOrDefault(k) == $k
  205. main()