critbits.nim 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements a `crit bit tree`:idx: which is an efficient
  10. ## container for a sorted set of strings, or for a sorted mapping of strings. Based on the excellent paper
  11. ## by Adam Langley.
  12. ## (A crit bit tree is a form of `radix tree`:idx: or `patricia trie`:idx:.)
  13. include "system/inclrtl"
  14. type
  15. NodeObj[T] = object {.acyclic.}
  16. byte: int ## byte index of the difference
  17. otherbits: char
  18. case isLeaf: bool
  19. of false: child: array[0..1, ref NodeObj[T]]
  20. of true:
  21. key: string
  22. when T isnot void:
  23. val: T
  24. Node[T] = ref NodeObj[T]
  25. CritBitTree*[T] = object ## The crit bit tree can either be used
  26. ## as a mapping from strings to
  27. ## some type ``T`` or as a set of
  28. ## strings if ``T`` is void.
  29. root: Node[T]
  30. count: int
  31. {.deprecated: [TCritBitTree: CritBitTree].}
  32. proc len*[T](c: CritBitTree[T]): int =
  33. ## returns the number of elements in `c` in O(1).
  34. result = c.count
  35. proc rawGet[T](c: CritBitTree[T], key: string): Node[T] =
  36. var it = c.root
  37. while it != nil:
  38. if not it.isLeaf:
  39. let ch = if it.byte < key.len: key[it.byte] else: '\0'
  40. let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8
  41. it = it.child[dir]
  42. else:
  43. return if it.key == key: it else: nil
  44. proc contains*[T](c: CritBitTree[T], key: string): bool {.inline.} =
  45. ## returns true iff `c` contains the given `key`.
  46. result = rawGet(c, key) != nil
  47. proc hasKey*[T](c: CritBitTree[T], key: string): bool {.inline.} =
  48. ## alias for `contains`.
  49. result = rawGet(c, key) != nil
  50. proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] =
  51. if c.root == nil:
  52. new c.root
  53. c.root.isleaf = true
  54. c.root.key = key
  55. result = c.root
  56. else:
  57. var it = c.root
  58. while not it.isLeaf:
  59. let ch = if it.byte < key.len: key[it.byte] else: '\0'
  60. let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8
  61. it = it.child[dir]
  62. var newOtherBits = 0
  63. var newByte = 0
  64. block blockX:
  65. while newbyte < key.len:
  66. let ch = if newbyte < it.key.len: it.key[newbyte] else: '\0'
  67. if ch != key[newbyte]:
  68. newotherbits = ch.ord xor key[newbyte].ord
  69. break blockX
  70. inc newbyte
  71. if newbyte < it.key.len:
  72. newotherbits = it.key[newbyte].ord
  73. else:
  74. return it
  75. while (newOtherBits and (newOtherBits-1)) != 0:
  76. newOtherBits = newOtherBits and (newOtherBits-1)
  77. newOtherBits = newOtherBits xor 255
  78. let ch = if newByte < it.key.len: it.key[newByte] else: '\0'
  79. let dir = (1 + (ord(ch) or newOtherBits)) shr 8
  80. var inner: Node[T]
  81. new inner
  82. new result
  83. result.isLeaf = true
  84. result.key = key
  85. inner.otherBits = chr(newOtherBits)
  86. inner.byte = newByte
  87. inner.child[1 - dir] = result
  88. var wherep = addr(c.root)
  89. while true:
  90. var p = wherep[]
  91. if p.isLeaf: break
  92. if p.byte > newByte: break
  93. if p.byte == newByte and p.otherBits.ord > newOtherBits: break
  94. let ch = if p.byte < key.len: key[p.byte] else: '\0'
  95. let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
  96. wherep = addr(p.child[dir])
  97. inner.child[dir] = wherep[]
  98. wherep[] = inner
  99. inc c.count
  100. proc exclImpl[T](c: var CritBitTree[T], key: string) : int =
  101. var p = c.root
  102. var wherep = addr(c.root)
  103. var whereq: ptr Node[T] = nil
  104. if p == nil: return c.count
  105. var dir = 0
  106. var q: Node[T]
  107. while not p.isLeaf:
  108. whereq = wherep
  109. q = p
  110. let ch = if p.byte < key.len: key[p.byte] else: '\0'
  111. dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
  112. wherep = addr(p.child[dir])
  113. p = wherep[]
  114. if p.key == key:
  115. # else: not in tree at all
  116. if whereq == nil:
  117. c.root = nil
  118. else:
  119. whereq[] = q.child[1 - dir]
  120. dec c.count
  121. return c.count
  122. proc excl*[T](c: var CritBitTree[T], key: string) =
  123. ## removes `key` (and its associated value) from the set `c`.
  124. ## If the `key` does not exist, nothing happens.
  125. discard exclImpl(c, key)
  126. proc missingOrExcl*[T](c: var CritBitTree[T], key: string): bool =
  127. ## Returns true iff `c` does not contain the given `key`. If the key
  128. ## does exist, c.excl(key) is performed.
  129. let oldCount = c.count
  130. var n = exclImpl(c, key)
  131. result = c.count == oldCount
  132. proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: T): bool =
  133. ## returns true iff `c` contains the given `key`. If the key does not exist
  134. ## ``c[key] = val`` is performed.
  135. let oldCount = c.count
  136. var n = rawInsert(c, key)
  137. result = c.count == oldCount
  138. when T isnot void:
  139. if not result: n.val = val
  140. proc containsOrIncl*(c: var CritBitTree[void], key: string): bool =
  141. ## returns true iff `c` contains the given `key`. If the key does not exist
  142. ## it is inserted into `c`.
  143. let oldCount = c.count
  144. var n = rawInsert(c, key)
  145. result = c.count == oldCount
  146. proc inc*(c: var CritBitTree[int]; key: string, val: int = 1) =
  147. ## increments `c[key]` by `val`.
  148. var n = rawInsert(c, key)
  149. inc n.val, val
  150. proc incl*(c: var CritBitTree[void], key: string) =
  151. ## includes `key` in `c`.
  152. discard rawInsert(c, key)
  153. proc incl*[T](c: var CritBitTree[T], key: string, val: T) =
  154. ## inserts `key` with value `val` into `c`.
  155. var n = rawInsert(c, key)
  156. n.val = val
  157. proc `[]=`*[T](c: var CritBitTree[T], key: string, val: T) =
  158. ## puts a (key, value)-pair into `t`.
  159. var n = rawInsert(c, key)
  160. n.val = val
  161. template get[T](c: CritBitTree[T], key: string): T =
  162. let n = rawGet(c, key)
  163. if n == nil:
  164. when compiles($key):
  165. raise newException(KeyError, "key not found: " & $key)
  166. else:
  167. raise newException(KeyError, "key not found")
  168. n.val
  169. proc `[]`*[T](c: CritBitTree[T], key: string): T {.inline, deprecatedGet.} =
  170. ## retrieves the value at ``c[key]``. If `key` is not in `t`, the
  171. ## ``KeyError`` exception is raised. One can check with ``hasKey`` whether
  172. ## the key exists.
  173. get(c, key)
  174. proc `[]`*[T](c: var CritBitTree[T], key: string): var T {.inline,
  175. deprecatedGet.} =
  176. ## retrieves the value at ``c[key]``. The value can be modified.
  177. ## If `key` is not in `t`, the ``KeyError`` exception is raised.
  178. get(c, key)
  179. proc mget*[T](c: var CritBitTree[T], key: string): var T {.inline, deprecated.} =
  180. ## retrieves the value at ``c[key]``. The value can be modified.
  181. ## If `key` is not in `t`, the ``KeyError`` exception is raised.
  182. ## Use ```[]``` instead.
  183. get(c, key)
  184. iterator leaves[T](n: Node[T]): Node[T] =
  185. if n != nil:
  186. # XXX actually we could compute the necessary stack size in advance:
  187. # it's roughly log2(c.count).
  188. var stack = @[n]
  189. while stack.len > 0:
  190. var it = stack.pop
  191. while not it.isLeaf:
  192. stack.add(it.child[1])
  193. it = it.child[0]
  194. assert(it != nil)
  195. yield it
  196. iterator keys*[T](c: CritBitTree[T]): string =
  197. ## yields all keys in lexicographical order.
  198. for x in leaves(c.root): yield x.key
  199. iterator values*[T](c: CritBitTree[T]): T =
  200. ## yields all values of `c` in the lexicographical order of the
  201. ## corresponding keys.
  202. for x in leaves(c.root): yield x.val
  203. iterator mvalues*[T](c: var CritBitTree[T]): var T =
  204. ## yields all values of `c` in the lexicographical order of the
  205. ## corresponding keys. The values can be modified.
  206. for x in leaves(c.root): yield x.val
  207. iterator items*[T](c: CritBitTree[T]): string =
  208. ## yields all keys in lexicographical order.
  209. for x in leaves(c.root): yield x.key
  210. iterator pairs*[T](c: CritBitTree[T]): tuple[key: string, val: T] =
  211. ## yields all (key, value)-pairs of `c`.
  212. for x in leaves(c.root): yield (x.key, x.val)
  213. iterator mpairs*[T](c: var CritBitTree[T]): tuple[key: string, val: var T] =
  214. ## yields all (key, value)-pairs of `c`. The yielded values can be modified.
  215. for x in leaves(c.root): yield (x.key, x.val)
  216. proc allprefixedAux[T](c: CritBitTree[T], key: string; longestMatch: bool): Node[T] =
  217. var p = c.root
  218. var top = p
  219. if p != nil:
  220. while not p.isLeaf:
  221. var q = p
  222. let ch = if p.byte < key.len: key[p.byte] else: '\0'
  223. let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
  224. p = p.child[dir]
  225. if q.byte < key.len: top = p
  226. if not longestMatch:
  227. for i in 0 ..< key.len:
  228. if p.key[i] != key[i]: return
  229. result = top
  230. iterator itemsWithPrefix*[T](c: CritBitTree[T], prefix: string;
  231. longestMatch=false): string =
  232. ## yields all keys starting with `prefix`. If `longestMatch` is true,
  233. ## the longest match is returned, it doesn't have to be a complete match then.
  234. let top = allprefixedAux(c, prefix, longestMatch)
  235. for x in leaves(top): yield x.key
  236. iterator keysWithPrefix*[T](c: CritBitTree[T], prefix: string;
  237. longestMatch=false): string =
  238. ## yields all keys starting with `prefix`.
  239. let top = allprefixedAux(c, prefix, longestMatch)
  240. for x in leaves(top): yield x.key
  241. iterator valuesWithPrefix*[T](c: CritBitTree[T], prefix: string;
  242. longestMatch=false): T =
  243. ## yields all values of `c` starting with `prefix` of the
  244. ## corresponding keys.
  245. let top = allprefixedAux(c, prefix, longestMatch)
  246. for x in leaves(top): yield x.val
  247. iterator mvaluesWithPrefix*[T](c: var CritBitTree[T], prefix: string;
  248. longestMatch=false): var T =
  249. ## yields all values of `c` starting with `prefix` of the
  250. ## corresponding keys. The values can be modified.
  251. let top = allprefixedAux(c, prefix, longestMatch)
  252. for x in leaves(top): yield x.val
  253. iterator pairsWithPrefix*[T](c: CritBitTree[T],
  254. prefix: string;
  255. longestMatch=false): tuple[key: string, val: T] =
  256. ## yields all (key, value)-pairs of `c` starting with `prefix`.
  257. let top = allprefixedAux(c, prefix, longestMatch)
  258. for x in leaves(top): yield (x.key, x.val)
  259. iterator mpairsWithPrefix*[T](c: var CritBitTree[T],
  260. prefix: string;
  261. longestMatch=false): tuple[key: string, val: var T] =
  262. ## yields all (key, value)-pairs of `c` starting with `prefix`.
  263. ## The yielded values can be modified.
  264. let top = allprefixedAux(c, prefix, longestMatch)
  265. for x in leaves(top): yield (x.key, x.val)
  266. proc `$`*[T](c: CritBitTree[T]): string =
  267. ## turns `c` into a string representation. Example outputs:
  268. ## ``{keyA: value, keyB: value}``, ``{:}``
  269. ## If `T` is void the outputs look like:
  270. ## ``{keyA, keyB}``, ``{}``.
  271. if c.len == 0:
  272. when T is void:
  273. result = "{}"
  274. else:
  275. result = "{:}"
  276. else:
  277. # an educated guess is better than nothing:
  278. when T is void:
  279. const avgItemLen = 8
  280. else:
  281. const avgItemLen = 16
  282. result = newStringOfCap(c.count * avgItemLen)
  283. result.add("{")
  284. when T is void:
  285. for key in keys(c):
  286. if result.len > 1: result.add(", ")
  287. result.addQuoted(key)
  288. else:
  289. for key, val in pairs(c):
  290. if result.len > 1: result.add(", ")
  291. result.addQuoted(key)
  292. result.add(": ")
  293. result.addQuoted(val)
  294. result.add("}")
  295. when isMainModule:
  296. import sequtils
  297. var r: CritBitTree[void]
  298. r.incl "abc"
  299. r.incl "xyz"
  300. r.incl "def"
  301. r.incl "definition"
  302. r.incl "prefix"
  303. r.incl "foo"
  304. doAssert r.contains"def"
  305. r.excl "def"
  306. assert r.missingOrExcl("foo") == false
  307. assert "foo" notin toSeq(r.items)
  308. assert r.missingOrExcl("foo") == true
  309. assert toSeq(r.items) == @["abc", "definition", "prefix", "xyz"]
  310. assert toSeq(r.itemsWithPrefix("de")) == @["definition"]
  311. var c = CritBitTree[int]()
  312. c.inc("a")
  313. assert c["a"] == 1
  314. c.inc("a", 4)
  315. assert c["a"] == 5
  316. c.inc("a", -5)
  317. assert c["a"] == 0
  318. c.inc("b", 2)
  319. assert c["b"] == 2
  320. c.inc("c", 3)
  321. assert c["c"] == 3
  322. c.inc("a", 1)
  323. assert c["a"] == 1
  324. var cf = CritBitTree[float]()
  325. cf.incl("a", 1.0)
  326. assert cf["a"] == 1.0
  327. cf.incl("b", 2.0)
  328. assert cf["b"] == 2.0
  329. cf.incl("c", 3.0)
  330. assert cf["c"] == 3.0
  331. assert cf.len == 3
  332. cf.excl("c")
  333. assert cf.len == 2