critbits.nim 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements a `crit bit tree`:idx: which is an efficient
  10. ## container for a sorted set of strings, or for a sorted mapping of strings. Based on the
  11. ## [excellent paper by Adam Langley](https://www.imperialviolet.org/binary/critbit.pdf).
  12. ## (A crit bit tree is a form of `radix tree`:idx: or `patricia trie`:idx:.)
  13. runnableExamples:
  14. from std/sequtils import toSeq
  15. var critbitAsSet: CritBitTree[void] = ["kitten", "puppy"].toCritBitTree
  16. doAssert critbitAsSet.len == 2
  17. critbitAsSet.incl("")
  18. doAssert "" in critbitAsSet
  19. critbitAsSet.excl("")
  20. doAssert "" notin critbitAsSet
  21. doAssert toSeq(critbitAsSet.items) == @["kitten", "puppy"]
  22. let same = ["puppy", "kitten", "puppy"].toCritBitTree
  23. doAssert toSeq(same.keys) == toSeq(critbitAsSet.keys)
  24. var critbitAsDict: CritBitTree[int] = {"key1": 42}.toCritBitTree
  25. doAssert critbitAsDict.len == 1
  26. critbitAsDict["key2"] = 0
  27. doAssert "key2" in critbitAsDict
  28. doAssert critbitAsDict["key2"] == 0
  29. critbitAsDict.excl("key1")
  30. doAssert "key1" notin critbitAsDict
  31. doAssert toSeq(critbitAsDict.pairs) == @[("key2", 0)]
  32. import std/private/since
  33. type
  34. NodeObj[T] {.acyclic.} = object
  35. byte: int ## byte index of the difference
  36. otherBits: char
  37. case isLeaf: bool
  38. of false: child: array[0..1, ref NodeObj[T]]
  39. of true:
  40. key: string
  41. when T isnot void:
  42. val: T
  43. Node[T] = ref NodeObj[T]
  44. CritBitTree*[T] = object ## The crit bit tree can either be used
  45. ## as a mapping from strings to
  46. ## some type `T` or as a set of
  47. ## strings if `T` is `void`.
  48. root: Node[T]
  49. count: int
  50. func len*[T](c: CritBitTree[T]): int {.inline.} =
  51. ## Returns the number of elements in `c` in O(1).
  52. runnableExamples:
  53. let c = ["key1", "key2"].toCritBitTree
  54. doAssert c.len == 2
  55. result = c.count
  56. proc rawGet[T](c: CritBitTree[T], key: string): Node[T] =
  57. var it = c.root
  58. while it != nil:
  59. if not it.isLeaf:
  60. let ch = if it.byte < key.len: key[it.byte] else: '\0'
  61. let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8
  62. it = it.child[dir]
  63. else:
  64. return if it.key == key: it else: nil
  65. func contains*[T](c: CritBitTree[T], key: string): bool {.inline.} =
  66. ## Returns true if `c` contains the given `key`.
  67. runnableExamples:
  68. var c: CritBitTree[void]
  69. incl(c, "key")
  70. doAssert c.contains("key")
  71. result = rawGet(c, key) != nil
  72. func hasKey*[T](c: CritBitTree[T], key: string): bool {.inline.} =
  73. ## Alias for `contains <#contains,CritBitTree[T],string>`_.
  74. result = rawGet(c, key) != nil
  75. proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] =
  76. if c.root == nil:
  77. c.root = Node[T](isleaf: true, key: key)
  78. result = c.root
  79. else:
  80. var it = c.root
  81. while not it.isLeaf:
  82. let ch = if it.byte < key.len: key[it.byte] else: '\0'
  83. let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8
  84. it = it.child[dir]
  85. var newOtherBits = 0
  86. var newByte = 0
  87. block blockX:
  88. while newByte < key.len:
  89. let ch = if newByte < it.key.len: it.key[newByte] else: '\0'
  90. if ch != key[newByte]:
  91. newOtherBits = ch.ord xor key[newByte].ord
  92. break blockX
  93. inc newByte
  94. if newByte < it.key.len:
  95. newOtherBits = it.key[newByte].ord
  96. else:
  97. return it
  98. while (newOtherBits and (newOtherBits-1)) != 0:
  99. newOtherBits = newOtherBits and (newOtherBits-1)
  100. newOtherBits = newOtherBits xor 255
  101. let ch = if newByte < it.key.len: it.key[newByte] else: '\0'
  102. let dir = (1 + (ord(ch) or newOtherBits)) shr 8
  103. var inner: Node[T]
  104. new inner
  105. result = Node[T](isLeaf: true, key: key)
  106. inner.otherBits = chr(newOtherBits)
  107. inner.byte = newByte
  108. inner.child[1 - dir] = result
  109. var wherep = addr(c.root)
  110. while true:
  111. var p = wherep[]
  112. if p.isLeaf: break
  113. if p.byte > newByte: break
  114. if p.byte == newByte and p.otherBits.ord > newOtherBits: break
  115. let ch = if p.byte < key.len: key[p.byte] else: '\0'
  116. let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
  117. wherep = addr(p.child[dir])
  118. inner.child[dir] = wherep[]
  119. wherep[] = inner
  120. inc c.count
  121. func exclImpl[T](c: var CritBitTree[T], key: string): int =
  122. var p = c.root
  123. var wherep = addr(c.root)
  124. var whereq: ptr Node[T] = nil
  125. if p == nil: return c.count
  126. var dir = 0
  127. var q: Node[T]
  128. while not p.isLeaf:
  129. whereq = wherep
  130. q = p
  131. let ch = if p.byte < key.len: key[p.byte] else: '\0'
  132. dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
  133. wherep = addr(p.child[dir])
  134. p = wherep[]
  135. if p.key == key:
  136. # else: not in tree at all
  137. if whereq == nil:
  138. c.root = nil
  139. else:
  140. whereq[] = q.child[1 - dir]
  141. dec c.count
  142. return c.count
  143. proc excl*[T](c: var CritBitTree[T], key: string) =
  144. ## Removes `key` (and its associated value) from the set `c`.
  145. ## If the `key` does not exist, nothing happens.
  146. ##
  147. ## **See also:**
  148. ## * `incl proc <#incl,CritBitTree[void],string>`_
  149. ## * `incl proc <#incl,CritBitTree[T],string,T>`_
  150. runnableExamples:
  151. var c: CritBitTree[void]
  152. incl(c, "key")
  153. excl(c, "key")
  154. doAssert not c.contains("key")
  155. discard exclImpl(c, key)
  156. proc missingOrExcl*[T](c: var CritBitTree[T], key: string): bool =
  157. ## Returns true if `c` does not contain the given `key`. If the key
  158. ## does exist, `c.excl(key)` is performed.
  159. ##
  160. ## **See also:**
  161. ## * `excl proc <#excl,CritBitTree[T],string>`_
  162. ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_
  163. ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_
  164. runnableExamples:
  165. block:
  166. var c: CritBitTree[void]
  167. doAssert c.missingOrExcl("key")
  168. block:
  169. var c: CritBitTree[void]
  170. incl(c, "key")
  171. doAssert not c.missingOrExcl("key")
  172. doAssert not c.contains("key")
  173. let oldCount = c.count
  174. discard exclImpl(c, key)
  175. result = c.count == oldCount
  176. proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: T): bool =
  177. ## Returns true if `c` contains the given `key`. If the key does not exist,
  178. ## `c[key] = val` is performed.
  179. ##
  180. ## **See also:**
  181. ## * `incl proc <#incl,CritBitTree[void],string>`_
  182. ## * `incl proc <#incl,CritBitTree[T],string,T>`_
  183. ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_
  184. ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_
  185. runnableExamples:
  186. block:
  187. var c: CritBitTree[int]
  188. doAssert not c.containsOrIncl("key", 42)
  189. doAssert c.contains("key")
  190. block:
  191. var c: CritBitTree[int]
  192. incl(c, "key", 21)
  193. doAssert c.containsOrIncl("key", 42)
  194. doAssert c["key"] == 21
  195. let oldCount = c.count
  196. var n = rawInsert(c, key)
  197. result = c.count == oldCount
  198. when T isnot void:
  199. if not result: n.val = val
  200. proc containsOrIncl*(c: var CritBitTree[void], key: string): bool =
  201. ## Returns true if `c` contains the given `key`. If the key does not exist,
  202. ## it is inserted into `c`.
  203. ##
  204. ## **See also:**
  205. ## * `incl proc <#incl,CritBitTree[void],string>`_
  206. ## * `incl proc <#incl,CritBitTree[T],string,T>`_
  207. ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_
  208. ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_
  209. runnableExamples:
  210. block:
  211. var c: CritBitTree[void]
  212. doAssert not c.containsOrIncl("key")
  213. doAssert c.contains("key")
  214. block:
  215. var c: CritBitTree[void]
  216. incl(c, "key")
  217. doAssert c.containsOrIncl("key")
  218. let oldCount = c.count
  219. discard rawInsert(c, key)
  220. result = c.count == oldCount
  221. proc inc*(c: var CritBitTree[int]; key: string, val: int = 1) =
  222. ## Increments `c[key]` by `val`.
  223. runnableExamples:
  224. var c: CritBitTree[int]
  225. c["key"] = 1
  226. inc(c, "key")
  227. doAssert c["key"] == 2
  228. var n = rawInsert(c, key)
  229. inc n.val, val
  230. proc incl*(c: var CritBitTree[void], key: string) =
  231. ## Includes `key` in `c`.
  232. ##
  233. ## **See also:**
  234. ## * `excl proc <#excl,CritBitTree[T],string>`_
  235. ## * `incl proc <#incl,CritBitTree[T],string,T>`_
  236. runnableExamples:
  237. var c: CritBitTree[void]
  238. incl(c, "key")
  239. doAssert c.hasKey("key")
  240. discard rawInsert(c, key)
  241. proc incl*[T](c: var CritBitTree[T], key: string, val: T) =
  242. ## Inserts `key` with value `val` into `c`.
  243. ##
  244. ## **See also:**
  245. ## * `excl proc <#excl,CritBitTree[T],string>`_
  246. ## * `incl proc <#incl,CritBitTree[void],string>`_
  247. runnableExamples:
  248. var c: CritBitTree[int]
  249. incl(c, "key", 42)
  250. doAssert c["key"] == 42
  251. var n = rawInsert(c, key)
  252. n.val = val
  253. proc `[]=`*[T](c: var CritBitTree[T], key: string, val: T) =
  254. ## Alias for `incl <#incl,CritBitTree[T],string,T>`_.
  255. ##
  256. ## **See also:**
  257. ## * `[] proc <#[],CritBitTree[T],string>`_
  258. ## * `[] proc <#[],CritBitTree[T],string_2>`_
  259. var n = rawInsert(c, key)
  260. n.val = val
  261. template get[T](c: CritBitTree[T], key: string): T =
  262. let n = rawGet(c, key)
  263. if n == nil:
  264. raise newException(KeyError, "key not found: " & key)
  265. n.val
  266. func `[]`*[T](c: CritBitTree[T], key: string): T {.inline.} =
  267. ## Retrieves the value at `c[key]`. If `key` is not in `t`, the
  268. ## `KeyError` exception is raised. One can check with `hasKey` whether
  269. ## the key exists.
  270. ##
  271. ## **See also:**
  272. ## * `[] proc <#[],CritBitTree[T],string_2>`_
  273. ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_
  274. get(c, key)
  275. func `[]`*[T](c: var CritBitTree[T], key: string): var T {.inline.} =
  276. ## Retrieves the value at `c[key]`. The value can be modified.
  277. ## If `key` is not in `t`, the `KeyError` exception is raised.
  278. ##
  279. ## **See also:**
  280. ## * `[] proc <#[],CritBitTree[T],string>`_
  281. ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_
  282. get(c, key)
  283. iterator leaves[T](n: Node[T]): Node[T] =
  284. if n != nil:
  285. # XXX actually we could compute the necessary stack size in advance:
  286. # it's roughly log2(c.count).
  287. var stack = @[n]
  288. while stack.len > 0:
  289. var it = stack.pop
  290. while not it.isLeaf:
  291. stack.add(it.child[1])
  292. it = it.child[0]
  293. assert(it != nil)
  294. yield it
  295. iterator keys*[T](c: CritBitTree[T]): string =
  296. ## Yields all keys in lexicographical order.
  297. runnableExamples:
  298. from std/sequtils import toSeq
  299. let c = {"key1": 1, "key2": 2}.toCritBitTree
  300. doAssert toSeq(c.keys) == @["key1", "key2"]
  301. for x in leaves(c.root): yield x.key
  302. iterator values*[T](c: CritBitTree[T]): T =
  303. ## Yields all values of `c` in the lexicographical order of the
  304. ## corresponding keys.
  305. ##
  306. ## **See also:**
  307. ## * `mvalues iterator <#mvalues.i,CritBitTree[T]>`_
  308. runnableExamples:
  309. from std/sequtils import toSeq
  310. let c = {"key1": 1, "key2": 2}.toCritBitTree
  311. doAssert toSeq(c.values) == @[1, 2]
  312. for x in leaves(c.root): yield x.val
  313. iterator mvalues*[T](c: var CritBitTree[T]): var T =
  314. ## Yields all values of `c` in the lexicographical order of the
  315. ## corresponding keys. The values can be modified.
  316. ##
  317. ## **See also:**
  318. ## * `values iterator <#values.i,CritBitTree[T]>`_
  319. for x in leaves(c.root): yield x.val
  320. iterator items*[T](c: CritBitTree[T]): string =
  321. ## Alias for `keys <#keys.i,CritBitTree[T]>`_.
  322. for x in leaves(c.root): yield x.key
  323. iterator pairs*[T](c: CritBitTree[T]): tuple[key: string, val: T] =
  324. ## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the
  325. ## corresponding keys.
  326. ##
  327. ## **See also:**
  328. ## * `mpairs iterator <#mpairs.i,CritBitTree[T]>`_
  329. runnableExamples:
  330. from std/sequtils import toSeq
  331. let c = {"key1": 1, "key2": 2}.toCritBitTree
  332. doAssert toSeq(c.pairs) == @[(key: "key1", val: 1), (key: "key2", val: 2)]
  333. for x in leaves(c.root): yield (x.key, x.val)
  334. iterator mpairs*[T](c: var CritBitTree[T]): tuple[key: string, val: var T] =
  335. ## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the
  336. ## corresponding keys. The yielded values can be modified.
  337. ##
  338. ## **See also:**
  339. ## * `pairs iterator <#pairs.i,CritBitTree[T]>`_
  340. for x in leaves(c.root): yield (x.key, x.val)
  341. proc allprefixedAux[T](c: CritBitTree[T], key: string): Node[T] =
  342. var p = c.root
  343. var top = p
  344. if p != nil:
  345. while not p.isLeaf:
  346. var q = p
  347. let ch = if p.byte < key.len: key[p.byte] else: '\0'
  348. let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
  349. p = p.child[dir]
  350. if q.byte < key.len: top = p
  351. for i in 0 ..< key.len:
  352. if i >= p.key.len or p.key[i] != key[i]: return
  353. result = top
  354. iterator keysWithPrefix*[T](c: CritBitTree[T], prefix: string): string =
  355. ## Yields all keys starting with `prefix`.
  356. runnableExamples:
  357. from std/sequtils import toSeq
  358. let c = {"key1": 42, "key2": 43}.toCritBitTree
  359. doAssert toSeq(c.keysWithPrefix("key")) == @["key1", "key2"]
  360. let top = allprefixedAux(c, prefix)
  361. for x in leaves(top): yield x.key
  362. iterator valuesWithPrefix*[T](c: CritBitTree[T], prefix: string): T =
  363. ## Yields all values of `c` starting with `prefix` of the
  364. ## corresponding keys.
  365. ##
  366. ## **See also:**
  367. ## * `mvaluesWithPrefix iterator <#mvaluesWithPrefix.i,CritBitTree[T],string>`_
  368. runnableExamples:
  369. from std/sequtils import toSeq
  370. let c = {"key1": 42, "key2": 43}.toCritBitTree
  371. doAssert toSeq(c.valuesWithPrefix("key")) == @[42, 43]
  372. let top = allprefixedAux(c, prefix)
  373. for x in leaves(top): yield x.val
  374. iterator mvaluesWithPrefix*[T](c: var CritBitTree[T], prefix: string): var T =
  375. ## Yields all values of `c` starting with `prefix` of the
  376. ## corresponding keys. The values can be modified.
  377. ##
  378. ## **See also:**
  379. ## * `valuesWithPrefix iterator <#valuesWithPrefix.i,CritBitTree[T],string>`_
  380. let top = allprefixedAux(c, prefix)
  381. for x in leaves(top): yield x.val
  382. iterator itemsWithPrefix*[T](c: CritBitTree[T], prefix: string): string =
  383. ## Alias for `keysWithPrefix <#keysWithPrefix.i,CritBitTree[T],string>`_.
  384. let top = allprefixedAux(c, prefix)
  385. for x in leaves(top): yield x.key
  386. iterator pairsWithPrefix*[T](c: CritBitTree[T],
  387. prefix: string): tuple[key: string, val: T] =
  388. ## Yields all (key, value)-pairs of `c` starting with `prefix`.
  389. ##
  390. ## **See also:**
  391. ## * `mpairsWithPrefix iterator <#mpairsWithPrefix.i,CritBitTree[T],string>`_
  392. runnableExamples:
  393. from std/sequtils import toSeq
  394. let c = {"key1": 42, "key2": 43}.toCritBitTree
  395. doAssert toSeq(c.pairsWithPrefix("key")) == @[(key: "key1", val: 42), (key: "key2", val: 43)]
  396. let top = allprefixedAux(c, prefix)
  397. for x in leaves(top): yield (x.key, x.val)
  398. iterator mpairsWithPrefix*[T](c: var CritBitTree[T],
  399. prefix: string): tuple[key: string, val: var T] =
  400. ## Yields all (key, value)-pairs of `c` starting with `prefix`.
  401. ## The yielded values can be modified.
  402. ##
  403. ## **See also:**
  404. ## * `pairsWithPrefix iterator <#pairsWithPrefix.i,CritBitTree[T],string>`_
  405. let top = allprefixedAux(c, prefix)
  406. for x in leaves(top): yield (x.key, x.val)
  407. func `$`*[T](c: CritBitTree[T]): string =
  408. ## Turns `c` into a string representation.
  409. runnableExamples:
  410. doAssert $CritBitTree[int].default == "{:}"
  411. doAssert $toCritBitTree({"key1": 1, "key2": 2}) == """{"key1": 1, "key2": 2}"""
  412. doAssert $CritBitTree[void].default == "{}"
  413. doAssert $toCritBitTree(["key1", "key2"]) == """{"key1", "key2"}"""
  414. if c.len == 0:
  415. when T is void:
  416. result = "{}"
  417. else:
  418. result = "{:}"
  419. else:
  420. # an educated guess is better than nothing:
  421. when T is void:
  422. const avgItemLen = 8
  423. else:
  424. const avgItemLen = 16
  425. result = newStringOfCap(c.count * avgItemLen)
  426. result.add("{")
  427. when T is void:
  428. for key in keys(c):
  429. if result.len > 1: result.add(", ")
  430. result.addQuoted(key)
  431. else:
  432. for key, val in pairs(c):
  433. if result.len > 1: result.add(", ")
  434. result.addQuoted(key)
  435. result.add(": ")
  436. result.addQuoted(val)
  437. result.add("}")
  438. func commonPrefixLen*[T](c: CritBitTree[T]): int {.inline, since((1, 3)).} =
  439. ## Returns the length of the longest common prefix of all keys in `c`.
  440. ## If `c` is empty, returns 0.
  441. runnableExamples:
  442. var c: CritBitTree[void]
  443. doAssert c.commonPrefixLen == 0
  444. incl(c, "key1")
  445. doAssert c.commonPrefixLen == 4
  446. incl(c, "key2")
  447. doAssert c.commonPrefixLen == 3
  448. if c.root != nil:
  449. if c.root.isLeaf: len(c.root.key)
  450. else: c.root.byte
  451. else: 0
  452. proc toCritBitTree*[T](pairs: openArray[(string, T)]): CritBitTree[T] {.since: (1, 3).} =
  453. ## Creates a new `CritBitTree` that contains the given `pairs`.
  454. runnableExamples:
  455. doAssert {"a": "0", "b": "1", "c": "2"}.toCritBitTree is CritBitTree[string]
  456. doAssert {"a": 0, "b": 1, "c": 2}.toCritBitTree is CritBitTree[int]
  457. for item in pairs: result.incl item[0], item[1]
  458. proc toCritBitTree*(items: openArray[string]): CritBitTree[void] {.since: (1, 3).} =
  459. ## Creates a new `CritBitTree` that contains the given `items`.
  460. runnableExamples:
  461. doAssert ["a", "b", "c"].toCritBitTree is CritBitTree[void]
  462. for item in items: result.incl item