critbits.nim 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements a `crit bit tree`:idx: which is an efficient
  10. ## container for a sorted set of strings, or for a sorted mapping of strings. Based on the
  11. ## [excellent paper by Adam Langley](https://www.imperialviolet.org/binary/critbit.pdf).
  12. ## (A crit bit tree is a form of `radix tree`:idx: or `patricia trie`:idx:.)
  13. runnableExamples:
  14. from std/sequtils import toSeq
  15. var critbitAsSet: CritBitTree[void] = ["kitten", "puppy"].toCritBitTree
  16. doAssert critbitAsSet.len == 2
  17. critbitAsSet.incl("")
  18. doAssert "" in critbitAsSet
  19. critbitAsSet.excl("")
  20. doAssert "" notin critbitAsSet
  21. doAssert toSeq(critbitAsSet.items) == @["kitten", "puppy"]
  22. let same = ["puppy", "kitten", "puppy"].toCritBitTree
  23. doAssert toSeq(same.keys) == toSeq(critbitAsSet.keys)
  24. var critbitAsDict: CritBitTree[int] = {"key1": 42}.toCritBitTree
  25. doAssert critbitAsDict.len == 1
  26. critbitAsDict["key2"] = 0
  27. doAssert "key2" in critbitAsDict
  28. doAssert critbitAsDict["key2"] == 0
  29. critbitAsDict.excl("key1")
  30. doAssert "key1" notin critbitAsDict
  31. doAssert toSeq(critbitAsDict.pairs) == @[("key2", 0)]
  32. import std/private/since
  33. when defined(nimPreviewSlimSystem):
  34. import std/assertions
  35. type
  36. NodeObj[T] {.acyclic.} = object
  37. byte: int ## byte index of the difference
  38. otherBits: char
  39. case isLeaf: bool
  40. of false: child: array[0..1, ref NodeObj[T]]
  41. of true:
  42. key: string
  43. when T isnot void:
  44. val: T
  45. Node[T] = ref NodeObj[T]
  46. CritBitTree*[T] = object ## The crit bit tree can either be used
  47. ## as a mapping from strings to
  48. ## some type `T` or as a set of
  49. ## strings if `T` is `void`.
  50. root: Node[T]
  51. count: int
  52. func len*[T](c: CritBitTree[T]): int {.inline.} =
  53. ## Returns the number of elements in `c` in O(1).
  54. runnableExamples:
  55. let c = ["key1", "key2"].toCritBitTree
  56. doAssert c.len == 2
  57. result = c.count
  58. proc rawGet[T](c: CritBitTree[T], key: string): Node[T] =
  59. var it = c.root
  60. while it != nil:
  61. if not it.isLeaf:
  62. let ch = if it.byte < key.len: key[it.byte] else: '\0'
  63. let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8
  64. it = it.child[dir]
  65. else:
  66. return if it.key == key: it else: nil
  67. func contains*[T](c: CritBitTree[T], key: string): bool {.inline.} =
  68. ## Returns true if `c` contains the given `key`.
  69. runnableExamples:
  70. var c: CritBitTree[void]
  71. incl(c, "key")
  72. doAssert c.contains("key")
  73. result = rawGet(c, key) != nil
  74. func hasKey*[T](c: CritBitTree[T], key: string): bool {.inline.} =
  75. ## Alias for `contains <#contains,CritBitTree[T],string>`_.
  76. result = rawGet(c, key) != nil
  77. proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] =
  78. if c.root == nil:
  79. c.root = Node[T](isleaf: true, key: key)
  80. result = c.root
  81. else:
  82. var it = c.root
  83. while not it.isLeaf:
  84. let ch = if it.byte < key.len: key[it.byte] else: '\0'
  85. let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8
  86. it = it.child[dir]
  87. var newOtherBits = 0
  88. var newByte = 0
  89. block blockX:
  90. while newByte < key.len:
  91. let ch = if newByte < it.key.len: it.key[newByte] else: '\0'
  92. if ch != key[newByte]:
  93. newOtherBits = ch.ord xor key[newByte].ord
  94. break blockX
  95. inc newByte
  96. if newByte < it.key.len:
  97. newOtherBits = it.key[newByte].ord
  98. else:
  99. return it
  100. while (newOtherBits and (newOtherBits-1)) != 0:
  101. newOtherBits = newOtherBits and (newOtherBits-1)
  102. newOtherBits = newOtherBits xor 255
  103. let ch = if newByte < it.key.len: it.key[newByte] else: '\0'
  104. let dir = (1 + (ord(ch) or newOtherBits)) shr 8
  105. var inner: Node[T]
  106. new inner
  107. result = Node[T](isLeaf: true, key: key)
  108. inner.otherBits = chr(newOtherBits)
  109. inner.byte = newByte
  110. inner.child[1 - dir] = result
  111. var wherep = addr(c.root)
  112. while true:
  113. var p = wherep[]
  114. if p.isLeaf: break
  115. if p.byte > newByte: break
  116. if p.byte == newByte and p.otherBits.ord > newOtherBits: break
  117. let ch = if p.byte < key.len: key[p.byte] else: '\0'
  118. let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
  119. wherep = addr(p.child[dir])
  120. inner.child[dir] = wherep[]
  121. wherep[] = inner
  122. inc c.count
  123. func exclImpl[T](c: var CritBitTree[T], key: string): int =
  124. var p = c.root
  125. var wherep = addr(c.root)
  126. var whereq: ptr Node[T] = nil
  127. if p == nil: return c.count
  128. var dir = 0
  129. var q: Node[T]
  130. while not p.isLeaf:
  131. whereq = wherep
  132. q = p
  133. let ch = if p.byte < key.len: key[p.byte] else: '\0'
  134. dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
  135. wherep = addr(p.child[dir])
  136. p = wherep[]
  137. if p.key == key:
  138. # else: not in tree at all
  139. if whereq == nil:
  140. c.root = nil
  141. else:
  142. whereq[] = q.child[1 - dir]
  143. dec c.count
  144. return c.count
  145. proc excl*[T](c: var CritBitTree[T], key: string) =
  146. ## Removes `key` (and its associated value) from the set `c`.
  147. ## If the `key` does not exist, nothing happens.
  148. ##
  149. ## **See also:**
  150. ## * `incl proc <#incl,CritBitTree[void],string>`_
  151. ## * `incl proc <#incl,CritBitTree[T],string,T>`_
  152. runnableExamples:
  153. var c: CritBitTree[void]
  154. incl(c, "key")
  155. excl(c, "key")
  156. doAssert not c.contains("key")
  157. discard exclImpl(c, key)
  158. proc missingOrExcl*[T](c: var CritBitTree[T], key: string): bool =
  159. ## Returns true if `c` does not contain the given `key`. If the key
  160. ## does exist, `c.excl(key)` is performed.
  161. ##
  162. ## **See also:**
  163. ## * `excl proc <#excl,CritBitTree[T],string>`_
  164. ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_
  165. ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_
  166. runnableExamples:
  167. block:
  168. var c: CritBitTree[void]
  169. doAssert c.missingOrExcl("key")
  170. block:
  171. var c: CritBitTree[void]
  172. incl(c, "key")
  173. doAssert not c.missingOrExcl("key")
  174. doAssert not c.contains("key")
  175. let oldCount = c.count
  176. discard exclImpl(c, key)
  177. result = c.count == oldCount
  178. proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: sink T): bool =
  179. ## Returns true if `c` contains the given `key`. If the key does not exist,
  180. ## `c[key] = val` is performed.
  181. ##
  182. ## **See also:**
  183. ## * `incl proc <#incl,CritBitTree[void],string>`_
  184. ## * `incl proc <#incl,CritBitTree[T],string,T>`_
  185. ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_
  186. ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_
  187. runnableExamples:
  188. block:
  189. var c: CritBitTree[int]
  190. doAssert not c.containsOrIncl("key", 42)
  191. doAssert c.contains("key")
  192. block:
  193. var c: CritBitTree[int]
  194. incl(c, "key", 21)
  195. doAssert c.containsOrIncl("key", 42)
  196. doAssert c["key"] == 21
  197. let oldCount = c.count
  198. var n = rawInsert(c, key)
  199. result = c.count == oldCount
  200. when T isnot void:
  201. if not result: n.val = val
  202. proc containsOrIncl*(c: var CritBitTree[void], key: string): bool =
  203. ## Returns true if `c` contains the given `key`. If the key does not exist,
  204. ## it is inserted into `c`.
  205. ##
  206. ## **See also:**
  207. ## * `incl proc <#incl,CritBitTree[void],string>`_
  208. ## * `incl proc <#incl,CritBitTree[T],string,T>`_
  209. ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_
  210. ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_
  211. runnableExamples:
  212. block:
  213. var c: CritBitTree[void]
  214. doAssert not c.containsOrIncl("key")
  215. doAssert c.contains("key")
  216. block:
  217. var c: CritBitTree[void]
  218. incl(c, "key")
  219. doAssert c.containsOrIncl("key")
  220. let oldCount = c.count
  221. discard rawInsert(c, key)
  222. result = c.count == oldCount
  223. proc inc*(c: var CritBitTree[int]; key: string, val: int = 1) =
  224. ## Increments `c[key]` by `val`.
  225. runnableExamples:
  226. var c: CritBitTree[int]
  227. c["key"] = 1
  228. inc(c, "key")
  229. doAssert c["key"] == 2
  230. var n = rawInsert(c, key)
  231. inc n.val, val
  232. proc incl*(c: var CritBitTree[void], key: string) =
  233. ## Includes `key` in `c`.
  234. ##
  235. ## **See also:**
  236. ## * `excl proc <#excl,CritBitTree[T],string>`_
  237. ## * `incl proc <#incl,CritBitTree[T],string,T>`_
  238. runnableExamples:
  239. var c: CritBitTree[void]
  240. incl(c, "key")
  241. doAssert c.hasKey("key")
  242. discard rawInsert(c, key)
  243. proc incl*[T](c: var CritBitTree[T], key: string, val: sink T) =
  244. ## Inserts `key` with value `val` into `c`.
  245. ##
  246. ## **See also:**
  247. ## * `excl proc <#excl,CritBitTree[T],string>`_
  248. ## * `incl proc <#incl,CritBitTree[void],string>`_
  249. runnableExamples:
  250. var c: CritBitTree[int]
  251. incl(c, "key", 42)
  252. doAssert c["key"] == 42
  253. var n = rawInsert(c, key)
  254. n.val = val
  255. proc `[]=`*[T](c: var CritBitTree[T], key: string, val: sink T) =
  256. ## Alias for `incl <#incl,CritBitTree[T],string,T>`_.
  257. ##
  258. ## **See also:**
  259. ## * `[] proc <#[],CritBitTree[T],string>`_
  260. ## * `[] proc <#[],CritBitTree[T],string_2>`_
  261. var n = rawInsert(c, key)
  262. n.val = val
  263. template get[T](c: CritBitTree[T], key: string): T =
  264. let n = rawGet(c, key)
  265. if n == nil:
  266. raise newException(KeyError, "key not found: " & key)
  267. n.val
  268. func `[]`*[T](c: CritBitTree[T], key: string): lent T {.inline.} =
  269. ## Retrieves the value at `c[key]`. If `key` is not in `t`, the
  270. ## `KeyError` exception is raised. One can check with `hasKey` whether
  271. ## the key exists.
  272. ##
  273. ## **See also:**
  274. ## * `[] proc <#[],CritBitTree[T],string_2>`_
  275. ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_
  276. get(c, key)
  277. func `[]`*[T](c: var CritBitTree[T], key: string): var T {.inline.} =
  278. ## Retrieves the value at `c[key]`. The value can be modified.
  279. ## If `key` is not in `t`, the `KeyError` exception is raised.
  280. ##
  281. ## **See also:**
  282. ## * `[] proc <#[],CritBitTree[T],string>`_
  283. ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_
  284. get(c, key)
  285. iterator leaves[T](n: Node[T]): Node[T] =
  286. if n != nil:
  287. # XXX actually we could compute the necessary stack size in advance:
  288. # it's roughly log2(c.count).
  289. var stack = @[n]
  290. while stack.len > 0:
  291. var it = stack.pop
  292. while not it.isLeaf:
  293. stack.add(it.child[1])
  294. it = it.child[0]
  295. assert(it != nil)
  296. yield it
  297. iterator keys*[T](c: CritBitTree[T]): string =
  298. ## Yields all keys in lexicographical order.
  299. runnableExamples:
  300. from std/sequtils import toSeq
  301. let c = {"key1": 1, "key2": 2}.toCritBitTree
  302. doAssert toSeq(c.keys) == @["key1", "key2"]
  303. for x in leaves(c.root): yield x.key
  304. iterator values*[T](c: CritBitTree[T]): lent T =
  305. ## Yields all values of `c` in the lexicographical order of the
  306. ## corresponding keys.
  307. ##
  308. ## **See also:**
  309. ## * `mvalues iterator <#mvalues.i,CritBitTree[T]>`_
  310. runnableExamples:
  311. from std/sequtils import toSeq
  312. let c = {"key1": 1, "key2": 2}.toCritBitTree
  313. doAssert toSeq(c.values) == @[1, 2]
  314. for x in leaves(c.root): yield x.val
  315. iterator mvalues*[T](c: var CritBitTree[T]): var T =
  316. ## Yields all values of `c` in the lexicographical order of the
  317. ## corresponding keys. The values can be modified.
  318. ##
  319. ## **See also:**
  320. ## * `values iterator <#values.i,CritBitTree[T]>`_
  321. for x in leaves(c.root): yield x.val
  322. iterator items*[T](c: CritBitTree[T]): string =
  323. ## Alias for `keys <#keys.i,CritBitTree[T]>`_.
  324. for x in leaves(c.root): yield x.key
  325. iterator pairs*[T](c: CritBitTree[T]): tuple[key: string, val: T] =
  326. ## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the
  327. ## corresponding keys.
  328. ##
  329. ## **See also:**
  330. ## * `mpairs iterator <#mpairs.i,CritBitTree[T]>`_
  331. runnableExamples:
  332. from std/sequtils import toSeq
  333. let c = {"key1": 1, "key2": 2}.toCritBitTree
  334. doAssert toSeq(c.pairs) == @[(key: "key1", val: 1), (key: "key2", val: 2)]
  335. for x in leaves(c.root): yield (x.key, x.val)
  336. iterator mpairs*[T](c: var CritBitTree[T]): tuple[key: string, val: var T] =
  337. ## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the
  338. ## corresponding keys. The yielded values can be modified.
  339. ##
  340. ## **See also:**
  341. ## * `pairs iterator <#pairs.i,CritBitTree[T]>`_
  342. for x in leaves(c.root): yield (x.key, x.val)
  343. proc allprefixedAux[T](c: CritBitTree[T], key: string): Node[T] =
  344. var p = c.root
  345. var top = p
  346. if p != nil:
  347. while not p.isLeaf:
  348. var q = p
  349. let ch = if p.byte < key.len: key[p.byte] else: '\0'
  350. let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
  351. p = p.child[dir]
  352. if q.byte < key.len: top = p
  353. for i in 0 ..< key.len:
  354. if i >= p.key.len or p.key[i] != key[i]: return
  355. result = top
  356. iterator keysWithPrefix*[T](c: CritBitTree[T], prefix: string): string =
  357. ## Yields all keys starting with `prefix`.
  358. runnableExamples:
  359. from std/sequtils import toSeq
  360. let c = {"key1": 42, "key2": 43}.toCritBitTree
  361. doAssert toSeq(c.keysWithPrefix("key")) == @["key1", "key2"]
  362. let top = allprefixedAux(c, prefix)
  363. for x in leaves(top): yield x.key
  364. iterator valuesWithPrefix*[T](c: CritBitTree[T], prefix: string): lent T =
  365. ## Yields all values of `c` starting with `prefix` of the
  366. ## corresponding keys.
  367. ##
  368. ## **See also:**
  369. ## * `mvaluesWithPrefix iterator <#mvaluesWithPrefix.i,CritBitTree[T],string>`_
  370. runnableExamples:
  371. from std/sequtils import toSeq
  372. let c = {"key1": 42, "key2": 43}.toCritBitTree
  373. doAssert toSeq(c.valuesWithPrefix("key")) == @[42, 43]
  374. let top = allprefixedAux(c, prefix)
  375. for x in leaves(top): yield x.val
  376. iterator mvaluesWithPrefix*[T](c: var CritBitTree[T], prefix: string): var T =
  377. ## Yields all values of `c` starting with `prefix` of the
  378. ## corresponding keys. The values can be modified.
  379. ##
  380. ## **See also:**
  381. ## * `valuesWithPrefix iterator <#valuesWithPrefix.i,CritBitTree[T],string>`_
  382. let top = allprefixedAux(c, prefix)
  383. for x in leaves(top): yield x.val
  384. iterator itemsWithPrefix*[T](c: CritBitTree[T], prefix: string): string =
  385. ## Alias for `keysWithPrefix <#keysWithPrefix.i,CritBitTree[T],string>`_.
  386. let top = allprefixedAux(c, prefix)
  387. for x in leaves(top): yield x.key
  388. iterator pairsWithPrefix*[T](c: CritBitTree[T],
  389. prefix: string): tuple[key: string, val: T] =
  390. ## Yields all (key, value)-pairs of `c` starting with `prefix`.
  391. ##
  392. ## **See also:**
  393. ## * `mpairsWithPrefix iterator <#mpairsWithPrefix.i,CritBitTree[T],string>`_
  394. runnableExamples:
  395. from std/sequtils import toSeq
  396. let c = {"key1": 42, "key2": 43}.toCritBitTree
  397. doAssert toSeq(c.pairsWithPrefix("key")) == @[(key: "key1", val: 42), (key: "key2", val: 43)]
  398. let top = allprefixedAux(c, prefix)
  399. for x in leaves(top): yield (x.key, x.val)
  400. iterator mpairsWithPrefix*[T](c: var CritBitTree[T],
  401. prefix: string): tuple[key: string, val: var T] =
  402. ## Yields all (key, value)-pairs of `c` starting with `prefix`.
  403. ## The yielded values can be modified.
  404. ##
  405. ## **See also:**
  406. ## * `pairsWithPrefix iterator <#pairsWithPrefix.i,CritBitTree[T],string>`_
  407. let top = allprefixedAux(c, prefix)
  408. for x in leaves(top): yield (x.key, x.val)
  409. func `$`*[T](c: CritBitTree[T]): string =
  410. ## Turns `c` into a string representation.
  411. runnableExamples:
  412. doAssert $CritBitTree[int].default == "{:}"
  413. doAssert $toCritBitTree({"key1": 1, "key2": 2}) == """{"key1": 1, "key2": 2}"""
  414. doAssert $CritBitTree[void].default == "{}"
  415. doAssert $toCritBitTree(["key1", "key2"]) == """{"key1", "key2"}"""
  416. if c.len == 0:
  417. when T is void:
  418. result = "{}"
  419. else:
  420. result = "{:}"
  421. else:
  422. # an educated guess is better than nothing:
  423. when T is void:
  424. const avgItemLen = 8
  425. else:
  426. const avgItemLen = 16
  427. result = newStringOfCap(c.count * avgItemLen)
  428. result.add("{")
  429. when T is void:
  430. for key in keys(c):
  431. if result.len > 1: result.add(", ")
  432. result.addQuoted(key)
  433. else:
  434. for key, val in pairs(c):
  435. if result.len > 1: result.add(", ")
  436. result.addQuoted(key)
  437. result.add(": ")
  438. result.addQuoted(val)
  439. result.add("}")
  440. func commonPrefixLen*[T](c: CritBitTree[T]): int {.inline, since((1, 3)).} =
  441. ## Returns the length of the longest common prefix of all keys in `c`.
  442. ## If `c` is empty, returns 0.
  443. runnableExamples:
  444. var c: CritBitTree[void]
  445. doAssert c.commonPrefixLen == 0
  446. incl(c, "key1")
  447. doAssert c.commonPrefixLen == 4
  448. incl(c, "key2")
  449. doAssert c.commonPrefixLen == 3
  450. if c.root != nil:
  451. if c.root.isLeaf: len(c.root.key)
  452. else: c.root.byte
  453. else: 0
  454. proc toCritBitTree*[T](pairs: sink openArray[(string, T)]): CritBitTree[T] {.since: (1, 3).} =
  455. ## Creates a new `CritBitTree` that contains the given `pairs`.
  456. runnableExamples:
  457. doAssert {"a": "0", "b": "1", "c": "2"}.toCritBitTree is CritBitTree[string]
  458. doAssert {"a": 0, "b": 1, "c": 2}.toCritBitTree is CritBitTree[int]
  459. for item in pairs: result.incl item[0], item[1]
  460. proc toCritBitTree*(items: sink openArray[string]): CritBitTree[void] {.since: (1, 3).} =
  461. ## Creates a new `CritBitTree` that contains the given `items`.
  462. runnableExamples:
  463. doAssert ["a", "b", "c"].toCritBitTree is CritBitTree[void]
  464. for item in items: result.incl item