packedsets.nim 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## The `packedsets` module implements an efficient `Ordinal` set implemented as a
  10. ## `sparse bit set`:idx:.
  11. ##
  12. ## Supports any Ordinal type.
  13. ##
  14. ## See also
  15. ## ========
  16. ## * `sets module <sets.html>`_ for more general hash sets
  17. import std/private/since
  18. import hashes
  19. when defined(nimPreviewSlimSystem):
  20. import std/assertions
  21. type
  22. BitScalar = uint
  23. const
  24. InitIntSetSize = 8 # must be a power of two!
  25. TrunkShift = 9
  26. BitsPerTrunk = 1 shl TrunkShift # needs to be a power of 2 and
  27. # divisible by 64
  28. TrunkMask = BitsPerTrunk - 1
  29. IntsPerTrunk = BitsPerTrunk div (sizeof(BitScalar) * 8)
  30. IntShift = 5 + ord(sizeof(BitScalar) == 8) # 5 or 6, depending on int width
  31. IntMask = 1 shl IntShift - 1
  32. type
  33. Trunk {.acyclic.} = ref object
  34. next: Trunk # all nodes are connected with this pointer
  35. key: int # start address at bit 0
  36. bits: array[0..IntsPerTrunk - 1, BitScalar] # a bit vector
  37. TrunkSeq = seq[Trunk]
  38. PackedSet*[A: Ordinal] = object
  39. ## An efficient set of `Ordinal` types implemented as a sparse bit set.
  40. elems: int # only valid for small numbers
  41. counter, max: int
  42. head: Trunk
  43. data: TrunkSeq
  44. a: array[0..33, int] # profiling shows that 34 elements are enough
  45. proc mustRehash[T](t: T): bool {.inline.} =
  46. let length = t.max + 1
  47. assert length > t.counter
  48. result = (length * 2 < t.counter * 3) or (length - t.counter < 4)
  49. proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} =
  50. const PERTURB_SHIFT = 5
  51. var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT
  52. perturb = cast[Hash](perturb2)
  53. result = ((5 * h) + 1 + perturb) and maxHash
  54. proc packedSetGet[A](t: PackedSet[A], key: int): Trunk =
  55. var h = key and t.max
  56. var perturb = key
  57. while t.data[h] != nil:
  58. if t.data[h].key == key:
  59. return t.data[h]
  60. h = nextTry(h, t.max, perturb)
  61. result = nil
  62. proc intSetRawInsert[A](t: PackedSet[A], data: var TrunkSeq, desc: Trunk) =
  63. var h = desc.key and t.max
  64. var perturb = desc.key
  65. while data[h] != nil:
  66. assert data[h] != desc
  67. h = nextTry(h, t.max, perturb)
  68. assert data[h] == nil
  69. data[h] = desc
  70. proc intSetEnlarge[A](t: var PackedSet[A]) =
  71. var n: TrunkSeq
  72. var oldMax = t.max
  73. t.max = ((t.max + 1) * 2) - 1
  74. newSeq(n, t.max + 1)
  75. for i in countup(0, oldMax):
  76. if t.data[i] != nil: intSetRawInsert(t, n, t.data[i])
  77. swap(t.data, n)
  78. proc intSetPut[A](t: var PackedSet[A], key: int): Trunk =
  79. var h = key and t.max
  80. var perturb = key
  81. while t.data[h] != nil:
  82. if t.data[h].key == key:
  83. return t.data[h]
  84. h = nextTry(h, t.max, perturb)
  85. if mustRehash(t): intSetEnlarge(t)
  86. inc(t.counter)
  87. h = key and t.max
  88. perturb = key
  89. while t.data[h] != nil: h = nextTry(h, t.max, perturb)
  90. assert t.data[h] == nil
  91. new(result)
  92. result.next = t.head
  93. result.key = key
  94. t.head = result
  95. t.data[h] = result
  96. proc bitincl[A](s: var PackedSet[A], key: int) {.inline.} =
  97. var ret: Trunk
  98. var t = intSetPut(s, key shr TrunkShift)
  99. var u = key and TrunkMask
  100. t.bits[u shr IntShift] = t.bits[u shr IntShift] or
  101. (BitScalar(1) shl (u and IntMask))
  102. proc exclImpl[A](s: var PackedSet[A], key: int) =
  103. if s.elems <= s.a.len:
  104. for i in 0..<s.elems:
  105. if s.a[i] == key:
  106. s.a[i] = s.a[s.elems - 1]
  107. dec(s.elems)
  108. return
  109. else:
  110. var t = packedSetGet(s, key shr TrunkShift)
  111. if t != nil:
  112. var u = key and TrunkMask
  113. t.bits[u shr IntShift] = t.bits[u shr IntShift] and
  114. not(BitScalar(1) shl (u and IntMask))
  115. template dollarImpl(): untyped =
  116. result = "{"
  117. for key in items(s):
  118. if result.len > 1: result.add(", ")
  119. result.add $key
  120. result.add("}")
  121. iterator items*[A](s: PackedSet[A]): A {.inline.} =
  122. ## Iterates over any included element of `s`.
  123. if s.elems <= s.a.len:
  124. for i in 0..<s.elems:
  125. yield A(s.a[i])
  126. else:
  127. var r = s.head
  128. while r != nil:
  129. var i = 0
  130. while i <= high(r.bits):
  131. var w: uint = r.bits[i]
  132. # taking a copy of r.bits[i] here is correct, because
  133. # modifying operations are not allowed during traversation
  134. var j = 0
  135. while w != 0: # test all remaining bits for zero
  136. if (w and 1) != 0: # the bit is set!
  137. yield A((r.key shl TrunkShift) or (i shl IntShift +% j))
  138. inc(j)
  139. w = w shr 1
  140. inc(i)
  141. r = r.next
  142. proc initPackedSet*[A]: PackedSet[A] =
  143. ## Returns an empty `PackedSet[A]`.
  144. ## `A` must be `Ordinal`.
  145. ##
  146. ## **See also:**
  147. ## * `toPackedSet proc <#toPackedSet,openArray[A]>`_
  148. runnableExamples:
  149. let a = initPackedSet[int]()
  150. assert len(a) == 0
  151. type Id = distinct int
  152. var ids = initPackedSet[Id]()
  153. ids.incl(3.Id)
  154. result = PackedSet[A](
  155. elems: 0,
  156. counter: 0,
  157. max: 0,
  158. head: nil,
  159. data: @[])
  160. # a: array[0..33, int] # profiling shows that 34 elements are enough
  161. proc contains*[A](s: PackedSet[A], key: A): bool =
  162. ## Returns true if `key` is in `s`.
  163. ##
  164. ## This allows the usage of the `in` operator.
  165. runnableExamples:
  166. type ABCD = enum A, B, C, D
  167. let a = [1, 3, 5].toPackedSet
  168. assert a.contains(3)
  169. assert 3 in a
  170. assert not a.contains(8)
  171. assert 8 notin a
  172. let letters = [A, C].toPackedSet
  173. assert A in letters
  174. assert C in letters
  175. assert B notin letters
  176. if s.elems <= s.a.len:
  177. for i in 0..<s.elems:
  178. if s.a[i] == ord(key): return true
  179. else:
  180. var t = packedSetGet(s, ord(key) shr TrunkShift)
  181. if t != nil:
  182. var u = ord(key) and TrunkMask
  183. result = (t.bits[u shr IntShift] and
  184. (BitScalar(1) shl (u and IntMask))) != 0
  185. else:
  186. result = false
  187. proc incl*[A](s: var PackedSet[A], key: A) =
  188. ## Includes an element `key` in `s`.
  189. ##
  190. ## This doesn't do anything if `key` is already in `s`.
  191. ##
  192. ## **See also:**
  193. ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element
  194. ## * `incl proc <#incl,PackedSet[A],PackedSet[A]>`_ for including a set
  195. ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_
  196. runnableExamples:
  197. var a = initPackedSet[int]()
  198. a.incl(3)
  199. a.incl(3)
  200. assert len(a) == 1
  201. if s.elems <= s.a.len:
  202. for i in 0..<s.elems:
  203. if s.a[i] == ord(key): return
  204. if s.elems < s.a.len:
  205. s.a[s.elems] = ord(key)
  206. inc(s.elems)
  207. return
  208. newSeq(s.data, InitIntSetSize)
  209. s.max = InitIntSetSize - 1
  210. for i in 0..<s.elems:
  211. bitincl(s, s.a[i])
  212. s.elems = s.a.len + 1
  213. # fall through:
  214. bitincl(s, ord(key))
  215. proc incl*[A](s: var PackedSet[A], other: PackedSet[A]) =
  216. ## Includes all elements from `other` into `s`.
  217. ##
  218. ## This is the in-place version of `s + other <#+,PackedSet[A],PackedSet[A]>`_.
  219. ##
  220. ## **See also:**
  221. ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding a set
  222. ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element
  223. ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_
  224. runnableExamples:
  225. var a = [1].toPackedSet
  226. a.incl([5].toPackedSet)
  227. assert len(a) == 2
  228. assert 5 in a
  229. for item in other.items: incl(s, item)
  230. proc toPackedSet*[A](x: openArray[A]): PackedSet[A] {.since: (1, 3).} =
  231. ## Creates a new `PackedSet[A]` that contains the elements of `x`.
  232. ##
  233. ## Duplicates are removed.
  234. ##
  235. ## **See also:**
  236. ## * `initPackedSet proc <#initPackedSet>`_
  237. runnableExamples:
  238. let a = [5, 6, 7, 8, 8].toPackedSet
  239. assert len(a) == 4
  240. assert $a == "{5, 6, 7, 8}"
  241. result = initPackedSet[A]()
  242. for item in x:
  243. result.incl(item)
  244. proc containsOrIncl*[A](s: var PackedSet[A], key: A): bool =
  245. ## Includes `key` in the set `s` and tells if `key` was already in `s`.
  246. ##
  247. ## The difference with regards to the `incl proc <#incl,PackedSet[A],A>`_ is
  248. ## that this proc returns true if `s` already contained `key`. The
  249. ## proc will return false if `key` was added as a new value to `s` during
  250. ## this call.
  251. ##
  252. ## **See also:**
  253. ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element
  254. ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_
  255. runnableExamples:
  256. var a = initPackedSet[int]()
  257. assert a.containsOrIncl(3) == false
  258. assert a.containsOrIncl(3) == true
  259. assert a.containsOrIncl(4) == false
  260. if s.elems <= s.a.len:
  261. for i in 0..<s.elems:
  262. if s.a[i] == ord(key):
  263. return true
  264. incl(s, key)
  265. result = false
  266. else:
  267. var t = packedSetGet(s, ord(key) shr TrunkShift)
  268. if t != nil:
  269. var u = ord(key) and TrunkMask
  270. result = (t.bits[u shr IntShift] and BitScalar(1) shl (u and IntMask)) != 0
  271. if not result:
  272. t.bits[u shr IntShift] = t.bits[u shr IntShift] or
  273. (BitScalar(1) shl (u and IntMask))
  274. else:
  275. incl(s, key)
  276. result = false
  277. proc excl*[A](s: var PackedSet[A], key: A) =
  278. ## Excludes `key` from the set `s`.
  279. ##
  280. ## This doesn't do anything if `key` is not found in `s`.
  281. ##
  282. ## **See also:**
  283. ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element
  284. ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding a set
  285. ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_
  286. runnableExamples:
  287. var a = [3].toPackedSet
  288. a.excl(3)
  289. a.excl(3)
  290. a.excl(99)
  291. assert len(a) == 0
  292. exclImpl[A](s, ord(key))
  293. proc excl*[A](s: var PackedSet[A], other: PackedSet[A]) =
  294. ## Excludes all elements from `other` from `s`.
  295. ##
  296. ## This is the in-place version of `s - other <#-,PackedSet[A],PackedSet[A]>`_.
  297. ##
  298. ## **See also:**
  299. ## * `incl proc <#incl,PackedSet[A],PackedSet[A]>`_ for including a set
  300. ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element
  301. ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_
  302. runnableExamples:
  303. var a = [1, 5].toPackedSet
  304. a.excl([5].toPackedSet)
  305. assert len(a) == 1
  306. assert 5 notin a
  307. for item in other.items:
  308. excl(s, item)
  309. proc len*[A](s: PackedSet[A]): int {.inline.} =
  310. ## Returns the number of elements in `s`.
  311. runnableExamples:
  312. let a = [1, 3, 5].toPackedSet
  313. assert len(a) == 3
  314. if s.elems < s.a.len:
  315. result = s.elems
  316. else:
  317. result = 0
  318. for _ in s.items:
  319. # pending bug #11167; when fixed, check each explicit `items` to see if it can be removed
  320. inc(result)
  321. proc missingOrExcl*[A](s: var PackedSet[A], key: A): bool =
  322. ## Excludes `key` from the set `s` and tells if `key` was already missing from `s`.
  323. ##
  324. ## The difference with regards to the `excl proc <#excl,PackedSet[A],A>`_ is
  325. ## that this proc returns true if `key` was missing from `s`.
  326. ## The proc will return false if `key` was in `s` and it was removed
  327. ## during this call.
  328. ##
  329. ## **See also:**
  330. ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element
  331. ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding a set
  332. ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_
  333. runnableExamples:
  334. var a = [5].toPackedSet
  335. assert a.missingOrExcl(5) == false
  336. assert a.missingOrExcl(5) == true
  337. var count = s.len
  338. exclImpl(s, ord(key))
  339. result = count == s.len
  340. proc clear*[A](result: var PackedSet[A]) =
  341. ## Clears the `PackedSet[A]` back to an empty state.
  342. runnableExamples:
  343. var a = [5, 7].toPackedSet
  344. clear(a)
  345. assert len(a) == 0
  346. # setLen(result.data, InitIntSetSize)
  347. # for i in 0..InitIntSetSize - 1: result.data[i] = nil
  348. # result.max = InitIntSetSize - 1
  349. result.data = @[]
  350. result.max = 0
  351. result.counter = 0
  352. result.head = nil
  353. result.elems = 0
  354. proc isNil*[A](x: PackedSet[A]): bool {.inline.} =
  355. ## Returns true if `x` is empty, false otherwise.
  356. runnableExamples:
  357. var a = initPackedSet[int]()
  358. assert a.isNil
  359. a.incl(2)
  360. assert not a.isNil
  361. a.excl(2)
  362. assert a.isNil
  363. x.head.isNil and x.elems == 0
  364. proc `=copy`*[A](dest: var PackedSet[A], src: PackedSet[A]) =
  365. ## Copies `src` to `dest`.
  366. ## `dest` does not need to be initialized by the `initPackedSet proc <#initPackedSet>`_.
  367. if src.elems <= src.a.len:
  368. dest.data = @[]
  369. dest.max = 0
  370. dest.counter = src.counter
  371. dest.head = nil
  372. dest.elems = src.elems
  373. dest.a = src.a
  374. else:
  375. dest.counter = src.counter
  376. dest.max = src.max
  377. dest.elems = src.elems
  378. newSeq(dest.data, src.data.len)
  379. var it = src.head
  380. while it != nil:
  381. var h = it.key and dest.max
  382. var perturb = it.key
  383. while dest.data[h] != nil: h = nextTry(h, dest.max, perturb)
  384. assert dest.data[h] == nil
  385. var n: Trunk
  386. new(n)
  387. n.next = dest.head
  388. n.key = it.key
  389. n.bits = it.bits
  390. dest.head = n
  391. dest.data[h] = n
  392. it = it.next
  393. proc assign*[A](dest: var PackedSet[A], src: PackedSet[A]) {.inline, deprecated.} =
  394. ## Copies `src` to `dest`.
  395. ## `dest` does not need to be initialized by the `initPackedSet proc <#initPackedSet>`_.
  396. runnableExamples:
  397. var
  398. a = initPackedSet[int]()
  399. b = initPackedSet[int]()
  400. b.incl(5)
  401. b.incl(7)
  402. a.assign(b)
  403. assert len(a) == 2
  404. `=copy`(dest, src)
  405. proc union*[A](s1, s2: PackedSet[A]): PackedSet[A] =
  406. ## Returns the union of the sets `s1` and `s2`.
  407. ##
  408. ## The same as `s1 + s2 <#+,PackedSet[A],PackedSet[A]>`_.
  409. runnableExamples:
  410. let
  411. a = [1, 2, 3].toPackedSet
  412. b = [3, 4, 5].toPackedSet
  413. c = union(a, b)
  414. assert c.len == 5
  415. assert c == [1, 2, 3, 4, 5].toPackedSet
  416. result.assign(s1)
  417. incl(result, s2)
  418. proc intersection*[A](s1, s2: PackedSet[A]): PackedSet[A] =
  419. ## Returns the intersection of the sets `s1` and `s2`.
  420. ##
  421. ## The same as `s1 * s2 <#*,PackedSet[A],PackedSet[A]>`_.
  422. runnableExamples:
  423. let
  424. a = [1, 2, 3].toPackedSet
  425. b = [3, 4, 5].toPackedSet
  426. c = intersection(a, b)
  427. assert c.len == 1
  428. assert c == [3].toPackedSet
  429. result = initPackedSet[A]()
  430. for item in s1.items:
  431. if contains(s2, item):
  432. incl(result, item)
  433. proc difference*[A](s1, s2: PackedSet[A]): PackedSet[A] =
  434. ## Returns the difference of the sets `s1` and `s2`.
  435. ##
  436. ## The same as `s1 - s2 <#-,PackedSet[A],PackedSet[A]>`_.
  437. runnableExamples:
  438. let
  439. a = [1, 2, 3].toPackedSet
  440. b = [3, 4, 5].toPackedSet
  441. c = difference(a, b)
  442. assert c.len == 2
  443. assert c == [1, 2].toPackedSet
  444. result = initPackedSet[A]()
  445. for item in s1.items:
  446. if not contains(s2, item):
  447. incl(result, item)
  448. proc symmetricDifference*[A](s1, s2: PackedSet[A]): PackedSet[A] =
  449. ## Returns the symmetric difference of the sets `s1` and `s2`.
  450. runnableExamples:
  451. let
  452. a = [1, 2, 3].toPackedSet
  453. b = [3, 4, 5].toPackedSet
  454. c = symmetricDifference(a, b)
  455. assert c.len == 4
  456. assert c == [1, 2, 4, 5].toPackedSet
  457. result.assign(s1)
  458. for item in s2.items:
  459. if containsOrIncl(result, item):
  460. excl(result, item)
  461. proc `+`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} =
  462. ## Alias for `union(s1, s2) <#union,PackedSet[A],PackedSet[A]>`_.
  463. result = union(s1, s2)
  464. proc `*`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} =
  465. ## Alias for `intersection(s1, s2) <#intersection,PackedSet[A],PackedSet[A]>`_.
  466. result = intersection(s1, s2)
  467. proc `-`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} =
  468. ## Alias for `difference(s1, s2) <#difference,PackedSet[A],PackedSet[A]>`_.
  469. result = difference(s1, s2)
  470. proc disjoint*[A](s1, s2: PackedSet[A]): bool =
  471. ## Returns true if the sets `s1` and `s2` have no items in common.
  472. runnableExamples:
  473. let
  474. a = [1, 2].toPackedSet
  475. b = [2, 3].toPackedSet
  476. c = [3, 4].toPackedSet
  477. assert disjoint(a, b) == false
  478. assert disjoint(a, c) == true
  479. for item in s1.items:
  480. if contains(s2, item):
  481. return false
  482. return true
  483. proc card*[A](s: PackedSet[A]): int {.inline.} =
  484. ## Alias for `len() <#len,PackedSet[A]>`_.
  485. ##
  486. ## Card stands for the [cardinality](http://en.wikipedia.org/wiki/Cardinality)
  487. ## of a set.
  488. result = s.len()
  489. proc `<=`*[A](s1, s2: PackedSet[A]): bool =
  490. ## Returns true if `s1` is a subset of `s2`.
  491. ##
  492. ## A subset `s1` has all of its elements in `s2`, but `s2` doesn't necessarily
  493. ## have more elements than `s1`. That is, `s1` can be equal to `s2`.
  494. runnableExamples:
  495. let
  496. a = [1].toPackedSet
  497. b = [1, 2].toPackedSet
  498. c = [1, 3].toPackedSet
  499. assert a <= b
  500. assert b <= b
  501. assert not (c <= b)
  502. for item in s1.items:
  503. if not s2.contains(item):
  504. return false
  505. return true
  506. proc `<`*[A](s1, s2: PackedSet[A]): bool =
  507. ## Returns true if `s1` is a proper subset of `s2`.
  508. ##
  509. ## A strict or proper subset `s1` has all of its elements in `s2`, but `s2` has
  510. ## more elements than `s1`.
  511. runnableExamples:
  512. let
  513. a = [1].toPackedSet
  514. b = [1, 2].toPackedSet
  515. c = [1, 3].toPackedSet
  516. assert a < b
  517. assert not (b < b)
  518. assert not (c < b)
  519. return s1 <= s2 and not (s2 <= s1)
  520. proc `==`*[A](s1, s2: PackedSet[A]): bool =
  521. ## Returns true if both `s1` and `s2` have the same elements and set size.
  522. runnableExamples:
  523. assert [1, 2].toPackedSet == [2, 1].toPackedSet
  524. assert [1, 2].toPackedSet == [2, 1, 2].toPackedSet
  525. return s1 <= s2 and s2 <= s1
  526. proc `$`*[A](s: PackedSet[A]): string =
  527. ## Converts `s` to a string.
  528. runnableExamples:
  529. let a = [1, 2, 3].toPackedSet
  530. assert $a == "{1, 2, 3}"
  531. dollarImpl()