sets.nim 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## The ``sets`` module implements an efficient `hash set`:idx: and
  10. ## ordered hash set.
  11. ##
  12. ## Hash sets are different from the `built in set type
  13. ## <manual.html#types-set-type>`_. Sets allow you to store any value that can be
  14. ## `hashed <hashes.html>`_ and they don't contain duplicate entries.
  15. ##
  16. ## **Note**: The data types declared here have *value semantics*: This means
  17. ## that ``=`` performs a copy of the set.
  18. import
  19. hashes, math
  20. {.pragma: myShallow.}
  21. when not defined(nimhygiene):
  22. {.pragma: dirty.}
  23. # For "integer-like A" that are too big for intsets/bit-vectors to be practical,
  24. # it would be best to shrink hcode to the same size as the integer. Larger
  25. # codes should never be needed, and this can pack more entries per cache-line.
  26. # Losing hcode entirely is also possible - if some element value is forbidden.
  27. type
  28. KeyValuePair[A] = tuple[hcode: Hash, key: A]
  29. KeyValuePairSeq[A] = seq[KeyValuePair[A]]
  30. HashSet* {.myShallow.}[A] = object ## \
  31. ## A generic hash set.
  32. ##
  33. ## Use `init() <#init,HashSet[A],int>`_ or `initSet[type]() <#initSet>`_
  34. ## before calling other procs on it.
  35. data: KeyValuePairSeq[A]
  36. counter: int
  37. {.deprecated: [TSet: HashSet].}
  38. template default[T](t: typedesc[T]): T =
  39. ## Used by clear methods to get a default value.
  40. var v: T
  41. v
  42. proc clear*[A](s: var HashSet[A]) =
  43. ## Clears the HashSet back to an empty state, without shrinking
  44. ## any of the existing storage. O(n) where n is the size of the hash bucket.
  45. s.counter = 0
  46. for i in 0..<s.data.len:
  47. s.data[i].hcode = 0
  48. s.data[i].key = default(type(s.data[i].key))
  49. # hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These
  50. # two procs retain clarity of that encoding without the space cost of an enum.
  51. proc isEmpty(hcode: Hash): bool {.inline.} =
  52. result = hcode == 0
  53. proc isFilled(hcode: Hash): bool {.inline.} =
  54. result = hcode != 0
  55. proc isValid*[A](s: HashSet[A]): bool =
  56. ## Returns `true` if the set has been initialized with `initSet <#initSet>`_.
  57. ##
  58. ## Most operations over an uninitialized set will crash at runtime and
  59. ## `assert <system.html#assert>`_ in debug builds. You can use this proc in
  60. ## your own procs to verify that sets passed to your procs are correctly
  61. ## initialized. Example:
  62. ##
  63. ## .. code-block ::
  64. ## proc savePreferences(options: HashSet[string]) =
  65. ## assert options.isValid, "Pass an initialized set!"
  66. ## # Do stuff here, may crash in release builds!
  67. result = s.data.len > 0
  68. proc len*[A](s: HashSet[A]): int =
  69. ## Returns the number of keys in `s`.
  70. ##
  71. ## Due to an implementation detail you can call this proc on variables which
  72. ## have not been initialized yet. The proc will return zero as the length
  73. ## then. Example:
  74. ##
  75. ## .. code-block::
  76. ##
  77. ## var values: HashSet[int]
  78. ## assert(not values.isValid)
  79. ## assert values.len == 0
  80. result = s.counter
  81. proc card*[A](s: HashSet[A]): int =
  82. ## Alias for `len() <#len,TSet[A]>`_.
  83. ##
  84. ## Card stands for the `cardinality
  85. ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set.
  86. result = s.counter
  87. iterator items*[A](s: HashSet[A]): A =
  88. ## Iterates over keys in the set `s`.
  89. ##
  90. ## If you need a sequence with the keys you can use `sequtils.toSeq()
  91. ## <sequtils.html#toSeq>`_ on the iterator. Usage example:
  92. ##
  93. ## .. code-block::
  94. ## type
  95. ## pair = tuple[a, b: int]
  96. ## var
  97. ## a, b = initSet[pair]()
  98. ## a.incl((2, 3))
  99. ## a.incl((3, 2))
  100. ## a.incl((2, 3))
  101. ## for x, y in a.items:
  102. ## b.incl((x - 2, y + 1))
  103. ## assert a.len == 2
  104. ## echo b
  105. ## # --> {(a: 1, b: 3), (a: 0, b: 4)}
  106. assert s.isValid, "The set needs to be initialized."
  107. for h in 0..high(s.data):
  108. if isFilled(s.data[h].hcode): yield s.data[h].key
  109. proc hash*[A](s: HashSet[A]): Hash =
  110. ## hashing of HashSet
  111. assert s.isValid, "The set needs to be initialized."
  112. for h in 0..high(s.data):
  113. result = result xor s.data[h].hcode
  114. result = !$result
  115. const
  116. growthFactor = 2
  117. proc mustRehash(length, counter: int): bool {.inline.} =
  118. assert(length > counter)
  119. result = (length * 2 < counter * 3) or (length - counter < 4)
  120. proc rightSize*(count: Natural): int {.inline.} =
  121. ## Return the value of `initialSize` to support `count` items.
  122. ##
  123. ## If more items are expected to be added, simply add that
  124. ## expected extra amount to the parameter before calling this.
  125. ##
  126. ## Internally, we want mustRehash(rightSize(x), x) == false.
  127. result = nextPowerOfTwo(count * 3 div 2 + 4)
  128. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  129. result = (h + 1) and maxHash
  130. template rawGetKnownHCImpl() {.dirty.} =
  131. var h: Hash = hc and high(s.data) # start with real hash value
  132. while isFilled(s.data[h].hcode):
  133. # Compare hc THEN key with boolean short circuit. This makes the common case
  134. # zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
  135. # It does slow down succeeding lookups by one extra Hash cmp&and..usually
  136. # just a few clock cycles, generally worth it for any non-integer-like A.
  137. if s.data[h].hcode == hc and s.data[h].key == key: # compare hc THEN key
  138. return h
  139. h = nextTry(h, high(s.data))
  140. result = -1 - h # < 0 => MISSING; insert idx = -1 - result
  141. template genHash(key: typed): Hash =
  142. var hc = hash(key)
  143. if hc == 0: # This almost never taken branch should be very predictable.
  144. hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine.
  145. hc
  146. template rawGetImpl() {.dirty.} =
  147. hc = genHash(key)
  148. rawGetKnownHCImpl()
  149. template rawInsertImpl() {.dirty.} =
  150. data[h].key = key
  151. data[h].hcode = hc
  152. proc rawGetKnownHC[A](s: HashSet[A], key: A, hc: Hash): int {.inline.} =
  153. rawGetKnownHCImpl()
  154. proc rawGet[A](s: HashSet[A], key: A, hc: var Hash): int {.inline.} =
  155. rawGetImpl()
  156. proc `[]`*[A](s: var HashSet[A], key: A): var A =
  157. ## returns the element that is actually stored in 's' which has the same
  158. ## value as 'key' or raises the ``KeyError`` exception. This is useful
  159. ## when one overloaded 'hash' and '==' but still needs reference semantics
  160. ## for sharing.
  161. assert s.isValid, "The set needs to be initialized."
  162. var hc: Hash
  163. var index = rawGet(s, key, hc)
  164. if index >= 0: result = s.data[index].key
  165. else:
  166. when compiles($key):
  167. raise newException(KeyError, "key not found: " & $key)
  168. else:
  169. raise newException(KeyError, "key not found")
  170. proc mget*[A](s: var HashSet[A], key: A): var A {.deprecated.} =
  171. ## returns the element that is actually stored in 's' which has the same
  172. ## value as 'key' or raises the ``KeyError`` exception. This is useful
  173. ## when one overloaded 'hash' and '==' but still needs reference semantics
  174. ## for sharing. Use ```[]``` instead.
  175. s[key]
  176. proc contains*[A](s: HashSet[A], key: A): bool =
  177. ## Returns true iff `key` is in `s`.
  178. ##
  179. ## Example:
  180. ##
  181. ## .. code-block::
  182. ## var values = initSet[int]()
  183. ## assert(not values.contains(2))
  184. ## values.incl(2)
  185. ## assert values.contains(2)
  186. ## values.excl(2)
  187. ## assert(not values.contains(2))
  188. assert s.isValid, "The set needs to be initialized."
  189. var hc: Hash
  190. var index = rawGet(s, key, hc)
  191. result = index >= 0
  192. proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A,
  193. hc: Hash, h: Hash) =
  194. rawInsertImpl()
  195. proc enlarge[A](s: var HashSet[A]) =
  196. var n: KeyValuePairSeq[A]
  197. newSeq(n, len(s.data) * growthFactor)
  198. swap(s.data, n) # n is now old seq
  199. for i in countup(0, high(n)):
  200. if isFilled(n[i].hcode):
  201. var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode)
  202. rawInsert(s, s.data, n[i].key, n[i].hcode, j)
  203. template inclImpl() {.dirty.} =
  204. var hc: Hash
  205. var index = rawGet(s, key, hc)
  206. if index < 0:
  207. if mustRehash(len(s.data), s.counter):
  208. enlarge(s)
  209. index = rawGetKnownHC(s, key, hc)
  210. rawInsert(s, s.data, key, hc, -1 - index)
  211. inc(s.counter)
  212. template containsOrInclImpl() {.dirty.} =
  213. var hc: Hash
  214. var index = rawGet(s, key, hc)
  215. if index >= 0:
  216. result = true
  217. else:
  218. if mustRehash(len(s.data), s.counter):
  219. enlarge(s)
  220. index = rawGetKnownHC(s, key, hc)
  221. rawInsert(s, s.data, key, hc, -1 - index)
  222. inc(s.counter)
  223. proc incl*[A](s: var HashSet[A], key: A) =
  224. ## Includes an element `key` in `s`.
  225. ##
  226. ## This doesn't do anything if `key` is already in `s`. Example:
  227. ##
  228. ## .. code-block::
  229. ## var values = initSet[int]()
  230. ## values.incl(2)
  231. ## values.incl(2)
  232. ## assert values.len == 1
  233. assert s.isValid, "The set needs to be initialized."
  234. inclImpl()
  235. proc incl*[A](s: var HashSet[A], other: HashSet[A]) =
  236. ## Includes all elements from `other` into `s`.
  237. ##
  238. ## Example:
  239. ##
  240. ## .. code-block::
  241. ## var values = initSet[int]()
  242. ## values.incl(2)
  243. ## var others = toSet([6, 7])
  244. ## values.incl(others)
  245. ## assert values.len == 3
  246. assert s.isValid, "The set `s` needs to be initialized."
  247. assert other.isValid, "The set `other` needs to be initialized."
  248. for item in other: incl(s, item)
  249. template doWhile(a, b) =
  250. while true:
  251. b
  252. if not a: break
  253. template default[T](t: typedesc[T]): T =
  254. var v: T
  255. v
  256. proc exclImpl[A](s: var HashSet[A], key: A) : bool {. inline .} =
  257. assert s.isValid, "The set needs to be initialized."
  258. var hc: Hash
  259. var i = rawGet(s, key, hc)
  260. var msk = high(s.data)
  261. result = true
  262. if i >= 0:
  263. result = false
  264. dec(s.counter)
  265. while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
  266. var j = i # The correctness of this depends on (h+1) in nextTry,
  267. var r = j # though may be adaptable to other simple sequences.
  268. s.data[i].hcode = 0 # mark current EMPTY
  269. s.data[i].key = default(type(s.data[i].key))
  270. doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
  271. i = (i + 1) and msk # increment mod table size
  272. if isEmpty(s.data[i].hcode): # end of collision cluster; So all done
  273. return
  274. r = s.data[i].hcode and msk # "home" location of key@i
  275. shallowCopy(s.data[j], s.data[i]) # data[i] will be marked EMPTY next loop
  276. proc missingOrExcl*[A](s: var HashSet[A], key: A): bool =
  277. ## Excludes `key` in the set `s` and tells if `key` was removed from `s`.
  278. ##
  279. ## The difference with regards to the `excl() <#excl,TSet[A],A>`_ proc is
  280. ## that this proc returns `true` if `key` was not present in `s`. Example:
  281. ##
  282. ## .. code-block::
  283. ## var s = toSet([2, 3, 6, 7])
  284. ## assert s.missingOrExcl(4) == true
  285. ## assert s.missingOrExcl(6) == false
  286. exclImpl(s, key)
  287. proc excl*[A](s: var HashSet[A], key: A) =
  288. ## Excludes `key` from the set `s`.
  289. ##
  290. ## This doesn't do anything if `key` is not found in `s`. Example:
  291. ##
  292. ## .. code-block::
  293. ## var s = toSet([2, 3, 6, 7])
  294. ## s.excl(2)
  295. ## s.excl(2)
  296. ## assert s.len == 3
  297. discard exclImpl(s, key)
  298. proc excl*[A](s: var HashSet[A], other: HashSet[A]) =
  299. ## Excludes everything in `other` from `s`.
  300. ##
  301. ## Example:
  302. ##
  303. ## .. code-block::
  304. ## var
  305. ## numbers = toSet([1, 2, 3, 4, 5])
  306. ## even = toSet([2, 4, 6, 8])
  307. ## numbers.excl(even)
  308. ## echo numbers
  309. ## # --> {1, 3, 5}
  310. assert s.isValid, "The set `s` needs to be initialized."
  311. assert other.isValid, "The set `other` needs to be initialized."
  312. for item in other: discard exclImpl(s, item)
  313. proc pop*[A](s: var HashSet[A]): A =
  314. ## Remove and return an arbitrary element from the set `s`.
  315. ##
  316. ## Raises KeyError if the set `s` is empty.
  317. ##
  318. for h in 0..high(s.data):
  319. if isFilled(s.data[h].hcode):
  320. result = s.data[h].key
  321. excl(s, result)
  322. return result
  323. raise newException(KeyError, "set is empty")
  324. proc containsOrIncl*[A](s: var HashSet[A], key: A): bool =
  325. ## Includes `key` in the set `s` and tells if `key` was added to `s`.
  326. ##
  327. ## The difference with regards to the `incl() <#incl,TSet[A],A>`_ proc is
  328. ## that this proc returns `true` if `key` was already present in `s`. The
  329. ## proc will return false if `key` was added as a new value to `s` during
  330. ## this call. Example:
  331. ##
  332. ## .. code-block::
  333. ## var values = initSet[int]()
  334. ## assert values.containsOrIncl(2) == false
  335. ## assert values.containsOrIncl(2) == true
  336. assert s.isValid, "The set needs to be initialized."
  337. containsOrInclImpl()
  338. proc init*[A](s: var HashSet[A], initialSize=64) =
  339. ## Initializes a hash set.
  340. ##
  341. ## The `initialSize` parameter needs to be a power of two. You can use
  342. ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to
  343. ## guarantee that at runtime. All set variables must be initialized before
  344. ## use with other procs from this module with the exception of `isValid()
  345. ## <#isValid,TSet[A]>`_ and `len() <#len,TSet[A]>`_.
  346. ##
  347. ## You can call this proc on a previously initialized hash set, which will
  348. ## discard all its values. This might be more convenient than iterating over
  349. ## existing values and calling `excl() <#excl,TSet[A],A>`_ on them. Example:
  350. ##
  351. ## .. code-block ::
  352. ## var a: HashSet[int]
  353. ## a.init(4)
  354. ## a.incl(2)
  355. ## a.init
  356. ## assert a.len == 0 and a.isValid
  357. assert isPowerOfTwo(initialSize)
  358. s.counter = 0
  359. newSeq(s.data, initialSize)
  360. proc initSet*[A](initialSize=64): HashSet[A] =
  361. ## Wrapper around `init() <#init,TSet[A],int>`_ for initialization of hash
  362. ## sets.
  363. ##
  364. ## Returns an empty hash set you can assign directly in ``var`` blocks in a
  365. ## single line. Example:
  366. ##
  367. ## .. code-block ::
  368. ## var a = initSet[int](4)
  369. ## a.incl(2)
  370. result.init(initialSize)
  371. proc toSet*[A](keys: openArray[A]): HashSet[A] =
  372. ## Creates a new hash set that contains the given `keys`.
  373. ##
  374. ## Example:
  375. ##
  376. ## .. code-block::
  377. ## var numbers = toSet([1, 2, 3, 4, 5])
  378. ## assert numbers.contains(2)
  379. ## assert numbers.contains(4)
  380. result = initSet[A](rightSize(keys.len))
  381. for key in items(keys): result.incl(key)
  382. template dollarImpl() {.dirty.} =
  383. result = "{"
  384. for key in items(s):
  385. if result.len > 1: result.add(", ")
  386. result.addQuoted(key)
  387. result.add("}")
  388. proc `$`*[A](s: HashSet[A]): string =
  389. ## Converts the set `s` to a string, mostly for logging purposes.
  390. ##
  391. ## Don't use this proc for serialization, the representation may change at
  392. ## any moment and values are not escaped. Example:
  393. ##
  394. ## Example:
  395. ##
  396. ## .. code-block::
  397. ## echo toSet([2, 4, 5])
  398. ## # --> {2, 4, 5}
  399. ## echo toSet(["no", "esc'aping", "is \" provided"])
  400. ## # --> {no, esc'aping, is " provided}
  401. assert s.isValid, "The set needs to be initialized."
  402. dollarImpl()
  403. proc union*[A](s1, s2: HashSet[A]): HashSet[A] =
  404. ## Returns the union of the sets `s1` and `s2`.
  405. ##
  406. ## The union of two sets is represented mathematically as *A ∪ B* and is the
  407. ## set of all objects that are members of `s1`, `s2` or both. Example:
  408. ##
  409. ## .. code-block::
  410. ## var
  411. ## a = toSet(["a", "b"])
  412. ## b = toSet(["b", "c"])
  413. ## c = union(a, b)
  414. ## assert c == toSet(["a", "b", "c"])
  415. assert s1.isValid, "The set `s1` needs to be initialized."
  416. assert s2.isValid, "The set `s2` needs to be initialized."
  417. result = s1
  418. incl(result, s2)
  419. proc intersection*[A](s1, s2: HashSet[A]): HashSet[A] =
  420. ## Returns the intersection of the sets `s1` and `s2`.
  421. ##
  422. ## The intersection of two sets is represented mathematically as *A ∩ B* and
  423. ## is the set of all objects that are members of `s1` and `s2` at the same
  424. ## time. Example:
  425. ##
  426. ## .. code-block::
  427. ## var
  428. ## a = toSet(["a", "b"])
  429. ## b = toSet(["b", "c"])
  430. ## c = intersection(a, b)
  431. ## assert c == toSet(["b"])
  432. assert s1.isValid, "The set `s1` needs to be initialized."
  433. assert s2.isValid, "The set `s2` needs to be initialized."
  434. result = initSet[A](min(s1.data.len, s2.data.len))
  435. for item in s1:
  436. if item in s2: incl(result, item)
  437. proc difference*[A](s1, s2: HashSet[A]): HashSet[A] =
  438. ## Returns the difference of the sets `s1` and `s2`.
  439. ##
  440. ## The difference of two sets is represented mathematically as *A \ B* and is
  441. ## the set of all objects that are members of `s1` and not members of `s2`.
  442. ## Example:
  443. ##
  444. ## .. code-block::
  445. ## var
  446. ## a = toSet(["a", "b"])
  447. ## b = toSet(["b", "c"])
  448. ## c = difference(a, b)
  449. ## assert c == toSet(["a"])
  450. assert s1.isValid, "The set `s1` needs to be initialized."
  451. assert s2.isValid, "The set `s2` needs to be initialized."
  452. result = initSet[A]()
  453. for item in s1:
  454. if not contains(s2, item):
  455. incl(result, item)
  456. proc symmetricDifference*[A](s1, s2: HashSet[A]): HashSet[A] =
  457. ## Returns the symmetric difference of the sets `s1` and `s2`.
  458. ##
  459. ## The symmetric difference of two sets is represented mathematically as *A △
  460. ## B* or *A ⊖ B* and is the set of all objects that are members of `s1` or
  461. ## `s2` but not both at the same time. Example:
  462. ##
  463. ## .. code-block::
  464. ## var
  465. ## a = toSet(["a", "b"])
  466. ## b = toSet(["b", "c"])
  467. ## c = symmetricDifference(a, b)
  468. ## assert c == toSet(["a", "c"])
  469. assert s1.isValid, "The set `s1` needs to be initialized."
  470. assert s2.isValid, "The set `s2` needs to be initialized."
  471. result = s1
  472. for item in s2:
  473. if containsOrIncl(result, item): excl(result, item)
  474. proc `+`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  475. ## Alias for `union(s1, s2) <#union>`_.
  476. result = union(s1, s2)
  477. proc `*`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  478. ## Alias for `intersection(s1, s2) <#intersection>`_.
  479. result = intersection(s1, s2)
  480. proc `-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  481. ## Alias for `difference(s1, s2) <#difference>`_.
  482. result = difference(s1, s2)
  483. proc `-+-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  484. ## Alias for `symmetricDifference(s1, s2) <#symmetricDifference>`_.
  485. result = symmetricDifference(s1, s2)
  486. proc disjoint*[A](s1, s2: HashSet[A]): bool =
  487. ## Returns true iff the sets `s1` and `s2` have no items in common.
  488. ##
  489. ## Example:
  490. ##
  491. ## .. code-block::
  492. ## var
  493. ## a = toSet(["a", "b"])
  494. ## b = toSet(["b", "c"])
  495. ## assert disjoint(a, b) == false
  496. ## assert disjoint(a, b - a) == true
  497. assert s1.isValid, "The set `s1` needs to be initialized."
  498. assert s2.isValid, "The set `s2` needs to be initialized."
  499. for item in s1:
  500. if item in s2: return false
  501. return true
  502. proc `<`*[A](s, t: HashSet[A]): bool =
  503. ## Returns true if `s` is a strict or proper subset of `t`.
  504. ##
  505. ## A strict or proper subset `s` has all of its members in `t` but `t` has
  506. ## more elements than `s`. Example:
  507. ##
  508. ## .. code-block::
  509. ## var
  510. ## a = toSet(["a", "b"])
  511. ## b = toSet(["b", "c"])
  512. ## c = intersection(a, b)
  513. ## assert c < a and c < b
  514. ## assert((a < a) == false)
  515. s.counter != t.counter and s <= t
  516. proc `<=`*[A](s, t: HashSet[A]): bool =
  517. ## Returns true if `s` is subset of `t`.
  518. ##
  519. ## A subset `s` has all of its members in `t` and `t` doesn't necessarily
  520. ## have more members than `s`. That is, `s` can be equal to `t`. Example:
  521. ##
  522. ## .. code-block::
  523. ## var
  524. ## a = toSet(["a", "b"])
  525. ## b = toSet(["b", "c"])
  526. ## c = intersection(a, b)
  527. ## assert c <= a and c <= b
  528. ## assert((a <= a))
  529. result = false
  530. if s.counter > t.counter: return
  531. result = true
  532. for item in s:
  533. if not(t.contains(item)):
  534. result = false
  535. return
  536. proc `==`*[A](s, t: HashSet[A]): bool =
  537. ## Returns true if both `s` and `t` have the same members and set size.
  538. ##
  539. ## Example:
  540. ##
  541. ## .. code-block::
  542. ## var
  543. ## a = toSet([1, 2])
  544. ## b = toSet([1])
  545. ## b.incl(2)
  546. ## assert a == b
  547. s.counter == t.counter and s <= t
  548. proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] =
  549. ## Returns a new set after applying `op` on each of the elements of `data`.
  550. ##
  551. ## You can use this proc to transform the elements from a set. Example:
  552. ##
  553. ## .. code-block::
  554. ## var a = toSet([1, 2, 3])
  555. ## var b = a.map(proc (x: int): string = $x)
  556. ## assert b == toSet(["1", "2", "3"])
  557. result = initSet[B]()
  558. for item in data: result.incl(op(item))
  559. # ------------------------------ ordered set ------------------------------
  560. type
  561. OrderedKeyValuePair[A] = tuple[
  562. hcode: Hash, next: int, key: A]
  563. OrderedKeyValuePairSeq[A] = seq[OrderedKeyValuePair[A]]
  564. OrderedSet* {.myShallow.}[A] = object ## \
  565. ## A generic hash set that remembers insertion order.
  566. ##
  567. ## Use `init() <#init,OrderedSet[A],int>`_ or `initOrderedSet[type]()
  568. ## <#initOrderedSet>`_ before calling other procs on it.
  569. data: OrderedKeyValuePairSeq[A]
  570. counter, first, last: int
  571. {.deprecated: [TOrderedSet: OrderedSet].}
  572. proc clear*[A](s: var OrderedSet[A]) =
  573. ## Clears the OrderedSet back to an empty state, without shrinking
  574. ## any of the existing storage. O(n) where n is the size of the hash bucket.
  575. s.counter = 0
  576. s.first = -1
  577. s.last = -1
  578. for i in 0..<s.data.len:
  579. s.data[i].hcode = 0
  580. s.data[i].next = 0
  581. s.data[i].key = default(type(s.data[i].key))
  582. proc isValid*[A](s: OrderedSet[A]): bool =
  583. ## Returns `true` if the ordered set has been initialized with `initSet
  584. ## <#initOrderedSet>`_.
  585. ##
  586. ## Most operations over an uninitialized ordered set will crash at runtime
  587. ## and `assert <system.html#assert>`_ in debug builds. You can use this proc
  588. ## in your own procs to verify that ordered sets passed to your procs are
  589. ## correctly initialized. Example:
  590. ##
  591. ## .. code-block::
  592. ## proc saveTarotCards(cards: OrderedSet[int]) =
  593. ## assert cards.isValid, "Pass an initialized set!"
  594. ## # Do stuff here, may crash in release builds!
  595. result = s.data.len > 0
  596. proc len*[A](s: OrderedSet[A]): int {.inline.} =
  597. ## Returns the number of keys in `s`.
  598. ##
  599. ## Due to an implementation detail you can call this proc on variables which
  600. ## have not been initialized yet. The proc will return zero as the length
  601. ## then. Example:
  602. ##
  603. ## .. code-block::
  604. ##
  605. ## var values: OrderedSet[int]
  606. ## assert(not values.isValid)
  607. ## assert values.len == 0
  608. result = s.counter
  609. proc card*[A](s: OrderedSet[A]): int {.inline.} =
  610. ## Alias for `len() <#len,TOrderedSet[A]>`_.
  611. ##
  612. ## Card stands for the `cardinality
  613. ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set.
  614. result = s.counter
  615. template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
  616. var h = s.first
  617. var idx = 0
  618. while h >= 0:
  619. var nxt = s.data[h].next
  620. if isFilled(s.data[h].hcode):
  621. yieldStmt
  622. inc(idx)
  623. h = nxt
  624. iterator items*[A](s: OrderedSet[A]): A =
  625. ## Iterates over keys in the ordered set `s` in insertion order.
  626. ##
  627. ## If you need a sequence with the keys you can use `sequtils.toSeq()
  628. ## <sequtils.html#toSeq>`_ on the iterator. Usage example:
  629. ##
  630. ## .. code-block::
  631. ## var a = initOrderedSet[int]()
  632. ## for value in [9, 2, 1, 5, 1, 8, 4, 2]:
  633. ## a.incl(value)
  634. ## for value in a.items:
  635. ## echo "Got ", value
  636. ## # --> Got 9
  637. ## # --> Got 2
  638. ## # --> Got 1
  639. ## # --> Got 5
  640. ## # --> Got 8
  641. ## # --> Got 4
  642. assert s.isValid, "The set needs to be initialized."
  643. forAllOrderedPairs:
  644. yield s.data[h].key
  645. proc hash*[A](s: OrderedSet[A]): Hash =
  646. ## hashing of OrderedSet
  647. assert s.isValid, "The set needs to be initialized."
  648. forAllOrderedPairs:
  649. result = result !& s.data[h].hcode
  650. result = !$result
  651. iterator pairs*[A](s: OrderedSet[A]): tuple[a: int, b: A] =
  652. assert s.isValid, "The set needs to be initialized"
  653. forAllOrderedPairs:
  654. yield (idx, s.data[h].key)
  655. proc rawGetKnownHC[A](s: OrderedSet[A], key: A, hc: Hash): int {.inline.} =
  656. rawGetKnownHCImpl()
  657. proc rawGet[A](s: OrderedSet[A], key: A, hc: var Hash): int {.inline.} =
  658. rawGetImpl()
  659. proc contains*[A](s: OrderedSet[A], key: A): bool =
  660. ## Returns true iff `key` is in `s`.
  661. ##
  662. ## Example:
  663. ##
  664. ## .. code-block::
  665. ## var values = initOrderedSet[int]()
  666. ## assert(not values.contains(2))
  667. ## values.incl(2)
  668. ## assert values.contains(2)
  669. assert s.isValid, "The set needs to be initialized."
  670. var hc: Hash
  671. var index = rawGet(s, key, hc)
  672. result = index >= 0
  673. proc rawInsert[A](s: var OrderedSet[A], data: var OrderedKeyValuePairSeq[A],
  674. key: A, hc: Hash, h: Hash) =
  675. rawInsertImpl()
  676. data[h].next = -1
  677. if s.first < 0: s.first = h
  678. if s.last >= 0: data[s.last].next = h
  679. s.last = h
  680. proc enlarge[A](s: var OrderedSet[A]) =
  681. var n: OrderedKeyValuePairSeq[A]
  682. newSeq(n, len(s.data) * growthFactor)
  683. var h = s.first
  684. s.first = -1
  685. s.last = -1
  686. swap(s.data, n)
  687. while h >= 0:
  688. var nxt = n[h].next
  689. if isFilled(n[h].hcode):
  690. var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode)
  691. rawInsert(s, s.data, n[h].key, n[h].hcode, j)
  692. h = nxt
  693. proc incl*[A](s: var OrderedSet[A], key: A) =
  694. ## Includes an element `key` in `s`.
  695. ##
  696. ## This doesn't do anything if `key` is already in `s`. Example:
  697. ##
  698. ## .. code-block::
  699. ## var values = initOrderedSet[int]()
  700. ## values.incl(2)
  701. ## values.incl(2)
  702. ## assert values.len == 1
  703. assert s.isValid, "The set needs to be initialized."
  704. inclImpl()
  705. proc incl*[A](s: var HashSet[A], other: OrderedSet[A]) =
  706. ## Includes all elements from `other` into `s`.
  707. ##
  708. ## Example:
  709. ##
  710. ## .. code-block::
  711. ## var values = initOrderedSet[int]()
  712. ## values.incl(2)
  713. ## var others = toOrderedSet([6, 7])
  714. ## values.incl(others)
  715. ## assert values.len == 3
  716. assert s.isValid, "The set `s` needs to be initialized."
  717. assert other.isValid, "The set `other` needs to be initialized."
  718. for item in other: incl(s, item)
  719. proc exclImpl[A](s: var OrderedSet[A], key: A) : bool {. inline .} =
  720. assert s.isValid, "The set needs to be initialized."
  721. var n: OrderedKeyValuePairSeq[A]
  722. newSeq(n, len(s.data))
  723. var h = s.first
  724. s.first = -1
  725. s.last = -1
  726. swap(s.data, n)
  727. let hc = genHash(key)
  728. result = true
  729. while h >= 0:
  730. var nxt = n[h].next
  731. if isFilled(n[h].hcode):
  732. if n[h].hcode == hc and n[h].key == key:
  733. dec s.counter
  734. result = false
  735. else:
  736. var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode)
  737. rawInsert(s, s.data, n[h].key, n[h].hcode, j)
  738. h = nxt
  739. proc missingOrExcl*[A](s: var OrderedSet[A], key: A): bool =
  740. ## Excludes `key` in the set `s` and tells if `key` was removed from `s`. Efficiency: O(n).
  741. ##
  742. ## The difference with regards to the `excl() <#excl,TOrderedSet[A],A>`_ proc is
  743. ## that this proc returns `true` if `key` was not present in `s`. Example:
  744. ##
  745. ## .. code-block::
  746. ## var s = toOrderedSet([2, 3, 6, 7])
  747. ## assert s.missingOrExcl(4) == true
  748. ## assert s.missingOrExcl(6) == false
  749. exclImpl(s, key)
  750. proc excl*[A](s: var OrderedSet[A], key: A) =
  751. ## Excludes `key` from the set `s`. Efficiency: O(n).
  752. ##
  753. ## This doesn't do anything if `key` is not found in `s`. Example:
  754. ##
  755. ## .. code-block::
  756. ## var s = toOrderedSet([2, 3, 6, 7])
  757. ## s.excl(2)
  758. ## s.excl(2)
  759. ## assert s.len == 3
  760. discard exclImpl(s, key)
  761. proc containsOrIncl*[A](s: var OrderedSet[A], key: A): bool =
  762. ## Includes `key` in the set `s` and tells if `key` was added to `s`.
  763. ##
  764. ## The difference with regards to the `incl() <#incl,TOrderedSet[A],A>`_ proc
  765. ## is that this proc returns `true` if `key` was already present in `s`. The
  766. ## proc will return false if `key` was added as a new value to `s` during
  767. ## this call. Example:
  768. ##
  769. ## .. code-block::
  770. ## var values = initOrderedSet[int]()
  771. ## assert values.containsOrIncl(2) == false
  772. ## assert values.containsOrIncl(2) == true
  773. assert s.isValid, "The set needs to be initialized."
  774. containsOrInclImpl()
  775. proc init*[A](s: var OrderedSet[A], initialSize=64) =
  776. ## Initializes an ordered hash set.
  777. ##
  778. ## The `initialSize` parameter needs to be a power of two. You can use
  779. ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to
  780. ## guarantee that at runtime. All set variables must be initialized before
  781. ## use with other procs from this module with the exception of `isValid()
  782. ## <#isValid,TOrderedSet[A]>`_ and `len() <#len,TOrderedSet[A]>`_.
  783. ##
  784. ## You can call this proc on a previously initialized ordered hash set to
  785. ## discard its values. At the moment this is the only proc to remove elements
  786. ## from an ordered hash set. Example:
  787. ##
  788. ## .. code-block ::
  789. ## var a: OrderedSet[int]
  790. ## a.init(4)
  791. ## a.incl(2)
  792. ## a.init
  793. ## assert a.len == 0 and a.isValid
  794. assert isPowerOfTwo(initialSize)
  795. s.counter = 0
  796. s.first = -1
  797. s.last = -1
  798. newSeq(s.data, initialSize)
  799. proc initOrderedSet*[A](initialSize=64): OrderedSet[A] =
  800. ## Wrapper around `init() <#init,TOrderedSet[A],int>`_ for initialization of
  801. ## ordered hash sets.
  802. ##
  803. ## Returns an empty ordered hash set you can assign directly in ``var``
  804. ## blocks in a single line. Example:
  805. ##
  806. ## .. code-block ::
  807. ## var a = initOrderedSet[int](4)
  808. ## a.incl(2)
  809. result.init(initialSize)
  810. proc toOrderedSet*[A](keys: openArray[A]): OrderedSet[A] =
  811. ## Creates a new ordered hash set that contains the given `keys`.
  812. ##
  813. ## Example:
  814. ##
  815. ## .. code-block::
  816. ## var numbers = toOrderedSet([1, 2, 3, 4, 5])
  817. ## assert numbers.contains(2)
  818. ## assert numbers.contains(4)
  819. result = initOrderedSet[A](rightSize(keys.len))
  820. for key in items(keys): result.incl(key)
  821. proc `$`*[A](s: OrderedSet[A]): string =
  822. ## Converts the ordered hash set `s` to a string, mostly for logging purposes.
  823. ##
  824. ## Don't use this proc for serialization, the representation may change at
  825. ## any moment and values are not escaped. Example:
  826. ##
  827. ## Example:
  828. ##
  829. ## .. code-block::
  830. ## echo toOrderedSet([2, 4, 5])
  831. ## # --> {2, 4, 5}
  832. ## echo toOrderedSet(["no", "esc'aping", "is \" provided"])
  833. ## # --> {no, esc'aping, is " provided}
  834. assert s.isValid, "The set needs to be initialized."
  835. dollarImpl()
  836. proc `==`*[A](s, t: OrderedSet[A]): bool =
  837. ## Equality for ordered sets.
  838. if s.counter != t.counter: return false
  839. var h = s.first
  840. var g = t.first
  841. var compared = 0
  842. while h >= 0 and g >= 0:
  843. var nxh = s.data[h].next
  844. var nxg = t.data[g].next
  845. if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode):
  846. if s.data[h].key == t.data[g].key:
  847. inc compared
  848. else:
  849. return false
  850. h = nxh
  851. g = nxg
  852. result = compared == s.counter
  853. when isMainModule and not defined(release):
  854. proc testModule() =
  855. ## Internal micro test to validate docstrings and such.
  856. block isValidTest:
  857. var options: HashSet[string]
  858. proc savePreferences(options: HashSet[string]) =
  859. assert options.isValid, "Pass an initialized set!"
  860. options = initSet[string]()
  861. options.savePreferences
  862. block lenTest:
  863. var values: HashSet[int]
  864. assert(not values.isValid)
  865. assert values.len == 0
  866. assert values.card == 0
  867. block setIterator:
  868. type pair = tuple[a, b: int]
  869. var a, b = initSet[pair]()
  870. a.incl((2, 3))
  871. a.incl((3, 2))
  872. a.incl((2, 3))
  873. for x, y in a.items:
  874. b.incl((x - 2, y + 1))
  875. assert a.len == b.card
  876. assert a.len == 2
  877. #echo b
  878. block setContains:
  879. var values = initSet[int]()
  880. assert(not values.contains(2))
  881. values.incl(2)
  882. assert values.contains(2)
  883. values.excl(2)
  884. assert(not values.contains(2))
  885. values.incl(4)
  886. var others = toSet([6, 7])
  887. values.incl(others)
  888. assert values.len == 3
  889. values.init
  890. assert values.containsOrIncl(2) == false
  891. assert values.containsOrIncl(2) == true
  892. var
  893. a = toSet([1, 2])
  894. b = toSet([1])
  895. b.incl(2)
  896. assert a == b
  897. block exclusions:
  898. var s = toSet([2, 3, 6, 7])
  899. s.excl(2)
  900. s.excl(2)
  901. assert s.len == 3
  902. var
  903. numbers = toSet([1, 2, 3, 4, 5])
  904. even = toSet([2, 4, 6, 8])
  905. numbers.excl(even)
  906. #echo numbers
  907. # --> {1, 3, 5}
  908. block toSeqAndString:
  909. var a = toSet([2, 4, 5])
  910. var b = initSet[int]()
  911. for x in [2, 4, 5]: b.incl(x)
  912. assert($a == $b)
  913. #echo a
  914. #echo toSet(["no", "esc'aping", "is \" provided"])
  915. #block orderedToSeqAndString:
  916. # echo toOrderedSet([2, 4, 5])
  917. # echo toOrderedSet(["no", "esc'aping", "is \" provided"])
  918. block setOperations:
  919. var
  920. a = toSet(["a", "b"])
  921. b = toSet(["b", "c"])
  922. c = union(a, b)
  923. assert c == toSet(["a", "b", "c"])
  924. var d = intersection(a, b)
  925. assert d == toSet(["b"])
  926. var e = difference(a, b)
  927. assert e == toSet(["a"])
  928. var f = symmetricDifference(a, b)
  929. assert f == toSet(["a", "c"])
  930. assert d < a and d < b
  931. assert((a < a) == false)
  932. assert d <= a and d <= b
  933. assert((a <= a))
  934. # Alias test.
  935. assert a + b == toSet(["a", "b", "c"])
  936. assert a * b == toSet(["b"])
  937. assert a - b == toSet(["a"])
  938. assert a -+- b == toSet(["a", "c"])
  939. assert disjoint(a, b) == false
  940. assert disjoint(a, b - a) == true
  941. block mapSet:
  942. var a = toSet([1, 2, 3])
  943. var b = a.map(proc (x: int): string = $x)
  944. assert b == toSet(["1", "2", "3"])
  945. block isValidTest:
  946. var cards: OrderedSet[string]
  947. proc saveTarotCards(cards: OrderedSet[string]) =
  948. assert cards.isValid, "Pass an initialized set!"
  949. cards = initOrderedSet[string]()
  950. cards.saveTarotCards
  951. block lenTest:
  952. var values: OrderedSet[int]
  953. assert(not values.isValid)
  954. assert values.len == 0
  955. assert values.card == 0
  956. block setIterator:
  957. type pair = tuple[a, b: int]
  958. var a, b = initOrderedSet[pair]()
  959. a.incl((2, 3))
  960. a.incl((3, 2))
  961. a.incl((2, 3))
  962. for x, y in a.items:
  963. b.incl((x - 2, y + 1))
  964. assert a.len == b.card
  965. assert a.len == 2
  966. block setPairsIterator:
  967. var s = toOrderedSet([1, 3, 5, 7])
  968. var items = newSeq[tuple[a: int, b: int]]()
  969. for idx, item in s: items.add((idx, item))
  970. assert items == @[(0, 1), (1, 3), (2, 5), (3, 7)]
  971. block exclusions:
  972. var s = toOrderedSet([1, 2, 3, 6, 7, 4])
  973. s.excl(3)
  974. s.excl(3)
  975. s.excl(1)
  976. s.excl(4)
  977. var items = newSeq[int]()
  978. for item in s: items.add item
  979. assert items == @[2, 6, 7]
  980. block: #9005
  981. var s = initOrderedSet[(int, int)]()
  982. for i in 0 .. 30: incl(s, (i, 0))
  983. for i in 0 .. 30: excl(s, (i, 0))
  984. doAssert s.len == 0
  985. #block orderedSetIterator:
  986. # var a = initOrderedSet[int]()
  987. # for value in [9, 2, 1, 5, 1, 8, 4, 2]:
  988. # a.incl(value)
  989. # for value in a.items:
  990. # echo "Got ", value
  991. block setContains:
  992. var values = initOrderedSet[int]()
  993. assert(not values.contains(2))
  994. values.incl(2)
  995. assert values.contains(2)
  996. block toSeqAndString:
  997. var a = toOrderedSet([2, 4, 5])
  998. var b = initOrderedSet[int]()
  999. for x in [2, 4, 5]: b.incl(x)
  1000. assert($a == $b)
  1001. assert(a == b) # https://github.com/Araq/Nim/issues/1413
  1002. block initBlocks:
  1003. var a: OrderedSet[int]
  1004. a.init(4)
  1005. a.incl(2)
  1006. a.init
  1007. assert a.len == 0 and a.isValid
  1008. a = initOrderedSet[int](4)
  1009. a.incl(2)
  1010. assert a.len == 1
  1011. var b: HashSet[int]
  1012. b.init(4)
  1013. b.incl(2)
  1014. b.init
  1015. assert b.len == 0 and b.isValid
  1016. b = initSet[int](4)
  1017. b.incl(2)
  1018. assert b.len == 1
  1019. for i in 0 .. 32:
  1020. var s = rightSize(i)
  1021. if s <= i or mustRehash(s, i):
  1022. echo "performance issue: rightSize() will not elide enlarge() at ", i
  1023. block missingOrExcl:
  1024. var s = toOrderedSet([2, 3, 6, 7])
  1025. assert s.missingOrExcl(4) == true
  1026. assert s.missingOrExcl(6) == false
  1027. block orderedSetEquality:
  1028. type pair = tuple[a, b: int]
  1029. var aa = initOrderedSet[pair]()
  1030. var bb = initOrderedSet[pair]()
  1031. var x = (a:1,b:2)
  1032. var y = (a:3,b:4)
  1033. aa.incl(x)
  1034. aa.incl(y)
  1035. bb.incl(x)
  1036. bb.incl(y)
  1037. assert aa == bb
  1038. when not defined(testing):
  1039. echo "Micro tests run successfully."
  1040. testModule()