sets.nim 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## The ``sets`` module implements an efficient `hash set`:idx: and
  10. ## ordered hash set.
  11. ##
  12. ## Hash sets are different from the `built in set type
  13. ## <manual.html#types-set-type>`_. Sets allow you to store any value that can be
  14. ## `hashed <hashes.html>`_ and they don't contain duplicate entries.
  15. ##
  16. ## **Note**: The data types declared here have *value semantics*: This means
  17. ## that ``=`` performs a copy of the set.
  18. import
  19. hashes, math
  20. {.pragma: myShallow.}
  21. when not defined(nimhygiene):
  22. {.pragma: dirty.}
  23. # For "integer-like A" that are too big for intsets/bit-vectors to be practical,
  24. # it would be best to shrink hcode to the same size as the integer. Larger
  25. # codes should never be needed, and this can pack more entries per cache-line.
  26. # Losing hcode entirely is also possible - if some element value is forbidden.
  27. type
  28. KeyValuePair[A] = tuple[hcode: Hash, key: A]
  29. KeyValuePairSeq[A] = seq[KeyValuePair[A]]
  30. HashSet* {.myShallow.}[A] = object ## \
  31. ## A generic hash set.
  32. ##
  33. ## Use `init() <#init,HashSet[A],int>`_ or `initSet[type]() <#initSet>`_
  34. ## before calling other procs on it.
  35. data: KeyValuePairSeq[A]
  36. counter: int
  37. template default[T](t: typedesc[T]): T =
  38. ## Used by clear methods to get a default value.
  39. var v: T
  40. v
  41. proc clear*[A](s: var HashSet[A]) =
  42. ## Clears the HashSet back to an empty state, without shrinking
  43. ## any of the existing storage. O(n) where n is the size of the hash bucket.
  44. s.counter = 0
  45. for i in 0..<s.data.len:
  46. s.data[i].hcode = 0
  47. s.data[i].key = default(type(s.data[i].key))
  48. # hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These
  49. # two procs retain clarity of that encoding without the space cost of an enum.
  50. proc isEmpty(hcode: Hash): bool {.inline.} =
  51. result = hcode == 0
  52. proc isFilled(hcode: Hash): bool {.inline.} =
  53. result = hcode != 0
  54. proc isValid*[A](s: HashSet[A]): bool =
  55. ## Returns `true` if the set has been initialized with `initSet <#initSet>`_.
  56. ##
  57. ## Most operations over an uninitialized set will crash at runtime and
  58. ## `assert <system.html#assert>`_ in debug builds. You can use this proc in
  59. ## your own procs to verify that sets passed to your procs are correctly
  60. ## initialized. Example:
  61. ##
  62. ## .. code-block ::
  63. ## proc savePreferences(options: HashSet[string]) =
  64. ## assert options.isValid, "Pass an initialized set!"
  65. ## # Do stuff here, may crash in release builds!
  66. result = s.data.len > 0
  67. proc len*[A](s: HashSet[A]): int =
  68. ## Returns the number of keys in `s`.
  69. ##
  70. ## Due to an implementation detail you can call this proc on variables which
  71. ## have not been initialized yet. The proc will return zero as the length
  72. ## then. Example:
  73. ##
  74. ## .. code-block::
  75. ##
  76. ## var values: HashSet[int]
  77. ## assert(not values.isValid)
  78. ## assert values.len == 0
  79. result = s.counter
  80. proc card*[A](s: HashSet[A]): int =
  81. ## Alias for `len() <#len,TSet[A]>`_.
  82. ##
  83. ## Card stands for the `cardinality
  84. ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set.
  85. result = s.counter
  86. iterator items*[A](s: HashSet[A]): A =
  87. ## Iterates over keys in the set `s`.
  88. ##
  89. ## If you need a sequence with the keys you can use `sequtils.toSeq()
  90. ## <sequtils.html#toSeq>`_ on the iterator. Usage example:
  91. ##
  92. ## .. code-block::
  93. ## type
  94. ## pair = tuple[a, b: int]
  95. ## var
  96. ## a, b = initSet[pair]()
  97. ## a.incl((2, 3))
  98. ## a.incl((3, 2))
  99. ## a.incl((2, 3))
  100. ## for x, y in a.items:
  101. ## b.incl((x - 2, y + 1))
  102. ## assert a.len == 2
  103. ## echo b
  104. ## # --> {(a: 1, b: 3), (a: 0, b: 4)}
  105. assert s.isValid, "The set needs to be initialized."
  106. for h in 0..high(s.data):
  107. if isFilled(s.data[h].hcode): yield s.data[h].key
  108. proc hash*[A](s: HashSet[A]): Hash =
  109. ## hashing of HashSet
  110. assert s.isValid, "The set needs to be initialized."
  111. for h in 0..high(s.data):
  112. result = result xor s.data[h].hcode
  113. result = !$result
  114. const
  115. growthFactor = 2
  116. proc mustRehash(length, counter: int): bool {.inline.} =
  117. assert(length > counter)
  118. result = (length * 2 < counter * 3) or (length - counter < 4)
  119. proc rightSize*(count: Natural): int {.inline.} =
  120. ## Return the value of `initialSize` to support `count` items.
  121. ##
  122. ## If more items are expected to be added, simply add that
  123. ## expected extra amount to the parameter before calling this.
  124. ##
  125. ## Internally, we want mustRehash(rightSize(x), x) == false.
  126. result = nextPowerOfTwo(count * 3 div 2 + 4)
  127. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  128. result = (h + 1) and maxHash
  129. template rawGetKnownHCImpl() {.dirty.} =
  130. var h: Hash = hc and high(s.data) # start with real hash value
  131. while isFilled(s.data[h].hcode):
  132. # Compare hc THEN key with boolean short circuit. This makes the common case
  133. # zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
  134. # It does slow down succeeding lookups by one extra Hash cmp&and..usually
  135. # just a few clock cycles, generally worth it for any non-integer-like A.
  136. if s.data[h].hcode == hc and s.data[h].key == key: # compare hc THEN key
  137. return h
  138. h = nextTry(h, high(s.data))
  139. result = -1 - h # < 0 => MISSING; insert idx = -1 - result
  140. template genHash(key: typed): Hash =
  141. var hc = hash(key)
  142. if hc == 0: # This almost never taken branch should be very predictable.
  143. hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine.
  144. hc
  145. template rawGetImpl() {.dirty.} =
  146. hc = genHash(key)
  147. rawGetKnownHCImpl()
  148. template rawInsertImpl() {.dirty.} =
  149. data[h].key = key
  150. data[h].hcode = hc
  151. proc rawGetKnownHC[A](s: HashSet[A], key: A, hc: Hash): int {.inline.} =
  152. rawGetKnownHCImpl()
  153. proc rawGet[A](s: HashSet[A], key: A, hc: var Hash): int {.inline.} =
  154. rawGetImpl()
  155. proc `[]`*[A](s: var HashSet[A], key: A): var A =
  156. ## returns the element that is actually stored in 's' which has the same
  157. ## value as 'key' or raises the ``KeyError`` exception. This is useful
  158. ## when one overloaded 'hash' and '==' but still needs reference semantics
  159. ## for sharing.
  160. assert s.isValid, "The set needs to be initialized."
  161. var hc: Hash
  162. var index = rawGet(s, key, hc)
  163. if index >= 0: result = s.data[index].key
  164. else:
  165. when compiles($key):
  166. raise newException(KeyError, "key not found: " & $key)
  167. else:
  168. raise newException(KeyError, "key not found")
  169. proc mget*[A](s: var HashSet[A], key: A): var A {.deprecated.} =
  170. ## returns the element that is actually stored in 's' which has the same
  171. ## value as 'key' or raises the ``KeyError`` exception. This is useful
  172. ## when one overloaded 'hash' and '==' but still needs reference semantics
  173. ## for sharing. Use ```[]``` instead.
  174. s[key]
  175. proc contains*[A](s: HashSet[A], key: A): bool =
  176. ## Returns true iff `key` is in `s`.
  177. ##
  178. ## Example:
  179. ##
  180. ## .. code-block::
  181. ## var values = initSet[int]()
  182. ## assert(not values.contains(2))
  183. ## values.incl(2)
  184. ## assert values.contains(2)
  185. ## values.excl(2)
  186. ## assert(not values.contains(2))
  187. assert s.isValid, "The set needs to be initialized."
  188. var hc: Hash
  189. var index = rawGet(s, key, hc)
  190. result = index >= 0
  191. proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A,
  192. hc: Hash, h: Hash) =
  193. rawInsertImpl()
  194. proc enlarge[A](s: var HashSet[A]) =
  195. var n: KeyValuePairSeq[A]
  196. newSeq(n, len(s.data) * growthFactor)
  197. swap(s.data, n) # n is now old seq
  198. for i in countup(0, high(n)):
  199. if isFilled(n[i].hcode):
  200. var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode)
  201. rawInsert(s, s.data, n[i].key, n[i].hcode, j)
  202. template inclImpl() {.dirty.} =
  203. var hc: Hash
  204. var index = rawGet(s, key, hc)
  205. if index < 0:
  206. if mustRehash(len(s.data), s.counter):
  207. enlarge(s)
  208. index = rawGetKnownHC(s, key, hc)
  209. rawInsert(s, s.data, key, hc, -1 - index)
  210. inc(s.counter)
  211. template containsOrInclImpl() {.dirty.} =
  212. var hc: Hash
  213. var index = rawGet(s, key, hc)
  214. if index >= 0:
  215. result = true
  216. else:
  217. if mustRehash(len(s.data), s.counter):
  218. enlarge(s)
  219. index = rawGetKnownHC(s, key, hc)
  220. rawInsert(s, s.data, key, hc, -1 - index)
  221. inc(s.counter)
  222. proc incl*[A](s: var HashSet[A], key: A) =
  223. ## Includes an element `key` in `s`.
  224. ##
  225. ## This doesn't do anything if `key` is already in `s`. Example:
  226. ##
  227. ## .. code-block::
  228. ## var values = initSet[int]()
  229. ## values.incl(2)
  230. ## values.incl(2)
  231. ## assert values.len == 1
  232. assert s.isValid, "The set needs to be initialized."
  233. inclImpl()
  234. proc incl*[A](s: var HashSet[A], other: HashSet[A]) =
  235. ## Includes all elements from `other` into `s`.
  236. ##
  237. ## Example:
  238. ##
  239. ## .. code-block::
  240. ## var values = initSet[int]()
  241. ## values.incl(2)
  242. ## var others = toSet([6, 7])
  243. ## values.incl(others)
  244. ## assert values.len == 3
  245. assert s.isValid, "The set `s` needs to be initialized."
  246. assert other.isValid, "The set `other` needs to be initialized."
  247. for item in other: incl(s, item)
  248. template doWhile(a, b) =
  249. while true:
  250. b
  251. if not a: break
  252. template default[T](t: typedesc[T]): T =
  253. var v: T
  254. v
  255. proc exclImpl[A](s: var HashSet[A], key: A) : bool {. inline .} =
  256. assert s.isValid, "The set needs to be initialized."
  257. var hc: Hash
  258. var i = rawGet(s, key, hc)
  259. var msk = high(s.data)
  260. result = true
  261. if i >= 0:
  262. result = false
  263. dec(s.counter)
  264. while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
  265. var j = i # The correctness of this depends on (h+1) in nextTry,
  266. var r = j # though may be adaptable to other simple sequences.
  267. s.data[i].hcode = 0 # mark current EMPTY
  268. s.data[i].key = default(type(s.data[i].key))
  269. doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
  270. i = (i + 1) and msk # increment mod table size
  271. if isEmpty(s.data[i].hcode): # end of collision cluster; So all done
  272. return
  273. r = s.data[i].hcode and msk # "home" location of key@i
  274. shallowCopy(s.data[j], s.data[i]) # data[i] will be marked EMPTY next loop
  275. proc missingOrExcl*[A](s: var HashSet[A], key: A): bool =
  276. ## Excludes `key` in the set `s` and tells if `key` was removed from `s`.
  277. ##
  278. ## The difference with regards to the `excl() <#excl,TSet[A],A>`_ proc is
  279. ## that this proc returns `true` if `key` was not present in `s`. Example:
  280. ##
  281. ## .. code-block::
  282. ## var s = toSet([2, 3, 6, 7])
  283. ## assert s.missingOrExcl(4) == true
  284. ## assert s.missingOrExcl(6) == false
  285. exclImpl(s, key)
  286. proc excl*[A](s: var HashSet[A], key: A) =
  287. ## Excludes `key` from the set `s`.
  288. ##
  289. ## This doesn't do anything if `key` is not found in `s`. Example:
  290. ##
  291. ## .. code-block::
  292. ## var s = toSet([2, 3, 6, 7])
  293. ## s.excl(2)
  294. ## s.excl(2)
  295. ## assert s.len == 3
  296. discard exclImpl(s, key)
  297. proc excl*[A](s: var HashSet[A], other: HashSet[A]) =
  298. ## Excludes everything in `other` from `s`.
  299. ##
  300. ## Example:
  301. ##
  302. ## .. code-block::
  303. ## var
  304. ## numbers = toSet([1, 2, 3, 4, 5])
  305. ## even = toSet([2, 4, 6, 8])
  306. ## numbers.excl(even)
  307. ## echo numbers
  308. ## # --> {1, 3, 5}
  309. assert s.isValid, "The set `s` needs to be initialized."
  310. assert other.isValid, "The set `other` needs to be initialized."
  311. for item in other: discard exclImpl(s, item)
  312. proc pop*[A](s: var HashSet[A]): A =
  313. ## Remove and return an arbitrary element from the set `s`.
  314. ##
  315. ## Raises KeyError if the set `s` is empty.
  316. ##
  317. for h in 0..high(s.data):
  318. if isFilled(s.data[h].hcode):
  319. result = s.data[h].key
  320. excl(s, result)
  321. return result
  322. raise newException(KeyError, "set is empty")
  323. proc containsOrIncl*[A](s: var HashSet[A], key: A): bool =
  324. ## Includes `key` in the set `s` and tells if `key` was added to `s`.
  325. ##
  326. ## The difference with regards to the `incl() <#incl,TSet[A],A>`_ proc is
  327. ## that this proc returns `true` if `key` was already present in `s`. The
  328. ## proc will return false if `key` was added as a new value to `s` during
  329. ## this call. Example:
  330. ##
  331. ## .. code-block::
  332. ## var values = initSet[int]()
  333. ## assert values.containsOrIncl(2) == false
  334. ## assert values.containsOrIncl(2) == true
  335. assert s.isValid, "The set needs to be initialized."
  336. containsOrInclImpl()
  337. proc init*[A](s: var HashSet[A], initialSize=64) =
  338. ## Initializes a hash set.
  339. ##
  340. ## The `initialSize` parameter needs to be a power of two. You can use
  341. ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to
  342. ## guarantee that at runtime. All set variables must be initialized before
  343. ## use with other procs from this module with the exception of `isValid()
  344. ## <#isValid,TSet[A]>`_ and `len() <#len,TSet[A]>`_.
  345. ##
  346. ## You can call this proc on a previously initialized hash set, which will
  347. ## discard all its values. This might be more convenient than iterating over
  348. ## existing values and calling `excl() <#excl,TSet[A],A>`_ on them. Example:
  349. ##
  350. ## .. code-block ::
  351. ## var a: HashSet[int]
  352. ## a.init(4)
  353. ## a.incl(2)
  354. ## a.init
  355. ## assert a.len == 0 and a.isValid
  356. assert isPowerOfTwo(initialSize)
  357. s.counter = 0
  358. newSeq(s.data, initialSize)
  359. proc initSet*[A](initialSize=64): HashSet[A] =
  360. ## Wrapper around `init() <#init,TSet[A],int>`_ for initialization of hash
  361. ## sets.
  362. ##
  363. ## Returns an empty hash set you can assign directly in ``var`` blocks in a
  364. ## single line. Example:
  365. ##
  366. ## .. code-block ::
  367. ## var a = initSet[int](4)
  368. ## a.incl(2)
  369. result.init(initialSize)
  370. proc toSet*[A](keys: openArray[A]): HashSet[A] =
  371. ## Creates a new hash set that contains the given `keys`.
  372. ##
  373. ## Example:
  374. ##
  375. ## .. code-block::
  376. ## var numbers = toSet([1, 2, 3, 4, 5])
  377. ## assert numbers.contains(2)
  378. ## assert numbers.contains(4)
  379. result = initSet[A](rightSize(keys.len))
  380. for key in items(keys): result.incl(key)
  381. template dollarImpl() {.dirty.} =
  382. result = "{"
  383. for key in items(s):
  384. if result.len > 1: result.add(", ")
  385. result.addQuoted(key)
  386. result.add("}")
  387. proc `$`*[A](s: HashSet[A]): string =
  388. ## Converts the set `s` to a string, mostly for logging purposes.
  389. ##
  390. ## Don't use this proc for serialization, the representation may change at
  391. ## any moment and values are not escaped. Example:
  392. ##
  393. ## Example:
  394. ##
  395. ## .. code-block::
  396. ## echo toSet([2, 4, 5])
  397. ## # --> {2, 4, 5}
  398. ## echo toSet(["no", "esc'aping", "is \" provided"])
  399. ## # --> {no, esc'aping, is " provided}
  400. assert s.isValid, "The set needs to be initialized."
  401. dollarImpl()
  402. proc union*[A](s1, s2: HashSet[A]): HashSet[A] =
  403. ## Returns the union of the sets `s1` and `s2`.
  404. ##
  405. ## The union of two sets is represented mathematically as *A ∪ B* and is the
  406. ## set of all objects that are members of `s1`, `s2` or both. Example:
  407. ##
  408. ## .. code-block::
  409. ## var
  410. ## a = toSet(["a", "b"])
  411. ## b = toSet(["b", "c"])
  412. ## c = union(a, b)
  413. ## assert c == toSet(["a", "b", "c"])
  414. assert s1.isValid, "The set `s1` needs to be initialized."
  415. assert s2.isValid, "The set `s2` needs to be initialized."
  416. result = s1
  417. incl(result, s2)
  418. proc intersection*[A](s1, s2: HashSet[A]): HashSet[A] =
  419. ## Returns the intersection of the sets `s1` and `s2`.
  420. ##
  421. ## The intersection of two sets is represented mathematically as *A ∩ B* and
  422. ## is the set of all objects that are members of `s1` and `s2` at the same
  423. ## time. Example:
  424. ##
  425. ## .. code-block::
  426. ## var
  427. ## a = toSet(["a", "b"])
  428. ## b = toSet(["b", "c"])
  429. ## c = intersection(a, b)
  430. ## assert c == toSet(["b"])
  431. assert s1.isValid, "The set `s1` needs to be initialized."
  432. assert s2.isValid, "The set `s2` needs to be initialized."
  433. result = initSet[A](min(s1.data.len, s2.data.len))
  434. for item in s1:
  435. if item in s2: incl(result, item)
  436. proc difference*[A](s1, s2: HashSet[A]): HashSet[A] =
  437. ## Returns the difference of the sets `s1` and `s2`.
  438. ##
  439. ## The difference of two sets is represented mathematically as *A \ B* and is
  440. ## the set of all objects that are members of `s1` and not members of `s2`.
  441. ## Example:
  442. ##
  443. ## .. code-block::
  444. ## var
  445. ## a = toSet(["a", "b"])
  446. ## b = toSet(["b", "c"])
  447. ## c = difference(a, b)
  448. ## assert c == toSet(["a"])
  449. assert s1.isValid, "The set `s1` needs to be initialized."
  450. assert s2.isValid, "The set `s2` needs to be initialized."
  451. result = initSet[A]()
  452. for item in s1:
  453. if not contains(s2, item):
  454. incl(result, item)
  455. proc symmetricDifference*[A](s1, s2: HashSet[A]): HashSet[A] =
  456. ## Returns the symmetric difference of the sets `s1` and `s2`.
  457. ##
  458. ## The symmetric difference of two sets is represented mathematically as *A △
  459. ## B* or *A ⊖ B* and is the set of all objects that are members of `s1` or
  460. ## `s2` but not both at the same time. Example:
  461. ##
  462. ## .. code-block::
  463. ## var
  464. ## a = toSet(["a", "b"])
  465. ## b = toSet(["b", "c"])
  466. ## c = symmetricDifference(a, b)
  467. ## assert c == toSet(["a", "c"])
  468. assert s1.isValid, "The set `s1` needs to be initialized."
  469. assert s2.isValid, "The set `s2` needs to be initialized."
  470. result = s1
  471. for item in s2:
  472. if containsOrIncl(result, item): excl(result, item)
  473. proc `+`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  474. ## Alias for `union(s1, s2) <#union>`_.
  475. result = union(s1, s2)
  476. proc `*`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  477. ## Alias for `intersection(s1, s2) <#intersection>`_.
  478. result = intersection(s1, s2)
  479. proc `-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  480. ## Alias for `difference(s1, s2) <#difference>`_.
  481. result = difference(s1, s2)
  482. proc `-+-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  483. ## Alias for `symmetricDifference(s1, s2) <#symmetricDifference>`_.
  484. result = symmetricDifference(s1, s2)
  485. proc disjoint*[A](s1, s2: HashSet[A]): bool =
  486. ## Returns true iff the sets `s1` and `s2` have no items in common.
  487. ##
  488. ## Example:
  489. ##
  490. ## .. code-block::
  491. ## var
  492. ## a = toSet(["a", "b"])
  493. ## b = toSet(["b", "c"])
  494. ## assert disjoint(a, b) == false
  495. ## assert disjoint(a, b - a) == true
  496. assert s1.isValid, "The set `s1` needs to be initialized."
  497. assert s2.isValid, "The set `s2` needs to be initialized."
  498. for item in s1:
  499. if item in s2: return false
  500. return true
  501. proc `<`*[A](s, t: HashSet[A]): bool =
  502. ## Returns true if `s` is a strict or proper subset of `t`.
  503. ##
  504. ## A strict or proper subset `s` has all of its members in `t` but `t` has
  505. ## more elements than `s`. Example:
  506. ##
  507. ## .. code-block::
  508. ## var
  509. ## a = toSet(["a", "b"])
  510. ## b = toSet(["b", "c"])
  511. ## c = intersection(a, b)
  512. ## assert c < a and c < b
  513. ## assert((a < a) == false)
  514. s.counter != t.counter and s <= t
  515. proc `<=`*[A](s, t: HashSet[A]): bool =
  516. ## Returns true if `s` is subset of `t`.
  517. ##
  518. ## A subset `s` has all of its members in `t` and `t` doesn't necessarily
  519. ## have more members than `s`. That is, `s` can be equal to `t`. Example:
  520. ##
  521. ## .. code-block::
  522. ## var
  523. ## a = toSet(["a", "b"])
  524. ## b = toSet(["b", "c"])
  525. ## c = intersection(a, b)
  526. ## assert c <= a and c <= b
  527. ## assert((a <= a))
  528. result = false
  529. if s.counter > t.counter: return
  530. result = true
  531. for item in s:
  532. if not(t.contains(item)):
  533. result = false
  534. return
  535. proc `==`*[A](s, t: HashSet[A]): bool =
  536. ## Returns true if both `s` and `t` have the same members and set size.
  537. ##
  538. ## Example:
  539. ##
  540. ## .. code-block::
  541. ## var
  542. ## a = toSet([1, 2])
  543. ## b = toSet([1])
  544. ## b.incl(2)
  545. ## assert a == b
  546. s.counter == t.counter and s <= t
  547. proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] =
  548. ## Returns a new set after applying `op` on each of the elements of `data`.
  549. ##
  550. ## You can use this proc to transform the elements from a set. Example:
  551. ##
  552. ## .. code-block::
  553. ## var a = toSet([1, 2, 3])
  554. ## var b = a.map(proc (x: int): string = $x)
  555. ## assert b == toSet(["1", "2", "3"])
  556. result = initSet[B]()
  557. for item in data: result.incl(op(item))
  558. # ------------------------------ ordered set ------------------------------
  559. type
  560. OrderedKeyValuePair[A] = tuple[
  561. hcode: Hash, next: int, key: A]
  562. OrderedKeyValuePairSeq[A] = seq[OrderedKeyValuePair[A]]
  563. OrderedSet* {.myShallow.}[A] = object ## \
  564. ## A generic hash set that remembers insertion order.
  565. ##
  566. ## Use `init() <#init,OrderedSet[A],int>`_ or `initOrderedSet[type]()
  567. ## <#initOrderedSet>`_ before calling other procs on it.
  568. data: OrderedKeyValuePairSeq[A]
  569. counter, first, last: int
  570. proc clear*[A](s: var OrderedSet[A]) =
  571. ## Clears the OrderedSet back to an empty state, without shrinking
  572. ## any of the existing storage. O(n) where n is the size of the hash bucket.
  573. s.counter = 0
  574. s.first = -1
  575. s.last = -1
  576. for i in 0..<s.data.len:
  577. s.data[i].hcode = 0
  578. s.data[i].next = 0
  579. s.data[i].key = default(type(s.data[i].key))
  580. proc isValid*[A](s: OrderedSet[A]): bool =
  581. ## Returns `true` if the ordered set has been initialized with `initSet
  582. ## <#initOrderedSet>`_.
  583. ##
  584. ## Most operations over an uninitialized ordered set will crash at runtime
  585. ## and `assert <system.html#assert>`_ in debug builds. You can use this proc
  586. ## in your own procs to verify that ordered sets passed to your procs are
  587. ## correctly initialized. Example:
  588. ##
  589. ## .. code-block::
  590. ## proc saveTarotCards(cards: OrderedSet[int]) =
  591. ## assert cards.isValid, "Pass an initialized set!"
  592. ## # Do stuff here, may crash in release builds!
  593. result = s.data.len > 0
  594. proc len*[A](s: OrderedSet[A]): int {.inline.} =
  595. ## Returns the number of keys in `s`.
  596. ##
  597. ## Due to an implementation detail you can call this proc on variables which
  598. ## have not been initialized yet. The proc will return zero as the length
  599. ## then. Example:
  600. ##
  601. ## .. code-block::
  602. ##
  603. ## var values: OrderedSet[int]
  604. ## assert(not values.isValid)
  605. ## assert values.len == 0
  606. result = s.counter
  607. proc card*[A](s: OrderedSet[A]): int {.inline.} =
  608. ## Alias for `len() <#len,TOrderedSet[A]>`_.
  609. ##
  610. ## Card stands for the `cardinality
  611. ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set.
  612. result = s.counter
  613. template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
  614. var h = s.first
  615. var idx = 0
  616. while h >= 0:
  617. var nxt = s.data[h].next
  618. if isFilled(s.data[h].hcode):
  619. yieldStmt
  620. inc(idx)
  621. h = nxt
  622. iterator items*[A](s: OrderedSet[A]): A =
  623. ## Iterates over keys in the ordered set `s` in insertion order.
  624. ##
  625. ## If you need a sequence with the keys you can use `sequtils.toSeq()
  626. ## <sequtils.html#toSeq>`_ on the iterator. Usage example:
  627. ##
  628. ## .. code-block::
  629. ## var a = initOrderedSet[int]()
  630. ## for value in [9, 2, 1, 5, 1, 8, 4, 2]:
  631. ## a.incl(value)
  632. ## for value in a.items:
  633. ## echo "Got ", value
  634. ## # --> Got 9
  635. ## # --> Got 2
  636. ## # --> Got 1
  637. ## # --> Got 5
  638. ## # --> Got 8
  639. ## # --> Got 4
  640. assert s.isValid, "The set needs to be initialized."
  641. forAllOrderedPairs:
  642. yield s.data[h].key
  643. proc hash*[A](s: OrderedSet[A]): Hash =
  644. ## hashing of OrderedSet
  645. assert s.isValid, "The set needs to be initialized."
  646. forAllOrderedPairs:
  647. result = result !& s.data[h].hcode
  648. result = !$result
  649. iterator pairs*[A](s: OrderedSet[A]): tuple[a: int, b: A] =
  650. assert s.isValid, "The set needs to be initialized"
  651. forAllOrderedPairs:
  652. yield (idx, s.data[h].key)
  653. proc rawGetKnownHC[A](s: OrderedSet[A], key: A, hc: Hash): int {.inline.} =
  654. rawGetKnownHCImpl()
  655. proc rawGet[A](s: OrderedSet[A], key: A, hc: var Hash): int {.inline.} =
  656. rawGetImpl()
  657. proc contains*[A](s: OrderedSet[A], key: A): bool =
  658. ## Returns true iff `key` is in `s`.
  659. ##
  660. ## Example:
  661. ##
  662. ## .. code-block::
  663. ## var values = initOrderedSet[int]()
  664. ## assert(not values.contains(2))
  665. ## values.incl(2)
  666. ## assert values.contains(2)
  667. assert s.isValid, "The set needs to be initialized."
  668. var hc: Hash
  669. var index = rawGet(s, key, hc)
  670. result = index >= 0
  671. proc rawInsert[A](s: var OrderedSet[A], data: var OrderedKeyValuePairSeq[A],
  672. key: A, hc: Hash, h: Hash) =
  673. rawInsertImpl()
  674. data[h].next = -1
  675. if s.first < 0: s.first = h
  676. if s.last >= 0: data[s.last].next = h
  677. s.last = h
  678. proc enlarge[A](s: var OrderedSet[A]) =
  679. var n: OrderedKeyValuePairSeq[A]
  680. newSeq(n, len(s.data) * growthFactor)
  681. var h = s.first
  682. s.first = -1
  683. s.last = -1
  684. swap(s.data, n)
  685. while h >= 0:
  686. var nxt = n[h].next
  687. if isFilled(n[h].hcode):
  688. var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode)
  689. rawInsert(s, s.data, n[h].key, n[h].hcode, j)
  690. h = nxt
  691. proc incl*[A](s: var OrderedSet[A], key: A) =
  692. ## Includes an element `key` in `s`.
  693. ##
  694. ## This doesn't do anything if `key` is already in `s`. Example:
  695. ##
  696. ## .. code-block::
  697. ## var values = initOrderedSet[int]()
  698. ## values.incl(2)
  699. ## values.incl(2)
  700. ## assert values.len == 1
  701. assert s.isValid, "The set needs to be initialized."
  702. inclImpl()
  703. proc incl*[A](s: var HashSet[A], other: OrderedSet[A]) =
  704. ## Includes all elements from `other` into `s`.
  705. ##
  706. ## Example:
  707. ##
  708. ## .. code-block::
  709. ## var values = initOrderedSet[int]()
  710. ## values.incl(2)
  711. ## var others = toOrderedSet([6, 7])
  712. ## values.incl(others)
  713. ## assert values.len == 3
  714. assert s.isValid, "The set `s` needs to be initialized."
  715. assert other.isValid, "The set `other` needs to be initialized."
  716. for item in other: incl(s, item)
  717. proc exclImpl[A](s: var OrderedSet[A], key: A) : bool {. inline .} =
  718. assert s.isValid, "The set needs to be initialized."
  719. var n: OrderedKeyValuePairSeq[A]
  720. newSeq(n, len(s.data))
  721. var h = s.first
  722. s.first = -1
  723. s.last = -1
  724. swap(s.data, n)
  725. let hc = genHash(key)
  726. result = true
  727. while h >= 0:
  728. var nxt = n[h].next
  729. if isFilled(n[h].hcode):
  730. if n[h].hcode == hc and n[h].key == key:
  731. dec s.counter
  732. result = false
  733. else:
  734. var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode)
  735. rawInsert(s, s.data, n[h].key, n[h].hcode, j)
  736. h = nxt
  737. proc missingOrExcl*[A](s: var OrderedSet[A], key: A): bool =
  738. ## Excludes `key` in the set `s` and tells if `key` was removed from `s`. Efficiency: O(n).
  739. ##
  740. ## The difference with regards to the `excl() <#excl,TOrderedSet[A],A>`_ proc is
  741. ## that this proc returns `true` if `key` was not present in `s`. Example:
  742. ##
  743. ## .. code-block::
  744. ## var s = toOrderedSet([2, 3, 6, 7])
  745. ## assert s.missingOrExcl(4) == true
  746. ## assert s.missingOrExcl(6) == false
  747. exclImpl(s, key)
  748. proc excl*[A](s: var OrderedSet[A], key: A) =
  749. ## Excludes `key` from the set `s`. Efficiency: O(n).
  750. ##
  751. ## This doesn't do anything if `key` is not found in `s`. Example:
  752. ##
  753. ## .. code-block::
  754. ## var s = toOrderedSet([2, 3, 6, 7])
  755. ## s.excl(2)
  756. ## s.excl(2)
  757. ## assert s.len == 3
  758. discard exclImpl(s, key)
  759. proc containsOrIncl*[A](s: var OrderedSet[A], key: A): bool =
  760. ## Includes `key` in the set `s` and tells if `key` was added to `s`.
  761. ##
  762. ## The difference with regards to the `incl() <#incl,TOrderedSet[A],A>`_ proc
  763. ## is that this proc returns `true` if `key` was already present in `s`. The
  764. ## proc will return false if `key` was added as a new value to `s` during
  765. ## this call. Example:
  766. ##
  767. ## .. code-block::
  768. ## var values = initOrderedSet[int]()
  769. ## assert values.containsOrIncl(2) == false
  770. ## assert values.containsOrIncl(2) == true
  771. assert s.isValid, "The set needs to be initialized."
  772. containsOrInclImpl()
  773. proc init*[A](s: var OrderedSet[A], initialSize=64) =
  774. ## Initializes an ordered hash set.
  775. ##
  776. ## The `initialSize` parameter needs to be a power of two. You can use
  777. ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to
  778. ## guarantee that at runtime. All set variables must be initialized before
  779. ## use with other procs from this module with the exception of `isValid()
  780. ## <#isValid,TOrderedSet[A]>`_ and `len() <#len,TOrderedSet[A]>`_.
  781. ##
  782. ## You can call this proc on a previously initialized ordered hash set to
  783. ## discard its values. At the moment this is the only proc to remove elements
  784. ## from an ordered hash set. Example:
  785. ##
  786. ## .. code-block ::
  787. ## var a: OrderedSet[int]
  788. ## a.init(4)
  789. ## a.incl(2)
  790. ## a.init
  791. ## assert a.len == 0 and a.isValid
  792. assert isPowerOfTwo(initialSize)
  793. s.counter = 0
  794. s.first = -1
  795. s.last = -1
  796. newSeq(s.data, initialSize)
  797. proc initOrderedSet*[A](initialSize=64): OrderedSet[A] =
  798. ## Wrapper around `init() <#init,TOrderedSet[A],int>`_ for initialization of
  799. ## ordered hash sets.
  800. ##
  801. ## Returns an empty ordered hash set you can assign directly in ``var``
  802. ## blocks in a single line. Example:
  803. ##
  804. ## .. code-block ::
  805. ## var a = initOrderedSet[int](4)
  806. ## a.incl(2)
  807. result.init(initialSize)
  808. proc toOrderedSet*[A](keys: openArray[A]): OrderedSet[A] =
  809. ## Creates a new ordered hash set that contains the given `keys`.
  810. ##
  811. ## Example:
  812. ##
  813. ## .. code-block::
  814. ## var numbers = toOrderedSet([1, 2, 3, 4, 5])
  815. ## assert numbers.contains(2)
  816. ## assert numbers.contains(4)
  817. result = initOrderedSet[A](rightSize(keys.len))
  818. for key in items(keys): result.incl(key)
  819. proc `$`*[A](s: OrderedSet[A]): string =
  820. ## Converts the ordered hash set `s` to a string, mostly for logging purposes.
  821. ##
  822. ## Don't use this proc for serialization, the representation may change at
  823. ## any moment and values are not escaped. Example:
  824. ##
  825. ## Example:
  826. ##
  827. ## .. code-block::
  828. ## echo toOrderedSet([2, 4, 5])
  829. ## # --> {2, 4, 5}
  830. ## echo toOrderedSet(["no", "esc'aping", "is \" provided"])
  831. ## # --> {no, esc'aping, is " provided}
  832. assert s.isValid, "The set needs to be initialized."
  833. dollarImpl()
  834. proc `==`*[A](s, t: OrderedSet[A]): bool =
  835. ## Equality for ordered sets.
  836. if s.counter != t.counter: return false
  837. var h = s.first
  838. var g = t.first
  839. var compared = 0
  840. while h >= 0 and g >= 0:
  841. var nxh = s.data[h].next
  842. var nxg = t.data[g].next
  843. if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode):
  844. if s.data[h].key == t.data[g].key:
  845. inc compared
  846. else:
  847. return false
  848. h = nxh
  849. g = nxg
  850. result = compared == s.counter
  851. when isMainModule and not defined(release):
  852. proc testModule() =
  853. ## Internal micro test to validate docstrings and such.
  854. block isValidTest:
  855. var options: HashSet[string]
  856. proc savePreferences(options: HashSet[string]) =
  857. assert options.isValid, "Pass an initialized set!"
  858. options = initSet[string]()
  859. options.savePreferences
  860. block lenTest:
  861. var values: HashSet[int]
  862. assert(not values.isValid)
  863. assert values.len == 0
  864. assert values.card == 0
  865. block setIterator:
  866. type pair = tuple[a, b: int]
  867. var a, b = initSet[pair]()
  868. a.incl((2, 3))
  869. a.incl((3, 2))
  870. a.incl((2, 3))
  871. for x, y in a.items:
  872. b.incl((x - 2, y + 1))
  873. assert a.len == b.card
  874. assert a.len == 2
  875. #echo b
  876. block setContains:
  877. var values = initSet[int]()
  878. assert(not values.contains(2))
  879. values.incl(2)
  880. assert values.contains(2)
  881. values.excl(2)
  882. assert(not values.contains(2))
  883. values.incl(4)
  884. var others = toSet([6, 7])
  885. values.incl(others)
  886. assert values.len == 3
  887. values.init
  888. assert values.containsOrIncl(2) == false
  889. assert values.containsOrIncl(2) == true
  890. var
  891. a = toSet([1, 2])
  892. b = toSet([1])
  893. b.incl(2)
  894. assert a == b
  895. block exclusions:
  896. var s = toSet([2, 3, 6, 7])
  897. s.excl(2)
  898. s.excl(2)
  899. assert s.len == 3
  900. var
  901. numbers = toSet([1, 2, 3, 4, 5])
  902. even = toSet([2, 4, 6, 8])
  903. numbers.excl(even)
  904. #echo numbers
  905. # --> {1, 3, 5}
  906. block toSeqAndString:
  907. var a = toSet([2, 4, 5])
  908. var b = initSet[int]()
  909. for x in [2, 4, 5]: b.incl(x)
  910. assert($a == $b)
  911. #echo a
  912. #echo toSet(["no", "esc'aping", "is \" provided"])
  913. #block orderedToSeqAndString:
  914. # echo toOrderedSet([2, 4, 5])
  915. # echo toOrderedSet(["no", "esc'aping", "is \" provided"])
  916. block setOperations:
  917. var
  918. a = toSet(["a", "b"])
  919. b = toSet(["b", "c"])
  920. c = union(a, b)
  921. assert c == toSet(["a", "b", "c"])
  922. var d = intersection(a, b)
  923. assert d == toSet(["b"])
  924. var e = difference(a, b)
  925. assert e == toSet(["a"])
  926. var f = symmetricDifference(a, b)
  927. assert f == toSet(["a", "c"])
  928. assert d < a and d < b
  929. assert((a < a) == false)
  930. assert d <= a and d <= b
  931. assert((a <= a))
  932. # Alias test.
  933. assert a + b == toSet(["a", "b", "c"])
  934. assert a * b == toSet(["b"])
  935. assert a - b == toSet(["a"])
  936. assert a -+- b == toSet(["a", "c"])
  937. assert disjoint(a, b) == false
  938. assert disjoint(a, b - a) == true
  939. block mapSet:
  940. var a = toSet([1, 2, 3])
  941. var b = a.map(proc (x: int): string = $x)
  942. assert b == toSet(["1", "2", "3"])
  943. block isValidTest:
  944. var cards: OrderedSet[string]
  945. proc saveTarotCards(cards: OrderedSet[string]) =
  946. assert cards.isValid, "Pass an initialized set!"
  947. cards = initOrderedSet[string]()
  948. cards.saveTarotCards
  949. block lenTest:
  950. var values: OrderedSet[int]
  951. assert(not values.isValid)
  952. assert values.len == 0
  953. assert values.card == 0
  954. block setIterator:
  955. type pair = tuple[a, b: int]
  956. var a, b = initOrderedSet[pair]()
  957. a.incl((2, 3))
  958. a.incl((3, 2))
  959. a.incl((2, 3))
  960. for x, y in a.items:
  961. b.incl((x - 2, y + 1))
  962. assert a.len == b.card
  963. assert a.len == 2
  964. block setPairsIterator:
  965. var s = toOrderedSet([1, 3, 5, 7])
  966. var items = newSeq[tuple[a: int, b: int]]()
  967. for idx, item in s: items.add((idx, item))
  968. assert items == @[(0, 1), (1, 3), (2, 5), (3, 7)]
  969. block exclusions:
  970. var s = toOrderedSet([1, 2, 3, 6, 7, 4])
  971. s.excl(3)
  972. s.excl(3)
  973. s.excl(1)
  974. s.excl(4)
  975. var items = newSeq[int]()
  976. for item in s: items.add item
  977. assert items == @[2, 6, 7]
  978. block: #9005
  979. var s = initOrderedSet[(int, int)]()
  980. for i in 0 .. 30: incl(s, (i, 0))
  981. for i in 0 .. 30: excl(s, (i, 0))
  982. doAssert s.len == 0
  983. #block orderedSetIterator:
  984. # var a = initOrderedSet[int]()
  985. # for value in [9, 2, 1, 5, 1, 8, 4, 2]:
  986. # a.incl(value)
  987. # for value in a.items:
  988. # echo "Got ", value
  989. block setContains:
  990. var values = initOrderedSet[int]()
  991. assert(not values.contains(2))
  992. values.incl(2)
  993. assert values.contains(2)
  994. block toSeqAndString:
  995. var a = toOrderedSet([2, 4, 5])
  996. var b = initOrderedSet[int]()
  997. for x in [2, 4, 5]: b.incl(x)
  998. assert($a == $b)
  999. assert(a == b) # https://github.com/Araq/Nim/issues/1413
  1000. block initBlocks:
  1001. var a: OrderedSet[int]
  1002. a.init(4)
  1003. a.incl(2)
  1004. a.init
  1005. assert a.len == 0 and a.isValid
  1006. a = initOrderedSet[int](4)
  1007. a.incl(2)
  1008. assert a.len == 1
  1009. var b: HashSet[int]
  1010. b.init(4)
  1011. b.incl(2)
  1012. b.init
  1013. assert b.len == 0 and b.isValid
  1014. b = initSet[int](4)
  1015. b.incl(2)
  1016. assert b.len == 1
  1017. for i in 0 .. 32:
  1018. var s = rightSize(i)
  1019. if s <= i or mustRehash(s, i):
  1020. echo "performance issue: rightSize() will not elide enlarge() at ", i
  1021. block missingOrExcl:
  1022. var s = toOrderedSet([2, 3, 6, 7])
  1023. assert s.missingOrExcl(4) == true
  1024. assert s.missingOrExcl(6) == false
  1025. block orderedSetEquality:
  1026. type pair = tuple[a, b: int]
  1027. var aa = initOrderedSet[pair]()
  1028. var bb = initOrderedSet[pair]()
  1029. var x = (a:1,b:2)
  1030. var y = (a:3,b:4)
  1031. aa.incl(x)
  1032. aa.incl(y)
  1033. bb.incl(x)
  1034. bb.incl(y)
  1035. assert aa == bb
  1036. when not defined(testing):
  1037. echo "Micro tests run successfully."
  1038. testModule()