tsimplesort.nim 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. discard """
  2. output: '''true'''
  3. """
  4. import hashes, math
  5. {.pragma: myShallow.}
  6. type
  7. TSlotEnum = enum seEmpty, seFilled, seDeleted
  8. TKeyValuePair[A, B] = tuple[slot: TSlotEnum, key: A, val: B]
  9. TKeyValuePairSeq[A, B] = seq[TKeyValuePair[A, B]]
  10. TTable* {.final, myShallow.}[A, B] = object
  11. data: TKeyValuePairSeq[A, B]
  12. counter: int
  13. proc len*[A, B](t: TTable[A, B]): int =
  14. ## returns the number of keys in `t`.
  15. result = t.counter
  16. iterator pairs*[A, B](t: TTable[A, B]): tuple[key: A, val: B] =
  17. ## iterates over any (key, value) pair in the table `t`.
  18. for h in 0..high(t.data):
  19. if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val)
  20. iterator keys*[A, B](t: TTable[A, B]): A =
  21. ## iterates over any key in the table `t`.
  22. for h in 0..high(t.data):
  23. if t.data[h].slot == seFilled: yield t.data[h].key
  24. iterator values*[A, B](t: TTable[A, B]): B =
  25. ## iterates over any value in the table `t`.
  26. for h in 0..high(t.data):
  27. if t.data[h].slot == seFilled: yield t.data[h].val
  28. const
  29. growthFactor = 2
  30. proc mustRehash(length, counter: int): bool {.inline.} =
  31. assert(length > counter)
  32. result = (length * 2 < counter * 3) or (length - counter < 4)
  33. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  34. result = ((5 * h) + 1) and maxHash
  35. template rawGetImpl() =
  36. var h: Hash = hash(key) and high(t.data) # start with real hash value
  37. while t.data[h].slot != seEmpty:
  38. if t.data[h].key == key and t.data[h].slot == seFilled:
  39. return h
  40. h = nextTry(h, high(t.data))
  41. result = -1
  42. template rawInsertImpl() =
  43. var h: Hash = hash(key) and high(data)
  44. while data[h].slot == seFilled:
  45. h = nextTry(h, high(data))
  46. data[h].key = key
  47. data[h].val = val
  48. data[h].slot = seFilled
  49. proc rawGet[A, B](t: TTable[A, B], key: A): int =
  50. rawGetImpl()
  51. proc `[]`*[A, B](t: TTable[A, B], key: A): B =
  52. ## retrieves the value at ``t[key]``. If `key` is not in `t`,
  53. ## default empty value for the type `B` is returned
  54. ## and no exception is raised. One can check with ``hasKey`` whether the key
  55. ## exists.
  56. var index = rawGet(t, key)
  57. if index >= 0: result = t.data[index].val
  58. proc hasKey*[A, B](t: TTable[A, B], key: A): bool =
  59. ## returns true iff `key` is in the table `t`.
  60. result = rawGet(t, key) >= 0
  61. proc rawInsert[A, B](t: var TTable[A, B], data: var TKeyValuePairSeq[A, B],
  62. key: A, val: B) =
  63. rawInsertImpl()
  64. proc enlarge[A, B](t: var TTable[A, B]) =
  65. var n: TKeyValuePairSeq[A, B]
  66. newSeq(n, len(t.data) * growthFactor)
  67. for i in countup(0, high(t.data)):
  68. if t.data[i].slot == seFilled: rawInsert(t, n, t.data[i].key, t.data[i].val)
  69. swap(t.data, n)
  70. template putImpl() =
  71. var index = rawGet(t, key)
  72. if index >= 0:
  73. t.data[index].val = val
  74. else:
  75. if mustRehash(len(t.data), t.counter): enlarge(t)
  76. rawInsert(t, t.data, key, val)
  77. inc(t.counter)
  78. proc `[]=`*[A, B](t: var TTable[A, B], key: A, val: B) =
  79. ## puts a (key, value)-pair into `t`.
  80. putImpl()
  81. proc del*[A, B](t: var TTable[A, B], key: A) =
  82. ## deletes `key` from hash table `t`.
  83. var index = rawGet(t, key)
  84. if index >= 0:
  85. t.data[index].slot = seDeleted
  86. dec(t.counter)
  87. proc initTable*[A, B](initialSize=64): TTable[A, B] =
  88. ## creates a new hash table that is empty. `initialSize` needs to be
  89. ## a power of two.
  90. assert isPowerOfTwo(initialSize)
  91. result.counter = 0
  92. newSeq(result.data, initialSize)
  93. proc toTable*[A, B](pairs: openarray[tuple[key: A,
  94. val: B]]): TTable[A, B] =
  95. ## creates a new hash table that contains the given `pairs`.
  96. result = initTable[A, B](nextPowerOfTwo(pairs.len+10))
  97. for key, val in items(pairs): result[key] = val
  98. template dollarImpl(): typed =
  99. if t.len == 0:
  100. result = "{:}"
  101. else:
  102. result = "{"
  103. for key, val in pairs(t):
  104. if result.len > 1: result.add(", ")
  105. result.add($key)
  106. result.add(": ")
  107. result.add($val)
  108. result.add("}")
  109. proc `$`*[A, B](t: TTable[A, B]): string =
  110. ## The `$` operator for hash tables.
  111. dollarImpl()
  112. # ------------------------------ count tables -------------------------------
  113. type
  114. TCountTable* {.final, myShallow.}[
  115. A] = object ## table that counts the number of each key
  116. data: seq[tuple[key: A, val: int]]
  117. counter: int
  118. proc len*[A](t: TCountTable[A]): int =
  119. ## returns the number of keys in `t`.
  120. result = t.counter
  121. iterator pairs*[A](t: TCountTable[A]): tuple[key: A, val: int] =
  122. ## iterates over any (key, value) pair in the table `t`.
  123. for h in 0..high(t.data):
  124. if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val)
  125. iterator keys*[A](t: TCountTable[A]): A =
  126. ## iterates over any key in the table `t`.
  127. for h in 0..high(t.data):
  128. if t.data[h].val != 0: yield t.data[h].key
  129. iterator values*[A](t: TCountTable[A]): int =
  130. ## iterates over any value in the table `t`.
  131. for h in 0..high(t.data):
  132. if t.data[h].val != 0: yield t.data[h].val
  133. proc RawGet[A](t: TCountTable[A], key: A): int =
  134. var h: Hash = hash(key) and high(t.data) # start with real hash value
  135. while t.data[h].val != 0:
  136. if t.data[h].key == key: return h
  137. h = nextTry(h, high(t.data))
  138. result = -1
  139. proc `[]`*[A](t: TCountTable[A], key: A): int =
  140. ## retrieves the value at ``t[key]``. If `key` is not in `t`,
  141. ## 0 is returned. One can check with ``hasKey`` whether the key
  142. ## exists.
  143. var index = RawGet(t, key)
  144. if index >= 0: result = t.data[index].val
  145. proc hasKey*[A](t: TCountTable[A], key: A): bool =
  146. ## returns true iff `key` is in the table `t`.
  147. result = rawGet(t, key) >= 0
  148. proc rawInsert[A](t: TCountTable[A], data: var seq[tuple[key: A, val: int]],
  149. key: A, val: int) =
  150. var h: Hash = hash(key) and high(data)
  151. while data[h].val != 0: h = nextTry(h, high(data))
  152. data[h].key = key
  153. data[h].val = val
  154. proc enlarge[A](t: var TCountTable[A]) =
  155. var n: seq[tuple[key: A, val: int]]
  156. newSeq(n, len(t.data) * growthFactor)
  157. for i in countup(0, high(t.data)):
  158. if t.data[i].val != 0: rawInsert(t, n, t.data[i].key, t.data[i].val)
  159. swap(t.data, n)
  160. proc `[]=`*[A](t: var TCountTable[A], key: A, val: int) =
  161. ## puts a (key, value)-pair into `t`. `val` has to be positive.
  162. assert val > 0
  163. putImpl()
  164. proc initCountTable*[A](initialSize=64): TCountTable[A] =
  165. ## creates a new count table that is empty. `initialSize` needs to be
  166. ## a power of two.
  167. assert isPowerOfTwo(initialSize)
  168. result.counter = 0
  169. newSeq(result.data, initialSize)
  170. proc toCountTable*[A](keys: openArray[A]): TCountTable[A] =
  171. ## creates a new count table with every key in `keys` having a count of 1.
  172. result = initCountTable[A](nextPowerOfTwo(keys.len+10))
  173. for key in items(keys): result[key] = 1
  174. proc `$`*[A](t: TCountTable[A]): string =
  175. ## The `$` operator for count tables.
  176. dollarImpl()
  177. proc inc*[A](t: var TCountTable[A], key: A, val = 1) =
  178. ## increments `t[key]` by `val`.
  179. var index = RawGet(t, key)
  180. if index >= 0:
  181. inc(t.data[index].val, val)
  182. else:
  183. if mustRehash(len(t.data), t.counter): enlarge(t)
  184. rawInsert(t, t.data, key, val)
  185. inc(t.counter)
  186. proc smallest*[A](t: TCountTable[A]): tuple[key: A, val: int] =
  187. ## returns the largest (key,val)-pair. Efficiency: O(n)
  188. assert t.len > 0
  189. var minIdx = 0
  190. for h in 1..high(t.data):
  191. if t.data[h].val > 0 and t.data[minIdx].val > t.data[h].val: minIdx = h
  192. result.key = t.data[minIdx].key
  193. result.val = t.data[minIdx].val
  194. proc largest*[A](t: TCountTable[A]): tuple[key: A, val: int] =
  195. ## returns the (key,val)-pair with the largest `val`. Efficiency: O(n)
  196. assert t.len > 0
  197. var maxIdx = 0
  198. for h in 1..high(t.data):
  199. if t.data[maxIdx].val < t.data[h].val: maxIdx = h
  200. result.key = t.data[maxIdx].key
  201. result.val = t.data[maxIdx].val
  202. proc sort*[A](t: var TCountTable[A]) =
  203. ## sorts the count table so that the entry with the highest counter comes
  204. ## first. This is destructive! You must not modify `t` afterwards!
  205. ## You can use the iterators `pairs`, `keys`, and `values` to iterate over
  206. ## `t` in the sorted order.
  207. # we use shellsort here; fast enough and simple
  208. var h = 1
  209. while true:
  210. h = 3 * h + 1
  211. if h >= high(t.data): break
  212. while true:
  213. h = h div 3
  214. for i in countup(h, high(t.data)):
  215. var j = i
  216. while t.data[j-h].val <= t.data[j].val:
  217. var xyz = t.data[j]
  218. t.data[j] = t.data[j-h]
  219. t.data[j-h] = xyz
  220. j = j-h
  221. if j < h: break
  222. if h == 1: break
  223. const
  224. data = {
  225. "34": 123456, "12": 789,
  226. "90": 343, "0": 34404,
  227. "1": 344004, "2": 344774,
  228. "3": 342244, "4": 3412344,
  229. "5": 341232144, "6": 34214544,
  230. "7": 3434544, "8": 344544,
  231. "9": 34435644, "---00": 346677844,
  232. "10": 34484, "11": 34474, "19": 34464,
  233. "20": 34454, "30": 34141244, "40": 344114,
  234. "50": 344490, "60": 344491, "70": 344492,
  235. "80": 344497}
  236. proc countTableTest1 =
  237. var s = initTable[string, int](64)
  238. for key, val in items(data): s[key] = val
  239. var w: tuple[key: string, val: int] #type(otherCountTable.data[0])
  240. var t = initCountTable[string]()
  241. for k, v in items(data): t.inc(k)
  242. for k in t.keys: assert t[k] == 1
  243. t.inc("90", 3)
  244. t.inc("12", 2)
  245. t.inc("34", 1)
  246. assert t.largest()[0] == "90"
  247. t.sort()
  248. var i = 0
  249. for k, v in t.pairs:
  250. case i
  251. of 0: assert k == "90" and v == 4
  252. of 1: assert k == "12" and v == 3
  253. of 2: assert k == "34" and v == 2
  254. else: break
  255. inc i
  256. countTableTest1()
  257. echo true