hashes.nim 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
## This module implements efficient computations of hash values for diverse
## Nim types. All the procs are based on these two building blocks:
##
## - `!& proc <#!&>`_ used to start or mix a hash value, and
## - `!$ proc <#!$>`_ used to *finish* the hash value.
##
## If you want to implement hash procs for your custom types, you will end up
## writing the following kind of skeleton of code:
##
## .. code-block:: Nim
##  proc hash(x: Something): Hash =
##    ## Computes a Hash from `x`.
##    var h: Hash = 0
##    # Iterate over parts of `x`.
##    for xAtom in x:
##      # Mix the atom with the partial hash.
##      h = h !& xAtom
##    # Finish the hash.
##    result = !$h
##
## If your custom types contain fields for which there already is a hash proc,
## for example objects made up of ``strings``, you can simply hash together
## the hash values of the individual fields:
##
## .. code-block:: Nim
##  proc hash(x: Something): Hash =
##    ## Computes a Hash from `x`.
##    var h: Hash = 0
##    h = h !& hash(x.foo)
##    h = h !& hash(x.bar)
##    result = !$h
  39. import
  40. strutils
  41. type
  42. Hash* = int ## a hash value; hash tables using these values should
  43. ## always have a size of a power of two and can use the ``and``
  44. ## operator instead of ``mod`` for truncation of the hash value.
  45. {.deprecated: [THash: Hash].}
  46. proc `!&`*(h: Hash, val: int): Hash {.inline.} =
  47. ## mixes a hash value `h` with `val` to produce a new hash value. This is
  48. ## only needed if you need to implement a hash proc for a new datatype.
  49. result = h +% val
  50. result = result +% result shl 10
  51. result = result xor (result shr 6)
  52. proc `!$`*(h: Hash): Hash {.inline.} =
  53. ## finishes the computation of the hash value. This is
  54. ## only needed if you need to implement a hash proc for a new datatype.
  55. result = h +% h shl 3
  56. result = result xor (result shr 11)
  57. result = result +% result shl 15
  58. proc hashData*(data: pointer, size: int): Hash =
  59. ## hashes an array of bytes of size `size`
  60. var h: Hash = 0
  61. when defined(js):
  62. var p: cstring
  63. asm """`p` = `Data`;"""
  64. else:
  65. var p = cast[cstring](data)
  66. var i = 0
  67. var s = size
  68. while s > 0:
  69. h = h !& ord(p[i])
  70. inc(i)
  71. dec(s)
  72. result = !$h
  73. when defined(js):
  74. var objectID = 0
  75. proc hash*(x: pointer): Hash {.inline.} =
  76. ## efficient hashing of pointers
  77. when defined(js):
  78. asm """
  79. if (typeof `x` == "object") {
  80. if ("_NimID" in `x`)
  81. `result` = `x`["_NimID"];
  82. else {
  83. `result` = ++`objectID`;
  84. `x`["_NimID"] = `result`;
  85. }
  86. }
  87. """
  88. else:
  89. result = (cast[Hash](x)) shr 3 # skip the alignment
  90. when not defined(booting):
  91. proc hash*[T: proc](x: T): Hash {.inline.} =
  92. ## efficient hashing of proc vars; closures are supported too.
  93. when T is "closure":
  94. result = hash(rawProc(x)) !& hash(rawEnv(x))
  95. else:
  96. result = hash(pointer(x))
  97. proc hash*(x: int): Hash {.inline.} =
  98. ## efficient hashing of integers
  99. result = x
  100. proc hash*(x: int64): Hash {.inline.} =
  101. ## efficient hashing of int64 integers
  102. result = toU32(x)
  103. proc hash*(x: uint): Hash {.inline.} =
  104. ## efficient hashing of unsigned integers
  105. result = cast[int](x)
  106. proc hash*(x: uint64): Hash {.inline.} =
  107. ## efficient hashing of uint64 integers
  108. result = toU32(cast[int](x))
  109. proc hash*(x: char): Hash {.inline.} =
  110. ## efficient hashing of characters
  111. result = ord(x)
  112. proc hash*[T: Ordinal](x: T): Hash {.inline.} =
  113. ## efficient hashing of other ordinal types (e.g., enums)
  114. result = ord(x)
  115. proc hash*(x: string): Hash =
  116. ## efficient hashing of strings
  117. var h: Hash = 0
  118. for i in 0..x.len-1:
  119. h = h !& ord(x[i])
  120. result = !$h
  121. proc hash*(x: cstring): Hash =
  122. ## efficient hashing of null-terminated strings
  123. var h: Hash = 0
  124. var i = 0
  125. when defined(js):
  126. while i < x.len:
  127. h = h !& ord(x[i])
  128. inc i
  129. else:
  130. while x[i] != 0.char:
  131. h = h !& ord(x[i])
  132. inc i
  133. result = !$h
  134. proc hash*(sBuf: string, sPos, ePos: int): Hash =
  135. ## efficient hashing of a string buffer, from starting
  136. ## position `sPos` to ending position `ePos`
  137. ##
  138. ## ``hash(myStr, 0, myStr.high)`` is equivalent to ``hash(myStr)``
  139. var h: Hash = 0
  140. for i in sPos..ePos:
  141. h = h !& ord(sBuf[i])
  142. result = !$h
  143. proc hashIgnoreStyle*(x: string): Hash =
  144. ## efficient hashing of strings; style is ignored
  145. var h: Hash = 0
  146. var i = 0
  147. let xLen = x.len
  148. while i < xLen:
  149. var c = x[i]
  150. if c == '_':
  151. inc(i)
  152. else:
  153. if c in {'A'..'Z'}:
  154. c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
  155. h = h !& ord(c)
  156. inc(i)
  157. result = !$h
  158. proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
  159. ## efficient hashing of a string buffer, from starting
  160. ## position `sPos` to ending position `ePos`; style is ignored
  161. ##
  162. ## ``hashIgnoreStyle(myBuf, 0, myBuf.high)`` is equivalent
  163. ## to ``hashIgnoreStyle(myBuf)``
  164. var h: Hash = 0
  165. var i = sPos
  166. while i <= ePos:
  167. var c = sBuf[i]
  168. if c == '_':
  169. inc(i)
  170. else:
  171. if c in {'A'..'Z'}:
  172. c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
  173. h = h !& ord(c)
  174. inc(i)
  175. result = !$h
  176. proc hashIgnoreCase*(x: string): Hash =
  177. ## efficient hashing of strings; case is ignored
  178. var h: Hash = 0
  179. for i in 0..x.len-1:
  180. var c = x[i]
  181. if c in {'A'..'Z'}:
  182. c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
  183. h = h !& ord(c)
  184. result = !$h
  185. proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash =
  186. ## efficient hashing of a string buffer, from starting
  187. ## position `sPos` to ending position `ePos`; case is ignored
  188. ##
  189. ## ``hashIgnoreCase(myBuf, 0, myBuf.high)`` is equivalent
  190. ## to ``hashIgnoreCase(myBuf)``
  191. var h: Hash = 0
  192. for i in sPos..ePos:
  193. var c = sBuf[i]
  194. if c in {'A'..'Z'}:
  195. c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
  196. h = h !& ord(c)
  197. result = !$h
  198. proc hash*(x: float): Hash {.inline.} =
  199. ## efficient hashing of floats.
  200. var y = x + 1.0
  201. result = cast[ptr Hash](addr(y))[]
  202. # Forward declarations before methods that hash containers. This allows
  203. # containers to contain other containers
  204. proc hash*[A](x: openArray[A]): Hash
  205. proc hash*[A](x: set[A]): Hash
  206. proc hash*[T: tuple](x: T): Hash =
  207. ## efficient hashing of tuples.
  208. for f in fields(x):
  209. result = result !& hash(f)
  210. result = !$result
  211. proc hash*[A](x: openArray[A]): Hash =
  212. ## efficient hashing of arrays and sequences.
  213. for it in items(x): result = result !& hash(it)
  214. result = !$result
  215. proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
  216. ## efficient hashing of portions of arrays and sequences.
  217. ##
  218. ## ``hash(myBuf, 0, myBuf.high)`` is equivalent to ``hash(myBuf)``
  219. for i in sPos..ePos:
  220. result = result !& hash(aBuf[i])
  221. result = !$result
  222. proc hash*[A](x: set[A]): Hash =
  223. ## efficient hashing of sets.
  224. for it in items(x): result = result !& hash(it)
  225. result = !$result
  226. when isMainModule:
  227. doAssert( hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13) )
  228. doAssert( hash("aa bb aaaa1234") == hash(cstring("aa bb aaaa1234")) )
  229. doAssert( hashIgnoreCase("aa bb aaaa1234") == hash("aa bb aaaa1234") )
  230. doAssert( hashIgnoreStyle("aa bb aaaa1234") == hashIgnoreCase("aa bb aaaa1234") )
  231. let xx = @['H','e','l','l','o']
  232. let ss = "Hello"
  233. doAssert( hash(xx) == hash(ss) )
  234. doAssert( hash(xx) == hash(xx, 0, xx.high) )
  235. doAssert( hash(ss) == hash(ss, 0, ss.high) )