hashes.nim 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements efficient computations of hash values for diverse
  10. ## Nim types. All the procs are based on these two building blocks:
  11. ## - `!& proc <#!&,Hash,int>`_ used to start or mix a hash value, and
  12. ## - `!$ proc <#!$,Hash>`_ used to *finish* the hash value.
  13. ##
  14. ## If you want to implement hash procs for your custom types,
  15. ## you will end up writing the following kind of skeleton of code:
  16. ##
## .. code-block:: Nim
##  proc hash(x: Something): Hash =
##    ## Computes a Hash from `x`.
##    var h: Hash = 0
##    # Iterate over parts of `x`.
##    for xAtom in x:
##      # Mix the atom with the partial hash.
##      h = h !& xAtom
##    # Finish the hash.
##    result = !$h
  27. ##
  28. ## If your custom types contain fields for which there already is a hash proc,
  29. ## like for example objects made up of ``strings``, you can simply hash
  30. ## together the hash value of the individual fields:
  31. ##
## .. code-block:: Nim
##  proc hash(x: Something): Hash =
##    ## Computes a Hash from `x`.
##    var h: Hash = 0
##    h = h !& hash(x.foo)
##    h = h !& hash(x.bar)
##    result = !$h
  39. ##
  40. ## **See also:**
  41. ## * `md5 module <md5.html>`_ for MD5 checksum algorithm
  42. ## * `base64 module <base64.html>`_ for a base64 encoder and decoder
  43. ## * `std/sha1 module <sha1.html>`_ for a sha1 encoder and decoder
  44. ## * `tables module <tables.html>`_ for hash tables
  45. import std/private/since
type
  Hash* = int ## A hash value. Hash tables using these values should
              ## always have a size of a power of two and can use the ``and``
              ## operator instead of ``mod`` for truncation of the hash value.
              ##
              ## This is a plain alias of ``int``; the mixing procs of this
              ## module reinterpret it as ``uint`` while computing.
  50. proc `!&`*(h: Hash, val: int): Hash {.inline.} =
  51. ## Mixes a hash value `h` with `val` to produce a new hash value.
  52. ##
  53. ## This is only needed if you need to implement a hash proc for a new datatype.
  54. let h = cast[uint](h)
  55. let val = cast[uint](val)
  56. var res = h + val
  57. res = res + res shl 10
  58. res = res xor (res shr 6)
  59. result = cast[Hash](res)
  60. proc `!$`*(h: Hash): Hash {.inline.} =
  61. ## Finishes the computation of the hash value.
  62. ##
  63. ## This is only needed if you need to implement a hash proc for a new datatype.
  64. let h = cast[uint](h) # Hash is practically unsigned.
  65. var res = h + h shl 3
  66. res = res xor (res shr 11)
  67. res = res + res shl 15
  68. result = cast[Hash](res)
  69. proc hiXorLoFallback64(a, b: uint64): uint64 {.inline.} =
  70. let # Fall back in 64-bit arithmetic
  71. aH = a shr 32
  72. aL = a and 0xFFFFFFFF'u64
  73. bH = b shr 32
  74. bL = b and 0xFFFFFFFF'u64
  75. rHH = aH * bH
  76. rHL = aH * bL
  77. rLH = aL * bH
  78. rLL = aL * bL
  79. t = rLL + (rHL shl 32)
  80. var c = if t < rLL: 1'u64 else: 0'u64
  81. let lo = t + (rLH shl 32)
  82. c += (if lo < t: 1'u64 else: 0'u64)
  83. let hi = rHH + (rHL shr 32) + (rLH shr 32) + c
  84. return hi xor lo
proc hiXorLo(a, b: uint64): uint64 {.inline.} =
  # Xor of high & low 8B of full 16B product
  #
  # Picks an implementation at compile time:
  # - in the compile-time VM and when `Hash` is narrower than 8 bytes, the
  #   pure-Nim `hiXorLoFallback64`;
  # - with gcc / llvm_gcc / clang, emitted C code using `__uint128_t`;
  # - on Windows (except tcc), the `_umul128` intrinsic from `intrin.h`;
  # - anything else, the pure-Nim fallback again.
  when nimvm:
    result = hiXorLoFallback64(a, b) # `result =` is necessary here.
  else:
    when Hash.sizeof < 8:
      result = hiXorLoFallback64(a, b)
    elif defined(gcc) or defined(llvm_gcc) or defined(clang):
      {.emit: """__uint128_t r = `a`; r *= `b`; `result` = (r >> 64) ^ r;""".}
    elif defined(windows) and not defined(tcc):
      proc umul128(a, b: uint64, c: ptr uint64): uint64 {.importc: "_umul128", header: "intrin.h".}
      # `b` is shadowed by a mutable copy so its address can be passed as the
      # output argument. NOTE(review): presumably `_umul128` returns one half
      # of the product and stores the other half through `c` - confirm
      # against the intrinsic's documentation.
      var b = b
      let c = umul128(a, b, addr b)
      result = c xor b
    else:
      result = hiXorLoFallback64(a, b)
proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} =
  ## Wang Yi's hash_v1 for 8B int. https://github.com/rurban/smhasher has more
  ## details. This passed all scrambling tests in Spring 2019 and is simple.
  ## NOTE: It's ok to define ``proc(x: int16): Hash = hashWangYi1(Hash(x))``.
  ##
  ## The value is computed as ``hiXorLo(hiXorLo(P0, uint64(x) xor P1), P58)``
  ## and reinterpreted as a `Hash`.
  # 64-bit constants fed into the two `hiXorLo` rounds.
  const P0 = 0xa0761d6478bd642f'u64
  const P1 = 0xe7037ed1a0b428db'u64
  const P58 = 0xeb44accab455d165'u64 xor 8'u64
  when nimvm:
    cast[Hash](hiXorLo(hiXorLo(P0, uint64(x) xor P1), P58))
  else:
    when defined(js):
      # JS backend: without `BigInt` the input is returned unchanged;
      # otherwise the same two rounds are done with `BigInt` arithmetic and
      # the result is masked to its low 53 bits before conversion to a Number.
      asm """
        if (typeof BigInt == 'undefined') {
          `result` = `x`; // For Node < 10.4, etc. we do the old identity hash
        } else { // Otherwise we match the low 32-bits of C/C++ hash
          function hi_xor_lo_js(a, b) {
            const prod = BigInt(a) * BigInt(b);
            const mask = (BigInt(1) << BigInt(64)) - BigInt(1);
            return (prod >> BigInt(64)) ^ (prod & mask);
          }
          const P0 = BigInt(0xa0761d64)<<BigInt(32)|BigInt(0x78bd642f);
          const P1 = BigInt(0xe7037ed1)<<BigInt(32)|BigInt(0xa0b428db);
          const P58 = BigInt(0xeb44acca)<<BigInt(32)|BigInt(0xb455d165)^BigInt(8);
          var res = hi_xor_lo_js(hi_xor_lo_js(P0, BigInt(`x`) ^ P1), P58);
          `result` = Number(res & ((BigInt(1) << BigInt(53)) - BigInt(1)));
        }"""
    else:
      cast[Hash](hiXorLo(hiXorLo(P0, uint64(x) xor P1), P58))
  129. proc hashData*(data: pointer, size: int): Hash =
  130. ## Hashes an array of bytes of size `size`.
  131. var h: Hash = 0
  132. when defined(js):
  133. var p: cstring
  134. asm """`p` = `Data`;"""
  135. else:
  136. var p = cast[cstring](data)
  137. var i = 0
  138. var s = size
  139. while s > 0:
  140. h = h !& ord(p[i])
  141. inc(i)
  142. dec(s)
  143. result = !$h
when defined(js):
  # Counter used by the JS branch of `hash(pointer)` below to hand out ids.
  var objectID = 0

proc hash*(x: pointer): Hash {.inline.} =
  ## Efficient hashing of pointers.
  ##
  ## On the JS backend a value whose `typeof` is `"object"` is tagged with a
  ## `_NimID` property taken from a module-level counter, and that id is the
  ## hash; the tag is reused on later calls. On other backends the address is
  ## shifted right by 3 bits and reinterpreted as a `Hash`.
  when defined(js):
    asm """
      if (typeof `x` == "object") {
        if ("_NimID" in `x`)
          `result` = `x`["_NimID"];
        else {
          `result` = ++`objectID`;
          `x`["_NimID"] = `result`;
        }
      }
    """
  else:
    result = cast[Hash](cast[uint](x) shr 3) # skip the alignment
proc hash*[T: proc](x: T): Hash {.inline.} =
  ## Efficient hashing of proc vars. Closures are supported too.
  ##
  ## For a closure the hash of its raw proc pointer is mixed (`!&`) with the
  ## hash of its raw environment pointer; any other proc value is converted
  ## to `pointer` and hashed as such.
  when T is "closure":
    # Note: this mix is returned as is, without a finishing `!$` step.
    result = hash(rawProc(x)) !& hash(rawEnv(x))
  else:
    result = hash(pointer(x))
  167. proc hashIdentity*[T: Ordinal|enum](x: T): Hash {.inline, since: (1, 3).} =
  168. ## The identity hash. I.e. ``hashIdentity(x) = x``.
  169. cast[Hash](ord(x))
  170. when defined(nimIntHash1):
  171. proc hash*[T: Ordinal|enum](x: T): Hash {.inline.} =
  172. ## Efficient hashing of integers.
  173. cast[Hash](ord(x))
  174. else:
  175. proc hash*[T: Ordinal|enum](x: T): Hash {.inline.} =
  176. ## Efficient hashing of integers.
  177. hashWangYi1(uint64(ord(x)))
  178. proc hash*(x: float): Hash {.inline.} =
  179. ## Efficient hashing of floats.
  180. var y = x + 0.0 # for denormalization
  181. result = hash(cast[ptr Hash](addr(y))[])
# Forward declarations before methods that hash containers. This allows
# containers to contain other containers: the element-wise `hash(...)` calls
# inside the container overloads can then resolve to these overloads even
# though their bodies appear further down in the module.
proc hash*[A](x: openArray[A]): Hash
proc hash*[A](x: set[A]): Hash
  186. when defined(js):
  187. proc imul(a, b: uint32): uint32 =
  188. # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/imul
  189. let mask = 0xffff'u32
  190. var
  191. aHi = (a shr 16) and mask
  192. aLo = a and mask
  193. bHi = (b shr 16) and mask
  194. bLo = b and mask
  195. result = (aLo * bLo) + (aHi * bLo + aLo * bHi) shl 16
  196. else:
  197. template imul(a, b: uint32): untyped = a * b
  198. proc rotl32(x: uint32, r: int): uint32 {.inline.} =
  199. (x shl r) or (x shr (32 - r))
proc murmurHash(x: openArray[byte]): Hash =
  # 32-bit hash of the bytes in `x`, following
  # https://github.com/PeterScott/murmur3/blob/master/murmur3.c
  # The input is consumed in 4-byte steps, then the 0..3 leftover bytes are
  # folded in, and finally the length is mixed in and the bits are scrambled.
  const
    c1 = 0xcc9e2d51'u32
    c2 = 0x1b873593'u32
    n1 = 0xe6546b64'u32
    m1 = 0x85ebca6b'u32
    m2 = 0xc2b2ae35'u32
  let
    size = len(x)
    stepSize = 4 # 32-bit
    n = size div stepSize
  var
    h1: uint32
    i = 0

  # body
  while i < n * stepSize:
    var k1: uint32
    when defined(js) or defined(sparc) or defined(sparc64):
      # Assemble the word byte by byte, highest index first, so that x[i]
      # ends up in the lowest 8 bits.
      var j = stepSize
      while j > 0:
        dec j
        k1 = (k1 shl 8) or (ord(x[i+j])).uint32
    else:
      # Read the four bytes at once through a pointer cast.
      k1 = cast[ptr uint32](unsafeAddr x[i])[]
    inc i, stepSize

    k1 = imul(k1, c1)
    k1 = rotl32(k1, 15)
    k1 = imul(k1, c2)

    h1 = h1 xor k1
    h1 = rotl32(h1, 13)
    h1 = h1*5 + n1

  # tail
  # Fold in the remaining `size mod 4` bytes, again highest index first.
  var k1: uint32
  var rem = size mod stepSize
  while rem > 0:
    dec rem
    k1 = (k1 shl 8) or (ord(x[i+rem])).uint32
  k1 = imul(k1, c1)
  k1 = rotl32(k1, 15)
  k1 = imul(k1, c2)
  h1 = h1 xor k1

  # finalization
  h1 = h1 xor size.uint32
  h1 = h1 xor (h1 shr 16)
  h1 = imul(h1, m1)
  h1 = h1 xor (h1 shr 13)
  h1 = imul(h1, m2)
  h1 = h1 xor (h1 shr 16)
  return cast[Hash](h1)
# Placeholders for hashing inside the compile-time VM. As their assertion
# messages say, the real implementations are supplied by the compiler
# (compiler/vmops.nim); the bodies below only fire if no override is in place.
# They are called from the `when nimvm` branches of the `hash` overloads and
# take the inclusive index range `sPos..ePos`.
proc hashVmImpl(x: string, sPos, ePos: int): Hash =
  doAssert false, "implementation override in compiler/vmops.nim"

proc hashVmImplChar(x: openArray[char], sPos, ePos: int): Hash =
  doAssert false, "implementation override in compiler/vmops.nim"

proc hashVmImplByte(x: openArray[byte], sPos, ePos: int): Hash =
  doAssert false, "implementation override in compiler/vmops.nim"
  256. proc hash*(x: string): Hash =
  257. ## Efficient hashing of strings.
  258. ##
  259. ## See also:
  260. ## * `hashIgnoreStyle <#hashIgnoreStyle,string>`_
  261. ## * `hashIgnoreCase <#hashIgnoreCase,string>`_
  262. runnableExamples:
  263. doAssert hash("abracadabra") != hash("AbracadabrA")
  264. when not defined(nimToOpenArrayCString):
  265. result = 0
  266. for c in x:
  267. result = result !& ord(c)
  268. result = !$result
  269. else:
  270. when nimvm:
  271. result = hashVmImpl(x, 0, high(x))
  272. else:
  273. result = murmurHash(toOpenArrayByte(x, 0, high(x)))
  274. proc hash*(x: cstring): Hash =
  275. ## Efficient hashing of null-terminated strings.
  276. runnableExamples:
  277. doAssert hash(cstring"abracadabra") == hash("abracadabra")
  278. doAssert hash(cstring"AbracadabrA") == hash("AbracadabrA")
  279. doAssert hash(cstring"abracadabra") != hash(cstring"AbracadabrA")
  280. when not defined(nimToOpenArrayCString):
  281. result = 0
  282. var i = 0
  283. while x[i] != '\0':
  284. result = result !& ord(x[i])
  285. inc i
  286. result = !$result
  287. else:
  288. when not defined(js) and defined(nimToOpenArrayCString):
  289. murmurHash(toOpenArrayByte(x, 0, x.high))
  290. else:
  291. let xx = $x
  292. murmurHash(toOpenArrayByte(xx, 0, high(xx)))
  293. proc hash*(sBuf: string, sPos, ePos: int): Hash =
  294. ## Efficient hashing of a string buffer, from starting
  295. ## position `sPos` to ending position `ePos` (included).
  296. ##
  297. ## ``hash(myStr, 0, myStr.high)`` is equivalent to ``hash(myStr)``.
  298. runnableExamples:
  299. var a = "abracadabra"
  300. doAssert hash(a, 0, 3) == hash(a, 7, 10)
  301. when not defined(nimToOpenArrayCString):
  302. result = 0
  303. for i in sPos..ePos:
  304. result = result !& ord(sBuf[i])
  305. result = !$result
  306. else:
  307. murmurHash(toOpenArrayByte(sBuf, sPos, ePos))
  308. proc hashIgnoreStyle*(x: string): Hash =
  309. ## Efficient hashing of strings; style is ignored.
  310. ##
  311. ## **Note:** This uses different hashing algorithm than `hash(string)`.
  312. ##
  313. ## See also:
  314. ## * `hashIgnoreCase <#hashIgnoreCase,string>`_
  315. runnableExamples:
  316. doAssert hashIgnoreStyle("aBr_aCa_dAB_ra") == hashIgnoreStyle("abracadabra")
  317. doAssert hashIgnoreStyle("abcdefghi") != hash("abcdefghi")
  318. var h: Hash = 0
  319. var i = 0
  320. let xLen = x.len
  321. while i < xLen:
  322. var c = x[i]
  323. if c == '_':
  324. inc(i)
  325. else:
  326. if c in {'A'..'Z'}:
  327. c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
  328. h = h !& ord(c)
  329. inc(i)
  330. result = !$h
  331. proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
  332. ## Efficient hashing of a string buffer, from starting
  333. ## position `sPos` to ending position `ePos` (included); style is ignored.
  334. ##
  335. ## **Note:** This uses different hashing algorithm than `hash(string)`.
  336. ##
  337. ## ``hashIgnoreStyle(myBuf, 0, myBuf.high)`` is equivalent
  338. ## to ``hashIgnoreStyle(myBuf)``.
  339. runnableExamples:
  340. var a = "ABracada_b_r_a"
  341. doAssert hashIgnoreStyle(a, 0, 3) == hashIgnoreStyle(a, 7, a.high)
  342. var h: Hash = 0
  343. var i = sPos
  344. while i <= ePos:
  345. var c = sBuf[i]
  346. if c == '_':
  347. inc(i)
  348. else:
  349. if c in {'A'..'Z'}:
  350. c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
  351. h = h !& ord(c)
  352. inc(i)
  353. result = !$h
  354. proc hashIgnoreCase*(x: string): Hash =
  355. ## Efficient hashing of strings; case is ignored.
  356. ##
  357. ## **Note:** This uses different hashing algorithm than `hash(string)`.
  358. ##
  359. ## See also:
  360. ## * `hashIgnoreStyle <#hashIgnoreStyle,string>`_
  361. runnableExamples:
  362. doAssert hashIgnoreCase("ABRAcaDABRA") == hashIgnoreCase("abRACAdabra")
  363. doAssert hashIgnoreCase("abcdefghi") != hash("abcdefghi")
  364. var h: Hash = 0
  365. for i in 0..x.len-1:
  366. var c = x[i]
  367. if c in {'A'..'Z'}:
  368. c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
  369. h = h !& ord(c)
  370. result = !$h
  371. proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash =
  372. ## Efficient hashing of a string buffer, from starting
  373. ## position `sPos` to ending position `ePos` (included); case is ignored.
  374. ##
  375. ## **Note:** This uses different hashing algorithm than `hash(string)`.
  376. ##
  377. ## ``hashIgnoreCase(myBuf, 0, myBuf.high)`` is equivalent
  378. ## to ``hashIgnoreCase(myBuf)``.
  379. runnableExamples:
  380. var a = "ABracadabRA"
  381. doAssert hashIgnoreCase(a, 0, 3) == hashIgnoreCase(a, 7, 10)
  382. var h: Hash = 0
  383. for i in sPos..ePos:
  384. var c = sBuf[i]
  385. if c in {'A'..'Z'}:
  386. c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
  387. h = h !& ord(c)
  388. result = !$h
  389. proc hash*[T: tuple](x: T): Hash =
  390. ## Efficient hashing of tuples.
  391. for f in fields(x):
  392. result = result !& hash(f)
  393. result = !$result
  394. proc hash*[A](x: openArray[A]): Hash =
  395. ## Efficient hashing of arrays and sequences.
  396. when A is byte:
  397. result = murmurHash(x)
  398. elif A is char:
  399. when nimvm:
  400. result = hashVmImplChar(x, 0, x.high)
  401. else:
  402. result = murmurHash(toOpenArrayByte(x, 0, x.high))
  403. else:
  404. for a in x:
  405. result = result !& hash(a)
  406. result = !$result
  407. proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
  408. ## Efficient hashing of portions of arrays and sequences, from starting
  409. ## position `sPos` to ending position `ePos` (included).
  410. ##
  411. ## ``hash(myBuf, 0, myBuf.high)`` is equivalent to ``hash(myBuf)``.
  412. runnableExamples:
  413. let a = [1, 2, 5, 1, 2, 6]
  414. doAssert hash(a, 0, 1) == hash(a, 3, 4)
  415. when A is byte:
  416. when nimvm:
  417. result = hashVmImplByte(aBuf, sPos, ePos)
  418. else:
  419. result = murmurHash(toOpenArray(aBuf, sPos, ePos))
  420. elif A is char:
  421. when nimvm:
  422. result = hashVmImplChar(aBuf, sPos, ePos)
  423. else:
  424. result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos))
  425. else:
  426. for i in sPos .. ePos:
  427. result = result !& hash(aBuf[i])
  428. result = !$result
  429. proc hash*[A](x: set[A]): Hash =
  430. ## Efficient hashing of sets.
  431. for it in items(x):
  432. result = result !& hash(it)
  433. result = !$result
# Self-tests, run only when this module is compiled as the main program.
when isMainModule:
  # Every flavour of empty input (and an empty range) must hash to 0.
  block empty:
    var
      a = ""
      b = newSeq[char]()
      c = newSeq[int]()
      d = cstring""
      e = "abcd"
    doAssert hash(a) == 0
    doAssert hash(b) == 0
    doAssert hash(c) == 0
    doAssert hash(d) == 0
    doAssert hashIgnoreCase(a) == 0
    doAssert hashIgnoreStyle(a) == 0
    doAssert hash(e, 3, 2) == 0
  # The same text reached through different overloads must hash identically.
  block sameButDifferent:
    doAssert hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13)
    doAssert hash("aa bb aaaa1234") == hash(cstring"aa bb aaaa1234")
    doAssert hashIgnoreCase("aA bb aAAa1234") == hashIgnoreCase("aa bb aaaa1234")
    doAssert hashIgnoreStyle("aa_bb_AAaa1234") == hashIgnoreCase("aaBBAAAa1234")
  # Inputs shorter than one 4-byte step: chars, bytes and strings must agree.
  block smallSize: # no multibyte hashing
    let
      xx = @['H', 'i']
      ii = @[72'u8, 105]
      ss = "Hi"
    doAssert hash(xx) == hash(ii)
    doAssert hash(xx) == hash(ss)
    doAssert hash(xx) == hash(xx, 0, xx.high)
    doAssert hash(ss) == hash(ss, 0, ss.high)
  # Inputs spanning several 4-byte steps, including sub-ranges of them.
  block largeSize: # longer than 4 characters
    let
      xx = @['H', 'e', 'l', 'l', 'o']
      xxl = @['H', 'e', 'l', 'l', 'o', 'w', 'e', 'e', 'n', 's']
      ssl = "Helloweens"
    doAssert hash(xxl) == hash(ssl)
    doAssert hash(xxl) == hash(xxl, 0, xxl.high)
    doAssert hash(ssl) == hash(ssl, 0, ssl.high)
    doAssert hash(xx) == hash(xxl, 0, 4)
    doAssert hash(xx) == hash(ssl, 0, 4)
    doAssert hash(xx, 0, 3) == hash(xxl, 0, 3)
    doAssert hash(xx, 0, 3) == hash(ssl, 0, 3)