123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267 |
- #
- #
- # Nim's Runtime Library
- # (c) Copyright 2012 Andreas Rumpf
- #
- # See the file "copying.txt", included in this
- # distribution, for details about the copyright.
- #
- ## This module implements efficient computations of hash values for diverse
- ## Nim types. All the procs are based on these two building blocks:
- ## - `!& proc <#!&>`_ used to start or mix a hash value, and
- ## - `!$ proc <#!$>`_ used to *finish* the hash value.
- ## If you want to implement hash procs for
- ## your custom types you will end up writing the following kind of skeleton of
- ## code:
- ##
- ## .. code-block:: Nim
- ## proc hash(x: Something): Hash =
- ## ## Computes a Hash from `x`.
- ## var h: Hash = 0
- ## # Iterate over parts of `x`.
- ## for xAtom in x:
- ## # Mix the atom with the partial hash.
- ## h = h !& xAtom
- ## # Finish the hash.
- ## result = !$h
- ##
- ## If your custom types contain fields for which there already is a hash proc,
- ## like for example objects made up of ``strings``, you can simply hash
- ## together the hash value of the individual fields:
- ##
- ## .. code-block:: Nim
- ## proc hash(x: Something): Hash =
- ## ## Computes a Hash from `x`.
- ## var h: Hash = 0
- ## h = h !& hash(x.foo)
- ## h = h !& hash(x.bar)
- ## result = !$h
- import
- strutils
- type
- Hash* = int ## a hash value; hash tables using these values should
- ## always have a size of a power of two and can use the ``and``
- ## operator instead of ``mod`` for truncation of the hash value.
- proc `!&`*(h: Hash, val: int): Hash {.inline.} =
- ## mixes a hash value `h` with `val` to produce a new hash value. This is
- ## only needed if you need to implement a hash proc for a new datatype.
- result = h +% val
- result = result +% result shl 10
- result = result xor (result shr 6)
- proc `!$`*(h: Hash): Hash {.inline.} =
- ## finishes the computation of the hash value. This is
- ## only needed if you need to implement a hash proc for a new datatype.
- result = h +% h shl 3
- result = result xor (result shr 11)
- result = result +% result shl 15
- proc hashData*(data: pointer, size: int): Hash =
- ## hashes an array of bytes of size `size`
- var h: Hash = 0
- when defined(js):
- var p: cstring
- asm """`p` = `Data`;"""
- else:
- var p = cast[cstring](data)
- var i = 0
- var s = size
- while s > 0:
- h = h !& ord(p[i])
- inc(i)
- dec(s)
- result = !$h
- when defined(js):
- var objectID = 0
- proc hash*(x: pointer): Hash {.inline.} =
- ## efficient hashing of pointers
- when defined(js):
- asm """
- if (typeof `x` == "object") {
- if ("_NimID" in `x`)
- `result` = `x`["_NimID"];
- else {
- `result` = ++`objectID`;
- `x`["_NimID"] = `result`;
- }
- }
- """
- else:
- result = (cast[Hash](x)) shr 3 # skip the alignment
- when not defined(booting):
- proc hash*[T: proc](x: T): Hash {.inline.} =
- ## efficient hashing of proc vars; closures are supported too.
- when T is "closure":
- result = hash(rawProc(x)) !& hash(rawEnv(x))
- else:
- result = hash(pointer(x))
- proc hash*(x: int): Hash {.inline.} =
- ## efficient hashing of integers
- result = x
- proc hash*(x: int64): Hash {.inline.} =
- ## efficient hashing of int64 integers
- result = toU32(x)
- proc hash*(x: uint): Hash {.inline.} =
- ## efficient hashing of unsigned integers
- result = cast[int](x)
- proc hash*(x: uint64): Hash {.inline.} =
- ## efficient hashing of uint64 integers
- result = toU32(cast[int](x))
- proc hash*(x: char): Hash {.inline.} =
- ## efficient hashing of characters
- result = ord(x)
- proc hash*[T: Ordinal](x: T): Hash {.inline.} =
- ## efficient hashing of other ordinal types (e.g., enums)
- result = ord(x)
- proc hash*(x: string): Hash =
- ## efficient hashing of strings
- var h: Hash = 0
- for i in 0..x.len-1:
- h = h !& ord(x[i])
- result = !$h
- proc hash*(x: cstring): Hash =
- ## efficient hashing of null-terminated strings
- var h: Hash = 0
- var i = 0
- when defined(js):
- while i < x.len:
- h = h !& ord(x[i])
- inc i
- else:
- while x[i] != 0.char:
- h = h !& ord(x[i])
- inc i
- result = !$h
- proc hash*(sBuf: string, sPos, ePos: int): Hash =
- ## efficient hashing of a string buffer, from starting
- ## position `sPos` to ending position `ePos`
- ##
- ## ``hash(myStr, 0, myStr.high)`` is equivalent to ``hash(myStr)``
- var h: Hash = 0
- for i in sPos..ePos:
- h = h !& ord(sBuf[i])
- result = !$h
- proc hashIgnoreStyle*(x: string): Hash =
- ## efficient hashing of strings; style is ignored
- var h: Hash = 0
- var i = 0
- let xLen = x.len
- while i < xLen:
- var c = x[i]
- if c == '_':
- inc(i)
- else:
- if c in {'A'..'Z'}:
- c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
- h = h !& ord(c)
- inc(i)
- result = !$h
- proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
- ## efficient hashing of a string buffer, from starting
- ## position `sPos` to ending position `ePos`; style is ignored
- ##
- ## ``hashIgnoreStyle(myBuf, 0, myBuf.high)`` is equivalent
- ## to ``hashIgnoreStyle(myBuf)``
- var h: Hash = 0
- var i = sPos
- while i <= ePos:
- var c = sBuf[i]
- if c == '_':
- inc(i)
- else:
- if c in {'A'..'Z'}:
- c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
- h = h !& ord(c)
- inc(i)
- result = !$h
- proc hashIgnoreCase*(x: string): Hash =
- ## efficient hashing of strings; case is ignored
- var h: Hash = 0
- for i in 0..x.len-1:
- var c = x[i]
- if c in {'A'..'Z'}:
- c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
- h = h !& ord(c)
- result = !$h
- proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash =
- ## efficient hashing of a string buffer, from starting
- ## position `sPos` to ending position `ePos`; case is ignored
- ##
- ## ``hashIgnoreCase(myBuf, 0, myBuf.high)`` is equivalent
- ## to ``hashIgnoreCase(myBuf)``
- var h: Hash = 0
- for i in sPos..ePos:
- var c = sBuf[i]
- if c in {'A'..'Z'}:
- c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
- h = h !& ord(c)
- result = !$h
- proc hash*(x: float): Hash {.inline.} =
- ## efficient hashing of floats.
- var y = x + 1.0
- result = cast[ptr Hash](addr(y))[]
- # Forward declarations before methods that hash containers. This allows
- # containers to contain other containers
- proc hash*[A](x: openArray[A]): Hash
- proc hash*[A](x: set[A]): Hash
- proc hash*[T: tuple](x: T): Hash =
- ## efficient hashing of tuples.
- for f in fields(x):
- result = result !& hash(f)
- result = !$result
- proc hash*[A](x: openArray[A]): Hash =
- ## efficient hashing of arrays and sequences.
- for it in items(x): result = result !& hash(it)
- result = !$result
- proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
- ## efficient hashing of portions of arrays and sequences.
- ##
- ## ``hash(myBuf, 0, myBuf.high)`` is equivalent to ``hash(myBuf)``
- for i in sPos..ePos:
- result = result !& hash(aBuf[i])
- result = !$result
- proc hash*[A](x: set[A]): Hash =
- ## efficient hashing of sets.
- for it in items(x): result = result !& hash(it)
- result = !$result
- when isMainModule:
- doAssert( hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13) )
- doAssert( hash("aa bb aaaa1234") == hash(cstring("aa bb aaaa1234")) )
- doAssert( hashIgnoreCase("aa bb aaaa1234") == hash("aa bb aaaa1234") )
- doAssert( hashIgnoreStyle("aa bb aaaa1234") == hashIgnoreCase("aa bb aaaa1234") )
- let xx = @['H','e','l','l','o']
- let ss = "Hello"
- doAssert( hash(xx) == hash(ss) )
- doAssert( hash(xx) == hash(xx, 0, xx.high) )
- doAssert( hash(ss) == hash(ss, 0, ss.high) )
|