123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370 |
- #
- #
- # The Nim Compiler
- # (c) Copyright 2017 Andreas Rumpf
- #
- # See the file "copying.txt", included in this
- # distribution, for details about the copyright.
- #
- ## Computes hash values for routine (proc, method etc) signatures.
- import ast, md5, tables, ropes
- from hashes import Hash
- from astalgo import debug
- import types
- from strutils import startsWith, contains
- when false:
- type
- SigHash* = uint32 ## a hash good enough for a filename or a proc signature
- proc sdbmHash(hash: SigHash, c: char): SigHash {.inline.} =
- return SigHash(c) + (hash shl 6) + (hash shl 16) - hash
- template `&=`*(x: var SigHash, c: char) = x = sdbmHash(x, c)
- template `&=`*(x: var SigHash, s: string) =
- for c in s: x = sdbmHash(x, c)
- else:
- type
- SigHash* = distinct Md5Digest
- const
- cb64 = [
- "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
- "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
- "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n",
- "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
- "0", "1", "2", "3", "4", "5", "6", "7", "8", "9a",
- "9b", "9c"]
- proc toBase64a(s: cstring, len: int): string =
- ## encodes `s` into base64 representation.
- result = newStringOfCap(((len + 2) div 3) * 4)
- result.add '_'
- var i = 0
- while i < len - 2:
- let a = ord(s[i])
- let b = ord(s[i+1])
- let c = ord(s[i+2])
- result.add cb64[a shr 2]
- result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
- result.add cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)]
- result.add cb64[c and 0x3F]
- inc(i, 3)
- if i < len-1:
- let a = ord(s[i])
- let b = ord(s[i+1])
- result.add cb64[a shr 2]
- result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
- result.add cb64[((b and 0x0F) shl 2)]
- elif i < len:
- let a = ord(s[i])
- result.add cb64[a shr 2]
- result.add cb64[(a and 3) shl 4]
- proc `$`*(u: SigHash): string =
- toBase64a(cast[cstring](unsafeAddr u), sizeof(u))
- proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
- proc `&=`(c: var MD5Context, ch: char) = md5Update(c, unsafeAddr ch, 1)
- proc `&=`(c: var MD5Context, r: Rope) =
- for l in leaves(r): md5Update(c, l, l.len)
- proc `&=`(c: var MD5Context, i: BiggestInt) =
- md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
- template lowlevel(v) =
- md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
- proc `==`*(a, b: SigHash): bool =
- # {.borrow.}
- result = equalMem(unsafeAddr a, unsafeAddr b, sizeof(a))
- proc hash*(u: SigHash): Hash =
- result = 0
- for x in 0..3:
- result = (result shl 8) or u.MD5Digest[x].int
- type
- ConsiderFlag* = enum
- CoProc
- CoType
- CoOwnerSig
- CoIgnoreRange
- proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag])
- proc hashSym(c: var MD5Context, s: PSym) =
- if sfAnon in s.flags or s.kind == skGenericParam:
- c &= ":anon"
- else:
- var it = s
- while it != nil:
- c &= it.name.s
- c &= "."
- it = it.owner
- proc hashTypeSym(c: var MD5Context, s: PSym) =
- if sfAnon in s.flags or s.kind == skGenericParam:
- c &= ":anon"
- else:
- var it = s
- while it != nil:
- if sfFromGeneric in it.flags and it.kind in routineKinds and
- it.typ != nil:
- hashType c, it.typ, {CoProc}
- c &= it.name.s
- c &= "."
- it = it.owner
- proc hashTree(c: var MD5Context, n: PNode) =
- if n == nil:
- c &= "\255"
- return
- let k = n.kind
- c &= char(k)
- # we really must not hash line information. 'n.typ' is debatable but
- # shouldn't be necessary for now and avoids potential infinite recursions.
- case n.kind
- of nkEmpty, nkNilLit, nkType: discard
- of nkIdent:
- c &= n.ident.s
- of nkSym:
- hashSym(c, n.sym)
- of nkCharLit..nkUInt64Lit:
- let v = n.intVal
- lowlevel v
- of nkFloatLit..nkFloat64Lit:
- let v = n.floatVal
- lowlevel v
- of nkStrLit..nkTripleStrLit:
- c &= n.strVal
- else:
- for i in 0..<n.len: hashTree(c, n.sons[i])
- proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]) =
- if t == nil:
- c &= "\254"
- return
- case t.kind
- of tyGenericInvocation:
- for i in countup(0, sonsLen(t) - 1):
- c.hashType t.sons[i], flags
- of tyDistinct:
- if CoType in flags:
- c.hashType t.lastSon, flags
- else:
- c.hashSym(t.sym)
- of tyGenericInst:
- if sfInfixCall in t.base.sym.flags:
- # This is an imported C++ generic type.
- # We cannot trust the `lastSon` to hold a properly populated and unique
- # value for each instantiation, so we hash the generic parameters here:
- let normalizedType = t.skipGenericAlias
- for i in 0 .. normalizedType.len - 2:
- c.hashType t.sons[i], flags
- else:
- c.hashType t.lastSon, flags
- of tyAlias, tySink, tyUserTypeClasses, tyInferred:
- c.hashType t.lastSon, flags
- of tyBool, tyChar, tyInt..tyUInt64:
- # no canonicalization for integral types, so that e.g. ``pid_t`` is
- # produced instead of ``NI``:
- c &= char(t.kind)
- if t.sym != nil and {sfImportc, sfExportc} * t.sym.flags != {}:
- c.hashSym(t.sym)
- of tyObject, tyEnum:
- if t.typeInst != nil:
- # prevent against infinite recursions here, see bug #8883:
- let inst = t.typeInst
- t.typeInst = nil
- assert inst.kind == tyGenericInst
- for i in countup(0, inst.len - 2):
- c.hashType inst.sons[i], flags
- t.typeInst = inst
- return
- c &= char(t.kind)
- # Every cyclic type in Nim need to be constructed via some 't.sym', so this
- # is actually safe without an infinite recursion check:
- if t.sym != nil:
- if {sfCompilerProc} * t.sym.flags != {}:
- doAssert t.sym.loc.r != nil
- # The user has set a specific name for this type
- c &= t.sym.loc.r
- elif CoOwnerSig in flags:
- c.hashTypeSym(t.sym)
- else:
- c.hashSym(t.sym)
- if {sfAnon, sfGenSym} * t.sym.flags != {}:
- # generated object names can be identical, so we need to
- # disambiguate furthermore by hashing the field types and names:
- # mild hack to prevent endless recursions (makes nimforum compile again):
- let oldFlags = t.sym.flags
- t.sym.flags = t.sym.flags - {sfAnon, sfGenSym}
- let n = t.n
- for i in 0 ..< n.len:
- assert n[i].kind == nkSym
- let s = n[i].sym
- c.hashSym s
- c.hashType s.typ, flags
- t.sym.flags = oldFlags
- else:
- c &= t.id
- if t.len > 0 and t.sons[0] != nil:
- hashType c, t.sons[0], flags
- of tyRef, tyPtr, tyGenericBody, tyVar:
- c &= char(t.kind)
- c.hashType t.lastSon, flags
- if tfVarIsPtr in t.flags: c &= ".varisptr"
- of tyFromExpr:
- c &= char(t.kind)
- c.hashTree(t.n)
- of tyTuple:
- c &= char(t.kind)
- if t.n != nil and CoType notin flags:
- assert(sonsLen(t.n) == sonsLen(t))
- for i in countup(0, sonsLen(t.n) - 1):
- assert(t.n.sons[i].kind == nkSym)
- c &= t.n.sons[i].sym.name.s
- c &= ':'
- c.hashType(t.sons[i], flags+{CoIgnoreRange})
- c &= ','
- else:
- for i in countup(0, sonsLen(t) - 1): c.hashType t.sons[i], flags+{CoIgnoreRange}
- of tyRange:
- if CoIgnoreRange notin flags:
- c &= char(t.kind)
- c.hashTree(t.n)
- c.hashType(t.sons[0], flags)
- of tyStatic:
- c &= char(t.kind)
- c.hashTree(t.n)
- c.hashType(t.sons[0], flags)
- of tyProc:
- c &= char(t.kind)
- c &= (if tfIterator in t.flags: "iterator " else: "proc ")
- if CoProc in flags and t.n != nil:
- let params = t.n
- for i in 1..<params.len:
- let param = params[i].sym
- c &= param.name.s
- c &= ':'
- c.hashType(param.typ, flags)
- c &= ','
- c.hashType(t.sons[0], flags)
- else:
- for i in 0..<t.len: c.hashType(t.sons[i], flags)
- c &= char(t.callConv)
- if CoType notin flags:
- if tfNoSideEffect in t.flags: c &= ".noSideEffect"
- if tfThread in t.flags: c &= ".thread"
- if tfVarargs in t.flags: c &= ".varargs"
- of tyArray:
- c &= char(t.kind)
- for i in 0..<t.len: c.hashType(t.sons[i], flags-{CoIgnoreRange})
- else:
- c &= char(t.kind)
- for i in 0..<t.len: c.hashType(t.sons[i], flags)
- if tfNotNil in t.flags and CoType notin flags: c &= "not nil"
- when defined(debugSigHashes):
- import db_sqlite
- let db = open(connection="sighashes.db", user="araq", password="",
- database="sighashes")
- db.exec(sql"DROP TABLE IF EXISTS sighashes")
- db.exec sql"""CREATE TABLE sighashes(
- id integer primary key,
- hash varchar(5000) not null,
- type varchar(5000) not null,
- unique (hash, type))"""
- # select hash, type from sighashes where hash in
- # (select hash from sighashes group by hash having count(*) > 1) order by hash;
- proc hashType*(t: PType; flags: set[ConsiderFlag] = {CoType}): SigHash =
- var c: MD5Context
- md5Init c
- hashType c, t, flags+{CoOwnerSig}
- md5Final c, result.Md5Digest
- when defined(debugSigHashes):
- db.exec(sql"INSERT OR IGNORE INTO sighashes(type, hash) VALUES (?, ?)",
- typeToString(t), $result)
- proc hashProc*(s: PSym): SigHash =
- var c: MD5Context
- md5Init c
- hashType c, s.typ, {CoProc}
- var m = s
- while m.kind != skModule: m = m.owner
- let p = m.owner
- assert p.kind == skPackage
- c &= p.name.s
- c &= "."
- c &= m.name.s
- if sfDispatcher in s.flags:
- c &= ".dispatcher"
- # so that createThread[void]() (aka generic specialization) gets a unique
- # hash, we also hash the line information. This is pretty bad, but the best
- # solution for now:
- #c &= s.info.line
- md5Final c, result.Md5Digest
- proc hashNonProc*(s: PSym): SigHash =
- var c: MD5Context
- md5Init c
- hashSym(c, s)
- var it = s
- while it != nil:
- c &= it.name.s
- c &= "."
- it = it.owner
- # for bug #5135 we also take the position into account, but only
- # for parameters, because who knows what else position dependency
- # might cause:
- if s.kind == skParam:
- c &= s.position
- md5Final c, result.Md5Digest
- proc hashOwner*(s: PSym): SigHash =
- var c: MD5Context
- md5Init c
- var m = s
- while m.kind != skModule: m = m.owner
- let p = m.owner
- assert p.kind == skPackage
- c &= p.name.s
- c &= "."
- c &= m.name.s
- md5Final c, result.Md5Digest
- proc idOrSig*(s: PSym, currentModule: string,
- sigCollisions: var CountTable[SigHash]): Rope =
- if s.kind in routineKinds and s.typ != nil:
- # signatures for exported routines are reliable enough to
- # produce a unique name and this means produced C++ is more stable wrt
- # Nim changes:
- let sig = hashProc(s)
- result = rope($sig)
- #let m = if s.typ.callConv != ccInline: findPendingModule(m, s) else: m
- let counter = sigCollisions.getOrDefault(sig)
- #if sigs == "_jckmNePK3i2MFnWwZlp6Lg" and s.name.s == "contains":
- # echo "counter ", counter, " ", s.id
- if counter != 0:
- result.add "_" & rope(counter+1)
- # this minor hack is necessary to make tests/collections/thashes compile.
- # The inlined hash function's original module is ambiguous so we end up
- # generating duplicate names otherwise:
- if s.typ.callConv == ccInline:
- result.add rope(currentModule)
- sigCollisions.inc(sig)
- else:
- let sig = hashNonProc(s)
- result = rope($sig)
- let counter = sigCollisions.getOrDefault(sig)
- if counter != 0:
- result.add "_" & rope(counter+1)
- sigCollisions.inc(sig)
|