sighashes.nim 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2017 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Computes hash values for routine (proc, method etc) signatures.
  10. import ast, md5, tables, ropes
  11. from hashes import Hash
  12. from astalgo import debug
  13. import types
  14. from strutils import startsWith, contains
  15. when false:
  16. type
  17. SigHash* = uint32 ## a hash good enough for a filename or a proc signature
  18. proc sdbmHash(hash: SigHash, c: char): SigHash {.inline.} =
  19. return SigHash(c) + (hash shl 6) + (hash shl 16) - hash
  20. template `&=`*(x: var SigHash, c: char) = x = sdbmHash(x, c)
  21. template `&=`*(x: var SigHash, s: string) =
  22. for c in s: x = sdbmHash(x, c)
  23. else:
  24. type
  25. SigHash* = distinct Md5Digest
  26. const
  27. cb64 = [
  28. "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
  29. "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
  30. "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n",
  31. "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
  32. "0", "1", "2", "3", "4", "5", "6", "7", "8", "9a",
  33. "9b", "9c"]
  34. proc toBase64a(s: cstring, len: int): string =
  35. ## encodes `s` into base64 representation.
  36. result = newStringOfCap(((len + 2) div 3) * 4)
  37. result.add '_'
  38. var i = 0
  39. while i < len - 2:
  40. let a = ord(s[i])
  41. let b = ord(s[i+1])
  42. let c = ord(s[i+2])
  43. result.add cb64[a shr 2]
  44. result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
  45. result.add cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)]
  46. result.add cb64[c and 0x3F]
  47. inc(i, 3)
  48. if i < len-1:
  49. let a = ord(s[i])
  50. let b = ord(s[i+1])
  51. result.add cb64[a shr 2]
  52. result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
  53. result.add cb64[((b and 0x0F) shl 2)]
  54. elif i < len:
  55. let a = ord(s[i])
  56. result.add cb64[a shr 2]
  57. result.add cb64[(a and 3) shl 4]
  58. proc `$`*(u: SigHash): string =
  59. toBase64a(cast[cstring](unsafeAddr u), sizeof(u))
  60. proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
  61. proc `&=`(c: var MD5Context, ch: char) = md5Update(c, unsafeAddr ch, 1)
  62. proc `&=`(c: var MD5Context, r: Rope) =
  63. for l in leaves(r): md5Update(c, l, l.len)
  64. proc `&=`(c: var MD5Context, i: BiggestInt) =
  65. md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
  66. template lowlevel(v) =
  67. md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
  68. proc `==`*(a, b: SigHash): bool =
  69. # {.borrow.}
  70. result = equalMem(unsafeAddr a, unsafeAddr b, sizeof(a))
  71. proc hash*(u: SigHash): Hash =
  72. result = 0
  73. for x in 0..3:
  74. result = (result shl 8) or u.MD5Digest[x].int
  75. type
  76. ConsiderFlag* = enum
  77. CoProc
  78. CoType
  79. CoOwnerSig
  80. CoIgnoreRange
  81. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag])
  82. proc hashSym(c: var MD5Context, s: PSym) =
  83. if sfAnon in s.flags or s.kind == skGenericParam:
  84. c &= ":anon"
  85. else:
  86. var it = s
  87. while it != nil:
  88. c &= it.name.s
  89. c &= "."
  90. it = it.owner
  91. proc hashTypeSym(c: var MD5Context, s: PSym) =
  92. if sfAnon in s.flags or s.kind == skGenericParam:
  93. c &= ":anon"
  94. else:
  95. var it = s
  96. while it != nil:
  97. if sfFromGeneric in it.flags and it.kind in routineKinds and
  98. it.typ != nil:
  99. hashType c, it.typ, {CoProc}
  100. c &= it.name.s
  101. c &= "."
  102. it = it.owner
  103. proc hashTree(c: var MD5Context, n: PNode) =
  104. if n == nil:
  105. c &= "\255"
  106. return
  107. let k = n.kind
  108. c &= char(k)
  109. # we really must not hash line information. 'n.typ' is debatable but
  110. # shouldn't be necessary for now and avoids potential infinite recursions.
  111. case n.kind
  112. of nkEmpty, nkNilLit, nkType: discard
  113. of nkIdent:
  114. c &= n.ident.s
  115. of nkSym:
  116. hashSym(c, n.sym)
  117. of nkCharLit..nkUInt64Lit:
  118. let v = n.intVal
  119. lowlevel v
  120. of nkFloatLit..nkFloat64Lit:
  121. let v = n.floatVal
  122. lowlevel v
  123. of nkStrLit..nkTripleStrLit:
  124. c &= n.strVal
  125. else:
  126. for i in 0..<n.len: hashTree(c, n.sons[i])
  127. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]) =
  128. if t == nil:
  129. c &= "\254"
  130. return
  131. case t.kind
  132. of tyGenericInvocation:
  133. for i in countup(0, sonsLen(t) - 1):
  134. c.hashType t.sons[i], flags
  135. of tyDistinct:
  136. if CoType in flags:
  137. c.hashType t.lastSon, flags
  138. else:
  139. c.hashSym(t.sym)
  140. of tyGenericInst:
  141. if sfInfixCall in t.base.sym.flags:
  142. # This is an imported C++ generic type.
  143. # We cannot trust the `lastSon` to hold a properly populated and unique
  144. # value for each instantiation, so we hash the generic parameters here:
  145. let normalizedType = t.skipGenericAlias
  146. for i in 0 .. normalizedType.len - 2:
  147. c.hashType t.sons[i], flags
  148. else:
  149. c.hashType t.lastSon, flags
  150. of tyAlias, tySink, tyUserTypeClasses, tyInferred:
  151. c.hashType t.lastSon, flags
  152. of tyBool, tyChar, tyInt..tyUInt64:
  153. # no canonicalization for integral types, so that e.g. ``pid_t`` is
  154. # produced instead of ``NI``:
  155. c &= char(t.kind)
  156. if t.sym != nil and {sfImportc, sfExportc} * t.sym.flags != {}:
  157. c.hashSym(t.sym)
  158. of tyObject, tyEnum:
  159. if t.typeInst != nil:
  160. # prevent against infinite recursions here, see bug #8883:
  161. let inst = t.typeInst
  162. t.typeInst = nil
  163. assert inst.kind == tyGenericInst
  164. for i in countup(0, inst.len - 2):
  165. c.hashType inst.sons[i], flags
  166. t.typeInst = inst
  167. return
  168. c &= char(t.kind)
  169. # Every cyclic type in Nim need to be constructed via some 't.sym', so this
  170. # is actually safe without an infinite recursion check:
  171. if t.sym != nil:
  172. if {sfCompilerProc} * t.sym.flags != {}:
  173. doAssert t.sym.loc.r != nil
  174. # The user has set a specific name for this type
  175. c &= t.sym.loc.r
  176. elif CoOwnerSig in flags:
  177. c.hashTypeSym(t.sym)
  178. else:
  179. c.hashSym(t.sym)
  180. if {sfAnon, sfGenSym} * t.sym.flags != {}:
  181. # generated object names can be identical, so we need to
  182. # disambiguate furthermore by hashing the field types and names:
  183. # mild hack to prevent endless recursions (makes nimforum compile again):
  184. let oldFlags = t.sym.flags
  185. t.sym.flags = t.sym.flags - {sfAnon, sfGenSym}
  186. let n = t.n
  187. for i in 0 ..< n.len:
  188. assert n[i].kind == nkSym
  189. let s = n[i].sym
  190. c.hashSym s
  191. c.hashType s.typ, flags
  192. t.sym.flags = oldFlags
  193. else:
  194. c &= t.id
  195. if t.len > 0 and t.sons[0] != nil:
  196. hashType c, t.sons[0], flags
  197. of tyRef, tyPtr, tyGenericBody, tyVar:
  198. c &= char(t.kind)
  199. c.hashType t.lastSon, flags
  200. if tfVarIsPtr in t.flags: c &= ".varisptr"
  201. of tyFromExpr:
  202. c &= char(t.kind)
  203. c.hashTree(t.n)
  204. of tyTuple:
  205. c &= char(t.kind)
  206. if t.n != nil and CoType notin flags:
  207. assert(sonsLen(t.n) == sonsLen(t))
  208. for i in countup(0, sonsLen(t.n) - 1):
  209. assert(t.n.sons[i].kind == nkSym)
  210. c &= t.n.sons[i].sym.name.s
  211. c &= ':'
  212. c.hashType(t.sons[i], flags+{CoIgnoreRange})
  213. c &= ','
  214. else:
  215. for i in countup(0, sonsLen(t) - 1): c.hashType t.sons[i], flags+{CoIgnoreRange}
  216. of tyRange:
  217. if CoIgnoreRange notin flags:
  218. c &= char(t.kind)
  219. c.hashTree(t.n)
  220. c.hashType(t.sons[0], flags)
  221. of tyStatic:
  222. c &= char(t.kind)
  223. c.hashTree(t.n)
  224. c.hashType(t.sons[0], flags)
  225. of tyProc:
  226. c &= char(t.kind)
  227. c &= (if tfIterator in t.flags: "iterator " else: "proc ")
  228. if CoProc in flags and t.n != nil:
  229. let params = t.n
  230. for i in 1..<params.len:
  231. let param = params[i].sym
  232. c &= param.name.s
  233. c &= ':'
  234. c.hashType(param.typ, flags)
  235. c &= ','
  236. c.hashType(t.sons[0], flags)
  237. else:
  238. for i in 0..<t.len: c.hashType(t.sons[i], flags)
  239. c &= char(t.callConv)
  240. if CoType notin flags:
  241. if tfNoSideEffect in t.flags: c &= ".noSideEffect"
  242. if tfThread in t.flags: c &= ".thread"
  243. if tfVarargs in t.flags: c &= ".varargs"
  244. of tyArray:
  245. c &= char(t.kind)
  246. for i in 0..<t.len: c.hashType(t.sons[i], flags-{CoIgnoreRange})
  247. else:
  248. c &= char(t.kind)
  249. for i in 0..<t.len: c.hashType(t.sons[i], flags)
  250. if tfNotNil in t.flags and CoType notin flags: c &= "not nil"
  251. when defined(debugSigHashes):
  252. import db_sqlite
  253. let db = open(connection="sighashes.db", user="araq", password="",
  254. database="sighashes")
  255. db.exec(sql"DROP TABLE IF EXISTS sighashes")
  256. db.exec sql"""CREATE TABLE sighashes(
  257. id integer primary key,
  258. hash varchar(5000) not null,
  259. type varchar(5000) not null,
  260. unique (hash, type))"""
  261. # select hash, type from sighashes where hash in
  262. # (select hash from sighashes group by hash having count(*) > 1) order by hash;
  263. proc hashType*(t: PType; flags: set[ConsiderFlag] = {CoType}): SigHash =
  264. var c: MD5Context
  265. md5Init c
  266. hashType c, t, flags+{CoOwnerSig}
  267. md5Final c, result.Md5Digest
  268. when defined(debugSigHashes):
  269. db.exec(sql"INSERT OR IGNORE INTO sighashes(type, hash) VALUES (?, ?)",
  270. typeToString(t), $result)
  271. proc hashProc*(s: PSym): SigHash =
  272. var c: MD5Context
  273. md5Init c
  274. hashType c, s.typ, {CoProc}
  275. var m = s
  276. while m.kind != skModule: m = m.owner
  277. let p = m.owner
  278. assert p.kind == skPackage
  279. c &= p.name.s
  280. c &= "."
  281. c &= m.name.s
  282. if sfDispatcher in s.flags:
  283. c &= ".dispatcher"
  284. # so that createThread[void]() (aka generic specialization) gets a unique
  285. # hash, we also hash the line information. This is pretty bad, but the best
  286. # solution for now:
  287. #c &= s.info.line
  288. md5Final c, result.Md5Digest
  289. proc hashNonProc*(s: PSym): SigHash =
  290. var c: MD5Context
  291. md5Init c
  292. hashSym(c, s)
  293. var it = s
  294. while it != nil:
  295. c &= it.name.s
  296. c &= "."
  297. it = it.owner
  298. # for bug #5135 we also take the position into account, but only
  299. # for parameters, because who knows what else position dependency
  300. # might cause:
  301. if s.kind == skParam:
  302. c &= s.position
  303. md5Final c, result.Md5Digest
  304. proc hashOwner*(s: PSym): SigHash =
  305. var c: MD5Context
  306. md5Init c
  307. var m = s
  308. while m.kind != skModule: m = m.owner
  309. let p = m.owner
  310. assert p.kind == skPackage
  311. c &= p.name.s
  312. c &= "."
  313. c &= m.name.s
  314. md5Final c, result.Md5Digest
  315. proc idOrSig*(s: PSym, currentModule: string,
  316. sigCollisions: var CountTable[SigHash]): Rope =
  317. if s.kind in routineKinds and s.typ != nil:
  318. # signatures for exported routines are reliable enough to
  319. # produce a unique name and this means produced C++ is more stable wrt
  320. # Nim changes:
  321. let sig = hashProc(s)
  322. result = rope($sig)
  323. #let m = if s.typ.callConv != ccInline: findPendingModule(m, s) else: m
  324. let counter = sigCollisions.getOrDefault(sig)
  325. #if sigs == "_jckmNePK3i2MFnWwZlp6Lg" and s.name.s == "contains":
  326. # echo "counter ", counter, " ", s.id
  327. if counter != 0:
  328. result.add "_" & rope(counter+1)
  329. # this minor hack is necessary to make tests/collections/thashes compile.
  330. # The inlined hash function's original module is ambiguous so we end up
  331. # generating duplicate names otherwise:
  332. if s.typ.callConv == ccInline:
  333. result.add rope(currentModule)
  334. sigCollisions.inc(sig)
  335. else:
  336. let sig = hashNonProc(s)
  337. result = rope($sig)
  338. let counter = sigCollisions.getOrDefault(sig)
  339. if counter != 0:
  340. result.add "_" & rope(counter+1)
  341. sigCollisions.inc(sig)