sighashes.nim 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2017 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Computes hash values for routine (proc, method etc) signatures.
  10. import ast, tables, ropes, md5, modulegraphs
  11. from hashes import Hash
  12. import types
  13. proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
  14. proc `&=`(c: var MD5Context, ch: char) = md5Update(c, unsafeAddr ch, 1)
  15. proc `&=`(c: var MD5Context, r: Rope) =
  16. for l in leaves(r): md5Update(c, l, l.len)
  17. proc `&=`(c: var MD5Context, i: BiggestInt) =
  18. md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
  19. proc `&=`(c: var MD5Context, f: BiggestFloat) =
  20. md5Update(c, cast[cstring](unsafeAddr f), sizeof(f))
  21. proc `&=`(c: var MD5Context, s: SigHash) =
  22. md5Update(c, cast[cstring](unsafeAddr s), sizeof(s))
  23. template lowlevel(v) =
  24. md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
  25. type
  26. ConsiderFlag* = enum
  27. CoProc
  28. CoType
  29. CoOwnerSig
  30. CoIgnoreRange
  31. CoConsiderOwned
  32. CoDistinct
  33. CoHashTypeInsideNode
  34. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag])
  35. proc hashSym(c: var MD5Context, s: PSym) =
  36. if sfAnon in s.flags or s.kind == skGenericParam:
  37. c &= ":anon"
  38. else:
  39. var it = s
  40. while it != nil:
  41. c &= it.name.s
  42. c &= "."
  43. it = it.owner
  44. proc hashTypeSym(c: var MD5Context, s: PSym) =
  45. if sfAnon in s.flags or s.kind == skGenericParam:
  46. c &= ":anon"
  47. else:
  48. var it = s
  49. while it != nil:
  50. if sfFromGeneric in it.flags and it.kind in routineKinds and
  51. it.typ != nil:
  52. hashType c, it.typ, {CoProc}
  53. c &= it.name.s
  54. c &= "."
  55. it = it.owner
  56. proc hashTree(c: var MD5Context, n: PNode; flags: set[ConsiderFlag]) =
  57. if n == nil:
  58. c &= "\255"
  59. return
  60. let k = n.kind
  61. c &= char(k)
  62. # we really must not hash line information. 'n.typ' is debatable but
  63. # shouldn't be necessary for now and avoids potential infinite recursions.
  64. case n.kind
  65. of nkEmpty, nkNilLit, nkType: discard
  66. of nkIdent:
  67. c &= n.ident.s
  68. of nkSym:
  69. hashSym(c, n.sym)
  70. if CoHashTypeInsideNode in flags and n.sym.typ != nil:
  71. hashType(c, n.sym.typ, flags)
  72. of nkCharLit..nkUInt64Lit:
  73. let v = n.intVal
  74. lowlevel v
  75. of nkFloatLit..nkFloat64Lit:
  76. let v = n.floatVal
  77. lowlevel v
  78. of nkStrLit..nkTripleStrLit:
  79. c &= n.strVal
  80. else:
  81. for i in 0..<n.len: hashTree(c, n[i], flags)
  82. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]) =
  83. if t == nil:
  84. c &= "\254"
  85. return
  86. case t.kind
  87. of tyGenericInvocation:
  88. for i in 0..<t.len:
  89. c.hashType t[i], flags
  90. of tyDistinct:
  91. if CoDistinct in flags:
  92. if t.sym != nil: c.hashSym(t.sym)
  93. if t.sym == nil or tfFromGeneric in t.flags:
  94. c.hashType t.lastSon, flags
  95. elif CoType in flags or t.sym == nil:
  96. c.hashType t.lastSon, flags
  97. else:
  98. c.hashSym(t.sym)
  99. of tyGenericInst:
  100. if sfInfixCall in t.base.sym.flags:
  101. # This is an imported C++ generic type.
  102. # We cannot trust the `lastSon` to hold a properly populated and unique
  103. # value for each instantiation, so we hash the generic parameters here:
  104. let normalizedType = t.skipGenericAlias
  105. for i in 0..<normalizedType.len - 1:
  106. c.hashType t[i], flags
  107. else:
  108. c.hashType t.lastSon, flags
  109. of tyAlias, tySink, tyUserTypeClasses, tyInferred:
  110. c.hashType t.lastSon, flags
  111. of tyOwned:
  112. if CoConsiderOwned in flags:
  113. c &= char(t.kind)
  114. c.hashType t.lastSon, flags
  115. of tyBool, tyChar, tyInt..tyUInt64:
  116. # no canonicalization for integral types, so that e.g. ``pid_t`` is
  117. # produced instead of ``NI``:
  118. c &= char(t.kind)
  119. if t.sym != nil and {sfImportc, sfExportc} * t.sym.flags != {}:
  120. c.hashSym(t.sym)
  121. of tyObject, tyEnum:
  122. if t.typeInst != nil:
  123. # prevent against infinite recursions here, see bug #8883:
  124. let inst = t.typeInst
  125. t.typeInst = nil
  126. assert inst.kind == tyGenericInst
  127. for i in 0..<inst.len - 1:
  128. c.hashType inst[i], flags
  129. t.typeInst = inst
  130. return
  131. c &= char(t.kind)
  132. # Every cyclic type in Nim need to be constructed via some 't.sym', so this
  133. # is actually safe without an infinite recursion check:
  134. if t.sym != nil:
  135. if {sfCompilerProc} * t.sym.flags != {}:
  136. doAssert t.sym.loc.r != nil
  137. # The user has set a specific name for this type
  138. c &= t.sym.loc.r
  139. elif CoOwnerSig in flags:
  140. c.hashTypeSym(t.sym)
  141. else:
  142. c.hashSym(t.sym)
  143. if {sfAnon, sfGenSym} * t.sym.flags != {}:
  144. # Generated object names can be identical, so we need to
  145. # disambiguate furthermore by hashing the field types and names.
  146. if t.n.len > 0:
  147. let oldFlags = t.sym.flags
  148. # Mild hack to prevent endless recursion.
  149. t.sym.flags.excl {sfAnon, sfGenSym}
  150. hashTree(c, t.n, flags + {CoHashTypeInsideNode})
  151. t.sym.flags = oldFlags
  152. else:
  153. # The object has no fields: we _must_ add something here in order to
  154. # make the hash different from the one we produce by hashing only the
  155. # type name.
  156. c &= ".empty"
  157. else:
  158. c &= t.id
  159. if t.len > 0 and t[0] != nil:
  160. hashType c, t[0], flags
  161. of tyRef, tyPtr, tyGenericBody, tyVar:
  162. c &= char(t.kind)
  163. c.hashType t.lastSon, flags
  164. if tfVarIsPtr in t.flags: c &= ".varisptr"
  165. of tyFromExpr:
  166. c &= char(t.kind)
  167. c.hashTree(t.n, {})
  168. of tyTuple:
  169. c &= char(t.kind)
  170. if t.n != nil and CoType notin flags:
  171. assert(t.n.len == t.len)
  172. for i in 0..<t.n.len:
  173. assert(t.n[i].kind == nkSym)
  174. c &= t.n[i].sym.name.s
  175. c &= ':'
  176. c.hashType(t[i], flags+{CoIgnoreRange})
  177. c &= ','
  178. else:
  179. for i in 0..<t.len: c.hashType t[i], flags+{CoIgnoreRange}
  180. of tyRange:
  181. if CoIgnoreRange notin flags:
  182. c &= char(t.kind)
  183. c.hashTree(t.n, {})
  184. c.hashType(t[0], flags)
  185. of tyStatic:
  186. c &= char(t.kind)
  187. c.hashTree(t.n, {})
  188. c.hashType(t[0], flags)
  189. of tyProc:
  190. c &= char(t.kind)
  191. c &= (if tfIterator in t.flags: "iterator " else: "proc ")
  192. if CoProc in flags and t.n != nil:
  193. let params = t.n
  194. for i in 1..<params.len:
  195. let param = params[i].sym
  196. c &= param.name.s
  197. c &= ':'
  198. c.hashType(param.typ, flags)
  199. c &= ','
  200. c.hashType(t[0], flags)
  201. else:
  202. for i in 0..<t.len: c.hashType(t[i], flags)
  203. c &= char(t.callConv)
  204. # purity of functions doesn't have to affect the mangling (which is in fact
  205. # problematic for HCR - someone could have cached a pointer to another
  206. # function which changes its purity and suddenly the cached pointer is danglign)
  207. # IMHO anything that doesn't affect the overload resolution shouldn't be part of the mangling...
  208. # if CoType notin flags:
  209. # if tfNoSideEffect in t.flags: c &= ".noSideEffect"
  210. # if tfThread in t.flags: c &= ".thread"
  211. if tfVarargs in t.flags: c &= ".varargs"
  212. of tyArray:
  213. c &= char(t.kind)
  214. for i in 0..<t.len: c.hashType(t[i], flags-{CoIgnoreRange})
  215. else:
  216. c &= char(t.kind)
  217. for i in 0..<t.len: c.hashType(t[i], flags)
  218. if tfNotNil in t.flags and CoType notin flags: c &= "not nil"
  219. when defined(debugSigHashes):
  220. import db_sqlite
  221. let db = open(connection="sighashes.db", user="araq", password="",
  222. database="sighashes")
  223. db.exec(sql"DROP TABLE IF EXISTS sighashes")
  224. db.exec sql"""CREATE TABLE sighashes(
  225. id integer primary key,
  226. hash varchar(5000) not null,
  227. type varchar(5000) not null,
  228. unique (hash, type))"""
  229. # select hash, type from sighashes where hash in
  230. # (select hash from sighashes group by hash having count(*) > 1) order by hash;
  231. proc hashType*(t: PType; flags: set[ConsiderFlag] = {CoType}): SigHash =
  232. var c: MD5Context
  233. md5Init c
  234. hashType c, t, flags+{CoOwnerSig}
  235. md5Final c, result.MD5Digest
  236. when defined(debugSigHashes):
  237. db.exec(sql"INSERT OR IGNORE INTO sighashes(type, hash) VALUES (?, ?)",
  238. typeToString(t), $result)
  239. proc hashProc*(s: PSym): SigHash =
  240. var c: MD5Context
  241. md5Init c
  242. hashType c, s.typ, {CoProc}
  243. var m = s
  244. while m.kind != skModule: m = m.owner
  245. let p = m.owner
  246. assert p.kind == skPackage
  247. c &= p.name.s
  248. c &= "."
  249. c &= m.name.s
  250. if sfDispatcher in s.flags:
  251. c &= ".dispatcher"
  252. # so that createThread[void]() (aka generic specialization) gets a unique
  253. # hash, we also hash the line information. This is pretty bad, but the best
  254. # solution for now:
  255. #c &= s.info.line
  256. md5Final c, result.MD5Digest
  257. proc hashNonProc*(s: PSym): SigHash =
  258. var c: MD5Context
  259. md5Init c
  260. hashSym(c, s)
  261. var it = s
  262. while it != nil:
  263. c &= it.name.s
  264. c &= "."
  265. it = it.owner
  266. # for bug #5135 we also take the position into account, but only
  267. # for parameters, because who knows what else position dependency
  268. # might cause:
  269. if s.kind == skParam:
  270. c &= s.position
  271. md5Final c, result.MD5Digest
  272. proc hashOwner*(s: PSym): SigHash =
  273. var c: MD5Context
  274. md5Init c
  275. var m = s
  276. while m.kind != skModule: m = m.owner
  277. let p = m.owner
  278. assert p.kind == skPackage
  279. c &= p.name.s
  280. c &= "."
  281. c &= m.name.s
  282. md5Final c, result.MD5Digest
  283. proc sigHash*(s: PSym): SigHash =
  284. if s.kind in routineKinds and s.typ != nil:
  285. result = hashProc(s)
  286. else:
  287. result = hashNonProc(s)
  288. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash
  289. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode)
  290. proc hashVarSymBody(graph: ModuleGraph, c: var MD5Context, s: PSym) =
  291. assert: s.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}
  292. if sfGlobal notin s.flags:
  293. c &= char(s.kind)
  294. c &= s.name.s
  295. else:
  296. c &= hashNonProc(s)
  297. # this one works for let and const but not for var. True variables can change value
  298. # later on. it is user resposibility to hash his global state if required
  299. if s.ast != nil and s.ast.kind == nkIdentDefs:
  300. hashBodyTree(graph, c, s.ast[^1])
  301. else:
  302. hashBodyTree(graph, c, s.ast)
  303. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode) =
  304. # hash Nim tree recursing into simply
  305. if n == nil:
  306. c &= "nil"
  307. return
  308. c &= char(n.kind)
  309. case n.kind
  310. of nkEmpty, nkNilLit, nkType: discard
  311. of nkIdent:
  312. c &= n.ident.s
  313. of nkSym:
  314. if n.sym.kind in skProcKinds:
  315. c &= symBodyDigest(graph, n.sym)
  316. elif n.sym.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}:
  317. hashVarSymBody(graph, c, n.sym)
  318. else:
  319. c &= hashNonProc(n.sym)
  320. of nkProcDef, nkFuncDef, nkTemplateDef, nkMacroDef:
  321. discard # we track usage of proc symbols not their definition
  322. of nkCharLit..nkUInt64Lit:
  323. c &= n.intVal
  324. of nkFloatLit..nkFloat64Lit:
  325. c &= n.floatVal
  326. of nkStrLit..nkTripleStrLit:
  327. c &= n.strVal
  328. else:
  329. for i in 0..<n.len:
  330. hashBodyTree(graph, c, n[i])
  331. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash =
  332. ## compute unique digest of the proc/func/method symbols
  333. ## recursing into invoked symbols as well
  334. assert(sym.kind in skProcKinds, $sym.kind)
  335. graph.symBodyHashes.withValue(sym.id, value):
  336. return value[]
  337. var c: MD5Context
  338. md5Init(c)
  339. c.hashType(sym.typ, {CoProc})
  340. c &= char(sym.kind)
  341. c.md5Final(result.MD5Digest)
  342. graph.symBodyHashes[sym.id] = result # protect from recursion in the body
  343. if sym.ast != nil:
  344. md5Init(c)
  345. c.md5Update(cast[cstring](result.addr), sizeof(result))
  346. hashBodyTree(graph, c, sym.ast[bodyPos])
  347. c.md5Final(result.MD5Digest)
  348. graph.symBodyHashes[sym.id] = result
  349. proc idOrSig*(s: PSym, currentModule: string,
  350. sigCollisions: var CountTable[SigHash]): Rope =
  351. if s.kind in routineKinds and s.typ != nil:
  352. # signatures for exported routines are reliable enough to
  353. # produce a unique name and this means produced C++ is more stable wrt
  354. # Nim changes:
  355. let sig = hashProc(s)
  356. result = rope($sig)
  357. #let m = if s.typ.callConv != ccInline: findPendingModule(m, s) else: m
  358. let counter = sigCollisions.getOrDefault(sig)
  359. #if sigs == "_jckmNePK3i2MFnWwZlp6Lg" and s.name.s == "contains":
  360. # echo "counter ", counter, " ", s.id
  361. if counter != 0:
  362. result.add "_" & rope(counter+1)
  363. # this minor hack is necessary to make tests/collections/thashes compile.
  364. # The inlined hash function's original module is ambiguous so we end up
  365. # generating duplicate names otherwise:
  366. if s.typ.callConv == ccInline:
  367. result.add rope(currentModule)
  368. sigCollisions.inc(sig)
  369. else:
  370. let sig = hashNonProc(s)
  371. result = rope($sig)
  372. let counter = sigCollisions.getOrDefault(sig)
  373. if counter != 0:
  374. result.add "_" & rope(counter+1)
  375. sigCollisions.inc(sig)