sighashes.nim 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2017 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Computes hash values for routine (proc, method etc) signatures.
  10. import ast, ropes, modulegraphs, options, msgs, pathutils
  11. from std/hashes import Hash
  12. import std/tables
  13. import types
  14. import ../dist/checksums/src/checksums/md5
  15. when defined(nimPreviewSlimSystem):
  16. import std/assertions
  17. proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
  18. proc `&=`(c: var MD5Context, ch: char) =
  19. # XXX suspicious code here; relies on ch being zero terminated?
  20. md5Update(c, cast[cstring](unsafeAddr ch), 1)
  21. proc `&=`(c: var MD5Context, i: BiggestInt) =
  22. md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
  23. proc `&=`(c: var MD5Context, f: BiggestFloat) =
  24. md5Update(c, cast[cstring](unsafeAddr f), sizeof(f))
  25. proc `&=`(c: var MD5Context, s: SigHash) =
  26. md5Update(c, cast[cstring](unsafeAddr s), sizeof(s))
  27. template lowlevel(v) =
  28. md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
  29. type
  30. ConsiderFlag* = enum
  31. CoProc
  32. CoType
  33. CoOwnerSig
  34. CoIgnoreRange
  35. CoConsiderOwned
  36. CoDistinct
  37. CoHashTypeInsideNode
  38. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]; conf: ConfigRef)
  39. proc hashSym(c: var MD5Context, s: PSym) =
  40. if sfAnon in s.flags or s.kind == skGenericParam:
  41. c &= ":anon"
  42. else:
  43. var it = s
  44. while it != nil:
  45. c &= it.name.s
  46. c &= "."
  47. it = it.owner
  48. proc hashTypeSym(c: var MD5Context, s: PSym; conf: ConfigRef) =
  49. if sfAnon in s.flags or s.kind == skGenericParam:
  50. c &= ":anon"
  51. else:
  52. var it = s
  53. c &= customPath(conf.toFullPath(s.info))
  54. while it != nil:
  55. if sfFromGeneric in it.flags and it.kind in routineKinds and
  56. it.typ != nil:
  57. hashType c, it.typ, {CoProc}, conf
  58. c &= it.name.s
  59. c &= "."
  60. it = it.owner
  61. proc hashTree(c: var MD5Context, n: PNode; flags: set[ConsiderFlag]; conf: ConfigRef) =
  62. if n == nil:
  63. c &= "\255"
  64. return
  65. let k = n.kind
  66. c &= char(k)
  67. # we really must not hash line information. 'n.typ' is debatable but
  68. # shouldn't be necessary for now and avoids potential infinite recursions.
  69. case n.kind
  70. of nkEmpty, nkNilLit, nkType: discard
  71. of nkIdent:
  72. c &= n.ident.s
  73. of nkSym:
  74. hashSym(c, n.sym)
  75. if CoHashTypeInsideNode in flags and n.sym.typ != nil:
  76. hashType(c, n.sym.typ, flags, conf)
  77. of nkCharLit..nkUInt64Lit:
  78. let v = n.intVal
  79. lowlevel v
  80. of nkFloatLit..nkFloat64Lit:
  81. let v = n.floatVal
  82. lowlevel v
  83. of nkStrLit..nkTripleStrLit:
  84. c &= n.strVal
  85. else:
  86. for i in 0..<n.len: hashTree(c, n[i], flags, conf)
  87. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]; conf: ConfigRef) =
  88. if t == nil:
  89. c &= "\254"
  90. return
  91. case t.kind
  92. of tyGenericInvocation:
  93. for a in t.kids:
  94. c.hashType a, flags, conf
  95. of tyDistinct:
  96. if CoDistinct in flags:
  97. if t.sym != nil: c.hashSym(t.sym)
  98. if t.sym == nil or tfFromGeneric in t.flags:
  99. c.hashType t.elementType, flags, conf
  100. elif CoType in flags or t.sym == nil:
  101. c.hashType t.elementType, flags, conf
  102. else:
  103. c.hashSym(t.sym)
  104. of tyGenericInst:
  105. if sfInfixCall in t.base.sym.flags:
  106. # This is an imported C++ generic type.
  107. # We cannot trust the `lastSon` to hold a properly populated and unique
  108. # value for each instantiation, so we hash the generic parameters here:
  109. let normalizedType = t.skipGenericAlias
  110. c.hashType normalizedType.genericHead, flags, conf
  111. for _, a in normalizedType.genericInstParams:
  112. c.hashType a, flags, conf
  113. else:
  114. c.hashType t.skipModifier, flags, conf
  115. of tyAlias, tySink, tyUserTypeClasses, tyInferred:
  116. c.hashType t.skipModifier, flags, conf
  117. of tyOwned:
  118. if CoConsiderOwned in flags:
  119. c &= char(t.kind)
  120. c.hashType t.skipModifier, flags, conf
  121. of tyBool, tyChar, tyInt..tyUInt64:
  122. # no canonicalization for integral types, so that e.g. ``pid_t`` is
  123. # produced instead of ``NI``:
  124. c &= char(t.kind)
  125. if t.sym != nil and {sfImportc, sfExportc} * t.sym.flags != {}:
  126. c.hashSym(t.sym)
  127. of tyObject, tyEnum:
  128. if t.typeInst != nil:
  129. # prevent against infinite recursions here, see bug #8883:
  130. let inst = t.typeInst
  131. t.typeInst = nil
  132. assert inst.kind == tyGenericInst
  133. c.hashType inst.genericHead, flags, conf
  134. for _, a in inst.genericInstParams:
  135. c.hashType a, flags, conf
  136. t.typeInst = inst
  137. return
  138. c &= char(t.kind)
  139. # Every cyclic type in Nim need to be constructed via some 't.sym', so this
  140. # is actually safe without an infinite recursion check:
  141. if t.sym != nil:
  142. if {sfCompilerProc} * t.sym.flags != {}:
  143. doAssert t.sym.loc.r != ""
  144. # The user has set a specific name for this type
  145. c &= t.sym.loc.r
  146. elif CoOwnerSig in flags:
  147. c.hashTypeSym(t.sym, conf)
  148. else:
  149. c.hashSym(t.sym)
  150. var symWithFlags: PSym = nil
  151. template hasFlag(sym): bool =
  152. let ret = {sfAnon, sfGenSym} * sym.flags != {}
  153. if ret: symWithFlags = sym
  154. ret
  155. if hasFlag(t.sym) or (t.kind == tyObject and t.owner.kind == skType and t.owner.typ.kind == tyRef and hasFlag(t.owner)):
  156. # for `PFoo:ObjectType`, arising from `type PFoo = ref object`
  157. # Generated object names can be identical, so we need to
  158. # disambiguate furthermore by hashing the field types and names.
  159. if t.n.len > 0:
  160. let oldFlags = symWithFlags.flags
  161. # Hack to prevent endless recursion
  162. # xxx instead, use a hash table to indicate we've already visited a type, which
  163. # would also be more efficient.
  164. symWithFlags.flags.excl {sfAnon, sfGenSym}
  165. hashTree(c, t.n, flags + {CoHashTypeInsideNode}, conf)
  166. symWithFlags.flags = oldFlags
  167. else:
  168. # The object has no fields: we _must_ add something here in order to
  169. # make the hash different from the one we produce by hashing only the
  170. # type name.
  171. c &= ".empty"
  172. else:
  173. c &= t.id
  174. if t.hasElementType and t.baseClass != nil:
  175. hashType c, t.baseClass, flags, conf
  176. of tyRef, tyPtr, tyVar:
  177. c &= char(t.kind)
  178. if t.hasElementType:
  179. c.hashType t.elementType, flags, conf
  180. if tfVarIsPtr in t.flags: c &= ".varisptr"
  181. of tyGenericBody:
  182. c &= char(t.kind)
  183. if t.hasElementType:
  184. c.hashType t.typeBodyImpl, flags, conf
  185. of tyFromExpr:
  186. c &= char(t.kind)
  187. c.hashTree(t.n, {}, conf)
  188. of tyTuple:
  189. c &= char(t.kind)
  190. if t.n != nil and CoType notin flags:
  191. for i in 0..<t.n.len:
  192. assert(t.n[i].kind == nkSym)
  193. c &= t.n[i].sym.name.s
  194. c &= ':'
  195. c.hashType(t.n[i].sym.typ, flags+{CoIgnoreRange}, conf)
  196. c &= ','
  197. else:
  198. for a in t.kids: c.hashType a, flags+{CoIgnoreRange}, conf
  199. of tyRange:
  200. if CoIgnoreRange notin flags:
  201. c &= char(t.kind)
  202. c.hashTree(t.n, {}, conf)
  203. c.hashType(t.elementType, flags, conf)
  204. of tyStatic:
  205. c &= char(t.kind)
  206. c.hashTree(t.n, {}, conf)
  207. c.hashType(t.skipModifier, flags, conf)
  208. of tyProc:
  209. c &= char(t.kind)
  210. c &= (if tfIterator in t.flags: "iterator " else: "proc ")
  211. if CoProc in flags and t.n != nil:
  212. let params = t.n
  213. for i in 1..<params.len:
  214. let param = params[i].sym
  215. c &= param.name.s
  216. c &= ':'
  217. c.hashType(param.typ, flags, conf)
  218. c &= ','
  219. c.hashType(t.returnType, flags, conf)
  220. else:
  221. for a in t.signature: c.hashType(a, flags, conf)
  222. c &= char(t.callConv)
  223. # purity of functions doesn't have to affect the mangling (which is in fact
  224. # problematic for HCR - someone could have cached a pointer to another
  225. # function which changes its purity and suddenly the cached pointer is danglign)
  226. # IMHO anything that doesn't affect the overload resolution shouldn't be part of the mangling...
  227. # if CoType notin flags:
  228. # if tfNoSideEffect in t.flags: c &= ".noSideEffect"
  229. # if tfThread in t.flags: c &= ".thread"
  230. if tfVarargs in t.flags: c &= ".varargs"
  231. of tyArray:
  232. c &= char(t.kind)
  233. c.hashType(t.indexType, flags-{CoIgnoreRange}, conf)
  234. c.hashType(t.elementType, flags-{CoIgnoreRange}, conf)
  235. else:
  236. c &= char(t.kind)
  237. for a in t.kids: c.hashType(a, flags, conf)
  238. if tfNotNil in t.flags and CoType notin flags: c &= "not nil"
  239. when defined(debugSigHashes):
  240. import db_sqlite
  241. let db = open(connection="sighashes.db", user="araq", password="",
  242. database="sighashes")
  243. db.exec(sql"DROP TABLE IF EXISTS sighashes")
  244. db.exec sql"""CREATE TABLE sighashes(
  245. id integer primary key,
  246. hash varchar(5000) not null,
  247. type varchar(5000) not null,
  248. unique (hash, type))"""
  249. # select hash, type from sighashes where hash in
  250. # (select hash from sighashes group by hash having count(*) > 1) order by hash;
  251. proc hashType*(t: PType; conf: ConfigRef; flags: set[ConsiderFlag] = {CoType}): SigHash =
  252. result = default(SigHash)
  253. var c: MD5Context
  254. md5Init c
  255. hashType c, t, flags+{CoOwnerSig}, conf
  256. md5Final c, result.MD5Digest
  257. when defined(debugSigHashes):
  258. db.exec(sql"INSERT OR IGNORE INTO sighashes(type, hash) VALUES (?, ?)",
  259. typeToString(t), $result)
  260. proc hashProc(s: PSym; conf: ConfigRef): SigHash =
  261. result = default(SigHash)
  262. var c: MD5Context
  263. md5Init c
  264. hashType c, s.typ, {CoProc}, conf
  265. var m = s
  266. while m.kind != skModule: m = m.owner
  267. let p = m.owner
  268. assert p.kind == skPackage
  269. c &= p.name.s
  270. c &= "."
  271. c &= m.name.s
  272. if sfDispatcher in s.flags:
  273. c &= ".dispatcher"
  274. # so that createThread[void]() (aka generic specialization) gets a unique
  275. # hash, we also hash the line information. This is pretty bad, but the best
  276. # solution for now:
  277. #c &= s.info.line
  278. md5Final c, result.MD5Digest
  279. proc hashNonProc*(s: PSym): SigHash =
  280. result = default(SigHash)
  281. var c: MD5Context
  282. md5Init c
  283. hashSym(c, s)
  284. var it = s
  285. while it != nil:
  286. c &= it.name.s
  287. c &= "."
  288. it = it.owner
  289. # for bug #5135 we also take the position into account, but only
  290. # for parameters, because who knows what else position dependency
  291. # might cause:
  292. if s.kind == skParam:
  293. c &= s.position
  294. md5Final c, result.MD5Digest
  295. proc hashOwner*(s: PSym): SigHash =
  296. result = default(SigHash)
  297. var c: MD5Context
  298. md5Init c
  299. var m = s
  300. while m.kind != skModule: m = m.owner
  301. let p = m.owner
  302. assert p.kind == skPackage
  303. c &= p.name.s
  304. c &= "."
  305. c &= m.name.s
  306. md5Final c, result.MD5Digest
  307. proc sigHash*(s: PSym; conf: ConfigRef): SigHash =
  308. if s.kind in routineKinds and s.typ != nil:
  309. result = hashProc(s, conf)
  310. else:
  311. result = hashNonProc(s)
  312. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash
  313. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode)
  314. proc hashVarSymBody(graph: ModuleGraph, c: var MD5Context, s: PSym) =
  315. assert: s.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}
  316. if sfGlobal notin s.flags:
  317. c &= char(s.kind)
  318. c &= s.name.s
  319. else:
  320. c &= hashNonProc(s)
  321. # this one works for let and const but not for var. True variables can change value
  322. # later on. it is user resposibility to hash his global state if required
  323. if s.ast != nil and s.ast.kind in {nkIdentDefs, nkConstDef}:
  324. hashBodyTree(graph, c, s.ast[^1])
  325. else:
  326. hashBodyTree(graph, c, s.ast)
  327. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode) =
  328. # hash Nim tree recursing into simply
  329. if n == nil:
  330. c &= "nil"
  331. return
  332. c &= char(n.kind)
  333. case n.kind
  334. of nkEmpty, nkNilLit, nkType: discard
  335. of nkIdent:
  336. c &= n.ident.s
  337. of nkSym:
  338. if n.sym.kind in skProcKinds:
  339. c &= symBodyDigest(graph, n.sym)
  340. elif n.sym.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}:
  341. hashVarSymBody(graph, c, n.sym)
  342. else:
  343. c &= hashNonProc(n.sym)
  344. of nkProcDef, nkFuncDef, nkTemplateDef, nkMacroDef:
  345. discard # we track usage of proc symbols not their definition
  346. of nkCharLit..nkUInt64Lit:
  347. c &= n.intVal
  348. of nkFloatLit..nkFloat64Lit:
  349. c &= n.floatVal
  350. of nkStrLit..nkTripleStrLit:
  351. c &= n.strVal
  352. else:
  353. for i in 0..<n.len:
  354. hashBodyTree(graph, c, n[i])
  355. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash =
  356. ## compute unique digest of the proc/func/method symbols
  357. ## recursing into invoked symbols as well
  358. assert(sym.kind in skProcKinds, $sym.kind)
  359. result = default(SigHash)
  360. graph.symBodyHashes.withValue(sym.id, value):
  361. return value[]
  362. var c: MD5Context
  363. md5Init(c)
  364. c.hashType(sym.typ, {CoProc}, graph.config)
  365. c &= char(sym.kind)
  366. c.md5Final(result.MD5Digest)
  367. graph.symBodyHashes[sym.id] = result # protect from recursion in the body
  368. if sym.ast != nil:
  369. md5Init(c)
  370. c.md5Update(cast[cstring](result.addr), sizeof(result))
  371. hashBodyTree(graph, c, getBody(graph, sym))
  372. c.md5Final(result.MD5Digest)
  373. graph.symBodyHashes[sym.id] = result
  374. proc idOrSig*(s: PSym, currentModule: string,
  375. sigCollisions: var CountTable[SigHash]; conf: ConfigRef): Rope =
  376. if s.kind in routineKinds and s.typ != nil:
  377. # signatures for exported routines are reliable enough to
  378. # produce a unique name and this means produced C++ is more stable regarding
  379. # Nim changes:
  380. let sig = hashProc(s, conf)
  381. result = rope($sig)
  382. #let m = if s.typ.callConv != ccInline: findPendingModule(m, s) else: m
  383. let counter = sigCollisions.getOrDefault(sig)
  384. #if sigs == "_jckmNePK3i2MFnWwZlp6Lg" and s.name.s == "contains":
  385. # echo "counter ", counter, " ", s.id
  386. if counter != 0:
  387. result.add "_" & rope(counter+1)
  388. # this minor hack is necessary to make tests/collections/thashes compile.
  389. # The inlined hash function's original module is ambiguous so we end up
  390. # generating duplicate names otherwise:
  391. if s.typ.callConv == ccInline:
  392. result.add rope(currentModule)
  393. sigCollisions.inc(sig)
  394. else:
  395. let sig = hashNonProc(s)
  396. result = rope($sig)
  397. let counter = sigCollisions.getOrDefault(sig)
  398. if counter != 0:
  399. result.add "_" & rope(counter+1)
  400. sigCollisions.inc(sig)