sighashes.nim 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2017 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Computes hash values for routine (proc, method etc) signatures.
  10. import ast, ropes, modulegraphs, options, msgs, pathutils
  11. from std/hashes import Hash
  12. import std/tables
  13. import types
  14. import ../dist/checksums/src/checksums/md5
  15. when defined(nimPreviewSlimSystem):
  16. import std/assertions
  17. proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
  18. proc `&=`(c: var MD5Context, ch: char) =
  19. # XXX suspicious code here; relies on ch being zero terminated?
  20. md5Update(c, cast[cstring](unsafeAddr ch), 1)
  21. proc `&=`(c: var MD5Context, i: BiggestInt) =
  22. md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
  23. proc `&=`(c: var MD5Context, f: BiggestFloat) =
  24. md5Update(c, cast[cstring](unsafeAddr f), sizeof(f))
  25. proc `&=`(c: var MD5Context, s: SigHash) =
  26. md5Update(c, cast[cstring](unsafeAddr s), sizeof(s))
  27. template lowlevel(v) =
  28. md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
  29. type
  30. ConsiderFlag* = enum
  31. CoProc
  32. CoType
  33. CoOwnerSig
  34. CoIgnoreRange
  35. CoConsiderOwned
  36. CoDistinct
  37. CoHashTypeInsideNode
  38. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]; conf: ConfigRef)
  39. proc hashSym(c: var MD5Context, s: PSym) =
  40. if sfAnon in s.flags or s.kind == skGenericParam:
  41. c &= ":anon"
  42. else:
  43. var it = s
  44. while it != nil:
  45. c &= it.name.s
  46. c &= "."
  47. it = it.owner
  48. c &= "#"
  49. c &= s.disamb
  50. proc hashTypeSym(c: var MD5Context, s: PSym; conf: ConfigRef) =
  51. if sfAnon in s.flags or s.kind == skGenericParam:
  52. c &= ":anon"
  53. else:
  54. var it = s
  55. c &= customPath(conf.toFullPath(s.info))
  56. while it != nil:
  57. if sfFromGeneric in it.flags and it.kind in routineKinds and
  58. it.typ != nil:
  59. hashType c, it.typ, {CoProc}, conf
  60. c &= it.name.s
  61. c &= "."
  62. it = it.owner
  63. c &= "#"
  64. c &= s.disamb
  65. proc hashTree(c: var MD5Context, n: PNode; flags: set[ConsiderFlag]; conf: ConfigRef) =
  66. if n == nil:
  67. c &= "\255"
  68. return
  69. let k = n.kind
  70. c &= char(k)
  71. # we really must not hash line information. 'n.typ' is debatable but
  72. # shouldn't be necessary for now and avoids potential infinite recursions.
  73. case n.kind
  74. of nkEmpty, nkNilLit, nkType: discard
  75. of nkIdent:
  76. c &= n.ident.s
  77. of nkSym:
  78. hashSym(c, n.sym)
  79. if CoHashTypeInsideNode in flags and n.sym.typ != nil:
  80. hashType(c, n.sym.typ, flags, conf)
  81. of nkCharLit..nkUInt64Lit:
  82. let v = n.intVal
  83. lowlevel v
  84. of nkFloatLit..nkFloat64Lit:
  85. let v = n.floatVal
  86. lowlevel v
  87. of nkStrLit..nkTripleStrLit:
  88. c &= n.strVal
  89. else:
  90. for i in 0..<n.len: hashTree(c, n[i], flags, conf)
  91. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]; conf: ConfigRef) =
  92. if t == nil:
  93. c &= "\254"
  94. return
  95. case t.kind
  96. of tyGenericInvocation:
  97. for a in t.kids:
  98. c.hashType a, flags, conf
  99. of tyDistinct:
  100. if CoDistinct in flags:
  101. if t.sym != nil: c.hashSym(t.sym)
  102. if t.sym == nil or tfFromGeneric in t.flags:
  103. c.hashType t.elementType, flags, conf
  104. elif CoType in flags or t.sym == nil:
  105. c.hashType t.elementType, flags, conf
  106. else:
  107. c.hashSym(t.sym)
  108. of tyGenericInst:
  109. if sfInfixCall in t.base.sym.flags:
  110. # This is an imported C++ generic type.
  111. # We cannot trust the `lastSon` to hold a properly populated and unique
  112. # value for each instantiation, so we hash the generic parameters here:
  113. let normalizedType = t.skipGenericAlias
  114. c.hashType normalizedType.genericHead, flags, conf
  115. for _, a in normalizedType.genericInstParams:
  116. c.hashType a, flags, conf
  117. else:
  118. c.hashType t.skipModifier, flags, conf
  119. of tyAlias, tySink, tyUserTypeClasses, tyInferred:
  120. c.hashType t.skipModifier, flags, conf
  121. of tyOwned:
  122. if CoConsiderOwned in flags:
  123. c &= char(t.kind)
  124. c.hashType t.skipModifier, flags, conf
  125. of tyBool, tyChar, tyInt..tyUInt64:
  126. # no canonicalization for integral types, so that e.g. ``pid_t`` is
  127. # produced instead of ``NI``:
  128. c &= char(t.kind)
  129. if t.sym != nil and {sfImportc, sfExportc} * t.sym.flags != {}:
  130. c.hashSym(t.sym)
  131. of tyObject, tyEnum:
  132. if t.typeInst != nil:
  133. # prevent against infinite recursions here, see bug #8883:
  134. let inst = t.typeInst
  135. t.typeInst = nil
  136. assert inst.kind == tyGenericInst
  137. c.hashType inst.genericHead, flags, conf
  138. for _, a in inst.genericInstParams:
  139. c.hashType a, flags, conf
  140. t.typeInst = inst
  141. return
  142. c &= char(t.kind)
  143. # Every cyclic type in Nim need to be constructed via some 't.sym', so this
  144. # is actually safe without an infinite recursion check:
  145. if t.sym != nil:
  146. if {sfCompilerProc} * t.sym.flags != {}:
  147. doAssert t.sym.loc.snippet != ""
  148. # The user has set a specific name for this type
  149. c &= t.sym.loc.snippet
  150. elif CoOwnerSig in flags:
  151. c.hashTypeSym(t.sym, conf)
  152. else:
  153. c.hashSym(t.sym)
  154. var symWithFlags: PSym = nil
  155. template hasFlag(sym): bool =
  156. let ret = {sfAnon, sfGenSym} * sym.flags != {}
  157. if ret: symWithFlags = sym
  158. ret
  159. if hasFlag(t.sym) or (t.kind == tyObject and t.owner.kind == skType and t.owner.typ.kind == tyRef and hasFlag(t.owner)):
  160. # for `PFoo:ObjectType`, arising from `type PFoo = ref object`
  161. # Generated object names can be identical, so we need to
  162. # disambiguate furthermore by hashing the field types and names.
  163. if t.n.len > 0:
  164. let oldFlags = symWithFlags.flags
  165. # Hack to prevent endless recursion
  166. # xxx instead, use a hash table to indicate we've already visited a type, which
  167. # would also be more efficient.
  168. symWithFlags.flags.excl {sfAnon, sfGenSym}
  169. hashTree(c, t.n, flags + {CoHashTypeInsideNode}, conf)
  170. symWithFlags.flags = oldFlags
  171. else:
  172. # The object has no fields: we _must_ add something here in order to
  173. # make the hash different from the one we produce by hashing only the
  174. # type name.
  175. c &= ".empty"
  176. else:
  177. c &= t.id
  178. if t.hasElementType and t.baseClass != nil:
  179. hashType c, t.baseClass, flags, conf
  180. of tyRef, tyPtr, tyVar:
  181. c &= char(t.kind)
  182. if t.hasElementType:
  183. c.hashType t.elementType, flags, conf
  184. if tfVarIsPtr in t.flags: c &= ".varisptr"
  185. of tyGenericBody:
  186. c &= char(t.kind)
  187. if t.hasElementType:
  188. c.hashType t.typeBodyImpl, flags, conf
  189. of tyFromExpr:
  190. c &= char(t.kind)
  191. c.hashTree(t.n, {}, conf)
  192. of tyTuple:
  193. c &= char(t.kind)
  194. if t.n != nil and CoType notin flags:
  195. for i in 0..<t.n.len:
  196. assert(t.n[i].kind == nkSym)
  197. c &= t.n[i].sym.name.s
  198. c &= ':'
  199. c.hashType(t.n[i].sym.typ, flags+{CoIgnoreRange}, conf)
  200. c &= ','
  201. else:
  202. for a in t.kids: c.hashType a, flags+{CoIgnoreRange}, conf
  203. of tyRange:
  204. if CoIgnoreRange notin flags:
  205. c &= char(t.kind)
  206. c.hashTree(t.n, {}, conf)
  207. c.hashType(t.elementType, flags, conf)
  208. of tyStatic:
  209. c &= char(t.kind)
  210. c.hashTree(t.n, {}, conf)
  211. c.hashType(t.skipModifier, flags, conf)
  212. of tyProc:
  213. c &= char(t.kind)
  214. c &= (if tfIterator in t.flags: "iterator " else: "proc ")
  215. if CoProc in flags and t.n != nil:
  216. let params = t.n
  217. for i in 1..<params.len:
  218. let param = params[i].sym
  219. c &= param.name.s
  220. c &= ':'
  221. c.hashType(param.typ, flags, conf)
  222. c &= ','
  223. c.hashType(t.returnType, flags, conf)
  224. else:
  225. for a in t.signature: c.hashType(a, flags, conf)
  226. c &= char(t.callConv)
  227. # purity of functions doesn't have to affect the mangling (which is in fact
  228. # problematic for HCR - someone could have cached a pointer to another
  229. # function which changes its purity and suddenly the cached pointer is danglign)
  230. # IMHO anything that doesn't affect the overload resolution shouldn't be part of the mangling...
  231. # if CoType notin flags:
  232. # if tfNoSideEffect in t.flags: c &= ".noSideEffect"
  233. # if tfThread in t.flags: c &= ".thread"
  234. if tfVarargs in t.flags: c &= ".varargs"
  235. of tyArray:
  236. c &= char(t.kind)
  237. c.hashType(t.indexType, flags-{CoIgnoreRange}, conf)
  238. c.hashType(t.elementType, flags-{CoIgnoreRange}, conf)
  239. else:
  240. c &= char(t.kind)
  241. for a in t.kids: c.hashType(a, flags, conf)
  242. if tfNotNil in t.flags and CoType notin flags: c &= "not nil"
  243. when defined(debugSigHashes):
  244. import db_sqlite
  245. let db = open(connection="sighashes.db", user="araq", password="",
  246. database="sighashes")
  247. db.exec(sql"DROP TABLE IF EXISTS sighashes")
  248. db.exec sql"""CREATE TABLE sighashes(
  249. id integer primary key,
  250. hash varchar(5000) not null,
  251. type varchar(5000) not null,
  252. unique (hash, type))"""
  253. # select hash, type from sighashes where hash in
  254. # (select hash from sighashes group by hash having count(*) > 1) order by hash;
  255. proc hashType*(t: PType; conf: ConfigRef; flags: set[ConsiderFlag] = {CoType}): SigHash =
  256. result = default(SigHash)
  257. var c: MD5Context = default(MD5Context)
  258. md5Init c
  259. hashType c, t, flags+{CoOwnerSig}, conf
  260. md5Final c, result.MD5Digest
  261. when defined(debugSigHashes):
  262. db.exec(sql"INSERT OR IGNORE INTO sighashes(type, hash) VALUES (?, ?)",
  263. typeToString(t), $result)
  264. proc hashProc(s: PSym; conf: ConfigRef): SigHash =
  265. result = default(SigHash)
  266. var c: MD5Context = default(MD5Context)
  267. md5Init c
  268. hashType c, s.typ, {CoProc}, conf
  269. var m = s
  270. while m.kind != skModule: m = m.owner
  271. let p = m.owner
  272. assert p.kind == skPackage
  273. c &= p.name.s
  274. c &= "."
  275. c &= m.name.s
  276. if sfDispatcher in s.flags:
  277. c &= ".dispatcher"
  278. # so that createThread[void]() (aka generic specialization) gets a unique
  279. # hash, we also hash the line information. This is pretty bad, but the best
  280. # solution for now:
  281. #c &= s.info.line
  282. md5Final c, result.MD5Digest
  283. proc hashNonProc*(s: PSym): SigHash =
  284. result = default(SigHash)
  285. var c: MD5Context = default(MD5Context)
  286. md5Init c
  287. hashSym(c, s)
  288. var it = s
  289. while it != nil:
  290. c &= it.name.s
  291. c &= "."
  292. it = it.owner
  293. # for bug #5135 we also take the position into account, but only
  294. # for parameters, because who knows what else position dependency
  295. # might cause:
  296. if s.kind == skParam:
  297. c &= s.position
  298. md5Final c, result.MD5Digest
  299. proc hashOwner*(s: PSym): SigHash =
  300. result = default(SigHash)
  301. var c: MD5Context = default(MD5Context)
  302. md5Init c
  303. var m = s
  304. while m.kind != skModule: m = m.owner
  305. let p = m.owner
  306. assert p.kind == skPackage
  307. c &= p.name.s
  308. c &= "."
  309. c &= m.name.s
  310. md5Final c, result.MD5Digest
  311. proc sigHash*(s: PSym; conf: ConfigRef): SigHash =
  312. if s.kind in routineKinds and s.typ != nil:
  313. result = hashProc(s, conf)
  314. else:
  315. result = hashNonProc(s)
  316. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash
  317. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode)
  318. proc hashVarSymBody(graph: ModuleGraph, c: var MD5Context, s: PSym) =
  319. assert: s.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}
  320. if sfGlobal notin s.flags:
  321. c &= char(s.kind)
  322. c &= s.name.s
  323. else:
  324. c &= hashNonProc(s)
  325. # this one works for let and const but not for var. True variables can change value
  326. # later on. it is user resposibility to hash his global state if required
  327. if s.ast != nil and s.ast.kind in {nkIdentDefs, nkConstDef}:
  328. hashBodyTree(graph, c, s.ast[^1])
  329. else:
  330. hashBodyTree(graph, c, s.ast)
  331. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode) =
  332. # hash Nim tree recursing into simply
  333. if n == nil:
  334. c &= "nil"
  335. return
  336. c &= char(n.kind)
  337. case n.kind
  338. of nkEmpty, nkNilLit, nkType: discard
  339. of nkIdent:
  340. c &= n.ident.s
  341. of nkSym:
  342. if n.sym.kind in skProcKinds:
  343. c &= symBodyDigest(graph, n.sym)
  344. elif n.sym.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}:
  345. hashVarSymBody(graph, c, n.sym)
  346. else:
  347. c &= hashNonProc(n.sym)
  348. of nkProcDef, nkFuncDef, nkTemplateDef, nkMacroDef:
  349. discard # we track usage of proc symbols not their definition
  350. of nkCharLit..nkUInt64Lit:
  351. c &= n.intVal
  352. of nkFloatLit..nkFloat64Lit:
  353. c &= n.floatVal
  354. of nkStrLit..nkTripleStrLit:
  355. c &= n.strVal
  356. else:
  357. for i in 0..<n.len:
  358. hashBodyTree(graph, c, n[i])
  359. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash =
  360. ## compute unique digest of the proc/func/method symbols
  361. ## recursing into invoked symbols as well
  362. assert(sym.kind in skProcKinds, $sym.kind)
  363. result = default(SigHash)
  364. graph.symBodyHashes.withValue(sym.id, value):
  365. return value[]
  366. var c: MD5Context = default(MD5Context)
  367. md5Init(c)
  368. c.hashType(sym.typ, {CoProc}, graph.config)
  369. c &= char(sym.kind)
  370. c.md5Final(result.MD5Digest)
  371. graph.symBodyHashes[sym.id] = result # protect from recursion in the body
  372. if sym.ast != nil:
  373. md5Init(c)
  374. c.md5Update(cast[cstring](result.addr), sizeof(result))
  375. hashBodyTree(graph, c, getBody(graph, sym))
  376. c.md5Final(result.MD5Digest)
  377. graph.symBodyHashes[sym.id] = result
  378. proc idOrSig*(s: PSym, currentModule: string,
  379. sigCollisions: var CountTable[SigHash]; conf: ConfigRef): Rope =
  380. if s.kind in routineKinds and s.typ != nil:
  381. # signatures for exported routines are reliable enough to
  382. # produce a unique name and this means produced C++ is more stable regarding
  383. # Nim changes:
  384. let sig = hashProc(s, conf)
  385. result = rope($sig)
  386. #let m = if s.typ.callConv != ccInline: findPendingModule(m, s) else: m
  387. let counter = sigCollisions.getOrDefault(sig)
  388. #if sigs == "_jckmNePK3i2MFnWwZlp6Lg" and s.name.s == "contains":
  389. # echo "counter ", counter, " ", s.id
  390. if counter != 0:
  391. result.add "_" & rope(counter+1)
  392. # this minor hack is necessary to make tests/collections/thashes compile.
  393. # The inlined hash function's original module is ambiguous so we end up
  394. # generating duplicate names otherwise:
  395. if s.typ.callConv == ccInline:
  396. result.add rope(currentModule)
  397. sigCollisions.inc(sig)
  398. else:
  399. let sig = hashNonProc(s)
  400. result = rope($sig)
  401. let counter = sigCollisions.getOrDefault(sig)
  402. if counter != 0:
  403. result.add "_" & rope(counter+1)
  404. sigCollisions.inc(sig)