sighashes.nim 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2017 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Computes hash values for routine (proc, method etc) signatures.
  10. import ast, tables, ropes, md5_old, modulegraphs
  11. from hashes import Hash
  12. import types
  13. when defined(nimPreviewSlimSystem):
  14. import std/assertions
  15. proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
  16. proc `&=`(c: var MD5Context, ch: char) =
  17. # XXX suspicious code here; relies on ch being zero terminated?
  18. md5Update(c, unsafeAddr ch, 1)
  19. proc `&=`(c: var MD5Context, i: BiggestInt) =
  20. md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
  21. proc `&=`(c: var MD5Context, f: BiggestFloat) =
  22. md5Update(c, cast[cstring](unsafeAddr f), sizeof(f))
  23. proc `&=`(c: var MD5Context, s: SigHash) =
  24. md5Update(c, cast[cstring](unsafeAddr s), sizeof(s))
  25. template lowlevel(v) =
  26. md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
  27. type
  28. ConsiderFlag* = enum
  29. CoProc
  30. CoType
  31. CoOwnerSig
  32. CoIgnoreRange
  33. CoConsiderOwned
  34. CoDistinct
  35. CoHashTypeInsideNode
  36. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag])
  37. proc hashSym(c: var MD5Context, s: PSym) =
  38. if sfAnon in s.flags or s.kind == skGenericParam:
  39. c &= ":anon"
  40. else:
  41. var it = s
  42. while it != nil:
  43. c &= it.name.s
  44. c &= "."
  45. it = it.owner
  46. proc hashTypeSym(c: var MD5Context, s: PSym) =
  47. if sfAnon in s.flags or s.kind == skGenericParam:
  48. c &= ":anon"
  49. else:
  50. var it = s
  51. while it != nil:
  52. if sfFromGeneric in it.flags and it.kind in routineKinds and
  53. it.typ != nil:
  54. hashType c, it.typ, {CoProc}
  55. c &= it.name.s
  56. c &= "."
  57. it = it.owner
  58. proc hashTree(c: var MD5Context, n: PNode; flags: set[ConsiderFlag]) =
  59. if n == nil:
  60. c &= "\255"
  61. return
  62. let k = n.kind
  63. c &= char(k)
  64. # we really must not hash line information. 'n.typ' is debatable but
  65. # shouldn't be necessary for now and avoids potential infinite recursions.
  66. case n.kind
  67. of nkEmpty, nkNilLit, nkType: discard
  68. of nkIdent:
  69. c &= n.ident.s
  70. of nkSym:
  71. hashSym(c, n.sym)
  72. if CoHashTypeInsideNode in flags and n.sym.typ != nil:
  73. hashType(c, n.sym.typ, flags)
  74. of nkCharLit..nkUInt64Lit:
  75. let v = n.intVal
  76. lowlevel v
  77. of nkFloatLit..nkFloat64Lit:
  78. let v = n.floatVal
  79. lowlevel v
  80. of nkStrLit..nkTripleStrLit:
  81. c &= n.strVal
  82. else:
  83. for i in 0..<n.len: hashTree(c, n[i], flags)
  84. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]) =
  85. if t == nil:
  86. c &= "\254"
  87. return
  88. case t.kind
  89. of tyGenericInvocation:
  90. for i in 0..<t.len:
  91. c.hashType t[i], flags
  92. of tyDistinct:
  93. if CoDistinct in flags:
  94. if t.sym != nil: c.hashSym(t.sym)
  95. if t.sym == nil or tfFromGeneric in t.flags:
  96. c.hashType t.lastSon, flags
  97. elif CoType in flags or t.sym == nil:
  98. c.hashType t.lastSon, flags
  99. else:
  100. c.hashSym(t.sym)
  101. of tyGenericInst:
  102. if sfInfixCall in t.base.sym.flags:
  103. # This is an imported C++ generic type.
  104. # We cannot trust the `lastSon` to hold a properly populated and unique
  105. # value for each instantiation, so we hash the generic parameters here:
  106. let normalizedType = t.skipGenericAlias
  107. for i in 0..<normalizedType.len - 1:
  108. c.hashType t[i], flags
  109. else:
  110. c.hashType t.lastSon, flags
  111. of tyAlias, tySink, tyUserTypeClasses, tyInferred:
  112. c.hashType t.lastSon, flags
  113. of tyOwned:
  114. if CoConsiderOwned in flags:
  115. c &= char(t.kind)
  116. c.hashType t.lastSon, flags
  117. of tyBool, tyChar, tyInt..tyUInt64:
  118. # no canonicalization for integral types, so that e.g. ``pid_t`` is
  119. # produced instead of ``NI``:
  120. c &= char(t.kind)
  121. if t.sym != nil and {sfImportc, sfExportc} * t.sym.flags != {}:
  122. c.hashSym(t.sym)
  123. of tyObject, tyEnum:
  124. if t.typeInst != nil:
  125. # prevent against infinite recursions here, see bug #8883:
  126. let inst = t.typeInst
  127. t.typeInst = nil
  128. assert inst.kind == tyGenericInst
  129. for i in 0..<inst.len - 1:
  130. c.hashType inst[i], flags
  131. t.typeInst = inst
  132. return
  133. c &= char(t.kind)
  134. # Every cyclic type in Nim need to be constructed via some 't.sym', so this
  135. # is actually safe without an infinite recursion check:
  136. if t.sym != nil:
  137. if {sfCompilerProc} * t.sym.flags != {}:
  138. doAssert t.sym.loc.r != ""
  139. # The user has set a specific name for this type
  140. c &= t.sym.loc.r
  141. elif CoOwnerSig in flags:
  142. c.hashTypeSym(t.sym)
  143. else:
  144. c.hashSym(t.sym)
  145. var symWithFlags: PSym
  146. template hasFlag(sym): bool =
  147. let ret = {sfAnon, sfGenSym} * sym.flags != {}
  148. if ret: symWithFlags = sym
  149. ret
  150. if hasFlag(t.sym) or (t.kind == tyObject and t.owner.kind == skType and t.owner.typ.kind == tyRef and hasFlag(t.owner)):
  151. # for `PFoo:ObjectType`, arising from `type PFoo = ref object`
  152. # Generated object names can be identical, so we need to
  153. # disambiguate furthermore by hashing the field types and names.
  154. if t.n.len > 0:
  155. let oldFlags = symWithFlags.flags
  156. # Hack to prevent endless recursion
  157. # xxx instead, use a hash table to indicate we've already visited a type, which
  158. # would also be more efficient.
  159. symWithFlags.flags.excl {sfAnon, sfGenSym}
  160. hashTree(c, t.n, flags + {CoHashTypeInsideNode})
  161. symWithFlags.flags = oldFlags
  162. else:
  163. # The object has no fields: we _must_ add something here in order to
  164. # make the hash different from the one we produce by hashing only the
  165. # type name.
  166. c &= ".empty"
  167. else:
  168. c &= t.id
  169. if t.len > 0 and t[0] != nil:
  170. hashType c, t[0], flags
  171. of tyRef, tyPtr, tyGenericBody, tyVar:
  172. c &= char(t.kind)
  173. c.hashType t.lastSon, flags
  174. if tfVarIsPtr in t.flags: c &= ".varisptr"
  175. of tyFromExpr:
  176. c &= char(t.kind)
  177. c.hashTree(t.n, {})
  178. of tyTuple:
  179. c &= char(t.kind)
  180. if t.n != nil and CoType notin flags:
  181. assert(t.n.len == t.len)
  182. for i in 0..<t.n.len:
  183. assert(t.n[i].kind == nkSym)
  184. c &= t.n[i].sym.name.s
  185. c &= ':'
  186. c.hashType(t[i], flags+{CoIgnoreRange})
  187. c &= ','
  188. else:
  189. for i in 0..<t.len: c.hashType t[i], flags+{CoIgnoreRange}
  190. of tyRange:
  191. if CoIgnoreRange notin flags:
  192. c &= char(t.kind)
  193. c.hashTree(t.n, {})
  194. c.hashType(t[0], flags)
  195. of tyStatic:
  196. c &= char(t.kind)
  197. c.hashTree(t.n, {})
  198. c.hashType(t[0], flags)
  199. of tyProc:
  200. c &= char(t.kind)
  201. c &= (if tfIterator in t.flags: "iterator " else: "proc ")
  202. if CoProc in flags and t.n != nil:
  203. let params = t.n
  204. for i in 1..<params.len:
  205. let param = params[i].sym
  206. c &= param.name.s
  207. c &= ':'
  208. c.hashType(param.typ, flags)
  209. c &= ','
  210. c.hashType(t[0], flags)
  211. else:
  212. for i in 0..<t.len: c.hashType(t[i], flags)
  213. c &= char(t.callConv)
  214. # purity of functions doesn't have to affect the mangling (which is in fact
  215. # problematic for HCR - someone could have cached a pointer to another
  216. # function which changes its purity and suddenly the cached pointer is danglign)
  217. # IMHO anything that doesn't affect the overload resolution shouldn't be part of the mangling...
  218. # if CoType notin flags:
  219. # if tfNoSideEffect in t.flags: c &= ".noSideEffect"
  220. # if tfThread in t.flags: c &= ".thread"
  221. if tfVarargs in t.flags: c &= ".varargs"
  222. of tyArray:
  223. c &= char(t.kind)
  224. for i in 0..<t.len: c.hashType(t[i], flags-{CoIgnoreRange})
  225. else:
  226. c &= char(t.kind)
  227. for i in 0..<t.len: c.hashType(t[i], flags)
  228. if tfNotNil in t.flags and CoType notin flags: c &= "not nil"
  229. when defined(debugSigHashes):
  230. import db_sqlite
  231. let db = open(connection="sighashes.db", user="araq", password="",
  232. database="sighashes")
  233. db.exec(sql"DROP TABLE IF EXISTS sighashes")
  234. db.exec sql"""CREATE TABLE sighashes(
  235. id integer primary key,
  236. hash varchar(5000) not null,
  237. type varchar(5000) not null,
  238. unique (hash, type))"""
  239. # select hash, type from sighashes where hash in
  240. # (select hash from sighashes group by hash having count(*) > 1) order by hash;
  241. proc hashType*(t: PType; flags: set[ConsiderFlag] = {CoType}): SigHash =
  242. var c: MD5Context
  243. md5Init c
  244. hashType c, t, flags+{CoOwnerSig}
  245. md5Final c, result.MD5Digest
  246. when defined(debugSigHashes):
  247. db.exec(sql"INSERT OR IGNORE INTO sighashes(type, hash) VALUES (?, ?)",
  248. typeToString(t), $result)
  249. proc hashProc*(s: PSym): SigHash =
  250. var c: MD5Context
  251. md5Init c
  252. hashType c, s.typ, {CoProc}
  253. var m = s
  254. while m.kind != skModule: m = m.owner
  255. let p = m.owner
  256. assert p.kind == skPackage
  257. c &= p.name.s
  258. c &= "."
  259. c &= m.name.s
  260. if sfDispatcher in s.flags:
  261. c &= ".dispatcher"
  262. # so that createThread[void]() (aka generic specialization) gets a unique
  263. # hash, we also hash the line information. This is pretty bad, but the best
  264. # solution for now:
  265. #c &= s.info.line
  266. md5Final c, result.MD5Digest
  267. proc hashNonProc*(s: PSym): SigHash =
  268. var c: MD5Context
  269. md5Init c
  270. hashSym(c, s)
  271. var it = s
  272. while it != nil:
  273. c &= it.name.s
  274. c &= "."
  275. it = it.owner
  276. # for bug #5135 we also take the position into account, but only
  277. # for parameters, because who knows what else position dependency
  278. # might cause:
  279. if s.kind == skParam:
  280. c &= s.position
  281. md5Final c, result.MD5Digest
  282. proc hashOwner*(s: PSym): SigHash =
  283. var c: MD5Context
  284. md5Init c
  285. var m = s
  286. while m.kind != skModule: m = m.owner
  287. let p = m.owner
  288. assert p.kind == skPackage
  289. c &= p.name.s
  290. c &= "."
  291. c &= m.name.s
  292. md5Final c, result.MD5Digest
  293. proc sigHash*(s: PSym): SigHash =
  294. if s.kind in routineKinds and s.typ != nil:
  295. result = hashProc(s)
  296. else:
  297. result = hashNonProc(s)
  298. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash
  299. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode)
  300. proc hashVarSymBody(graph: ModuleGraph, c: var MD5Context, s: PSym) =
  301. assert: s.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}
  302. if sfGlobal notin s.flags:
  303. c &= char(s.kind)
  304. c &= s.name.s
  305. else:
  306. c &= hashNonProc(s)
  307. # this one works for let and const but not for var. True variables can change value
  308. # later on. it is user resposibility to hash his global state if required
  309. if s.ast != nil and s.ast.kind in {nkIdentDefs, nkConstDef}:
  310. hashBodyTree(graph, c, s.ast[^1])
  311. else:
  312. hashBodyTree(graph, c, s.ast)
  313. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode) =
  314. # hash Nim tree recursing into simply
  315. if n == nil:
  316. c &= "nil"
  317. return
  318. c &= char(n.kind)
  319. case n.kind
  320. of nkEmpty, nkNilLit, nkType: discard
  321. of nkIdent:
  322. c &= n.ident.s
  323. of nkSym:
  324. if n.sym.kind in skProcKinds:
  325. c &= symBodyDigest(graph, n.sym)
  326. elif n.sym.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}:
  327. hashVarSymBody(graph, c, n.sym)
  328. else:
  329. c &= hashNonProc(n.sym)
  330. of nkProcDef, nkFuncDef, nkTemplateDef, nkMacroDef:
  331. discard # we track usage of proc symbols not their definition
  332. of nkCharLit..nkUInt64Lit:
  333. c &= n.intVal
  334. of nkFloatLit..nkFloat64Lit:
  335. c &= n.floatVal
  336. of nkStrLit..nkTripleStrLit:
  337. c &= n.strVal
  338. else:
  339. for i in 0..<n.len:
  340. hashBodyTree(graph, c, n[i])
  341. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash =
  342. ## compute unique digest of the proc/func/method symbols
  343. ## recursing into invoked symbols as well
  344. assert(sym.kind in skProcKinds, $sym.kind)
  345. graph.symBodyHashes.withValue(sym.id, value):
  346. return value[]
  347. var c: MD5Context
  348. md5Init(c)
  349. c.hashType(sym.typ, {CoProc})
  350. c &= char(sym.kind)
  351. c.md5Final(result.MD5Digest)
  352. graph.symBodyHashes[sym.id] = result # protect from recursion in the body
  353. if sym.ast != nil:
  354. md5Init(c)
  355. c.md5Update(cast[cstring](result.addr), sizeof(result))
  356. hashBodyTree(graph, c, getBody(graph, sym))
  357. c.md5Final(result.MD5Digest)
  358. graph.symBodyHashes[sym.id] = result
  359. proc idOrSig*(s: PSym, currentModule: string,
  360. sigCollisions: var CountTable[SigHash]): Rope =
  361. if s.kind in routineKinds and s.typ != nil:
  362. # signatures for exported routines are reliable enough to
  363. # produce a unique name and this means produced C++ is more stable regarding
  364. # Nim changes:
  365. let sig = hashProc(s)
  366. result = rope($sig)
  367. #let m = if s.typ.callConv != ccInline: findPendingModule(m, s) else: m
  368. let counter = sigCollisions.getOrDefault(sig)
  369. #if sigs == "_jckmNePK3i2MFnWwZlp6Lg" and s.name.s == "contains":
  370. # echo "counter ", counter, " ", s.id
  371. if counter != 0:
  372. result.add "_" & rope(counter+1)
  373. # this minor hack is necessary to make tests/collections/thashes compile.
  374. # The inlined hash function's original module is ambiguous so we end up
  375. # generating duplicate names otherwise:
  376. if s.typ.callConv == ccInline:
  377. result.add rope(currentModule)
  378. sigCollisions.inc(sig)
  379. else:
  380. let sig = hashNonProc(s)
  381. result = rope($sig)
  382. let counter = sigCollisions.getOrDefault(sig)
  383. if counter != 0:
  384. result.add "_" & rope(counter+1)
  385. sigCollisions.inc(sig)