sighashes.nim 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2017 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Computes hash values for routine (proc, method etc) signatures.
  10. import ast, tables, ropes, md5, modulegraphs, options, msgs, packages, pathutils
  11. from hashes import Hash
  12. import types
  13. import std/os
  14. when defined(nimPreviewSlimSystem):
  15. import std/assertions
  16. proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
  17. proc `&=`(c: var MD5Context, ch: char) =
  18. # XXX suspicious code here; relies on ch being zero terminated?
  19. md5Update(c, cast[cstring](unsafeAddr ch), 1)
  20. proc `&=`(c: var MD5Context, i: BiggestInt) =
  21. md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
  22. proc `&=`(c: var MD5Context, f: BiggestFloat) =
  23. md5Update(c, cast[cstring](unsafeAddr f), sizeof(f))
  24. proc `&=`(c: var MD5Context, s: SigHash) =
  25. md5Update(c, cast[cstring](unsafeAddr s), sizeof(s))
  26. template lowlevel(v) =
  27. md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
  28. type
  29. ConsiderFlag* = enum
  30. CoProc
  31. CoType
  32. CoOwnerSig
  33. CoIgnoreRange
  34. CoConsiderOwned
  35. CoDistinct
  36. CoHashTypeInsideNode
  37. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]; conf: ConfigRef)
  38. proc hashSym(c: var MD5Context, s: PSym) =
  39. if sfAnon in s.flags or s.kind == skGenericParam:
  40. c &= ":anon"
  41. else:
  42. var it = s
  43. while it != nil:
  44. c &= it.name.s
  45. c &= "."
  46. it = it.owner
  47. proc hashTypeSym(c: var MD5Context, s: PSym; conf: ConfigRef) =
  48. if sfAnon in s.flags or s.kind == skGenericParam:
  49. c &= ":anon"
  50. else:
  51. var it = s
  52. c &= customPath(conf.toFullPath(s.info))
  53. while it != nil:
  54. if sfFromGeneric in it.flags and it.kind in routineKinds and
  55. it.typ != nil:
  56. hashType c, it.typ, {CoProc}, conf
  57. c &= it.name.s
  58. c &= "."
  59. it = it.owner
  60. proc hashTree(c: var MD5Context, n: PNode; flags: set[ConsiderFlag]; conf: ConfigRef) =
  61. if n == nil:
  62. c &= "\255"
  63. return
  64. let k = n.kind
  65. c &= char(k)
  66. # we really must not hash line information. 'n.typ' is debatable but
  67. # shouldn't be necessary for now and avoids potential infinite recursions.
  68. case n.kind
  69. of nkEmpty, nkNilLit, nkType: discard
  70. of nkIdent:
  71. c &= n.ident.s
  72. of nkSym:
  73. hashSym(c, n.sym)
  74. if CoHashTypeInsideNode in flags and n.sym.typ != nil:
  75. hashType(c, n.sym.typ, flags, conf)
  76. of nkCharLit..nkUInt64Lit:
  77. let v = n.intVal
  78. lowlevel v
  79. of nkFloatLit..nkFloat64Lit:
  80. let v = n.floatVal
  81. lowlevel v
  82. of nkStrLit..nkTripleStrLit:
  83. c &= n.strVal
  84. else:
  85. for i in 0..<n.len: hashTree(c, n[i], flags, conf)
  86. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]; conf: ConfigRef) =
  87. if t == nil:
  88. c &= "\254"
  89. return
  90. case t.kind
  91. of tyGenericInvocation:
  92. for i in 0..<t.len:
  93. c.hashType t[i], flags, conf
  94. of tyDistinct:
  95. if CoDistinct in flags:
  96. if t.sym != nil: c.hashSym(t.sym)
  97. if t.sym == nil or tfFromGeneric in t.flags:
  98. c.hashType t.lastSon, flags, conf
  99. elif CoType in flags or t.sym == nil:
  100. c.hashType t.lastSon, flags, conf
  101. else:
  102. c.hashSym(t.sym)
  103. of tyGenericInst:
  104. if sfInfixCall in t.base.sym.flags:
  105. # This is an imported C++ generic type.
  106. # We cannot trust the `lastSon` to hold a properly populated and unique
  107. # value for each instantiation, so we hash the generic parameters here:
  108. let normalizedType = t.skipGenericAlias
  109. for i in 0..<normalizedType.len - 1:
  110. c.hashType t[i], flags, conf
  111. else:
  112. c.hashType t.lastSon, flags, conf
  113. of tyAlias, tySink, tyUserTypeClasses, tyInferred:
  114. c.hashType t.lastSon, flags, conf
  115. of tyOwned:
  116. if CoConsiderOwned in flags:
  117. c &= char(t.kind)
  118. c.hashType t.lastSon, flags, conf
  119. of tyBool, tyChar, tyInt..tyUInt64:
  120. # no canonicalization for integral types, so that e.g. ``pid_t`` is
  121. # produced instead of ``NI``:
  122. c &= char(t.kind)
  123. if t.sym != nil and {sfImportc, sfExportc} * t.sym.flags != {}:
  124. c.hashSym(t.sym)
  125. of tyObject, tyEnum:
  126. if t.typeInst != nil:
  127. # prevent against infinite recursions here, see bug #8883:
  128. let inst = t.typeInst
  129. t.typeInst = nil
  130. assert inst.kind == tyGenericInst
  131. for i in 0..<inst.len - 1:
  132. c.hashType inst[i], flags, conf
  133. t.typeInst = inst
  134. return
  135. c &= char(t.kind)
  136. # Every cyclic type in Nim need to be constructed via some 't.sym', so this
  137. # is actually safe without an infinite recursion check:
  138. if t.sym != nil:
  139. if {sfCompilerProc} * t.sym.flags != {}:
  140. doAssert t.sym.loc.r != ""
  141. # The user has set a specific name for this type
  142. c &= t.sym.loc.r
  143. elif CoOwnerSig in flags:
  144. c.hashTypeSym(t.sym, conf)
  145. else:
  146. c.hashSym(t.sym)
  147. var symWithFlags: PSym
  148. template hasFlag(sym): bool =
  149. let ret = {sfAnon, sfGenSym} * sym.flags != {}
  150. if ret: symWithFlags = sym
  151. ret
  152. if hasFlag(t.sym) or (t.kind == tyObject and t.owner.kind == skType and t.owner.typ.kind == tyRef and hasFlag(t.owner)):
  153. # for `PFoo:ObjectType`, arising from `type PFoo = ref object`
  154. # Generated object names can be identical, so we need to
  155. # disambiguate furthermore by hashing the field types and names.
  156. if t.n.len > 0:
  157. let oldFlags = symWithFlags.flags
  158. # Hack to prevent endless recursion
  159. # xxx instead, use a hash table to indicate we've already visited a type, which
  160. # would also be more efficient.
  161. symWithFlags.flags.excl {sfAnon, sfGenSym}
  162. hashTree(c, t.n, flags + {CoHashTypeInsideNode}, conf)
  163. symWithFlags.flags = oldFlags
  164. else:
  165. # The object has no fields: we _must_ add something here in order to
  166. # make the hash different from the one we produce by hashing only the
  167. # type name.
  168. c &= ".empty"
  169. else:
  170. c &= t.id
  171. if t.len > 0 and t[0] != nil:
  172. hashType c, t[0], flags, conf
  173. of tyRef, tyPtr, tyGenericBody, tyVar:
  174. c &= char(t.kind)
  175. if t.sons.len > 0:
  176. c.hashType t.lastSon, flags, conf
  177. if tfVarIsPtr in t.flags: c &= ".varisptr"
  178. of tyFromExpr:
  179. c &= char(t.kind)
  180. c.hashTree(t.n, {}, conf)
  181. of tyTuple:
  182. c &= char(t.kind)
  183. if t.n != nil and CoType notin flags:
  184. assert(t.n.len == t.len)
  185. for i in 0..<t.n.len:
  186. assert(t.n[i].kind == nkSym)
  187. c &= t.n[i].sym.name.s
  188. c &= ':'
  189. c.hashType(t[i], flags+{CoIgnoreRange}, conf)
  190. c &= ','
  191. else:
  192. for i in 0..<t.len: c.hashType t[i], flags+{CoIgnoreRange}, conf
  193. of tyRange:
  194. if CoIgnoreRange notin flags:
  195. c &= char(t.kind)
  196. c.hashTree(t.n, {}, conf)
  197. c.hashType(t[0], flags, conf)
  198. of tyStatic:
  199. c &= char(t.kind)
  200. c.hashTree(t.n, {}, conf)
  201. c.hashType(t[0], flags, conf)
  202. of tyProc:
  203. c &= char(t.kind)
  204. c &= (if tfIterator in t.flags: "iterator " else: "proc ")
  205. if CoProc in flags and t.n != nil:
  206. let params = t.n
  207. for i in 1..<params.len:
  208. let param = params[i].sym
  209. c &= param.name.s
  210. c &= ':'
  211. c.hashType(param.typ, flags, conf)
  212. c &= ','
  213. c.hashType(t[0], flags, conf)
  214. else:
  215. for i in 0..<t.len: c.hashType(t[i], flags, conf)
  216. c &= char(t.callConv)
  217. # purity of functions doesn't have to affect the mangling (which is in fact
  218. # problematic for HCR - someone could have cached a pointer to another
  219. # function which changes its purity and suddenly the cached pointer is danglign)
  220. # IMHO anything that doesn't affect the overload resolution shouldn't be part of the mangling...
  221. # if CoType notin flags:
  222. # if tfNoSideEffect in t.flags: c &= ".noSideEffect"
  223. # if tfThread in t.flags: c &= ".thread"
  224. if tfVarargs in t.flags: c &= ".varargs"
  225. of tyArray:
  226. c &= char(t.kind)
  227. for i in 0..<t.len: c.hashType(t[i], flags-{CoIgnoreRange}, conf)
  228. else:
  229. c &= char(t.kind)
  230. for i in 0..<t.len: c.hashType(t[i], flags, conf)
  231. if tfNotNil in t.flags and CoType notin flags: c &= "not nil"
  232. when defined(debugSigHashes):
  233. import db_sqlite
  234. let db = open(connection="sighashes.db", user="araq", password="",
  235. database="sighashes")
  236. db.exec(sql"DROP TABLE IF EXISTS sighashes")
  237. db.exec sql"""CREATE TABLE sighashes(
  238. id integer primary key,
  239. hash varchar(5000) not null,
  240. type varchar(5000) not null,
  241. unique (hash, type))"""
  242. # select hash, type from sighashes where hash in
  243. # (select hash from sighashes group by hash having count(*) > 1) order by hash;
  244. proc hashType*(t: PType; conf: ConfigRef; flags: set[ConsiderFlag] = {CoType}): SigHash =
  245. var c: MD5Context
  246. md5Init c
  247. hashType c, t, flags+{CoOwnerSig}, conf
  248. md5Final c, result.MD5Digest
  249. when defined(debugSigHashes):
  250. db.exec(sql"INSERT OR IGNORE INTO sighashes(type, hash) VALUES (?, ?)",
  251. typeToString(t), $result)
  252. proc hashProc*(s: PSym; conf: ConfigRef): SigHash =
  253. var c: MD5Context
  254. md5Init c
  255. hashType c, s.typ, {CoProc}, conf
  256. var m = s
  257. while m.kind != skModule: m = m.owner
  258. let p = m.owner
  259. assert p.kind == skPackage
  260. c &= p.name.s
  261. c &= "."
  262. c &= m.name.s
  263. if sfDispatcher in s.flags:
  264. c &= ".dispatcher"
  265. # so that createThread[void]() (aka generic specialization) gets a unique
  266. # hash, we also hash the line information. This is pretty bad, but the best
  267. # solution for now:
  268. #c &= s.info.line
  269. md5Final c, result.MD5Digest
  270. proc hashNonProc*(s: PSym): SigHash =
  271. var c: MD5Context
  272. md5Init c
  273. hashSym(c, s)
  274. var it = s
  275. while it != nil:
  276. c &= it.name.s
  277. c &= "."
  278. it = it.owner
  279. # for bug #5135 we also take the position into account, but only
  280. # for parameters, because who knows what else position dependency
  281. # might cause:
  282. if s.kind == skParam:
  283. c &= s.position
  284. md5Final c, result.MD5Digest
  285. proc hashOwner*(s: PSym): SigHash =
  286. var c: MD5Context
  287. md5Init c
  288. var m = s
  289. while m.kind != skModule: m = m.owner
  290. let p = m.owner
  291. assert p.kind == skPackage
  292. c &= p.name.s
  293. c &= "."
  294. c &= m.name.s
  295. md5Final c, result.MD5Digest
  296. proc sigHash*(s: PSym; conf: ConfigRef): SigHash =
  297. if s.kind in routineKinds and s.typ != nil:
  298. result = hashProc(s, conf)
  299. else:
  300. result = hashNonProc(s)
  301. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash
  302. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode)
  303. proc hashVarSymBody(graph: ModuleGraph, c: var MD5Context, s: PSym) =
  304. assert: s.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}
  305. if sfGlobal notin s.flags:
  306. c &= char(s.kind)
  307. c &= s.name.s
  308. else:
  309. c &= hashNonProc(s)
  310. # this one works for let and const but not for var. True variables can change value
  311. # later on. it is user resposibility to hash his global state if required
  312. if s.ast != nil and s.ast.kind in {nkIdentDefs, nkConstDef}:
  313. hashBodyTree(graph, c, s.ast[^1])
  314. else:
  315. hashBodyTree(graph, c, s.ast)
  316. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode) =
  317. # hash Nim tree recursing into simply
  318. if n == nil:
  319. c &= "nil"
  320. return
  321. c &= char(n.kind)
  322. case n.kind
  323. of nkEmpty, nkNilLit, nkType: discard
  324. of nkIdent:
  325. c &= n.ident.s
  326. of nkSym:
  327. if n.sym.kind in skProcKinds:
  328. c &= symBodyDigest(graph, n.sym)
  329. elif n.sym.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}:
  330. hashVarSymBody(graph, c, n.sym)
  331. else:
  332. c &= hashNonProc(n.sym)
  333. of nkProcDef, nkFuncDef, nkTemplateDef, nkMacroDef:
  334. discard # we track usage of proc symbols not their definition
  335. of nkCharLit..nkUInt64Lit:
  336. c &= n.intVal
  337. of nkFloatLit..nkFloat64Lit:
  338. c &= n.floatVal
  339. of nkStrLit..nkTripleStrLit:
  340. c &= n.strVal
  341. else:
  342. for i in 0..<n.len:
  343. hashBodyTree(graph, c, n[i])
  344. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash =
  345. ## compute unique digest of the proc/func/method symbols
  346. ## recursing into invoked symbols as well
  347. assert(sym.kind in skProcKinds, $sym.kind)
  348. graph.symBodyHashes.withValue(sym.id, value):
  349. return value[]
  350. var c: MD5Context
  351. md5Init(c)
  352. c.hashType(sym.typ, {CoProc}, graph.config)
  353. c &= char(sym.kind)
  354. c.md5Final(result.MD5Digest)
  355. graph.symBodyHashes[sym.id] = result # protect from recursion in the body
  356. if sym.ast != nil:
  357. md5Init(c)
  358. c.md5Update(cast[cstring](result.addr), sizeof(result))
  359. hashBodyTree(graph, c, getBody(graph, sym))
  360. c.md5Final(result.MD5Digest)
  361. graph.symBodyHashes[sym.id] = result
  362. proc idOrSig*(s: PSym, currentModule: string,
  363. sigCollisions: var CountTable[SigHash]; conf: ConfigRef): Rope =
  364. if s.kind in routineKinds and s.typ != nil:
  365. # signatures for exported routines are reliable enough to
  366. # produce a unique name and this means produced C++ is more stable regarding
  367. # Nim changes:
  368. let sig = hashProc(s, conf)
  369. result = rope($sig)
  370. #let m = if s.typ.callConv != ccInline: findPendingModule(m, s) else: m
  371. let counter = sigCollisions.getOrDefault(sig)
  372. #if sigs == "_jckmNePK3i2MFnWwZlp6Lg" and s.name.s == "contains":
  373. # echo "counter ", counter, " ", s.id
  374. if counter != 0:
  375. result.add "_" & rope(counter+1)
  376. # this minor hack is necessary to make tests/collections/thashes compile.
  377. # The inlined hash function's original module is ambiguous so we end up
  378. # generating duplicate names otherwise:
  379. if s.typ.callConv == ccInline:
  380. result.add rope(currentModule)
  381. sigCollisions.inc(sig)
  382. else:
  383. let sig = hashNonProc(s)
  384. result = rope($sig)
  385. let counter = sigCollisions.getOrDefault(sig)
  386. if counter != 0:
  387. result.add "_" & rope(counter+1)
  388. sigCollisions.inc(sig)