sighashes.nim 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2017 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Computes hash values for routine (proc, method etc) signatures.
  10. import ast, tables, ropes, md5, modulegraphs
  11. from hashes import Hash
  12. from astalgo import debug
  13. import types
  14. from strutils import startsWith, contains
  15. proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
  16. proc `&=`(c: var MD5Context, ch: char) = md5Update(c, unsafeAddr ch, 1)
  17. proc `&=`(c: var MD5Context, r: Rope) =
  18. for l in leaves(r): md5Update(c, l, l.len)
  19. proc `&=`(c: var MD5Context, i: BiggestInt) =
  20. md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
  21. proc `&=`(c: var MD5Context, f: BiggestFloat) =
  22. md5Update(c, cast[cstring](unsafeAddr f), sizeof(f))
  23. proc `&=`(c: var MD5Context, s: SigHash) =
  24. md5Update(c, cast[cstring](unsafeAddr s), sizeof(s))
  25. template lowlevel(v) =
  26. md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
  27. type
  28. ConsiderFlag* = enum
  29. CoProc
  30. CoType
  31. CoOwnerSig
  32. CoIgnoreRange
  33. CoConsiderOwned
  34. CoDistinct
  35. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag])
  36. proc hashSym(c: var MD5Context, s: PSym) =
  37. if sfAnon in s.flags or s.kind == skGenericParam:
  38. c &= ":anon"
  39. else:
  40. var it = s
  41. while it != nil:
  42. c &= it.name.s
  43. c &= "."
  44. it = it.owner
  45. proc hashTypeSym(c: var MD5Context, s: PSym) =
  46. if sfAnon in s.flags or s.kind == skGenericParam:
  47. c &= ":anon"
  48. else:
  49. var it = s
  50. while it != nil:
  51. if sfFromGeneric in it.flags and it.kind in routineKinds and
  52. it.typ != nil:
  53. hashType c, it.typ, {CoProc}
  54. c &= it.name.s
  55. c &= "."
  56. it = it.owner
  57. proc hashTree(c: var MD5Context, n: PNode) =
  58. if n == nil:
  59. c &= "\255"
  60. return
  61. let k = n.kind
  62. c &= char(k)
  63. # we really must not hash line information. 'n.typ' is debatable but
  64. # shouldn't be necessary for now and avoids potential infinite recursions.
  65. case n.kind
  66. of nkEmpty, nkNilLit, nkType: discard
  67. of nkIdent:
  68. c &= n.ident.s
  69. of nkSym:
  70. hashSym(c, n.sym)
  71. of nkCharLit..nkUInt64Lit:
  72. let v = n.intVal
  73. lowlevel v
  74. of nkFloatLit..nkFloat64Lit:
  75. let v = n.floatVal
  76. lowlevel v
  77. of nkStrLit..nkTripleStrLit:
  78. c &= n.strVal
  79. else:
  80. for i in 0..<n.len: hashTree(c, n.sons[i])
  81. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]) =
  82. if t == nil:
  83. c &= "\254"
  84. return
  85. case t.kind
  86. of tyGenericInvocation:
  87. for i in 0 ..< sonsLen(t):
  88. c.hashType t.sons[i], flags
  89. of tyDistinct:
  90. if CoDistinct in flags:
  91. if t.sym != nil: c.hashSym(t.sym)
  92. if t.sym == nil or tfFromGeneric in t.flags:
  93. c.hashType t.lastSon, flags
  94. elif CoType in flags or t.sym == nil:
  95. c.hashType t.lastSon, flags
  96. else:
  97. c.hashSym(t.sym)
  98. of tyGenericInst:
  99. if sfInfixCall in t.base.sym.flags:
  100. # This is an imported C++ generic type.
  101. # We cannot trust the `lastSon` to hold a properly populated and unique
  102. # value for each instantiation, so we hash the generic parameters here:
  103. let normalizedType = t.skipGenericAlias
  104. for i in 0 .. normalizedType.len - 2:
  105. c.hashType t.sons[i], flags
  106. else:
  107. c.hashType t.lastSon, flags
  108. of tyAlias, tySink, tyUserTypeClasses, tyInferred:
  109. c.hashType t.lastSon, flags
  110. of tyOwned:
  111. if CoConsiderOwned in flags:
  112. c &= char(t.kind)
  113. c.hashType t.lastSon, flags
  114. of tyBool, tyChar, tyInt..tyUInt64:
  115. # no canonicalization for integral types, so that e.g. ``pid_t`` is
  116. # produced instead of ``NI``:
  117. c &= char(t.kind)
  118. if t.sym != nil and {sfImportc, sfExportc} * t.sym.flags != {}:
  119. c.hashSym(t.sym)
  120. of tyObject, tyEnum:
  121. if t.typeInst != nil:
  122. # prevent against infinite recursions here, see bug #8883:
  123. let inst = t.typeInst
  124. t.typeInst = nil
  125. assert inst.kind == tyGenericInst
  126. for i in 0 .. inst.len - 2:
  127. c.hashType inst.sons[i], flags
  128. t.typeInst = inst
  129. return
  130. c &= char(t.kind)
  131. # Every cyclic type in Nim need to be constructed via some 't.sym', so this
  132. # is actually safe without an infinite recursion check:
  133. if t.sym != nil:
  134. if {sfCompilerProc} * t.sym.flags != {}:
  135. doAssert t.sym.loc.r != nil
  136. # The user has set a specific name for this type
  137. c &= t.sym.loc.r
  138. elif CoOwnerSig in flags:
  139. c.hashTypeSym(t.sym)
  140. else:
  141. c.hashSym(t.sym)
  142. if {sfAnon, sfGenSym} * t.sym.flags != {}:
  143. # Generated object names can be identical, so we need to
  144. # disambiguate furthermore by hashing the field types and names.
  145. if t.n.len > 0:
  146. let oldFlags = t.sym.flags
  147. # Mild hack to prevent endless recursion.
  148. t.sym.flags = t.sym.flags - {sfAnon, sfGenSym}
  149. for n in t.n:
  150. assert(n.kind == nkSym)
  151. let s = n.sym
  152. c.hashSym s
  153. c.hashType s.typ, flags
  154. t.sym.flags = oldFlags
  155. else:
  156. # The object has no fields: we _must_ add something here in order to
  157. # make the hash different from the one we produce by hashing only the
  158. # type name.
  159. c &= ".empty"
  160. else:
  161. c &= t.id
  162. if t.len > 0 and t.sons[0] != nil:
  163. hashType c, t.sons[0], flags
  164. of tyRef, tyPtr, tyGenericBody, tyVar:
  165. c &= char(t.kind)
  166. c.hashType t.lastSon, flags
  167. if tfVarIsPtr in t.flags: c &= ".varisptr"
  168. of tyFromExpr:
  169. c &= char(t.kind)
  170. c.hashTree(t.n)
  171. of tyTuple:
  172. c &= char(t.kind)
  173. if t.n != nil and CoType notin flags:
  174. assert(sonsLen(t.n) == sonsLen(t))
  175. for i in 0 ..< sonsLen(t.n):
  176. assert(t.n.sons[i].kind == nkSym)
  177. c &= t.n.sons[i].sym.name.s
  178. c &= ':'
  179. c.hashType(t.sons[i], flags+{CoIgnoreRange})
  180. c &= ','
  181. else:
  182. for i in 0 ..< sonsLen(t): c.hashType t.sons[i], flags+{CoIgnoreRange}
  183. of tyRange:
  184. if CoIgnoreRange notin flags:
  185. c &= char(t.kind)
  186. c.hashTree(t.n)
  187. c.hashType(t.sons[0], flags)
  188. of tyStatic:
  189. c &= char(t.kind)
  190. c.hashTree(t.n)
  191. c.hashType(t.sons[0], flags)
  192. of tyProc:
  193. c &= char(t.kind)
  194. c &= (if tfIterator in t.flags: "iterator " else: "proc ")
  195. if CoProc in flags and t.n != nil:
  196. let params = t.n
  197. for i in 1..<params.len:
  198. let param = params[i].sym
  199. c &= param.name.s
  200. c &= ':'
  201. c.hashType(param.typ, flags)
  202. c &= ','
  203. c.hashType(t.sons[0], flags)
  204. else:
  205. for i in 0..<t.len: c.hashType(t.sons[i], flags)
  206. c &= char(t.callConv)
  207. # purity of functions doesn't have to affect the mangling (which is in fact
  208. # problematic for HCR - someone could have cached a pointer to another
  209. # function which changes its purity and suddenly the cached pointer is danglign)
  210. # IMHO anything that doesn't affect the overload resolution shouldn't be part of the mangling...
  211. # if CoType notin flags:
  212. # if tfNoSideEffect in t.flags: c &= ".noSideEffect"
  213. # if tfThread in t.flags: c &= ".thread"
  214. if tfVarargs in t.flags: c &= ".varargs"
  215. of tyArray:
  216. c &= char(t.kind)
  217. for i in 0..<t.len: c.hashType(t.sons[i], flags-{CoIgnoreRange})
  218. else:
  219. c &= char(t.kind)
  220. for i in 0..<t.len: c.hashType(t.sons[i], flags)
  221. if tfNotNil in t.flags and CoType notin flags: c &= "not nil"
  222. when defined(debugSigHashes):
  223. import db_sqlite
  224. let db = open(connection="sighashes.db", user="araq", password="",
  225. database="sighashes")
  226. db.exec(sql"DROP TABLE IF EXISTS sighashes")
  227. db.exec sql"""CREATE TABLE sighashes(
  228. id integer primary key,
  229. hash varchar(5000) not null,
  230. type varchar(5000) not null,
  231. unique (hash, type))"""
  232. # select hash, type from sighashes where hash in
  233. # (select hash from sighashes group by hash having count(*) > 1) order by hash;
  234. proc hashType*(t: PType; flags: set[ConsiderFlag] = {CoType}): SigHash =
  235. var c: MD5Context
  236. md5Init c
  237. hashType c, t, flags+{CoOwnerSig}
  238. md5Final c, result.Md5Digest
  239. when defined(debugSigHashes):
  240. db.exec(sql"INSERT OR IGNORE INTO sighashes(type, hash) VALUES (?, ?)",
  241. typeToString(t), $result)
  242. proc hashProc*(s: PSym): SigHash =
  243. var c: MD5Context
  244. md5Init c
  245. hashType c, s.typ, {CoProc}
  246. var m = s
  247. while m.kind != skModule: m = m.owner
  248. let p = m.owner
  249. assert p.kind == skPackage
  250. c &= p.name.s
  251. c &= "."
  252. c &= m.name.s
  253. if sfDispatcher in s.flags:
  254. c &= ".dispatcher"
  255. # so that createThread[void]() (aka generic specialization) gets a unique
  256. # hash, we also hash the line information. This is pretty bad, but the best
  257. # solution for now:
  258. #c &= s.info.line
  259. md5Final c, result.Md5Digest
  260. proc hashNonProc*(s: PSym): SigHash =
  261. var c: MD5Context
  262. md5Init c
  263. hashSym(c, s)
  264. var it = s
  265. while it != nil:
  266. c &= it.name.s
  267. c &= "."
  268. it = it.owner
  269. # for bug #5135 we also take the position into account, but only
  270. # for parameters, because who knows what else position dependency
  271. # might cause:
  272. if s.kind == skParam:
  273. c &= s.position
  274. md5Final c, result.Md5Digest
  275. proc hashOwner*(s: PSym): SigHash =
  276. var c: MD5Context
  277. md5Init c
  278. var m = s
  279. while m.kind != skModule: m = m.owner
  280. let p = m.owner
  281. assert p.kind == skPackage
  282. c &= p.name.s
  283. c &= "."
  284. c &= m.name.s
  285. md5Final c, result.Md5Digest
  286. proc sigHash*(s: PSym): SigHash =
  287. if s.kind in routineKinds and s.typ != nil:
  288. result = hashProc(s)
  289. else:
  290. result = hashNonProc(s)
  291. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash
  292. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode)
  293. proc hashVarSymBody(graph: ModuleGraph, c: var MD5Context, s: PSym) =
  294. assert: s.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}
  295. if sfGlobal notin s.flags:
  296. c &= char(s.kind)
  297. c &= s.name.s
  298. else:
  299. c &= hashNonProc(s)
  300. # this one works for let and const but not for var. True variables can change value
  301. # later on. it is user resposibility to hash his global state if required
  302. if s.ast != nil and s.ast.kind == nkIdentDefs:
  303. hashBodyTree(graph, c, s.ast[^1])
  304. else:
  305. hashBodyTree(graph, c, s.ast)
  306. proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode) =
  307. # hash Nim tree recursing into simply
  308. if n == nil:
  309. c &= "nil"
  310. return
  311. c &= char(n.kind)
  312. case n.kind
  313. of nkEmpty, nkNilLit, nkType: discard
  314. of nkIdent:
  315. c &= n.ident.s
  316. of nkSym:
  317. if n.sym.kind in skProcKinds:
  318. c &= symBodyDigest(graph, n.sym)
  319. elif n.sym.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}:
  320. hashVarSymBody(graph, c, n.sym)
  321. else:
  322. c &= hashNonProc(n.sym)
  323. of nkProcDef, nkFuncDef, nkTemplateDef, nkMacroDef:
  324. discard # we track usage of proc symbols not their definition
  325. of nkCharLit..nkUInt64Lit:
  326. c &= n.intVal
  327. of nkFloatLit..nkFloat64Lit:
  328. c &= n.floatVal
  329. of nkStrLit..nkTripleStrLit:
  330. c &= n.strVal
  331. else:
  332. for i in 0..<n.len:
  333. hashBodyTree(graph, c, n.sons[i])
  334. proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash =
  335. ## compute unique digest of the proc/func/method symbols
  336. ## recursing into invoked symbols as well
  337. assert(sym.kind in skProcKinds, $sym.kind)
  338. graph.symBodyHashes.withValue(sym.id, value):
  339. return value[]
  340. var c: MD5Context
  341. md5Init(c)
  342. c.hashType(sym.typ, {CoProc})
  343. c &= char(sym.kind)
  344. c.md5Final(result.Md5Digest)
  345. graph.symBodyHashes[sym.id] = result # protect from recursion in the body
  346. if sym.ast != nil:
  347. md5Init(c)
  348. c.md5Update(cast[cstring](result.addr), sizeof(result))
  349. hashBodyTree(graph, c, sym.ast[bodyPos])
  350. c.md5Final(result.Md5Digest)
  351. graph.symBodyHashes[sym.id] = result
  352. proc idOrSig*(s: PSym, currentModule: string,
  353. sigCollisions: var CountTable[SigHash]): Rope =
  354. if s.kind in routineKinds and s.typ != nil:
  355. # signatures for exported routines are reliable enough to
  356. # produce a unique name and this means produced C++ is more stable wrt
  357. # Nim changes:
  358. let sig = hashProc(s)
  359. result = rope($sig)
  360. #let m = if s.typ.callConv != ccInline: findPendingModule(m, s) else: m
  361. let counter = sigCollisions.getOrDefault(sig)
  362. #if sigs == "_jckmNePK3i2MFnWwZlp6Lg" and s.name.s == "contains":
  363. # echo "counter ", counter, " ", s.id
  364. if counter != 0:
  365. result.add "_" & rope(counter+1)
  366. # this minor hack is necessary to make tests/collections/thashes compile.
  367. # The inlined hash function's original module is ambiguous so we end up
  368. # generating duplicate names otherwise:
  369. if s.typ.callConv == ccInline:
  370. result.add rope(currentModule)
  371. sigCollisions.inc(sig)
  372. else:
  373. let sig = hashNonProc(s)
  374. result = rope($sig)
  375. let counter = sigCollisions.getOrDefault(sig)
  376. if counter != 0:
  377. result.add "_" & rope(counter+1)
  378. sigCollisions.inc(sig)