sighashes.nim 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2017 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Computes hash values for routine (proc, method etc) signatures.
  10. import ast, md5
  11. from hashes import Hash
  12. from astalgo import debug
  13. from types import typeToString, preferDesc
  14. from strutils import startsWith, contains
  15. when false:
  16. type
  17. SigHash* = uint32 ## a hash good enough for a filename or a proc signature
  18. proc sdbmHash(hash: SigHash, c: char): SigHash {.inline.} =
  19. return SigHash(c) + (hash shl 6) + (hash shl 16) - hash
  20. template `&=`*(x: var SigHash, c: char) = x = sdbmHash(x, c)
  21. template `&=`*(x: var SigHash, s: string) =
  22. for c in s: x = sdbmHash(x, c)
  23. else:
  24. type
  25. SigHash* = distinct Md5Digest
  26. const
  27. cb64 = [
  28. "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
  29. "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
  30. "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n",
  31. "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
  32. "0", "1", "2", "3", "4", "5", "6", "7", "8", "9a",
  33. "9b", "9c"]
  34. proc toBase64a(s: cstring, len: int): string =
  35. ## encodes `s` into base64 representation.
  36. result = newStringOfCap(((len + 2) div 3) * 4)
  37. result.add '_'
  38. var i = 0
  39. while i < len - 2:
  40. let a = ord(s[i])
  41. let b = ord(s[i+1])
  42. let c = ord(s[i+2])
  43. result.add cb64[a shr 2]
  44. result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
  45. result.add cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)]
  46. result.add cb64[c and 0x3F]
  47. inc(i, 3)
  48. if i < len-1:
  49. let a = ord(s[i])
  50. let b = ord(s[i+1])
  51. result.add cb64[a shr 2]
  52. result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
  53. result.add cb64[((b and 0x0F) shl 2)]
  54. elif i < len:
  55. let a = ord(s[i])
  56. result.add cb64[a shr 2]
  57. result.add cb64[(a and 3) shl 4]
  58. proc `$`*(u: SigHash): string =
  59. toBase64a(cast[cstring](unsafeAddr u), sizeof(u))
  60. proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
  61. proc `&=`(c: var MD5Context, ch: char) = md5Update(c, unsafeAddr ch, 1)
  62. proc `&=`(c: var MD5Context, i: BiggestInt) =
  63. md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
  64. template lowlevel(v) =
  65. md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
  66. proc `==`*(a, b: SigHash): bool =
  67. # {.borrow.}
  68. result = equalMem(unsafeAddr a, unsafeAddr b, sizeof(a))
  69. proc hash*(u: SigHash): Hash =
  70. result = 0
  71. for x in 0..3:
  72. result = (result shl 8) or u.MD5Digest[x].int
  73. type
  74. ConsiderFlag* = enum
  75. CoProc
  76. CoType
  77. CoOwnerSig
  78. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag])
  79. proc hashSym(c: var MD5Context, s: PSym) =
  80. if sfAnon in s.flags or s.kind == skGenericParam:
  81. c &= ":anon"
  82. else:
  83. var it = s
  84. while it != nil:
  85. c &= it.name.s
  86. c &= "."
  87. it = it.owner
  88. proc hashTypeSym(c: var MD5Context, s: PSym) =
  89. if sfAnon in s.flags or s.kind == skGenericParam:
  90. c &= ":anon"
  91. else:
  92. var it = s
  93. while it != nil:
  94. if sfFromGeneric in it.flags and it.kind in routineKinds and
  95. it.typ != nil:
  96. hashType c, it.typ, {CoProc}
  97. c &= it.name.s
  98. c &= "."
  99. it = it.owner
  100. proc hashTree(c: var MD5Context, n: PNode) =
  101. if n == nil:
  102. c &= "\255"
  103. return
  104. let k = n.kind
  105. c &= char(k)
  106. # we really must not hash line information. 'n.typ' is debatable but
  107. # shouldn't be necessary for now and avoids potential infinite recursions.
  108. case n.kind
  109. of nkEmpty, nkNilLit, nkType: discard
  110. of nkIdent:
  111. c &= n.ident.s
  112. of nkSym:
  113. hashSym(c, n.sym)
  114. of nkCharLit..nkUInt64Lit:
  115. let v = n.intVal
  116. lowlevel v
  117. of nkFloatLit..nkFloat64Lit:
  118. let v = n.floatVal
  119. lowlevel v
  120. of nkStrLit..nkTripleStrLit:
  121. c &= n.strVal
  122. else:
  123. for i in 0.. <n.len: hashTree(c, n.sons[i])
  124. proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]) =
  125. if t == nil:
  126. c &= "\254"
  127. return
  128. case t.kind
  129. of tyGenericInvocation:
  130. for i in countup(0, sonsLen(t) - 1):
  131. c.hashType t.sons[i], flags
  132. return
  133. of tyDistinct:
  134. if CoType in flags:
  135. c.hashType t.lastSon, flags
  136. else:
  137. c.hashSym(t.sym)
  138. return
  139. of tyAlias, tyGenericInst, tyUserTypeClasses:
  140. c.hashType t.lastSon, flags
  141. return
  142. else:
  143. discard
  144. c &= char(t.kind)
  145. case t.kind
  146. of tyBool, tyChar, tyInt..tyUInt64:
  147. # no canonicalization for integral types, so that e.g. ``pid_t`` is
  148. # produced instead of ``NI``:
  149. if t.sym != nil and {sfImportc, sfExportc} * t.sym.flags != {}:
  150. c.hashSym(t.sym)
  151. of tyObject, tyEnum:
  152. if t.typeInst != nil:
  153. assert t.typeInst.kind == tyGenericInst
  154. for i in countup(1, sonsLen(t.typeInst) - 2):
  155. c.hashType t.typeInst.sons[i], flags
  156. # Every cyclic type in Nim need to be constructed via some 't.sym', so this
  157. # is actually safe without an infinite recursion check:
  158. if t.sym != nil:
  159. #if "Future:" in t.sym.name.s and t.typeInst == nil:
  160. # writeStackTrace()
  161. # echo "yes ", t.sym.name.s
  162. # #quit 1
  163. if CoOwnerSig in flags:
  164. c.hashTypeSym(t.sym)
  165. else:
  166. c.hashSym(t.sym)
  167. if sfAnon in t.sym.flags:
  168. # generated object names can be identical, so we need to
  169. # disambiguate furthermore by hashing the field types and names:
  170. # mild hack to prevent endless recursions (makes nimforum compile again):
  171. excl t.sym.flags, sfAnon
  172. let n = t.n
  173. for i in 0 ..< n.len:
  174. assert n[i].kind == nkSym
  175. let s = n[i].sym
  176. c.hashSym s
  177. c.hashType s.typ, flags
  178. incl t.sym.flags, sfAnon
  179. else:
  180. c &= t.id
  181. if t.len > 0 and t.sons[0] != nil:
  182. hashType c, t.sons[0], flags
  183. of tyRef, tyPtr, tyGenericBody, tyVar:
  184. c.hashType t.lastSon, flags
  185. if tfVarIsPtr in t.flags: c &= ".varisptr"
  186. of tyFromExpr:
  187. c.hashTree(t.n)
  188. of tyTuple:
  189. if t.n != nil and CoType notin flags:
  190. assert(sonsLen(t.n) == sonsLen(t))
  191. for i in countup(0, sonsLen(t.n) - 1):
  192. assert(t.n.sons[i].kind == nkSym)
  193. c &= t.n.sons[i].sym.name.s
  194. c &= ':'
  195. c.hashType(t.sons[i], flags)
  196. c &= ','
  197. else:
  198. for i in countup(0, sonsLen(t) - 1): c.hashType t.sons[i], flags
  199. of tyRange, tyStatic:
  200. #if CoType notin flags:
  201. c.hashTree(t.n)
  202. c.hashType(t.sons[0], flags)
  203. of tyProc:
  204. c &= (if tfIterator in t.flags: "iterator " else: "proc ")
  205. if CoProc in flags and t.n != nil:
  206. let params = t.n
  207. for i in 1..<params.len:
  208. let param = params[i].sym
  209. c &= param.name.s
  210. c &= ':'
  211. c.hashType(param.typ, flags)
  212. c &= ','
  213. c.hashType(t.sons[0], flags)
  214. else:
  215. for i in 0.. <t.len: c.hashType(t.sons[i], flags)
  216. c &= char(t.callConv)
  217. if CoType notin flags:
  218. if tfNoSideEffect in t.flags: c &= ".noSideEffect"
  219. if tfThread in t.flags: c &= ".thread"
  220. if tfVarargs in t.flags: c &= ".varargs"
  221. else:
  222. for i in 0.. <t.len: c.hashType(t.sons[i], flags)
  223. if tfNotNil in t.flags and CoType notin flags: c &= "not nil"
  224. when defined(debugSigHashes):
  225. import db_sqlite
  226. let db = open(connection="sighashes.db", user="araq", password="",
  227. database="sighashes")
  228. db.exec(sql"DROP TABLE IF EXISTS sighashes")
  229. db.exec sql"""CREATE TABLE sighashes(
  230. id integer primary key,
  231. hash varchar(5000) not null,
  232. type varchar(5000) not null,
  233. unique (hash, type))"""
  234. # select hash, type from sighashes where hash in
  235. # (select hash from sighashes group by hash having count(*) > 1) order by hash;
  236. proc hashType*(t: PType; flags: set[ConsiderFlag] = {CoType}): SigHash =
  237. var c: MD5Context
  238. md5Init c
  239. hashType c, t, flags+{CoOwnerSig}
  240. md5Final c, result.Md5Digest
  241. when defined(debugSigHashes):
  242. db.exec(sql"INSERT OR IGNORE INTO sighashes(type, hash) VALUES (?, ?)",
  243. typeToString(t), $result)
  244. proc hashProc*(s: PSym): SigHash =
  245. var c: MD5Context
  246. md5Init c
  247. hashType c, s.typ, {CoProc}
  248. var m = s
  249. while m.kind != skModule: m = m.owner
  250. let p = m.owner
  251. assert p.kind == skPackage
  252. c &= p.name.s
  253. c &= "."
  254. c &= m.name.s
  255. if sfDispatcher in s.flags:
  256. c &= ".dispatcher"
  257. # so that createThread[void]() (aka generic specialization) gets a unique
  258. # hash, we also hash the line information. This is pretty bad, but the best
  259. # solution for now:
  260. #c &= s.info.line
  261. md5Final c, result.Md5Digest
  262. proc hashNonProc*(s: PSym): SigHash =
  263. var c: MD5Context
  264. md5Init c
  265. hashSym(c, s)
  266. var it = s
  267. while it != nil:
  268. c &= it.name.s
  269. c &= "."
  270. it = it.owner
  271. # for bug #5135 we also take the position into account, but only
  272. # for parameters, because who knows what else position dependency
  273. # might cause:
  274. if s.kind == skParam:
  275. c &= s.position
  276. md5Final c, result.Md5Digest
  277. proc hashOwner*(s: PSym): SigHash =
  278. var c: MD5Context
  279. md5Init c
  280. var m = s
  281. while m.kind != skModule: m = m.owner
  282. let p = m.owner
  283. assert p.kind == skPackage
  284. c &= p.name.s
  285. c &= "."
  286. c &= m.name.s
  287. md5Final c, result.Md5Digest