canonicalizer.nim 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2015 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements the canonalization for the various caching mechanisms.
  10. import strutils, db_sqlite, md5
  11. var db: DbConn
  12. # We *hash* the relevant information into 128 bit hashes. This should be good
  13. # enough to prevent any collisions.
  14. type
  15. TUid = distinct MD5Digest
  16. # For name mangling we encode these hashes via a variant of base64 (called
  17. # 'base64a') and prepend the *primary* identifier to ease the debugging pain.
  18. # So a signature like:
  19. #
  20. # proc gABI(c: PCtx; n: PNode; opc: TOpcode; a, b: TRegister; imm: BiggestInt)
  21. #
  22. # is mangled into:
  23. # gABI_MTdmOWY5MTQ1MDcyNGQ3ZA
  24. #
  25. # This is a good compromise between correctness and brevity. ;-)
  26. const
  27. cb64 = [
  28. "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
  29. "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
  30. "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n",
  31. "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
  32. "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
  33. "_A", "_B"]
  34. proc toBase64a(s: cstring, len: int): string =
  35. ## encodes `s` into base64 representation. After `lineLen` characters, a
  36. ## `newline` is added.
  37. result = newStringOfCap(((len + 2) div 3) * 4)
  38. var i = 0
  39. while i < s.len - 2:
  40. let a = ord(s[i])
  41. let b = ord(s[i+1])
  42. let c = ord(s[i+2])
  43. result.add cb64[a shr 2]
  44. result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
  45. result.add cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)]
  46. result.add cb64[c and 0x3F]
  47. inc(i, 3)
  48. if i < s.len-1:
  49. let a = ord(s[i])
  50. let b = ord(s[i+1])
  51. result.add cb64[a shr 2]
  52. result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
  53. result.add cb64[((b and 0x0F) shl 2)]
  54. elif i < s.len:
  55. let a = ord(s[i])
  56. result.add cb64[a shr 2]
  57. result.add cb64[(a and 3) shl 4]
  58. proc toBase64a(u: TUid): string = toBase64a(cast[cstring](u), sizeof(u))
  59. proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
  60. proc hashSym(c: var MD5Context, s: PSym) =
  61. if sfAnon in s.flags or s.kind == skGenericParam:
  62. c &= ":anon"
  63. else:
  64. var it = s.owner
  65. while it != nil:
  66. hashSym(c, it)
  67. c &= "."
  68. it = s.owner
  69. c &= s.name.s
  70. proc hashTree(c: var MD5Context, n: PNode) =
  71. if n == nil:
  72. c &= "\255"
  73. return
  74. var k = n.kind
  75. md5Update(c, cast[cstring](addr(k)), 1)
  76. # we really must not hash line information. 'n.typ' is debatable but
  77. # shouldn't be necessary for now and avoids potential infinite recursions.
  78. case n.kind
  79. of nkEmpty, nkNilLit, nkType: discard
  80. of nkIdent:
  81. c &= n.ident.s
  82. of nkSym:
  83. hashSym(c, n.sym)
  84. of nkCharLit..nkUInt64Lit:
  85. var v = n.intVal
  86. md5Update(c, cast[cstring](addr(v)), sizeof(v))
  87. of nkFloatLit..nkFloat64Lit:
  88. var v = n.floatVal
  89. md5Update(c, cast[cstring](addr(v)), sizeof(v))
  90. of nkStrLit..nkTripleStrLit:
  91. c &= n.strVal
  92. else:
  93. for i in 0..<n.len: hashTree(c, n[i])
  94. proc hashType(c: var MD5Context, t: PType) =
  95. # modelled after 'typeToString'
  96. if t == nil:
  97. c &= "\254"
  98. return
  99. var k = t.kind
  100. md5Update(c, cast[cstring](addr(k)), 1)
  101. if t.sym != nil and sfAnon notin t.sym.flags:
  102. # t.n for literals, but not for e.g. objects!
  103. if t.kind in {tyFloat, tyInt}: c.hashNode(t.n)
  104. c.hashSym(t.sym)
  105. case t.kind
  106. of tyGenericBody, tyGenericInst, tyGenericInvocation:
  107. for i in 0..<t.len-ord(t.kind != tyGenericInvocation):
  108. c.hashType t[i]
  109. of tyUserTypeClass:
  110. internalAssert t.sym != nil and t.sym.owner != nil
  111. c &= t.sym.owner.name.s
  112. of tyUserTypeClassInst:
  113. let body = t.base
  114. c.hashSym body.sym
  115. for i in 1..<t.len-1:
  116. c.hashType t[i]
  117. of tyFromExpr:
  118. c.hashTree(t.n)
  119. of tyArray:
  120. c.hashTree(t[0].n)
  121. c.hashType(t[1])
  122. of tyTuple:
  123. if t.n != nil:
  124. assert(t.n.len == t.len)
  125. for i in 0..<t.n.len:
  126. assert(t.n[i].kind == nkSym)
  127. c &= t.n[i].sym.name.s
  128. c &= ":"
  129. c.hashType(t[i])
  130. c &= ","
  131. else:
  132. for i in 0..<t.len: c.hashType t[i]
  133. of tyRange:
  134. c.hashTree(t.n)
  135. c.hashType(t[0])
  136. of tyProc:
  137. c &= (if tfIterator in t.flags: "iterator " else: "proc ")
  138. for i in 0..<t.len: c.hashType(t[i])
  139. md5Update(c, cast[cstring](addr(t.callConv)), 1)
  140. if tfNoSideEffect in t.flags: c &= ".noSideEffect"
  141. if tfThread in t.flags: c &= ".thread"
  142. else:
  143. for i in 0..<t.len: c.hashType(t[i])
  144. if tfNotNil in t.flags: c &= "not nil"
  145. proc canonConst(n: PNode): TUid =
  146. var c: MD5Context
  147. md5Init(c)
  148. c.hashTree(n)
  149. c.hashType(n.typ)
  150. md5Final(c, MD5Digest(result))
  151. proc canonSym(s: PSym): TUid =
  152. var c: MD5Context
  153. md5Init(c)
  154. c.hashSym(s)
  155. md5Final(c, MD5Digest(result))
  156. proc pushType(w: PRodWriter, t: PType) =
  157. # check so that the stack does not grow too large:
  158. if iiTableGet(w.index.tab, t.id) == InvalidKey:
  159. w.tstack.add(t)
  160. proc pushSym(w: PRodWriter, s: PSym) =
  161. # check so that the stack does not grow too large:
  162. if iiTableGet(w.index.tab, s.id) == InvalidKey:
  163. w.sstack.add(s)
  164. proc encodeNode(w: PRodWriter, fInfo: TLineInfo, n: PNode,
  165. result: var string) =
  166. if n == nil:
  167. # nil nodes have to be stored too:
  168. result.add("()")
  169. return
  170. result.add('(')
  171. encodeVInt(ord(n.kind), result)
  172. # we do not write comments for now
  173. # Line information takes easily 20% or more of the filesize! Therefore we
  174. # omit line information if it is the same as the father's line information:
  175. if fInfo.fileIndex != n.info.fileIndex:
  176. result.add('?')
  177. encodeVInt(n.info.col, result)
  178. result.add(',')
  179. encodeVInt(n.info.line, result)
  180. result.add(',')
  181. encodeVInt(fileIdx(w, toFilename(n.info)), result)
  182. elif fInfo.line != n.info.line:
  183. result.add('?')
  184. encodeVInt(n.info.col, result)
  185. result.add(',')
  186. encodeVInt(n.info.line, result)
  187. elif fInfo.col != n.info.col:
  188. result.add('?')
  189. encodeVInt(n.info.col, result)
  190. var f = n.flags * PersistentNodeFlags
  191. if f != {}:
  192. result.add('$')
  193. encodeVInt(cast[int32](f), result)
  194. if n.typ != nil:
  195. result.add('^')
  196. encodeVInt(n.typ.id, result)
  197. pushType(w, n.typ)
  198. case n.kind
  199. of nkCharLit..nkInt64Lit:
  200. if n.intVal != 0:
  201. result.add('!')
  202. encodeVBiggestInt(n.intVal, result)
  203. of nkFloatLit..nkFloat64Lit:
  204. if n.floatVal != 0.0:
  205. result.add('!')
  206. encodeStr($n.floatVal, result)
  207. of nkStrLit..nkTripleStrLit:
  208. if n.strVal != "":
  209. result.add('!')
  210. encodeStr(n.strVal, result)
  211. of nkIdent:
  212. result.add('!')
  213. encodeStr(n.ident.s, result)
  214. of nkSym:
  215. result.add('!')
  216. encodeVInt(n.sym.id, result)
  217. pushSym(w, n.sym)
  218. else:
  219. for i in 0..<n.len:
  220. encodeNode(w, n.info, n[i], result)
  221. result.add(')')
  222. proc encodeLoc(w: PRodWriter, loc: TLoc, result: var string) =
  223. var oldLen = result.len
  224. result.add('<')
  225. if loc.k != low(loc.k): encodeVInt(ord(loc.k), result)
  226. if loc.s != low(loc.s):
  227. result.add('*')
  228. encodeVInt(ord(loc.s), result)
  229. if loc.flags != {}:
  230. result.add('$')
  231. encodeVInt(cast[int32](loc.flags), result)
  232. if loc.t != nil:
  233. result.add('^')
  234. encodeVInt(cast[int32](loc.t.id), result)
  235. pushType(w, loc.t)
  236. if loc.r != nil:
  237. result.add('!')
  238. encodeStr($loc.r, result)
  239. if loc.a != 0:
  240. result.add('?')
  241. encodeVInt(loc.a, result)
  242. if oldLen + 1 == result.len:
  243. # no data was necessary, so remove the '<' again:
  244. setLen(result, oldLen)
  245. else:
  246. result.add('>')
  247. proc encodeType(w: PRodWriter, t: PType, result: var string) =
  248. if t == nil:
  249. # nil nodes have to be stored too:
  250. result.add("[]")
  251. return
  252. # we need no surrounding [] here because the type is in a line of its own
  253. if t.kind == tyForward: internalError("encodeType: tyForward")
  254. # for the new rodfile viewer we use a preceding [ so that the data section
  255. # can easily be disambiguated:
  256. result.add('[')
  257. encodeVInt(ord(t.kind), result)
  258. result.add('+')
  259. encodeVInt(t.id, result)
  260. if t.n != nil:
  261. encodeNode(w, unknownLineInfo, t.n, result)
  262. if t.flags != {}:
  263. result.add('$')
  264. encodeVInt(cast[int32](t.flags), result)
  265. if t.callConv != low(t.callConv):
  266. result.add('?')
  267. encodeVInt(ord(t.callConv), result)
  268. if t.owner != nil:
  269. result.add('*')
  270. encodeVInt(t.owner.id, result)
  271. pushSym(w, t.owner)
  272. if t.sym != nil:
  273. result.add('&')
  274. encodeVInt(t.sym.id, result)
  275. pushSym(w, t.sym)
  276. if t.size != - 1:
  277. result.add('/')
  278. encodeVBiggestInt(t.size, result)
  279. if t.align != - 1:
  280. result.add('=')
  281. encodeVInt(t.align, result)
  282. encodeLoc(w, t.loc, result)
  283. for i in 0..<t.len:
  284. if t[i] == nil:
  285. result.add("^()")
  286. else:
  287. result.add('^')
  288. encodeVInt(t[i].id, result)
  289. pushType(w, t[i])
  290. proc encodeLib(w: PRodWriter, lib: PLib, info: TLineInfo, result: var string) =
  291. result.add('|')
  292. encodeVInt(ord(lib.kind), result)
  293. result.add('|')
  294. encodeStr($lib.name, result)
  295. result.add('|')
  296. encodeNode(w, info, lib.path, result)
  297. proc encodeSym(w: PRodWriter, s: PSym, result: var string) =
  298. if s == nil:
  299. # nil nodes have to be stored too:
  300. result.add("{}")
  301. return
  302. # we need no surrounding {} here because the symbol is in a line of its own
  303. encodeVInt(ord(s.kind), result)
  304. result.add('+')
  305. encodeVInt(s.id, result)
  306. result.add('&')
  307. encodeStr(s.name.s, result)
  308. if s.typ != nil:
  309. result.add('^')
  310. encodeVInt(s.typ.id, result)
  311. pushType(w, s.typ)
  312. result.add('?')
  313. if s.info.col != -1'i16: encodeVInt(s.info.col, result)
  314. result.add(',')
  315. if s.info.line != -1'i16: encodeVInt(s.info.line, result)
  316. result.add(',')
  317. encodeVInt(fileIdx(w, toFilename(s.info)), result)
  318. if s.owner != nil:
  319. result.add('*')
  320. encodeVInt(s.owner.id, result)
  321. pushSym(w, s.owner)
  322. if s.flags != {}:
  323. result.add('$')
  324. encodeVInt(cast[int32](s.flags), result)
  325. if s.magic != mNone:
  326. result.add('@')
  327. encodeVInt(ord(s.magic), result)
  328. if s.options != w.options:
  329. result.add('!')
  330. encodeVInt(cast[int32](s.options), result)
  331. if s.position != 0:
  332. result.add('%')
  333. encodeVInt(s.position, result)
  334. if s.offset != - 1:
  335. result.add('`')
  336. encodeVInt(s.offset, result)
  337. encodeLoc(w, s.loc, result)
  338. if s.annex != nil: encodeLib(w, s.annex, s.info, result)
  339. if s.constraint != nil:
  340. result.add('#')
  341. encodeNode(w, unknownLineInfo, s.constraint, result)
  342. # lazy loading will soon reload the ast lazily, so the ast needs to be
  343. # the last entry of a symbol:
  344. if s.ast != nil:
  345. # we used to attempt to save space here by only storing a dummy AST if
  346. # it is not necessary, but Nim's heavy compile-time evaluation features
  347. # make that unfeasible nowadays:
  348. encodeNode(w, s.info, s.ast, result)
  349. proc createDb() =
  350. db.exec(sql"""
  351. create table if not exists Module(
  352. id integer primary key,
  353. name varchar(256) not null,
  354. fullpath varchar(256) not null,
  355. interfHash varchar(256) not null,
  356. fullHash varchar(256) not null,
  357. created timestamp not null default (DATETIME('now'))
  358. );""")
  359. db.exec(sql"""
  360. create table if not exists Backend(
  361. id integer primary key,
  362. strongdeps varchar(max) not null,
  363. weakdeps varchar(max) not null,
  364. header varchar(max) not null,
  365. code varchar(max) not null
  366. )
  367. create table if not exists Symbol(
  368. id integer primary key,
  369. module integer not null,
  370. backend integer not null,
  371. name varchar(max) not null,
  372. data varchar(max) not null,
  373. created timestamp not null default (DATETIME('now')),
  374. foreign key (module) references Module(id),
  375. foreign key (backend) references Backend(id)
  376. );""")
  377. db.exec(sql"""
  378. create table if not exists Type(
  379. id integer primary key,
  380. module integer not null,
  381. name varchar(max) not null,
  382. data varchar(max) not null,
  383. created timestamp not null default (DATETIME('now')),
  384. foreign key (module) references module(id)
  385. );""")