dfa.nim 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2017 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Data flow analysis for Nim. For now the task is to prove that every
  10. ## usage of a local variable 'v' is covered by an initialization to 'v'
  11. ## first.
  12. ## We transform the AST into a linear list of instructions first to
  13. ## make this easier to handle: There are only 2 different branching
  14. ## instructions: 'goto X' is an unconditional goto, 'fork X'
  15. ## is a conditional goto (either the next instruction or 'X' can be
  16. ## taken). Exhaustive case statements are translated
  17. ## so that the last branch is transformed into an 'else' branch.
  18. ## ``return`` and ``break`` are all covered by 'goto'.
  19. ## The case to detect is ``use v`` that is not dominated by
  20. ## a ``def v``.
  21. ## The data structures and algorithms used here are inspired by
  22. ## "A Graph–Free Approach to Data–Flow Analysis" by Markus Mohnen.
  23. ## https://link.springer.com/content/pdf/10.1007/3-540-45937-5_6.pdf
  24. import ast, astalgo, types, intsets, tables, msgs, options, lineinfos
  25. type
  26. InstrKind* = enum
  27. goto, fork, def, use,
  28. useWithinCall # this strange special case is used to get more
  29. # move optimizations out of regular code
  30. # XXX This is still overly pessimistic in
  31. # call(let x = foo; bar(x))
  32. Instr* = object
  33. n*: PNode
  34. case kind*: InstrKind
  35. of def, use, useWithinCall: sym*: PSym
  36. of goto, fork: dest*: int
  37. ControlFlowGraph* = seq[Instr]
  38. TPosition = distinct int
  39. TBlock = object
  40. label: PSym
  41. fixups: seq[TPosition]
  42. ValueKind = enum
  43. undef, value, valueOrUndef
  44. Con = object
  45. code: ControlFlowGraph
  46. inCall: int
  47. blocks: seq[TBlock]
  48. proc debugInfo(info: TLineInfo): string =
  49. result = $info.line #info.toFilename & ":" & $info.line
  50. proc codeListing(c: ControlFlowGraph, result: var string, start=0; last = -1) =
  51. # for debugging purposes
  52. # first iteration: compute all necessary labels:
  53. var jumpTargets = initIntSet()
  54. let last = if last < 0: c.len-1 else: min(last, c.len-1)
  55. for i in start..last:
  56. if c[i].kind in {goto, fork}:
  57. jumpTargets.incl(i+c[i].dest)
  58. var i = start
  59. while i <= last:
  60. if i in jumpTargets: result.add("L" & $i & ":\n")
  61. result.add "\t"
  62. result.add $c[i].kind
  63. result.add "\t"
  64. case c[i].kind
  65. of def, use, useWithinCall:
  66. result.add c[i].sym.name.s
  67. of goto, fork:
  68. result.add "L"
  69. result.add c[i].dest+i
  70. result.add("\t#")
  71. result.add(debugInfo(c[i].n.info))
  72. result.add("\n")
  73. inc i
  74. if i in jumpTargets: result.add("L" & $i & ": End\n")
  75. proc echoCfg*(c: ControlFlowGraph; start=0; last = -1) {.deprecated.} =
  76. ## echos the ControlFlowGraph for debugging purposes.
  77. var buf = ""
  78. codeListing(c, buf, start, last)
  79. when declared(echo):
  80. echo buf
  81. proc forkI(c: var Con; n: PNode): TPosition =
  82. result = TPosition(c.code.len)
  83. c.code.add Instr(n: n, kind: fork, dest: 0)
  84. proc gotoI(c: var Con; n: PNode): TPosition =
  85. result = TPosition(c.code.len)
  86. c.code.add Instr(n: n, kind: goto, dest: 0)
  87. proc genLabel(c: Con): TPosition =
  88. result = TPosition(c.code.len)
  89. proc jmpBack(c: var Con, n: PNode, p = TPosition(0)) =
  90. let dist = p.int - c.code.len
  91. doAssert(-0x7fff < dist and dist < 0x7fff)
  92. c.code.add Instr(n: n, kind: goto, dest: dist)
  93. proc patch(c: var Con, p: TPosition) =
  94. # patch with current index
  95. let p = p.int
  96. let diff = c.code.len - p
  97. doAssert(-0x7fff < diff and diff < 0x7fff)
  98. c.code[p].dest = diff
  99. proc popBlock(c: var Con; oldLen: int) =
  100. for f in c.blocks[oldLen].fixups:
  101. c.patch(f)
  102. c.blocks.setLen(oldLen)
  103. template withBlock(labl: PSym; body: untyped) {.dirty.} =
  104. var oldLen {.gensym.} = c.blocks.len
  105. c.blocks.add TBlock(label: labl, fixups: @[])
  106. body
  107. popBlock(c, oldLen)
  108. proc isTrue(n: PNode): bool =
  109. n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or
  110. n.kind == nkIntLit and n.intVal != 0
  111. proc gen(c: var Con; n: PNode) # {.noSideEffect.}
  112. proc genWhile(c: var Con; n: PNode) =
  113. # L1:
  114. # cond, tmp
  115. # fork tmp, L2
  116. # body
  117. # jmp L1
  118. # L2:
  119. let L1 = c.genLabel
  120. withBlock(nil):
  121. if isTrue(n.sons[0]):
  122. c.gen(n.sons[1])
  123. c.jmpBack(n, L1)
  124. else:
  125. c.gen(n.sons[0])
  126. let L2 = c.forkI(n)
  127. c.gen(n.sons[1])
  128. c.jmpBack(n, L1)
  129. c.patch(L2)
  130. proc genBlock(c: var Con; n: PNode) =
  131. withBlock(n.sons[0].sym):
  132. c.gen(n.sons[1])
  133. proc genBreak(c: var Con; n: PNode) =
  134. let L1 = c.gotoI(n)
  135. if n.sons[0].kind == nkSym:
  136. #echo cast[int](n.sons[0].sym)
  137. for i in countdown(c.blocks.len-1, 0):
  138. if c.blocks[i].label == n.sons[0].sym:
  139. c.blocks[i].fixups.add L1
  140. return
  141. #globalError(n.info, "VM problem: cannot find 'break' target")
  142. else:
  143. c.blocks[c.blocks.high].fixups.add L1
  144. proc genIf(c: var Con, n: PNode) =
  145. var endings: seq[TPosition] = @[]
  146. for i in countup(0, len(n) - 1):
  147. var it = n.sons[i]
  148. c.gen(it.sons[0])
  149. if it.len == 2:
  150. let elsePos = c.forkI(it.sons[1])
  151. c.gen(it.sons[1])
  152. if i < sonsLen(n)-1:
  153. endings.add(c.gotoI(it.sons[1]))
  154. c.patch(elsePos)
  155. for endPos in endings: c.patch(endPos)
  156. proc genAndOr(c: var Con; n: PNode) =
  157. # asgn dest, a
  158. # fork L1
  159. # asgn dest, b
  160. # L1:
  161. c.gen(n.sons[1])
  162. let L1 = c.forkI(n)
  163. c.gen(n.sons[2])
  164. c.patch(L1)
  165. proc genCase(c: var Con; n: PNode) =
  166. # if (!expr1) goto L1;
  167. # thenPart
  168. # goto LEnd
  169. # L1:
  170. # if (!expr2) goto L2;
  171. # thenPart2
  172. # goto LEnd
  173. # L2:
  174. # elsePart
  175. # Lend:
  176. var endings: seq[TPosition] = @[]
  177. c.gen(n.sons[0])
  178. for i in 1 ..< n.len:
  179. let it = n.sons[i]
  180. if it.len == 1:
  181. c.gen(it.sons[0])
  182. else:
  183. let elsePos = c.forkI(it.lastSon)
  184. c.gen(it.lastSon)
  185. if i < sonsLen(n)-1:
  186. endings.add(c.gotoI(it.lastSon))
  187. c.patch(elsePos)
  188. for endPos in endings: c.patch(endPos)
  189. proc genTry(c: var Con; n: PNode) =
  190. var endings: seq[TPosition] = @[]
  191. let elsePos = c.forkI(n)
  192. c.gen(n.sons[0])
  193. c.patch(elsePos)
  194. for i in 1 ..< n.len:
  195. let it = n.sons[i]
  196. if it.kind != nkFinally:
  197. var blen = len(it)
  198. let endExcept = c.forkI(it)
  199. c.gen(it.lastSon)
  200. if i < sonsLen(n)-1:
  201. endings.add(c.gotoI(it))
  202. c.patch(endExcept)
  203. for endPos in endings: c.patch(endPos)
  204. let fin = lastSon(n)
  205. if fin.kind == nkFinally:
  206. c.gen(fin.sons[0])
  207. proc genRaise(c: var Con; n: PNode) =
  208. gen(c, n.sons[0])
  209. c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
  210. proc genReturn(c: var Con; n: PNode) =
  211. if n.sons[0].kind != nkEmpty: gen(c, n.sons[0])
  212. c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
  213. const
  214. InterestingSyms = {skVar, skResult, skLet}
  215. proc genUse(c: var Con; n: PNode) =
  216. var n = n
  217. while n.kind in {nkDotExpr, nkCheckedFieldExpr,
  218. nkBracketExpr, nkDerefExpr, nkHiddenDeref,
  219. nkAddr, nkHiddenAddr}:
  220. n = n[0]
  221. if n.kind == nkSym and n.sym.kind in InterestingSyms:
  222. if c.inCall > 0:
  223. c.code.add Instr(n: n, kind: useWithinCall, sym: n.sym)
  224. else:
  225. c.code.add Instr(n: n, kind: use, sym: n.sym)
  226. proc genDef(c: var Con; n: PNode) =
  227. if n.kind == nkSym and n.sym.kind in InterestingSyms:
  228. c.code.add Instr(n: n, kind: def, sym: n.sym)
  229. proc genCall(c: var Con; n: PNode) =
  230. gen(c, n[0])
  231. var t = n[0].typ
  232. if t != nil: t = t.skipTypes(abstractInst)
  233. inc c.inCall
  234. for i in 1..<n.len:
  235. gen(c, n[i])
  236. if t != nil and i < t.len and t.sons[i].kind == tyVar:
  237. genDef(c, n[i])
  238. dec c.inCall
  239. proc genMagic(c: var Con; n: PNode; m: TMagic) =
  240. case m
  241. of mAnd, mOr: c.genAndOr(n)
  242. of mNew, mNewFinalize:
  243. genDef(c, n[1])
  244. for i in 2..<n.len: gen(c, n[i])
  245. of mExit:
  246. genCall(c, n)
  247. c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
  248. else:
  249. genCall(c, n)
  250. proc genVarSection(c: var Con; n: PNode) =
  251. for a in n:
  252. if a.kind == nkCommentStmt: continue
  253. if a.kind == nkVarTuple:
  254. gen(c, a.lastSon)
  255. for i in 0 .. a.len-3: genDef(c, a[i])
  256. else:
  257. gen(c, a.lastSon)
  258. if a.lastSon.kind != nkEmpty:
  259. genDef(c, a.sons[0])
  260. proc gen(c: var Con; n: PNode) =
  261. case n.kind
  262. of nkSym: genUse(c, n)
  263. of nkCallKinds:
  264. if n.sons[0].kind == nkSym:
  265. let s = n.sons[0].sym
  266. if s.magic != mNone:
  267. genMagic(c, n, s.magic)
  268. else:
  269. genCall(c, n)
  270. else:
  271. genCall(c, n)
  272. of nkCharLit..nkNilLit: discard
  273. of nkAsgn, nkFastAsgn:
  274. gen(c, n[1])
  275. genDef(c, n[0])
  276. of nkDotExpr, nkCheckedFieldExpr, nkBracketExpr,
  277. nkDerefExpr, nkHiddenDeref, nkAddr, nkHiddenAddr:
  278. gen(c, n[0])
  279. of nkIfStmt, nkIfExpr: genIf(c, n)
  280. of nkWhenStmt:
  281. # This is "when nimvm" node. Chose the first branch.
  282. gen(c, n.sons[0].sons[1])
  283. of nkCaseStmt: genCase(c, n)
  284. of nkWhileStmt: genWhile(c, n)
  285. of nkBlockExpr, nkBlockStmt: genBlock(c, n)
  286. of nkReturnStmt: genReturn(c, n)
  287. of nkRaiseStmt: genRaise(c, n)
  288. of nkBreakStmt: genBreak(c, n)
  289. of nkTryStmt: genTry(c, n)
  290. of nkStmtList, nkStmtListExpr, nkChckRangeF, nkChckRange64, nkChckRange,
  291. nkBracket, nkCurly, nkPar, nkTupleConstr, nkClosure, nkObjConstr:
  292. for x in n: gen(c, x)
  293. of nkPragmaBlock: gen(c, n.lastSon)
  294. of nkDiscardStmt: gen(c, n.sons[0])
  295. of nkHiddenStdConv, nkHiddenSubConv, nkConv, nkExprColonExpr, nkExprEqExpr,
  296. nkCast:
  297. gen(c, n.sons[1])
  298. of nkObjDownConv, nkStringToCString, nkCStringToString: gen(c, n.sons[0])
  299. of nkVarSection, nkLetSection: genVarSection(c, n)
  300. else: discard
  301. proc dfa(code: seq[Instr]; conf: ConfigRef) =
  302. var u = newSeq[IntSet](code.len) # usages
  303. var d = newSeq[IntSet](code.len) # defs
  304. var c = newSeq[IntSet](code.len) # consumed
  305. var backrefs = initTable[int, int]()
  306. for i in 0..<code.len:
  307. u[i] = initIntSet()
  308. d[i] = initIntSet()
  309. c[i] = initIntSet()
  310. case code[i].kind
  311. of use, useWithinCall: u[i].incl(code[i].sym.id)
  312. of def: d[i].incl(code[i].sym.id)
  313. of fork, goto:
  314. let d = i+code[i].dest
  315. backrefs.add(d, i)
  316. var w = @[0]
  317. var maxIters = 50
  318. var someChange = true
  319. var takenGotos = initIntSet()
  320. var consuming = -1
  321. while w.len > 0 and maxIters > 0: # and someChange:
  322. dec maxIters
  323. var pc = w.pop() # w[^1]
  324. var prevPc = -1
  325. # this simulates a single linear control flow execution:
  326. while pc < code.len:
  327. if prevPc >= 0:
  328. someChange = false
  329. # merge step and test for changes (we compute the fixpoints here):
  330. # 'u' needs to be the union of prevPc, pc
  331. # 'd' needs to be the intersection of 'pc'
  332. for id in u[prevPc]:
  333. if not u[pc].containsOrIncl(id):
  334. someChange = true
  335. # in (a; b) if ``a`` sets ``v`` so does ``b``. The intersection
  336. # is only interesting on merge points:
  337. for id in d[prevPc]:
  338. if not d[pc].containsOrIncl(id):
  339. someChange = true
  340. # if this is a merge point, we take the intersection of the 'd' sets:
  341. if backrefs.hasKey(pc):
  342. var intersect = initIntSet()
  343. assign(intersect, d[pc])
  344. var first = true
  345. for prevPc in backrefs.allValues(pc):
  346. for def in d[pc]:
  347. if def notin d[prevPc]:
  348. excl(intersect, def)
  349. someChange = true
  350. when defined(debugDfa):
  351. echo "Excluding ", pc, " prev ", prevPc
  352. assign d[pc], intersect
  353. if consuming >= 0:
  354. if not c[pc].containsOrIncl(consuming):
  355. someChange = true
  356. consuming = -1
  357. # our interpretation ![I!]:
  358. prevPc = pc
  359. case code[pc].kind
  360. of goto:
  361. # we must leave endless loops eventually:
  362. if not takenGotos.containsOrIncl(pc) or someChange:
  363. pc = pc + code[pc].dest
  364. else:
  365. inc pc
  366. of fork:
  367. # we follow the next instruction but push the dest onto our "work" stack:
  368. #if someChange:
  369. w.add pc + code[pc].dest
  370. inc pc
  371. of use, useWithinCall:
  372. #if not d[prevPc].missingOrExcl():
  373. # someChange = true
  374. consuming = code[pc].sym.id
  375. inc pc
  376. of def:
  377. if not d[pc].containsOrIncl(code[pc].sym.id):
  378. someChange = true
  379. inc pc
  380. when defined(useDfa) and defined(debugDfa):
  381. for i in 0..<code.len:
  382. echo "PC ", i, ": defs: ", d[i], "; uses ", u[i], "; consumes ", c[i]
  383. # now check the condition we're interested in:
  384. for i in 0..<code.len:
  385. case code[i].kind
  386. of use, useWithinCall:
  387. let s = code[i].sym
  388. if s.id notin d[i]:
  389. localError(conf, code[i].n.info, "usage of uninitialized variable: " & s.name.s)
  390. if s.id in c[i]:
  391. localError(conf, code[i].n.info, "usage of an already consumed variable: " & s.name.s)
  392. else: discard
  393. proc dataflowAnalysis*(s: PSym; body: PNode; conf: ConfigRef) =
  394. var c = Con(code: @[], blocks: @[])
  395. gen(c, body)
  396. when defined(useDfa) and defined(debugDfa): echoCfg(c.code)
  397. dfa(c.code, conf)
  398. proc constructCfg*(s: PSym; body: PNode): ControlFlowGraph =
  399. ## constructs a control flow graph for ``body``.
  400. var c = Con(code: @[], blocks: @[])
  401. shallowCopy(result, c.code)