dfa.nim 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2017 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Data flow analysis for Nim. For now the task is to prove that every
  10. ## usage of a local variable 'v' is covered by an initialization to 'v'
  11. ## first.
  12. ## We transform the AST into a linear list of instructions first to
  13. ## make this easier to handle: There are only 2 different branching
  14. ## instructions: 'goto X' is an unconditional goto, 'fork X'
  15. ## is a conditional goto (either the next instruction or 'X' can be
  16. ## taken). Exhaustive case statements are translated
  17. ## so that the last branch is transformed into an 'else' branch.
  18. ## ``return`` and ``break`` are all covered by 'goto'.
  19. ## The case to detect is ``use v`` that is not dominated by
  20. ## a ``def v``.
  21. ## The data structures and algorithms used here are inspired by
  22. ## "A Graph–Free Approach to Data–Flow Analysis" by Markus Mohnen.
  23. ## https://link.springer.com/content/pdf/10.1007/3-540-45937-5_6.pdf
  24. import ast, astalgo, types, intsets, tables, msgs
  25. type
  26. InstrKind* = enum
  27. goto, fork, def, use,
  28. useWithinCall # this strange special case is used to get more
  29. # move optimizations out of regular code
  30. # XXX This is still overly pessimistic in
  31. # call(let x = foo; bar(x))
  32. Instr* = object
  33. n*: PNode
  34. case kind*: InstrKind
  35. of def, use, useWithinCall: sym*: PSym
  36. of goto, fork: dest*: int
  37. ControlFlowGraph* = seq[Instr]
  38. TPosition = distinct int
  39. TBlock = object
  40. label: PSym
  41. fixups: seq[TPosition]
  42. ValueKind = enum
  43. undef, value, valueOrUndef
  44. Con = object
  45. code: ControlFlowGraph
  46. inCall: int
  47. blocks: seq[TBlock]
  48. proc debugInfo(info: TLineInfo): string =
  49. result = info.toFilename & ":" & $info.line
  50. proc codeListing(c: ControlFlowGraph, result: var string, start=0; last = -1) =
  51. # for debugging purposes
  52. # first iteration: compute all necessary labels:
  53. var jumpTargets = initIntSet()
  54. let last = if last < 0: c.len-1 else: min(last, c.len-1)
  55. for i in start..last:
  56. if c[i].kind in {goto, fork}:
  57. jumpTargets.incl(i+c[i].dest)
  58. var i = start
  59. while i <= last:
  60. if i in jumpTargets: result.add("L" & $i & ":\n")
  61. result.add "\t"
  62. result.add $c[i].kind
  63. result.add "\t"
  64. case c[i].kind
  65. of def, use, useWithinCall:
  66. result.add c[i].sym.name.s
  67. of goto, fork:
  68. result.add "L"
  69. result.add c[i].dest+i
  70. result.add("\t#")
  71. result.add(debugInfo(c[i].n.info))
  72. result.add("\n")
  73. inc i
  74. if i in jumpTargets: result.add("L" & $i & ": End\n")
  75. proc echoCfg*(c: ControlFlowGraph; start=0; last = -1) {.deprecated.} =
  76. ## echos the ControlFlowGraph for debugging purposes.
  77. var buf = ""
  78. codeListing(c, buf, start, last)
  79. echo buf
  80. proc forkI(c: var Con; n: PNode): TPosition =
  81. result = TPosition(c.code.len)
  82. c.code.add Instr(n: n, kind: fork, dest: 0)
  83. proc gotoI(c: var Con; n: PNode): TPosition =
  84. result = TPosition(c.code.len)
  85. c.code.add Instr(n: n, kind: goto, dest: 0)
  86. proc genLabel(c: Con): TPosition =
  87. result = TPosition(c.code.len)
  88. proc jmpBack(c: var Con, n: PNode, p = TPosition(0)) =
  89. let dist = p.int - c.code.len
  90. internalAssert(-0x7fff < dist and dist < 0x7fff)
  91. c.code.add Instr(n: n, kind: goto, dest: dist)
  92. proc patch(c: var Con, p: TPosition) =
  93. # patch with current index
  94. let p = p.int
  95. let diff = c.code.len - p
  96. internalAssert(-0x7fff < diff and diff < 0x7fff)
  97. c.code[p].dest = diff
  98. proc popBlock(c: var Con; oldLen: int) =
  99. for f in c.blocks[oldLen].fixups:
  100. c.patch(f)
  101. c.blocks.setLen(oldLen)
  102. template withBlock(labl: PSym; body: untyped) {.dirty.} =
  103. var oldLen {.gensym.} = c.blocks.len
  104. c.blocks.add TBlock(label: labl, fixups: @[])
  105. body
  106. popBlock(c, oldLen)
  107. proc isTrue(n: PNode): bool =
  108. n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or
  109. n.kind == nkIntLit and n.intVal != 0
  110. proc gen(c: var Con; n: PNode) # {.noSideEffect.}
  111. proc genWhile(c: var Con; n: PNode) =
  112. # L1:
  113. # cond, tmp
  114. # fjmp tmp, L2
  115. # body
  116. # jmp L1
  117. # L2:
  118. let L1 = c.genLabel
  119. withBlock(nil):
  120. if isTrue(n.sons[0]):
  121. c.gen(n.sons[1])
  122. c.jmpBack(n, L1)
  123. else:
  124. c.gen(n.sons[0])
  125. let L2 = c.forkI(n)
  126. c.gen(n.sons[1])
  127. c.jmpBack(n, L1)
  128. c.patch(L2)
  129. proc genBlock(c: var Con; n: PNode) =
  130. withBlock(n.sons[0].sym):
  131. c.gen(n.sons[1])
  132. proc genBreak(c: var Con; n: PNode) =
  133. let L1 = c.gotoI(n)
  134. if n.sons[0].kind == nkSym:
  135. #echo cast[int](n.sons[0].sym)
  136. for i in countdown(c.blocks.len-1, 0):
  137. if c.blocks[i].label == n.sons[0].sym:
  138. c.blocks[i].fixups.add L1
  139. return
  140. globalError(n.info, errGenerated, "VM problem: cannot find 'break' target")
  141. else:
  142. c.blocks[c.blocks.high].fixups.add L1
  143. proc genIf(c: var Con, n: PNode) =
  144. var endings: seq[TPosition] = @[]
  145. for i in countup(0, len(n) - 1):
  146. var it = n.sons[i]
  147. if it.len == 2:
  148. c.gen(it.sons[0].sons[1])
  149. var elsePos = c.forkI(it.sons[0].sons[1])
  150. c.gen(it.sons[1])
  151. if i < sonsLen(n)-1:
  152. endings.add(c.gotoI(it.sons[1]))
  153. c.patch(elsePos)
  154. else:
  155. c.gen(it.sons[0])
  156. for endPos in endings: c.patch(endPos)
  157. proc genAndOr(c: var Con; n: PNode) =
  158. # asgn dest, a
  159. # fork L1
  160. # asgn dest, b
  161. # L1:
  162. c.gen(n.sons[1])
  163. let L1 = c.forkI(n)
  164. c.gen(n.sons[2])
  165. c.patch(L1)
  166. proc genCase(c: var Con; n: PNode) =
  167. # if (!expr1) goto L1;
  168. # thenPart
  169. # goto LEnd
  170. # L1:
  171. # if (!expr2) goto L2;
  172. # thenPart2
  173. # goto LEnd
  174. # L2:
  175. # elsePart
  176. # Lend:
  177. var endings: seq[TPosition] = @[]
  178. c.gen(n.sons[0])
  179. for i in 1 .. <n.len:
  180. let it = n.sons[i]
  181. if it.len == 1:
  182. c.gen(it.sons[0])
  183. else:
  184. let elsePos = c.forkI(it.lastSon)
  185. c.gen(it.lastSon)
  186. if i < sonsLen(n)-1:
  187. endings.add(c.gotoI(it.lastSon))
  188. c.patch(elsePos)
  189. for endPos in endings: c.patch(endPos)
  190. proc genTry(c: var Con; n: PNode) =
  191. var endings: seq[TPosition] = @[]
  192. let elsePos = c.forkI(n)
  193. c.gen(n.sons[0])
  194. c.patch(elsePos)
  195. for i in 1 .. <n.len:
  196. let it = n.sons[i]
  197. if it.kind != nkFinally:
  198. var blen = len(it)
  199. let endExcept = c.forkI(it)
  200. c.gen(it.lastSon)
  201. if i < sonsLen(n)-1:
  202. endings.add(c.gotoI(it))
  203. c.patch(endExcept)
  204. for endPos in endings: c.patch(endPos)
  205. let fin = lastSon(n)
  206. if fin.kind == nkFinally:
  207. c.gen(fin.sons[0])
  208. proc genRaise(c: var Con; n: PNode) =
  209. gen(c, n.sons[0])
  210. c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
  211. proc genReturn(c: var Con; n: PNode) =
  212. if n.sons[0].kind != nkEmpty: gen(c, n.sons[0])
  213. c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
  214. const
  215. InterestingSyms = {skVar, skResult, skLet}
  216. proc genUse(c: var Con; n: PNode) =
  217. var n = n
  218. while n.kind in {nkDotExpr, nkCheckedFieldExpr,
  219. nkBracketExpr, nkDerefExpr, nkHiddenDeref,
  220. nkAddr, nkHiddenAddr}:
  221. n = n[0]
  222. if n.kind == nkSym and n.sym.kind in InterestingSyms:
  223. if c.inCall > 0:
  224. c.code.add Instr(n: n, kind: useWithinCall, sym: n.sym)
  225. else:
  226. c.code.add Instr(n: n, kind: use, sym: n.sym)
  227. proc genDef(c: var Con; n: PNode) =
  228. if n.kind == nkSym and n.sym.kind in InterestingSyms:
  229. c.code.add Instr(n: n, kind: def, sym: n.sym)
  230. proc genCall(c: var Con; n: PNode) =
  231. gen(c, n[0])
  232. var t = n[0].typ
  233. if t != nil: t = t.skipTypes(abstractInst)
  234. inc c.inCall
  235. for i in 1..<n.len:
  236. gen(c, n[i])
  237. if t != nil and i < t.len and t.sons[i].kind == tyVar:
  238. genDef(c, n[i])
  239. dec c.inCall
  240. proc genMagic(c: var Con; n: PNode; m: TMagic) =
  241. case m
  242. of mAnd, mOr: c.genAndOr(n)
  243. of mNew, mNewFinalize:
  244. genDef(c, n[1])
  245. for i in 2..<n.len: gen(c, n[i])
  246. of mExit:
  247. genCall(c, n)
  248. c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
  249. else:
  250. genCall(c, n)
  251. proc genVarSection(c: var Con; n: PNode) =
  252. for a in n:
  253. if a.kind == nkCommentStmt: continue
  254. if a.kind == nkVarTuple:
  255. gen(c, a.lastSon)
  256. for i in 0 .. a.len-3: genDef(c, a[i])
  257. else:
  258. gen(c, a.lastSon)
  259. if a.lastSon.kind != nkEmpty:
  260. genDef(c, a.sons[0])
  261. proc gen(c: var Con; n: PNode) =
  262. case n.kind
  263. of nkSym: genUse(c, n)
  264. of nkCallKinds:
  265. if n.sons[0].kind == nkSym:
  266. let s = n.sons[0].sym
  267. if s.magic != mNone:
  268. genMagic(c, n, s.magic)
  269. else:
  270. genCall(c, n)
  271. else:
  272. genCall(c, n)
  273. of nkCharLit..nkNilLit: discard
  274. of nkAsgn, nkFastAsgn:
  275. gen(c, n[1])
  276. genDef(c, n[0])
  277. of nkDotExpr, nkCheckedFieldExpr, nkBracketExpr,
  278. nkDerefExpr, nkHiddenDeref, nkAddr, nkHiddenAddr:
  279. gen(c, n[0])
  280. of nkIfStmt, nkIfExpr: genIf(c, n)
  281. of nkWhenStmt:
  282. # This is "when nimvm" node. Chose the first branch.
  283. gen(c, n.sons[0].sons[1])
  284. of nkCaseStmt: genCase(c, n)
  285. of nkWhileStmt: genWhile(c, n)
  286. of nkBlockExpr, nkBlockStmt: genBlock(c, n)
  287. of nkReturnStmt: genReturn(c, n)
  288. of nkRaiseStmt: genRaise(c, n)
  289. of nkBreakStmt: genBreak(c, n)
  290. of nkTryStmt: genTry(c, n)
  291. of nkStmtList, nkStmtListExpr, nkChckRangeF, nkChckRange64, nkChckRange,
  292. nkBracket, nkCurly, nkPar, nkClosure, nkObjConstr:
  293. for x in n: gen(c, x)
  294. of nkPragmaBlock: gen(c, n.lastSon)
  295. of nkDiscardStmt: gen(c, n.sons[0])
  296. of nkHiddenStdConv, nkHiddenSubConv, nkConv, nkExprColonExpr, nkExprEqExpr,
  297. nkCast:
  298. gen(c, n.sons[1])
  299. of nkObjDownConv, nkStringToCString, nkCStringToString: gen(c, n.sons[0])
  300. of nkVarSection, nkLetSection: genVarSection(c, n)
  301. else: discard
  302. proc dfa(code: seq[Instr]) =
  303. # We aggressively push 'undef' values for every 'use v' instruction
  304. # until they are eliminated via a 'def v' instructions.
  305. # If we manage to push one 'undef' to a 'use' instruction, we produce
  306. # an error:
  307. var undef = initIntSet()
  308. for i in 0..<code.len:
  309. if code[i].kind == use: undef.incl(code[i].sym.id)
  310. var s = newSeq[IntSet](code.len)
  311. for i in 0..<code.len:
  312. assign(s[i], undef)
  313. # In the original paper, W := {0,...,n} is done. This is wasteful, we
  314. # have no intention to analyse a program like
  315. #
  316. # return 3
  317. # echo a + b
  318. #
  319. # any further than necessary.
  320. var w = @[0]
  321. while w.len > 0:
  322. var pc = w[^1]
  323. # this simulates a single linear control flow execution:
  324. while true:
  325. # according to the paper, it is better to shrink the working set here
  326. # in this inner loop:
  327. let widx = w.find(pc)
  328. if widx >= 0: w.del(widx)
  329. # our interpretation ![I!]:
  330. var sid = -1
  331. case code[pc].kind
  332. of goto, fork: discard
  333. of use, useWithinCall:
  334. let sym = code[pc].sym
  335. if s[pc].contains(sym.id):
  336. localError(code[pc].n.info, "variable read before initialized: " & sym.name.s)
  337. of def:
  338. sid = code[pc].sym.id
  339. var pc2: int
  340. if code[pc].kind == goto:
  341. pc2 = pc + code[pc].dest
  342. else:
  343. pc2 = pc + 1
  344. if code[pc].kind == fork:
  345. let l = pc + code[pc].dest
  346. if sid >= 0 and s[l].missingOrExcl(sid):
  347. w.add l
  348. if sid >= 0 and s[pc2].missingOrExcl(sid):
  349. pc = pc2
  350. else:
  351. break
  352. if pc >= code.len: break
  353. when false:
  354. case code[pc].kind
  355. of use:
  356. let s = code[pc].sym
  357. if undefB.contains(s.id):
  358. localError(code[pc].n.info, "variable read before initialized: " & s.name.s)
  359. break
  360. inc pc
  361. of def:
  362. let s = code[pc].sym
  363. # exclude 'undef' for s for this path through the graph.
  364. if not undefB.missingOrExcl(s.id):
  365. inc pc
  366. else:
  367. break
  368. #undefB.excl s.id
  369. #inc pc
  370. when false:
  371. let prev = bindings.getOrDefault(s.id)
  372. if prev != value:
  373. # well now it has a value and we made progress, so
  374. bindings[s.id] = value
  375. inc pc
  376. else:
  377. break
  378. of fork:
  379. let diff = code[pc].dest
  380. # we follow pc + 1 and remember the label for later:
  381. w.add pc+diff
  382. inc pc
  383. of goto:
  384. let diff = code[pc].dest
  385. pc = pc + diff
  386. if pc >= code.len: break
  387. proc dataflowAnalysis*(s: PSym; body: PNode) =
  388. var c = Con(code: @[], blocks: @[])
  389. gen(c, body)
  390. #echoCfg(c.code)
  391. dfa(c.code)
  392. proc constructCfg*(s: PSym; body: PNode): ControlFlowGraph =
  393. ## constructs a control flow graph for ``body``.
  394. var c = Con(code: @[], blocks: @[])
  395. shallowCopy(result, c.code)