dfa.nim 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2017 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Data flow analysis for Nim.
  10. ## We transform the AST into a linear list of instructions first to
  11. ## make this easier to handle: There are only 2 different branching
  12. ## instructions: 'goto X' is an unconditional goto, 'fork X'
  13. ## is a conditional goto (either the next instruction or 'X' can be
  14. ## taken). Exhaustive case statements could be translated
  15. ## so that the last branch is transformed into an 'else' branch, but
  16. ## this is currently not done.
  17. ## ``return`` and ``break`` are all covered by 'goto'.
  18. ##
  19. ## Control flow through exception handling:
  20. ## Contrary to popular belief, exception handling doesn't cause
  21. ## many problems for this DFA representation, ``raise`` is a statement
  22. ## that ``goes to`` the outer ``finally`` or ``except`` if there is one,
  23. ## otherwise it is the same as ``return``. Every call is treated as
  24. ## a call that can potentially ``raise``. However, without a surrounding
  25. ## ``try`` we don't emit these ``fork ReturnLabel`` instructions in order
  26. ## to speed up the dataflow analysis passes.
  27. ##
  28. ## The data structures and algorithms used here are inspired by
  29. ## "A Graph–Free Approach to Data–Flow Analysis" by Markus Mohnen.
  30. ## https://link.springer.com/content/pdf/10.1007/3-540-45937-5_6.pdf
  31. import ast, astalgo, types, intsets, tables, msgs, options, lineinfos
  32. type
  33. InstrKind* = enum
  34. goto, fork, def, use
  35. Instr* = object
  36. n*: PNode
  37. case kind*: InstrKind
  38. of def, use: sym*: PSym
  39. of goto, fork: dest*: int
  40. ControlFlowGraph* = seq[Instr]
  41. TPosition = distinct int
  42. TBlock = object
  43. label: PSym
  44. fixups: seq[TPosition]
  45. ValueKind = enum
  46. undef, value, valueOrUndef
  47. Con = object
  48. code: ControlFlowGraph
  49. inCall, inTryStmt: int
  50. blocks: seq[TBlock]
  51. tryStmtFixups: seq[TPosition]
  52. proc debugInfo(info: TLineInfo): string =
  53. result = $info.line #info.toFilename & ":" & $info.line
  54. proc codeListing(c: ControlFlowGraph, result: var string, start=0; last = -1) =
  55. # for debugging purposes
  56. # first iteration: compute all necessary labels:
  57. var jumpTargets = initIntSet()
  58. let last = if last < 0: c.len-1 else: min(last, c.len-1)
  59. for i in start..last:
  60. if c[i].kind in {goto, fork}:
  61. jumpTargets.incl(i+c[i].dest)
  62. var i = start
  63. while i <= last:
  64. if i in jumpTargets: result.add("L" & $i & ":\n")
  65. result.add "\t"
  66. result.add $c[i].kind
  67. result.add "\t"
  68. case c[i].kind
  69. of def, use:
  70. result.add c[i].sym.name.s
  71. of goto, fork:
  72. result.add "L"
  73. result.add c[i].dest+i
  74. result.add("\t#")
  75. result.add(debugInfo(c[i].n.info))
  76. result.add("\n")
  77. inc i
  78. if i in jumpTargets: result.add("L" & $i & ": End\n")
  79. proc echoCfg*(c: ControlFlowGraph; start=0; last = -1) {.deprecated.} =
  80. ## echos the ControlFlowGraph for debugging purposes.
  81. var buf = ""
  82. codeListing(c, buf, start, last)
  83. when declared(echo):
  84. echo buf
  85. proc forkI(c: var Con; n: PNode): TPosition =
  86. result = TPosition(c.code.len)
  87. c.code.add Instr(n: n, kind: fork, dest: 0)
  88. proc gotoI(c: var Con; n: PNode): TPosition =
  89. result = TPosition(c.code.len)
  90. c.code.add Instr(n: n, kind: goto, dest: 0)
  91. proc genLabel(c: Con): TPosition =
  92. result = TPosition(c.code.len)
  93. proc jmpBack(c: var Con, n: PNode, p = TPosition(0)) =
  94. let dist = p.int - c.code.len
  95. doAssert(-0x7fff < dist and dist < 0x7fff)
  96. c.code.add Instr(n: n, kind: goto, dest: dist)
  97. proc patch(c: var Con, p: TPosition) =
  98. # patch with current index
  99. let p = p.int
  100. let diff = c.code.len - p
  101. doAssert(-0x7fff < diff and diff < 0x7fff)
  102. c.code[p].dest = diff
  103. proc popBlock(c: var Con; oldLen: int) =
  104. for f in c.blocks[oldLen].fixups:
  105. c.patch(f)
  106. c.blocks.setLen(oldLen)
  107. template withBlock(labl: PSym; body: untyped) {.dirty.} =
  108. var oldLen {.gensym.} = c.blocks.len
  109. c.blocks.add TBlock(label: labl, fixups: @[])
  110. body
  111. popBlock(c, oldLen)
  112. proc isTrue(n: PNode): bool =
  113. n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or
  114. n.kind == nkIntLit and n.intVal != 0
  115. proc gen(c: var Con; n: PNode) # {.noSideEffect.}
  116. proc genWhile(c: var Con; n: PNode) =
  117. # L1:
  118. # cond, tmp
  119. # fork tmp, L2
  120. # body
  121. # jmp L1
  122. # L2:
  123. let L1 = c.genLabel
  124. withBlock(nil):
  125. if isTrue(n.sons[0]):
  126. c.gen(n.sons[1])
  127. c.jmpBack(n, L1)
  128. else:
  129. c.gen(n.sons[0])
  130. let L2 = c.forkI(n)
  131. c.gen(n.sons[1])
  132. c.jmpBack(n, L1)
  133. c.patch(L2)
  134. proc genBlock(c: var Con; n: PNode) =
  135. withBlock(n.sons[0].sym):
  136. c.gen(n.sons[1])
  137. proc genBreak(c: var Con; n: PNode) =
  138. let L1 = c.gotoI(n)
  139. if n.sons[0].kind == nkSym:
  140. #echo cast[int](n.sons[0].sym)
  141. for i in countdown(c.blocks.len-1, 0):
  142. if c.blocks[i].label == n.sons[0].sym:
  143. c.blocks[i].fixups.add L1
  144. return
  145. #globalError(n.info, "VM problem: cannot find 'break' target")
  146. else:
  147. c.blocks[c.blocks.high].fixups.add L1
  148. proc genIf(c: var Con, n: PNode) =
  149. var endings: seq[TPosition] = @[]
  150. for i in countup(0, len(n) - 1):
  151. var it = n.sons[i]
  152. c.gen(it.sons[0])
  153. if it.len == 2:
  154. let elsePos = c.forkI(it.sons[1])
  155. c.gen(it.sons[1])
  156. if i < sonsLen(n)-1:
  157. endings.add(c.gotoI(it.sons[1]))
  158. c.patch(elsePos)
  159. for endPos in endings: c.patch(endPos)
  160. proc genAndOr(c: var Con; n: PNode) =
  161. # asgn dest, a
  162. # fork L1
  163. # asgn dest, b
  164. # L1:
  165. c.gen(n.sons[1])
  166. let L1 = c.forkI(n)
  167. c.gen(n.sons[2])
  168. c.patch(L1)
  169. proc genCase(c: var Con; n: PNode) =
  170. # if (!expr1) goto L1;
  171. # thenPart
  172. # goto LEnd
  173. # L1:
  174. # if (!expr2) goto L2;
  175. # thenPart2
  176. # goto LEnd
  177. # L2:
  178. # elsePart
  179. # Lend:
  180. when false:
  181. # XXX Exhaustiveness is not yet mapped to the control flow graph as
  182. # it seems to offer no benefits for the 'last read of' question.
  183. let isExhaustive = skipTypes(n.sons[0].typ,
  184. abstractVarRange-{tyTypeDesc}).kind in {tyFloat..tyFloat128, tyString} or
  185. lastSon(n).kind == nkElse
  186. var endings: seq[TPosition] = @[]
  187. c.gen(n.sons[0])
  188. for i in 1 ..< n.len:
  189. let it = n.sons[i]
  190. if it.len == 1:
  191. c.gen(it.sons[0])
  192. else:
  193. let elsePos = c.forkI(it.lastSon)
  194. c.gen(it.lastSon)
  195. if i < sonsLen(n)-1:
  196. endings.add(c.gotoI(it.lastSon))
  197. c.patch(elsePos)
  198. for endPos in endings: c.patch(endPos)
  199. proc genTry(c: var Con; n: PNode) =
  200. var endings: seq[TPosition] = @[]
  201. inc c.inTryStmt
  202. var newFixups: seq[TPosition]
  203. swap(newFixups, c.tryStmtFixups)
  204. let elsePos = c.forkI(n)
  205. c.gen(n.sons[0])
  206. dec c.inTryStmt
  207. for f in newFixups:
  208. c.patch(f)
  209. swap(newFixups, c.tryStmtFixups)
  210. c.patch(elsePos)
  211. for i in 1 ..< n.len:
  212. let it = n.sons[i]
  213. if it.kind != nkFinally:
  214. var blen = len(it)
  215. let endExcept = c.forkI(it)
  216. c.gen(it.lastSon)
  217. if i < sonsLen(n)-1:
  218. endings.add(c.gotoI(it))
  219. c.patch(endExcept)
  220. for endPos in endings: c.patch(endPos)
  221. let fin = lastSon(n)
  222. if fin.kind == nkFinally:
  223. c.gen(fin.sons[0])
  224. proc genRaise(c: var Con; n: PNode) =
  225. gen(c, n.sons[0])
  226. if c.inTryStmt > 0:
  227. c.tryStmtFixups.add c.gotoI(n)
  228. else:
  229. c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
  230. proc genReturn(c: var Con; n: PNode) =
  231. if n.sons[0].kind != nkEmpty: gen(c, n.sons[0])
  232. c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
  233. const
  234. InterestingSyms = {skVar, skResult, skLet}
  235. proc genUse(c: var Con; n: PNode) =
  236. var n = n
  237. while n.kind in {nkDotExpr, nkCheckedFieldExpr,
  238. nkBracketExpr, nkDerefExpr, nkHiddenDeref,
  239. nkAddr, nkHiddenAddr}:
  240. n = n[0]
  241. if n.kind == nkSym and n.sym.kind in InterestingSyms:
  242. c.code.add Instr(n: n, kind: use, sym: n.sym)
  243. proc genDef(c: var Con; n: PNode) =
  244. if n.kind == nkSym and n.sym.kind in InterestingSyms:
  245. c.code.add Instr(n: n, kind: def, sym: n.sym)
  246. proc genCall(c: var Con; n: PNode) =
  247. gen(c, n[0])
  248. var t = n[0].typ
  249. if t != nil: t = t.skipTypes(abstractInst)
  250. inc c.inCall
  251. for i in 1..<n.len:
  252. gen(c, n[i])
  253. if t != nil and i < t.len and t.sons[i].kind == tyVar:
  254. genDef(c, n[i])
  255. # every call can potentially raise:
  256. if c.inTryStmt > 0:
  257. c.tryStmtFixups.add c.forkI(n)
  258. dec c.inCall
  259. proc genMagic(c: var Con; n: PNode; m: TMagic) =
  260. case m
  261. of mAnd, mOr: c.genAndOr(n)
  262. of mNew, mNewFinalize:
  263. genDef(c, n[1])
  264. for i in 2..<n.len: gen(c, n[i])
  265. of mExit:
  266. genCall(c, n)
  267. c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
  268. else:
  269. genCall(c, n)
  270. proc genVarSection(c: var Con; n: PNode) =
  271. for a in n:
  272. if a.kind == nkCommentStmt: continue
  273. if a.kind == nkVarTuple:
  274. gen(c, a.lastSon)
  275. for i in 0 .. a.len-3: genDef(c, a[i])
  276. else:
  277. gen(c, a.lastSon)
  278. if a.lastSon.kind != nkEmpty:
  279. genDef(c, a.sons[0])
  280. proc gen(c: var Con; n: PNode) =
  281. case n.kind
  282. of nkSym: genUse(c, n)
  283. of nkCallKinds:
  284. if n.sons[0].kind == nkSym:
  285. let s = n.sons[0].sym
  286. if s.magic != mNone:
  287. genMagic(c, n, s.magic)
  288. else:
  289. genCall(c, n)
  290. else:
  291. genCall(c, n)
  292. of nkCharLit..nkNilLit: discard
  293. of nkAsgn, nkFastAsgn:
  294. gen(c, n[1])
  295. genDef(c, n[0])
  296. of nkDotExpr, nkCheckedFieldExpr, nkBracketExpr,
  297. nkDerefExpr, nkHiddenDeref, nkAddr, nkHiddenAddr:
  298. gen(c, n[0])
  299. of nkIfStmt, nkIfExpr: genIf(c, n)
  300. of nkWhenStmt:
  301. # This is "when nimvm" node. Chose the first branch.
  302. gen(c, n.sons[0].sons[1])
  303. of nkCaseStmt: genCase(c, n)
  304. of nkWhileStmt: genWhile(c, n)
  305. of nkBlockExpr, nkBlockStmt: genBlock(c, n)
  306. of nkReturnStmt: genReturn(c, n)
  307. of nkRaiseStmt: genRaise(c, n)
  308. of nkBreakStmt: genBreak(c, n)
  309. of nkTryStmt: genTry(c, n)
  310. of nkStmtList, nkStmtListExpr, nkChckRangeF, nkChckRange64, nkChckRange,
  311. nkBracket, nkCurly, nkPar, nkTupleConstr, nkClosure, nkObjConstr:
  312. for x in n: gen(c, x)
  313. of nkPragmaBlock: gen(c, n.lastSon)
  314. of nkDiscardStmt: gen(c, n.sons[0])
  315. of nkHiddenStdConv, nkHiddenSubConv, nkConv, nkExprColonExpr, nkExprEqExpr,
  316. nkCast:
  317. gen(c, n.sons[1])
  318. of nkObjDownConv, nkStringToCString, nkCStringToString: gen(c, n.sons[0])
  319. of nkVarSection, nkLetSection: genVarSection(c, n)
  320. of nkDefer:
  321. doAssert false, "dfa construction pass requires the elimination of 'defer'"
  322. else: discard
  323. proc dfa(code: seq[Instr]; conf: ConfigRef) =
  324. var u = newSeq[IntSet](code.len) # usages
  325. var d = newSeq[IntSet](code.len) # defs
  326. var c = newSeq[IntSet](code.len) # consumed
  327. var backrefs = initTable[int, int]()
  328. for i in 0..<code.len:
  329. u[i] = initIntSet()
  330. d[i] = initIntSet()
  331. c[i] = initIntSet()
  332. case code[i].kind
  333. of use: u[i].incl(code[i].sym.id)
  334. of def: d[i].incl(code[i].sym.id)
  335. of fork, goto:
  336. let d = i+code[i].dest
  337. backrefs.add(d, i)
  338. var w = @[0]
  339. var maxIters = 50
  340. var someChange = true
  341. var takenGotos = initIntSet()
  342. var consuming = -1
  343. while w.len > 0 and maxIters > 0: # and someChange:
  344. dec maxIters
  345. var pc = w.pop() # w[^1]
  346. var prevPc = -1
  347. # this simulates a single linear control flow execution:
  348. while pc < code.len:
  349. if prevPc >= 0:
  350. someChange = false
  351. # merge step and test for changes (we compute the fixpoints here):
  352. # 'u' needs to be the union of prevPc, pc
  353. # 'd' needs to be the intersection of 'pc'
  354. for id in u[prevPc]:
  355. if not u[pc].containsOrIncl(id):
  356. someChange = true
  357. # in (a; b) if ``a`` sets ``v`` so does ``b``. The intersection
  358. # is only interesting on merge points:
  359. for id in d[prevPc]:
  360. if not d[pc].containsOrIncl(id):
  361. someChange = true
  362. # if this is a merge point, we take the intersection of the 'd' sets:
  363. if backrefs.hasKey(pc):
  364. var intersect = initIntSet()
  365. assign(intersect, d[pc])
  366. var first = true
  367. for prevPc in backrefs.allValues(pc):
  368. for def in d[pc]:
  369. if def notin d[prevPc]:
  370. excl(intersect, def)
  371. someChange = true
  372. when defined(debugDfa):
  373. echo "Excluding ", pc, " prev ", prevPc
  374. assign d[pc], intersect
  375. if consuming >= 0:
  376. if not c[pc].containsOrIncl(consuming):
  377. someChange = true
  378. consuming = -1
  379. # our interpretation ![I!]:
  380. prevPc = pc
  381. case code[pc].kind
  382. of goto:
  383. # we must leave endless loops eventually:
  384. if not takenGotos.containsOrIncl(pc) or someChange:
  385. pc = pc + code[pc].dest
  386. else:
  387. inc pc
  388. of fork:
  389. # we follow the next instruction but push the dest onto our "work" stack:
  390. #if someChange:
  391. w.add pc + code[pc].dest
  392. inc pc
  393. of use:
  394. #if not d[prevPc].missingOrExcl():
  395. # someChange = true
  396. consuming = code[pc].sym.id
  397. inc pc
  398. of def:
  399. if not d[pc].containsOrIncl(code[pc].sym.id):
  400. someChange = true
  401. inc pc
  402. when defined(useDfa) and defined(debugDfa):
  403. for i in 0..<code.len:
  404. echo "PC ", i, ": defs: ", d[i], "; uses ", u[i], "; consumes ", c[i]
  405. # now check the condition we're interested in:
  406. for i in 0..<code.len:
  407. case code[i].kind
  408. of use:
  409. let s = code[i].sym
  410. if s.id notin d[i]:
  411. localError(conf, code[i].n.info, "usage of uninitialized variable: " & s.name.s)
  412. if s.id in c[i]:
  413. localError(conf, code[i].n.info, "usage of an already consumed variable: " & s.name.s)
  414. else: discard
  415. proc dataflowAnalysis*(s: PSym; body: PNode; conf: ConfigRef) =
  416. var c = Con(code: @[], blocks: @[])
  417. gen(c, body)
  418. when defined(useDfa) and defined(debugDfa): echoCfg(c.code)
  419. dfa(c.code, conf)
  420. proc constructCfg*(s: PSym; body: PNode): ControlFlowGraph =
  421. ## constructs a control flow graph for ``body``.
  422. var c = Con(code: @[], blocks: @[])
  423. gen(c, body)
  424. shallowCopy(result, c.code)