123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447 |
- #
- #
- # The Nim Compiler
- # (c) Copyright 2017 Andreas Rumpf
- #
- # See the file "copying.txt", included in this
- # distribution, for details about the copyright.
- #
- ## Data flow analysis for Nim. For now the task is to prove that every
- ## usage of a local variable 'v' is covered by an initialization to 'v'
- ## first.
- ## We transform the AST into a linear list of instructions first to
- ## make this easier to handle: There are only 2 different branching
- ## instructions: 'goto X' is an unconditional goto, 'fork X'
- ## is a conditional goto (either the next instruction or 'X' can be
- ## taken). Exhaustive case statements are translated
- ## so that the last branch is transformed into an 'else' branch.
- ## ``return`` and ``break`` are all covered by 'goto'.
- ## The case to detect is ``use v`` that is not dominated by
- ## a ``def v``.
- ## The data structures and algorithms used here are inspired by
- ## "A Graph–Free Approach to Data–Flow Analysis" by Markus Mohnen.
- ## https://link.springer.com/content/pdf/10.1007/3-540-45937-5_6.pdf
- import ast, astalgo, types, intsets, tables, msgs, options, lineinfos
- type
- InstrKind* = enum
- goto, fork, def, use,
- useWithinCall # this strange special case is used to get more
- # move optimizations out of regular code
- # XXX This is still overly pessimistic in
- # call(let x = foo; bar(x))
- Instr* = object
- n*: PNode
- case kind*: InstrKind
- of def, use, useWithinCall: sym*: PSym
- of goto, fork: dest*: int
- ControlFlowGraph* = seq[Instr]
- TPosition = distinct int
- TBlock = object
- label: PSym
- fixups: seq[TPosition]
- ValueKind = enum
- undef, value, valueOrUndef
- Con = object
- code: ControlFlowGraph
- inCall: int
- blocks: seq[TBlock]
- proc debugInfo(info: TLineInfo): string =
- result = $info.line #info.toFilename & ":" & $info.line
- proc codeListing(c: ControlFlowGraph, result: var string, start=0; last = -1) =
- # for debugging purposes
- # first iteration: compute all necessary labels:
- var jumpTargets = initIntSet()
- let last = if last < 0: c.len-1 else: min(last, c.len-1)
- for i in start..last:
- if c[i].kind in {goto, fork}:
- jumpTargets.incl(i+c[i].dest)
- var i = start
- while i <= last:
- if i in jumpTargets: result.add("L" & $i & ":\n")
- result.add "\t"
- result.add $c[i].kind
- result.add "\t"
- case c[i].kind
- of def, use, useWithinCall:
- result.add c[i].sym.name.s
- of goto, fork:
- result.add "L"
- result.add c[i].dest+i
- result.add("\t#")
- result.add(debugInfo(c[i].n.info))
- result.add("\n")
- inc i
- if i in jumpTargets: result.add("L" & $i & ": End\n")
- proc echoCfg*(c: ControlFlowGraph; start=0; last = -1) {.deprecated.} =
- ## echos the ControlFlowGraph for debugging purposes.
- var buf = ""
- codeListing(c, buf, start, last)
- when declared(echo):
- echo buf
- proc forkI(c: var Con; n: PNode): TPosition =
- result = TPosition(c.code.len)
- c.code.add Instr(n: n, kind: fork, dest: 0)
- proc gotoI(c: var Con; n: PNode): TPosition =
- result = TPosition(c.code.len)
- c.code.add Instr(n: n, kind: goto, dest: 0)
- proc genLabel(c: Con): TPosition =
- result = TPosition(c.code.len)
- proc jmpBack(c: var Con, n: PNode, p = TPosition(0)) =
- let dist = p.int - c.code.len
- doAssert(-0x7fff < dist and dist < 0x7fff)
- c.code.add Instr(n: n, kind: goto, dest: dist)
- proc patch(c: var Con, p: TPosition) =
- # patch with current index
- let p = p.int
- let diff = c.code.len - p
- doAssert(-0x7fff < diff and diff < 0x7fff)
- c.code[p].dest = diff
- proc popBlock(c: var Con; oldLen: int) =
- for f in c.blocks[oldLen].fixups:
- c.patch(f)
- c.blocks.setLen(oldLen)
- template withBlock(labl: PSym; body: untyped) {.dirty.} =
- var oldLen {.gensym.} = c.blocks.len
- c.blocks.add TBlock(label: labl, fixups: @[])
- body
- popBlock(c, oldLen)
- proc isTrue(n: PNode): bool =
- n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or
- n.kind == nkIntLit and n.intVal != 0
- proc gen(c: var Con; n: PNode) # {.noSideEffect.}
- proc genWhile(c: var Con; n: PNode) =
- # L1:
- # cond, tmp
- # fork tmp, L2
- # body
- # jmp L1
- # L2:
- let L1 = c.genLabel
- withBlock(nil):
- if isTrue(n.sons[0]):
- c.gen(n.sons[1])
- c.jmpBack(n, L1)
- else:
- c.gen(n.sons[0])
- let L2 = c.forkI(n)
- c.gen(n.sons[1])
- c.jmpBack(n, L1)
- c.patch(L2)
- proc genBlock(c: var Con; n: PNode) =
- withBlock(n.sons[0].sym):
- c.gen(n.sons[1])
- proc genBreak(c: var Con; n: PNode) =
- let L1 = c.gotoI(n)
- if n.sons[0].kind == nkSym:
- #echo cast[int](n.sons[0].sym)
- for i in countdown(c.blocks.len-1, 0):
- if c.blocks[i].label == n.sons[0].sym:
- c.blocks[i].fixups.add L1
- return
- #globalError(n.info, "VM problem: cannot find 'break' target")
- else:
- c.blocks[c.blocks.high].fixups.add L1
- proc genIf(c: var Con, n: PNode) =
- var endings: seq[TPosition] = @[]
- for i in countup(0, len(n) - 1):
- var it = n.sons[i]
- c.gen(it.sons[0])
- if it.len == 2:
- let elsePos = c.forkI(it.sons[1])
- c.gen(it.sons[1])
- if i < sonsLen(n)-1:
- endings.add(c.gotoI(it.sons[1]))
- c.patch(elsePos)
- for endPos in endings: c.patch(endPos)
- proc genAndOr(c: var Con; n: PNode) =
- # asgn dest, a
- # fork L1
- # asgn dest, b
- # L1:
- c.gen(n.sons[1])
- let L1 = c.forkI(n)
- c.gen(n.sons[2])
- c.patch(L1)
- proc genCase(c: var Con; n: PNode) =
- # if (!expr1) goto L1;
- # thenPart
- # goto LEnd
- # L1:
- # if (!expr2) goto L2;
- # thenPart2
- # goto LEnd
- # L2:
- # elsePart
- # Lend:
- var endings: seq[TPosition] = @[]
- c.gen(n.sons[0])
- for i in 1 ..< n.len:
- let it = n.sons[i]
- if it.len == 1:
- c.gen(it.sons[0])
- else:
- let elsePos = c.forkI(it.lastSon)
- c.gen(it.lastSon)
- if i < sonsLen(n)-1:
- endings.add(c.gotoI(it.lastSon))
- c.patch(elsePos)
- for endPos in endings: c.patch(endPos)
- proc genTry(c: var Con; n: PNode) =
- var endings: seq[TPosition] = @[]
- let elsePos = c.forkI(n)
- c.gen(n.sons[0])
- c.patch(elsePos)
- for i in 1 ..< n.len:
- let it = n.sons[i]
- if it.kind != nkFinally:
- var blen = len(it)
- let endExcept = c.forkI(it)
- c.gen(it.lastSon)
- if i < sonsLen(n)-1:
- endings.add(c.gotoI(it))
- c.patch(endExcept)
- for endPos in endings: c.patch(endPos)
- let fin = lastSon(n)
- if fin.kind == nkFinally:
- c.gen(fin.sons[0])
- proc genRaise(c: var Con; n: PNode) =
- gen(c, n.sons[0])
- c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
- proc genReturn(c: var Con; n: PNode) =
- if n.sons[0].kind != nkEmpty: gen(c, n.sons[0])
- c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
- const
- InterestingSyms = {skVar, skResult, skLet}
- proc genUse(c: var Con; n: PNode) =
- var n = n
- while n.kind in {nkDotExpr, nkCheckedFieldExpr,
- nkBracketExpr, nkDerefExpr, nkHiddenDeref,
- nkAddr, nkHiddenAddr}:
- n = n[0]
- if n.kind == nkSym and n.sym.kind in InterestingSyms:
- if c.inCall > 0:
- c.code.add Instr(n: n, kind: useWithinCall, sym: n.sym)
- else:
- c.code.add Instr(n: n, kind: use, sym: n.sym)
- proc genDef(c: var Con; n: PNode) =
- if n.kind == nkSym and n.sym.kind in InterestingSyms:
- c.code.add Instr(n: n, kind: def, sym: n.sym)
- proc genCall(c: var Con; n: PNode) =
- gen(c, n[0])
- var t = n[0].typ
- if t != nil: t = t.skipTypes(abstractInst)
- inc c.inCall
- for i in 1..<n.len:
- gen(c, n[i])
- if t != nil and i < t.len and t.sons[i].kind == tyVar:
- genDef(c, n[i])
- dec c.inCall
- proc genMagic(c: var Con; n: PNode; m: TMagic) =
- case m
- of mAnd, mOr: c.genAndOr(n)
- of mNew, mNewFinalize:
- genDef(c, n[1])
- for i in 2..<n.len: gen(c, n[i])
- of mExit:
- genCall(c, n)
- c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
- else:
- genCall(c, n)
- proc genVarSection(c: var Con; n: PNode) =
- for a in n:
- if a.kind == nkCommentStmt: continue
- if a.kind == nkVarTuple:
- gen(c, a.lastSon)
- for i in 0 .. a.len-3: genDef(c, a[i])
- else:
- gen(c, a.lastSon)
- if a.lastSon.kind != nkEmpty:
- genDef(c, a.sons[0])
- proc gen(c: var Con; n: PNode) =
- case n.kind
- of nkSym: genUse(c, n)
- of nkCallKinds:
- if n.sons[0].kind == nkSym:
- let s = n.sons[0].sym
- if s.magic != mNone:
- genMagic(c, n, s.magic)
- else:
- genCall(c, n)
- else:
- genCall(c, n)
- of nkCharLit..nkNilLit: discard
- of nkAsgn, nkFastAsgn:
- gen(c, n[1])
- genDef(c, n[0])
- of nkDotExpr, nkCheckedFieldExpr, nkBracketExpr,
- nkDerefExpr, nkHiddenDeref, nkAddr, nkHiddenAddr:
- gen(c, n[0])
- of nkIfStmt, nkIfExpr: genIf(c, n)
- of nkWhenStmt:
- # This is "when nimvm" node. Chose the first branch.
- gen(c, n.sons[0].sons[1])
- of nkCaseStmt: genCase(c, n)
- of nkWhileStmt: genWhile(c, n)
- of nkBlockExpr, nkBlockStmt: genBlock(c, n)
- of nkReturnStmt: genReturn(c, n)
- of nkRaiseStmt: genRaise(c, n)
- of nkBreakStmt: genBreak(c, n)
- of nkTryStmt: genTry(c, n)
- of nkStmtList, nkStmtListExpr, nkChckRangeF, nkChckRange64, nkChckRange,
- nkBracket, nkCurly, nkPar, nkTupleConstr, nkClosure, nkObjConstr:
- for x in n: gen(c, x)
- of nkPragmaBlock: gen(c, n.lastSon)
- of nkDiscardStmt: gen(c, n.sons[0])
- of nkHiddenStdConv, nkHiddenSubConv, nkConv, nkExprColonExpr, nkExprEqExpr,
- nkCast:
- gen(c, n.sons[1])
- of nkObjDownConv, nkStringToCString, nkCStringToString: gen(c, n.sons[0])
- of nkVarSection, nkLetSection: genVarSection(c, n)
- else: discard
- proc dfa(code: seq[Instr]; conf: ConfigRef) =
- var u = newSeq[IntSet](code.len) # usages
- var d = newSeq[IntSet](code.len) # defs
- var c = newSeq[IntSet](code.len) # consumed
- var backrefs = initTable[int, int]()
- for i in 0..<code.len:
- u[i] = initIntSet()
- d[i] = initIntSet()
- c[i] = initIntSet()
- case code[i].kind
- of use, useWithinCall: u[i].incl(code[i].sym.id)
- of def: d[i].incl(code[i].sym.id)
- of fork, goto:
- let d = i+code[i].dest
- backrefs.add(d, i)
- var w = @[0]
- var maxIters = 50
- var someChange = true
- var takenGotos = initIntSet()
- var consuming = -1
- while w.len > 0 and maxIters > 0: # and someChange:
- dec maxIters
- var pc = w.pop() # w[^1]
- var prevPc = -1
- # this simulates a single linear control flow execution:
- while pc < code.len:
- if prevPc >= 0:
- someChange = false
- # merge step and test for changes (we compute the fixpoints here):
- # 'u' needs to be the union of prevPc, pc
- # 'd' needs to be the intersection of 'pc'
- for id in u[prevPc]:
- if not u[pc].containsOrIncl(id):
- someChange = true
- # in (a; b) if ``a`` sets ``v`` so does ``b``. The intersection
- # is only interesting on merge points:
- for id in d[prevPc]:
- if not d[pc].containsOrIncl(id):
- someChange = true
- # if this is a merge point, we take the intersection of the 'd' sets:
- if backrefs.hasKey(pc):
- var intersect = initIntSet()
- assign(intersect, d[pc])
- var first = true
- for prevPc in backrefs.allValues(pc):
- for def in d[pc]:
- if def notin d[prevPc]:
- excl(intersect, def)
- someChange = true
- when defined(debugDfa):
- echo "Excluding ", pc, " prev ", prevPc
- assign d[pc], intersect
- if consuming >= 0:
- if not c[pc].containsOrIncl(consuming):
- someChange = true
- consuming = -1
- # our interpretation ![I!]:
- prevPc = pc
- case code[pc].kind
- of goto:
- # we must leave endless loops eventually:
- if not takenGotos.containsOrIncl(pc) or someChange:
- pc = pc + code[pc].dest
- else:
- inc pc
- of fork:
- # we follow the next instruction but push the dest onto our "work" stack:
- #if someChange:
- w.add pc + code[pc].dest
- inc pc
- of use, useWithinCall:
- #if not d[prevPc].missingOrExcl():
- # someChange = true
- consuming = code[pc].sym.id
- inc pc
- of def:
- if not d[pc].containsOrIncl(code[pc].sym.id):
- someChange = true
- inc pc
- when defined(useDfa) and defined(debugDfa):
- for i in 0..<code.len:
- echo "PC ", i, ": defs: ", d[i], "; uses ", u[i], "; consumes ", c[i]
- # now check the condition we're interested in:
- for i in 0..<code.len:
- case code[i].kind
- of use, useWithinCall:
- let s = code[i].sym
- if s.id notin d[i]:
- localError(conf, code[i].n.info, "usage of uninitialized variable: " & s.name.s)
- if s.id in c[i]:
- localError(conf, code[i].n.info, "usage of an already consumed variable: " & s.name.s)
- else: discard
- proc dataflowAnalysis*(s: PSym; body: PNode; conf: ConfigRef) =
- var c = Con(code: @[], blocks: @[])
- gen(c, body)
- when defined(useDfa) and defined(debugDfa): echoCfg(c.code)
- dfa(c.code, conf)
- proc constructCfg*(s: PSym; body: PNode): ControlFlowGraph =
- ## constructs a control flow graph for ``body``.
- var c = Con(code: @[], blocks: @[])
- gen(c, body)
- shallowCopy(result, c.code)
|