123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605 |
- #
- #
- # The Nim Compiler
- # (c) Copyright 2018 Andreas Rumpf
- #
- # See the file "copying.txt", included in this
- # distribution, for details about the copyright.
- #
- ## Layouter for nimpretty.
- import idents, lexer, lineinfos, llstream, options, msgs, strutils, pathutils
# Threshold shared by the line-splitting heuristics: `isLongEnough` requires
# a continuation to be longer than this, and the same amount is tolerated as
# overshoot past `maxLineLen` in `optionalIsGood` and `renderTokens`.
const MinLineLen = 15
type
  SplitKind = enum ## tags handed to `rememberSplit`; index of `altSplitPos`
    splitComma
    splitParLe
    splitAnd
    splitOr
    splitIn
    splitBinary

  SemicolonKind = enum ## state of the decision made in `commaWasSemicolon`
    detectSemicolonKind
    useSemicolon
    dontTouch

  LayoutToken* = enum
    ltSpaces,
    ltCrucialNewline,   ## a semantically crucial newline (indentation!)
    ltSplittingNewline, ## newline used for splitting up long
                        ## expressions (like after a comma or a binary operator)
    ltTab,
    ltOptionalNewline,  ## optional newline introduced by nimpretty
    ltComment,
    ltLit,
    ltKeyword,
    ltExportMarker,
    ltIdent,
    ltOther,
    ltOpr,
    ltSomeParLe,
    ltSomeParRi,
    ltBeginSection,
    ltEndSection

  Emitter* = object
    ## Collects layout tokens for one source file. `tokens` and `kinds` are
    ## parallel sequences: entry `i` of `kinds` classifies entry `i` of
    ## `tokens`.
    config: ConfigRef            ## set by `openEmitter`
    fid: FileIndex               ## file being formatted; set by `openEmitter`
    lastTok: TokType             ## type of the token handled by the previous
                                 ## `emitTok` call
    inquote: bool                ## toggled on every `tkAccent`
    lastTokWasTerse: bool        ## previous token was written without
                                 ## surrounding whitespace
    semicolons: SemicolonKind
    col: int                     ## column counter maintained by `wr` and
                                 ## `wrNewline`
    lastLineNumber: int
    lineSpan: int                ## newlines inside the current literal/comment
    indentLevel: int             ## spaces written after a newline in `emitTok`
    indWidth*: int               ## indentation width; 0 means "detect"
    inSection: int               ## `beginSection`/`endSection` nesting depth
    keepIndents*: int            ## > 0 between `#!nimpretty off` and `on`
    doIndentMore*: int           ## > 0 doubles the continuation indentation
    kinds*: seq[LayoutToken]
    tokens*: seq[string]
    indentStack: seq[int]
    fixedUntil: int              # marks where we must not go in the content
    altSplitPos: array[SplitKind, int] # alternative split positions
    maxLineLen*: int
proc openEmitter*(em: var Emitter, cache: IdentCache;
                  config: ConfigRef, fileIdx: FileIndex) =
  ## Prepares `em` for formatting the file identified by `fileIdx`.
  ##
  ## When `em.indWidth` is still 0 it is obtained from `getIndentWidth` on the
  ## source file, with 2 as fallback if that also reports 0. Afterwards
  ## `config`, `fid`, `lastTok`, `inquote`, `col`, `lastLineNumber` and the
  ## indentation stack (seeded with a single 0) are (re)initialized.
  let srcPath = AbsoluteFile config.toFullPath(fileIdx)
  if em.indWidth == 0:
    let detected = getIndentWidth(fileIdx, llStreamOpen(srcPath, fmRead),
                                  cache, config)
    em.indWidth = if detected == 0: 2 else: detected
  em.config = config
  em.fid = fileIdx
  em.lastTok = tkInvalid
  em.inquote = false
  em.col = 0
  em.lastLineNumber = 1
  em.indentStack = newSeqOfCap[int](30)
  em.indentStack.add 0
proc computeMax(em: Emitter; pos: int): int =
  ## Scans the lines from token `pos` up to the next `ltEndSection` (or the
  ## end of the token list) and returns the widest "left-hand side", i.e. the
  ## summed token lengths in front of the first `ltTab` of a line. Only lines
  ## that contain a tab and whose total length does not exceed `maxLineLen`
  ## take part. One extra column is added when the most recent tab seen was
  ## not preceded by an `ltSpaces` token.
  const lineEnders = {ltCrucialNewline, ltSplittingNewline}
  var
    cur = pos
    padding = 0
  result = 0
  while cur < em.tokens.len and em.kinds[cur] != ltEndSection:
    # per-line accumulators
    var
      lhsWidth = 0
      lineWidth = 0
      sawTab = false
    while cur < em.tokens.len and em.kinds[cur] != ltEndSection:
      let kind = em.kinds[cur]
      if kind in lineEnders:
        if sawTab and lineWidth <= em.maxLineLen:
          result = max(result, lhsWidth + padding)
        inc cur
        break
      if kind == ltTab:
        padding = if em.kinds[cur-1] == ltSpaces: 0 else: 1
        sawTab = true
      elif not sawTab:
        inc lhsWidth, em.tokens[cur].len
      inc lineWidth, em.tokens[cur].len
      inc cur
proc computeRhs(em: Emitter; pos: int): int =
  ## Sum of the token lengths from `pos` up to (not including) the next
  ## crucial or splitting newline, or the end of the token list.
  result = 0
  var cur = pos
  while cur < em.tokens.len:
    if em.kinds[cur] in {ltCrucialNewline, ltSplittingNewline}: break
    result += em.tokens[cur].len
    inc cur
proc isLongEnough(lineLen, startPos, endPos: int): bool =
  ## True when the line is longer than `MinLineLen` characters and spans
  ## more than four token positions beyond `startPos`.
  if lineLen <= MinLineLen:
    return false
  result = endPos - startPos > 4
proc findNewline(em: Emitter; p, lineLen: var int) =
  ## Advances `p` to the next crucial/splitting newline (or to
  ## `em.tokens.len`) and adds the lengths of all skipped tokens to `lineLen`.
  while p < em.tokens.len:
    if em.kinds[p] in {ltCrucialNewline, ltSplittingNewline}: return
    lineLen += em.tokens[p].len
    inc p
proc countNewlines(s: string): int =
  ## Number of line feed characters (`'\L'`) contained in `s`.
  result = strutils.count(s, '\L')
proc calcCol(em: var Emitter; s: string) =
  ## Sets `em.col` to the number of characters that follow the last line
  ## feed in `s`, or to `s.len` when `s` contains no line feed.
  # `rfind` yields -1 when there is no '\L', which makes this `s.len`.
  em.col = s.len - 1 - strutils.rfind(s, '\L')
proc optionalIsGood(em: var Emitter; pos, currentLen: int): bool =
  ## Decides whether the optional newline at token index `pos` should be
  ## turned into a real line break. `currentLen` is the length of the line
  ## rendered so far.
  ##
  ## Side effect: when the following source line has the same indentation as
  ## this optional newline and the combined text is long enough, the newline
  ## between them is downgraded to `ltOptionalNewline` and the spaces after
  ## it are collapsed, so that both lines can be re-flowed together.
  let wantedIndent = em.tokens[pos].len
  var
    nextNl = pos+1
    restLen = 0
  em.findNewline(nextNl, restLen)
  if nextNl == pos+1:
    # optionalNewline followed by another newline
    result = false
  elif em.kinds[nextNl-1] == ltComment and
      currentLen+restLen < em.maxLineLen+MinLineLen:
    result = false
  elif nextNl+1 < em.tokens.len and em.kinds[nextNl+1] == ltSpaces and
      em.kinds[nextNl-1] == ltOptionalNewline:
    let followingIndent = em.tokens[nextNl+1].len
    if followingIndent == wantedIndent:
      # concatenate lines with the same indentation
      let joinAt = nextNl
      var joinedLen = restLen
      inc nextNl
      em.findNewline(nextNl, joinedLen)
      if isLongEnough(joinedLen, joinAt, nextNl):
        em.kinds[joinAt] = ltOptionalNewline
        if em.kinds[joinAt+1] == ltSpaces:
          # inhibit extra spaces when concatenating two lines
          em.tokens[joinAt+1] = if em.tokens[joinAt-2] == ",": " " else: ""
        result = true
    elif followingIndent < wantedIndent:
      result = isLongEnough(restLen, pos, nextNl)
  elif em.kinds[pos+1] in {ltOther, ltSomeParLe, ltSomeParRi}:
    # note: pos+1, not the newline position + 1
    result = false
  else:
    result = isLongEnough(restLen, pos, nextNl)
proc lenOfNextTokens(em: Emitter; pos: int): int =
  ## Total length of the tokens after `pos` up to (not including) the next
  ## newline token of any kind (crucial, splitting or optional).
  const anyNewline = {ltCrucialNewline, ltSplittingNewline, ltOptionalNewline}
  result = 0
  for idx in pos+1 ..< em.tokens.len:
    if em.kinds[idx] in anyNewline: break
    result += em.tokens[idx].len
proc guidingInd(em: Emitter; pos: int): int =
  ## Looks for an indentation to imitate for a line break at `pos`.
  ##
  ## Walks back from `pos` to the nearest `ltSomeParLe`, then forward until
  ## an `ltSomeParRi` (or the end of the tokens). If a splitting newline that
  ## is directly followed by `ltSpaces` is found on the way, the length of
  ## those spaces is returned. Returns -1 when there is no such newline or
  ## when no opening parenthesis precedes `pos`.
  var i = pos - 1
  while i >= 0 and em.kinds[i] != ltSomeParLe:
    dec i
  if i < 0:
    # No opening parenthesis before `pos`: bail out instead of indexing
    # `em.kinds[-1]` in the forward scan below.
    return -1
  while i+1 <= em.kinds.high and em.kinds[i] != ltSomeParRi:
    if em.kinds[i] == ltSplittingNewline and em.kinds[i+1] == ltSpaces:
      return em.tokens[i+1].len
    inc i
  result = -1
proc renderTokens*(em: var Emitter): string =
  ## Renders the collected layout tokens into the final source text.
  ##
  ## Tabs inside a section are expanded so that the text following them lines
  ## up at a common column (see `computeMax`); optional newlines become real
  ## line breaks only when the line would otherwise exceed `maxLineLen` and
  ## `optionalIsGood` agrees.
  const hardNewlines = {ltCrucialNewline, ltSplittingNewline}
  var
    maxLhs = 0    # alignment column of the current section, 0 = none
    lineLen = 0   # length of the output line being built
    lineBegin = 0 # token index at which the current line started
    openPars = 0  # nesting depth of ltSomeParLe/ltSomeParRi
    i = 0
  result = newStringOfCap(16_000)

  template defaultCase() =
    # copy the token verbatim
    result.add em.tokens[i]
    inc lineLen, em.tokens[i].len

  while i <= em.tokens.high:
    when defined(debug):
      echo (token: em.tokens[i], kind: em.kinds[i])
    case em.kinds[i]
    of ltBeginSection:
      maxLhs = computeMax(em, lineBegin)
    of ltEndSection:
      maxLhs = 0
      lineBegin = i+1
    of ltTab:
      if i >= 2 and em.kinds[i-2] in hardNewlines and
          em.kinds[i-1] in hardNewlines + {ltSpaces}:
        # a previous section has ended
        maxLhs = 0

      if maxLhs == 0:
        if em.kinds[i-1] != ltSpaces:
          defaultCase()
      else:
        # pick the shorter indentation token:
        var pad = maxLhs - lineLen
        if pad < em.tokens[i].len or
            computeRhs(em, i+1)+maxLhs <= em.maxLineLen+MinLineLen:
          if pad <= 0 and result[^1] notin {' ', '\L'}: pad = 1
          for _ in 1..pad: result.add ' '
          inc lineLen, pad
        else:
          defaultCase()
    of ltCrucialNewline, ltSplittingNewline:
      result.add em.tokens[i]
      lineLen = 0
      lineBegin = i+1
    of ltOptionalNewline:
      let totalLineLen = lineLen + lenOfNextTokens(em, i)
      if totalLineLen > em.maxLineLen and optionalIsGood(em, i, lineLen):
        if i-1 >= 0 and em.kinds[i-1] == ltSpaces:
          # drop the trailing spaces already written for this line
          result.setLen(result.len - em.tokens[i-1].len)
        result.add "\L"
        let guide = if openPars > 0: guidingInd(em, i) else: -1
        if guide >= 0:
          result.add repeat(' ', guide)
          lineLen = guide
        else:
          result.add em.tokens[i]
          lineLen = em.tokens[i].len
        lineBegin = i+1
        if i+1 < em.kinds.len and em.kinds[i+1] == ltSpaces:
          # inhibit extra spaces at the start of a new line
          inc i
    of ltLit:
      if countNewlines(em.tokens[i]) > 0:
        # multi-line literal: the line length restarts after its last newline
        em.calcCol(em.tokens[i])
        lineLen = em.col
      else:
        inc lineLen, em.tokens[i].len
      result.add em.tokens[i]
    of ltSomeParLe:
      inc openPars
      defaultCase()
    of ltSomeParRi:
      doAssert openPars > 0
      dec openPars
      defaultCase()
    else:
      defaultCase()
    inc i
proc writeOut*(em: Emitter, content: string) =
  ## Writes `content` to the configured output file (`em.config.absOutFile`).
  ##
  ## Nothing is written when the file already exists with exactly this
  ## content. If the file cannot be opened an error is reported through
  ## `rawMessage` and the proc returns.
  let outFile = em.config.absOutFile
  if fileExists(outFile) and readFile(outFile.string) == content:
    discard "do nothing, see #9499"
    return
  let f = llStreamOpen(outFile, fmWrite)
  if f == nil:
    rawMessage(em.config, errGenerated, "cannot open file: " & outFile.string)
    return
  try:
    f.llStreamWrite content
  finally:
    # close the stream even when the write raises
    llStreamClose(f)
proc closeEmitter*(em: var Emitter) =
  ## Renders the emitter's tokens and writes the result to the output file.
  em.writeOut(renderTokens(em))
proc wr(em: var Emitter; x: string; lt: LayoutToken) =
  ## Appends token text `x` with layout kind `lt` and advances the column
  ## counter by the length of `x`.
  em.kinds.add lt
  em.tokens.add x
  em.col += x.len
  # both sequences must always stay in lockstep
  assert em.tokens.len == em.kinds.len
proc wrNewline(em: var Emitter; kind = ltCrucialNewline) =
  ## Appends a line feed token of the given newline `kind` and resets the
  ## column counter to 0.
  em.kinds.add kind
  em.tokens.add "\L"
  em.col = 0
proc newlineWasSplitting*(em: var Emitter) =
  ## Reclassifies the third-last token as a splitting newline if it was
  ## recorded as a crucial newline. Does nothing with fewer than 3 tokens.
  if em.kinds.len < 3: return
  if em.kinds[^3] == ltCrucialNewline:
    em.kinds[^3] = ltSplittingNewline
- #[
- Splitting newlines can occur:
- - after commas, semicolon, '[', '('.
- - after binary operators, '='.
- - after ':' type
- We only need parser support for the "after type" case.
- ]#
proc wrSpaces(em: var Emitter; spaces: int) =
  ## Appends one `ltSpaces` token consisting of `spaces` blanks; a
  ## non-positive count appends nothing.
  if spaces <= 0: return
  wr(em, strutils.repeat(' ', spaces), ltSpaces)
proc wrSpace(em: var Emitter) =
  ## Appends a single blank as an `ltSpaces` token.
  em.wr(" ", ltSpaces)
proc wrTab(em: var Emitter) =
  ## Appends an `ltTab` token whose text is a single blank.
  em.wr(" ", ltTab)
proc beginSection*(em: var Emitter) =
  ## Inserts an empty `ltBeginSection` marker in front of the last two
  ## tokens (or at index 0 when fewer exist) and increments the section
  ## nesting counter.
  let insertAt = max(0, em.tokens.len-2)
  em.kinds.insert ltBeginSection, insertAt
  em.tokens.insert "", insertAt
  inc em.inSection
  #wr(em, "", ltBeginSection)
proc endSection*(em: var Emitter) =
  ## Inserts an empty `ltEndSection` marker in front of the last two tokens
  ## and decrements the section nesting counter.
  ##
  ## The insert position is clamped to 0, mirroring `beginSection`, so that
  ## calling this with fewer than two recorded tokens does not index out of
  ## bounds.
  let pos = max(0, em.tokens.len-2)
  em.tokens.insert "", pos
  em.kinds.insert ltEndSection, pos
  dec em.inSection
  #wr(em, "", ltEndSection)
proc removeSpaces(em: var Emitter) =
  ## Drops all trailing `ltSpaces` tokens and moves the column counter back
  ## by their combined length.
  while em.kinds.len > 0:
    if em.kinds[^1] != ltSpaces: break
    let removed = em.tokens.pop()
    discard em.kinds.pop()
    em.col -= removed.len
const
  # every opening bracket-like token
  openPars = {tkParLe, tkParDotLe, tkBracketLe, tkBracketDotLe,
              tkBracketLeColon, tkCurlyDotLe, tkCurlyLe}
  # every closing bracket-like token
  closedPars = {tkParRi, tkParDotRi, tkBracketRi, tkBracketDotRi,
                tkCurlyDotRi, tkCurlyRi}
  # tokens after which an expression may continue on the next line
  splitters = openPars + {tkComma, tkSemiColon} # do not add 'tkColon' here!
  # operator symbols and operator-like keywords
  oprSet = {tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs, tkIsnot,
            tkNot, tkOf, tkAs, tkFrom, tkDotDot, tkAnd, tkOr, tkXor}
# NOTE: `goodCol` and `rememberSplit` refer to an `em` that must be in scope
# at the place where they are instantiated.

template goodCol(col): bool =
  ## True once `col` has reached at least half of the maximum line length.
  col >= em.maxLineLen div 2

template moreIndent(em): int =
  ## Indentation added to continuation lines: twice the indentation width
  ## while `doIndentMore` is positive, the plain width otherwise.
  if em.doIndentMore > 0: em.indWidth*2 else: em.indWidth

template rememberSplit(kind) =
  ## Records an optional line break at the current position, provided we are
  ## outside backticks, far enough into the line, and the break would
  ## actually move the text to the left. `kind` is currently unused.
  if not em.inquote and goodCol(em.col):
    let indent = em.indentLevel + moreIndent(em)
    if indent > 0 and indent < em.col:
      wr(em, strutils.repeat(' ', indent), ltOptionalNewline)
    #em.altSplitPos[kind] = em.tokens.len
proc emitMultilineComment(em: var Emitter, lit: string, col: int; dontIndent: bool) =
  ## Emits the comment text `lit` line by line.
  ##
  ## Lines are copied verbatim when `dontIndent` is set or when the first
  ## line ends in a backslash or `[`. Otherwise every line is stripped and
  ## re-aligned: the first line goes after a tab token, later lines are
  ## placed at column `col` shifted by one indentation width each time the
  ## source indentation grows or shrinks relative to the previous line.
  var
    lineIdx = 0
    prevIndent = if em.keepIndents > 0: em.indentLevel else: em.indentStack[^1]
    shift = 0
    verbatim = dontIndent
    sawEmptyLine = false
  for commentLine in splitLines(lit):
    if lineIdx == 0 and
        (commentLine.endsWith("\\") or commentLine.endsWith("[")):
      verbatim = true
      wr em, commentLine, ltComment
    elif verbatim:
      if lineIdx > 0: wrNewline em
      wr em, commentLine, ltComment
    else:
      let stripped = commentLine.strip()
      if lineIdx == 0:
        if em.kinds.len > 0 and em.kinds[^1] != ltTab:
          wr(em, "", ltTab)
      elif stripped.len == 0:
        wrNewline em
        sawEmptyLine = true
      else:
        # count the leading blanks of the source line
        var leading = 0
        while leading < commentLine.len and commentLine[leading] == ' ':
          inc leading
        if leading > prevIndent:
          shift += em.indWidth
          prevIndent = leading
        elif leading < prevIndent:
          shift -= em.indWidth
          prevIndent = leading
        wrNewline em
        let target = col + shift
        if not sawEmptyLine or target < 15:
          if target > 0:
            wr(em, repeat(' ', target), ltTab)
          else:
            wr(em, "", ltTab)
        else:
          wr(em, repeat(' ', leading), ltSpaces)
      wr em, stripped, ltComment
    inc lineIdx
proc lastChar(s: string): char =
  ## Last character of `s`, or `'\0'` for the empty string.
  if s.len == 0:
    result = '\0'
  else:
    result = s[^1]
proc endsInWhite(em: Emitter): bool =
  ## True when the last token, ignoring section markers, is spaces, a tab or
  ## a crucial/splitting newline. Also true when no such token exists.
  var last = em.tokens.high
  while last >= 0 and em.kinds[last] in {ltBeginSection, ltEndSection}:
    dec last
  result = last < 0 or
    em.kinds[last] in {ltSpaces, ltCrucialNewline, ltSplittingNewline, ltTab}
proc endsInNewline(em: Emitter): bool =
  ## True when the last token, ignoring section markers and spaces, is a
  ## crucial/splitting newline or a tab. Also true when no such token exists.
  var last = em.tokens.high
  while last >= 0 and
      em.kinds[last] in {ltBeginSection, ltEndSection, ltSpaces}:
    dec last
  result = last < 0 or
    em.kinds[last] in {ltCrucialNewline, ltSplittingNewline, ltTab}
proc endsInAlpha(em: Emitter): bool =
  ## True when the text of the last token, ignoring section markers, ends in
  ## a symbol character or an underscore. False when no such token exists.
  var last = em.tokens.high
  while last >= 0 and em.kinds[last] in {ltBeginSection, ltEndSection}:
    dec last
  result = last >= 0 and em.tokens[last].lastChar in SymChars+{'_'}
proc emitComment(em: var Emitter; tok: Token; dontIndent: bool) =
  ## Emits the comment belonging to `tok`. The text is taken from the source
  ## file between the token's comment offsets and stripped. Single-line
  ## comments are written as one token; comments spanning several lines are
  ## forwarded to `emitMultilineComment`. `em.lineSpan` is set to the number
  ## of newlines in the comment.
  var startCol = em.col # column before the comment, captured up front
  let lit = strip fileSection(em.config, em.fid, tok.commentOffsetA,
                              tok.commentOffsetB)
  em.lineSpan = countNewlines(lit)
  if em.lineSpan == 0:
    if not endsInNewline(em):
      wrTab em
    wr em, lit, ltComment
  else:
    calcCol(em, lit)
    if not endsInWhite(em):
      wrTab em
      inc startCol
    emitMultilineComment(em, lit, startCol, dontIndent)
proc emitTok*(em: var Emitter; L: Lexer; tok: Token) =
  ## Appends the layout tokens for the lexer token `tok`.
  ##
  ## Handles, in this order: the `#!nimpretty on/off` directives, comments
  ## that sit on the same line as the previous token, the newline and
  ## indentation in front of a token that starts a line, and finally the
  ## token text itself together with the whitespace around it. `L` is not
  ## used by this proc.
  template wasExportMarker(em): bool =
    em.kinds.len > 0 and em.kinds[^1] == ltExportMarker

  template isUnary(tok): bool =
    tok.strongSpaceB == 0 and tok.strongSpaceA > 0

  # --- formatter directives ------------------------------------------------
  if tok.tokType == tkComment and tok.literal.startsWith("#!nimpretty"):
    case tok.literal
    of "#!nimpretty off":
      inc em.keepIndents
      wrNewline em
      em.lastLineNumber = tok.line + 1
    of "#!nimpretty on":
      dec em.keepIndents
      em.lastLineNumber = tok.line
    wrNewline em
    wr em, tok.literal, ltComment
    em.col = 0
    em.lineSpan = 0
    return

  # --- line start / inline comment -----------------------------------------
  var preventComment = false
  if tok.tokType == tkComment and tok.line == em.lastLineNumber:
    # we have an inline comment so handle it before the indentation token:
    emitComment(em, tok, dontIndent = (em.inSection == 0))
    preventComment = true
    em.fixedUntil = em.tokens.high
  elif tok.indent >= 0:
    var newlineKind = ltCrucialNewline
    if em.keepIndents > 0:
      em.indentLevel = tok.indent
    elif (em.lastTok in (splitters + oprSet) and
        tok.tokType notin (closedPars - {tkBracketDotRi})):
      if tok.tokType in openPars and tok.indent > em.indentStack[^1]:
        while em.indentStack[^1] < tok.indent:
          em.indentStack.add(em.indentStack[^1] + em.indWidth)
      while em.indentStack[^1] > tok.indent:
        discard em.indentStack.pop()

      # aka: we are in an expression context:
      let alignment = max(tok.indent - em.indentStack[^1], 0)
      em.indentLevel = alignment + em.indentStack.high * em.indWidth
      newlineKind = ltSplittingNewline
    else:
      if tok.indent > em.indentStack[^1]:
        em.indentStack.add tok.indent
      else:
        # dedent?
        while em.indentStack.len > 1 and em.indentStack[^1] > tok.indent:
          discard em.indentStack.pop()
      em.indentLevel = em.indentStack.high * em.indWidth
      #[ we only correct the indentation if it is not in an expression context,
         so that code like

          const splitters = {tkComma, tkSemicolon, tkParLe, tkParDotLe,
                             tkBracketLe, tkBracketLeColon, tkCurlyDotLe,
                             tkCurlyLe}

         is not touched.
      ]#
    # remove trailing whitespace:
    removeSpaces em
    wrNewline em, newlineKind
    # reproduce the blank lines that separated the two tokens in the source
    for i in 2..tok.line - em.lastLineNumber: wrNewline(em)
    wrSpaces em, em.indentLevel
    em.fixedUntil = em.tokens.high

  # --- the token itself ----------------------------------------------------
  var lastTokWasTerse = false
  case tok.tokType
  of tokKeywordLow..tokKeywordHigh:
    if endsInAlpha(em):
      wrSpace em
    elif not em.inquote and not endsInWhite(em) and
        em.lastTok notin (openPars+{tkOpr, tkDotDot}) and
        not em.lastTokWasTerse:
      #and tok.tokType in oprSet
      wrSpace em

    if em.inquote:
      # keywords in backticks are not normalized:
      wr(em, tok.ident.s, ltIdent)
    else:
      wr(em, $tok.tokType, ltKeyword)
      if tok.tokType in {tkAnd, tkOr, tkIn, tkNotin}:
        rememberSplit(splitIn)
        wrSpace em
  of tkColon:
    wr(em, $tok.tokType, ltOther)
    wrSpace em
  of tkSemiColon, tkComma:
    wr(em, $tok.tokType, ltOther)
    rememberSplit(splitComma)
    wrSpace em
  of openPars:
    if tok.strongSpaceA > 0 and not em.endsInWhite and
        (not em.wasExportMarker or tok.tokType == tkCurlyDotLe):
      wrSpace em
    wr(em, $tok.tokType, ltSomeParLe)
    if tok.tokType != tkCurlyDotLe:
      rememberSplit(splitParLe)
  of closedPars:
    wr(em, $tok.tokType, ltSomeParRi)
  of tkColonColon:
    wr(em, $tok.tokType, ltOther)
  of tkDot:
    lastTokWasTerse = true
    wr(em, $tok.tokType, ltOther)
  of tkEquals:
    if not em.inquote and not em.endsInWhite: wrSpace(em)
    wr(em, $tok.tokType, ltOther)
    if not em.inquote: wrSpace(em)
  of tkOpr, tkDotDot:
    if em.inquote or ((tok.strongSpaceA == 0 and tok.strongSpaceB == 0) and
        tok.ident.s notin ["<", ">", "<=", ">=", "==", "!="]):
      # bug #9504: remember to not spacify a keyword:
      lastTokWasTerse = true
      # if not surrounded by whitespace, don't produce any whitespace either:
      wr(em, tok.ident.s, ltOpr)
    else:
      if not em.endsInWhite: wrSpace(em)
      wr(em, tok.ident.s, ltOpr)
      if not isUnary(tok):
        rememberSplit(splitBinary)
        wrSpace(em)
  of tkAccent:
    if not em.inquote and endsInAlpha(em): wrSpace(em)
    wr(em, $tok.tokType, ltOther)
    em.inquote = not em.inquote
  of tkComment:
    if not preventComment:
      emitComment(em, tok, dontIndent = false)
  of tkIntLit..tkStrLit, tkRStrLit, tkTripleStrLit, tkGStrLit,
      tkGTripleStrLit, tkCharLit:
    if em.inquote:
      if endsInAlpha(em): wrSpace(em)
      wr em, tok.literal, ltLit
    else:
      # take the literal exactly as spelled in the source file
      let lit = fileSection(em.config, em.fid, tok.offsetA, tok.offsetB)
      if endsInAlpha(em) and tok.tokType notin {tkGStrLit, tkGTripleStrLit}:
        wrSpace(em)
      em.lineSpan = countNewlines(lit)
      if em.lineSpan > 0: calcCol(em, lit)
      wr em, lit, ltLit
  of tkEof:
    discard
  else:
    let lit = if tok.ident != nil: tok.ident.s else: tok.literal
    if endsInAlpha(em): wrSpace(em)
    wr em, lit, ltIdent

  # --- bookkeeping for the next call ---------------------------------------
  em.lastTok = tok.tokType
  em.lastTokWasTerse = lastTokWasTerse
  em.lastLineNumber = tok.line + em.lineSpan
  em.lineSpan = 0
proc endsWith(em: Emitter; k: varargs[string]): bool =
  ## True when the texts of the last `k.len` tokens equal `k`, in order.
  let offset = em.tokens.len - k.len
  if offset < 0: return false
  for i, expected in k:
    if em.tokens[offset + i] != expected: return false
  result = true
proc rfind(em: Emitter, t: string): int =
  ## Searches the last five tokens (fewer if the emitter holds fewer) for the
  ## text `t`, starting at the most recent one.
  ##
  ## Returns the 1-based distance from the end (so that `em.tokens[^result]`
  ## is the match), or 0 when `t` was not found.
  # Bound the scan by the number of tokens so `^i` never runs off the front.
  for i in 1..min(5, em.tokens.len):
    if em.tokens[^i] == t:
      return i
  result = 0
proc starWasExportMarker*(em: var Emitter) =
  ## If the last three tokens are `" "`, `"*"`, `" "`, replaces them by a
  ## single `"*"` token of kind `ltExportMarker` and moves the column back
  ## by the two removed blanks.
  if not em.endsWith(" ", "*", " "): return
  em.tokens.setLen(em.tokens.len-3)
  em.kinds.setLen(em.kinds.len-3)
  em.tokens.add "*"
  em.kinds.add ltExportMarker
  em.col -= 2
proc commaWasSemicolon*(em: var Emitter) =
  ## Rewrites the most recent `","` token (as located by `rfind`) to `";"`
  ## when semicolons are in use. On the first call the mode is decided once:
  ## `useSemicolon` if `rfind` locates a `";"` token, `dontTouch` otherwise.
  if em.semicolons == detectSemicolonKind:
    em.semicolons =
      if em.rfind(";") > 0: useSemicolon
      else: dontTouch
  if em.semicolons != useSemicolon: return
  let fromEnd = em.rfind(",")
  if fromEnd > 0:
    em.tokens[^fromEnd] = ";"
proc curlyRiWasPragma*(em: var Emitter) =
  ## If the last token is `"}"`, turns it into `".}"` and advances the
  ## column by the one added character.
  if not em.endsWith("}"): return
  em.tokens[^1] = ".}"
  em.col += 1
|