12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884 |
- #
- #
- # Nim's Runtime Library
- # (c) Copyright 2012 Andreas Rumpf
- #
- # See the file "copying.txt", included in this
- # distribution, for details about the copyright.
- #
- ## This module implements a `reStructuredText`:idx: (RST) and
- ## `Markdown`:idx: parser.
- ## User's manual on supported markup syntax and command line usage can be
- ## found in [Nim-flavored Markdown and reStructuredText](markdown_rst.html).
- ##
- ## * See also [Nim DocGen Tools Guide](docgen.html) for handling of
- ## ``.nim`` files.
- ## * See also [packages/docutils/rstgen module](rstgen.html) to know how to
- ## generate HTML or Latex strings (for embedding them into custom documents).
- ##
- ## Choice between Markdown and RST as well as optional additional features are
- ## turned on by passing ``options:`` [RstParseOptions] to [proc rstParse].
- import
- std/[os, strutils, enumutils, algorithm, lists, sequtils,
- tables, strscans]
- import dochelpers, rstidx, rstast
- import std/private/miscdollars
- from highlite import SourceLanguage, getSourceLanguage
- when defined(nimPreviewSlimSystem):
- import std/[assertions, syncio]
- type
- RstParseOption* = enum ## options for the RST parser
- roSupportSmilies, ## make the RST parser support smilies like ``:)``
- roSupportRawDirective, ## support the ``raw`` directive (don't support
- ## it for sandboxing)
- roSupportMarkdown, ## support additional features of Markdown
- roPreferMarkdown, ## parse as Markdown (keeping RST as "extension"
- ## to Markdown) -- implies `roSupportMarkdown`
- roNimFile ## set for Nim files where default interpreted
- ## text role should be :nim:
- roSandboxDisabled ## this option enables certain options
- ## (e.g. raw, include, importdoc)
- ## which are disabled by default as they can
- ## enable users to read arbitrary data and
- ## perform XSS if the parser is used in a web
- ## app.
- RstParseOptions* = set[RstParseOption] ## the set of enabled parser options
- MsgClass* = enum ## severity class of a message, see `whichMsgClass`
- mcHint = "Hint",
- mcWarning = "Warning",
- mcError = "Error"
- # keep the order in sync with compiler/docgen.nim and compiler/lineinfos.nim:
- MsgKind* = enum ## the possible messages; the 2nd letter of each name
- ## (`e`/`w`) is what `whichMsgClass` inspects, and `defaultMsgHandler`
- ## fills `$1` in the text with its `arg`
- meCannotOpenFile = "cannot open '$1'",
- meExpected = "'$1' expected",
- meMissingClosing = "$1",
- meGridTableNotImplemented = "grid table is not implemented",
- meMarkdownIllformedTable = "illformed delimiter row of a Markdown table",
- meIllformedTable = "Illformed table: $1",
- meNewSectionExpected = "new section expected $1",
- meGeneralParseError = "general parse error",
- meInvalidDirective = "invalid directive: '$1'",
- meInvalidField = "invalid field: $1",
- meFootnoteMismatch = "mismatch in number of footnotes and their refs: $1",
- mwRedefinitionOfLabel = "redefinition of label '$1'",
- mwUnknownSubstitution = "unknown substitution '$1'",
- mwAmbiguousLink = "ambiguous doc link $1",
- mwBrokenLink = "broken link '$1'",
- mwUnsupportedLanguage = "language '$1' not supported",
- mwUnsupportedField = "field '$1' not supported",
- mwRstStyle = "RST style: $1",
- mwUnusedImportdoc = "importdoc for '$1' is not used",
- meSandboxedDirective = "disabled directive: '$1'",
- MsgHandler* = proc (filename: string, line, col: int, msgKind: MsgKind,
- arg: string) {.closure, gcsafe.} ## what to do in case of an error
- FindFileHandler* = proc (filename: string): string {.closure, gcsafe.} ## maps a file name to a usable path; `defaultFindFile` yields an empty string when the file does not exist
- FindRefFileHandler* =
- proc (targetRelPath: string):
- tuple[targetPath: string, linkRelPath: string] {.closure, gcsafe.}
- ## returns where .html or .idx file should be found by its relative path;
- ## `linkRelPath` is a prefix to be added before a link anchor from such file
- proc rstnodeToRefname*(n: PRstNode): string # forward declaration (signature only, no body here)
- proc addNodes*(n: PRstNode): string # forward declaration
- proc getFieldValue*(n: PRstNode, fieldname: string): string {.gcsafe.} # forward declaration
- proc getArgument*(n: PRstNode): string # forward declaration
- # ----------------------------- scanner part --------------------------------
- const
- SymChars: set[char] = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'} # characters that `rawGetTok` collects into a `tkWord` token; bytes >= 0x80 are included (presumably so that multi-byte UTF-8 sequences stay inside one word -- confirm)
- SmileyStartChars: set[char] = {':', ';', '8'} # every key of `Smilies` begins with one of these characters
- Smilies = { # smiley text -> name of the smiley (values look like icon identifiers; their consumer is outside this section)
- ":D": "icon_e_biggrin",
- ":-D": "icon_e_biggrin",
- ":)": "icon_e_smile",
- ":-)": "icon_e_smile",
- ";)": "icon_e_wink",
- ";-)": "icon_e_wink",
- ":(": "icon_e_sad",
- ":-(": "icon_e_sad",
- ":o": "icon_e_surprised",
- ":-o": "icon_e_surprised",
- ":shock:": "icon_eek",
- ":?": "icon_e_confused",
- ":-?": "icon_e_confused",
- ":-/": "icon_e_confused",
- "8-)": "icon_cool",
- ":lol:": "icon_lol",
- ":x": "icon_mad",
- ":-x": "icon_mad",
- ":P": "icon_razz",
- ":-P": "icon_razz",
- ":oops:": "icon_redface",
- ":cry:": "icon_cry",
- ":evil:": "icon_evil",
- ":twisted:": "icon_twisted",
- ":roll:": "icon_rolleyes",
- ":!:": "icon_exclaim",
- ":?:": "icon_question",
- ":idea:": "icon_idea",
- ":arrow:": "icon_arrow",
- ":|": "icon_neutral",
- ":-|": "icon_neutral",
- ":mrgreen:": "icon_mrgreen",
- ":geek:": "icon_e_geek",
- ":ugeek:": "icon_e_ugeek"
- }
- SandboxDirAllowlist = [ # directive names; NOTE(review): by its name, the directives that stay available when sandboxing is on (cf. `meSandboxedDirective`) -- the check itself is outside this section
- "image", "code", "code-block", "admonition", "attention", "caution",
- "container", "contents", "danger", "default-role", "error", "figure",
- "hint", "important", "index", "note", "role", "tip", "title", "warning"]
- type
- TokType = enum # kinds of tokens produced by `rawGetTok`
- tkEof, tkIndent, # end of input; line break together with the indentation that follows it
- tkWhite, tkWord, # run of blanks inside a line; run of `SymChars` characters
- tkAdornment, # used for chapter adornment, transitions and
- # horizontal table borders
- tkPunct, # one or many punctuation characters
- tkOther # any single character that fits no other category
- Token = object # a RST token
- kind*: TokType # the type of the token
- ival*: int # the indentation or parsed integer value
- symbol*: string # the parsed symbol as string
- line*, col*: int # line and column of the token
- TokenSeq = seq[Token]
- Lexer = object of RootObj # scanner state used while tokenizing one buffer
- buf*: cstring # the text being tokenized; the scanner stops at its terminating '\0'
- bufpos*: int # index in `buf` of the next character to read
- line*, col*, baseIndent*: int # current line and column; `baseIndent` is subtracted (clamped at 0) from indentation values and token columns
- adornmentLine*: bool # true while the current line was recognised by `isCurrentLineAdornment`
- escapeNext*: bool # set after a lone backslash: the following punctuation character forms a token of its own
proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
  ## Reads a `tkWord` token starting at the current buffer position.
  ##
  ## The character under the cursor is always consumed, even if it is not
  ## a member of `s`; after that, characters are consumed for as long as
  ## they belong to `s`. The lexer's position and column are advanced past
  ## the token, and the token records the line/column where it started.
  let first = L.bufpos
  var cur = first
  # The first character is taken unconditionally.
  tok.symbol.add(L.buf[cur])
  inc cur
  while L.buf[cur] in s:
    tok.symbol.add(L.buf[cur])
    inc cur
  tok.kind = tkWord
  tok.line = L.line
  tok.col = L.col
  L.col += cur - first
  L.bufpos = cur
proc isCurrentLineAdornment(L: var Lexer): bool =
  ## Checks whether the rest of the current line (starting at the character
  ## under the cursor) can be an adornment line.
  ##
  ## * If the line starts with `+` (grid table), every following character
  ##   up to the end of line must be one of `-`, `=`, `+`.
  ## * Otherwise (section adornment or horizontal table border), every
  ##   following character must be the starting character itself or
  ##   a blank (space, tab, vertical tab, form feed).
  ##
  ## The lexer position is not changed.
  let lead = L.buf[L.bufpos]
  let permitted =
    if lead == '+': {'-', '=', '+'}
    else: {lead, ' ', '\t', '\v', '\f'}
  var i = L.bufpos + 1
  # Scan up to (not including) the line terminator or the end of the buffer.
  while L.buf[i] notin {'\c', '\l', '\0'}:
    if L.buf[i] notin permitted:
      return false
    inc i
  result = true
proc getPunctAdornment(L: var Lexer, tok: var Token) =
  ## Reads a punctuation token (`tkPunct`) or, on an adornment line,
  ## an adornment token (`tkAdornment`).
  ##
  ## Three situations are distinguished:
  ## * the previous token was an escaping backslash (`L.escapeNext`):
  ##   exactly one character is consumed;
  ## * a backslash outside of an adornment line: the single backslash is
  ##   consumed and `L.escapeNext` is set;
  ## * anything else: the whole run of identical characters is consumed.
  tok.kind = if L.adornmentLine: tkAdornment else: tkPunct
  tok.line = L.line
  tok.col = L.col
  let start = L.bufpos
  let ch = L.buf[start]
  var stop = start + 1 # at least one character is always consumed
  if L.escapeNext:
    tok.symbol.add(ch)
  elif ch == '\\' and not L.adornmentLine:
    tok.symbol.add '\\'
    L.escapeNext = true
  else:
    tok.symbol.add(ch)
    while L.buf[stop] == ch:
      tok.symbol.add(ch)
      inc stop
  inc L.col, stop - start
  L.bufpos = stop
  # nim extension: standalone \ can not be adornment
  if tok.symbol == "\\":
    tok.kind = tkPunct
proc getBracket(L: var Lexer, tok: var Token) =
  ## Reads exactly one character as a `tkPunct` token and advances the
  ## lexer by one position/column. `rawGetTok` uses it for brackets, so
  ## that consecutive brackets are never merged into one token.
  let here = L.bufpos
  tok.symbol.add(L.buf[here])
  tok.kind = tkPunct
  tok.line = L.line
  tok.col = L.col
  L.bufpos = here + 1
  L.col += 1
proc getIndentAux(L: var Lexer, start: int): int =
  ## Computes the indentation that follows the line break at `start`.
  ##
  ## Spaces, vertical tabs and form feeds count as one column each; a tab
  ## advances to the next multiple of 8. When the line turns out to be
  ## blank, the indentation of the following line is used instead
  ## (recursively); at the end of the buffer the result is 0.
  ##
  ## `L.bufpos` is left on the first character after the leading blanks of
  ## the line that directly follows `start` -- also when that line is blank,
  ## because the assignment at the end runs after the recursive call.
  var cur = start
  # skip the newline (but include it in the token!)
  case L.buf[cur]
  of '\r':
    if L.buf[cur + 1] == '\n': cur += 2
    else: cur += 1
  of '\n':
    cur += 1
  else:
    discard
  var scanning = true
  while scanning:
    case L.buf[cur]
    of ' ', '\v', '\f':
      inc result
      inc cur
    of '\t':
      # jump to the next tab stop (multiples of 8)
      result = (result div 8 + 1) * 8
      inc cur
    else:
      scanning = false # EndOfFile also leaves the loop
  case L.buf[cur]
  of '\0':
    result = 0
  of '\n', '\r':
    # look at the next line for proper indentation:
    result = getIndentAux(L, cur)
  else:
    discard
  L.bufpos = cur # no need to set back buf
proc getIndent(L: var Lexer, tok: var Token) =
  ## Reads a `tkIndent` token: a line break plus the indentation that
  ## follows it.
  ##
  ## The lexer moves to the next line and its column is set to the absolute
  ## indentation. The token's `ival` holds the indentation relative to
  ## `L.baseIndent` (never negative) and its `symbol` is a newline followed
  ## by that many spaces.
  let absolute = getIndentAux(L, L.bufpos)
  inc L.line
  L.col = absolute
  let relative = max(absolute - L.baseIndent, 0)
  tok.kind = tkIndent # skip the newline (but include it in the token!)
  tok.line = L.line
  tok.col = 0
  tok.ival = relative
  tok.symbol = "\n" & spaces(relative)
proc rawGetTok(L: var Lexer, tok: var Token) =
  ## Reads the next token from the lexer into `tok`.
  ##
  ## The token is reset first (`symbol`, `ival`), then the character under
  ## the cursor selects the reader: words, blanks, line breaks, punctuation /
  ## adornment, brackets. Any other character becomes a one-character
  ## `tkOther` token, and `'\0'` yields `tkEof` without advancing.
  ## Finally `L.baseIndent` is subtracted from the token column (clamped
  ## at 0).
  const
    Blanks = {' ', '\t', '\v', '\f'}
    LineBreaks = {'\r', '\n'}
    Punctuation = {'!', '\"', '#', '$', '%', '&', '\'', '*', '+', ',', '-', '.',
                   '/', ':', ';', '<', '=', '>', '?', '@', '\\', '^', '_', '`',
                   '|', '~'}
    Brackets = {'(', ')', '[', ']', '{', '}'}
  tok.symbol = ""
  tok.ival = 0
  if L.col == 0:
    L.adornmentLine = false
  let ch = L.buf[L.bufpos]
  if ch in SymChars:
    getThing(L, tok, SymChars)
  elif ch in Blanks:
    getThing(L, tok, {' ', '\t'})
    tok.kind = tkWhite
    if L.buf[L.bufpos] in LineBreaks:
      # ignore spaces before \n: the blank token is replaced by what follows
      rawGetTok(L, tok)
  elif ch in LineBreaks:
    getIndent(L, tok)
    L.adornmentLine = false
  elif ch in Punctuation:
    # Only the first character of a line decides whether it is an adornment.
    if L.col == 0:
      L.adornmentLine = isCurrentLineAdornment(L)
    getPunctAdornment(L, tok)
  elif ch in Brackets:
    getBracket(L, tok)
  else:
    tok.line = L.line
    tok.col = L.col
    if ch == '\0':
      tok.kind = tkEof
    else:
      tok.kind = tkOther
      tok.symbol.add(ch)
      inc L.bufpos
      inc L.col
  tok.col = max(tok.col - L.baseIndent, 0)
proc getTokens(buffer: string, tokens: var TokenSeq) =
  ## Tokenizes `buffer` and appends the resulting tokens to `tokens`;
  ## the last appended token is always `tkEof`.
  ##
  ## A UTF-8 byte order mark at the start of `buffer` is skipped. If the
  ## text starts with blanks, that first token is turned into a `tkIndent`
  ## whose `ival` is the number of blank characters.
  var L: Lexer
  # Index of the first token produced by this call. `tokens` may already
  # contain entries, so the leading-whitespace fix-up below must look at
  # this index rather than at `tokens[0]`.
  let first = tokens.len
  var length = first
  L.buf = cstring(buffer)
  L.line = 0
  # skip UTF-8 BOM
  if L.buf[0] == '\xEF' and L.buf[1] == '\xBB' and L.buf[2] == '\xBF':
    inc L.bufpos, 3
  while true:
    inc length
    setLen(tokens, length)
    # `escapeNext` set by the previous token applies to this token only.
    let toEscape = L.escapeNext
    rawGetTok(L, tokens[length - 1])
    if toEscape: L.escapeNext = false
    if tokens[length - 1].kind == tkEof: break
  if tokens[first].kind == tkWhite:
    # BUGFIX
    tokens[first].ival = tokens[first].symbol.len
    tokens[first].kind = tkIndent
- type
- LevelInfo = object # one heading adornment style
- symbol: char # adornment character
- hasOverline: bool # has also overline (besides underline)?
- line: int # the last line of this style occurrence
- # (for error message)
- hasPeers: bool # has headings on the same level of hierarchy?
- LiteralBlockKind = enum # RST-style literal blocks after `::`
- lbNone,
- lbIndentedLiteralBlock,
- lbQuotedLiteralBlock
- LevelMap = seq[LevelInfo] # Saves for each possible title adornment
- # style its level in the current document.
- SubstitutionKind = enum
- rstSubstitution = "substitution",
- hyperlinkAlias = "hyperlink alias",
- implicitHyperlinkAlias = "implicitly-generated hyperlink alias"
- Substitution = object
- kind*: SubstitutionKind
- key*: string
- value*: PRstNode
- info*: TLineInfo # place where the substitution was defined
- AnchorRule = enum
- arInternalRst, ## For automatically generated RST anchors (from
- ## headings, footnotes, inline internal targets):
- ## case-insensitive, 1-space-significant (by RST spec)
- arExternalRst, ## For external .nim doc comments or .rst/.md
- arNim ## For anchors generated by ``docgen.nim``: Nim-style case
- ## sensitivity, etc. (see `proc normalizeNimName`_ for details)
- arHyperlink, ## For links with manually set anchors in
- ## form `text <pagename.html#anchor>`_
- RstAnchorKind = enum
- manualDirectiveAnchor = "manual directive anchor",
- manualInlineAnchor = "manual inline anchor",
- footnoteAnchor = "footnote anchor",
- headlineAnchor = "implicitly-generated headline anchor"
- AnchorSubst = object
- info: TLineInfo # the file where the anchor was defined
- priority: int
- case kind: range[arInternalRst .. arNim] # `arHyperlink` is excluded from the discriminator range
- of arInternalRst:
- anchorType: RstAnchorKind
- target: PRstNode
- of arExternalRst:
- anchorTypeExt: RstAnchorKind
- refnameExt: string
- of arNim:
- module: FileIndex # anchor's module (generally not the same as file)
- tooltip: string # displayed tooltip for Nim-generated anchors
- langSym: LangSymbol
- refname: string # A reference name that will be inserted directly
- # into HTML/Latex.
- external: bool
- AnchorSubstTable = Table[string, seq[AnchorSubst]]
- # use `seq` to account for duplicate anchors
- FootnoteType = enum
- fnManualNumber, # manually numbered footnote like [3]
- fnAutoNumber, # auto-numbered footnote [#]
- fnAutoNumberLabel, # auto-numbered with label [#label]
- fnAutoSymbol, # auto-symbol footnote [*]
- fnCitation # simple text label like [citation2021]
- FootnoteSubst = tuple
- kind: FootnoteType # discriminator
- number: int # valid for fnManualNumber (always) and fnAutoNumber,
- # fnAutoNumberLabel after resolveSubs is called
- autoNumIdx: int # order of occurrence: fnAutoNumber, fnAutoNumberLabel
- autoSymIdx: int # order of occurrence: fnAutoSymbol
- label: string # valid for fnAutoNumberLabel
- RstFileTable* = object ## two-way mapping between file names and `FileIndex` values (filled by `addFilename`)
- filenameToIdx*: Table[string, FileIndex]
- idxToFilename*: seq[string]
- ImportdocInfo = object
- used: bool # was this import used?
- fromInfo: TLineInfo # place of `.. importdoc::` directive
- idxPath: string # full path to ``.idx`` file
- linkRelPath: string # prefix before target anchor
- title: string # document title obtained from ``.idx``
- RstSharedState = object
- options*: RstParseOptions # parsing options
- hLevels: LevelMap # hierarchy of heading styles
- hTitleCnt: int # =0 if no title, =1 if only main title,
- # =2 if both title and subtitle are present
- hCurLevel: int # current section level
- currRole: string # current interpreted text role
- currRoleKind: RstNodeKind # ... and its node kind
- subs: seq[Substitution] # substitutions
- refs*: seq[Substitution] # references
- anchors*: AnchorSubstTable
- # internal target substitutions
- lineFootnoteNum: seq[TLineInfo] # footnote line, auto numbers .. [#]
- lineFootnoteNumRef: seq[TLineInfo] # footnote line, their reference [#]_
- currFootnoteNumRef: int # ... their counter for `resolveSubs`
- lineFootnoteSym: seq[TLineInfo] # footnote line, auto symbols .. [*]
- lineFootnoteSymRef: seq[TLineInfo] # footnote line, their reference [*]_
- currFootnoteSymRef: int # ... their counter for `resolveSubs`
- footnotes: seq[FootnoteSubst] # correspondence b/w footnote label,
- # number, order of occurrence
- msgHandler: MsgHandler # How to handle errors.
- findFile: FindFileHandler # How to find files for include.
- findRefFile: FindRefFileHandler
- # How to find files imported by importdoc.
- filenames*: RstFileTable # map file name <-> FileIndex (for storing
- # file names for warnings after 1st stage)
- currFileIdx*: FileIndex # current index in `filenames`
- tocPart*: seq[PRstNode] # all the headings of a document
- hasToc*: bool
- idxImports*: Table[string, ImportdocInfo]
- # map `importdoc`ed filename -> its info
- nimFileImported*: bool # Was any ``.nim`` module `importdoc`ed ?
- PRstSharedState* = ref RstSharedState
- ManualAnchor = object
- alias: string # a (short) name that can substitute the `anchor`
- anchor: string # anchor = id = refname
- info: TLineInfo
- RstParser = object of RootObj
- idx*: int ## index of the current token in `tok` (see `currentTok`)
- tok*: TokenSeq ## the token stream being parsed
- s*: PRstSharedState ## state shared between parsers
- indentStack*: seq[int]
- line*, col*: int ## initial line/column of whole text or
- ## documentation fragment that will be added
- ## in case of error/warning reporting to
- ## (relative) line/column of the token.
- curAnchors*: seq[ManualAnchor]
- ## seq to accumulate aliases for anchors:
- ## because RST can have >1 alias per 1 anchor
- EParseError* = object of ValueError ## raised by `defaultMsgHandler` for messages of class `mcError`
- SectionParser = proc (p: var RstParser): PRstNode {.nimcall, gcsafe.}
- const
- LineRstInit* = 1 ## Initial line number for standalone RST text
- ColRstInit* = 0 ## Initial column number for standalone RST text
- ## (Nim global reporting adds ColOffset=1)
- ColRstOffset* = 1 ## 1: a replica of ColOffset for internal use;
- ## `defaultMsgHandler` adds it to the column it reports
template currentTok(p: RstParser): Token =
  ## The token at the parser's current position (`p.idx`).
  p.tok[p.idx]

template prevTok(p: RstParser): Token =
  ## The token directly before the current one.
  p.tok[p.idx - 1]

template nextTok(p: RstParser): Token =
  ## The token directly after the current one.
  p.tok[p.idx + 1]
proc whichMsgClass*(k: MsgKind): MsgClass =
  ## returns which message class `k` belongs to.
  ##
  ## The class is encoded in the second letter of the enum value's name:
  ## `e` for errors, `w` for warnings, `h` for hints (either case).
  let marker = k.symbolName[1]
  if marker in {'e', 'E'}:
    result = mcError
  elif marker in {'w', 'W'}:
    result = mcWarning
  elif marker in {'h', 'H'}:
    result = mcHint
  else:
    assert false, "msgkind does not fit naming scheme"
proc defaultMsgHandler*(filename: string, line, col: int, msgkind: MsgKind,
                        arg: string) =
  ## Default message handler.
  ##
  ## Builds a message consisting of the location (with `ColRstOffset` added
  ## to `col`), the message class and the text of `msgkind` with `arg`
  ## substituted into it. Errors are raised as `EParseError`; warnings and
  ## hints are written to `stdout`.
  let cls = whichMsgClass(msgkind)
  let details = $msgkind % arg
  var text = ""
  text.toLocation(filename, line, col + ColRstOffset)
  text.add(" $1: $2" % [$cls, details])
  case cls
  of mcError:
    raise newException(EParseError, text)
  of mcHint, mcWarning:
    stdout.writeLine(text)
proc defaultFindFile*(filename: string): string =
  ## Default file lookup: returns `filename` unchanged if such a file
  ## exists, otherwise an empty string.
  result = ""
  if fileExists(filename):
    result = filename
proc defaultFindRefFile*(filename: string): (string, string) =
  ## Default lookup for referenced files: the path is returned as is,
  ## paired with an empty second component.
  result = (filename, "")
proc defaultRole(options: RstParseOptions): string =
  ## Name of the default interpreted text role: `"nim"` when `roNimFile`
  ## is set, `"literal"` otherwise.
  result = "literal"
  if options.contains(roNimFile):
    result = "nim"
proc whichRoleAux(sym: string): RstNodeKind =
  ## Maps a role name (compared case-insensitively for ASCII) to the node
  ## kind used for it.
  ##
  ## Names that are not built-in roles but are recognised by
  ## `getSourceLanguage` give `rnInlineCode`; everything else gives
  ## `rnUnknownRole`.
  let role = toLowerAscii(sym)
  result =
    case role
    of "idx": rnIdx
    # literal and code are the same in our implementation
    of "literal", "code": rnInlineLiteral
    of "strong": rnStrongEmphasis
    of "emphasis": rnEmphasis
    of "sub", "subscript": rnSub
    of "sup", "superscript": rnSup
    of "program", "option", "tok": rnCodeFragment
    else:
      # c++ currently can be spelled only as cpp, c# only as csharp
      if getSourceLanguage(role) != langNone: rnInlineCode
      else: rnUnknownRole # unknown role
proc len(filenames: RstFileTable): int =
  ## Number of file names stored in the table.
  result = len(filenames.idxToFilename)
proc addFilename*(s: PRstSharedState, file1: string): FileIndex =
  ## Returns index of filename, adding it if it has not been used before
  # A new name would get the next free index; receiving that index back
  # from the lookup therefore means the name is not registered yet.
  let fresh = FileIndex(s.filenames.len)
  result = s.filenames.filenameToIdx.getOrDefault(file1, fresh)
  if result == fresh:
    s.filenames.filenameToIdx[file1] = fresh
    s.filenames.idxToFilename.add(file1)
proc setCurrFilename*(s: PRstSharedState, file1: string) =
  ## Makes `file1` the current file of `s`, registering it via `addFilename`.
  let idx = s.addFilename(file1)
  s.currFileIdx = idx
proc getFilename(filenames: RstFileTable, fid: FileIndex): string =
  ## Looks up the file name stored under index `fid`.
  ## An out-of-range index fails a `doAssert`.
  let
    idx = fid.int
    total = filenames.len
  doAssert(idx >= 0 and idx < total,
           "incorrect FileIndex $1 (range 0..$2)" % [$idx, $(total - 1)])
  result = filenames.idxToFilename[idx]
proc getFilename(s: PRstSharedState, subst: AnchorSubst): string =
  ## File name recorded in the line info (`subst.info`) of the anchor.
  let fid = subst.info.fileIndex
  result = getFilename(s.filenames, fid)
proc getModule(s: PRstSharedState, subst: AnchorSubst): string =
  ## File name stored under the anchor's `module` index.
  getFilename(s.filenames, subst.module)
proc currFilename(s: PRstSharedState): string =
  ## Name of the file `s.currFileIdx` refers to.
  result = getFilename(s.filenames, s.currFileIdx)
proc newRstSharedState*(options: RstParseOptions,
                        filename: string,
                        findFile: FindFileHandler,
                        findRefFile: FindRefFileHandler,
                        msgHandler: MsgHandler,
                        hasToc: bool): PRstSharedState =
  ## Creates the shared parser state.
  ##
  ## Any handler passed as `nil` is replaced by its default
  ## (`defaultMsgHandler`, `defaultFindFile`, `defaultFindRefFile`).
  ## The current role is the default role for `options`, and `filename`
  ## becomes the current file.
  let roleName = defaultRole(options)
  result = PRstSharedState(
      currRole: roleName,
      currRoleKind: whichRoleAux(roleName),
      options: options,
      msgHandler:
        if not isNil(msgHandler): msgHandler
        else: defaultMsgHandler,
      findFile:
        if not isNil(findFile): findFile
        else: defaultFindFile,
      findRefFile:
        if not isNil(findRefFile): findRefFile
        else: defaultFindRefFile,
      hasToc: hasToc
  )
  result.setCurrFilename(filename)
proc curLine(p: RstParser): int =
  ## Line of the current token, offset by the parser's base line `p.line`.
  result = p.line + currentTok(p).line
proc findRelativeFile(p: RstParser; filename: string): string =
  ## Resolves `filename` against the directory of the current file. When no
  ## such file exists, the result of the shared state's `findFile` handler
  ## is returned instead.
  let baseDir = splitFile(p.s.currFilename).dir
  result = baseDir / filename
  if fileExists(result):
    return
  result = p.s.findFile(filename)
proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string) =
  ## Reports `msgKind` with argument `arg` at the position of the current
  ## token, through the message handler of the shared state.
  let
    file = p.s.currFilename
    line = curLine(p)
    column = p.col + currentTok(p).col
  p.s.msgHandler(file, line, column, msgKind, arg)
proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string) =
  ## Reports `msgKind` for the current file at the initial position
  ## (`LineRstInit`, `ColRstInit`).
  let file = currFilename(s)
  s.msgHandler(file, LineRstInit, ColRstInit, msgKind, arg)
proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string;
                line, col: int) =
  ## Reports `msgKind` for the current file at the given `line` and `col`,
  ## which are passed to the handler unchanged.
  let file = currFilename(s)
  s.msgHandler(file, line, col, msgKind, arg)
proc rstMessage(s: PRstSharedState, filename: string, msgKind: MsgKind,
                arg: string) =
  ## Reports `msgKind` for an explicitly named file at the initial position
  ## (`LineRstInit`, `ColRstInit`).
  s.msgHandler(
    filename, LineRstInit, ColRstInit,
    msgKind, arg)
proc rstMessage*(filenames: RstFileTable, f: MsgHandler,
                 info: TLineInfo, msgKind: MsgKind, arg: string) =
  ## Print warnings using `info`, i.e. in 2nd-pass warnings for
  ## footnotes/substitutions/references or from ``rstgen.nim``.
  ##
  ## The file name is looked up in `filenames` by `info.fileIndex`; line and
  ## column are taken from `info` and handed to handler `f`.
  f(getFilename(filenames, info.fileIndex),
    int(info.line), int(info.col), msgKind, arg)
proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string, line, col: int) =
  ## Reports `msgKind` at a position given relative to the parser:
  ## `line` and `col` are offset by `p.line` and `p.col`.
  let
    file = p.s.currFilename
    absLine = p.line + line
    absCol = p.col + col
  p.s.msgHandler(file, absLine, absCol, msgKind, arg)
proc rstMessage(p: RstParser, msgKind: MsgKind) =
  ## Reports `msgKind` at the position of the current token, using the
  ## token's own symbol as the message argument.
  let
    file = p.s.currFilename
    line = curLine(p)
    column = p.col + currentTok(p).col
  p.s.msgHandler(file, line, column, msgKind, currentTok(p).symbol)
- # Functions `isPureRst` & `stopOrWarn` address differences between
- # Markdown and RST:
- # * Markdown always tries to continue working. If it is really impossible
- # to parse a markup element, its proc just returns `nil` and parsing
- # continues for it as for normal text paragraph.
- # The downside is that real mistakes/typos are often silently ignored.
- # The same applies to legacy `RstMarkdown` mode for nimforum.
- # * RST really signals errors. The downside is that it's more intrusive -
- # the user must escape special syntax with \ explicitly.
- #
- # TODO: we need to apply this strategy to all markup elements eventually.
func isPureRst(p: RstParser): bool =
  ## True when `roSupportMarkdown` is not among the parser options.
  not p.s.options.contains(roSupportMarkdown)

func isRst(p: RstParser): bool =
  ## True when `roPreferMarkdown` is not among the parser options.
  not p.s.options.contains(roPreferMarkdown)

func isMd(p: RstParser): bool =
  ## True when `roPreferMarkdown` is among the parser options.
  p.s.options.contains(roPreferMarkdown)

func isMd(s: PRstSharedState): bool =
  ## True when `roPreferMarkdown` is among the options of the shared state.
  s.options.contains(roPreferMarkdown)
proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string) =
  ## Reports `errorType` in pure RST mode; in any other mode the message is
  ## replaced by `mwRstStyle`. Position: the current token.
  var kind = mwRstStyle
  if isPureRst(p):
    kind = errorType
  rstMessage(p, kind, arg)
proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string, line, col: int) =
  ## Reports `errorType` in pure RST mode; in any other mode the message is
  ## replaced by `mwRstStyle`. Position: `line`/`col` relative to the parser.
  var kind = mwRstStyle
  if isPureRst(p):
    kind = errorType
  rstMessage(p, kind, arg, line, col)
proc currInd(p: RstParser): int =
  ## Indentation currently on top of the indentation stack.
  p.indentStack[^1]
proc pushInd(p: var RstParser, ind: int) =
  ## Pushes indentation `ind` onto the indentation stack.
  add(p.indentStack, ind)
proc popInd(p: var RstParser) =
  ## Drops the top of the indentation stack. The last remaining entry is
  ## never removed.
  if p.indentStack.len <= 1:
    return
  p.indentStack.setLen(p.indentStack.len - 1)
- # Working with indentation in rst.nim
- # -----------------------------------
- #
- # Every line break has an associated tkIndent.
- # The tokenizer writes back the first column of next non-blank line
- # in all preceding tkIndent tokens to the `ival` field of tkIndent.
- #
- # An RST document is separated into body elements (B.E.), each of which
- # has a dedicated handler proc (or block of logic when B.E. is a block quote)
- # that should follow the next rule:
- # Every B.E. handler proc should finish at tkIndent (newline)
- # after its B.E. finishes.
- # Then its callers (which is `parseSection` or another B.E. handler)
- # check for tkIndent ival (without necessity to advance `p.idx`)
- # and decide themselves whether they continue processing or also stop.
- #
- # An example::
- #
- #   L    RST text fragment                  indentation
- #     +--------------------+
- #   1 |                    | <- (empty line at the start of file) no tokens
- #   2 |First paragraph.    | <- tkIndent has ival=0, and next tkWord has col=0
- #   3 |                    | <- tkIndent has ival=0
- #   4 |* bullet item and   | <- tkIndent has ival=0, and next tkPunct has col=0
- #   5 |  its continuation  | <- tkIndent has ival=2, and next tkWord has col=2
- #   6 |                    | <- tkIndent has ival=4
- #   7 |    Block quote     | <- tkIndent has ival=4, and next tkWord has col=4
- #   8 |                    | <- tkIndent has ival=0
- #   9 |                    | <- tkIndent has ival=0
- #   10|Final paragraph     | <- tkIndent has ival=0, and tkWord has col=0
- #     +--------------------+
- #    C:01234
- #
- # Here parser starts with initial `indentStack=[0]` and then calls the
- # 1st `parseSection`:
- #
- # - `parseSection` calls `parseParagraph` and "First paragraph" is parsed
- # - bullet list handler is started at reaching ``*`` (L4 C0), it
- # starts bullet item logic (L4 C2), which calls `pushInd(p, ind=2)`,
- # then calls `parseSection` (2nd call, nested) which parses
- # paragraph "bullet item and its continuation" and then starts
- # a block quote logic (L7 C4).
- # The block quote logic calls `pushInd(p, ind=4)` and
- # calls `parseSection` again, so a (simplified) sequence of calls now is::
- #
- # parseSection -> parseBulletList ->
- # parseSection (+block quote logic) -> parseSection
- #
- # 3rd `parseSection` finishes, block quote logic calls `popInd(p)`,
- # it returns to bullet item logic, which sees that next tkIndent has
- # ival=0 and stops there since the required indentation for a bullet item
- # is 2 and 0<2; the bullet item logic calls `popInd(p)`.
- # Then bullet list handler checks that next tkWord (L10 C0) has the
- # right indentation but does not have ``*`` so stops at tkIndent (L10).
- # - 1st `parseSection` invocation calls `parseParagraph` and the
- # "Final paragraph" is parsed.
- #
- # If a B.E. handler has advanced `p.idx` past tkIndent to check
- # whether it should continue its processing or not, and decided not to,
- # then this B.E. handler should step back (e.g. do `dec p.idx`).
proc initParser(p: var RstParser, sharedState: PRstSharedState) =
  ## Resets `p` to its initial state: no tokens, token index 0, indentation
  ## stack `[0]`, position (`LineRstInit`, `ColRstInit`), and attaches
  ## `sharedState`.
  p.s = sharedState
  p.tok = @[]
  p.idx = 0
  p.indentStack = @[0]
  p.line = LineRstInit
  p.col = ColRstInit
proc addNodesAux(n: PRstNode, result: var string) =
  ## Appends the text of every leaf found under `n` (visited depth-first,
  ## in order) to `result`. A `nil` node contributes nothing.
  if n.isNil: return
  case n.kind
  of rnLeaf:
    result.add(n.text)
  else:
    for idx in countup(0, n.len - 1):
      addNodesAux(n.sons[idx], result)
proc addNodes(n: PRstNode): string =
  ## Concatenated text of all leaves under `n`.
  addNodesAux(n, result)
proc linkName(n: PRstNode): string =
  ## Returns a normalized reference name, see:
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names
  ##
  ## Here normalization is the ASCII lower-casing of the node's text.
  result = toLowerAscii(addNodes(n))
proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) =
  ## Appends to `r` the reference-name form of the text under `n`.
  ##
  ## * letters are lower-cased (ASCII), bytes `\128..\255` are kept as is;
  ## * a digit at the very beginning of `r` is prefixed with `Z`;
  ## * the listed punctuation characters are spelled out ("dollar", ...);
  ## * any other character only sets flag `b` (when `r` is non-empty), which
  ##   makes the next emitted piece be preceded by a single `-`.
  template flushDash() =
    # emit the pending separator, if any
    if b:
      r.add('-')
      b = false
  template special(s) =
    flushDash()
    r.add(s)

  if n == nil: return
  if n.kind != rnLeaf:
    for i in 0 ..< n.len: rstnodeToRefnameAux(n.sons[i], r, b)
    return
  for i in 0 ..< n.text.len:
    let ch = n.text[i]
    case ch
    of '0'..'9':
      flushDash()
      if r.len == 0: r.add('Z')
      r.add(ch)
    of 'a'..'z', '\128'..'\255':
      special ch
    of 'A'..'Z':
      special chr(ord(ch) - ord('A') + ord('a'))
    of '$': special "dollar"
    of '%': special "percent"
    of '&': special "amp"
    of '^': special "roof"
    of '!': special "emark"
    of '?': special "qmark"
    of '*': special "star"
    of '+': special "plus"
    of '-': special "minus"
    of '/': special "slash"
    of '\\': special "backslash"
    of '=': special "eq"
    of '<': special "lt"
    of '>': special "gt"
    of '~': special "tilde"
    of ':': special "colon"
    of '.': special "dot"
    of '@': special "at"
    of '|': special "bar"
    else:
      if r.len > 0: b = true
proc rstnodeToRefname(n: PRstNode): string =
  ## Reference-name form of the text under `n`
  ## (see `rstnodeToRefnameAux` for the rules).
  var pendingDash = false
  rstnodeToRefnameAux(n, result, pendingDash)
proc findSub(s: PRstSharedState, n: PRstNode): int =
  ## Index in `s.subs` of the substitution whose key equals the text of `n`.
  ## When there is no exact match the keys are compared with
  ## `cmpIgnoreStyle`. Returns -1 if nothing matches.
  let wanted = addNodes(n)
  result = -1
  for idx in 0 .. high(s.subs):
    if s.subs[idx].key == wanted:
      return idx
  # the spec says: if no exact match, try one without case distinction:
  for idx in 0 .. high(s.subs):
    if cmpIgnoreStyle(wanted, s.subs[idx].key) == 0:
      return idx
proc lineInfo(p: RstParser, iTok: int): TLineInfo =
  ## Location of token number `iTok`: the token's line/column offset by the
  ## parser's base `p.line`/`p.col`, in the current file.
  let
    column = int16(p.col + p.tok[iTok].col)
    lineNo = uint16(p.line + p.tok[iTok].line)
  result.col = column
  result.line = lineNo
  result.fileIndex = p.s.currFileIdx
proc lineInfo(p: RstParser): TLineInfo =
  ## Location of the current token.
  result = lineInfo(p, p.idx)
# TODO: we need this simplification because we don't preserve exact starting
# token of currently parsed element:
proc prevLineInfo(p: RstParser): TLineInfo =
  ## Location of the token right before the current one.
  result = lineInfo(p, p.idx - 1)
proc setSub(p: var RstParser, key: string, value: PRstNode) =
  ## Stores substitution `key` -> `value`. An existing entry with the same
  ## key has its value overwritten; otherwise a new entry is appended with
  ## the location of the previous token.
  for idx in countup(0, p.s.subs.len - 1):
    if p.s.subs[idx].key == key:
      p.s.subs[idx].value = value
      return
  let entry = Substitution(key: key, value: value, info: prevLineInfo(p))
  p.s.subs.add(entry)
proc setRef(p: var RstParser, key: string, value: PRstNode,
            refType: SubstitutionKind) =
  ## Stores reference `key` -> `value` of kind `refType`.
  ## If `key` is already present its value is overwritten; when the old and
  ## new values differ in text, `mwRedefinitionOfLabel` is reported first.
  for idx in countup(0, p.s.refs.len - 1):
    if p.s.refs[idx].key != key:
      continue
    if addNodes(p.s.refs[idx].value) != addNodes(value):
      rstMessage(p, mwRedefinitionOfLabel, key)
    p.s.refs[idx].value = value
    return
  p.s.refs.add(
    Substitution(kind: refType, key: key, value: value,
                 info: prevLineInfo(p)))
proc findRef(s: PRstSharedState, key: string): seq[Substitution] =
  ## All entries of `s.refs` whose key is exactly `key`, in stored order.
  for entry in s.refs:
    if entry.key == key:
      result.add entry
# Ambiguity in links: we don't follow the procedure of removing implicit
# targets defined in
# https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#implicit-hyperlink-targets
# Instead we just give explicit links a higher priority than implicit ones
# and report ambiguities as warnings. Hopefully it is easy to remove
# ambiguities manually. Nim auto-generated links from ``docgen.nim``
# have lowest priority: 1 (for procs) and below for other symbol types.

proc refPriority(k: SubstitutionKind): int =
  ## Priority of a substitution/reference kind (a bigger number wins).
  result =
    case k
    of rstSubstitution: 8
    of hyperlinkAlias: 7
    of implicitHyperlinkAlias: 2
proc internalRefPriority(k: RstAnchorKind): int =
  ## Priority of an RST anchor kind (a bigger number wins).
  result =
    case k
    of manualDirectiveAnchor: 6
    of manualInlineAnchor: 5
    of footnoteAnchor: 4
    of headlineAnchor: 3
proc `$`(subst: AnchorSubst): string =  # for debug
  ## Debug rendering: the anchor's kind, its priority and one
  ## kind-specific detail.
  var detail: string
  case subst.kind
  of arInternalRst: detail = "type=" & $subst.anchorType
  of arExternalRst: detail = "type=" & $subst.anchorTypeExt
  of arNim: detail = "langsym=" & $subst.langSym
  result = "(kind=$1, priority=$2, $3)" % [$subst.kind, $subst.priority, detail]
proc addAnchorRst(p: var RstParser, name: string, target: PRstNode,
                  anchorType: RstAnchorKind) =
  ## Associates node `target` (which has field `anchor`) with an
  ## alias `name` and updates the corresponding aliases in `p.curAnchors`.
  ##
  ## Every pending anchor in `p.curAnchors` is registered under its own alias
  ## (as `manualDirectiveAnchor`), then `name` itself when non-empty;
  ## finally the pending list is cleared. All entries get the priority
  ## of `anchorType`.
  template bucket(key: string): untyped =
    # the list of anchors registered under `key`, created on demand
    p.s.anchors.mgetOrPut(key, newSeq[AnchorSubst]())

  let prio = internalRefPriority(anchorType)
  for pending in p.curAnchors:
    bucket(pending.alias).add(
        AnchorSubst(kind: arInternalRst, target: target, priority: prio,
                    info: pending.info, anchorType: manualDirectiveAnchor))
  if name.len > 0:
    bucket(name).add(
        AnchorSubst(kind: arInternalRst, target: target, priority: prio,
                    info: prevLineInfo(p), anchorType: anchorType))
  p.curAnchors.setLen 0
proc addAnchorExtRst(s: var PRstSharedState, key: string, refn: string,
                    anchorType: RstAnchorKind, info: TLineInfo) =
  ## Registers an `arExternalRst` anchor with reference name `refn` under
  ## the ASCII-lower-cased `key`, with the priority of `anchorType`.
  let
    name = toLowerAscii(key)
    prio = internalRefPriority(anchorType)
  s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add(
      AnchorSubst(kind: arExternalRst, refnameExt: refn, priority: prio,
                  info: info, anchorTypeExt: anchorType))
proc addAnchorNim*(s: var PRstSharedState, external: bool, refn: string, tooltip: string,
                   langSym: LangSymbol, priority: int,
                   info: TLineInfo, module: FileIndex) =
  ## Adds an anchor `refn`, which follows
  ## the rule `arNim` (i.e. a symbol in ``*.nim`` file)
  ##
  ## The anchor is registered under `langSym.name`. `module` is stored with
  ## the anchor: `getModule` reads it back and `findMainAnchorNim` groups
  ## overloads by it, so it must not be dropped here.
  s.anchors.mgetOrPut(langSym.name, newSeq[AnchorSubst]()).add(
      AnchorSubst(kind: arNim, external: external, refname: refn, langSym: langSym,
                  tooltip: tooltip, priority: priority,
                  info: info, module: module))
proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode,
                       info: TLineInfo):
                      seq[AnchorSubst] =
  ## Finds Nim anchors matching `signature`.
  ##
  ## Returns an empty list when `signature` cannot be parsed as a Nim symbol
  ## or nothing is registered under its name. Matches are grouped by
  ## (symbol kind, module). A group with a single member is returned as is.
  ## For a group with overloads, either the non-group members (when
  ## parameters were provided) or the first group entry is selected.
  var langSym: LangSymbol
  try:
    langSym = toLangSymbol(signature)
  except ValueError:  # parsing failed, not a Nim symbol
    return
  let candidates = s.anchors.getOrDefault(langSym.name,
                                          newSeq[AnchorSubst]())
  if candidates.len == 0:
    return
  # logic to select only groups instead of concrete symbols
  # with overloads, note that the same symbol can be defined
  # in multiple modules and `importdoc`ed:
  type GroupKey = tuple[symKind: string, origModule: string]
  # map (symKind, file) (like "proc", "os.nim") -> found symbols/groups:
  var groups: Table[GroupKey, seq[AnchorSubst]]
  for subst in candidates:
    if subst.kind == arNim and match(subst.langSym, langSym):
      let key: GroupKey = (subst.langSym.symKind, getModule(s, subst))
      groups.mgetOrPut(key, newSeq[AnchorSubst]()).add subst
  for key, members in groups:
    if members.len == 1:
      result.add members[0]
    elif langSym.parametersProvided:
      # > 1, there are overloads, potential ambiguity in this `symKind`;
      # there are non-group signatures, select only them
      for member in members:
        if not member.langSym.isGroup:
          result.add member
    else:
      # when there are many overloads a link like foo_ points to all
      # of them, so selecting the group
      var foundGroup = false
      for member in members:
        if member.langSym.isGroup:
          result.add member
          foundGroup = true
          break
      doAssert(foundGroup,
               "docgen has not generated the group for $1 (file $2)" % [
               langSym.name, getModule(s, members[0]) ])
proc findMainAnchorRst(s: PRstSharedState, linkText: string, info: TLineInfo):
                      seq[AnchorSubst] =
  ## All RST anchors (internal or external) registered under the
  ## ASCII-lower-cased `linkText`.
  let key = toLowerAscii(linkText)
  for cand in s.anchors.getOrDefault(key, newSeq[AnchorSubst]()):
    if cand.kind == arInternalRst or cand.kind == arExternalRst:
      result.add cand
proc addFootnoteNumManual(p: var RstParser, num: int) =
  ## add manually-numbered footnote
  ##
  ## A number that is already taken is reported with `mwRedefinitionOfLabel`
  ## and not added again.
  var taken = false
  for fnote in p.s.footnotes:
    if fnote.number == num:
      taken = true
      break
  if taken:
    rstMessage(p, mwRedefinitionOfLabel, $num)
  else:
    p.s.footnotes.add((fnManualNumber, num, -1, -1, $num))
proc addFootnoteNumAuto(p: var RstParser, label: string) =
  ## add auto-numbered footnote.
  ## Empty label [#] means it'll be resolved by the occurrence.
  ##
  ## A non-empty label that is already present is reported with
  ## `mwRedefinitionOfLabel` and not added again.
  if label.len == 0:  # simple auto-numbered [#]
    p.s.lineFootnoteNum.add lineInfo(p)
    p.s.footnotes.add((fnAutoNumber, -1, p.s.lineFootnoteNum.len, -1, label))
    return
  # auto-numbered with label [#label]
  for fnote in p.s.footnotes:
    if fnote.label == label:
      rstMessage(p, mwRedefinitionOfLabel, label)
      return
  p.s.footnotes.add((fnAutoNumberLabel, -1, -1, -1, label))
proc addFootnoteSymAuto(p: var RstParser) =
  ## Adds an auto-symbol footnote: records the current location and stores
  ## the footnote with its 1-based occurrence index.
  p.s.lineFootnoteSym.add lineInfo(p)
  let occurrence = p.s.lineFootnoteSym.len
  p.s.footnotes.add((fnAutoSymbol, -1, -1, occurrence, ""))
proc orderFootnotes(s: PRstSharedState) =
  ## numerate auto-numbered footnotes taking into account that all
  ## manually numbered ones always have preference.
  ## Save the result back to `s.footnotes`.

  proc listFootnotes(locations: seq[TLineInfo]): string =
    # Renders "<count> (lines <l1>, <l2>, ...)"; line numbers are prefixed
    # with "<file>:" only when more than one file is known.
    var lines: seq[string]
    for info in locations:
      var entry = ""
      if s.filenames.len > 1:
        entry = getFilename(s.filenames, info.fileIndex) & ":"
      # else: no need to add file name here if there is only 1
      entry.add $info.line
      lines.add entry
    result.add $lines.len & " (lines " & join(lines, ", ") & ")"

  # Report an error if found any mismatch in number of automatic footnotes
  if s.lineFootnoteNum.len != s.lineFootnoteNumRef.len:
    rstMessage(s, meFootnoteMismatch,
      "$1 != $2" % [listFootnotes(s.lineFootnoteNum),
                    listFootnotes(s.lineFootnoteNumRef)] &
        " for auto-numbered footnotes")
  if s.lineFootnoteSym.len != s.lineFootnoteSymRef.len:
    rstMessage(s, meFootnoteMismatch,
      "$1 != $2" % [listFootnotes(s.lineFootnoteSym),
                    listFootnotes(s.lineFootnoteSymRef)] &
        " for auto-symbol footnotes")

  # split the footnotes by the way they are numbered
  var manuallyN, autoN, autoSymbol: seq[FootnoteSubst]
  for fs in s.footnotes:
    case fs.kind
    of fnManualNumber: manuallyN.add fs
    of fnAutoNumber, fnAutoNumberLabel: autoN.add fs
    else: autoSymbol.add fs

  var ordered: seq[FootnoteSubst]
  if autoN.len == 0:
    ordered = manuallyN
  else:
    # fill gaps between manually numbered footnotes in ascending order
    manuallyN.sort()  # sort by number - its first field
    var lst = initSinglyLinkedList[FootnoteSubst]()
    for elem in manuallyN: lst.append(elem)
    var firstAuto = 0
    if lst.head == nil or lst.head.value.number != 1:
      # no manual footnote [1], start numeration from 1 for auto-numbered
      lst.prepend((autoN[0].kind, 1, autoN[0].autoNumIdx, -1, autoN[0].label))
      firstAuto = 1
    var curNode = lst.head
    var nextNode: SinglyLinkedNode[FootnoteSubst]
    # go simultaneously through `autoN` and `lst` looking for gaps
    for (kind, x, autoNumIdx, y, label) in autoN[firstAuto .. ^1]:
      while (nextNode = curNode.next; nextNode != nil):
        if nextNode.value.number - curNode.value.number > 1:
          # gap found, insert new node `n` between curNode and nextNode:
          var n = newSinglyLinkedNode((kind, curNode.value.number + 1,
                                       autoNumIdx, -1, label))
          curNode.next = n
          n.next = nextNode
          curNode = n
          break
        else:
          curNode = nextNode
      if nextNode == nil:  # no gap found, just append
        lst.append((kind, curNode.value.number + 1, autoNumIdx, -1, label))
        curNode = lst.tail
    ordered = lst.toSeq

  # we use ASCII symbols instead of those recommended in RST specification:
  const footnoteAutoSymbols = ["*", "^", "+", "=", "~", "$", "@", "%", "&"]
  for fs in autoSymbol:
    # assignment order: *, **, ***, ^, ^^, ^^^, ... &&&, ****, *****, ...
    let i = fs.autoSymIdx - 1
    let symbolNum = (i div 3) mod footnoteAutoSymbols.len
    let nSymbols = (1 + i mod 3) + 3 * (i div (3 * footnoteAutoSymbols.len))
    let label = footnoteAutoSymbols[symbolNum].repeat(nSymbols)
    ordered.add((fs.kind, -1, -1, fs.autoSymIdx, label))
  s.footnotes = ordered
proc getFootnoteNum(s: PRstSharedState, label: string): int =
  ## get number from label. Must be called after `orderFootnotes`.
  ## Returns -1 when no footnote carries `label`.
  for fnote in s.footnotes:
    if fnote.label == label:
      return fnote.number
  result = -1
proc getFootnoteNum(s: PRstSharedState, order: int): int =
  ## get number from occurrence. Must be called after `orderFootnotes`.
  ## Returns -1 when no footnote has occurrence index `order`.
  for fnote in s.footnotes:
    if fnote.autoNumIdx == order:
      return fnote.number
  result = -1
proc getAutoSymbol(s: PRstSharedState, order: int): string =
  ## get symbol from occurrence of auto-symbol footnote.
  ## Returns `"???"` when no footnote has occurrence index `order`.
  for fnote in s.footnotes:
    if fnote.autoSymIdx == order:
      return fnote.label
  result = "???"
proc newRstNodeA(p: var RstParser, kind: RstNodeKind): PRstNode =
  ## create node and consume the current anchor
  ##
  ## When anchors are pending, the new node takes the anchor of the first
  ## one and all of them are registered through `addAnchorRst`.
  result = newRstNode(kind)
  if p.curAnchors.len == 0:
    return
  result.anchor = p.curAnchors[0].anchor
  addAnchorRst(p, "", result, manualDirectiveAnchor)
template newLeaf(s: string): PRstNode =
  ## Shorthand for `newRstLeaf(s)`.
  newRstLeaf(s)
proc newLeaf(p: var RstParser): PRstNode =
  ## New leaf node holding the symbol of the current token.
  result = newRstLeaf(currentTok(p).symbol)
proc validRefnamePunct(x: string): bool =
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names
  ##
  ## True when `x` is exactly one of the characters `-`, `_`, `.`, `:`, `+`.
  const allowed = {'-', '_', '.', ':', '+'}
  if x.len != 1:
    return false
  result = x[0] in allowed
func getRefnameIdx(p: RstParser, startIdx: int): int =
  ## Gets last token index of a refname ("word" in RST terminology):
  ##
  ##   reference names are single words consisting of alphanumerics plus
  ##   isolated (no two adjacent) internal hyphens, underscores, periods,
  ##   colons and plus signs; no whitespace or other characters are allowed.
  ##
  ## Refnames are used for:
  ## - reference names
  ## - role names
  ## - directive names
  ## - footnote labels
  ##
  ## When the token at `startIdx` is not a word, the result is
  ## `startIdx - 1`.
  # TODO: use this func in all other relevant places
  if p.tok[startIdx].kind != tkWord:
    return startIdx - 1
  var next = startIdx + 1
  # keep consuming pairs "<valid punctuation> <word>"
  while p.tok[next].kind == tkPunct and
      validRefnamePunct(p.tok[next].symbol) and
      p.tok[next + 1].kind == tkWord:
    inc next, 2
  result = next - 1
func getRefname(p: RstParser, startIdx: int): (string, int) =
  ## Returns the refname starting at token `startIdx` (the concatenated
  ## token symbols) together with the index of its last token.
  let lastIdx = getRefnameIdx(p, startIdx)
  var name = ""
  for j in startIdx .. lastIdx:
    name.add p.tok[j].symbol
  result = (name, lastIdx)
proc getReferenceName(p: var RstParser, endStr: string): PRstNode =
  ## Collects tokens into an `rnInner` node until punctuation token `endStr`
  ## is met (it is consumed but not added). Any token that is not a word,
  ## whitespace, "other" or punctuation stops the scan with `meExpected`.
  result = newRstNode(rnInner)
  while true:
    let kind = currentTok(p).kind
    if kind in {tkWord, tkOther, tkWhite}:
      result.add(newLeaf(p))
    elif kind == tkPunct:
      if currentTok(p).symbol == endStr:
        inc p.idx
        break
      result.add(newLeaf(p))
    else:
      rstMessage(p, meExpected, endStr)
      break
    inc p.idx
proc untilEol(p: var RstParser): PRstNode =
  ## Consumes tokens up to (not including) the next `tkIndent`/`tkEof` and
  ## returns them as leaves of an `rnInner` node.
  result = newRstNode(rnInner)
  while true:
    if currentTok(p).kind in {tkIndent, tkEof}:
      break
    result.add(newLeaf(p))
    inc p.idx
proc expect(p: var RstParser, tok: string) =
  ## Skips the current token when its symbol is `tok`; otherwise reports
  ## `meExpected` and leaves the position unchanged.
  if currentTok(p).symbol != tok:
    rstMessage(p, meExpected, tok)
  else:
    inc p.idx
proc inlineMarkdownEnd(p: RstParser): bool =
  ## True when the previous token is neither an indent nor whitespace.
  ##
  ## (For a special case of ` we don't allow spaces surrounding it
  ## unlike original Markdown because this behavior confusing/useless)
  result = not (prevTok(p).kind in {tkIndent, tkWhite})
proc inlineRstEnd(p: RstParser): bool =
  ## Checks the RST rules for the end of inline markup at the current token.
  # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
  const closers =
    {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
  # Rule 2:
  if prevTok(p).kind in {tkIndent, tkWhite}:
    return false
  # Rule 7:
  result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
      nextTok(p).symbol[0] in closers
proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
  ## Checks whether the current token closes inline markup `markup`.
  ## With `exact` the token must equal `markup`, otherwise it only has to
  ## end with it. The surroundings are then checked by the RST rules, or by
  ## the Markdown rule when `roPreferMarkdown` is set and `markup` is not
  ## `_`/`__`.
  if exact:
    result = currentTok(p).symbol == markup
  elif currentTok(p).symbol.endsWith(markup):
    result = true
  elif markup == "``":
    # check that escaping may have split `` into 2 tokens ` and `
    result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`"
  if not result: return
  # surroundings check
  if markup notin ["_", "__"] and roPreferMarkdown in p.s.options:
    result = inlineMarkdownEnd(p)
  else:
    result = inlineRstEnd(p)
proc rstRuleSurround(p: RstParser): bool =
  ## False when the previous token starts with a quote or an opening bracket
  ## and the next token starts with the corresponding closing character;
  ## true otherwise (including at the very first token).
  result = true
  # Rules 4 & 5:
  if p.idx <= 0:
    return
  let opener = prevTok(p).symbol[0]
  let closer =
    case opener
    of '\'', '\"': opener
    of '(': ')'
    of '[': ']'
    of '{': '}'
    of '<': '>'
    else: '\0'
  if closer != '\0':
    result = nextTok(p).symbol[0] != closer
proc inlineMarkdownStart(p: RstParser): bool =
  ## Markdown rule for the start of inline markup: the next token must not be
  ## an indent, whitespace or end of file, and `rstRuleSurround` must hold.
  if nextTok(p).kind in {tkIndent, tkWhite, tkEof}:
    return false
  # this rst rule is really nice, let us use it in Markdown mode too.
  result = rstRuleSurround(p)
proc inlineRstStart(p: RstParser): bool =
  ## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
  const openers = {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
  # Rule 6
  let precededOk = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or
      prevTok(p).symbol[0] in openers
  if not precededOk:
    return false
  # Rule 1:
  if nextTok(p).kind in {tkIndent, tkWhite, tkEof}:
    return false
  result = rstRuleSurround(p)
proc isInlineMarkupStart(p: RstParser, markup: string): bool =
  ## Checks whether inline markup `markup` starts at the current token.
  ## The surroundings are checked by the RST rules, or by the Markdown rule
  ## when `roPreferMarkdown` is set and `markup` is none of
  ## `_`, `__`, `[`, `|`.
  if markup == "_`":  # _` is a 2 token case
    result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
  else:
    result = currentTok(p).symbol == markup
  if not result: return
  # surroundings check
  # Note: we require space/punctuation even before [markdown link](...)
  if markup notin ["_", "__", "[", "|"] and roPreferMarkdown in p.s.options:
    result = inlineMarkdownStart(p)
  else:
    result = inlineRstStart(p)
proc match(p: RstParser, start: int, expr: string): bool =
  ## Checks the tokens beginning at index `start` against pattern `expr`.
  ## Returns true only when every pattern element matches.
  # regular expressions are:
  # special char     exact match
  # 'w'              tkWord
  # ' '              tkWhite
  # 'a'              tkAdornment
  # 'i'              tkIndent
  # 'I'              tkIndent or tkEof
  # 'p'              tkPunct
  # 'o'              tkOther
  # 'T'              always true
  # 'E'              whitespace, indent or eof
  # 'e'              any enumeration sequence or '#' (for enumeration lists)
  # 'x'              a..z or '#' (for enumeration lists)
  # 'n'              0..9 or '#' (for enumeration lists)
  var
    pat = 0           # position in `expr`
    tokIdx = start    # index of the token being checked
  let lastPat = expr.len - 1
  template cur(): untyped = p.tok[tokIdx]
  while pat <= lastPat:
    case expr[pat]
    of 'w':
      # a whole refname may span several tokens
      let refEnd = getRefnameIdx(p, tokIdx)
      result = refEnd >= tokIdx
      if result: tokIdx = refEnd
    of ' ': result = cur().kind == tkWhite
    of 'i': result = cur().kind == tkIndent
    of 'I': result = cur().kind in {tkIndent, tkEof}
    of 'p': result = cur().kind == tkPunct
    of 'a': result = cur().kind == tkAdornment
    of 'o': result = cur().kind == tkOther
    of 'T': result = true
    of 'E': result = cur().kind in {tkEof, tkWhite, tkIndent}
    of 'e', 'x', 'n':
      result = cur().kind == tkWord or cur().symbol == "#"
      if result:
        case cur().symbol[0]
        of '#': result = true
        of 'a'..'z', 'A'..'Z':
          result = expr[pat] in {'e', 'x'} and cur().symbol.len == 1
        of '0'..'9':
          result = expr[pat] in {'e', 'n'} and
                     allCharsInSet(cur().symbol, {'0'..'9'})
        else: result = false
    else:
      # a run of the same special char must equal one token exactly
      let c = expr[pat]
      var runLen = 0
      while pat <= lastPat and expr[pat] == c:
        inc pat
        inc runLen
      dec pat
      result = cur().kind in {tkPunct, tkAdornment} and
          cur().symbol.len == runLen and cur().symbol[0] == c
    if not result: return
    inc tokIdx
    inc pat
  result = true
proc safeProtocol*(linkStr: var string): string =
  ## Returns link's protocol and, if it's not safe, clears `linkStr`.
  ##
  ## Only `http`, `https` and `ftp` (compared case-insensitively) are
  ## considered safe. When `linkStr` has no `<word>:` prefix at all, it is
  ## left untouched and `""` is returned.
  var protocol = ""
  if not scanf(linkStr, "$w:", protocol):
    # `scanf` may already have stored a partially matched word in `protocol`
    # (e.g. "relative" for "relative/path"); that is not a protocol.
    return ""
  result = protocol
  # if it has a protocol at all, ensure that it's not 'javascript:' or worse:
  let allowed = cmpIgnoreCase(protocol, "http") == 0 or
      cmpIgnoreCase(protocol, "https") == 0 or
      cmpIgnoreCase(protocol, "ftp") == 0
  if not allowed:
    linkStr = ""
proc fixupEmbeddedRef(p: var RstParser, n, a, b: PRstNode): bool =
  ## Splits interpreted text `n` of the form `title <link>` at its last `<`:
  ## the sons before it go to `a`, the link text (as one leaf) goes to `b`.
  ## Returns `true` if the link belongs to an allowed protocol; otherwise
  ## `mwBrokenLink` is reported and the link text is emptied.
  var sep = -1
  for i in countdown(n.len - 2, 0):
    if n.sons[i].text == "<":
      sep = i
      break
  # also drop the son right before `<` when its text starts with a space
  let skip = if sep > 0 and n.sons[sep - 1].text[0] == ' ': 2 else: 1
  for i in countup(0, sep - skip):
    a.add(n.sons[i])
  var link = ""
  for i in countup(sep + 1, n.len - 2):
    link.add(addNodes(n.sons[i]))
  if link != "":
    let protocol = safeProtocol(link)
    result = link != ""
    if not result:
      rstMessage(p, mwBrokenLink, protocol,
                 p.tok[p.idx-3].line, p.tok[p.idx-3].col)
  b.add newLeaf(link)
proc whichRole(p: RstParser, sym: string): RstNodeKind =
  ## Like `whichRoleAux`, but additionally reports `mwUnsupportedLanguage`
  ## when the role is unknown.
  result = whichRoleAux(sym)
  if result != rnUnknownRole:
    return
  rstMessage(p, mwUnsupportedLanguage, sym)
proc toInlineCode(n: PRstNode, language: string): PRstNode =
  ## Creates rnInlineCode and attaches `n` contents as code (in 3rd son).
  ##
  ## The 1st son holds the language name (`cpp` -> `c++`, `csharp` -> `c#`),
  ## the 2nd son is `nil`. All sons of `n` are expected to be leaves.
  result = newRstNode(rnInlineCode, info=n.info)
  let lang =
    case language
    of "cpp": "c++"
    of "csharp": "c#"
    else: language
  let args = newRstNode(rnDirArg)
  args.add newLeaf(lang)
  result.add args
  result.add PRstNode(nil)
  let literal = newRstNode(rnLiteralBlock)
  var code: string
  for son in n.sons:
    assert son.kind == rnLeaf
    code.add son.text
  literal.add newLeaf(code)
  result.add literal
proc toOtherRole(n: PRstNode, kind: RstNodeKind, roleName: string): PRstNode =
  ## Wraps the sons of `n` into a node of `kind` with two sons: an `rnInner`
  ## node holding the original sons, and a leaf with `roleName`.
  let content = newRstNode(rnInner, n.sons)
  result = newRstNode(kind, @[content, newLeaf(roleName)])
proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
  ## Finalizes node `n` that was tentatively determined as interpreted text.
  ##
  ## * followed by `_`/`__`: a hyperlink (embedded `<...>` form) or a
  ##   reference to be resolved later;
  ## * followed by `:role:`: interpreted text with that role;
  ## * otherwise: interpreted text with the current default role.

  proc finalizeInterpreted(node: PRstNode, newKind: RstNodeKind,
                           newSons: seq[PRstNode], roleName: string):
                          PRstNode {.nimcall.} =
    # fixes interpreted text (`x` or `y`:role:) to proper internal AST format
    case newKind
    of rnUnknownRole, rnCodeFragment:
      result = node.toOtherRole(newKind, roleName)
    of rnInlineCode:
      result = node.toInlineCode(language=roleName)
    else:
      result = newRstNode(newKind, newSons)

  let isLink = isInlineMarkupEnd(p, "_", exact=true) or
      isInlineMarkupEnd(p, "__", exact=true)
  if isLink:
    inc p.idx
    let embedded =
      p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">"
    if not embedded:
      # some link that will be resolved in `resolveSubs`
      return newRstNode(rnRstRef, sons=n.sons, info=n.info)
    var a = newRstNode(rnInner)
    var b = newRstNode(rnInner)
    if not fixupEmbeddedRef(p, n, a, b):
      # include as plain text, not a link
      result = newRstNode(rnInner, n.sons)
    elif a.len == 0:  # e.g. `<a_named_relative_link>`_
      result = newRstNode(rnStandaloneHyperlink, @[b])
    else:  # e.g. `link title <http://site>`_
      setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias)
      result = newRstNode(rnHyperlink, @[a, b])
  elif match(p, p.idx, ":w:"):
    # a role:
    let (roleName, lastIdx) = getRefname(p, p.idx+1)
    let roleKind = whichRole(p, roleName)
    result = n.finalizeInterpreted(roleKind, n.sons, roleName)
    p.idx = lastIdx + 2
  else:
    result = n.finalizeInterpreted(p.s.currRoleKind, n.sons, p.s.currRole)
proc matchVerbatim(p: RstParser, start: int, expr: string): int =
  ## Checks whether the symbols of consecutive tokens, beginning at index
  ## `start`, spell out `expr`. Returns the index of the token following the
  ## match, or 0 when `expr` is not fully matched.
  var
    tokIdx = start
    matched = 0     # number of characters of `expr` matched so far
  while matched < expr.len and tokIdx < p.tok.len and
        continuesWith(expr, p.tok[tokIdx].symbol, matched):
    inc matched, p.tok[tokIdx].symbol.len
    inc tokIdx
  result = if matched < expr.len: 0 else: tokIdx
proc parseSmiley(p: var RstParser): PRstNode =
  ## Tries to parse a smiley at the current position. On success the matched
  ## tokens are consumed and an `rnSmiley` node is returned (its text is the
  ## value paired with the matched key in `Smilies`); otherwise `nil`.
  if currentTok(p).symbol[0] notin SmileyStartChars:
    return
  for key, val in items(Smilies):
    let stop = matchVerbatim(p, p.idx, key)
    if stop <= 0:
      continue
    p.idx = stop
    result = newRstNode(rnSmiley)
    result.text = val
    return
proc isUrl(p: RstParser, i: int): bool =
  ## True when the tokens at `i` look like `<scheme>://<word>`, where the
  ## scheme is one of http, https, ftp, telnet, file.
  const schemes = ["http", "https", "ftp", "telnet", "file"]
  result = p.tok[i+1].symbol == ":" and p.tok[i+2].symbol == "//" and
      p.tok[i+3].kind == tkWord and
      p.tok[i].symbol in schemes
proc checkParen(token: Token, parensStack: var seq[char]): bool {.inline.} =
  ## Returns `true` iff `token` is a closing parenthesis for some
  ## previous opening parenthesis saved in `parensStack`.
  ## This is according Markdown balanced parentheses rule
  ## (https://spec.commonmark.org/0.29/#link-destination)
  ## to allow links like
  ## https://en.wikipedia.org/wiki/APL_(programming_language),
  ## we use it for RST also.
  ##
  ## An opening bracket is pushed onto `parensStack`. A closing bracket cuts
  ## the stack back to its nearest matching opener.
  result = false
  if token.kind != tkPunct:
    return
  let c = token.symbol[0]
  case c
  of '(', '[', '{':  # push
    parensStack.add c
  of ')', ']', '}':  # try pop
    let opener =
      case c
      of ')': '('
      of ']': '['
      else: '{'
    # a case like ([) inside a link is allowed and [ is also `pop`ed:
    for i in countdown(parensStack.len - 1, 0):
      if parensStack[i] == opener:
        parensStack.setLen i
        result = true
        break
  else:
    discard
proc parseUrl(p: var RstParser): PRstNode =
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks
  ##
  ## Consumes the run of word/punctuation/other tokens at the current
  ## position, minus trailing punctuation (except `/` and anything up to the
  ## last balanced closing bracket), and returns it as a standalone hyperlink.
  result = newRstNode(rnStandaloneHyperlink)
  var
    lastIdx = p.idx
    closedParenIdx = p.idx - 1  # for balanced parens rule
    parensStack: seq[char]
  while p.tok[lastIdx].kind in {tkWord, tkPunct, tkOther}:
    if checkParen(p.tok[lastIdx], parensStack):
      closedParenIdx = lastIdx
    inc lastIdx
  dec lastIdx
  # standalone URL can not end with punctuation in RST
  while lastIdx > closedParenIdx and p.tok[lastIdx].kind == tkPunct and
      p.tok[lastIdx].symbol != "/":
    dec lastIdx
  var url = ""
  for i in p.idx .. lastIdx:
    url.add p.tok[i].symbol
  result.add url
  p.idx = lastIdx + 1
proc parseWordOrRef(p: var RstParser, father: PRstNode) =
  ## Parses the word at the current token and adds it to `father` either as
  ## a plain leaf, as a standalone URL or as an `rnRstRef` reference
  ## (a name terminated by `_`, possibly a long one like some.ref.with.dots_ ).
  if nextTok(p).kind != tkPunct:  # <- main path, a normal word
    father.add newLeaf(p)
    inc p.idx
    return
  if isUrl(p, p.idx):             # URL http://something
    father.add parseUrl(p)
    return
  # otherwise scan ahead looking for the `_` that ends a reference
  let wordIdx = p.idx
  var foundRefEnd = false
  inc p.idx
  while currentTok(p).kind in {tkWord, tkPunct}:
    if currentTok(p).kind == tkPunct:
      if isInlineMarkupEnd(p, "_", exact=true):
        foundRefEnd = true
        break
      if not validRefnamePunct(currentTok(p).symbol):
        break
    inc p.idx
  if foundRefEnd:
    let refNode = newRstNode(rnRstRef, info=lineInfo(p, wordIdx))
    for i in wordIdx ..< p.idx:
      refNode.add newLeaf(p.tok[i].symbol)
    father.add refNode
    inc p.idx  # skip final _
  else:  # 1 normal word; rewind to just after it
    father.add newLeaf(p.tok[wordIdx].symbol)
    p.idx = wordIdx + 1
proc parseBackslash(p: var RstParser, father: PRstNode) =
  ## Handles a punctuation token. A backslash is dropped and the token after
  ## it is added to `father` as a leaf (unless that token is whitespace, which
  ## is skipped). Any other punctuation is added as a leaf unchanged.
  assert currentTok(p).kind == tkPunct
  if currentTok(p).symbol != "\\":
    father.add newLeaf(p)
    inc p.idx
    return
  # XXX: Unicode?
  inc p.idx  # drop the backslash itself
  let escapedKind = currentTok(p).kind
  if escapedKind != tkWhite: father.add newLeaf(p)
  if escapedKind != tkEof: inc p.idx  # never step past the end of input
proc parseUntil(p: var RstParser, father: PRstNode, postfix: string,
                interpretBackslash: bool) =
  ## Parses inline content up to the closing markup `postfix`, appending the
  ## parsed leaves to `father`. On entry the current token is the opening
  ## markup token, which is skipped; on success the closing token is consumed.
  ##
  ## * `interpretBackslash` - when true (and `postfix` is not a single
  ##   backtick) punctuation is routed through `parseBackslash`.
  ##
  ## `meExpected` is reported at the position of the opening token when two
  ## consecutive `tkIndent` tokens or the end of input are reached before
  ## `postfix`. Parsing stops in both cases, so a message handler that
  ## returns instead of raising can not make this proc loop forever.
  let
    line = currentTok(p).line
    col = currentTok(p).col
  inc p.idx
  while true:
    case currentTok(p).kind
    of tkPunct:
      if isInlineMarkupEnd(p, postfix, exact=false):
        let l = currentTok(p).symbol.len
        if l > postfix.len:
          # handle cases like *emphasis with stars****. (It's valid RST!)
          father.add newLeaf(currentTok(p).symbol[0 ..< l - postfix.len])
        elif postfix == "``" and currentTok(p).symbol == "`" and
            prevTok(p).symbol == "`":
          # handle cases like ``literal\`` - delete ` already added after \
          father.sons.setLen(father.sons.len - 1)
        inc p.idx
        break
      else:
        if postfix == "`":
          # single-backtick markup has its own escaping rules
          if currentTok(p).symbol == "\\":
            if nextTok(p).symbol == "\\":
              father.add newLeaf("\\")
              father.add newLeaf("\\")
              inc p.idx, 2
            elif nextTok(p).symbol == "`":  # escape `
              father.add newLeaf("`")
              inc p.idx, 2
            else:
              father.add newLeaf("\\")
              inc p.idx
          else:
            father.add(newLeaf(p))
            inc p.idx
        else:
          if interpretBackslash:
            parseBackslash(p, father)
          else:
            father.add(newLeaf(p))
            inc p.idx
    of tkAdornment, tkWord, tkOther:
      father.add(newLeaf(p))
      inc p.idx
    of tkIndent:
      # a single line break inside markup is turned into one space
      father.add newLeaf(" ")
      inc p.idx
      if currentTok(p).kind == tkIndent:
        rstMessage(p, meExpected, postfix, line, col)
        break
    of tkWhite:
      father.add newLeaf(" ")
      inc p.idx
    else:
      # End of input: the markup was never closed. `p.idx` is not advanced
      # here, so the loop must be left explicitly in case `rstMessage`
      # returns instead of raising.
      rstMessage(p, meExpected, postfix, line, col)
      break
proc parseMarkdownCodeblockFields(p: var RstParser): PRstNode =
  ## Parses additional (after language string) code block parameters
  ## in a format *suggested* in the `CommonMark Spec`_ with handling of `"`.
  ##
  ## Returns nil when the current token is already `tkIndent`, otherwise an
  ## `rnFieldList` with one `rnField` (name + body) per `name` or
  ## `name=value` item found before the end of the line. The end of input
  ## terminates the list just like a line break does.
  if currentTok(p).kind == tkIndent:
    result = nil
  else:
    result = newRstNode(rnFieldList)
  # Stop at tkEof too: the inner loops never consume it, so looping until
  # tkIndent only would add empty fields forever when the info line is the
  # last line of the input.
  while currentTok(p).kind notin {tkIndent, tkEof}:
    if currentTok(p).kind == tkWhite:
      inc p.idx
    else:
      let field = newRstNode(rnField)
      var fieldName = ""
      while currentTok(p).kind notin {tkWhite, tkIndent, tkEof} and
            currentTok(p).symbol != "=":
        fieldName.add currentTok(p).symbol
        inc p.idx
      field.add(newRstNode(rnFieldName, @[newLeaf(fieldName)]))
      if currentTok(p).kind == tkWhite: inc p.idx
      let fieldBody = newRstNode(rnFieldBody)
      if currentTok(p).symbol == "=":
        inc p.idx
        if currentTok(p).kind == tkWhite: inc p.idx
        var fieldValue = ""
        if currentTok(p).symbol == "\"":
          # quoted value: take everything up to the closing quote, both
          # quotes included
          while true:
            fieldValue.add currentTok(p).symbol
            inc p.idx
            if currentTok(p).kind == tkEof:
              rstMessage(p, meExpected, "\"")
              # do not step past the end of the token list if the message
              # handler returns
              break
            elif currentTok(p).symbol == "\"":
              fieldValue.add "\""
              inc p.idx
              break
        else:
          while currentTok(p).kind notin {tkWhite, tkIndent, tkEof}:
            fieldValue.add currentTok(p).symbol
            inc p.idx
        fieldBody.add newLeaf(fieldValue)
      field.add(fieldBody)
      result.add(field)
proc mayLoadFile(p: RstParser, result: var PRstNode) =
  ## If the field list of `result` has a non-empty `file` field, reads that
  ## file and stores its content as an `rnLiteralBlock` in `result.sons[2]`.
  ##
  ## Reports `meSandboxedDirective` unless `roSandboxDisabled` is set and
  ## `meCannotOpenFile` when `findRelativeFile` yields an empty path.
  let filename = getFieldValue(result, "file").strip(
    chars = Whitespace + {'"'})
  if filename == "": return
  if roSandboxDisabled notin p.s.options:
    let fileTok = p.tok[p.idx - 2]
    rstMessage(p, meSandboxedDirective, "file", fileTok.line, fileTok.col)
  let path = p.findRelativeFile(filename)
  if path == "": rstMessage(p, meCannotOpenFile, filename)
  let content = newRstNode(rnLiteralBlock)
  content.add newLeaf(readFile(path))
  result.sons[2] = content
proc defaultCodeLangNim(p: RstParser, result: var PRstNode) =
  ## Appends a `default-language` field with value `Nim` to the field list
  ## stored in `result.sons[1]`, creating that list if it is missing.
  if result.sons[1].isNil:
    result.sons[1] = newRstNode(rnFieldList)
  assert result.sons[1].kind == rnFieldList
  # Build the name/body pair first, then hook them into the extra field.
  let nameNode = newRstNode(rnFieldName)
  nameNode.add newLeaf("default-language")
  let bodyNode = newRstNode(rnFieldBody)
  bodyNode.add newLeaf("Nim")
  let langField = newRstNode(rnField, info=lineInfo(p))
  langField.add nameNode
  langField.add bodyNode
  result.sons[1].add langField
proc parseMarkdownCodeblock(p: var RstParser): PRstNode =
  ## Parses a fenced Markdown code block that starts at the current (fence)
  ## token. Returns an `rnCodeBlock` with three sons: the argument (language
  ## word, or nil), the field list (or nil) and the literal block.
  ##
  ## Reports `meMissingClosing` when the end of input is reached before a
  ## closing fence.
  result = newRstNodeA(p, rnCodeBlock)
  result.sons.setLen(3)
  let
    startLine = curLine(p)
    fenceCol = currentTok(p).col
    fence = currentTok(p).symbol  # usually just ```
  inc p.idx
  result.info = lineInfo(p)
  var langArg: PRstNode = nil
  if currentTok(p).kind == tkWord:
    langArg = newRstNode(rnDirArg)
    langArg.add newLeaf(p)
    inc p.idx
    result.sons[1] = parseMarkdownCodeblockFields(p)
    mayLoadFile(p, result)
  let code = newLeaf("")
  while true:
    let cur = currentTok(p)
    if cur.kind == tkEof:
      rstMessage(p, meMissingClosing,
                 "$1 (started at line $2)" % [fence, $startLine])
      break
    let nxt = nextTok(p)
    # closing fence: same character, at least as long as the opening one
    if nxt.kind in {tkPunct, tkAdornment} and
        nxt.symbol[0] == fence[0] and
        nxt.symbol.len >= fence.len:
      inc p.idx, 2
      break
    if cur.kind == tkIndent:
      code.text.add "\n"
      if cur.ival > fenceCol:
        # keep indentation relative to the fence column
        code.text.add " ".repeat(cur.ival - fenceCol)
      elif cur.ival < fenceCol:
        rstMessage(p, mwRstStyle,
                   "unexpected de-indentation in Markdown code block")
    else:
      code.text.add cur.symbol
    inc p.idx
  result.sons[0] = langArg
  if result.sons[2] == nil:  # not already filled in by `mayLoadFile`
    let literal = newRstNode(rnLiteralBlock)
    literal.add code
    result.sons[2] = literal
  if result.sons[0].isNil and roNimFile in p.s.options:
    defaultCodeLangNim(p, result)
proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =
  ## Parses Markdown link. If it's Pandoc auto-link then its second
  ## son (target) will be in tokenized format (rnInner with leafs).
  ##
  ## Returns true and advances `p.idx` only if a link was recognised and
  ## added to `father`; otherwise `p.idx` is left unchanged.
  let description = newRstNode(rnInner)
  var pos = p.idx
  var brackets: seq[char]

  template collectUntil(closer, dest) =
    # Gathers tokens after the one at `pos` into `dest` until an unbalanced
    # `closer`; leaves the whole proc with `false` on end of input or on two
    # consecutive tkIndent tokens.
    brackets.setLen 0
    inc pos  # skip begin token
    while true:
      if p.tok[pos].kind == tkEof: return false
      if p.tok[pos].kind == tkIndent and p.tok[pos+1].kind == tkIndent:
        return false
      let balancedClose = checkParen(p.tok[pos], brackets)
      if p.tok[pos].symbol == closer and not balancedClose:
        break
      let piece = if p.tok[pos].kind == tkIndent: " " else: p.tok[pos].symbol
      when dest is string: dest.add piece
      else: dest.add newLeaf(piece)
      inc pos
    inc pos  # skip end token

  collectUntil("]", description)
  if p.tok[pos].symbol == "(":
    var url = ""
    let urlIdx = pos + 1
    collectUntil(")", url)
    # only commit if we detected no syntax error:
    let protocol = safeProtocol(url)
    if url == "":
      result = false
      rstMessage(p, mwBrokenLink, protocol,
                 p.tok[urlIdx].line, p.tok[urlIdx].col)
    else:
      let hyperlink = newRstNode(rnHyperlink)
      hyperlink.add newLeaf(description.addNodes)
      hyperlink.add url
      father.add hyperlink
      p.idx = pos
      result = true
  elif roPreferMarkdown in p.s.options:
    # Use Pandoc's implicit_header_references extension
    let pandocRef = newRstNode(rnPandocRef)
    if p.tok[pos].symbol == "[":
      let target = newRstNode(rnInner)
      let targetIdx = pos + 1
      collectUntil("]", target)
      pandocRef.add description
      if target.len != 0:  # [description][target]
        pandocRef.add target
        pandocRef.info = lineInfo(p, targetIdx)
      else:                # [description=target][]
        pandocRef.add description
        pandocRef.info = lineInfo(p, p.idx + 1)
    else:  # [description=target]
      pandocRef.add description
      pandocRef.add description  # target is the same as description
      pandocRef.info = lineInfo(p, p.idx + 1)
    father.add pandocRef
    p.idx = pos
    result = true
  else:
    result = false
proc getRstFootnoteType(label: PRstNode): (FootnoteType, int) =
  ## Classifies an RST footnote/citation label by its first leaf:
  ## `#` alone is auto-numbered, `#` followed by more sons is an
  ## auto-numbered label, a lone `*` is auto-symbol, a lone integer is a
  ## manual number (returned as second component), anything else is a
  ## citation. The second component is -1 when there is no manual number.
  result = (fnCitation, -1)
  if label.sons.len == 0 or label.sons[0].kind != rnLeaf:
    return
  let first = label.sons[0].text
  if first == "#":
    result =
      if label.sons.len == 1: (fnAutoNumber, -1)
      else: (fnAutoNumberLabel, -1)
  elif label.len == 1:
    if first == "*":
      result = (fnAutoSymbol, -1)
    else:
      try:
        result = (fnManualNumber, parseInt(first))
      except ValueError:
        result = (fnCitation, -1)
proc getMdFootnoteType(label: PRstNode): (FootnoteType, int) =
  ## Classifies a Markdown footnote label: if the text of its first son
  ## parses as an integer it is a manual number, otherwise an auto-numbered
  ## label (with -1 as the number).
  var number: int
  try:
    number = parseInt(label.sons[0].text)
  except ValueError:
    return (fnAutoNumberLabel, -1)
  result = (fnManualNumber, number)
proc getFootnoteType(s: PRstSharedState, label: PRstNode): (FootnoteType, int) =
  ## Returns footnote/citation type and manual number (if present),
  ## using the Markdown rules when `isMd(s)` and the RST rules otherwise.
  result =
    if isMd(s): getMdFootnoteType(label)
    else: getRstFootnoteType(label)
proc parseRstFootnoteName(p: var RstParser, reference: bool): PRstNode =
  ## parse footnote/citation label. Precondition: start at `[`.
  ## Label text should be valid ref. name symbol, otherwise nil is returned.
  ##
  ## * `reference` - when true the closing `]` must be followed by `_`
  ##   (footnote reference), and that `_` is consumed as well.
  ##
  ## On success returns an `rnInner` node holding the label tokens and moves
  ## `p.idx` past the label; on failure `p.idx` is left unchanged.
  var i = p.idx + 1
  result = newRstNode(rnInner)
  while true:
    if p.tok[i].kind in {tkEof, tkIndent, tkWhite}:
      return nil
    if p.tok[i].kind == tkPunct:
      case p.tok[i].symbol:
      of "]":
        if i > p.idx + 1 and (not reference or
            (p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol == "_")):
          inc i                # skip ]
          if reference: inc i  # skip _
          break  # to succeed, it's a footnote/citation indeed
        else:
          return nil
      of "#":
        # auto-number marker is allowed only as the first label symbol
        if i != p.idx + 1:
          return nil
      of "*":
        # auto-symbol marker must be the whole label, i.e. `[*]`.
        # (The previous condition also required `p.tok[i].kind != tkPunct`,
        # which can never hold in this branch, so it never rejected anything.)
        if i != p.idx + 1 or p.tok[i+1].symbol != "]":
          return nil
      else:
        if not validRefnamePunct(p.tok[i].symbol):
          return nil
    result.add newLeaf(p.tok[i].symbol)
    inc i
  p.idx = i
proc isMdFootnoteName(p: RstParser, reference: bool): bool =
  ## Pandoc Markdown footnote extension: checks that the current tokens are
  ## `[`, `^` and a word. `reference` is not consulted here.
  let start = p.idx
  if p.tok[start].symbol != "[": return false
  if p.tok[start+1].symbol != "^": return false
  result = p.tok[start+2].kind == tkWord
proc parseMdFootnoteName(p: var RstParser, reference: bool): PRstNode =
  ## Parses a Markdown footnote label `[^name]` (for a reference) or
  ## `[^name]:` (for a definition). Returns the label as an `rnInner` node
  ## and moves `p.idx` past it, or returns nil leaving `p.idx` unchanged.
  result = nil
  if not isMdFootnoteName(p, reference): return
  let label = newRstNode(rnInner)
  let firstIdx = p.idx + 2  # first token after `[^`
  var j = firstIdx
  while p.tok[j].kind in {tkWord, tkOther} or
      validRefnamePunct(p.tok[j].symbol):
    label.add newLeaf(p.tok[j].symbol)
    inc j
  if j == firstIdx or p.tok[j].symbol != "]":
    return
  if reference:
    p.idx = j + 1  # skip ]
    result = label
  elif p.tok[j+1].symbol == ":":
    p.idx = j + 2  # skip ]:
    result = label
proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode =
  ## Parses a footnote label using the Markdown syntax when `isMd(p)`,
  ## otherwise the RST syntax (only if inline markup `[` starts here).
  ## Returns nil if no label was recognised.
  if isMd(p):
    result = parseMdFootnoteName(p, reference)
  elif isInlineMarkupStart(p, "["):
    result = parseRstFootnoteName(p, reference)
  else:
    result = nil
proc isMarkdownCodeBlock(p: RstParser, idx: int): bool =
  ## Returns true if the token at `idx` is a Markdown code fence: Markdown
  ## support is on and the token is punctuation/adornment of length >= 3
  ## starting with a backtick (or with `~` when `roPreferMarkdown` is set).
  let fence = p.tok[idx]
  if roSupportMarkdown notin p.s.options: return false
  if fence.kind notin {tkPunct, tkAdornment}: return false
  let first = fence.symbol[0]
  let isFenceChar = first == '`' or
                    (roPreferMarkdown in p.s.options and first == '~')
  result = isFenceChar and fence.symbol.len >= 3
proc isMarkdownCodeBlock(p: RstParser): bool =
  ## Same check as the two-argument overload, applied to the current token.
  result = isMarkdownCodeBlock(p, p.idx)
proc parseInline(p: var RstParser, father: PRstNode) =
  ## Parses one inline element starting at the current token and appends the
  ## resulting node to `father`. Dispatches on the token kind and, for
  ## punctuation, on which inline markup starts here; the order of the
  ## checks is significant.
  template delimited(nodeKind: RstNodeKind, delim: string, backslash: bool) =
    # plain `delim ... delim` markup without any post-processing
    let node = newRstNode(nodeKind)
    parseUntil(p, node, delim, backslash)
    father.add node

  template smileyOrContinue() =
    # adds a smiley and leaves `parseInline` if one starts at this token
    if roSupportSmilies in p.s.options:
      let smiley = parseSmiley(p)
      if smiley != nil:
        father.add smiley
        return

  var footnoteLabel: PRstNode  # assigned inside an `elif` condition below
  let startIdx = p.idx
  case currentTok(p).kind
  of tkPunct:
    if isInlineMarkupStart(p, "***"):
      delimited(rnTripleEmphasis, "***", true)
    elif isInlineMarkupStart(p, "**"):
      delimited(rnStrongEmphasis, "**", true)
    elif isInlineMarkupStart(p, "*"):
      delimited(rnEmphasis, "*", true)
    elif isInlineMarkupStart(p, "_`"):
      let target = newRstNode(rnInlineTarget)
      inc p.idx
      parseUntil(p, target, "`", false)
      target.anchor = rstnodeToRefname(target)
      addAnchorRst(p, name = linkName(target), target = target,
                   anchorType=manualInlineAnchor)
      father.add target
    elif isMarkdownCodeBlock(p):
      father.add parseMarkdownCodeblock(p)
    elif isInlineMarkupStart(p, "``"):
      delimited(rnInlineLiteral, "``", false)
    elif match(p, p.idx, ":w:") and
        (var roleEndIdx = getRefnameIdx(p, p.idx + 1);
         p.tok[roleEndIdx+2].symbol == "`"):
      # :role:`text`
      let (roleName, _) = getRefname(p, p.idx+1)
      let roleKind = whichRole(p, roleName)
      var node = newRstNode(roleKind)
      p.idx = roleEndIdx + 2
      if roleKind == rnInlineCode:
        node = node.toInlineCode(language=roleName)
      parseUntil(p, node, "`", false) # bug #17260
      if roleKind in {rnUnknownRole, rnCodeFragment}:
        node = node.toOtherRole(roleKind, roleName)
      father.add node
    elif isInlineMarkupStart(p, "`"):
      var interpreted = newRstNode(rnInterpretedText,
                                   info=lineInfo(p, p.idx+1))
      parseUntil(p, interpreted, "`", false) # bug #17260
      interpreted = parsePostfix(p, interpreted)
      father.add interpreted
    elif isInlineMarkupStart(p, "|"):
      let substitution = newRstNode(rnSubstitutionReferences,
                                    info=lineInfo(p, p.idx+1))
      parseUntil(p, substitution, "|", false)
      father.add substitution
    elif currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and
         (footnoteLabel = parseFootnoteName(p, reference=true);
          footnoteLabel != nil):
      let footnoteRef = newRstNode(rnFootnoteRef)
      footnoteRef.info = lineInfo(p, startIdx+1)
      footnoteRef.add footnoteLabel
      let (fnType, _) = getFootnoteType(p.s, footnoteLabel)
      # remember where auto-symbol/auto-number references occur
      if fnType == fnAutoSymbol:
        p.s.lineFootnoteSymRef.add lineInfo(p)
      elif fnType == fnAutoNumber:
        p.s.lineFootnoteNumRef.add lineInfo(p)
      father.add footnoteRef
    elif roSupportMarkdown in p.s.options and
        currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and
        parseMarkdownLink(p, father):
      discard "parseMarkdownLink already processed it"
    else:
      smileyOrContinue()
      parseBackslash(p, father)
  of tkWord:
    smileyOrContinue()
    parseWordOrRef(p, father)
  of tkAdornment, tkOther, tkWhite:
    if isMarkdownCodeBlock(p):
      father.add parseMarkdownCodeblock(p)
      return
    smileyOrContinue()
    father.add newLeaf(p)
    inc p.idx
  else:
    discard
proc getDirective(p: var RstParser): string =
  ## Reads a directive name of the form ` name::` starting at the current
  ## whitespace token. On success returns the lower-cased name and moves
  ## `p.idx` past the `::` (and one following whitespace token); otherwise
  ## returns "" and leaves `p.idx` unchanged. Style warnings are issued for
  ## a single colon or for more than two colons after the name.
  result = ""
  if currentTok(p).kind != tkWhite: return
  let (name, lastIdx) = getRefname(p, p.idx + 1)
  if name.len == 0: return
  let afterIdx = lastIdx + 1
  let after = p.tok[afterIdx]
  if after.symbol == "::":
    p.idx = afterIdx + 1
    if currentTok(p).kind == tkWhite:
      inc p.idx
    elif currentTok(p).kind != tkIndent:
      rstMessage(p, mwRstStyle,
          "whitespace or newline expected after directive " & name)
    result = name.toLowerAscii()
  elif after.symbol == ":":
    rstMessage(p, mwRstStyle,
        "double colon :: may be missing at end of '" & name & "'",
        after.line, after.col)
  elif after.kind == tkPunct and after.symbol[0] == ':':
    rstMessage(p, mwRstStyle,
        "too many colons for a directive (should be ::)",
        after.line, after.col)
proc parseComment(p: var RstParser, col: int): PRstNode =
  ## Skips over a comment and always returns nil. If the next token is a
  ## line break the comment is empty and only one token is skipped;
  ## otherwise every token positioned to the right of `col` is skipped.
  result = nil
  if currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
    inc p.idx  # empty comment
    return
  while currentTok(p).kind != tkEof:
    let tok = currentTok(p)
    # line breaks are positioned by their indentation, other tokens by column
    let position = if tok.kind == tkIndent: tok.ival else: tok.col
    if position <= col: break
    inc p.idx
proc parseLine(p: var RstParser, father: PRstNode) =
  ## Parses inline elements into `father` until a token that is not
  ## whitespace, word, other or punctuation is reached.
  while currentTok(p).kind in {tkWhite, tkWord, tkOther, tkPunct}:
    parseInline(p, father)
proc parseUntilNewline(p: var RstParser, father: PRstNode) =
  ## Parses inline elements (adornment tokens included) into `father` until
  ## a line break or the end of input.
  while currentTok(p).kind in {tkWhite, tkWord, tkAdornment, tkOther, tkPunct}:
    parseInline(p, father)
proc parseSection(p: var RstParser, result: PRstNode) {.gcsafe.}

proc tokenAfterNewline(p: RstParser, start: int): int =
  ## Returns the index of the token right after the first line break found
  ## at or after `start`, or the index of the end-of-input token if no line
  ## break comes before it.
  result = start
  while p.tok[result].kind notin {tkEof, tkIndent}:
    inc result
  if p.tok[result].kind == tkIndent:
    inc result
proc tokenAfterNewline(p: RstParser): int {.inline.} =
  ## Same as the two-argument overload, starting from the current token.
  tokenAfterNewline(p, p.idx)
proc getWrappableIndent(p: RstParser): int =
  ## Gets baseline indentation for bodies of field lists and directives.
  ## Handles situations like this (with possible de-indent in [case.3])::
  ##
  ##   :field:   definition                     [case.1]
  ##
  ##   currInd   currentTok(p).col
  ##   |         |
  ##   v         v
  ##
  ##   .. Note:: defItem:                       [case.2]
  ##                 definition
  ##
  ##                 ^
  ##                 |
  ##                 nextIndent
  ##
  ##   .. Note:: - point1                       [case.3]
  ##       - point 2
  ##
  ##       ^
  ##       |
  ##       nextIndent
  let cur = currentTok(p)
  if cur.kind == tkIndent:
    return cur.ival
  # indentation recorded in the token that precedes the next line's start
  let nextIndent = p.tok[tokenAfterNewline(p) - 1].ival
  if nextIndent <= currInd(p) or  # parse only this line            [case.1]
      nextIndent >= cur.col:      # may be a definition list        [case.2]
    result = cur.col
  else:
    result = nextIndent           # allow parsing next lines        [case.3]
proc getMdBlockIndent(p: RstParser): int =
  ## Markdown version of `getWrappableIndent`: the body indentation is the
  ## next line's indentation when that is deeper than the current
  ## indentation, otherwise the column of the current token.
  let cur = currentTok(p)
  if cur.kind == tkIndent:
    return cur.ival
  let nextIndent = p.tok[tokenAfterNewline(p) - 1].ival
  # TODO: Markdown-compliant definition should allow nextIndent == currInd(p):
  result =
    if nextIndent <= currInd(p): cur.col  # parse only this line
    else: nextIndent                      # allow parsing next lines
proc indFollows(p: RstParser): bool =
  ## True if the current token is a line break whose indentation is deeper
  ## than the current indentation level.
  let cur = currentTok(p)
  result = cur.kind == tkIndent and cur.ival > currInd(p)
proc parseBlockContent(p: var RstParser, father: var PRstNode,
                       contentParser: SectionParser): bool {.gcsafe.} =
  ## parse the final content part of explicit markup blocks (directives,
  ## footnotes, etc). Returns true if succeeded.
  ##
  ## Content is present when it starts on the same line or on a following
  ## line with deeper indentation; it is parsed by `contentParser` at the
  ## indentation given by `getWrappableIndent` and added to `father`.
  result = false
  if currentTok(p).kind == tkIndent and not indFollows(p):
    return
  pushInd(p, getWrappableIndent(p))
  let body = contentParser(p)
  popInd(p)
  father.add body
  result = true
proc parseSectionWrapper(p: var RstParser): PRstNode =
  ## Parses a section into a fresh `rnInner` node and then strips every
  ## outer `rnInner` wrapper that has exactly one son.
  var node = newRstNode(rnInner)
  parseSection(p, node)
  while node.kind == rnInner and node.len == 1:
    node = node.sons[0]
  result = node
proc parseField(p: var RstParser): PRstNode =
  ## Returns a parsed rnField node.
  ##
  ## rnField nodes have two children nodes, a rnFieldName and a rnFieldBody.
  ## The body is parsed only when its indentation lies to the right of the
  ## column where the field starts; otherwise it is left empty.
  result = newRstNode(rnField, info=lineInfo(p))
  let startCol = currentTok(p).col
  let nameNode = newRstNode(rnFieldName)
  parseUntil(p, nameNode, ":", false)
  let bodyNode = newRstNode(rnFieldBody)
  if currentTok(p).kind == tkWhite: inc p.idx
  let bodyIndent = getWrappableIndent(p)
  if bodyIndent > startCol:
    pushInd(p, bodyIndent)
    parseSection(p, bodyNode)
    popInd(p)
  result.add nameNode
  result.add bodyNode
proc parseFields(p: var RstParser): PRstNode =
  ## Parses fields for a section or directive block.
  ##
  ## This proc may return nil if the parsing doesn't find anything of value,
  ## otherwise it will return a node of rnFieldList type with children.
  let atStart = p.idx == 0 and p.tok[0].symbol == ":"
  let afterLineBreak = currentTok(p).kind == tkIndent and
                       nextTok(p).symbol == ":"
  if not (afterLineBreak or atStart):
    return nil
  # every further field has to start at this same column
  let col = if atStart: currentTok(p).col else: currentTok(p).ival
  result = newRstNodeA(p, rnFieldList)
  if not atStart: inc p.idx
  result.add parseField(p)
  while currentTok(p).kind == tkIndent and currentTok(p).ival == col and
      nextTok(p).symbol == ":":
    inc p.idx
    result.add parseField(p)
proc getFieldValue*(n: PRstNode): string =
  ## Returns the value of a specific ``rnField`` node.
  ##
  ## This proc will assert if the node is not of the expected type. The empty
  ## string will be returned as a minimum. Any value in the rst will be
  ## stripped from leading/trailing whitespace.
  assert n.kind == rnField
  assert n.len == 2
  let
    nameNode = n.sons[0]
    bodyNode = n.sons[1]
  assert nameNode.kind == rnFieldName
  assert bodyNode.kind == rnFieldBody
  result = strip(addNodes(bodyNode))
proc getFieldValue(n: PRstNode, fieldname: string): string =
  ## Looks up `fieldname` (compared with `cmpIgnoreStyle`) in the field list
  ## stored as second son of `n` and returns its body text. Returns "" when
  ## there is no field list or no such field, and "\x01\x01" when the field
  ## exists but its body is empty.
  result = ""
  let fields = n.sons[1]
  if fields == nil: return
  if fields.kind != rnFieldList:
    #InternalError("getFieldValue (2): " & $n.sons[1].kind)
    # We don't like internal errors here anymore as that would break the forum!
    return
  for field in fields.sons:
    if cmpIgnoreStyle(addNodes(field.sons[0]), fieldname) == 0:
      result = addNodes(field.sons[1])
      if result == "": result = "\x01\x01"  # indicates that the field exists
      return
proc getArgument(n: PRstNode): string =
  ## Returns the text of the first son of `n`, or "" if that son is nil.
  let arg = n.sons[0]
  result = if arg == nil: "" else: addNodes(arg)
proc parseDotDot(p: var RstParser): PRstNode {.gcsafe.}

proc parseLiteralBlock(p: var RstParser): PRstNode =
  ## Parses an indented literal block into an `rnLiteralBlock` with a single
  ## text leaf. If the block starts after a line break, it extends until a
  ## line indented less than that first line break and indentation is kept
  ## relative to it; otherwise only the rest of the current line is taken.
  result = newRstNodeA(p, rnLiteralBlock)
  let leaf = newLeaf("")
  if currentTok(p).kind == tkIndent:
    let baseIndent = currentTok(p).ival
    while currentTok(p).kind == tkIndent: inc p.idx  # skip blank lines
    while currentTok(p).kind != tkEof:
      let tok = currentTok(p)
      if tok.kind == tkIndent:
        if tok.ival < baseIndent: break
        leaf.text.add "\n"
        leaf.text.add spaces(tok.ival - baseIndent)
      else:
        leaf.text.add tok.symbol
      inc p.idx
  else:
    while currentTok(p).kind notin {tkIndent, tkEof}:
      leaf.text.add currentTok(p).symbol
      inc p.idx
  result.add leaf
proc parseQuotedLiteralBlock(p: var RstParser): PRstNode =
  ## Parses a quoted literal block: consecutive lines at the current
  ## indentation that all start with the same quoting character. Returns an
  ## `rnLiteralBlock` with a single text leaf. Style warnings are issued for
  ## deeper-indented lines and for a missing blank line after the block.
  result = newRstNodeA(p, rnLiteralBlock)
  let leaf = newLeaf("")
  if currentTok(p).kind == tkIndent:
    let baseIndent = currInd(p)
    while currentTok(p).kind == tkIndent: inc p.idx  # skip blank lines
    let quoteSym = currentTok(p).symbol[0]
    while currentTok(p).kind != tkEof:
      if currentTok(p).kind != tkIndent:
        leaf.text.add currentTok(p).symbol
        inc p.idx
        continue
      # at a line break: decide whether the block goes on
      let lineIndent = currentTok(p).ival
      if lineIndent < baseIndent:
        break
      if lineIndent > baseIndent:
        rstMessage(p, mwRstStyle,
                   "unexpected indentation in quoted literal block")
        break
      if nextTok(p).kind == tkPunct and nextTok(p).symbol[0] == quoteSym:
        leaf.text.add "\n"
        inc p.idx
      elif nextTok(p).kind == tkIndent:
        break
      else:
        rstMessage(p, mwRstStyle, "no newline after quoted literal block")
        break
  result.add leaf
proc parseRstLiteralBlock(p: var RstParser, kind: LiteralBlockKind): PRstNode =
  ## Parses an indented literal block for `lbIndentedLiteralBlock` and a
  ## quoted literal block for any other `kind`.
  case kind
  of lbIndentedLiteralBlock:
    result = parseLiteralBlock(p)
  else:
    result = parseQuotedLiteralBlock(p)
proc getLevel(p: var RstParser, c: char, hasOverline: bool): int =
  ## Returns (preliminary) heading level corresponding to `c` and
  ## `hasOverline`. If level does not exist, add it first.
  ##
  ## An existing level gets its line updated and is marked as having peers.
  for i in 0 ..< p.s.hLevels.len:
    if p.s.hLevels[i].symbol == c and
        p.s.hLevels[i].hasOverline == hasOverline:
      p.s.hLevels[i].line = curLine(p)
      p.s.hLevels[i].hasPeers = true
      return i
  p.s.hLevels.add LevelInfo(symbol: c, hasOverline: hasOverline,
                            line: curLine(p), hasPeers: false)
  result = p.s.hLevels.high
proc countTitles(s: PRstSharedState, n: PRstNode) =
  ## Fill `s.hTitleCnt`: counts leading overline headings among the sons of
  ## `n` (skipping nil sons, substitution definitions and default roles),
  ## stopping at the first other node, at a heading whose level has peers,
  ## or once the count reaches 2.
  if n == nil: return
  for node in n.sons:
    if node == nil: continue
    case node.kind
    of rnOverline:
      if s.hLevels[s.hTitleCnt].hasPeers:
        break
      inc s.hTitleCnt
      if s.hTitleCnt >= 2:
        break
    of rnSubstitutionDef, rnDefaultRole:
      discard
    else:
      break
proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool =
  ## check that underline/overline length is enough for the heading.
  ## No support for Unicode.
  ##
  ## `adornmentIdx` is the index of the adornment token: the underline when
  ## it lies after the current token, otherwise the overline at the current
  ## token. Reports `meNewSectionExpected` when the adornment looks like a
  ## heading but is too short or the underline does not match the overline.
  let adornment = p.tok[adornmentIdx]
  if adornment.symbol in ["::", "..", "|"]:
    return false
  if isMarkdownCodeBlock(p, adornmentIdx):
    return false
  var textLen = 0
  var failure = ""
  if p.idx < adornmentIdx:  # check for underline
    if p.idx > 0:
      textLen = currentTok(p).col - adornment.col
    if textLen > 0:
      rstMessage(p, mwRstStyle, "indentation of heading text allowed" &
          " only for overline titles")
    for i in p.idx ..< adornmentIdx-1:  # adornmentIdx-1 is a linebreak
      textLen += p.tok[i].symbol.len
    result = adornment.symbol.len >= textLen and textLen != 0
    if not result:
      failure = "(underline '" & adornment.symbol & "' is too short)"
  else:  # p.idx == adornmentIdx, at overline. Check overline and underline
    var i = p.idx + 2
    textLen = p.tok[i].col - adornment.col
    while p.tok[i].kind notin {tkEof, tkIndent}:
      textLen += p.tok[i].symbol.len
      inc i
    let underlineFollows = p.tok[i].kind == tkIndent and
                           p.tok[i+1].kind == tkAdornment and
                           p.tok[i+1].symbol[0] == adornment.symbol[0]
    if not underlineFollows:
      # it's not overline/underline section, not reporting error
      return false
    let underline = p.tok[i+1].symbol
    if adornment.symbol.len < textLen or textLen == 0:
      result = false
      failure = "(overline '" & adornment.symbol & "' is too short)"
    elif underline != adornment.symbol:
      result = false
      failure = "(underline '" & underline & "' does not match " &
                "overline '" & adornment.symbol & "')"
    else:
      result = true
  if not result:
    rstMessage(p, meNewSectionExpected, failure)
proc isLineBlock(p: RstParser): bool =
  ## Checks the first token of the next line: true if it is a `|` in the
  ## same column as the current token, or lies in a greater column, or has
  ## the symbol "\n".
  let nextLine = p.tok[tokenAfterNewline(p)]
  let col = currentTok(p).col
  if col == nextLine.col and nextLine.symbol == "|":
    result = true
  elif nextLine.col > col:
    result = true
  else:
    result = nextLine.symbol == "\n"
proc isMarkdownBlockQuote(p: RstParser): bool =
  ## True if the symbol of the current token starts with `>`.
  let first = currentTok(p).symbol[0]
  result = first == '>'
proc whichRstLiteralBlock(p: RstParser): LiteralBlockKind =
  ## Checks that the following tokens are either Indented Literal Block or
  ## Quoted Literal Block (which is not quite the same as Markdown quote block).
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
  ##
  ## Requires `::` followed by a line break. A deeper-indented next line
  ## gives `lbIndentedLiteralBlock`; a next line at the same indentation
  ## that starts with a quoting character gives `lbQuotedLiteralBlock`;
  ## everything else gives `lbNone`.
  const validQuotingCharacters = {
    '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
    '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^',
    '_', '`', '{', '|', '}', '~'}
  result = lbNone
  if currentTok(p).symbol != "::" or nextTok(p).kind != tkIndent:
    return
  let baseInd = currInd(p)
  let nextInd = nextTok(p).ival
  if baseInd < nextInd:
    result = lbIndentedLiteralBlock
  elif baseInd == nextInd:
    var i = p.idx + 1
    while p.tok[i].kind == tkIndent: inc i  # skip blank lines
    if p.tok[i].kind in {tkPunct, tkAdornment} and
        p.tok[i].symbol[0] in validQuotingCharacters:
      result = lbQuotedLiteralBlock
proc predNL(p: RstParser): bool =
  ## True at the very first token, or when the previous token is a line
  ## break whose indentation equals the current indentation level.
  if p.idx == 0:
    return true
  let prev = prevTok(p)
  result = prev.kind == tkIndent and prev.ival == currInd(p)
proc isDefList(p: RstParser): bool =
  ## True if the next line starts with a word/other/punctuation token in a
  ## greater column than the current token, and the token two positions
  ## before that next-line token is not `::`.
  let j = tokenAfterNewline(p)
  let nextLine = p.tok[j]
  result = currentTok(p).col < nextLine.col and
           nextLine.kind in {tkWord, tkOther, tkPunct} and
           p.tok[j - 2].symbol != "::"
proc `$`(t: Token): string = # for debugging only
  ## Renders kind, line and column of `t`, followed by `ival` for line
  ## breaks and by `symbol` for every other token.
  result = "(" & $t.kind & " line=" & $t.line & " col=" & $t.col
  if t.kind == tkIndent:
    result.add " ival=" & $t.ival & ")"
  else:
    result.add " symbol=" & t.symbol & ")"
proc skipNewlines(p: RstParser, j: int): int =
  ## Returns the index of the first token at or after `j` that is not a
  ## line break (this skips blank lines and stops at the end of input).
  result = j
  while p.tok[result].kind == tkIndent:
    inc result
proc skipNewlines(p: var RstParser) =
  ## Moves `p.idx` forward past any line-break tokens.
  let target = skipNewlines(p, p.idx)
  p.idx = target
const maxMdRelInd = 3 ## In Markdown: maximum indentation that does not yet
                      ## make the indented block a code

proc isMdRelInd(outerInd, nestedInd: int): bool =
  ## True if `nestedInd` is indented by 0 to `maxMdRelInd` columns relative
  ## to `outerInd`.
  result = nestedInd in outerInd .. outerInd + maxMdRelInd
proc isMdDefBody(p: RstParser, j: int, termCol: int): bool =
  ## True if the token at `j` is a `:` whose column is within the allowed
  ## Markdown indentation relative to `termCol`, followed by whitespace and
  ## then a word/other/punctuation token.
  let marker = p.tok[j]
  if marker.symbol != ":": return false
  if not isMdRelInd(termCol, marker.col): return false
  if p.tok[j+1].kind != tkWhite: return false
  result = p.tok[j+2].kind in {tkWord, tkOther, tkPunct}
proc isMdDefListItem(p: RstParser, idx: int): bool =
  ## Looks at the first non-blank line after the line containing `idx` and
  ## returns true if it is indented within the allowed Markdown range
  ## relative to the current indentation and satisfies `isMdDefBody`.
  let j = skipNewlines(p, tokenAfterNewline(p, idx))
  let termCol = p.tok[j].col
  if not isMdRelInd(currInd(p), termCol):
    return false
  result = isMdDefBody(p, j, termCol)
proc isOptionList(p: RstParser): bool =
  ## True if the current tokens match one of the option patterns
  ## `-w`, `--w`, `/w` or `//w`.
  result = false
  for pattern in ["-w", "--w", "/w", "//w"]:
    if match(p, p.idx, pattern):
      return true
proc isMarkdownHeadlinePattern(s: string): bool =
  ## True if `s` consists of 1 to 6 `#` characters and nothing else.
  result = false
  if s.len notin 1 .. 6:
    return
  for i in 0 ..< s.len:
    if s[i] != '#':
      return
  result = true
proc isMarkdownHeadline(p: RstParser): bool =
  ## True if Markdown support is on and the current tokens are 1-6 `#`
  ## characters, whitespace, and then a word/other/punctuation token.
  result = roSupportMarkdown in p.s.options and
           isMarkdownHeadlinePattern(currentTok(p).symbol) and
           nextTok(p).kind == tkWhite and
           p.tok[p.idx+2].kind in {tkWord, tkOther, tkPunct}
proc findPipe(p: RstParser, start: int): bool =
  ## True if a `|` token occurs at or after `start` before the next line
  ## break or the end of input.
  var i = start
  while p.tok[i].symbol != "|":
    if p.tok[i].kind in {tkIndent, tkEof}:
      return false
    inc i
  result = true
proc whichSection(p: RstParser): RstNodeKind =
  ## Determines which kind of block-level element starts at the current
  ## token. The checks are tried in a fixed order and the first one that
  ## succeeds wins; `rnParagraph` is the fallback for text tokens and
  ## `rnLeaf` for the remaining token kinds.
  let markdownOn = roSupportMarkdown in p.s.options
  let startKind = currentTok(p).kind
  if startKind in {tkAdornment, tkPunct}:
    # for punctuation sequences that can be both tkAdornment and tkPunct
    if isMarkdownCodeBlock(p):
      return rnCodeBlock
    if isRst(p) and currentTok(p).symbol == "::":
      return rnLiteralBlock
    if currentTok(p).symbol == ".." and
        nextTok(p).kind in {tkWhite, tkIndent}:
      return rnDirective
  case startKind
  of tkAdornment:
    if match(p, p.idx + 1, "iI") and currentTok(p).symbol.len >= 4:
      result = rnTransition
    elif match(p, p.idx, "+a+"):
      result = rnGridTable
      rstMessage(p, meGridTableNotImplemented)
    elif match(p, p.idx + 1, " a"):
      result = rnTable
    elif currentTok(p).symbol == "|" and isLineBlock(p):
      result = rnLineBlock
    elif markdownOn and isMarkdownBlockQuote(p):
      result = rnMarkdownBlockQuote
    elif match(p, p.idx + 1, "i") and not match(p, p.idx + 2, "I") and
        isAdornmentHeadline(p, p.idx):
      result = rnOverline
    else:
      result = rnParagraph
  of tkPunct:
    let sym = currentTok(p).symbol
    let nextLineIdx = tokenAfterNewline(p)
    if isMarkdownHeadline(p):
      result = rnMarkdownHeadline
    elif markdownOn and predNL(p) and
        match(p, p.idx, "| w") and findPipe(p, p.idx+3):
      result = rnMarkdownTable
    elif isMd(p) and isMdFootnoteName(p, reference=false):
      result = rnFootnote
    elif sym == "|" and isLineBlock(p):
      result = rnLineBlock
    elif markdownOn and isMarkdownBlockQuote(p):
      result = rnMarkdownBlockQuote
    elif match(p, nextLineIdx, "aI") and
        isAdornmentHeadline(p, nextLineIdx):
      result = rnHeadline
    elif sym in ["+", "*", "-"] and nextTok(p).kind == tkWhite:
      result = rnBulletList
    elif match(p, p.idx, ":w:E"):
      # (currentTok(p).symbol == ":")
      result = rnFieldList
    elif match(p, p.idx, "(e) ") or match(p, p.idx, "e) ") or
         match(p, p.idx, "e. "):
      result = rnEnumList
    elif isOptionList(p):
      result = rnOptionList
    elif isRst(p) and isDefList(p):
      result = rnDefList
    elif isMd(p) and isMdDefListItem(p, p.idx):
      result = rnMdDefList
    else:
      result = rnParagraph
  of tkWord, tkOther, tkWhite:
    let nextLineIdx = tokenAfterNewline(p)
    if match(p, nextLineIdx, "aI"):
      # an adornment line follows: either a valid headline or plain text
      result =
        if isAdornmentHeadline(p, nextLineIdx): rnHeadline
        else: rnParagraph
    elif match(p, p.idx, "e) ") or match(p, p.idx, "e. "):
      result = rnEnumList
    elif isRst(p) and isDefList(p):
      result = rnDefList
    elif isMd(p) and isMdDefListItem(p, p.idx):
      result = rnMdDefList
    else:
      result = rnParagraph
  else:
    result = rnLeaf
proc parseLineBlock(p: var RstParser): PRstNode =
  ## Returns rnLineBlock with all sons of type rnLineBlockItem
  ## (or nil if the token after the `|` is neither whitespace nor a line
  ## break). Each following line that starts with `|` in the same column
  ## adds one more item.
  if nextTok(p).kind notin {tkWhite, tkIndent}:
    return nil
  let barCol = currentTok(p).col
  result = newRstNodeA(p, rnLineBlock)
  var more = true
  while more:
    let item = newRstNode(rnLineBlockItem)
    let afterBar = nextTok(p)
    if afterBar.kind == tkWhite:
      if afterBar.symbol.len > 1:  # pass additional indentation after '| '
        item.lineIndent = afterBar.symbol
      inc p.idx, 2
      pushInd(p, p.tok[p.idx].col)
      parseSection(p, item)
      popInd(p)
    else:  # tkIndent => add an empty line
      item.lineIndent = "\n"
      inc p.idx
    result.add item
    more = currentTok(p).kind == tkIndent and currentTok(p).ival == barCol and
           nextTok(p).symbol == "|" and
           p.tok[p.idx + 2].kind in {tkWhite, tkIndent}
    if more:
      inc p.idx  # skip the line break, stop at the next `|`
- proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}  # forward declaration; the body is defined further down, the quote/table parsers below call it on sub-parsers
proc getQuoteSymbol(p: RstParser, idx: int): tuple[sym: string, depth: int, tokens: int] =
  ## Collects the Markdown quote marker that starts at token `idx`.
  ##
  ## The marker is the token at `idx` followed by any number of
  ## "whitespace + punctuation starting with `>`" pairs (e.g. `> > >`).
  ## Returns:
  ## * `sym`    - the concatenated marker text, whitespace included
  ## * `depth`  - summed length of the `>` tokens only
  ## * `tokens` - how many tokens the marker occupies
  let first = p.tok[idx].symbol
  result = (sym: first, depth: first.len, tokens: 1)
  var pos = idx + 1
  while p.tok[pos].kind == tkWhite and pos + 1 < p.tok.len and
        p.tok[pos + 1].kind == tkPunct and p.tok[pos + 1].symbol[0] == '>':
    let marker = p.tok[pos + 1].symbol
    result.sym.add p.tok[pos].symbol
    result.sym.add marker
    result.depth += marker.len
    result.tokens += 2
    pos += 2
proc parseMarkdownQuoteSegment(p: var RstParser, curSym: string, col: int):
                              PRstNode =
  ## Parses one *segment* of a Markdown block quote.
  ##
  ## A *segment* is a group of lines that start with exactly the same quote
  ## symbol `curSym`. Following lines that contain no `>` at all (*lazy*
  ## continuation) are considered a continuation of the current segment as
  ## long as they are not indented less than `col`.
  ##
  ## The tokens of the segment are moved from `p` into a fresh sub-parser
  ## with the quote markers removed, and the result of `parseDoc` on that
  ## sub-parser is returned. On return `p` is positioned at the token that
  ## ended the segment.
  var q: RstParser  # to delete `>` at a start of line and then parse normally
  initParser(q, p.s)
  q.col = p.col
  q.line = p.line
  var minCol = int.high  # minimum column number in the segment
  while true:  # move tokens of segment from `p` to `q` skipping `curSym`
    case currentTok(p).kind
    of tkEof:
      break
    of tkIndent:
      if nextTok(p).kind in {tkIndent, tkEof}:
        break  # blank line or end of input terminates the segment
      else:
        if nextTok(p).symbol[0] == '>':
          var (quoteSym, _, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
          if quoteSym == curSym:  # the segment continues
            var iTok = tokenAfterNewline(p, p.idx+1)
            if p.tok[iTok].kind notin {tkEof, tkIndent} and
                p.tok[iTok].symbol[0] != '>':
              rstMessage(p, mwRstStyle,
                  "two or more quoted lines are followed by unquoted line " &
                  $(curLine(p) + 1))
              break
            q.tok.add currentTok(p)
            var ival = currentTok(p).ival + quoteSym.len
            inc p.idx, (1 + quoteTokens)  # skip newline and > > >
            if currentTok(p).kind == tkWhite:
              ival += currentTok(p).symbol.len
              inc p.idx
            # fix up previous `tkIndent`s to ival (as if >>> were not there)
            var j = q.tok.len - 1
            while j >= 0 and q.tok[j].kind == tkIndent:
              q.tok[j].ival = ival
              dec j
          else:  # next segment started
            break
        elif currentTok(p).ival < col:
          break
        else:  # the segment continues, a case like:
               # > beginning
               # continuation
          q.tok.add currentTok(p)
          inc p.idx
    else:
      if currentTok(p).col < minCol: minCol = currentTok(p).col
      q.tok.add currentTok(p)
      inc p.idx
  q.indentStack = @[minCol]
  # if initial indentation `minCol` is > 0 then final newlines
  # should be omitted so that parseDoc could advance to the end of tokens:
  var j = q.tok.len - 1
  # `j >= 0` guards the empty-quote case (`>` directly followed by a blank
  # line or end of input), where no token was collected at all and the
  # unguarded loop would index `q.tok[-1]`.
  while j >= 0 and q.tok[j].kind == tkIndent: dec j
  q.tok.setLen (j+1)
  q.tok.add Token(kind: tkEof, line: currentTok(p).line)
  result = parseDoc(q)
proc parseMarkdownBlockQuote(p: var RstParser): PRstNode =
  ## Parses a Markdown block quote starting at the current `>` token.
  ##
  ## Returns an rnMarkdownBlockQuote node with one rnMarkdownBlockQuoteItem
  ## per segment; every item stores the depth of its quote marker and the
  ## parsed contents of the segment.
  var quote = getQuoteSymbol(p, p.idx)
  let quoteCol = currentTok(p).col
  result = newRstNodeA(p, rnMarkdownBlockQuote)
  inc p.idx, quote.tokens  # skip the first `>` marker
  while true:
    let quoteItem = newRstNode(rnMarkdownBlockQuoteItem)
    quoteItem.quotationDepth = quote.depth
    if currentTok(p).kind == tkWhite:
      inc p.idx
    quoteItem.add parseMarkdownQuoteSegment(p, quote.sym, quoteCol)
    result.add(quoteItem)
    # A new segment follows if the next line starts in the same column
    # with some `>` marker.
    let nextSegment =
      currentTok(p).kind == tkIndent and currentTok(p).ival == quoteCol and
      nextTok(p).kind != tkEof and nextTok(p).symbol[0] == '>'
    if not nextSegment:
      break
    quote = getQuoteSymbol(p, p.idx + 1)
    inc p.idx, 1 + quote.tokens  # skip the newline and `> > >`
proc parseParagraph(p: var RstParser, result: PRstNode) =
  ## Parses inline content of a paragraph and appends it to `result`.
  ##
  ## Parsing stops at a blank line, at a line whose indentation does not
  ## belong to the paragraph, after an RST literal block introduced by `::`,
  ## or when the following line starts a block that must become a separate
  ## section.
  while true:
    case currentTok(p).kind
    of tkWhite, tkWord, tkAdornment, tkOther:
      parseInline(p, result)
    of tkPunct:
      # In RST mode punctuation may be `::` introducing a literal block.
      var literalBlockKind = lbNone
      if isRst(p):
        literalBlockKind = whichRstLiteralBlock(p)
      if literalBlockKind == lbNone:
        parseInline(p, result)
      else:
        result.add newLeaf(":")
        inc p.idx  # skip '::'
        result.add(parseRstLiteralBlock(p, literalBlockKind))
        break
    of tkIndent:
      if nextTok(p).kind == tkIndent:
        inc p.idx
        break  # blank line breaks paragraph for both Md & Rst
      # (Md allows adding additional indentation inside paragraphs)
      let lineContinues = currentTok(p).ival == currInd(p) or
        (isMd(p) and currentTok(p).ival > currInd(p))
      if not lineContinues:
        break
      inc p.idx
      case whichSection(p)
      of rnLineBlock:
        result.addIfNotNil(parseLineBlock(p))
      of rnMarkdownBlockQuote:
        result.addIfNotNil(parseMarkdownBlockQuote(p))
      of rnParagraph, rnLeaf, rnHeadline, rnMarkdownHeadline,
          rnOverline, rnDirective:
        result.add newLeaf(" ")  # the newline becomes a single space
      else:
        dec p.idx  # allow subsequent block to be parsed as another section
        break
    else:
      break
proc checkHeadingHierarchy(p: RstParser, lvl: int) =
  ## Reports `meNewSectionExpected` when heading level `lvl` is more than one
  ## level deeper than the current heading level `p.s.hCurLevel`, listing the
  ## adornment styles of the skipped intermediate levels.
  if lvl - p.s.hCurLevel <= 1:
    return  # hierarchy is fine
  proc describe(level: int): string =
    let style =
      if p.s.hLevels[level].hasOverline: "overline " else: "underline "
    style & repeat(p.s.hLevels[level].symbol, 5)
  var missing = ""
  for level in p.s.hCurLevel+1 .. lvl-1:
    missing.add " " & describe(level)
    if level != lvl-1: missing.add ","
  let msg = "(section level inconsistent: " &
    describe(lvl) & " unexpectedly found, " &
    "while the following intermediate section level(s) are missing on lines " &
    $p.s.hLevels[p.s.hCurLevel].line & ".." & $curLine(p) & ":" &
    missing & ")"
  rstMessage(p, meNewSectionExpected, msg)
proc parseHeadline(p: var RstParser): PRstNode =
  ## Parses either a Markdown headline (`## Title`) or an RST headline
  ## (a title line followed by an adornment line).
  ##
  ## The node gets an anchor and is appended to `p.s.tocPart`.
  if not isMarkdownHeadline(p):
    result = newRstNode(rnHeadline)
    parseUntilNewline(p, result)
    assert(currentTok(p).kind == tkIndent)
    assert(nextTok(p).kind == tkAdornment)
    let adornChar = nextTok(p).symbol[0]
    inc p.idx, 2  # skip the newline and the underline
    result.level = getLevel(p, adornChar, hasOverline=false)
    checkHeadingHierarchy(p, result.level)
    p.s.hCurLevel = result.level
  else:
    result = newRstNode(rnMarkdownHeadline)
    # Note that level hierarchy is not checked for markdown headings
    result.level = currentTok(p).symbol.len
    assert(nextTok(p).kind == tkWhite)
    inc p.idx, 2  # skip the marker and the space after it
    parseUntilNewline(p, result)
  addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor)
  p.s.tocPart.add result
proc parseOverline(p: var RstParser): PRstNode =
  ## Parses an RST headline that has both an overline and an underline.
  ##
  ## The title may span several lines as long as the continuation lines are
  ## indented deeper than the current indentation. The node gets an anchor
  ## and is appended to `p.s.tocPart`.
  let adornChar = currentTok(p).symbol[0]
  inc p.idx, 2  # skip the overline and the newline after it
  result = newRstNode(rnOverline)
  while true:
    parseUntilNewline(p, result)
    if currentTok(p).kind != tkIndent:
      break
    inc p.idx
    if prevTok(p).ival <= currInd(p):
      break  # not a continuation line of the title
    result.add newLeaf(" ")
  result.level = getLevel(p, adornChar, hasOverline=true)
  checkHeadingHierarchy(p, result.level)
  p.s.hCurLevel = result.level
  if currentTok(p).kind == tkAdornment:
    inc p.idx  # skip the underline
    if currentTok(p).kind == tkIndent: inc p.idx
  addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor)
  p.s.tocPart.add result
proc fixHeadlines(s: PRstSharedState) =
  ## Post-processes all headings collected in `s.tocPart`:
  ##
  ## 1) shifts the levels of RST headings depending on how many title
  ##    headings (`s.hTitleCnt`) the document has;
  ## 2) assigns an anchor to every heading of level >= 1, prefixed with the
  ##    refname of the closest preceding higher-level heading when a table
  ##    of contents is present (`s.hasToc`);
  ## 3) empties `s.tocPart`.
  for heading in s.tocPart:
    if heading.kind notin {rnHeadline, rnOverline}:
      continue
    if s.hTitleCnt == 0:
      heading.level += 1
    elif s.hTitleCnt == 2:
      if heading.level == 1:    # it's the subtitle
        heading.level = 0
      elif heading.level >= 2:  # normal sections, start numbering from 1
        heading.level -= 1
  # Set headline anchors:
  for iHeading, heading in s.tocPart:
    if heading.level < 1:
      continue
    heading.anchor = rstnodeToRefname(heading)
    if not s.hasToc:
      continue
    # `.. contents::` is present => make the anchor unique by prefixing it
    # with the last preceding section of a higher level, if there is one.
    var sectionPrefix = ""
    for i in countdown(iHeading - 1, 0):
      let candidate = s.tocPart[i]
      if candidate.level >= 1 and candidate.level < heading.level:
        sectionPrefix = rstnodeToRefname(candidate) & "-"
        break
    if sectionPrefix != "":
      heading.anchor = sectionPrefix & heading.anchor
  s.tocPart.setLen 0
type
  ColSpec = object
    ## Column boundaries of one RST simple-table column,
    ## as first and last token column positions.
    start: int
    stop: int
  RstCols = seq[ColSpec]
  ColumnLimits = tuple[first, last: int]
    ## For Markdown tables: token index range `first ..< last` of one cell.
  ColSeq = seq[ColumnLimits]
proc tokStart(p: RstParser, idx: int): int =
  ## Column at which token number `idx` starts.
  p.tok[idx].col
proc tokStart(p: RstParser): int =
  ## Column at which the current token starts.
  tokStart(p, p.idx)
proc tokEnd(p: RstParser, idx: int): int =
  ## Column of the last character of token number `idx`.
  let t = p.tok[idx]
  t.col + t.symbol.len - 1
proc tokEnd(p: RstParser): int =
  ## Column of the last character of the current token.
  tokEnd(p, p.idx)
proc getColumns(p: RstParser, cols: var RstCols, startIdx: int): int =
  ## Reads a table column specification (or separator) line that starts at
  ## token `startIdx`: adornment tokens separated by whitespace.
  ##
  ## `cols` is overwritten with the start/stop columns of every adornment
  ## token. Returns the parser index after the line (a trailing newline
  ## token is skipped too).
  cols.setLen(0)
  result = startIdx
  while true:
    cols.add ColSpec(start: tokStart(p, result), stop: tokEnd(p, result))
    assert(p.tok[result].kind == tkAdornment)
    inc result
    if p.tok[result].kind != tkWhite: break
    inc result
    if p.tok[result].kind != tkAdornment: break
  if p.tok[result].kind == tkIndent:
    inc result
proc checkColumns(p: RstParser, cols: RstCols) =
  ## Validates the adornment line at the current position against the
  ## column specification `cols`; any mismatch is passed to `stopOrWarn`
  ## as `meIllformedTable`.
  var i = p.idx
  if p.tok[i].symbol[0] != '=':
    stopOrWarn(p, meIllformedTable,
               "only tables with `=` columns specification are allowed")
  let lastCol = cols.len - 1
  for col, spec in cols:
    if tokEnd(p, i) != spec.stop:
      stopOrWarn(p, meIllformedTable,
                 "end of table column #$1 should end at position $2" % [
                   $(col+1), $(spec.stop+ColRstOffset)],
                 p.tok[i].line, tokEnd(p, i))
    inc i
    if col != lastCol:
      # a whitespace separator is required between two columns
      if p.tok[i].kind == tkWhite:
        inc i
      else:
        stopOrWarn(p, meIllformedTable,
                   "no enough table columns", p.tok[i].line, p.tok[i].col)
    else:
      # after the last column only trailing whitespace may follow
      if p.tok[i].kind == tkWhite:
        inc i
      if p.tok[i].kind notin {tkIndent, tkEof}:
        stopOrWarn(p, meIllformedTable, "extraneous column specification")
proc getSpans(p: RstParser, nextLine: int,
              cols: RstCols, unitedCols: RstCols): seq[int] =
  ## Calculates how many columns a joined cell occupies.
  ##
  ## `cols` are the main table columns, `unitedCols` the columns of the
  ## spanning underline (like `-----`) that starts at token `nextLine`.
  ## Returns an empty seq when `unitedCols` is empty, otherwise one entry
  ## per united column holding the number of main columns it covers.
  ##
  ## `meIllformedTable` is reported when the right edges of the underline
  ## do not line up with main column edges, or when the underline does not
  ## cover all main columns.
  if unitedCols.len > 0:
    result = newSeq[int](unitedCols.len)
    var
      iCell = 0  # first main column of the current span
      jCell = 0  # main column being examined
      uCell = 0  # united column being filled
    while jCell < cols.len:
      # `uCell < unitedCols.len` protects against an underline that covers
      # fewer columns than the table has: without it `unitedCols[uCell]`
      # would be indexed out of bounds.
      if uCell < unitedCols.len and
          cols[jCell].stop < unitedCols[uCell].stop:
        inc jCell
      elif uCell < unitedCols.len and
          cols[jCell].stop == unitedCols[uCell].stop:
        result[uCell] = jCell - iCell + 1
        iCell = jCell + 1
        jCell = jCell + 1
        inc uCell
      else:
        rstMessage(p, meIllformedTable,
                   "spanning underline does not match main table columns",
                   p.tok[nextLine].line, p.tok[nextLine].col)
        # nothing above advances in this branch, so leave the loop in case
        # the message handler returns instead of raising
        break
proc parseSimpleTableRow(p: var RstParser, cols: RstCols, colChar: char): PRstNode =
  ## Parses 1 row in RST simple table.
  ##
  ## `cols` is the main column specification and `colChar` the character of
  ## the main table borders. Returns an rnTableRow node with one
  ## rnTableDataCell per (possibly spanning) cell; the text of each cell is
  ## parsed by a separate sub-parser.
  # Columns may be spanning (united by an underline like ----) on the
  # line that follows the row:
  let nextLine = tokenAfterNewline(p)
  var unitedCols: RstCols
  var afterSpan = nextLine
  if p.tok[nextLine].kind == tkAdornment and p.tok[nextLine].symbol[0] == '-':
    afterSpan = getColumns(p, unitedCols, nextLine)
    if unitedCols == cols and p.tok[nextLine].symbol[0] == colChar:
      # legacy rst.nim compat.: allow punctuation like `----` in main boundaries
      afterSpan = nextLine
      unitedCols.setLen 0
  template colEnd(i): int =
    if i == cols.len - 1: high(int)  # last column has no limit
    elif unitedCols.len > 0: unitedCols[i].stop else: cols[i].stop
  template colStart(i): int =
    if unitedCols.len > 0: unitedCols[i].start else: cols[i].start
  let cellCount = if unitedCols.len > 0: unitedCols.len else: cols.len
  var row = newSeq[string](cellCount)
  let spans: seq[int] = getSpans(p, nextLine, cols, unitedCols)

  let firstLine = currentTok(p).line
  # Iterate over the lines a single cell may span:
  while true:
    var nCell = 0
    # distribute tokens between cells in the current line:
    while currentTok(p).kind notin {tkIndent, tkEof}:
      if tokEnd(p) > colEnd(nCell):
        # the token ends beyond the current cell => move on to the next one
        if tokStart(p) < colEnd(nCell) and currentTok(p).kind != tkWhite:
          stopOrWarn(p, meIllformedTable,
                     "this word crosses table column from the right")
          row[nCell].add(currentTok(p).symbol)
          inc p.idx
        inc nCell
      else:
        if tokStart(p) >= colStart(nCell):
          row[nCell].add(currentTok(p).symbol)
        elif currentTok(p).kind != tkWhite:
          stopOrWarn(p, meIllformedTable,
                     "this word crosses table column from the left")
          row[nCell].add(currentTok(p).symbol)
        inc p.idx
    if currentTok(p).kind == tkIndent: inc p.idx
    if tokEnd(p) <= colEnd(0): break
    # Continued current cells because the 1st column is empty.
    if currentTok(p).kind in {tkEof, tkAdornment}:
      break
    for i in 1 .. high(row):
      row[i].add('\n')
  result = newRstNode(rnTableRow)
  var q: RstParser
  for uCell in 0 ..< row.len:
    initParser(q, p.s)
    q.col = colStart(uCell)
    q.line = firstLine - 1
    getTokens(row[uCell], q.tok)
    let cell = newRstNode(rnTableDataCell)
    cell.span = if spans.len == 0: 0 else: spans[uCell]
    cell.add(parseDoc(q))
    result.add(cell)
  if afterSpan > p.idx:
    p.idx = afterSpan  # jump over the spanning underline
proc parseSimpleTable(p: var RstParser): PRstNode =
  ## Parses an RST simple table that starts at the current adornment line.
  ##
  ## Returns an rnTable node with one son per table row. When an adornment
  ## line is met inside the table, all rows parsed so far are converted to
  ## header rows.
  var cols: RstCols
  result = newRstNodeA(p, rnTable)
  let firstRowIdx = getColumns(p, cols, p.idx)
  let colChar = currentTok(p).symbol[0]
  checkColumns(p, cols)
  p.idx = firstRowIdx
  result.colCount = cols.len
  while true:
    if currentTok(p).kind == tkAdornment:
      checkColumns(p, cols)
      p.idx = tokenAfterNewline(p)
      if currentTok(p).kind in {tkEof, tkIndent}:
        # skip last adornment line:
        break
      if result.sons.len > 0:
        result.sons[^1].endsHeader = true
      # fix rnTableDataCell -> rnTableHeaderCell for previous table rows:
      for tableRow in result.sons:
        for iCell in 0 ..< tableRow.len:
          let old = tableRow.sons[iCell]
          tableRow.sons[iCell] = PRstNode(kind: rnTableHeaderCell,
                                          sons: old.sons,
                                          span: old.span, anchor: old.anchor)
    if currentTok(p).kind == tkEof:
      break
    result.add parseSimpleTableRow(p, cols, colChar)
proc readTableRow(p: var RstParser): ColSeq =
  ## Splits the current line of a Markdown table into cells.
  ##
  ## Cells are separated by `|` unless the previous token is a backslash;
  ## an optional leading `|` is skipped. Every cell is returned as the
  ## token index range `first ..< last`. Afterwards the parser is moved to
  ## the first token of the next line.
  if currentTok(p).symbol == "|":
    inc p.idx
  while currentTok(p).kind notin {tkIndent, tkEof}:
    let firstTok = p.idx
    while currentTok(p).kind notin {tkIndent, tkEof} and
        not (currentTok(p).symbol == "|" and prevTok(p).symbol != "\\"):
      inc p.idx
    result.add((first: firstTok, last: p.idx))
    if currentTok(p).kind in {tkIndent, tkEof}:
      break
    inc p.idx  # skip the `|` that closed the cell
  p.idx = tokenAfterNewline(p)
proc getColContents(p: var RstParser, colLim: ColumnLimits): string =
  ## Returns the text of the tokens `colLim.first ..< colLim.last` joined
  ## together, with leading and trailing whitespace removed.
  var text = ""
  for i in colLim.first ..< colLim.last:
    text.add(p.tok[i].symbol)
  result = strip(text)
proc isValidDelimiterRow(p: var RstParser, colNum: int): bool =
  ## Reads the current line as a table row and checks that it is a
  ## delimiter row: exactly `colNum` cells, each at least 3 characters long
  ## and starting with `--` or `:-`.
  let cells = readTableRow(p)
  result = cells.len == colNum
  if result:
    for limits in cells:
      let content = getColContents(p, limits)
      let wellFormed = content.len >= 3 and
        (content.startsWith("--") or content.startsWith(":-"))
      if not wellFormed:
        return false
proc parseMarkdownTable(p: var RstParser): PRstNode =
  ## Parses a Markdown (pipe) table: a header row, a delimiter row and any
  ## number of data rows that start with `|`.
  ##
  ## The number of columns is taken from the header row; shorter rows are
  ## padded with empty cells and surplus cells are ignored.
  ## `meMarkdownIllformedTable` is reported for a bad delimiter row.
  var q: RstParser  # sub-parser reused for the contents of every cell
  result = newRstNodeA(p, rnMarkdownTable)

  proc parseRow(p: var RstParser, cellKind: RstNodeKind, table: PRstNode) =
    var cells = readTableRow(p)
    if table.colCount == 0:
      table.colCount = cells.len  # table header
    elif cells.len < table.colCount:
      cells.setLen(table.colCount)
    let rowNode = newRstNode(rnTableRow)
    for j in 0 ..< table.colCount:
      let cellNode = newRstNode(cellKind)
      initParser(q, p.s)
      q.col = p.col
      q.line = currentTok(p).line - 1
      getTokens(getColContents(p, cells[j]), q.tok)
      cellNode.add(parseDoc(q))
      rowNode.add(cellNode)
    table.add(rowNode)

  parseRow(p, rnTableHeaderCell, result)
  if not isValidDelimiterRow(p, result.colCount):
    rstMessage(p, meMarkdownIllformedTable)
  while predNL(p) and currentTok(p).symbol == "|":
    parseRow(p, rnTableDataCell, result)
proc parseTransition(p: var RstParser): PRstNode =
  ## Creates an rnTransition node for the current token, then skips that
  ## token and up to two newline tokens after it.
  result = newRstNodeA(p, rnTransition)
  inc p.idx
  for _ in 1 .. 2:
    if currentTok(p).kind == tkIndent:
      inc p.idx
proc parseBulletList(p: var RstParser): PRstNode =
  ## Parses a bullet list whose first bullet is the current token.
  ##
  ## Returns an rnBulletList with one rnBulletItem per item, or nil if the
  ## bullet is not followed by whitespace. Only lines that start in the
  ## same column with the very same bullet symbol continue the list.
  if nextTok(p).kind != tkWhite:
    return nil
  let bullet = currentTok(p).symbol
  let bulletCol = currentTok(p).col
  result = newRstNodeA(p, rnBulletList)
  pushInd(p, p.tok[p.idx + 2].col)  # item text defines the indentation
  inc p.idx, 2
  while true:
    let bulletItem = newRstNode(rnBulletItem)
    parseSection(p, bulletItem)
    result.add(bulletItem)
    let another =
      currentTok(p).kind == tkIndent and currentTok(p).ival == bulletCol and
      nextTok(p).symbol == bullet and
      p.tok[p.idx + 2].kind == tkWhite
    if not another:
      break
    inc p.idx, 3  # skip the newline, the bullet and the space
  popInd(p)
proc parseOptionList(p: var RstParser): PRstNode =
  ## Parses an option list (command line options with descriptions).
  ##
  ## Returns an rnOptionList; every rnOptionListItem consists of an
  ## rnOptionGroup (the option tokens up to the first run of 2 or more
  ## spaces) and an rnDescription, and is numbered via `order` from 1.
  result = newRstNodeA(p, rnOptionList)
  let listCol = currentTok(p).col
  var order = 1
  while true:
    if not (currentTok(p).col == listCol and isOptionList(p)):
      if currentTok(p).kind != tkEof:
        dec p.idx  # back to tkIndent
      break
    let group = newRstNode(rnOptionGroup)
    let description = newRstNode(rnDescription)
    let listItem = newRstNode(rnOptionListItem)
    if match(p, p.idx, "//w"):
      inc p.idx
    while currentTok(p).kind notin {tkIndent, tkEof}:
      if currentTok(p).kind == tkWhite and currentTok(p).symbol.len > 1:
        inc p.idx  # 2+ spaces separate the option from its description
        break
      group.add(newLeaf(p))
      inc p.idx
    let j = tokenAfterNewline(p)
    if j > 0 and p.tok[j - 1].kind == tkIndent and
        p.tok[j - 1].ival > currInd(p):
      # the following line is indented deeper => multi-line description
      pushInd(p, p.tok[j - 1].ival)
      parseSection(p, description)
      popInd(p)
    else:
      parseLine(p, description)
    while currentTok(p).kind == tkIndent:
      inc p.idx
    listItem.add(group)
    listItem.add(description)
    listItem.order = order
    inc order
    result.add(listItem)
proc parseMdDefinitionList(p: var RstParser): PRstNode =
  ## Parses (Pandoc/kramdown/PHPextra) Markdown definition lists.
  ##
  ## Returns an rnMdDefList; each rnDefItem holds one rnDefName (the term)
  ## followed by one or more rnDefBody nodes.
  result = newRstNodeA(p, rnMdDefList)
  let termCol = currentTok(p).col
  while true:
    let defItem = newRstNode(rnDefItem)
    let term = newRstNode(rnDefName)
    parseLine(p, term)
    skipNewlines(p)
    inc p.idx, 2  # skip ":" and space
    defItem.add(term)
    while true:
      let body = newRstNode(rnDefBody)
      pushInd(p, getMdBlockIndent(p))
      parseSection(p, body)
      popInd(p)
      defItem.add(body)
      let bodyIdx = skipNewlines(p, p.idx)
      if not isMdDefBody(p, bodyIdx, termCol):
        break
      p.idx = bodyIdx + 2  # next definition body: skip ":" and space
    result.add(defItem)
    let itemIdx = skipNewlines(p, p.idx)
    if not (p.tok[itemIdx].col == termCol and isMdDefListItem(p, itemIdx)):
      break
    p.idx = itemIdx  # parse next item
proc parseDefinitionList(p: var RstParser): PRstNode =
  ## Parses an RST definition list: term lines each followed by a deeper
  ## indented definition body.
  ##
  ## Returns an rnDefList of rnDefItem nodes (rnDefName + rnDefBody), or nil
  ## when the current position does not start a definition list or no item
  ## could be parsed.
  result = nil
  var j = tokenAfterNewline(p) - 1
  let startsDefList =
    j >= 1 and p.tok[j].kind == tkIndent and
    p.tok[j].ival > currInd(p) and p.tok[j - 1].symbol != "::"
  if startsDefList:
    let termCol = currentTok(p).col
    result = newRstNodeA(p, rnDefList)
    while true:
      if isOptionList(p):
        break  # option list has priority over def.list
      j = p.idx  # remember where the term starts in order to roll back
      let term = newRstNode(rnDefName)
      parseLine(p, term)
      let hasBody =
        currentTok(p).kind == tkIndent and
        currentTok(p).ival > currInd(p) and
        nextTok(p).symbol != "::" and
        nextTok(p).kind notin {tkIndent, tkEof}
      if not hasBody:
        p.idx = j
        break
      pushInd(p, currentTok(p).ival)
      let body = newRstNode(rnDefBody)
      parseSection(p, body)
      let defItem = newRstNode(rnDefItem)
      defItem.add(term)
      defItem.add(body)
      result.add(defItem)
      popInd(p)
      if currentTok(p).kind == tkIndent and currentTok(p).ival == termCol:
        inc p.idx
        # does the new line look like another term followed by a body?
        j = tokenAfterNewline(p) - 1
        let anotherItem =
          j >= 1 and p.tok[j].kind == tkIndent and p.tok[j].ival > termCol and
          p.tok[j-1].symbol != "::" and p.tok[j+1].kind != tkIndent
        if not anotherItem:
          break
    if result.len == 0:
      result = nil
proc parseEnumList(p: var RstParser): PRstNode =
  ## Parses an enumerated list whose first enumerator is the current token.
  ##
  ## Returns an rnEnumList of rnEnumItem nodes, or nil (after a style
  ## warning) when the line after the first item is not indented enough to
  ## be its continuation. The list ends as soon as an enumerator is not the
  ## successor of the previous one.
  const
    wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ",
                                      "(x) ", "x) ", "x. "]
      # enumerator patterns, where 'x' means letter and 'n' means number
    wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3]  # number of tokens
    wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0]
      # position of enumeration sequence (number/letter) in enumerator
  let col = currentTok(p).col
  # find the pattern that the first enumerator matches
  var w = 0
  while w < wildcards.len:
    if match(p, p.idx, wildcards[w]): break
    inc w
  assert w < wildcards.len

  proc checkAfterNewline(p: RstParser, report: bool): bool =
    ## If no indentation on the next line then parse as a normal paragraph
    ## according to the RST spec. And report a warning with suggestions
    let j = tokenAfterNewline(p, start=p.idx+1)
    let requiredIndent = p.tok[p.idx+wildToken[w]].col
    let underIndented =
      p.tok[j].kind notin {tkIndent, tkEof} and
      p.tok[j].col < requiredIndent and
      (p.tok[j].col > col or
        (p.tok[j].col == col and not match(p, j, wildcards[w])))
    if underIndented and report:
      let n = p.line + p.tok[j].line
      let msg = "\n" & """
        not enough indentation on line $2
          (should be at column $3 if it's a continuation of enum. list),
        or no blank line after line $1 (if it should be the next paragraph),
        or no escaping \ at the beginning of line $1
          (if lines $1..$2 are a normal paragraph, not enum. list)""".dedent
      let c = p.col + requiredIndent + ColRstOffset
      rstMessage(p, mwRstStyle, msg % [$(n-1), $n, $c],
                 p.tok[j].line, p.tok[j].col)
    result = not underIndented

  if not checkAfterNewline(p, report = true):
    return nil
  result = newRstNodeA(p, rnEnumList)
  let autoEnums = if roSupportMarkdown in p.s.options: @["#", "1"] else: @["#"]
  var prevAE = ""  # so as not allow mixing auto-enumerators `1` and `#`
  var curEnum = 1
  for i in 0 ..< wildToken[w]-1:  # add first enumerator with (, ), and .
    let sym = p.tok[p.idx + i].symbol
    if sym == "#":
      prevAE = "#"
      result.labelFmt.add "1"
    else:
      result.labelFmt.add sym
  var prevEnum = p.tok[p.idx + wildIndex[w]].symbol
  inc p.idx, wildToken[w]
  while true:
    let enumItem = newRstNode(rnEnumItem)
    pushInd(p, currentTok(p).col)
    parseSection(p, enumItem)
    popInd(p)
    result.add(enumItem)
    let candidate =
      currentTok(p).kind == tkIndent and currentTok(p).ival == col and
      match(p, p.idx+1, wildcards[w])
    if not candidate:
      break
    # don't report to avoid duplication of warning since for
    # subsequent enum. items parseEnumList will be called second time:
    if not checkAfterNewline(p, report = false):
      break
    let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol
    # check that it's in sequence: enumerator == next(prevEnum)
    if "n" in wildcards[w]:  # arabic numeral
      let prevEnumI = try: parseInt(prevEnum) except ValueError: 1
      if enumerator in autoEnums:
        if prevAE != "" and enumerator != prevAE:
          break
        prevAE = enumerator
        curEnum = prevEnumI + 1
      else:
        curEnum = (try: parseInt(enumerator) except ValueError: 1)
      if curEnum - prevEnumI != 1:
        break
      prevEnum = enumerator
    else:  # a..z
      let prevEnumI = ord(prevEnum[0])
      if enumerator == "#":
        curEnum = prevEnumI + 1
      else:
        curEnum = ord(enumerator[0])
      if curEnum - prevEnumI != 1:
        break
      prevEnum = $chr(curEnum)
    inc p.idx, 1 + wildToken[w]  # skip the newline and the enumerator
proc prefix(ftnType: FootnoteType): string =
  ## Returns the anchor prefix used for footnotes/citations of `ftnType`.
  result = case ftnType
    of fnManualNumber, fnAutoNumberLabel: "footnote-"
    of fnAutoNumber: "footnoteauto-"
    of fnAutoSymbol: "footnotesym-"
    of fnCitation: "citation-"
proc parseFootnote(p: var RstParser): PRstNode {.gcsafe.} =
  ## Parses footnotes and citations, always returns 2 sons:
  ##
  ## 1) footnote label, always containing rnInner with 1 or more sons
  ## 2) footnote body, which may be nil
  ##
  ## Returns nil (with the parser position restored) if no footnote name
  ## could be parsed.
  if isRst(p):
    inc p.idx  # skip space after `..`
  let label = parseFootnoteName(p, reference=false)
  if label == nil:
    if isRst(p):
      dec p.idx
    return nil
  result = newRstNode(rnFootnote)
  result.add label
  let (fnType, num) = getFootnoteType(p.s, label)
  var anchor = prefix(fnType)
  case fnType
  of fnManualNumber:
    addFootnoteNumManual(p, num)
    anchor.add $num
  of fnAutoNumber, fnAutoNumberLabel:
    let name = rstnodeToRefname(label)
    addFootnoteNumAuto(p, name)
    if fnType == fnAutoNumber:
      result.order = p.s.lineFootnoteNum.len
      anchor.add $result.order
    else:  # fnAutoNumberLabel
      anchor.add name
  of fnAutoSymbol:
    addFootnoteSymAuto(p)
    result.order = p.s.lineFootnoteSym.len
    anchor.add $p.s.lineFootnoteSym.len
  of fnCitation:
    anchor.add rstnodeToRefname(label)
  addAnchorRst(p, anchor, target = result, anchorType = footnoteAnchor)
  result.anchor = anchor
  if currentTok(p).kind == tkWhite:
    inc p.idx
  discard parseBlockContent(p, result, parseSectionWrapper)
  if result.len < 2:
    result.add nil  # no body => keep the promise of exactly 2 sons
proc sonKind(father: PRstNode, i: int): RstNodeKind =
  ## Kind of the `i`-th son of `father`; rnLeaf if there is no such son.
  if i < father.len: father.sons[i].kind
  else: rnLeaf
proc parseSection(p: var RstParser, result: PRstNode) =
  ## parse top-level RST elements: sections, transitions and body elements.
  ##
  ## Parsed nodes are appended to `result`. Parsing stops at the end of
  ## input or at a line indented less than the current indentation level.
  ## Finally, if the first son is a paragraph that is not followed by
  ## another paragraph, it is converted to rnInner.
  while true:
    var dedented = false
    assert(p.idx >= 0)
    while currentTok(p).kind == tkIndent:
      let lineInd = currentTok(p).ival
      if lineInd == currInd(p):
        inc p.idx
      elif lineInd < currInd(p):
        while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
          inc p.idx  # skip blank lines
        dedented = true
        break
      elif roPreferMarkdown notin p.s.options:
        # RST mode => block quotes
        pushInd(p, currentTok(p).ival)
        let quote = newRstNodeA(p, rnBlockQuote)
        parseSection(p, quote)
        result.add(quote)
        popInd(p)
      elif currentTok(p).ival - currInd(p) >= 4:
        # Markdown: 4 or more extra spaces => literal block
        result.add parseLiteralBlock(p)
      else:
        # Markdown => normal paragraphs
        pushInd(p, currentTok(p).ival)
        parseSection(p, result)
        popInd(p)
    if dedented or currentTok(p).kind == tkEof:
      break
    var node: PRstNode = nil
    let sectionKind = whichSection(p)
    case sectionKind
    of rnLiteralBlock:
      inc p.idx  # skip '::'
      node = parseLiteralBlock(p)
    of rnFieldList:
      if p.idx > 0: dec p.idx
      node = parseFields(p)
    of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)")
    of rnParagraph: discard
    of rnBulletList: node = parseBulletList(p)
    of rnEnumList: node = parseEnumList(p)
    of rnOptionList: node = parseOptionList(p)
    of rnDefList: node = parseDefinitionList(p)
    of rnMdDefList: node = parseMdDefinitionList(p)
    of rnLineBlock: node = parseLineBlock(p)
    of rnMarkdownBlockQuote: node = parseMarkdownBlockQuote(p)
    of rnDirective: node = parseDotDot(p)
    of rnFootnote: node = parseFootnote(p)
    of rnTransition: node = parseTransition(p)
    of rnHeadline, rnMarkdownHeadline: node = parseHeadline(p)
    of rnOverline: node = parseOverline(p)
    of rnTable: node = parseSimpleTable(p)
    of rnMarkdownTable: node = parseMarkdownTable(p)
    else:
      #InternalError("rst.parseSection()")
      discard
    # anything that was not recognized (except directives) is a paragraph
    if node == nil and sectionKind != rnDirective:
      node = newRstNodeA(p, rnParagraph)
      parseParagraph(p, node)
    result.addIfNotNil(node)
  if sonKind(result, 0) == rnParagraph and sonKind(result, 1) != rnParagraph:
    let first = result.sons[0]
    result.sons[0] = newRstNode(rnInner, first.sons, anchor=first.anchor)
proc parseDoc(p: var RstParser): PRstNode =
  ## Parses all tokens of `p` with `parseSectionWrapper`; reports
  ## `meGeneralParseError` if parsing stopped before the end of input.
  result = parseSectionWrapper(p)
  let reachedEnd = currentTok(p).kind == tkEof
  if not reachedEnd:
    rstMessage(p, meGeneralParseError)
type
  DirFlag = enum
    ## Controls what `parseDirective` reads after the directive name.
    hasArg,      ## the directive takes an argument
    hasOptions,  ## the directive may be followed by a field list of options
    argIsFile,   ## the argument is read token by token (used for file names)
    argIsWord    ## the argument is a single word
  DirFlags = set[DirFlag]
proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode =
  ## Parses arguments and options for a directive block.
  ##
  ## A directive block will always have three sons: the arguments for the
  ## directive (rnDirArg), the options (rnFieldList) and the directive
  ## content block. This proc parses the two first nodes, the 3rd is left to
  ## the outer `parseDirective` call.
  ##
  ## Both rnDirArg and rnFieldList children nodes might be nil, so you need to
  ## check them before accessing.
  result = newRstNodeA(p, k)
  if k == rnCodeBlock:
    result.info = lineInfo(p)

  var args: PRstNode = nil
  if hasArg in flags:
    args = newRstNode(rnDirArg)
    if argIsFile in flags:
      # take everything up to the first whitespace/newline
      while currentTok(p).kind in {tkWord, tkOther, tkPunct, tkAdornment}:
        args.add(newLeaf(p))
        inc p.idx
    elif argIsWord in flags:
      while currentTok(p).kind == tkWhite:
        inc p.idx
      if currentTok(p).kind != tkWord:
        args = nil
      else:
        args.add(newLeaf(p))
        inc p.idx
    else:
      parseLine(p, args)
  result.add(args)

  var options: PRstNode = nil
  if hasOptions in flags and
      currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) and
      nextTok(p).symbol == ":":
    # an indented line starting with `:` => field list with the options
    pushInd(p, currentTok(p).ival)
    options = parseFields(p)
    popInd(p)
  result.add(options)
proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags,
                    contentParser: SectionParser): PRstNode =
  ## A helper proc that does main work for specific directive procs.
  ## Always returns a generic rnDirective tree with these 3 children:
  ##
  ## 1) rnDirArg
  ## 2) rnFieldList
  ## 3) a node returned by `contentParser`.
  ##
  ## .. warning:: Any of the 3 children may be nil.
  result = parseDirective(p, k, flags)
  # parseBlockContent itself adds the content to `result` when it succeeds
  let contentAdded = not isNil(contentParser) and
    parseBlockContent(p, result, contentParser)
  if not contentAdded:
    result.add(PRstNode(nil))
proc parseDirBody(p: var RstParser, contentParser: SectionParser): PRstNode =
  ## Runs `contentParser` on the indented block that follows, with the
  ## indentation of that block pushed for the duration of the call.
  ## Returns nil if `indFollows(p)` is false.
  if not indFollows(p):
    return nil
  pushInd(p, currentTok(p).ival)
  result = contentParser(p)
  popInd(p)
proc dirInclude(p: var RstParser): PRstNode =
  ## Parses the `include` directive and returns the parsed contents of the
  ## included file (or an rnLiteralBlock with its raw text when the
  ## `literal` option is given). Returns nil and reports `meCannotOpenFile`
  ## if the file cannot be found.
  ##
  ## The following options are recognized:
  ##
  ## :start-after: text to find in the external data file
  ##
  ##     Only the content after the first occurrence of the specified
  ##     text will be included. If text is not found inclusion will
  ##     start from beginning of the file
  ##
  ## :end-before: text to find in the external data file
  ##
  ##     Only the content before the first occurrence of the specified
  ##     text (but after any after text) will be included. If text is
  ##     not found inclusion will happen until the end of the file.
  #literal : flag (empty)
  #  The entire included text is inserted into the document as a single
  #  literal block (useful for program listings).
  #encoding : name of text encoding
  #  The text encoding of the external data file. Defaults to the document's
  #  encoding (if specified).
  #
  result = nil
  var n = parseDirective(p, rnDirective, {hasArg, argIsFile, hasOptions}, nil)
  var filename = strip(addNodes(n.sons[0]))
  var path = p.findRelativeFile(filename)
  if path == "":
    rstMessage(p, meCannotOpenFile, filename)
  else:
    # XXX: error handling; recursive file inclusion!
    if getFieldValue(n, "literal") != "":
      result = newRstNode(rnLiteralBlock)
      result.add newLeaf(readFile(path))
    else:
      let inputString = readFile(path)
      let startPosition =
        block:
          let searchFor = n.getFieldValue("start-after").strip()
          if searchFor != "":
            let pos = inputString.find(searchFor)
            if pos != -1: pos + searchFor.len
            else: 0
          else:
            0

      let endPosition =
        block:
          let searchFor = n.getFieldValue("end-before").strip()
          if searchFor != "":
            let pos = inputString.find(searchFor, start = startPosition)
            if pos != -1: pos - 1
            else:
              # text not found => include until the end of the file, as
              # documented above (falling back to 0 would cut the input
              # to at most 1 character or produce an invalid slice)
              inputString.len - 1
          else:
            inputString.len - 1

      var q: RstParser
      initParser(q, p.s)
      let saveFileIdx = p.s.currFileIdx
      setCurrFilename(p.s, path)
      try:
        getTokens(
          inputString[startPosition..endPosition],
          q.tok)
        # workaround a GCC bug; more like the interior pointer bug?
        #if find(q.tok[high(q.tok)].symbol, "\0\x01\x02") > 0:
        #  InternalError("Too many binary zeros in include file")
        result = parseDoc(q)
      finally:
        # restore the current file even if parsing the included file raised
        p.s.currFileIdx = saveFileIdx
proc dirCodeBlock(p: var RstParser, nimExtension = false): PRstNode =
  ## Parses a code block.
  ##
  ## Code blocks are rnDirective trees with a `kind` of rnCodeBlock. See the
  ## description of ``parseDirective`` for further structure information.
  ##
  ## Code blocks can come in two forms, the standard `code directive
  ## <http://docutils.sourceforge.net/docs/ref/rst/directives.html#code>`_ and
  ## the nim extension ``.. code-block::``. If the block is an extension, we
  ## want the default language syntax highlighting to be Nim, so we create a
  ## fake internal field to communicate with the generator. The field is named
  ## ``default-language``, which is unlikely to collide with a field specified
  ## by any random rst input file.
  ##
  ## As an extension this proc will process the ``file`` extension field and if
  ## present will replace the code block with the contents of the referenced
  ## file. This behaviour is disabled in sandboxed mode and can be re-enabled
  ## with the `roSandboxDisabled` flag.
  const codeFlags = {hasArg, hasOptions}
  result = parseDirective(p, rnCodeBlock, codeFlags, parseLiteralBlock)
  mayLoadFile(p, result)

  # Extend the field block if we are using our custom Nim extension.
  if nimExtension:
    defaultCodeLangNim(p, result)
proc dirContainer(p: var RstParser): PRstNode =
  ## Parses a `container` directive: an argument plus nested content.
  result = parseDirective(p, rnContainer, {hasArg}, parseSectionWrapper)
  # argument, options and content are always present (possibly nil)
  assert(result.len == 3)
proc dirImage(p: var RstParser): PRstNode =
  ## Parses an ``image`` directive. The argument is treated as a file name,
  ## options are allowed, and there is no body parser.
  const imageFlags = {hasOptions, hasArg, argIsFile}
  result = parseDirective(p, rnImage, imageFlags, nil)
proc dirFigure(p: var RstParser): PRstNode =
  ## Parses a ``figure`` directive. It takes the same flags as an image
  ## (file-name argument plus options) and additionally parses its body with
  ## `parseSectionWrapper`.
  const figureFlags = {hasOptions, hasArg, argIsFile}
  result = parseDirective(p, rnFigure, figureFlags, parseSectionWrapper)
proc dirTitle(p: var RstParser): PRstNode =
  ## Parses a ``title`` directive: an argument only, no options and no body.
  const titleFlags = {hasArg}
  result = parseDirective(p, rnTitle, titleFlags, nil)
proc dirContents(p: var RstParser): PRstNode =
  ## Parses a ``contents`` directive and records in the shared state that the
  ## document has a table of contents.
  const contentsFlags = {hasArg}
  result = parseDirective(p, rnContents, contentsFlags, nil)
  p.s.hasToc = true
proc dirIndex(p: var RstParser): PRstNode =
  ## Parses an ``index`` directive: no argument or options, body parsed with
  ## `parseSectionWrapper`.
  let node = parseDirective(p, rnIndex, {}, parseSectionWrapper)
  result = node
proc dirAdmonition(p: var RstParser, d: string): PRstNode =
  ## Parses an admonition directive and stores the directive name `d`
  ## (e.g. "note", "warning") in the node's `adType`.
  let node = parseDirective(p, rnAdmonition, {}, parseSectionWrapper)
  node.adType = d
  result = node
proc dirDefaultRole(p: var RstParser): PRstNode =
  ## Parses a ``default-role`` directive and updates the current role in the
  ## shared state. An empty argument resets the role to the default derived
  ## from the parser options; otherwise the argument text becomes the role.
  result = parseDirective(p, rnDefaultRole, {hasArg}, nil)
  let argNode = result.sons[0]
  if argNode.len != 0:
    assert argNode.sons[0].kind == rnLeaf
    p.s.currRole = argNode.sons[0].text
  else:
    p.s.currRole = defaultRole(p.s.options)
  # Keep the cached role kind in sync with the newly selected role name.
  p.s.currRoleKind = whichRole(p, p.s.currRole)
proc dirRole(p: var RstParser): PRstNode =
  ## Parses a ``role`` directive.
  ##
  ## For now this only validates the ``language`` field (a warning is emitted
  ## for an unsupported language). TODO: real role association.
  result = parseDirective(p, rnDirective, {hasArg, hasOptions}, nil)
  let lang = strip(getFieldValue(result, "language"))
  let unsupported = lang != "" and getSourceLanguage(lang) == langNone
  if unsupported:
    rstMessage(p, mwUnsupportedLanguage, lang)
proc dirRawAux(p: var RstParser, result: var PRstNode, kind: RstNodeKind,
               contentParser: SectionParser) =
  ## Finishes a ``raw`` directive by replacing `result` with a node of `kind`.
  ##
  ## * If the directive has a non-empty ``file`` field, the file is located
  ##   with `findRelativeFile` and its whole content becomes the single leaf
  ##   of the new node; a file that cannot be found is reported with
  ##   `meCannotOpenFile`.
  ## * Otherwise the new node keeps the sons of the parsed directive and gets
  ##   the directive body, parsed by `contentParser`, appended.
  let filename = getFieldValue(result, "file")
  if filename.len == 0:
    # Inline content: reuse the already parsed sons and append the body.
    result = newRstNode(kind, result.sons)
    result.add(parseDirBody(p, contentParser))
    return
  let path = p.findRelativeFile(filename)
  if path.len == 0:
    rstMessage(p, meCannotOpenFile, filename)
  else:
    let contents = readFile(path)
    result = newRstNode(kind)
    result.add newLeaf(contents)
proc dirRaw(p: var RstParser): PRstNode =
  ## Parses a ``raw`` directive.
  ##
  ## Recognized option:
  ##
  ## file : string (newlines removed)
  ##     The local filesystem path of a raw data file to be included.
  ##
  ## Recognized output formats (directive argument, compared ignoring case):
  ## ``html`` and ``latex``. Any other argument is reported as an invalid
  ## directive; a missing argument yields a generic rnRaw node whose body is
  ## parsed with `parseSectionWrapper`.
  result = parseDirective(p, rnDirective, {hasOptions, hasArg, argIsWord})
  let argNode = result.sons[0]
  if argNode == nil:
    dirRawAux(p, result, rnRaw, parseSectionWrapper)
  else:
    let outFormat = argNode.sons[0].text
    if cmpIgnoreCase(outFormat, "html") == 0:
      dirRawAux(p, result, rnRawHtml, parseLiteralBlock)
    elif cmpIgnoreCase(outFormat, "latex") == 0:
      dirRawAux(p, result, rnRawLatex, parseLiteralBlock)
    else:
      rstMessage(p, meInvalidDirective, outFormat)
proc dirImportdoc(p: var RstParser): PRstNode =
  ## Parses an ``importdoc`` directive.
  ##
  ## The literal-block body is a comma-separated list of file names. Each
  ## name, with all whitespace removed, is registered in `p.s.idxImports`
  ## together with the current line info.
  result = parseDirective(p, rnDirective, {}, parseLiteralBlock)
  let body = result.sons[2]
  assert body.kind == rnLiteralBlock
  assert body.sons[0].kind == rnLeaf
  for rawName in split(body.sons[0].text, seps = {','}):
    # Splitting on whitespace and re-joining drops every whitespace run.
    let cleanName = rawName.split.join("")
    p.s.idxImports[cleanName] = ImportdocInfo(fromInfo: lineInfo(p))
proc selectDir(p: var RstParser, d: string): PRstNode =
  ## Dispatches directive name `d` to the proc that parses it.
  ##
  ## Unless `roSandboxDisabled` is set, a directive outside
  ## `SandboxDirAllowlist` is reported with `meSandboxedDirective`. The
  ## ``raw`` directive additionally requires `roSupportRawDirective`.
  ## Unknown names are reported with `meInvalidDirective`.
  result = nil
  # Two tokens back is the directive name in ".. directive::", so that
  # messages point at the directive itself.
  let dirTok = p.tok[p.idx-2]
  if roSandboxDisabled notin p.s.options and d notin SandboxDirAllowlist:
    rstMessage(p, meSandboxedDirective, d, dirTok.line, dirTok.col)

  case d
  of "admonition", "attention", "caution", "danger", "error", "hint",
     "important", "note", "tip", "warning":
    result = dirAdmonition(p, d)
  of "code":
    result = dirCodeBlock(p)
  of "code-block":
    result = dirCodeBlock(p, nimExtension = true)
  of "container":
    result = dirContainer(p)
  of "contents":
    result = dirContents(p)
  of "default-role":
    result = dirDefaultRole(p)
  of "figure":
    result = dirFigure(p)
  of "image":
    result = dirImage(p)
  of "importdoc":
    result = dirImportdoc(p)
  of "include":
    result = dirInclude(p)
  of "index":
    result = dirIndex(p)
  of "raw":
    if roSupportRawDirective notin p.s.options:
      rstMessage(p, meInvalidDirective, d)
    else:
      result = dirRaw(p)
  of "role":
    result = dirRole(p)
  of "title":
    result = dirTitle(p)
  else:
    rstMessage(p, meInvalidDirective, d, dirTok.line, dirTok.col)
proc parseDotDot(p: var RstParser): PRstNode =
  ## Parses an "explicit markup block", i.e. a construct introduced by ``..``.
  ##
  ## Depending on what follows the marker this is a directive, a hyperlink
  ## target (``.. _name: url``), a substitution definition
  ## (``.. |name| replace:: ...`` / ``.. |name| image:: ...``), a footnote,
  ## or -- when nothing else matches -- a comment. Hyperlink targets and
  ## substitution definitions only register state and leave the result nil.
  result = nil
  var footnote: PRstNode  # to store result, workaround for bug 16855
  let startCol = currentTok(p).col
  inc p.idx
  let directive = getDirective(p)
  if directive != "":
    pushInd(p, startCol)
    result = selectDir(p, directive)
    popInd(p)
  elif match(p, p.idx, " _"):
    # hyperlink target:
    inc p.idx, 2
    # A back-quoted name ends at the closing back-quote and must then be
    # followed by ":"; a plain name ends directly at ":".
    let quoted = currentTok(p).symbol == "`"
    if quoted:
      inc p.idx
    let ending = if quoted: "`" else: ":"
    let name = getReferenceName(p, ending)
    if quoted:
      if currentTok(p).symbol == ":":
        inc p.idx
      else:
        rstMessage(p, meExpected, ":")
    if currentTok(p).kind == tkWhite:
      inc p.idx
    let target = untilEol(p)
    if target.len == 0:  # set internal anchor
      p.curAnchors.add ManualAnchor(
        alias: linkName(name), anchor: rstnodeToRefname(name),
        info: prevLineInfo(p)
      )
    else:  # external hyperlink
      setRef(p, rstnodeToRefname(name), target, refType=hyperlinkAlias)
  elif match(p, p.idx, " |"):
    # substitution definitions:
    inc p.idx, 2
    let name = getReferenceName(p, "|")
    var replacement: PRstNode
    if currentTok(p).kind == tkWhite:
      inc p.idx
    let keyword = currentTok(p).symbol
    if cmpIgnoreStyle(keyword, "replace") == 0:
      inc p.idx
      expect(p, "::")
      replacement = untilEol(p)
    elif cmpIgnoreStyle(keyword, "image") == 0:
      inc p.idx
      replacement = dirImage(p)
    else:
      rstMessage(p, meInvalidDirective, currentTok(p).symbol)
    setSub(p, addNodes(name), replacement)
  elif match(p, p.idx, " [") and
      (footnote = parseFootnote(p); footnote != nil):
    result = footnote
  else:
    result = parseComment(p, startCol)
proc rstParsePass1*(fragment: string,
                    line, column: int,
                    sharedState: PRstSharedState): PRstNode =
  ## Parses an RST `fragment`.
  ## The result should be further processed by
  ## preparePass2_ and resolveSubs_ (which is pass 2).
  var parser: RstParser
  initParser(parser, sharedState)
  # Position of the fragment inside its enclosing file.
  parser.line = line
  parser.col = column
  getTokens(fragment, parser.tok)
  result = parseDoc(parser)
proc extractLinkEnd(x: string): string =
  ## From links like `path/to/file.html#/%` extract `file.html#/%`.
  ##
  ## Only a `/` located at or before the first `#` counts as a path
  ## separator, so slashes inside the fragment part are preserved. A link
  ## without any such `/` is returned unchanged.
  let hashPos = x.find('#')
  # Without a fragment the whole string is searched for the separator.
  let searchEnd = if hashPos < 0: x.len - 1 else: hashPos
  let slashPos = x.rfind('/', start = 0, last = searchEnd)
  if slashPos < 0:
    result = x
  else:
    result = x[slashPos + 1 .. ^1]
proc loadIdxFile(s: var PRstSharedState, origFilename: string) =
  ## Loads the ``.idx`` file that corresponds to the imported document
  ## `origFilename` and registers all of its entries as external anchors.
  ##
  ## Requires `roSandboxDisabled`. The extension of `origFilename` must be one
  ## of ``.md``, ``.rst``, ``.nim`` or empty. Markup entries are added via
  ## `addAnchorExtRst`, Nim entries via `addAnchorNim`; mixing the two kinds
  ## in one index file is reported as `meInvalidField`. The document title
  ## found in the index is stored in `s.idxImports[origFilename].title` and
  ## must be non-empty at the end.
  doAssert roSandboxDisabled in s.options
  var info: TLineInfo
  info.fileIndex = addFilename(s, origFilename)
  let (dir, basename, ext) = splitFile(origFilename)
  if ext notin [".md", ".rst", ".nim", ""]:
    rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo,
               meCannotOpenFile, origFilename & ": unknown extension")
  let idxFilename = dir / basename & ".idx"
  let (idxPath, linkRelPath) = s.findRefFile(idxFilename)
  s.idxImports[origFilename].linkRelPath = linkRelPath

  var
    fileEntries: seq[IndexEntry]
    title: IndexEntry
  try:
    (fileEntries, title) = parseIdxFile(idxPath)
  except IOError:
    rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo,
               meCannotOpenFile, idxPath)
  except ValueError as e:
    s.msgHandler(idxPath, LineRstInit, ColRstInit, meInvalidField, e.msg)

  var sawMarkupTitle = false  # for sanity check to avoid mixing .md <-> .nim
  for entry in fileEntries:
    # Though target .idx already has inside it the path to HTML relative
    # project's root, we won't rely on it and use `linkRelPath` instead.
    let refn = extractLinkEnd(entry.link)
    if entry.kind in {ieMarkupTitle, ieNimTitle}:
      s.idxImports[origFilename].title = entry.keyword
    # select either markup (rst/md) or Nim cases:
    case entry.kind
    of ieIdxRole, ieHeading, ieMarkupTitle:
      let isTitleEntry = entry.kind == ieMarkupTitle
      if ext == ".nim" and isTitleEntry:
        rstMessage(s, idxPath, meInvalidField,
                   $ieMarkupTitle & " in supposedly .nim-derived file")
      if isTitleEntry:
        sawMarkupTitle = true
      info.line = entry.line.uint16
      addAnchorExtRst(s, key = entry.keyword, refn = refn,
                      anchorType = headlineAnchor, info=info)
    of ieNim, ieNimGroup, ieNimTitle:
      if ext in [".md", ".rst"] or sawMarkupTitle:
        rstMessage(s, idxPath, meInvalidField,
                   $entry.kind & " in supposedly markup-derived file")
      s.nimFileImported = true
      var langSym: LangSymbol
      if entry.kind == ieNimGroup:
        langSym = langSymbolGroup(kind=entry.linkTitle, name=entry.keyword)
      else:  # entry.kind in {ieNim, ieNimTitle}
        # Tokenize the link title with a scratch parser and turn every token
        # into a leaf, so that the title can be converted to a `LangSymbol`.
        var q: RstParser
        initParser(q, s)
        info.line = entry.line.uint16
        setLen(q.tok, 0)
        q.idx = 0
        getTokens(entry.linkTitle, q.tok)
        var leaves: seq[PRstNode]
        for tok in q.tok:
          leaves.add newLeaf(tok.symbol)
        let linkTitle = newRstNode(rnInner, leaves)
        langSym = linkTitle.toLangSymbol
      addAnchorNim(s, external = true, refn = refn, tooltip = entry.linkDesc,
                   langSym = langSym, priority = -4, # lowest
                   info = info, module = info.fileIndex)
  doAssert s.idxImports[origFilename].title != ""
proc preparePass2*(s: var PRstSharedState, mainNode: PRstNode, importdoc = true) =
  ## Records titles in node `mainNode` and orders footnotes.
  ##
  ## When `importdoc` is true, the index file of every document registered
  ## in `s.idxImports` is loaded as well.
  countTitles(s, mainNode)
  fixHeadlines(s)
  orderFootnotes(s)
  if not importdoc:
    return
  for importedFile in s.idxImports.keys:
    loadIdxFile(s, importedFile)
proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode =
  ## Associates the link alias in `n` with its target and returns a new node
  ## whose kind is rnHyperlink, rnNimdocRef or rnInternalRef as appropriate.
  ##
  ## Candidates are collected from explicit hyperlink references, from RST
  ## anchors (internal and external) and -- for Nim files or when a Nim index
  ## was imported -- from anchors generated for Nim symbols. The candidate
  ## that sorts first in descending order wins; any additional candidate
  ## triggers an `mwAmbiguousLink` warning. When nothing is found, `n` is
  ## returned unchanged and `mwBrokenLink` is reported.
  let isPandoc = n.kind == rnPandocRef
  # Pandoc link `[desc][alias]` stores both parts separately, whereas for an
  # RST link like `desc=alias`_ the node itself serves as both.
  let desc = if isPandoc: n.sons[0] else: n
  let alias = if isPandoc: n.sons[1] else: n

  type LinkDef = object
    ar: AnchorRule
    priority: int
    tooltip: string
    target: PRstNode
    info: TLineInfo
    externFilename: string
      # when external anchor: origin filename where anchor was defined
    isTitle: bool

  proc cmp(x, y: LinkDef): int =
    # Order by priority first, fall back to comparing the targets.
    result = cmp(x.priority, y.priority)
    if result == 0:
      result = cmp(x.target, y.target)

  template getExternFilename(subst: AnchorSubst): string =
    if subst.kind == arExternalRst or
        (subst.kind == arNim and subst.external):
      getFilename(s, subst)
    else: ""

  var foundLinks: seq[LinkDef]

  # 1. explicit hyperlink references
  let refn = rstnodeToRefname(alias)
  let hyperlinks = findRef(s, refn)
  for link in hyperlinks:
    foundLinks.add LinkDef(ar: arHyperlink, priority: refPriority(link.kind),
                           target: link.value, info: link.info,
                           tooltip: "(" & $link.kind & ")")

  # 2. RST anchors, both internal and imported from other documents
  let substRst = findMainAnchorRst(s, alias.addNodes, n.info)
  for subst in substRst:
    let isInternal = subst.kind == arInternalRst
    var refname, fullRefname: string
    if isInternal:
      refname = subst.target.anchor
      fullRefname = refname
    else:  # arExternalRst
      refname = subst.refnameExt
      fullRefname = s.idxImports[getFilename(s, subst)].linkRelPath &
                      "/" & refname
    let anchorType =
      if isInternal: subst.anchorType
      else: subst.anchorTypeExt  # arExternalRst
    foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority,
                           target: newLeaf(fullRefname),
                           info: subst.info,
                           externFilename: getExternFilename(subst),
                           isTitle: isDocumentationTitle(refname),
                           tooltip: "(" & $anchorType & ")")

  # 3. anchors automatically generated from Nim symbols
  if roNimFile in s.options or s.nimFileImported:
    let substNim = findMainAnchorNim(s, signature=alias, n.info)
    for subst in substNim:
      let fullRefname =
        if subst.external:
          s.idxImports[getFilename(s, subst)].linkRelPath &
              "/" & subst.refname
        else: subst.refname
      foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority,
                             target: newLeaf(fullRefname),
                             externFilename: getExternFilename(subst),
                             isTitle: isDocumentationTitle(subst.refname),
                             info: subst.info, tooltip: subst.tooltip)

  foundLinks.sort(cmp = cmp, order = Descending)
  let aliasStr = addNodes(alias)

  if foundLinks.len == 0:  # nothing found
    result = n
    rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, aliasStr)
    return

  let best = foundLinks[0]
  let isExternal = best.externFilename != ""
  if isExternal:
    # Remember that the corresponding importdoc was actually needed.
    s.idxImports[best.externFilename].used = true
  let kind =
    if best.ar in {arHyperlink, arExternalRst}: rnHyperlink
    elif best.ar == arNim:
      if isExternal: rnHyperlink
      else: rnNimdocRef
    else: rnInternalRef
  result = newRstNode(kind)
  let documentName =  # filename without ext for `.nim`, title for `.md`
    if best.ar == arNim:
      changeFileExt(best.externFilename.extractFilename, "")
    elif isExternal:
      s.idxImports[best.externFilename].title
    else: best.externFilename.extractFilename
  let linkText =
    if isExternal:
      if best.isTitle: newLeaf(addNodes(desc))
      else: newLeaf(documentName & ": " & addNodes(desc))
    else:
      newRstNode(rnInner, desc.sons)
  result.sons = @[linkText, best.target]
  if kind == rnNimdocRef:
    result.tooltip = best.tooltip

  if foundLinks.len > 1:  # report ambiguous link
    var targets = newSeq[string]()
    for candidate in foundLinks:
      var entry = " "
      if s.filenames.len > 1:
        entry.add getFilename(s.filenames, candidate.info.fileIndex)
      let lineNo = candidate.info.line
      let colNo = candidate.info.col + ColRstOffset
      entry.add "($1, $2): $3" % [$lineNo, $colNo, candidate.tooltip]
      targets.add entry
    rstMessage(s.filenames, s.msgHandler, n.info, mwAmbiguousLink,
               "`$1`\n clash:\n$2" % [
                 aliasStr, targets.join("\n")])
proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode =
  ## Makes pass 2 of RST parsing.
  ## Resolves substitutions and anchor aliases, groups footnotes.
  ## Takes input node `n` and returns the same node with recursive
  ## substitutions in `n.sons` to `result`.
  ##
  ## * Substitution references are replaced by their definition or, failing
  ##   that, by the value of the environment variable of the same name;
  ##   an unknown substitution is reported with `mwUnknownSubstitution`.
  ## * RST and Pandoc references are resolved by `resolveLink`.
  ## * Footnotes get their final label and their body is resolved
  ##   recursively; footnote references are rebuilt as a fresh node made of
  ##   a label and a link target.
  ## * For every other non-leaf node the sons are resolved in place and
  ##   consecutive footnotes are grouped under one rnFootnoteGroup node.
  ##
  ## A nil `n` is returned as is.
  result = n
  if n == nil: return
  case n.kind
  of rnSubstitutionReferences:
    var x = findSub(s, n)
    if x >= 0:
      result = s.subs[x].value
    else:
      # Fall back to an environment variable named like the substitution.
      var key = addNodes(n)
      var e = getEnv(key)
      if e != "": result = newLeaf(e)
      else: rstMessage(s.filenames, s.msgHandler, n.info,
                       mwUnknownSubstitution, key)
  of rnRstRef, rnPandocRef:
    result = resolveLink(s, n)
  of rnFootnote:
    var (fnType, num) = getFootnoteType(s, n.sons[0])
    case fnType
    of fnManualNumber, fnCitation:
      discard "no need to alter fixed text"
    of fnAutoNumberLabel, fnAutoNumber:
      if fnType == fnAutoNumberLabel:
        let labelR = rstnodeToRefname(n.sons[0])
        num = getFootnoteNum(s, labelR)
      else:
        num = getFootnoteNum(s, n.order)
      # Replace the label placeholder by the number that was assigned.
      var nn = newRstNode(rnInner)
      nn.add newLeaf($num)
      result.sons[0] = nn
    of fnAutoSymbol:
      let sym = getAutoSymbol(s, n.order)
      n.sons[0].sons[0].text = sym
    n.sons[1] = resolveSubs(s, n.sons[1])
  of rnFootnoteRef:
    var (fnType, num) = getFootnoteType(s, n.sons[0])
    template addLabel(number: int | string) =
      var nn = newRstNode(rnInner)
      nn.add newLeaf($number)
      result.add(nn)
    var refn = fnType.prefix
    # create new rnFootnoteRef, add final label, and finalize target refn:
    result = newRstNode(rnFootnoteRef, info = n.info)
    case fnType
    of fnManualNumber:
      addLabel num
      refn.add $num
    of fnAutoNumber:
      inc s.currFootnoteNumRef
      addLabel getFootnoteNum(s, s.currFootnoteNumRef)
      refn.add $s.currFootnoteNumRef
    of fnAutoNumberLabel:
      addLabel getFootnoteNum(s, rstnodeToRefname(n))
      refn.add rstnodeToRefname(n)
    of fnAutoSymbol:
      inc s.currFootnoteSymRef
      addLabel getAutoSymbol(s, s.currFootnoteSymRef)
      refn.add $s.currFootnoteSymRef
    of fnCitation:
      result.add n.sons[0]
      refn.add rstnodeToRefname(n)
    # TODO: correctly report ambiguities
    let anchorInfo = findMainAnchorRst(s, refn, n.info)
    if anchorInfo.len != 0:
      result.add newLeaf(anchorInfo[0].target.anchor)  # add link
    else:
      rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, refn)
      result.add newLeaf(refn)  # add link
  of rnLeaf:
    discard
  else:
    var regroup = false
    for i in 0 ..< n.len:
      n.sons[i] = resolveSubs(s, n.sons[i])
      if n.sons[i] != nil and n.sons[i].kind == rnFootnote:
        regroup = true
    if regroup:  # group footnotes together into rnFootnoteGroup
      var newSons: seq[PRstNode]
      var i = 0
      while i < n.len:
        if n.sons[i] != nil and n.sons[i].kind == rnFootnote:
          var grp = newRstNode(rnFootnoteGroup)
          # A nil son ends the run of footnotes. It must be checked here as
          # well: sons may be nil (see the guards above) and reading `.kind`
          # of a nil node would crash.
          while i < n.len and n.sons[i] != nil and
              n.sons[i].kind == rnFootnote:
            grp.sons.add n.sons[i]
            inc i
          newSons.add grp
        else:
          newSons.add n.sons[i]
          inc i
      result.sons = newSons
proc completePass2*(s: PRstSharedState) =
  ## Finishes pass 2: emits an `mwUnusedImportdoc` warning, located at the
  ## importing directive, for every imported document that was never marked
  ## as used.
  for filename, importInfo in s.idxImports.pairs:
    if importInfo.used:
      continue
    rstMessage(s.filenames, s.msgHandler, importInfo.fromInfo,
               mwUnusedImportdoc, filename)
proc rstParse*(text, filename: string,
               line, column: int,
               options: RstParseOptions,
               findFile: FindFileHandler = nil,
               findRefFile: FindRefFileHandler = nil,
               msgHandler: MsgHandler = nil):
              tuple[node: PRstNode, filenames: RstFileTable, hasToc: bool] =
  ## Parses the whole `text`. The result is ready for `rstgen.renderRstToOut`,
  ## note that 2nd tuple element should be fed to `initRstGenerator`
  ## argument `filenames` (it is being filled here at least with `filename`
  ## and possibly with other files from RST ``.. include::`` statement).
  ##
  ## Internally this runs pass 1 (`rstParsePass1`), then `preparePass2`,
  ## `resolveSubs` and `completePass2` on a freshly created shared state.
  var state = newRstSharedState(options, filename, findFile, findRefFile,
                                msgHandler, hasToc=false)
  let pass1Tree = rstParsePass1(text, line, column, state)
  preparePass2(state, pass1Tree)
  let resolvedTree = resolveSubs(state, pass1Tree)
  completePass2(state)
  # The shared state is read only after all passes have had a chance to
  # update it.
  result = (node: resolvedTree, filenames: state.filenames,
            hasToc: state.hasToc)
|