rst.nim 128 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements a `reStructuredText`:idx: (RST) and
  10. ## `Markdown`:idx: parser.
  11. ## User's manual on supported markup syntax and command line usage can be
  12. ## found in [Nim-flavored Markdown and reStructuredText](markdown_rst.html).
  13. ##
  14. ## * See also [Nim DocGen Tools Guide](docgen.html) for handling of
  15. ## ``.nim`` files.
  16. ## * See also [packages/docutils/rstgen module](rstgen.html) to know how to
  17. ## generate HTML or Latex strings (for embedding them into custom documents).
  18. ##
  19. ## Choice between Markdown and RST as well as optional additional features are
  20. ## turned on by passing ``options:`` [RstParseOptions] to [proc rstParse].
  21. import
  22. os, strutils, rstast, dochelpers, std/enumutils, algorithm, lists, sequtils,
  23. std/private/miscdollars, tables, strscans
  24. from highlite import SourceLanguage, getSourceLanguage
  25. when defined(nimPreviewSlimSystem):
  26. import std/[assertions, syncio]
  type
    RstParseOption* = enum      ## switches that tune the RST parser
      roSupportSmilies,         ## recognize smilies such as ``:)``
      roSupportRawDirective,    ## accept the ``raw`` directive (keep it off
                                ## when sandboxing)
      roSupportMarkdown,        ## enable the additional Markdown features
      roPreferMarkdown,         ## treat input as Markdown, with RST kept as
                                ## an "extension" -- implies
                                ## `roSupportMarkdown`
      roNimFile,                ## input is a Nim file, so the default
                                ## interpreted text role is :nim:
      roSandboxDisabled         ## re-enable features (e.g. raw, include) that
                                ## are off by default because they let users
                                ## read arbitrary data and perform XSS when
                                ## the parser is used in a web app

    RstParseOptions* = set[RstParseOption]

    MsgClass* = enum
      mcHint = "Hint",
      mcWarning = "Warning",
      mcError = "Error"

    # The member order below must stay in sync with compiler/docgen.nim and
    # compiler/lineinfos.nim.
    MsgKind* = enum             ## every message the parser can report
      meCannotOpenFile = "cannot open '$1'",
      meExpected = "'$1' expected",
      meMissingClosing = "$1",
      meGridTableNotImplemented = "grid table is not implemented",
      meMarkdownIllformedTable = "illformed delimiter row of a Markdown table",
      meIllformedTable = "Illformed table: $1",
      meNewSectionExpected = "new section expected $1",
      meGeneralParseError = "general parse error",
      meInvalidDirective = "invalid directive: '$1'",
      meInvalidField = "invalid field: $1",
      meFootnoteMismatch = "mismatch in number of footnotes and their refs: $1",
      mwRedefinitionOfLabel = "redefinition of label '$1'",
      mwUnknownSubstitution = "unknown substitution '$1'",
      mwAmbiguousLink = "ambiguous doc link $1",
      mwBrokenLink = "broken link '$1'",
      mwUnsupportedLanguage = "language '$1' not supported",
      mwUnsupportedField = "field '$1' not supported",
      mwRstStyle = "RST style: $1",
      meSandboxedDirective = "disabled directive: '$1'"

    MsgHandler* = proc (filename: string, line, col: int, msgKind: MsgKind,
                        arg: string) {.closure, gcsafe.} ## callback run for each reported message

    FindFileHandler* = proc (filename: string): string {.closure, gcsafe.}
  # Forward declarations: these procs are referenced before the point in the
  # file where their bodies appear.
  proc rstnodeToRefname*(n: PRstNode): string
  proc addNodes*(n: PRstNode): string
  proc getFieldValue*(n: PRstNode; fieldname: string): string {.gcsafe.}
  proc getArgument*(n: PRstNode): string
  76. # ----------------------------- scanner part --------------------------------
  const
    # Characters that may continue a word token (alphanumerics plus every
    # byte >= 0x80).
    SymChars: set[char] = {'0'..'9', 'A'..'Z', 'a'..'z', '\x80'..'\xFF'}

    # Characters with which an entry of `Smilies` can begin.
    SmileyStartChars: set[char] = {'8', ':', ';'}

    # Pairs of (smiley text, icon name); the order is significant for
    # anything that indexes or scans this array.
    Smilies = [
      (":D", "icon_e_biggrin"),
      (":-D", "icon_e_biggrin"),
      (":)", "icon_e_smile"),
      (":-)", "icon_e_smile"),
      (";)", "icon_e_wink"),
      (";-)", "icon_e_wink"),
      (":(", "icon_e_sad"),
      (":-(", "icon_e_sad"),
      (":o", "icon_e_surprised"),
      (":-o", "icon_e_surprised"),
      (":shock:", "icon_eek"),
      (":?", "icon_e_confused"),
      (":-?", "icon_e_confused"),
      (":-/", "icon_e_confused"),
      ("8-)", "icon_cool"),
      (":lol:", "icon_lol"),
      (":x", "icon_mad"),
      (":-x", "icon_mad"),
      (":P", "icon_razz"),
      (":-P", "icon_razz"),
      (":oops:", "icon_redface"),
      (":cry:", "icon_cry"),
      (":evil:", "icon_evil"),
      (":twisted:", "icon_twisted"),
      (":roll:", "icon_rolleyes"),
      (":!:", "icon_exclaim"),
      (":?:", "icon_question"),
      (":idea:", "icon_idea"),
      (":arrow:", "icon_arrow"),
      (":|", "icon_neutral"),
      (":-|", "icon_neutral"),
      (":mrgreen:", "icon_mrgreen"),
      (":geek:", "icon_e_geek"),
      (":ugeek:", "icon_e_ugeek")
    ]

    # Directive names listed for sandboxed operation.
    SandboxDirAllowlist = [
      "image", "code", "code-block", "admonition", "attention", "caution",
      "container", "contents", "danger", "default-role", "error", "figure",
      "hint", "important", "index", "note", "role", "tip", "title", "warning"
    ]
  type
    TokType = enum
      tkEof,
      tkIndent,
      tkWhite,
      tkWord,
      tkAdornment,          # chapter adornment, transitions and horizontal
                            # table borders
      tkPunct,              # a run of one or more punctuation characters
      tkOther

    Token = object          # a single RST token
      kind*: TokType        # which sort of token this is
      ival*: int            # indentation or parsed integer value
      symbol*: string       # the token text
      line*, col*: int      # position of the token

    TokenSeq = seq[Token]

    Lexer = object of RootObj
      buf*: cstring         # input being scanned
      bufpos*: int          # current offset into `buf`
      line*, col*, baseIndent*: int
      adornmentLine*: bool  # whether the current line was classified as an
                            # adornment line
      escapeNext*: bool     # set after a backslash so that the following
                            # punctuation is lexed as one escaped character
  proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
    ## Lexes one `tkWord` token. The character at the current buffer position
    ## is always consumed, then every following character that belongs to `s`.
    ## Stores the token's line/column and advances `L.col` and `L.bufpos`
    ## past the consumed text.
    tok.kind = tkWord
    tok.line = L.line
    tok.col = L.col
    var cur = L.bufpos
    tok.symbol.add L.buf[cur]   # the first character is taken unconditionally
    inc cur
    while L.buf[cur] in s:
      tok.symbol.add L.buf[cur]
      inc cur
    L.col += cur - L.bufpos
    L.bufpos = cur
  proc isCurrentLineAdornment(L: var Lexer): bool =
    ## Checks whether the rest of the current line, starting at `L.bufpos`,
    ## looks like an adornment line. When the first character is `+` (grid
    ## table) only `-`, `=` and `+` may follow; otherwise only repetitions of
    ## the first character and blanks may follow. Does not move the lexer.
    let first = L.buf[L.bufpos]
    let allowed =
      if first == '+': {'-', '=', '+'}            # grid table
      else: {first, ' ', '\t', '\v', '\f'}        # section adornment or
                                                  # table horizontal border
    var i = L.bufpos + 1
    while L.buf[i] notin {'\c', '\l', '\0'}:
      if L.buf[i] notin allowed:
        return false
      inc i
    result = true
  proc getPunctAdornment(L: var Lexer, tok: var Token) =
    ## Lexes punctuation starting at `L.bufpos` into `tok`.
    ##
    ## * If `L.escapeNext` is set, exactly one character is consumed.
    ## * Else, a backslash outside an adornment line is consumed on its own
    ##   and sets `L.escapeNext`.
    ## * Otherwise the whole run of identical characters is consumed.
    ##
    ## The token kind is `tkAdornment` on adornment lines and `tkPunct`
    ## elsewhere, except that a lone backslash is always `tkPunct`.
    tok.kind = if L.adornmentLine: tkAdornment else: tkPunct
    tok.line = L.line
    tok.col = L.col
    let first = L.bufpos
    let ch = L.buf[first]
    var stop = first + 1
    if L.escapeNext:
      tok.symbol.add ch
    elif ch == '\\' and not L.adornmentLine:
      tok.symbol.add '\\'
      L.escapeNext = true
    else:
      # extend over the whole run of the same character
      while L.buf[stop] == ch:
        inc stop
      for _ in first ..< stop:
        tok.symbol.add ch
    L.col += stop - first
    L.bufpos = stop
    # nim extension: standalone \ can not be adornment
    if tok.symbol == "\\":
      tok.kind = tkPunct
  proc getBracket(L: var Lexer, tok: var Token) =
    ## Lexes the single character at `L.bufpos` as a `tkPunct` token and
    ## advances the lexer by one position.
    tok.kind = tkPunct
    tok.line = L.line
    tok.col = L.col
    let ch = L.buf[L.bufpos]
    tok.symbol.add ch
    L.col += 1
    L.bufpos += 1
  proc getIndentAux(L: var Lexer, start: int): int =
    ## Returns the indentation width found after the newline at `start`.
    ## Spaces, `\v` and `\f` count as one column; a tab advances to the next
    ## multiple of 8. If the line turns out to be blank, the result is taken
    ## from the following line (recursively); at end of input it is 0.
    ## On return `L.bufpos` is set to the first position after the
    ## whitespace scanned by this (outermost) call.
    var cur = start
    # skip the newline (but include it in the token!)
    case L.buf[cur]
    of '\r':
      if L.buf[cur + 1] == '\n': cur += 2
      else: cur += 1
    of '\n':
      cur += 1
    else:
      discard
    var width = 0
    while L.buf[cur] in {' ', '\v', '\f', '\t'}:
      if L.buf[cur] == '\t':
        width = width - (width mod 8) + 8
      else:
        inc width
      inc cur
    # any other character, including end of file, ends the scan above
    if L.buf[cur] == '\0':
      result = 0
    elif L.buf[cur] in {'\n', '\r'}:
      # look at the next line for proper indentation:
      result = getIndentAux(L, cur)
    else:
      result = width
    L.bufpos = cur # no need to set back buf
  proc getIndent(L: var Lexer, tok: var Token) =
    ## Lexes a newline plus the indentation that follows it into a `tkIndent`
    ## token. `tok.ival` is the indentation relative to `L.baseIndent`
    ## (never negative) and `tok.symbol` is a newline followed by that many
    ## spaces. Also bumps `L.line` and sets `L.col` to the raw indentation.
    tok.col = 0
    tok.kind = tkIndent
    # skip the newline (but include it in the token!)
    let rawIndent = getIndentAux(L, L.bufpos)
    inc L.line
    tok.line = L.line
    L.col = rawIndent
    tok.ival = max(rawIndent - L.baseIndent, 0)
    tok.symbol = "\n" & spaces(tok.ival)
  proc rawGetTok(L: var Lexer, tok: var Token) =
    ## Reads the next token from `L` into `tok`, dispatching on the character
    ## at `L.bufpos`. Finally `tok.col` is made relative to `L.baseIndent`
    ## (clamped at 0).
    tok.symbol = ""
    tok.ival = 0
    if L.col == 0:
      L.adornmentLine = false
    let ch = L.buf[L.bufpos]
    case ch
    of '0'..'9', 'A'..'Z', 'a'..'z', '\x80'..'\xFF':
      getThing(L, tok, SymChars)
    of ' ', '\t', '\v', '\f':
      getThing(L, tok, {' ', '\t'})
      tok.kind = tkWhite
      if L.buf[L.bufpos] in {'\r', '\n'}:
        # ignore spaces before \n: lex the newline into the same token
        rawGetTok(L, tok)
    of '\r', '\n':
      getIndent(L, tok)
      L.adornmentLine = false
    of '!', '\"', '#', '$', '%', '&', '\'', '*', '+', ',', '-', '.',
       '/', ':', ';', '<', '=', '>', '?', '@', '\\', '^', '_', '`',
       '|', '~':
      # adornment status is decided once, at the start of a line
      if L.col == 0:
        L.adornmentLine = L.isCurrentLineAdornment()
      getPunctAdornment(L, tok)
    of '(', ')', '[', ']', '{', '}':
      getBracket(L, tok)
    else:
      tok.line = L.line
      tok.col = L.col
      if ch != '\0':
        tok.kind = tkOther
        tok.symbol.add ch
        inc L.bufpos
        inc L.col
      else:
        # end of input: nothing is consumed
        tok.kind = tkEof
    tok.col = max(tok.col - L.baseIndent, 0)
  proc getTokens(buffer: string, tokens: var TokenSeq) =
    ## Tokenizes `buffer`, appending the tokens to `tokens`; the last token
    ## appended is always `tkEof`. If the first element of `tokens` is
    ## whitespace it is converted into a `tkIndent` token whose `ival` is the
    ## length of its text.
    var lexer: Lexer
    lexer.buf = cstring(buffer)
    lexer.line = 0
    # skip UTF-8 BOM
    if lexer.buf[0] == '\xEF' and lexer.buf[1] == '\xBB' and
        lexer.buf[2] == '\xBF':
      inc lexer.bufpos, 3
    while true:
      tokens.add Token()
      # remember whether this token is the escaped one, so the flag can be
      # cleared once it has been lexed
      let wasEscaping = lexer.escapeNext
      rawGetTok(lexer, tokens[^1])
      if wasEscaping:
        lexer.escapeNext = false
      if tokens[^1].kind == tkEof:
        break
    if tokens[0].kind == tkWhite:
      # BUGFIX
      tokens[0].ival = tokens[0].symbol.len
      tokens[0].kind = tkIndent
  type
    LevelInfo = object
      symbol: char          # adornment character
      hasOverline: bool     # is there an overline in addition to the
                            # underline?
      line: int             # last line where this style occurred
                            # (for error messages)
      hasPeers: bool        # are there headings on the same hierarchy level?

    LiteralBlockKind = enum # RST-style literal blocks after `::`
      lbNone,
      lbIndentedLiteralBlock,
      lbQuotedLiteralBlock

    LevelMap = seq[LevelInfo] # for each title adornment style: its level in
                              # the current document

    SubstitutionKind = enum
      rstSubstitution = "substitution",
      hyperlinkAlias = "hyperlink alias",
      implicitHyperlinkAlias = "implicitly-generated hyperlink alias"

    Substitution = object
      kind*: SubstitutionKind
      key*: string
      value*: PRstNode
      info*: TLineInfo      # place where the substitution was defined

    AnchorRule = enum
      arInternalRst,  ## automatically generated RST anchors (from headings,
                      ## footnotes, inline internal targets):
                      ## case-insensitive, 1-space-significant (by RST spec)
      arNim,          ## anchors generated by ``docgen.rst``: Nim-style case
                      ## sensitivity, etc. (see `proc normalizeNimName`_ for
                      ## details)
      arHyperlink     ## links with manually set anchors in the form
                      ## `text <pagename.html#anchor>`_

    RstAnchorKind = enum
      manualDirectiveAnchor = "manual directive anchor",
      manualInlineAnchor = "manual inline anchor",
      footnoteAnchor = "footnote anchor",
      headlineAnchor = "implicitly-generated headline anchor"

    AnchorSubst = object
      info: TLineInfo       # where the anchor was defined
      priority: int
      case kind: range[arInternalRst .. arNim]
      of arInternalRst:
        anchorType: RstAnchorKind
        target: PRstNode
      of arNim:
        tooltip: string     # tooltip shown for Nim-generated anchors
        langSym: LangSymbol
        refname: string     # reference name inserted directly into
                            # HTML/Latex

    AnchorSubstTable = Table[string, seq[AnchorSubst]]
      # `seq` is used to account for duplicate anchors

    FootnoteType = enum
      fnManualNumber,       # manually numbered footnote like [3]
      fnAutoNumber,         # auto-numbered footnote [#]
      fnAutoNumberLabel,    # auto-numbered with label [#label]
      fnAutoSymbol,         # auto-symbol footnote [*]
      fnCitation            # simple text label like [citation2021]

    FootnoteSubst = tuple
      kind: FootnoteType    # discriminator
      number: int           # valid for fnManualNumber (always) and for
                            # fnAutoNumber, fnAutoNumberLabel after
                            # resolveSubs is called
      autoNumIdx: int       # order of occurrence: fnAutoNumber,
                            # fnAutoNumberLabel
      autoSymIdx: int       # order of occurrence: fnAutoSymbol
      label: string         # valid for fnAutoNumberLabel

    RstFileTable* = object
      filenameToIdx*: Table[string, FileIndex]
      idxToFilename*: seq[string]

    RstSharedState = object
      options*: RstParseOptions   # parsing options
      hLevels: LevelMap           # hierarchy of heading styles
      hTitleCnt: int              # =0 if no title, =1 if only main title,
                                  # =2 if both title and subtitle are present
      hCurLevel: int              # current section level
      currRole: string            # current interpreted text role
      currRoleKind: RstNodeKind   # ... and its node kind
      subs: seq[Substitution]     # substitutions
      refs*: seq[Substitution]    # references
      anchors*: AnchorSubstTable
                                  # internal target substitutions
      lineFootnoteNum: seq[TLineInfo]     # footnote line, auto numbers
                                          # .. [#]
      lineFootnoteNumRef: seq[TLineInfo]  # footnote line, their reference
                                          # [#]_
      currFootnoteNumRef: int             # ... their counter for
                                          # `resolveSubs`
      lineFootnoteSym: seq[TLineInfo]     # footnote line, auto symbols
                                          # .. [*]
      lineFootnoteSymRef: seq[TLineInfo]  # footnote line, their reference
                                          # [*]_
      currFootnoteSymRef: int             # ... their counter for
                                          # `resolveSubs`
      footnotes: seq[FootnoteSubst]       # correspondence b/w footnote
                                          # label, number, order of
                                          # occurrence
      msgHandler: MsgHandler      # how to handle errors
      findFile: FindFileHandler   # how to find files
      filenames*: RstFileTable    # map file name <-> FileIndex (for storing
                                  # file names for warnings after 1st stage)
      currFileIdx*: FileIndex     # current index in `filenames`
      tocPart*: seq[PRstNode]     # all the headings of a document
      hasToc*: bool

    PRstSharedState* = ref RstSharedState

    ManualAnchor = object
      alias: string         # a (short) name that can substitute the `anchor`
      anchor: string        # anchor = id = refname
      info: TLineInfo

    RstParser = object of RootObj
      idx*: int
      tok*: TokenSeq
      s*: PRstSharedState
      indentStack*: seq[int]
      line*, col*: int      ## initial line/column of the whole text or
                            ## documentation fragment; added to the
                            ## (relative) line/column of a token when an
                            ## error/warning is reported
      curAnchors*: seq[ManualAnchor]
                            ## accumulates aliases for anchors, because RST
                            ## can have >1 alias per 1 anchor

    EParseError* = object of ValueError
  const
    LineRstInit* = 1    ## Line number at which standalone RST text starts
    ColRstInit* = 0     ## Column number at which standalone RST text starts
                        ## (Nim global reporting adds ColOffset=1)
    ColRstOffset* = 1   ## 1: a copy of ColOffset kept for internal use
  template currentTok(p: RstParser): Token =
    ## The token at the parser's current index.
    p.tok[p.idx]

  template prevTok(p: RstParser): Token =
    ## The token one position before the current index.
    p.tok[p.idx - 1]

  template nextTok(p: RstParser): Token =
    ## The token one position after the current index.
    p.tok[p.idx + 1]
  proc whichMsgClass*(k: MsgKind): MsgClass =
    ## Returns the message class `k` belongs to, derived from the second
    ## character of the enum member's identifier (`e` error, `w` warning,
    ## `h` hint, either case).
    let marker = k.symbolName[1]
    if marker in {'e', 'E'}:
      result = mcError
    elif marker in {'w', 'W'}:
      result = mcWarning
    elif marker in {'h', 'H'}:
      result = mcHint
    else:
      assert false, "msgkind does not fit naming scheme"
  proc defaultMsgHandler*(filename: string, line, col: int, msgkind: MsgKind,
                          arg: string) =
    ## Default message handler: formats the message with its location
    ## (the column is shifted by `ColRstOffset`) and its class, then raises
    ## `EParseError` for errors or writes the text to stdout otherwise.
    let severity = whichMsgClass(msgkind)
    let body = $msgkind % arg
    var text: string
    toLocation(text, filename, line, col + ColRstOffset)
    text.add " $1: $2" % [$severity, body]
    if severity == mcError:
      raise newException(EParseError, text)
    writeLine(stdout, text)
  proc defaultFindFile*(filename: string): string =
    ## Default file finder: returns `filename` unchanged when such a file
    ## exists, and the empty string otherwise.
    if not fileExists(filename):
      return ""
    result = filename
  proc defaultRole(options: RstParseOptions): string =
    ## Name of the default interpreted text role: "nim" when `roNimFile` is
    ## set in `options`, "literal" otherwise.
    result = "literal"
    if roNimFile in options:
      result = "nim"
  proc whichRoleAux(sym: string): RstNodeKind =
    ## Maps a role name (compared case-insensitively via `toLowerAscii`) to
    ## the node kind used for it. Names that are not built in but are known
    ## to `getSourceLanguage` yield `rnInlineCode`; anything else yields
    ## `rnUnknownRole`.
    let role = toLowerAscii(sym)
    case role
    of "idx":
      result = rnIdx
    of "literal", "code":
      # literal and code are the same in our implementation
      result = rnInlineLiteral
    of "strong":
      result = rnStrongEmphasis
    of "emphasis":
      result = rnEmphasis
    of "sub", "subscript":
      result = rnSub
    of "sup", "superscript":
      result = rnSup
    of "program", "option", "tok":
      result = rnCodeFragment
    # c++ currently can be spelled only as cpp, c# only as csharp
    elif getSourceLanguage(role) != langNone:
      result = rnInlineCode
    else: # unknown role
      result = rnUnknownRole
  proc len(filenames: RstFileTable): int =
    ## Number of file names stored in the table.
    result = len(filenames.idxToFilename)
  proc addFilename*(s: PRstSharedState, file1: string): FileIndex =
    ## Returns the index of `file1`, registering the name first if it has
    ## not been used before.
    let fresh = FileIndex(s.filenames.idxToFilename.len)
    # `fresh` doubles as the "not found" marker of the lookup
    result = s.filenames.filenameToIdx.getOrDefault(file1, fresh)
    if result != fresh:
      return
    s.filenames.filenameToIdx[file1] = result
    s.filenames.idxToFilename.add file1
  proc setCurrFilename*(s: PRstSharedState, file1: string) =
    ## Makes `file1` the current file of `s`, registering it via
    ## `addFilename` when needed.
    let idx = s.addFilename(file1)
    s.currFileIdx = idx
  proc getFilename(filenames: RstFileTable, fid: FileIndex): string =
    ## Looks up the file name stored under index `fid`. An index outside
    ## the table fails the `doAssert` below.
    doAssert(0 <= fid.int and fid.int < filenames.len,
             "incorrect FileIndex $1 (range 0..$2)" % [
               $fid.int, $(filenames.len - 1)])
    filenames.idxToFilename[fid.int]
  455. proc currFilename(s: PRstSharedState): string =
  456. getFilename(s.filenames, s.currFileIdx)
  457. proc newRstSharedState*(options: RstParseOptions,
  458. filename: string,
  459. findFile: FindFileHandler,
  460. msgHandler: MsgHandler,
  461. hasToc: bool): PRstSharedState =
  462. let r = defaultRole(options)
  463. result = PRstSharedState(
  464. currRole: r,
  465. currRoleKind: whichRoleAux(r),
  466. options: options,
  467. msgHandler: if not isNil(msgHandler): msgHandler else: defaultMsgHandler,
  468. findFile: if not isNil(findFile): findFile else: defaultFindFile,
  469. hasToc: hasToc
  470. )
  471. setCurrFilename(result, filename)
  472. proc curLine(p: RstParser): int = p.line + currentTok(p).line
  473. proc findRelativeFile(p: RstParser; filename: string): string =
  474. result = p.s.currFilename.splitFile.dir / filename
  475. if not fileExists(result):
  476. result = p.s.findFile(filename)
  477. proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string) =
  478. p.s.msgHandler(p.s.currFilename, curLine(p),
  479. p.col + currentTok(p).col, msgKind, arg)
  480. proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string) =
  481. s.msgHandler(s.currFilename, LineRstInit, ColRstInit, msgKind, arg)
  482. proc rstMessage*(filenames: RstFileTable, f: MsgHandler,
  483. info: TLineInfo, msgKind: MsgKind, arg: string) =
  484. ## Print warnings using `info`, i.e. in 2nd-pass warnings for
  485. ## footnotes/substitutions/references or from ``rstgen.nim``.
  486. let file = getFilename(filenames, info.fileIndex)
  487. f(file, info.line.int, info.col.int, msgKind, arg)
  488. proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string, line, col: int) =
  489. p.s.msgHandler(p.s.currFilename, p.line + line,
  490. p.col + col, msgKind, arg)
  491. proc rstMessage(p: RstParser, msgKind: MsgKind) =
  492. p.s.msgHandler(p.s.currFilename, curLine(p),
  493. p.col + currentTok(p).col, msgKind,
  494. currentTok(p).symbol)
  495. proc currInd(p: RstParser): int =
  496. result = p.indentStack[high(p.indentStack)]
  497. proc pushInd(p: var RstParser, ind: int) =
  498. p.indentStack.add(ind)
  499. proc popInd(p: var RstParser) =
  500. if p.indentStack.len > 1: setLen(p.indentStack, p.indentStack.len - 1)
  501. # Working with indentation in rst.nim
  502. # -----------------------------------
  503. #
# Every line break has an associated tkIndent.
# The tokenizer writes back the first column of the next non-blank line
# in all preceding tkIndent tokens to the `ival` field of tkIndent.
  507. #
# An RST document is separated into body elements (B.E.), each of which
# has a dedicated handler proc (or block of logic when B.E. is a block quote)
# that should follow the following rule:
# Every B.E. handler proc should finish at tkIndent (newline)
# after its B.E. finishes.
  513. # Then its callers (which is `parseSection` or another B.E. handler)
  514. # check for tkIndent ival (without necessity to advance `p.idx`)
  515. # and decide themselves whether they continue processing or also stop.
  516. #
# An example::
#
#   L    RST text fragment                  indentation
#     +--------------------+
#   1 |                    | <- (empty line at the start of file) no tokens
#   2 |First paragraph.    | <- tkIndent has ival=0, and next tkWord has col=0
#   3 |                    | <- tkIndent has ival=0
#   4 |* bullet item and   | <- tkIndent has ival=0, and next tkPunct has col=0
#   5 |  its continuation  | <- tkIndent has ival=2, and next tkWord has col=2
#   6 |                    | <- tkIndent has ival=4
#   7 |    Block quote     | <- tkIndent has ival=4, and next tkWord has col=4
#   8 |                    | <- tkIndent has ival=0
#   9 |                    | <- tkIndent has ival=0
#   10|Final paragraph     | <- tkIndent has ival=0, and tkWord has col=0
#     +--------------------+
#    C:01234
  533. #
  534. # Here parser starts with initial `indentStack=[0]` and then calls the
  535. # 1st `parseSection`:
  536. #
  537. # - `parseSection` calls `parseParagraph` and "First paragraph" is parsed
# - bullet list handler is started at reaching ``*`` (L4 C0), it
#   starts bullet item logic (L4 C2), which calls `pushInd(p, ind=2)`,
#   then calls `parseSection` (2nd call, nested) which parses
#   paragraph "bullet item and its continuation" and then starts
#   a block quote logic (L7 C4).
#   The block quote logic calls `pushInd(p, ind=4)` and
#   calls `parseSection` again, so a (simplified) sequence of calls now is::
#
#     parseSection -> parseBulletList ->
#       parseSection (+block quote logic) -> parseSection
  548. #
  549. # 3rd `parseSection` finishes, block quote logic calls `popInd(p)`,
  550. # it returns to bullet item logic, which sees that next tkIndent has
  551. # ival=0 and stops there since the required indentation for a bullet item
  552. # is 2 and 0<2; the bullet item logic calls `popInd(p)`.
  553. # Then bullet list handler checks that next tkWord (L10 C0) has the
  554. # right indentation but does not have ``*`` so stops at tkIndent (L10).
  555. # - 1st `parseSection` invocation calls `parseParagraph` and the
  556. # "Final paragraph" is parsed.
  557. #
  558. # If a B.E. handler has advanced `p.idx` past tkIndent to check
  559. # whether it should continue its processing or not, and decided not to,
  560. # then this B.E. handler should step back (e.g. do `dec p.idx`).
  561. proc initParser(p: var RstParser, sharedState: PRstSharedState) =
  562. p.indentStack = @[0]
  563. p.tok = @[]
  564. p.idx = 0
  565. p.col = ColRstInit
  566. p.line = LineRstInit
  567. p.s = sharedState
  568. proc addNodesAux(n: PRstNode, result: var string) =
  569. if n == nil:
  570. return
  571. if n.kind == rnLeaf:
  572. result.add(n.text)
  573. else:
  574. for i in 0 ..< n.len: addNodesAux(n.sons[i], result)
  575. proc addNodes(n: PRstNode): string =
  576. n.addNodesAux(result)
  577. proc linkName(n: PRstNode): string =
  578. ## Returns a normalized reference name, see:
  579. ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names
  580. n.addNodes.toLowerAscii
  581. proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) =
  582. template special(s) =
  583. if b:
  584. r.add('-')
  585. b = false
  586. r.add(s)
  587. if n == nil: return
  588. if n.kind == rnLeaf:
  589. for i in 0 ..< n.text.len:
  590. case n.text[i]
  591. of '0'..'9':
  592. if b:
  593. r.add('-')
  594. b = false
  595. if r.len == 0: r.add('Z')
  596. r.add(n.text[i])
  597. of 'a'..'z', '\128'..'\255':
  598. if b:
  599. r.add('-')
  600. b = false
  601. r.add(n.text[i])
  602. of 'A'..'Z':
  603. if b:
  604. r.add('-')
  605. b = false
  606. r.add(chr(ord(n.text[i]) - ord('A') + ord('a')))
  607. of '$': special "dollar"
  608. of '%': special "percent"
  609. of '&': special "amp"
  610. of '^': special "roof"
  611. of '!': special "emark"
  612. of '?': special "qmark"
  613. of '*': special "star"
  614. of '+': special "plus"
  615. of '-': special "minus"
  616. of '/': special "slash"
  617. of '\\': special "backslash"
  618. of '=': special "eq"
  619. of '<': special "lt"
  620. of '>': special "gt"
  621. of '~': special "tilde"
  622. of ':': special "colon"
  623. of '.': special "dot"
  624. of '@': special "at"
  625. of '|': special "bar"
  626. else:
  627. if r.len > 0: b = true
  628. else:
  629. for i in 0 ..< n.len: rstnodeToRefnameAux(n.sons[i], r, b)
  630. proc rstnodeToRefname(n: PRstNode): string =
  631. var b = false
  632. rstnodeToRefnameAux(n, result, b)
  633. proc findSub(s: PRstSharedState, n: PRstNode): int =
  634. var key = addNodes(n)
  635. # the spec says: if no exact match, try one without case distinction:
  636. for i in countup(0, high(s.subs)):
  637. if key == s.subs[i].key:
  638. return i
  639. for i in countup(0, high(s.subs)):
  640. if cmpIgnoreStyle(key, s.subs[i].key) == 0:
  641. return i
  642. result = -1
  643. proc lineInfo(p: RstParser, iTok: int): TLineInfo =
  644. result.col = int16(p.col + p.tok[iTok].col)
  645. result.line = uint16(p.line + p.tok[iTok].line)
  646. result.fileIndex = p.s.currFileIdx
  647. proc lineInfo(p: RstParser): TLineInfo = lineInfo(p, p.idx)
  648. # TODO: we need this simplification because we don't preserve exact starting
  649. # token of currently parsed element:
  650. proc prevLineInfo(p: RstParser): TLineInfo = lineInfo(p, p.idx-1)
  651. proc setSub(p: var RstParser, key: string, value: PRstNode) =
  652. var length = p.s.subs.len
  653. for i in 0 ..< length:
  654. if key == p.s.subs[i].key:
  655. p.s.subs[i].value = value
  656. return
  657. p.s.subs.add(Substitution(key: key, value: value, info: prevLineInfo(p)))
  658. proc setRef(p: var RstParser, key: string, value: PRstNode,
  659. refType: SubstitutionKind) =
  660. var length = p.s.refs.len
  661. for i in 0 ..< length:
  662. if key == p.s.refs[i].key:
  663. if p.s.refs[i].value.addNodes != value.addNodes:
  664. rstMessage(p, mwRedefinitionOfLabel, key)
  665. p.s.refs[i].value = value
  666. return
  667. p.s.refs.add(Substitution(kind: refType, key: key, value: value,
  668. info: prevLineInfo(p)))
  669. proc findRef(s: PRstSharedState, key: string): seq[Substitution] =
  670. for i in countup(0, high(s.refs)):
  671. if key == s.refs[i].key:
  672. result.add s.refs[i]
  673. # Ambiguity in links: we don't follow procedure of removing implicit targets
  674. # defined in https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#implicit-hyperlink-targets
  675. # Instead we just give explicit links a higher priority than to implicit ones
  676. # and report ambiguities as warnings. Hopefully it is easy to remove
  677. # ambiguities manually. Nim auto-generated links from ``docgen.nim``
  678. # have lowest priority: 1 (for procs) and below for other symbol types.
  679. proc refPriority(k: SubstitutionKind): int =
  680. case k
  681. of rstSubstitution: result = 8
  682. of hyperlinkAlias: result = 7
  683. of implicitHyperlinkAlias: result = 2
  684. proc internalRefPriority(k: RstAnchorKind): int =
  685. case k
  686. of manualDirectiveAnchor: result = 6
  687. of manualInlineAnchor: result = 5
  688. of footnoteAnchor: result = 4
  689. of headlineAnchor: result = 3
  690. proc addAnchorRst(p: var RstParser, name: string, target: PRstNode,
  691. anchorType: RstAnchorKind) =
  692. ## Associates node `target` (which has field `anchor`) with an
  693. ## alias `name` and updates the corresponding aliases in `p.curAnchors`.
  694. let prio = internalRefPriority(anchorType)
  695. for a in p.curAnchors:
  696. p.s.anchors.mgetOrPut(a.alias, newSeq[AnchorSubst]()).add(
  697. AnchorSubst(kind: arInternalRst, target: target, priority: prio,
  698. info: a.info, anchorType: manualDirectiveAnchor))
  699. if name != "":
  700. p.s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add(
  701. AnchorSubst(kind: arInternalRst, target: target, priority: prio,
  702. info: prevLineInfo(p), anchorType: anchorType))
  703. p.curAnchors.setLen 0
  704. proc addAnchorNim*(s: var PRstSharedState, refn: string, tooltip: string,
  705. langSym: LangSymbol, priority: int,
  706. info: TLineInfo) =
  707. ## Adds an anchor `refn`, which follows
  708. ## the rule `arNim` (i.e. a symbol in ``*.nim`` file)
  709. s.anchors.mgetOrPut(langSym.name, newSeq[AnchorSubst]()).add(
  710. AnchorSubst(kind: arNim, refname: refn, langSym: langSym,
  711. tooltip: tooltip, priority: priority,
  712. info: info))
  713. proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode,
  714. info: TLineInfo):
  715. seq[AnchorSubst] =
  716. let langSym = toLangSymbol(signature)
  717. let substitutions = s.anchors.getOrDefault(langSym.name,
  718. newSeq[AnchorSubst]())
  719. if substitutions.len == 0:
  720. return
  721. # map symKind (like "proc") -> found symbols/groups:
  722. var found: Table[string, seq[AnchorSubst]]
  723. for s in substitutions:
  724. if s.kind == arNim:
  725. if match(s.langSym, langSym):
  726. found.mgetOrPut(s.langSym.symKind, newSeq[AnchorSubst]()).add s
  727. for symKind, sList in found:
  728. if sList.len == 1:
  729. result.add sList[0]
  730. else: # > 1, there are overloads, potential ambiguity in this `symKind`
  731. if langSym.parametersProvided:
  732. # there are non-group signatures, select only them
  733. for s in sList:
  734. if not s.langSym.isGroup:
  735. result.add s
  736. else: # when there are many overloads a link like foo_ points to all
  737. # of them, so selecting the group
  738. var foundGroup = false
  739. for s in sList:
  740. if s.langSym.isGroup:
  741. result.add s
  742. foundGroup = true
  743. break
  744. doAssert foundGroup, "docgen has not generated the group"
  745. proc findMainAnchorRst(s: PRstSharedState, linkText: string, info: TLineInfo):
  746. seq[AnchorSubst] =
  747. let name = linkText.toLowerAscii
  748. let substitutions = s.anchors.getOrDefault(name, newSeq[AnchorSubst]())
  749. for s in substitutions:
  750. if s.kind == arInternalRst:
  751. result.add s
  752. proc addFootnoteNumManual(p: var RstParser, num: int) =
  753. ## add manually-numbered footnote
  754. for fnote in p.s.footnotes:
  755. if fnote.number == num:
  756. rstMessage(p, mwRedefinitionOfLabel, $num)
  757. return
  758. p.s.footnotes.add((fnManualNumber, num, -1, -1, $num))
  759. proc addFootnoteNumAuto(p: var RstParser, label: string) =
  760. ## add auto-numbered footnote.
  761. ## Empty label [#] means it'll be resolved by the occurrence.
  762. if label == "": # simple auto-numbered [#]
  763. p.s.lineFootnoteNum.add lineInfo(p)
  764. p.s.footnotes.add((fnAutoNumber, -1, p.s.lineFootnoteNum.len, -1, label))
  765. else: # auto-numbered with label [#label]
  766. for fnote in p.s.footnotes:
  767. if fnote.label == label:
  768. rstMessage(p, mwRedefinitionOfLabel, label)
  769. return
  770. p.s.footnotes.add((fnAutoNumberLabel, -1, -1, -1, label))
  771. proc addFootnoteSymAuto(p: var RstParser) =
  772. p.s.lineFootnoteSym.add lineInfo(p)
  773. p.s.footnotes.add((fnAutoSymbol, -1, -1, p.s.lineFootnoteSym.len, ""))
  774. proc orderFootnotes(s: PRstSharedState) =
  775. ## numerate auto-numbered footnotes taking into account that all
  776. ## manually numbered ones always have preference.
  777. ## Save the result back to `s.footnotes`.
  778. # Report an error if found any mismatch in number of automatic footnotes
  779. proc listFootnotes(locations: seq[TLineInfo]): string =
  780. var lines: seq[string]
  781. for info in locations:
  782. if s.filenames.len > 1:
  783. let file = getFilename(s.filenames, info.fileIndex)
  784. lines.add file & ":"
  785. else: # no need to add file name here if there is only 1
  786. lines.add ""
  787. lines[^1].add $info.line
  788. result.add $lines.len & " (lines " & join(lines, ", ") & ")"
  789. if s.lineFootnoteNum.len != s.lineFootnoteNumRef.len:
  790. rstMessage(s, meFootnoteMismatch,
  791. "$1 != $2" % [listFootnotes(s.lineFootnoteNum),
  792. listFootnotes(s.lineFootnoteNumRef)] &
  793. " for auto-numbered footnotes")
  794. if s.lineFootnoteSym.len != s.lineFootnoteSymRef.len:
  795. rstMessage(s, meFootnoteMismatch,
  796. "$1 != $2" % [listFootnotes(s.lineFootnoteSym),
  797. listFootnotes(s.lineFootnoteSymRef)] &
  798. " for auto-symbol footnotes")
  799. var result: seq[FootnoteSubst]
  800. var manuallyN, autoN, autoSymbol: seq[FootnoteSubst]
  801. for fs in s.footnotes:
  802. if fs.kind == fnManualNumber: manuallyN.add fs
  803. elif fs.kind in {fnAutoNumber, fnAutoNumberLabel}: autoN.add fs
  804. else: autoSymbol.add fs
  805. if autoN.len == 0:
  806. result = manuallyN
  807. else:
  808. # fill gaps between manually numbered footnotes in ascending order
  809. manuallyN.sort() # sort by number - its first field
  810. var lst = initSinglyLinkedList[FootnoteSubst]()
  811. for elem in manuallyN: lst.append(elem)
  812. var firstAuto = 0
  813. if lst.head == nil or lst.head.value.number != 1:
  814. # no manual footnote [1], start numeration from 1 for auto-numbered
  815. lst.prepend (autoN[0].kind, 1, autoN[0].autoNumIdx, -1, autoN[0].label)
  816. firstAuto = 1
  817. var curNode = lst.head
  818. var nextNode: SinglyLinkedNode[FootnoteSubst]
  819. # go simultaneously through `autoN` and `lst` looking for gaps
  820. for (kind, x, autoNumIdx, y, label) in autoN[firstAuto .. ^1]:
  821. while (nextNode = curNode.next; nextNode != nil):
  822. if nextNode.value.number - curNode.value.number > 1:
  823. # gap found, insert new node `n` between curNode and nextNode:
  824. var n = newSinglyLinkedNode((kind, curNode.value.number + 1,
  825. autoNumIdx, -1, label))
  826. curNode.next = n
  827. n.next = nextNode
  828. curNode = n
  829. break
  830. else:
  831. curNode = nextNode
  832. if nextNode == nil: # no gap found, just append
  833. lst.append (kind, curNode.value.number + 1, autoNumIdx, -1, label)
  834. curNode = lst.tail
  835. result = lst.toSeq
  836. # we use ASCII symbols instead of those recommended in RST specification:
  837. const footnoteAutoSymbols = ["*", "^", "+", "=", "~", "$", "@", "%", "&"]
  838. for fs in autoSymbol:
  839. # assignment order: *, **, ***, ^, ^^, ^^^, ... &&&, ****, *****, ...
  840. let i = fs.autoSymIdx - 1
  841. let symbolNum = (i div 3) mod footnoteAutoSymbols.len
  842. let nSymbols = (1 + i mod 3) + 3 * (i div (3 * footnoteAutoSymbols.len))
  843. let label = footnoteAutoSymbols[symbolNum].repeat(nSymbols)
  844. result.add((fs.kind, -1, -1, fs.autoSymIdx, label))
  845. s.footnotes = result
  846. proc getFootnoteNum(s: PRstSharedState, label: string): int =
  847. ## get number from label. Must be called after `orderFootnotes`.
  848. result = -1
  849. for fnote in s.footnotes:
  850. if fnote.label == label:
  851. return fnote.number
  852. proc getFootnoteNum(s: PRstSharedState, order: int): int =
  853. ## get number from occurrence. Must be called after `orderFootnotes`.
  854. result = -1
  855. for fnote in s.footnotes:
  856. if fnote.autoNumIdx == order:
  857. return fnote.number
  858. proc getAutoSymbol(s: PRstSharedState, order: int): string =
  859. ## get symbol from occurrence of auto-symbol footnote.
  860. result = "???"
  861. for fnote in s.footnotes:
  862. if fnote.autoSymIdx == order:
  863. return fnote.label
  864. proc newRstNodeA(p: var RstParser, kind: RstNodeKind): PRstNode =
  865. ## create node and consume the current anchor
  866. result = newRstNode(kind)
  867. if p.curAnchors.len > 0:
  868. result.anchor = p.curAnchors[0].anchor
  869. addAnchorRst(p, "", result, manualDirectiveAnchor)
  870. template newLeaf(s: string): PRstNode = newRstLeaf(s)
  871. proc newLeaf(p: var RstParser): PRstNode =
  872. result = newLeaf(currentTok(p).symbol)
  873. proc validRefnamePunct(x: string): bool =
  874. ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names
  875. x.len == 1 and x[0] in {'-', '_', '.', ':', '+'}
  876. func getRefnameIdx(p: RstParser, startIdx: int): int =
  877. ## Gets last token index of a refname ("word" in RST terminology):
  878. ##
  879. ## reference names are single words consisting of alphanumerics plus
  880. ## isolated (no two adjacent) internal hyphens, underscores, periods,
  881. ## colons and plus signs; no whitespace or other characters are allowed.
  882. ##
  883. ## Refnames are used for:
  884. ## - reference names
  885. ## - role names
  886. ## - directive names
  887. ## - footnote labels
  888. ##
  889. # TODO: use this func in all other relevant places
  890. var j = startIdx
  891. if p.tok[j].kind == tkWord:
  892. inc j
  893. while p.tok[j].kind == tkPunct and validRefnamePunct(p.tok[j].symbol) and
  894. p.tok[j+1].kind == tkWord:
  895. inc j, 2
  896. result = j - 1
  897. func getRefname(p: RstParser, startIdx: int): (string, int) =
  898. let lastIdx = getRefnameIdx(p, startIdx)
  899. result[1] = lastIdx
  900. for j in startIdx..lastIdx:
  901. result[0].add p.tok[j].symbol
  902. proc getReferenceName(p: var RstParser, endStr: string): PRstNode =
  903. var res = newRstNode(rnInner)
  904. while true:
  905. case currentTok(p).kind
  906. of tkWord, tkOther, tkWhite:
  907. res.add(newLeaf(p))
  908. of tkPunct:
  909. if currentTok(p).symbol == endStr:
  910. inc p.idx
  911. break
  912. else:
  913. res.add(newLeaf(p))
  914. else:
  915. rstMessage(p, meExpected, endStr)
  916. break
  917. inc p.idx
  918. result = res
  919. proc untilEol(p: var RstParser): PRstNode =
  920. result = newRstNode(rnInner)
  921. while currentTok(p).kind notin {tkIndent, tkEof}:
  922. result.add(newLeaf(p))
  923. inc p.idx
  924. proc expect(p: var RstParser, tok: string) =
  925. if currentTok(p).symbol == tok: inc p.idx
  926. else: rstMessage(p, meExpected, tok)
  927. proc inlineMarkdownEnd(p: RstParser): bool =
  928. result = prevTok(p).kind notin {tkIndent, tkWhite}
  929. ## (For a special case of ` we don't allow spaces surrounding it
  930. ## unlike original Markdown because this behavior confusing/useless)
  931. proc inlineRstEnd(p: RstParser): bool =
  932. # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
  933. # Rule 2:
  934. result = prevTok(p).kind notin {tkIndent, tkWhite}
  935. if not result: return
  936. # Rule 7:
  937. result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
  938. nextTok(p).symbol[0] in
  939. {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
  940. proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
  941. if exact:
  942. result = currentTok(p).symbol == markup
  943. else:
  944. result = currentTok(p).symbol.endsWith markup
  945. if (not result) and markup == "``":
  946. # check that escaping may have splitted `` to 2 tokens ` and `
  947. result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`"
  948. if not result: return
  949. # surroundings check
  950. if markup in ["_", "__"]:
  951. result = inlineRstEnd(p)
  952. else:
  953. if roPreferMarkdown in p.s.options: result = inlineMarkdownEnd(p)
  954. else: result = inlineRstEnd(p)
  955. proc rstRuleSurround(p: RstParser): bool =
  956. result = true
  957. # Rules 4 & 5:
  958. if p.idx > 0:
  959. var d: char
  960. var c = prevTok(p).symbol[0]
  961. case c
  962. of '\'', '\"': d = c
  963. of '(': d = ')'
  964. of '[': d = ']'
  965. of '{': d = '}'
  966. of '<': d = '>'
  967. else: d = '\0'
  968. if d != '\0': result = nextTok(p).symbol[0] != d
  969. proc inlineMarkdownStart(p: RstParser): bool =
  970. result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
  971. if not result: return
  972. # this rst rule is really nice, let us use it in Markdown mode too.
  973. result = rstRuleSurround(p)
  974. proc inlineRstStart(p: RstParser): bool =
  975. ## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
  976. # Rule 6
  977. result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or
  978. prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
  979. if not result: return
  980. # Rule 1:
  981. result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
  982. if not result: return
  983. result = rstRuleSurround(p)
  984. proc isInlineMarkupStart(p: RstParser, markup: string): bool =
  985. if markup != "_`":
  986. result = currentTok(p).symbol == markup
  987. else: # _` is a 2 token case
  988. result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
  989. if not result: return
  990. # surroundings check
  991. if markup in ["_", "__", "[", "|"]:
  992. # Note: we require space/punctuation even before [markdown link](...)
  993. result = inlineRstStart(p)
  994. else:
  995. if roPreferMarkdown in p.s.options: result = inlineMarkdownStart(p)
  996. else: result = inlineRstStart(p)
  997. proc match(p: RstParser, start: int, expr: string): bool =
  998. # regular expressions are:
  999. # special char exact match
  1000. # 'w' tkWord
  1001. # ' ' tkWhite
  1002. # 'a' tkAdornment
  1003. # 'i' tkIndent
  1004. # 'I' tkIndent or tkEof
  1005. # 'p' tkPunct
  1006. # 'T' always true
  1007. # 'E' whitespace, indent or eof
  1008. # 'e' any enumeration sequence or '#' (for enumeration lists)
  1009. # 'x' a..z or '#' (for enumeration lists)
  1010. # 'n' 0..9 or '#' (for enumeration lists)
  1011. var i = 0
  1012. var j = start
  1013. var last = expr.len - 1
  1014. while i <= last:
  1015. case expr[i]
  1016. of 'w':
  1017. let lastIdx = getRefnameIdx(p, j)
  1018. result = lastIdx >= j
  1019. if result: j = lastIdx
  1020. of ' ': result = p.tok[j].kind == tkWhite
  1021. of 'i': result = p.tok[j].kind == tkIndent
  1022. of 'I': result = p.tok[j].kind in {tkIndent, tkEof}
  1023. of 'p': result = p.tok[j].kind == tkPunct
  1024. of 'a': result = p.tok[j].kind == tkAdornment
  1025. of 'o': result = p.tok[j].kind == tkOther
  1026. of 'T': result = true
  1027. of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent}
  1028. of 'e', 'x', 'n':
  1029. result = p.tok[j].kind == tkWord or p.tok[j].symbol == "#"
  1030. if result:
  1031. case p.tok[j].symbol[0]
  1032. of '#': result = true
  1033. of 'a'..'z', 'A'..'Z':
  1034. result = expr[i] in {'e', 'x'} and p.tok[j].symbol.len == 1
  1035. of '0'..'9':
  1036. result = expr[i] in {'e', 'n'} and
  1037. allCharsInSet(p.tok[j].symbol, {'0'..'9'})
  1038. else: result = false
  1039. else:
  1040. var c = expr[i]
  1041. var length = 0
  1042. while i <= last and expr[i] == c:
  1043. inc i
  1044. inc length
  1045. dec i
  1046. result = p.tok[j].kind in {tkPunct, tkAdornment} and
  1047. p.tok[j].symbol.len == length and p.tok[j].symbol[0] == c
  1048. if not result: return
  1049. inc j
  1050. inc i
  1051. result = true
  1052. proc safeProtocol*(linkStr: var string): string =
  1053. # Returns link's protocol and, if it's not safe, clears `linkStr`
  1054. result = ""
  1055. if scanf(linkStr, "$w:", result):
  1056. # if it has a protocol at all, ensure that it's not 'javascript:' or worse:
  1057. if cmpIgnoreCase(result, "http") == 0 or
  1058. cmpIgnoreCase(result, "https") == 0 or
  1059. cmpIgnoreCase(result, "ftp") == 0:
  1060. discard "it's fine"
  1061. else:
  1062. linkStr = ""
  1063. proc fixupEmbeddedRef(p: var RstParser, n, a, b: PRstNode): bool =
  1064. # Returns `true` if the link belongs to an allowed protocol
  1065. var sep = - 1
  1066. for i in countdown(n.len - 2, 0):
  1067. if n.sons[i].text == "<":
  1068. sep = i
  1069. break
  1070. var incr = if sep > 0 and n.sons[sep - 1].text[0] == ' ': 2 else: 1
  1071. for i in countup(0, sep - incr): a.add(n.sons[i])
  1072. var linkStr = ""
  1073. for i in countup(sep + 1, n.len - 2): linkStr.add(n.sons[i].addNodes)
  1074. if linkStr != "":
  1075. let protocol = safeProtocol(linkStr)
  1076. result = linkStr != ""
  1077. if not result:
  1078. rstMessage(p, mwBrokenLink, protocol,
  1079. p.tok[p.idx-3].line, p.tok[p.idx-3].col)
  1080. b.add newLeaf(linkStr)
  1081. proc whichRole(p: RstParser, sym: string): RstNodeKind =
  1082. result = whichRoleAux(sym)
  1083. if result == rnUnknownRole:
  1084. rstMessage(p, mwUnsupportedLanguage, sym)
  1085. proc toInlineCode(n: PRstNode, language: string): PRstNode =
  1086. ## Creates rnInlineCode and attaches `n` contents as code (in 3rd son).
  1087. result = newRstNode(rnInlineCode, info=n.info)
  1088. let args = newRstNode(rnDirArg)
  1089. var lang = language
  1090. if language == "cpp": lang = "c++"
  1091. elif language == "csharp": lang = "c#"
  1092. args.add newLeaf(lang)
  1093. result.add args
  1094. result.add PRstNode(nil)
  1095. var lb = newRstNode(rnLiteralBlock)
  1096. var s: string
  1097. for i in n.sons:
  1098. assert i.kind == rnLeaf
  1099. s.add i.text
  1100. lb.add newLeaf(s)
  1101. result.add lb
  1102. proc toOtherRole(n: PRstNode, kind: RstNodeKind, roleName: string): PRstNode =
  1103. let newN = newRstNode(rnInner, n.sons)
  1104. let newSons = @[newN, newLeaf(roleName)]
  1105. result = newRstNode(kind, newSons)
  1106. proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
  1107. ## Finalizes node `n` that was tentatively determined as interpreted text.
  1108. var newKind = n.kind
  1109. var newSons = n.sons
  1110. proc finalizeInterpreted(node: PRstNode, newKind: RstNodeKind,
  1111. newSons: seq[PRstNode], roleName: string):
  1112. PRstNode {.nimcall.} =
  1113. # fixes interpreted text (`x` or `y`:role:) to proper internal AST format
  1114. if newKind in {rnUnknownRole, rnCodeFragment}:
  1115. result = node.toOtherRole(newKind, roleName)
  1116. elif newKind == rnInlineCode:
  1117. result = node.toInlineCode(language=roleName)
  1118. else:
  1119. result = newRstNode(newKind, newSons)
  1120. if isInlineMarkupEnd(p, "_", exact=true) or
  1121. isInlineMarkupEnd(p, "__", exact=true):
  1122. inc p.idx
  1123. if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">":
  1124. var a = newRstNode(rnInner)
  1125. var b = newRstNode(rnInner)
  1126. if fixupEmbeddedRef(p, n, a, b):
  1127. if a.len == 0: # e.g. `<a_named_relative_link>`_
  1128. newKind = rnStandaloneHyperlink
  1129. newSons = @[b]
  1130. else: # e.g. `link title <http://site>`_
  1131. newKind = rnHyperlink
  1132. newSons = @[a, b]
  1133. setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias)
  1134. else: # include as plain text, not a link
  1135. newKind = rnInner
  1136. newSons = n.sons
  1137. result = newRstNode(newKind, newSons)
  1138. else: # some link that will be resolved in `resolveSubs`
  1139. newKind = rnRstRef
  1140. result = newRstNode(newKind, sons=newSons, info=n.info)
  1141. elif match(p, p.idx, ":w:"):
  1142. # a role:
  1143. let (roleName, lastIdx) = getRefname(p, p.idx+1)
  1144. newKind = whichRole(p, roleName)
  1145. result = n.finalizeInterpreted(newKind, newSons, roleName)
  1146. p.idx = lastIdx + 2
  1147. else:
  1148. result = n.finalizeInterpreted(p.s.currRoleKind, newSons, p.s.currRole)
  1149. proc matchVerbatim(p: RstParser, start: int, expr: string): int =
  1150. result = start
  1151. var j = 0
  1152. while j < expr.len and result < p.tok.len and
  1153. continuesWith(expr, p.tok[result].symbol, j):
  1154. inc j, p.tok[result].symbol.len
  1155. inc result
  1156. if j < expr.len: result = 0
  1157. proc parseSmiley(p: var RstParser): PRstNode =
  1158. if currentTok(p).symbol[0] notin SmileyStartChars: return
  1159. for key, val in items(Smilies):
  1160. let m = matchVerbatim(p, p.idx, key)
  1161. if m > 0:
  1162. p.idx = m
  1163. result = newRstNode(rnSmiley)
  1164. result.text = val
  1165. return
  1166. proc isUrl(p: RstParser, i: int): bool =
  1167. result = p.tok[i+1].symbol == ":" and p.tok[i+2].symbol == "//" and
  1168. p.tok[i+3].kind == tkWord and
  1169. p.tok[i].symbol in ["http", "https", "ftp", "telnet", "file"]
  1170. proc checkParen(token: Token, parensStack: var seq[char]): bool {.inline.} =
  1171. ## Returns `true` iff `token` is a closing parenthesis for some
  1172. ## previous opening parenthesis saved in `parensStack`.
  1173. ## This is according Markdown balanced parentheses rule
  1174. ## (https://spec.commonmark.org/0.29/#link-destination)
  1175. ## to allow links like
  1176. ## https://en.wikipedia.org/wiki/APL_(programming_language),
  1177. ## we use it for RST also.
  1178. result = false
  1179. if token.kind == tkPunct:
  1180. let c = token.symbol[0]
  1181. if c in {'(', '[', '{'}: # push
  1182. parensStack.add c
  1183. elif c in {')', ']', '}'}: # try pop
  1184. # a case like ([) inside a link is allowed and [ is also `pop`ed:
  1185. for i in countdown(parensStack.len - 1, 0):
  1186. if (parensStack[i] == '(' and c == ')' or
  1187. parensStack[i] == '[' and c == ']' or
  1188. parensStack[i] == '{' and c == '}'):
  1189. parensStack.setLen i
  1190. result = true
  1191. break
  1192. proc parseUrl(p: var RstParser): PRstNode =
  1193. ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks
  1194. result = newRstNode(rnStandaloneHyperlink)
  1195. var lastIdx = p.idx
  1196. var closedParenIdx = p.idx - 1 # for balanced parens rule
  1197. var parensStack: seq[char]
  1198. while p.tok[lastIdx].kind in {tkWord, tkPunct, tkOther}:
  1199. let isClosing = checkParen(p.tok[lastIdx], parensStack)
  1200. if isClosing:
  1201. closedParenIdx = lastIdx
  1202. inc lastIdx
  1203. dec lastIdx
  1204. # standalone URL can not end with punctuation in RST
  1205. while lastIdx > closedParenIdx and p.tok[lastIdx].kind == tkPunct and
  1206. p.tok[lastIdx].symbol != "/":
  1207. dec lastIdx
  1208. var s = ""
  1209. for i in p.idx .. lastIdx: s.add p.tok[i].symbol
  1210. result.add s
  1211. p.idx = lastIdx + 1
  1212. proc parseWordOrRef(p: var RstParser, father: PRstNode) =
  1213. ## Parses a normal word or may be a reference or URL.
  1214. if nextTok(p).kind != tkPunct: # <- main path, a normal word
  1215. father.add newLeaf(p)
  1216. inc p.idx
  1217. elif isUrl(p, p.idx): # URL http://something
  1218. father.add parseUrl(p)
  1219. else:
  1220. # check for reference (probably, long one like some.ref.with.dots_ )
  1221. var saveIdx = p.idx
  1222. var reference: PRstNode = nil
  1223. inc p.idx
  1224. while currentTok(p).kind in {tkWord, tkPunct}:
  1225. if currentTok(p).kind == tkPunct:
  1226. if isInlineMarkupEnd(p, "_", exact=true):
  1227. reference = newRstNode(rnRstRef, info=lineInfo(p, saveIdx))
  1228. break
  1229. if not validRefnamePunct(currentTok(p).symbol):
  1230. break
  1231. inc p.idx
  1232. if reference != nil:
  1233. for i in saveIdx..p.idx-1: reference.add newLeaf(p.tok[i].symbol)
  1234. father.add reference
  1235. inc p.idx # skip final _
  1236. else: # 1 normal word
  1237. father.add newLeaf(p.tok[saveIdx].symbol)
  1238. p.idx = saveIdx + 1
  1239. proc parseBackslash(p: var RstParser, father: PRstNode) =
  1240. assert(currentTok(p).kind == tkPunct)
  1241. if currentTok(p).symbol == "\\":
  1242. # XXX: Unicode?
  1243. inc p.idx
  1244. if currentTok(p).kind != tkWhite: father.add(newLeaf(p))
  1245. if currentTok(p).kind != tkEof: inc p.idx
  1246. else:
  1247. father.add(newLeaf(p))
  1248. inc p.idx
proc parseUntil(p: var RstParser, father: PRstNode, postfix: string,
                interpretBackslash: bool) =
  ## Collects inline content into `father` until the closing markup
  ## `postfix` is found.
  ##
  ## The current token (the opening markup) is skipped first. On return the
  ## parser is positioned after the closing token. When `interpretBackslash`
  ## is true, punctuation is routed through `parseBackslash`; for the
  ## postfix "`" a dedicated escaping scheme is applied instead.
  ## Reports `meExpected` when a blank line or a token kind not handled
  ## below (e.g. end of input) is reached before `postfix`.
  let
    # position of the opening markup, used in error messages
    line = currentTok(p).line
    col = currentTok(p).col
  inc p.idx
  while true:
    case currentTok(p).kind
    of tkPunct:
      if isInlineMarkupEnd(p, postfix, exact=false):
        let l = currentTok(p).symbol.len
        if l > postfix.len:
          # handle cases like *emphasis with stars****. (It's valid RST!)
          father.add newLeaf(currentTok(p).symbol[0 ..< l - postfix.len])
        elif postfix == "``" and currentTok(p).symbol == "`" and
            prevTok(p).symbol == "`":
          # handle cases like ``literal\`` - delete ` already added after \
          father.sons.setLen(father.sons.len - 1)
        inc p.idx
        break
      else:
        if postfix == "`":
          # inside single backticks: `\\` is kept as two backslashes,
          # "\`" yields a literal backtick, a lone `\` is kept verbatim
          if currentTok(p).symbol == "\\":
            if nextTok(p).symbol == "\\":
              father.add newLeaf("\\")
              father.add newLeaf("\\")
              inc p.idx, 2
            elif nextTok(p).symbol == "`": # escape `
              father.add newLeaf("`")
              inc p.idx, 2
            else:
              father.add newLeaf("\\")
              inc p.idx
          else:
            father.add(newLeaf(p))
            inc p.idx
        else:
          if interpretBackslash:
            parseBackslash(p, father)
          else:
            father.add(newLeaf(p))
            inc p.idx
    of tkAdornment, tkWord, tkOther:
      father.add(newLeaf(p))
      inc p.idx
    of tkIndent:
      # a single line break inside inline markup becomes one space
      father.add newLeaf(" ")
      inc p.idx
      if currentTok(p).kind == tkIndent:
        # two line breaks in a row: the markup was never closed
        rstMessage(p, meExpected, postfix, line, col)
        break
    of tkWhite:
      father.add newLeaf(" ")
      inc p.idx
    # remaining token kinds; NOTE(review): there is no `break` here, so
    # leaving the loop relies on `rstMessage` raising for `meExpected`
    else: rstMessage(p, meExpected, postfix, line, col)
  1304. proc parseMarkdownCodeblockFields(p: var RstParser): PRstNode =
  1305. ## Parses additional (after language string) code block parameters
  1306. ## in a format *suggested* in the `CommonMark Spec`_ with handling of `"`.
  1307. if currentTok(p).kind == tkIndent:
  1308. result = nil
  1309. else:
  1310. result = newRstNode(rnFieldList)
  1311. while currentTok(p).kind != tkIndent:
  1312. if currentTok(p).kind == tkWhite:
  1313. inc p.idx
  1314. else:
  1315. let field = newRstNode(rnField)
  1316. var fieldName = ""
  1317. while currentTok(p).kind notin {tkWhite, tkIndent, tkEof} and
  1318. currentTok(p).symbol != "=":
  1319. fieldName.add currentTok(p).symbol
  1320. inc p.idx
  1321. field.add(newRstNode(rnFieldName, @[newLeaf(fieldName)]))
  1322. if currentTok(p).kind == tkWhite: inc p.idx
  1323. let fieldBody = newRstNode(rnFieldBody)
  1324. if currentTok(p).symbol == "=":
  1325. inc p.idx
  1326. if currentTok(p).kind == tkWhite: inc p.idx
  1327. var fieldValue = ""
  1328. if currentTok(p).symbol == "\"":
  1329. while true:
  1330. fieldValue.add currentTok(p).symbol
  1331. inc p.idx
  1332. if currentTok(p).kind == tkEof:
  1333. rstMessage(p, meExpected, "\"")
  1334. elif currentTok(p).symbol == "\"":
  1335. fieldValue.add "\""
  1336. inc p.idx
  1337. break
  1338. else:
  1339. while currentTok(p).kind notin {tkWhite, tkIndent, tkEof}:
  1340. fieldValue.add currentTok(p).symbol
  1341. inc p.idx
  1342. fieldBody.add newLeaf(fieldValue)
  1343. field.add(fieldBody)
  1344. result.add(field)
  1345. proc mayLoadFile(p: RstParser, result: var PRstNode) =
  1346. var filename = strip(getFieldValue(result, "file"),
  1347. chars = Whitespace + {'"'})
  1348. if filename != "":
  1349. if roSandboxDisabled notin p.s.options:
  1350. let tok = p.tok[p.idx-2]
  1351. rstMessage(p, meSandboxedDirective, "file", tok.line, tok.col)
  1352. var path = p.findRelativeFile(filename)
  1353. if path == "": rstMessage(p, meCannotOpenFile, filename)
  1354. var n = newRstNode(rnLiteralBlock)
  1355. n.add newLeaf(readFile(path))
  1356. result.sons[2] = n
  1357. proc defaultCodeLangNim(p: RstParser, result: var PRstNode) =
  1358. # Create a field block if the input block didn't have any.
  1359. if result.sons[1].isNil: result.sons[1] = newRstNode(rnFieldList)
  1360. assert result.sons[1].kind == rnFieldList
  1361. # Hook the extra field and specify the Nim language as value.
  1362. var extraNode = newRstNode(rnField, info=lineInfo(p))
  1363. extraNode.add(newRstNode(rnFieldName))
  1364. extraNode.add(newRstNode(rnFieldBody))
  1365. extraNode.sons[0].add newLeaf("default-language")
  1366. extraNode.sons[1].add newLeaf("Nim")
  1367. result.sons[1].add(extraNode)
  1368. proc parseMarkdownCodeblock(p: var RstParser): PRstNode =
  1369. result = newRstNodeA(p, rnCodeBlock)
  1370. result.sons.setLen(3)
  1371. let line = curLine(p)
  1372. let baseCol = currentTok(p).col
  1373. let baseSym = currentTok(p).symbol # usually just ```
  1374. inc p.idx
  1375. result.info = lineInfo(p)
  1376. var args = newRstNode(rnDirArg)
  1377. if currentTok(p).kind == tkWord:
  1378. args.add(newLeaf(p))
  1379. inc p.idx
  1380. result.sons[1] = parseMarkdownCodeblockFields(p)
  1381. mayLoadFile(p, result)
  1382. else:
  1383. args = nil
  1384. var n = newLeaf("")
  1385. while true:
  1386. if currentTok(p).kind == tkEof:
  1387. rstMessage(p, meMissingClosing,
  1388. "$1 (started at line $2)" % [baseSym, $line])
  1389. break
  1390. elif nextTok(p).kind in {tkPunct, tkAdornment} and
  1391. nextTok(p).symbol[0] == baseSym[0] and
  1392. nextTok(p).symbol.len >= baseSym.len:
  1393. inc p.idx, 2
  1394. break
  1395. elif currentTok(p).kind == tkIndent:
  1396. n.text.add "\n"
  1397. if currentTok(p).ival > baseCol:
  1398. n.text.add " ".repeat(currentTok(p).ival - baseCol)
  1399. elif currentTok(p).ival < baseCol:
  1400. rstMessage(p, mwRstStyle,
  1401. "unexpected de-indentation in Markdown code block")
  1402. inc p.idx
  1403. else:
  1404. n.text.add(currentTok(p).symbol)
  1405. inc p.idx
  1406. result.sons[0] = args
  1407. if result.sons[2] == nil:
  1408. var lb = newRstNode(rnLiteralBlock)
  1409. lb.add(n)
  1410. result.sons[2] = lb
  1411. if result.sons[0].isNil and roNimFile in p.s.options:
  1412. defaultCodeLangNim(p, result)
proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =
  # Parses Markdown link. If it's Pandoc auto-link then its second
  # son (target) will be in tokenized format (rnInner with leafs).
  #
  # Returns true when a link node was added to `father`; only then is
  # `p.idx` advanced. All scanning is done with the local index `i`, so a
  # failed attempt leaves the parser position untouched.
  var desc = newRstNode(rnInner)
  var i = p.idx
  var parensStack: seq[char]
  # Scans from the opening token at `i` up to the matching `endToken` and
  # appends the text in between to `dest` (a string or a node). Line breaks
  # are turned into spaces. Makes the enclosing proc return false at end of
  # input or at a blank line.
  template parse(endToken, dest) =
    parensStack.setLen 0
    inc i # skip begin token
    while true:
      if p.tok[i].kind == tkEof: return false
      if p.tok[i].kind == tkIndent and p.tok[i+1].kind == tkIndent:
        return false
      let isClosing = checkParen(p.tok[i], parensStack)
      # an `endToken` that closes a bracket opened inside is part of the text
      if p.tok[i].symbol == endToken and not isClosing:
        break
      let symbol = if p.tok[i].kind == tkIndent: " " else: p.tok[i].symbol
      when dest is string: dest.add symbol
      else: dest.add newLeaf(symbol)
      inc i
    inc i # skip end token
  parse("]", desc)
  if p.tok[i].symbol == "(":
    # inline link: [description](target)
    var link = ""
    let linkIdx = i + 1
    parse(")", link)
    # only commit if we detected no syntax error:
    # NOTE(review): `safeProtocol` is defined elsewhere; the check below
    # suggests it may modify `link` -- confirm.
    let protocol = safeProtocol(link)
    if link == "":
      result = false
      rstMessage(p, mwBrokenLink, protocol,
                 p.tok[linkIdx].line, p.tok[linkIdx].col)
    else:
      let child = newRstNode(rnHyperlink)
      child.add newLeaf(desc.addNodes)
      child.add link
      father.add child
      p.idx = i
      result = true
  elif roPreferMarkdown in p.s.options:
    # Use Pandoc's implicit_header_references extension
    var n = newRstNode(rnPandocRef)
    if p.tok[i].symbol == "[":
      var link = newRstNode(rnInner)
      let targetIdx = i + 1
      parse("]", link)
      n.add desc
      if link.len != 0: # [description][target]
        n.add link
        n.info = lineInfo(p, targetIdx)
      else: # [description=target][]
        n.add desc
        n.info = lineInfo(p, p.idx + 1)
    else: # [description=target]
      n.add desc
      n.add desc # target is the same as description
      n.info = lineInfo(p, p.idx + 1)
    father.add n
    p.idx = i
    result = true
  else:
    result = false
  1475. proc getFootnoteType(label: PRstNode): (FootnoteType, int) =
  1476. if label.sons.len >= 1 and label.sons[0].kind == rnLeaf and
  1477. label.sons[0].text == "#":
  1478. if label.sons.len == 1:
  1479. result = (fnAutoNumber, -1)
  1480. else:
  1481. result = (fnAutoNumberLabel, -1)
  1482. elif label.len == 1 and label.sons[0].kind == rnLeaf and
  1483. label.sons[0].text == "*":
  1484. result = (fnAutoSymbol, -1)
  1485. elif label.len == 1 and label.sons[0].kind == rnLeaf:
  1486. try:
  1487. result = (fnManualNumber, parseInt(label.sons[0].text))
  1488. except:
  1489. result = (fnCitation, -1)
  1490. else:
  1491. result = (fnCitation, -1)
  1492. proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode =
  1493. ## parse footnote/citation label. Precondition: start at `[`.
  1494. ## Label text should be valid ref. name symbol, otherwise nil is returned.
  1495. var i = p.idx + 1
  1496. result = newRstNode(rnInner)
  1497. while true:
  1498. if p.tok[i].kind in {tkEof, tkIndent, tkWhite}:
  1499. return nil
  1500. if p.tok[i].kind == tkPunct:
  1501. case p.tok[i].symbol:
  1502. of "]":
  1503. if i > p.idx + 1 and (not reference or (p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol == "_")):
  1504. inc i # skip ]
  1505. if reference: inc i # skip _
  1506. break # to succeed, it's a footnote/citation indeed
  1507. else:
  1508. return nil
  1509. of "#":
  1510. if i != p.idx + 1:
  1511. return nil
  1512. of "*":
  1513. if i != p.idx + 1 and p.tok[i].kind != tkPunct and p.tok[i+1].symbol != "]":
  1514. return nil
  1515. else:
  1516. if not validRefnamePunct(p.tok[i].symbol):
  1517. return nil
  1518. result.add newLeaf(p.tok[i].symbol)
  1519. inc i
  1520. p.idx = i
  1521. proc isMarkdownCodeBlock(p: RstParser, idx: int): bool =
  1522. let tok = p.tok[idx]
  1523. template allowedSymbol: bool =
  1524. (tok.symbol[0] == '`' or
  1525. roPreferMarkdown in p.s.options and tok.symbol[0] == '~')
  1526. result = (roSupportMarkdown in p.s.options and
  1527. tok.kind in {tkPunct, tkAdornment} and
  1528. allowedSymbol and
  1529. tok.symbol.len >= 3)
  1530. proc isMarkdownCodeBlock(p: RstParser): bool =
  1531. isMarkdownCodeBlock(p, p.idx)
proc parseInline(p: var RstParser, father: PRstNode) =
  ## Parses one inline element starting at the current token and appends
  ## the resulting node to `father`.
  ##
  ## For punctuation the branches below are tried in order, so longer markup
  ## (`***`, `**`) is tested before shorter markup (`*`), and fenced code
  ## blocks before inline literals. Token kinds not listed in the `case`
  ## are ignored without consuming anything.
  var n: PRstNode # to be used in `if` condition
  let saveIdx = p.idx
  case currentTok(p).kind
  of tkPunct:
    if isInlineMarkupStart(p, "***"):
      var n = newRstNode(rnTripleEmphasis)
      parseUntil(p, n, "***", true)
      father.add(n)
    elif isInlineMarkupStart(p, "**"):
      var n = newRstNode(rnStrongEmphasis)
      parseUntil(p, n, "**", true)
      father.add(n)
    elif isInlineMarkupStart(p, "*"):
      var n = newRstNode(rnEmphasis)
      parseUntil(p, n, "*", true)
      father.add(n)
    elif isInlineMarkupStart(p, "_`"):
      # inline target: its text is also registered as an anchor
      var n = newRstNode(rnInlineTarget)
      inc p.idx
      parseUntil(p, n, "`", false)
      n.anchor = rstnodeToRefname(n)
      addAnchorRst(p, name = linkName(n), target = n,
                   anchorType=manualInlineAnchor)
      father.add(n)
    elif isMarkdownCodeBlock(p):
      father.add(parseMarkdownCodeblock(p))
    elif isInlineMarkupStart(p, "``"):
      var n = newRstNode(rnInlineLiteral)
      parseUntil(p, n, "``", false)
      father.add(n)
    elif match(p, p.idx, ":w:") and
        (var lastIdx = getRefnameIdx(p, p.idx + 1);
         p.tok[lastIdx+2].symbol == "`"):
      # prefix role: a role name between colons directly followed by a
      # backtick-quoted text
      let (roleName, _) = getRefname(p, p.idx+1)
      let k = whichRole(p, roleName)
      var n = newRstNode(k)
      p.idx = lastIdx + 2
      if k == rnInlineCode:
        n = n.toInlineCode(language=roleName)
      parseUntil(p, n, "`", false) # bug #17260
      if k in {rnUnknownRole, rnCodeFragment}:
        n = n.toOtherRole(k, roleName)
      father.add(n)
    elif isInlineMarkupStart(p, "`"):
      # interpreted text; `parsePostfix` decides its final node
      var n = newRstNode(rnInterpretedText, info=lineInfo(p, p.idx+1))
      parseUntil(p, n, "`", false) # bug #17260
      n = parsePostfix(p, n)
      father.add(n)
    elif isInlineMarkupStart(p, "|"):
      var n = newRstNode(rnSubstitutionReferences, info=lineInfo(p, p.idx+1))
      parseUntil(p, n, "|", false)
      father.add(n)
    elif roSupportMarkdown in p.s.options and
        currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and
        parseMarkdownLink(p, father):
      discard "parseMarkdownLink already processed it"
    elif isInlineMarkupStart(p, "[") and nextTok(p).symbol != "[" and
        (n = parseFootnoteName(p, reference=true); n != nil):
      # footnote or citation reference; `n` holds the parsed label
      var nn = newRstNode(rnFootnoteRef)
      nn.info = lineInfo(p, saveIdx+1)
      nn.add n
      let (fnType, _) = getFootnoteType(n)
      # remember where auto-symbol / auto-number references occur
      case fnType
      of fnAutoSymbol:
        p.s.lineFootnoteSymRef.add lineInfo(p)
      of fnAutoNumber:
        p.s.lineFootnoteNumRef.add lineInfo(p)
      else: discard
      father.add(nn)
    else:
      # no markup recognized: try a smiley, then plain/escaped punctuation
      if roSupportSmilies in p.s.options:
        let n = parseSmiley(p)
        if n != nil:
          father.add(n)
          return
      parseBackslash(p, father)
  of tkWord:
    if roSupportSmilies in p.s.options:
      let n = parseSmiley(p)
      if n != nil:
        father.add(n)
        return
    parseWordOrRef(p, father)
  of tkAdornment, tkOther, tkWhite:
    if isMarkdownCodeBlock(p):
      father.add(parseMarkdownCodeblock(p))
      return
    if roSupportSmilies in p.s.options:
      let n = parseSmiley(p)
      if n != nil:
        father.add(n)
        return
    father.add(newLeaf(p))
    inc p.idx
  else: discard
  1628. proc getDirective(p: var RstParser): string =
  1629. result = ""
  1630. if currentTok(p).kind == tkWhite:
  1631. let (name, lastIdx) = getRefname(p, p.idx + 1)
  1632. let afterIdx = lastIdx + 1
  1633. if name.len > 0:
  1634. if p.tok[afterIdx].symbol == "::":
  1635. result = name
  1636. p.idx = afterIdx + 1
  1637. if currentTok(p).kind == tkWhite:
  1638. inc p.idx
  1639. elif currentTok(p).kind != tkIndent:
  1640. rstMessage(p, mwRstStyle,
  1641. "whitespace or newline expected after directive " & name)
  1642. result = result.toLowerAscii()
  1643. elif p.tok[afterIdx].symbol == ":":
  1644. rstMessage(p, mwRstStyle,
  1645. "double colon :: may be missing at end of '" & name & "'",
  1646. p.tok[afterIdx].line, p.tok[afterIdx].col)
  1647. elif p.tok[afterIdx].kind == tkPunct and p.tok[afterIdx].symbol[0] == ':':
  1648. rstMessage(p, mwRstStyle,
  1649. "too many colons for a directive (should be ::)",
  1650. p.tok[afterIdx].line, p.tok[afterIdx].col)
  1651. proc parseComment(p: var RstParser, col: int): PRstNode =
  1652. if currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
  1653. inc p.idx # empty comment
  1654. else:
  1655. while currentTok(p).kind != tkEof:
  1656. if currentTok(p).kind == tkIndent and currentTok(p).ival > col or
  1657. currentTok(p).kind != tkIndent and currentTok(p).col > col:
  1658. inc p.idx
  1659. else:
  1660. break
  1661. result = nil
  1662. proc parseLine(p: var RstParser, father: PRstNode) =
  1663. while true:
  1664. case currentTok(p).kind
  1665. of tkWhite, tkWord, tkOther, tkPunct: parseInline(p, father)
  1666. else: break
  1667. proc parseUntilNewline(p: var RstParser, father: PRstNode) =
  1668. while true:
  1669. case currentTok(p).kind
  1670. of tkWhite, tkWord, tkAdornment, tkOther, tkPunct: parseInline(p, father)
  1671. of tkEof, tkIndent: break
# Forward declaration: `parseSection` is called by procs that appear before
# its implementation (e.g. `parseField`).
proc parseSection(p: var RstParser, result: PRstNode) {.gcsafe.}
  1673. proc tokenAfterNewline(p: RstParser, start: int): int =
  1674. result = start
  1675. while true:
  1676. case p.tok[result].kind
  1677. of tkEof:
  1678. break
  1679. of tkIndent:
  1680. inc result
  1681. break
  1682. else: inc result
  1683. proc tokenAfterNewline(p: RstParser): int {.inline.} =
  1684. result = tokenAfterNewline(p, p.idx)
  1685. proc getWrappableIndent(p: RstParser): int =
  1686. ## Gets baseline indentation for bodies of field lists and directives.
  1687. ## Handles situations like this (with possible de-indent in [case.3])::
  1688. ##
  1689. ## :field: definition [case.1]
  1690. ##
  1691. ## currInd currentTok(p).col
  1692. ## | |
  1693. ## v v
  1694. ##
  1695. ## .. Note:: defItem: [case.2]
  1696. ## definition
  1697. ##
  1698. ## ^
  1699. ## |
  1700. ## nextIndent
  1701. ##
  1702. ## .. Note:: - point1 [case.3]
  1703. ## - point 2
  1704. ##
  1705. ## ^
  1706. ## |
  1707. ## nextIndent
  1708. if currentTok(p).kind == tkIndent:
  1709. result = currentTok(p).ival
  1710. else:
  1711. var nextIndent = p.tok[tokenAfterNewline(p)-1].ival
  1712. if nextIndent <= currInd(p): # parse only this line [case.1]
  1713. result = currentTok(p).col
  1714. elif nextIndent >= currentTok(p).col: # may be a definition list [case.2]
  1715. result = currentTok(p).col
  1716. else:
  1717. result = nextIndent # allow parsing next lines [case.3]
  1718. proc getMdBlockIndent(p: RstParser): int =
  1719. ## Markdown version of `getWrappableIndent`.
  1720. if currentTok(p).kind == tkIndent:
  1721. result = currentTok(p).ival
  1722. else:
  1723. var nextIndent = p.tok[tokenAfterNewline(p)-1].ival
  1724. # TODO: Markdown-compliant definition should allow nextIndent == currInd(p):
  1725. if nextIndent <= currInd(p): # parse only this line
  1726. result = currentTok(p).col
  1727. else:
  1728. result = nextIndent # allow parsing next lines [case.3]
  1729. template isRst(p: RstParser): bool = roPreferMarkdown notin p.s.options
  1730. template isMd(p: RstParser): bool = roPreferMarkdown in p.s.options
  1731. proc parseField(p: var RstParser): PRstNode =
  1732. ## Returns a parsed rnField node.
  1733. ##
  1734. ## rnField nodes have two children nodes, a rnFieldName and a rnFieldBody.
  1735. result = newRstNode(rnField, info=lineInfo(p))
  1736. var col = currentTok(p).col
  1737. var fieldname = newRstNode(rnFieldName)
  1738. parseUntil(p, fieldname, ":", false)
  1739. var fieldbody = newRstNode(rnFieldBody)
  1740. if currentTok(p).kind == tkWhite: inc p.idx
  1741. let indent = getWrappableIndent(p)
  1742. if indent > col:
  1743. pushInd(p, indent)
  1744. parseSection(p, fieldbody)
  1745. popInd(p)
  1746. result.add(fieldname)
  1747. result.add(fieldbody)
  1748. proc parseFields(p: var RstParser): PRstNode =
  1749. ## Parses fields for a section or directive block.
  1750. ##
  1751. ## This proc may return nil if the parsing doesn't find anything of value,
  1752. ## otherwise it will return a node of rnFieldList type with children.
  1753. result = nil
  1754. var atStart = p.idx == 0 and p.tok[0].symbol == ":"
  1755. if currentTok(p).kind == tkIndent and nextTok(p).symbol == ":" or
  1756. atStart:
  1757. var col = if atStart: currentTok(p).col else: currentTok(p).ival
  1758. result = newRstNodeA(p, rnFieldList)
  1759. if not atStart: inc p.idx
  1760. while true:
  1761. result.add(parseField(p))
  1762. if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
  1763. nextTok(p).symbol == ":":
  1764. inc p.idx
  1765. else:
  1766. break
  1767. proc getFieldValue*(n: PRstNode): string =
  1768. ## Returns the value of a specific ``rnField`` node.
  1769. ##
  1770. ## This proc will assert if the node is not of the expected type. The empty
  1771. ## string will be returned as a minimum. Any value in the rst will be
  1772. ## stripped form leading/trailing whitespace.
  1773. assert n.kind == rnField
  1774. assert n.len == 2
  1775. assert n.sons[0].kind == rnFieldName
  1776. assert n.sons[1].kind == rnFieldBody
  1777. result = addNodes(n.sons[1]).strip
  1778. proc getFieldValue(n: PRstNode, fieldname: string): string =
  1779. if n.sons[1] == nil: return
  1780. if n.sons[1].kind != rnFieldList:
  1781. #InternalError("getFieldValue (2): " & $n.sons[1].kind)
  1782. # We don't like internal errors here anymore as that would break the forum!
  1783. return
  1784. for i in 0 ..< n.sons[1].len:
  1785. var f = n.sons[1].sons[i]
  1786. if cmpIgnoreStyle(addNodes(f.sons[0]), fieldname) == 0:
  1787. result = addNodes(f.sons[1])
  1788. if result == "": result = "\x01\x01" # indicates that the field exists
  1789. return
  1790. proc getArgument(n: PRstNode): string =
  1791. if n.sons[0] == nil: result = ""
  1792. else: result = addNodes(n.sons[0])
# Forward declaration of `parseDotDot`; its body is not part of this section
# of the file.
proc parseDotDot(p: var RstParser): PRstNode {.gcsafe.}
  1794. proc parseLiteralBlock(p: var RstParser): PRstNode =
  1795. result = newRstNodeA(p, rnLiteralBlock)
  1796. var n = newLeaf("")
  1797. if currentTok(p).kind == tkIndent:
  1798. var indent = currentTok(p).ival
  1799. while currentTok(p).kind == tkIndent: inc p.idx # skip blank lines
  1800. while true:
  1801. case currentTok(p).kind
  1802. of tkEof:
  1803. break
  1804. of tkIndent:
  1805. if currentTok(p).ival < indent:
  1806. break
  1807. else:
  1808. n.text.add("\n")
  1809. n.text.add(spaces(currentTok(p).ival - indent))
  1810. inc p.idx
  1811. else:
  1812. n.text.add(currentTok(p).symbol)
  1813. inc p.idx
  1814. else:
  1815. while currentTok(p).kind notin {tkIndent, tkEof}:
  1816. n.text.add(currentTok(p).symbol)
  1817. inc p.idx
  1818. result.add(n)
  1819. proc parseQuotedLiteralBlock(p: var RstParser): PRstNode =
  1820. result = newRstNodeA(p, rnLiteralBlock)
  1821. var n = newLeaf("")
  1822. if currentTok(p).kind == tkIndent:
  1823. var indent = currInd(p)
  1824. while currentTok(p).kind == tkIndent: inc p.idx # skip blank lines
  1825. var quoteSym = currentTok(p).symbol[0]
  1826. while true:
  1827. case currentTok(p).kind
  1828. of tkEof:
  1829. break
  1830. of tkIndent:
  1831. if currentTok(p).ival < indent:
  1832. break
  1833. elif currentTok(p).ival == indent:
  1834. if nextTok(p).kind == tkPunct and nextTok(p).symbol[0] == quoteSym:
  1835. n.text.add("\n")
  1836. inc p.idx
  1837. elif nextTok(p).kind == tkIndent:
  1838. break
  1839. else:
  1840. rstMessage(p, mwRstStyle, "no newline after quoted literal block")
  1841. break
  1842. else:
  1843. rstMessage(p, mwRstStyle,
  1844. "unexpected indentation in quoted literal block")
  1845. break
  1846. else:
  1847. n.text.add(currentTok(p).symbol)
  1848. inc p.idx
  1849. result.add(n)
  1850. proc parseRstLiteralBlock(p: var RstParser, kind: LiteralBlockKind): PRstNode =
  1851. if kind == lbIndentedLiteralBlock:
  1852. result = parseLiteralBlock(p)
  1853. else:
  1854. result = parseQuotedLiteralBlock(p)
  1855. proc getLevel(p: var RstParser, c: char, hasOverline: bool): int =
  1856. ## Returns (preliminary) heading level corresponding to `c` and
  1857. ## `hasOverline`. If level does not exist, add it first.
  1858. for i, hType in p.s.hLevels:
  1859. if hType.symbol == c and hType.hasOverline == hasOverline:
  1860. p.s.hLevels[i].line = curLine(p)
  1861. p.s.hLevels[i].hasPeers = true
  1862. return i
  1863. p.s.hLevels.add LevelInfo(symbol: c, hasOverline: hasOverline,
  1864. line: curLine(p), hasPeers: false)
  1865. result = p.s.hLevels.len - 1
  1866. proc countTitles(s: PRstSharedState, n: PRstNode) =
  1867. ## Fill `s.hTitleCnt`
  1868. if n == nil: return
  1869. for node in n.sons:
  1870. if node != nil:
  1871. if node.kind notin {rnOverline, rnSubstitutionDef, rnDefaultRole}:
  1872. break
  1873. if node.kind == rnOverline:
  1874. if s.hLevels[s.hTitleCnt].hasPeers:
  1875. break
  1876. inc s.hTitleCnt
  1877. if s.hTitleCnt >= 2:
  1878. break
  1879. proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool =
  1880. ## check that underline/overline length is enough for the heading.
  1881. ## No support for Unicode.
  1882. if p.tok[adornmentIdx].symbol in ["::", "..", "|"]:
  1883. return false
  1884. if isMarkdownCodeBlock(p, adornmentIdx):
  1885. return false
  1886. var headlineLen = 0
  1887. var failure = ""
  1888. if p.idx < adornmentIdx: # check for underline
  1889. if p.idx > 0:
  1890. headlineLen = currentTok(p).col - p.tok[adornmentIdx].col
  1891. if headlineLen > 0:
  1892. rstMessage(p, mwRstStyle, "indentation of heading text allowed" &
  1893. " only for overline titles")
  1894. for i in p.idx ..< adornmentIdx-1: # adornmentIdx-1 is a linebreak
  1895. headlineLen += p.tok[i].symbol.len
  1896. result = p.tok[adornmentIdx].symbol.len >= headlineLen and headlineLen != 0
  1897. if not result:
  1898. failure = "(underline '" & p.tok[adornmentIdx].symbol & "' is too short)"
  1899. else: # p.idx == adornmentIdx, at overline. Check overline and underline
  1900. var i = p.idx + 2
  1901. headlineLen = p.tok[i].col - p.tok[adornmentIdx].col
  1902. while p.tok[i].kind notin {tkEof, tkIndent}:
  1903. headlineLen += p.tok[i].symbol.len
  1904. inc i
  1905. result = p.tok[adornmentIdx].symbol.len >= headlineLen and
  1906. headlineLen != 0
  1907. if result:
  1908. result = result and p.tok[i].kind == tkIndent and
  1909. p.tok[i+1].kind == tkAdornment and
  1910. p.tok[i+1].symbol == p.tok[adornmentIdx].symbol
  1911. if not result:
  1912. failure = "(underline '" & p.tok[i+1].symbol & "' does not match " &
  1913. "overline '" & p.tok[adornmentIdx].symbol & "')"
  1914. else:
  1915. failure = "(overline '" & p.tok[adornmentIdx].symbol & "' is too short)"
  1916. if not result:
  1917. rstMessage(p, meNewSectionExpected, failure)
  1918. proc isLineBlock(p: RstParser): bool =
  1919. var j = tokenAfterNewline(p)
  1920. result = currentTok(p).col == p.tok[j].col and p.tok[j].symbol == "|" or
  1921. p.tok[j].col > currentTok(p).col or
  1922. p.tok[j].symbol == "\n"
  1923. proc isMarkdownBlockQuote(p: RstParser): bool =
  1924. result = currentTok(p).symbol[0] == '>'
  1925. proc whichRstLiteralBlock(p: RstParser): LiteralBlockKind =
  1926. ## Checks that the following tokens are either Indented Literal Block or
  1927. ## Quoted Literal Block (which is not quite the same as Markdown quote block).
  1928. ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
  1929. if currentTok(p).symbol == "::" and nextTok(p).kind == tkIndent:
  1930. if currInd(p) > nextTok(p).ival:
  1931. result = lbNone
  1932. if currInd(p) < nextTok(p).ival:
  1933. result = lbIndentedLiteralBlock
  1934. elif currInd(p) == nextTok(p).ival:
  1935. var i = p.idx + 1
  1936. while p.tok[i].kind == tkIndent: inc i
  1937. const validQuotingCharacters = {
  1938. '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
  1939. '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^',
  1940. '_', '`', '{', '|', '}', '~'}
  1941. if p.tok[i].kind in {tkPunct, tkAdornment} and
  1942. p.tok[i].symbol[0] in validQuotingCharacters:
  1943. result = lbQuotedLiteralBlock
  1944. else:
  1945. result = lbNone
  1946. else:
  1947. result = lbNone
  1948. proc predNL(p: RstParser): bool =
  1949. result = true
  1950. if p.idx > 0:
  1951. result = prevTok(p).kind == tkIndent and
  1952. prevTok(p).ival == currInd(p)
  1953. proc isDefList(p: RstParser): bool =
  1954. var j = tokenAfterNewline(p)
  1955. result = currentTok(p).col < p.tok[j].col and
  1956. p.tok[j].kind in {tkWord, tkOther, tkPunct} and
  1957. p.tok[j - 2].symbol != "::"
  1958. proc `$`(t: Token): string = # for debugging only
  1959. result = "(" & $t.kind & " line=" & $t.line & " col=" & $t.col
  1960. if t.kind == tkIndent: result = result & " ival=" & $t.ival & ")"
  1961. else: result = result & " symbol=" & t.symbol & ")"
  1962. proc skipNewlines(p: RstParser, j: int): int =
  1963. result = j
  1964. while p.tok[result].kind != tkEof and p.tok[result].kind == tkIndent:
  1965. inc result # skip blank lines
  1966. proc skipNewlines(p: var RstParser) =
  1967. p.idx = skipNewlines(p, p.idx)
  1968. const maxMdRelInd = 3 ## In Markdown: maximum indentation that does not yet
  1969. ## make the indented block a code
  1970. proc isMdRelInd(outerInd, nestedInd: int): bool =
  1971. result = outerInd <= nestedInd and nestedInd <= outerInd + maxMdRelInd
  1972. proc isMdDefBody(p: RstParser, j: int, termCol: int): bool =
  1973. let defCol = p.tok[j].col
  1974. result = p.tok[j].symbol == ":" and
  1975. isMdRelInd(termCol, defCol) and
  1976. p.tok[j+1].kind == tkWhite and
  1977. p.tok[j+2].kind in {tkWord, tkOther, tkPunct}
  1978. proc isMdDefListItem(p: RstParser, idx: int): bool =
  1979. var j = tokenAfterNewline(p, idx)
  1980. j = skipNewlines(p, j)
  1981. let termCol = p.tok[j].col
  1982. result = isMdRelInd(currInd(p), termCol) and
  1983. isMdDefBody(p, j, termCol)
  1984. proc isOptionList(p: RstParser): bool =
  1985. result = match(p, p.idx, "-w") or match(p, p.idx, "--w") or
  1986. match(p, p.idx, "/w") or match(p, p.idx, "//w")
  1987. proc isMarkdownHeadlinePattern(s: string): bool =
  1988. if s.len >= 1 and s.len <= 6:
  1989. for c in s:
  1990. if c != '#': return false
  1991. result = true
  1992. proc isMarkdownHeadline(p: RstParser): bool =
  1993. if roSupportMarkdown in p.s.options:
  1994. if isMarkdownHeadlinePattern(currentTok(p).symbol) and nextTok(p).kind == tkWhite:
  1995. if p.tok[p.idx+2].kind in {tkWord, tkOther, tkPunct}:
  1996. result = true
  1997. proc findPipe(p: RstParser, start: int): bool =
  1998. var i = start
  1999. while true:
  2000. if p.tok[i].symbol == "|": return true
  2001. if p.tok[i].kind in {tkIndent, tkEof}: return false
  2002. inc i
proc whichSection(p: RstParser): RstNodeKind =
  ## Classifies the construct starting at the current token and returns the
  ## node kind that `parseSection`/`parseParagraph` should handle next.
  ##
  ## The checks are order-dependent: the first matching rule wins.
  ## `rnLeaf` is returned when nothing applies to an adornment token or to a
  ## token kind that cannot start a section; `rnParagraph` is the fallback
  ## for punctuation and word-like tokens.
  if currentTok(p).kind in {tkAdornment, tkPunct}:
    # for punctuation sequences that can be both tkAdornment and tkPunct
    if isMarkdownCodeBlock(p):
      return rnCodeBlock
    elif isRst(p) and currentTok(p).symbol == "::":
      return rnLiteralBlock
    elif currentTok(p).symbol == ".." and
        nextTok(p).kind in {tkWhite, tkIndent}:
      return rnDirective
  case currentTok(p).kind
  of tkAdornment:
    # an adornment line of 4+ characters followed by the "iI" pattern
    # is a transition
    if match(p, p.idx + 1, "iI") and currentTok(p).symbol.len >= 4:
      result = rnTransition
    elif match(p, p.idx, "+a+"):
      # grid tables are recognized but reported as not implemented
      result = rnGridTable
      rstMessage(p, meGridTableNotImplemented)
    elif match(p, p.idx + 1, " a"): result = rnTable
    elif currentTok(p).symbol == "|" and isLineBlock(p):
      result = rnLineBlock
    elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
      result = rnMarkdownBlockQuote
    elif match(p, p.idx + 1, "i") and isAdornmentHeadline(p, p.idx):
      result = rnOverline
    else:
      result = rnLeaf
  of tkPunct:
    if isMarkdownHeadline(p):
      result = rnMarkdownHeadline
    elif roSupportMarkdown in p.s.options and predNL(p) and
        match(p, p.idx, "| w") and findPipe(p, p.idx+3):
      # a line starting with `| word` that has another `|` later on the line
      result = rnMarkdownTable
    elif currentTok(p).symbol == "|" and isLineBlock(p):
      result = rnLineBlock
    elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
      result = rnMarkdownBlockQuote
    elif match(p, tokenAfterNewline(p), "aI") and
        isAdornmentHeadline(p, tokenAfterNewline(p)):
      # the following line is an adornment => underlined headline
      result = rnHeadline
    elif currentTok(p).symbol in ["+", "*", "-"] and nextTok(p).kind == tkWhite:
      result = rnBulletList
    elif match(p, p.idx, ":w:E"):
      # (currentTok(p).symbol == ":")
      result = rnFieldList
    elif match(p, p.idx, "(e) ") or match(p, p.idx, "e) ") or
        match(p, p.idx, "e. "):
      result = rnEnumList
    elif isOptionList(p):
      result = rnOptionList
    elif isRst(p) and isDefList(p):
      result = rnDefList
    elif isMd(p) and isMdDefListItem(p, p.idx):
      result = rnMdDefList
    else:
      result = rnParagraph
  of tkWord, tkOther, tkWhite:
    let tokIdx = tokenAfterNewline(p)
    if match(p, tokIdx, "aI"):
      # next line is an adornment: headline if it qualifies, else paragraph
      if isAdornmentHeadline(p, tokIdx): result = rnHeadline
      else: result = rnParagraph
    elif match(p, p.idx, "e) ") or match(p, p.idx, "e. "): result = rnEnumList
    elif isRst(p) and isDefList(p): result = rnDefList
    elif isMd(p) and isMdDefListItem(p, p.idx):
      result = rnMdDefList
    else: result = rnParagraph
  else: result = rnLeaf
  2069. proc parseLineBlock(p: var RstParser): PRstNode =
  2070. ## Returns rnLineBlock with all sons of type rnLineBlockItem
  2071. result = nil
  2072. if nextTok(p).kind in {tkWhite, tkIndent}:
  2073. var col = currentTok(p).col
  2074. result = newRstNodeA(p, rnLineBlock)
  2075. while true:
  2076. var item = newRstNode(rnLineBlockItem)
  2077. if nextTok(p).kind == tkWhite:
  2078. if nextTok(p).symbol.len > 1: # pass additional indentation after '| '
  2079. item.lineIndent = nextTok(p).symbol
  2080. inc p.idx, 2
  2081. pushInd(p, p.tok[p.idx].col)
  2082. parseSection(p, item)
  2083. popInd(p)
  2084. else: # tkIndent => add an empty line
  2085. item.lineIndent = "\n"
  2086. inc p.idx, 1
  2087. result.add(item)
  2088. if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
  2089. nextTok(p).symbol == "|" and
  2090. p.tok[p.idx + 2].kind in {tkWhite, tkIndent}:
  2091. inc p.idx, 1
  2092. else:
  2093. break
# Forward declaration: `parseDoc` is called by procs that appear before its
# body (e.g. `parseMarkdownQuoteSegment` and the table row parsers).
proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}
  2095. proc getQuoteSymbol(p: RstParser, idx: int): tuple[sym: string, depth: int, tokens: int] =
  2096. result = ("", 0, 0)
  2097. var i = idx
  2098. result.sym &= p.tok[i].symbol
  2099. result.depth += p.tok[i].symbol.len
  2100. inc result.tokens
  2101. inc i
  2102. while p.tok[i].kind == tkWhite and i+1 < p.tok.len and
  2103. p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol[0] == '>':
  2104. result.sym &= p.tok[i].symbol
  2105. result.sym &= p.tok[i+1].symbol
  2106. result.depth += p.tok[i+1].symbol.len
  2107. inc result.tokens, 2
  2108. inc i, 2
  2109. proc parseMarkdownQuoteSegment(p: var RstParser, curSym: string, col: int):
  2110. PRstNode =
  2111. ## We define *segment* as a group of lines that starts with exactly the
  2112. ## same quote symbol. If the following lines don't contain any `>` (*lazy*
  2113. ## continuation) they considered as continuation of the current segment.
  2114. var q: RstParser # to delete `>` at a start of line and then parse normally
  2115. initParser(q, p.s)
  2116. q.col = p.col
  2117. q.line = p.line
  2118. var minCol = int.high # minimum colum num in the segment
  2119. while true: # move tokens of segment from `p` to `q` skipping `curSym`
  2120. case currentTok(p).kind
  2121. of tkEof:
  2122. break
  2123. of tkIndent:
  2124. if nextTok(p).kind in {tkIndent, tkEof}:
  2125. break
  2126. else:
  2127. if nextTok(p).symbol[0] == '>':
  2128. var (quoteSym, _, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
  2129. if quoteSym == curSym: # the segment continues
  2130. var iTok = tokenAfterNewline(p, p.idx+1)
  2131. if p.tok[iTok].kind notin {tkEof, tkIndent} and
  2132. p.tok[iTok].symbol[0] != '>':
  2133. rstMessage(p, mwRstStyle,
  2134. "two or more quoted lines are followed by unquoted line " &
  2135. $(curLine(p) + 1))
  2136. break
  2137. q.tok.add currentTok(p)
  2138. var ival = currentTok(p).ival + quoteSym.len
  2139. inc p.idx, (1 + quoteTokens) # skip newline and > > >
  2140. if currentTok(p).kind == tkWhite:
  2141. ival += currentTok(p).symbol.len
  2142. inc p.idx
  2143. # fix up previous `tkIndent`s to ival (as if >>> were not there)
  2144. var j = q.tok.len - 1
  2145. while j >= 0 and q.tok[j].kind == tkIndent:
  2146. q.tok[j].ival = ival
  2147. dec j
  2148. else: # next segment started
  2149. break
  2150. elif currentTok(p).ival < col:
  2151. break
  2152. else: # the segment continues, a case like:
  2153. # > beginning
  2154. # continuation
  2155. q.tok.add currentTok(p)
  2156. inc p.idx
  2157. else:
  2158. if currentTok(p).col < minCol: minCol = currentTok(p).col
  2159. q.tok.add currentTok(p)
  2160. inc p.idx
  2161. q.indentStack = @[minCol]
  2162. # if initial indentation `minCol` is > 0 then final newlines
  2163. # should be omitted so that parseDoc could advance to the end of tokens:
  2164. var j = q.tok.len - 1
  2165. while q.tok[j].kind == tkIndent: dec j
  2166. q.tok.setLen (j+1)
  2167. q.tok.add Token(kind: tkEof, line: currentTok(p).line)
  2168. result = parseDoc(q)
  2169. proc parseMarkdownBlockQuote(p: var RstParser): PRstNode =
  2170. var (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx)
  2171. let col = currentTok(p).col
  2172. result = newRstNodeA(p, rnMarkdownBlockQuote)
  2173. inc p.idx, quoteTokens # skip first >
  2174. while true:
  2175. var item = newRstNode(rnMarkdownBlockQuoteItem)
  2176. item.quotationDepth = quotationDepth
  2177. if currentTok(p).kind == tkWhite: inc p.idx
  2178. item.add parseMarkdownQuoteSegment(p, curSym, col)
  2179. result.add(item)
  2180. if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
  2181. nextTok(p).kind != tkEof and nextTok(p).symbol[0] == '>':
  2182. (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
  2183. inc p.idx, (1 + quoteTokens) # skip newline and > > >
  2184. else:
  2185. break
proc parseParagraph(p: var RstParser, result: PRstNode) =
  ## Parses the inline content of one paragraph, appending to `result`,
  ## until a blank line, a change of indentation, the start of another
  ## block construct, or a token kind not handled below is reached.
  while true:
    case currentTok(p).kind
    of tkIndent:
      if nextTok(p).kind == tkIndent:
        inc p.idx
        break # blank line breaks paragraph for both Md & Rst
      elif currentTok(p).ival == currInd(p) or (
          isMd(p) and currentTok(p).ival > currInd(p)):
        # (Md allows adding additional indentation inside paragraphs)
        inc p.idx
        # decide what the new line starts with
        case whichSection(p)
        of rnParagraph, rnLeaf, rnHeadline, rnMarkdownHeadline,
            rnOverline, rnDirective:
          # treated as a continuation: the line break becomes one space
          result.add newLeaf(" ")
        of rnLineBlock:
          result.addIfNotNil(parseLineBlock(p))
        of rnMarkdownBlockQuote:
          result.addIfNotNil(parseMarkdownBlockQuote(p))
        else: break
      else:
        break
    of tkPunct:
      # in RST mode a literal-block marker ends the paragraph: a ":" leaf is
      # emitted and the literal block is parsed as the paragraph's last son
      if isRst(p) and (
          let literalBlockKind = whichRstLiteralBlock(p);
          literalBlockKind != lbNone):
        result.add newLeaf(":")
        inc p.idx # skip '::'
        result.add(parseRstLiteralBlock(p, literalBlockKind))
        break
      else:
        parseInline(p, result)
    of tkWhite, tkWord, tkAdornment, tkOther:
      parseInline(p, result)
    else: break
  2221. proc checkHeadingHierarchy(p: RstParser, lvl: int) =
  2222. if lvl - p.s.hCurLevel > 1: # broken hierarchy!
  2223. proc descr(l: int): string =
  2224. (if p.s.hLevels[l].hasOverline: "overline " else: "underline ") &
  2225. repeat(p.s.hLevels[l].symbol, 5)
  2226. var msg = "(section level inconsistent: "
  2227. msg.add descr(lvl) & " unexpectedly found, " &
  2228. "while the following intermediate section level(s) are missing on lines "
  2229. msg.add $p.s.hLevels[p.s.hCurLevel].line & ".." & $curLine(p) & ":"
  2230. for l in p.s.hCurLevel+1 .. lvl-1:
  2231. msg.add " " & descr(l)
  2232. if l != lvl-1: msg.add ","
  2233. rstMessage(p, meNewSectionExpected, msg & ")")
  2234. proc parseHeadline(p: var RstParser): PRstNode =
  2235. if isMarkdownHeadline(p):
  2236. result = newRstNode(rnMarkdownHeadline)
  2237. # Note that level hierarchy is not checked for markdown headings
  2238. result.level = currentTok(p).symbol.len
  2239. assert(nextTok(p).kind == tkWhite)
  2240. inc p.idx, 2
  2241. parseUntilNewline(p, result)
  2242. else:
  2243. result = newRstNode(rnHeadline)
  2244. parseUntilNewline(p, result)
  2245. assert(currentTok(p).kind == tkIndent)
  2246. assert(nextTok(p).kind == tkAdornment)
  2247. var c = nextTok(p).symbol[0]
  2248. inc p.idx, 2
  2249. result.level = getLevel(p, c, hasOverline=false)
  2250. checkHeadingHierarchy(p, result.level)
  2251. p.s.hCurLevel = result.level
  2252. addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor)
  2253. p.s.tocPart.add result
  2254. proc parseOverline(p: var RstParser): PRstNode =
  2255. var c = currentTok(p).symbol[0]
  2256. inc p.idx, 2
  2257. result = newRstNode(rnOverline)
  2258. while true:
  2259. parseUntilNewline(p, result)
  2260. if currentTok(p).kind == tkIndent:
  2261. inc p.idx
  2262. if prevTok(p).ival > currInd(p):
  2263. result.add newLeaf(" ")
  2264. else:
  2265. break
  2266. else:
  2267. break
  2268. result.level = getLevel(p, c, hasOverline=true)
  2269. checkHeadingHierarchy(p, result.level)
  2270. p.s.hCurLevel = result.level
  2271. if currentTok(p).kind == tkAdornment:
  2272. inc p.idx
  2273. if currentTok(p).kind == tkIndent: inc p.idx
  2274. addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor)
  2275. p.s.tocPart.add result
  2276. proc fixHeadlines(s: PRstSharedState) =
  2277. # Fix up section levels depending on presence of a title and subtitle:
  2278. for n in s.tocPart:
  2279. if n.kind in {rnHeadline, rnOverline}:
  2280. if s.hTitleCnt == 2:
  2281. if n.level == 1: # it's the subtitle
  2282. n.level = 0
  2283. elif n.level >= 2: # normal sections, start numbering from 1
  2284. n.level -= 1
  2285. elif s.hTitleCnt == 0:
  2286. n.level += 1
  2287. # Set headline anchors:
  2288. for iHeading in 0 .. s.tocPart.high:
  2289. let n: PRstNode = s.tocPart[iHeading]
  2290. if n.level >= 1:
  2291. n.anchor = rstnodeToRefname(n)
  2292. # Fix anchors for uniqueness if `.. contents::` is present
  2293. if s.hasToc:
  2294. # Find the last higher level section for unique reference name
  2295. var sectionPrefix = ""
  2296. for i in countdown(iHeading - 1, 0):
  2297. if s.tocPart[i].level >= 1 and s.tocPart[i].level < n.level:
  2298. sectionPrefix = rstnodeToRefname(s.tocPart[i]) & "-"
  2299. break
  2300. if sectionPrefix != "":
  2301. n.anchor = sectionPrefix & n.anchor
  2302. s.tocPart.setLen 0
type
  # Extent of one RST simple-table column: `start` is the column of the
  # adornment token's first character and `stop` the column of its last
  # character (filled in by `getColumns` via `tokStart`/`tokEnd`).
  ColSpec = object
    start, stop: int
  RstCols = seq[ColSpec]
  # Token-index range of one Markdown table cell: `first` is the index of
  # the first token and `last` is one past the final token (it is read as
  # `first ..< last` in `getColContents`).
  ColumnLimits = tuple # for Markdown
    first, last: int
  ColSeq = seq[ColumnLimits]
  2310. proc tokStart(p: RstParser, idx: int): int =
  2311. result = p.tok[idx].col
  2312. proc tokStart(p: RstParser): int =
  2313. result = tokStart(p, p.idx)
  2314. proc tokEnd(p: RstParser, idx: int): int =
  2315. result = p.tok[idx].col + p.tok[idx].symbol.len - 1
  2316. proc tokEnd(p: RstParser): int =
  2317. result = tokEnd(p, p.idx)
  2318. proc getColumns(p: RstParser, cols: var RstCols, startIdx: int): int =
  2319. # Fills table column specification (or separator) `cols` and returns
  2320. # the next parser index after it.
  2321. var L = 0
  2322. result = startIdx
  2323. while true:
  2324. inc L
  2325. setLen(cols, L)
  2326. cols[L - 1].start = tokStart(p, result)
  2327. cols[L - 1].stop = tokEnd(p, result)
  2328. assert(p.tok[result].kind == tkAdornment)
  2329. inc result
  2330. if p.tok[result].kind != tkWhite: break
  2331. inc result
  2332. if p.tok[result].kind != tkAdornment: break
  2333. if p.tok[result].kind == tkIndent: inc result
proc checkColumns(p: RstParser, cols: RstCols) =
  ## Validates the adornment line at the current position against the
  ## column specification `cols`: every column must end at the expected
  ## position, and the line may have neither extra nor missing columns.
  ## Problems are reported through `rstMessage`; nothing is returned and
  ## the parser position is not changed.
  var i = p.idx
  if p.tok[i].symbol[0] != '=':
    rstMessage(p, mwRstStyle,
               "only tables with `=` columns specification are allowed")
  for col in 0 ..< cols.len:
    if tokEnd(p, i) != cols[col].stop:
      rstMessage(p, meIllformedTable,
                 "end of table column #$1 should end at position $2" % [
                   $(col+1), $(cols[col].stop+ColRstOffset)],
                 p.tok[i].line, tokEnd(p, i))
    inc i
    if col == cols.len - 1:
      # after the last column only optional whitespace may precede the
      # end of the line
      if p.tok[i].kind == tkWhite:
        inc i
      if p.tok[i].kind notin {tkIndent, tkEof}:
        rstMessage(p, meIllformedTable, "extraneous column specification")
    elif p.tok[i].kind == tkWhite:
      inc i # separator between two columns
    else:
      rstMessage(p, meIllformedTable, "no enough table columns",
                 p.tok[i].line, p.tok[i].col)
  2356. proc getSpans(p: RstParser, nextLine: int,
  2357. cols: RstCols, unitedCols: RstCols): seq[int] =
  2358. ## Calculates how many columns a joined cell occupies.
  2359. if unitedCols.len > 0:
  2360. result = newSeq[int](unitedCols.len)
  2361. var
  2362. iCell = 0
  2363. jCell = 0
  2364. uCell = 0
  2365. while jCell < cols.len:
  2366. if cols[jCell].stop < unitedCols[uCell].stop:
  2367. inc jCell
  2368. elif cols[jCell].stop == unitedCols[uCell].stop:
  2369. result[uCell] = jCell - iCell + 1
  2370. iCell = jCell + 1
  2371. jCell = jCell + 1
  2372. inc uCell
  2373. else:
  2374. rstMessage(p, meIllformedTable,
  2375. "spanning underline does not match main table columns",
  2376. p.tok[nextLine].line, p.tok[nextLine].col)
proc parseSimpleTableRow(p: var RstParser, cols: RstCols, colChar: char): PRstNode =
  ## Parses 1 row in RST simple table.
  ##
  ## `cols` is the table's column specification and `colChar` the character
  ## used in the table's border lines. Returns an `rnTableRow` whose sons
  ## are `rnTableDataCell` nodes; each cell's text is collected per column
  ## and then parsed as a separate document with `parseDoc`.
  # Consider that columns may be spanning (united by using underline like ----):
  let nextLine = tokenAfterNewline(p)
  var unitedCols: RstCols
  var afterSpan: int # parser index to jump to once the row is done
  if p.tok[nextLine].kind == tkAdornment and p.tok[nextLine].symbol[0] == '-':
    afterSpan = getColumns(p, unitedCols, nextLine)
    if unitedCols == cols and p.tok[nextLine].symbol[0] == colChar:
      # legacy rst.nim compat.: allow punctuation like `----` in main boundaries
      afterSpan = nextLine
      unitedCols.setLen 0
  else:
    afterSpan = nextLine
  # Column limits of cell `i`, taken from the spanning underline if any:
  template colEnd(i): int =
    if i == cols.len - 1: high(int) # last column has no limit
    elif unitedCols.len > 0: unitedCols[i].stop else: cols[i].stop
  template colStart(i): int =
    if unitedCols.len > 0: unitedCols[i].start else: cols[i].start
  # raw text of every cell of this row
  var row = newSeq[string](if unitedCols.len > 0: unitedCols.len else: cols.len)
  var spans: seq[int] = getSpans(p, nextLine, cols, unitedCols)

  let line = currentTok(p).line
  # Iterate over the lines a single cell may span:
  while true:
    var nCell = 0
    # distribute tokens between cells in the current line:
    while currentTok(p).kind notin {tkIndent, tkEof}:
      if tokEnd(p) <= colEnd(nCell):
        if tokStart(p) < colStart(nCell):
          # token lies in the gap before the cell: only whitespace is allowed
          if currentTok(p).kind != tkWhite:
            rstMessage(p, meIllformedTable,
                       "this word crosses table column from the left")
          else:
            inc p.idx
        else:
          row[nCell].add(currentTok(p).symbol)
          inc p.idx
      else:
        # token ends beyond the current cell: move on to the next cell
        if tokStart(p) < colEnd(nCell) and currentTok(p).kind != tkWhite:
          rstMessage(p, meIllformedTable,
                     "this word crosses table column from the right")
        inc nCell
    if currentTok(p).kind == tkIndent: inc p.idx
    # a token inside the 1st column means the next row has started
    if tokEnd(p) <= colEnd(0): break
    # Continued current cells because the 1st column is empty.
    if currentTok(p).kind in {tkEof, tkAdornment}:
      break
    for nCell in countup(1, high(row)): row[nCell].add('\n')
  result = newRstNode(rnTableRow)
  var q: RstParser
  # parse the collected text of every cell with a fresh sub-parser
  for uCell in 0 ..< row.len:
    initParser(q, p.s)
    q.col = colStart(uCell)
    q.line = line - 1
    getTokens(row[uCell], q.tok)
    let cell = newRstNode(rnTableDataCell)
    cell.span = if spans.len == 0: 0 else: spans[uCell]
    cell.add(parseDoc(q))
    result.add(cell)
  # skip the spanning underline, if it was consumed as part of this row
  if afterSpan > p.idx:
    p.idx = afterSpan
proc parseSimpleTable(p: var RstParser): PRstNode =
  ## Parses an RST simple table starting at its first border line and
  ## returns an `rnTable` node with one son per row.
  ##
  ## Each border line met after the first one marks the rows parsed so far
  ## as header rows, unless it is the last line of the table.
  var cols: RstCols
  result = newRstNodeA(p, rnTable)
  let startIdx = getColumns(p, cols, p.idx)
  let colChar = currentTok(p).symbol[0]
  checkColumns(p, cols)
  p.idx = startIdx
  result.colCount = cols.len
  while true:
    if currentTok(p).kind == tkAdornment:
      checkColumns(p, cols)
      p.idx = tokenAfterNewline(p)
      if currentTok(p).kind in {tkEof, tkIndent}:
        # skip last adornment line:
        break
      if result.sons.len > 0: result.sons[^1].endsHeader = true
      # fix rnTableDataCell -> rnTableHeaderCell for previous table rows:
      for nRow in 0 ..< result.sons.len:
        for nCell in 0 ..< result.sons[nRow].len:
          template cell: PRstNode = result.sons[nRow].sons[nCell]
          # the node kind is set at construction, so build a new node
          # that reuses the old one's sons, span and anchor
          cell = PRstNode(kind: rnTableHeaderCell, sons: cell.sons,
                          span: cell.span, anchor: cell.anchor)
    if currentTok(p).kind == tkEof: break
    let tabRow = parseSimpleTableRow(p, cols, colChar)
    result.add tabRow
  2463. proc readTableRow(p: var RstParser): ColSeq =
  2464. if currentTok(p).symbol == "|": inc p.idx
  2465. while currentTok(p).kind notin {tkIndent, tkEof}:
  2466. var limits: ColumnLimits
  2467. limits.first = p.idx
  2468. while currentTok(p).kind notin {tkIndent, tkEof}:
  2469. if currentTok(p).symbol == "|" and prevTok(p).symbol != "\\": break
  2470. inc p.idx
  2471. limits.last = p.idx
  2472. result.add(limits)
  2473. if currentTok(p).kind in {tkIndent, tkEof}: break
  2474. inc p.idx
  2475. p.idx = tokenAfterNewline(p)
  2476. proc getColContents(p: var RstParser, colLim: ColumnLimits): string =
  2477. for i in colLim.first ..< colLim.last:
  2478. result.add(p.tok[i].symbol)
  2479. result.strip
  2480. proc isValidDelimiterRow(p: var RstParser, colNum: int): bool =
  2481. let row = readTableRow(p)
  2482. if row.len != colNum: return false
  2483. for limits in row:
  2484. let content = getColContents(p, limits)
  2485. if content.len < 3 or not (content.startsWith("--") or content.startsWith(":-")):
  2486. return false
  2487. return true
proc parseMarkdownTable(p: var RstParser): PRstNode =
  ## Parses a Markdown pipe table: a header row, a delimiter row and then
  ## every following line that starts with `|`. Returns an
  ## `rnMarkdownTable` node with one `rnTableRow` son per row.
  var
    row: ColSeq     # cell limits of the row being processed
    a, b: PRstNode  # current row node / current cell node
    q: RstParser    # sub-parser reused for every cell
  result = newRstNodeA(p, rnMarkdownTable)

  proc parseRow(p: var RstParser, cellKind: RstNodeKind, result: PRstNode) =
    # Reads one line and appends it to `result` as a row of `cellKind` cells.
    row = readTableRow(p)
    if result.colCount == 0: result.colCount = row.len # table header
    # pad short rows with empty cells; cells beyond `colCount` are ignored
    elif row.len < result.colCount: row.setLen(result.colCount)
    a = newRstNode(rnTableRow)
    for j in 0 ..< result.colCount:
      b = newRstNode(cellKind)
      initParser(q, p.s)
      q.col = p.col
      q.line = currentTok(p).line - 1
      getTokens(getColContents(p, row[j]), q.tok)
      b.add(parseDoc(q))
      a.add(b)
    result.add(a)

  parseRow(p, rnTableHeaderCell, result)
  if not isValidDelimiterRow(p, result.colCount):
    rstMessage(p, meMarkdownIllformedTable)
  while predNL(p) and currentTok(p).symbol == "|":
    parseRow(p, rnTableDataCell, result)
  2513. proc parseTransition(p: var RstParser): PRstNode =
  2514. result = newRstNodeA(p, rnTransition)
  2515. inc p.idx
  2516. if currentTok(p).kind == tkIndent: inc p.idx
  2517. if currentTok(p).kind == tkIndent: inc p.idx
  2518. proc parseBulletList(p: var RstParser): PRstNode =
  2519. result = nil
  2520. if nextTok(p).kind == tkWhite:
  2521. var bullet = currentTok(p).symbol
  2522. var col = currentTok(p).col
  2523. result = newRstNodeA(p, rnBulletList)
  2524. pushInd(p, p.tok[p.idx + 2].col)
  2525. inc p.idx, 2
  2526. while true:
  2527. var item = newRstNode(rnBulletItem)
  2528. parseSection(p, item)
  2529. result.add(item)
  2530. if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
  2531. nextTok(p).symbol == bullet and
  2532. p.tok[p.idx + 2].kind == tkWhite:
  2533. inc p.idx, 3
  2534. else:
  2535. break
  2536. popInd(p)
proc parseOptionList(p: var RstParser): PRstNode =
  ## Parses an option list (lines recognized by `isOptionList`) into an
  ## `rnOptionList` node. Every item is an `rnOptionListItem` holding an
  ## `rnOptionGroup` (the option text) and an `rnDescription`; items are
  ## numbered through their `order` field starting from 1.
  result = newRstNodeA(p, rnOptionList)
  let col = currentTok(p).col
  var order = 1
  while true:
    if currentTok(p).col == col and isOptionList(p):
      var a = newRstNode(rnOptionGroup)
      var b = newRstNode(rnDescription)
      var c = newRstNode(rnOptionListItem)
      if match(p, p.idx, "//w"): inc p.idx
      # the option group extends up to a run of 2+ spaces or the line end
      while currentTok(p).kind notin {tkIndent, tkEof}:
        if currentTok(p).kind == tkWhite and currentTok(p).symbol.len > 1:
          inc p.idx
          break
        a.add(newLeaf(p))
        inc p.idx
      var j = tokenAfterNewline(p)
      # a deeper-indented next line makes the description a nested section
      if j > 0 and p.tok[j - 1].kind == tkIndent and p.tok[j - 1].ival > currInd(p):
        pushInd(p, p.tok[j - 1].ival)
        parseSection(p, b)
        popInd(p)
      else:
        parseLine(p, b)
      while currentTok(p).kind == tkIndent: inc p.idx
      c.add(a)
      c.add(b)
      c.order = order; inc order
      result.add(c)
    else:
      if currentTok(p).kind != tkEof: dec p.idx # back to tkIndent
      break
proc parseMdDefinitionList(p: var RstParser): PRstNode =
  ## Parses (Pandoc/kramdown/PHPextra) Markdown definition lists.
  ##
  ## Returns an `rnMdDefList` node; every `rnDefItem` son consists of one
  ## `rnDefName` (the term line) followed by one or more `rnDefBody` nodes
  ## (each introduced by `: `).
  result = newRstNodeA(p, rnMdDefList)
  let termCol = currentTok(p).col
  while true:
    var item = newRstNode(rnDefItem)
    var term = newRstNode(rnDefName)
    parseLine(p, term)
    skipNewlines(p)
    inc p.idx, 2 # skip ":" and space
    item.add(term)
    while true:
      var def = newRstNode(rnDefBody)
      let indent = getMdBlockIndent(p)
      pushInd(p, indent)
      parseSection(p, def)
      popInd(p)
      item.add(def)
      let j = skipNewlines(p, p.idx)
      if isMdDefBody(p, j, termCol): # parse next definition body
        p.idx = j + 2 # skip ":" and space
      else:
        break
    result.add(item)
    # another item follows if a term at the same column has its own body
    let j = skipNewlines(p, p.idx)
    if p.tok[j].col == termCol and isMdDefListItem(p, j):
      p.idx = j # parse next item
    else:
      break
proc parseDefinitionList(p: var RstParser): PRstNode =
  ## Parses an RST definition list: term lines, each followed by a
  ## deeper-indented body. Returns an `rnDefList` node of `rnDefItem` sons
  ## (`rnDefName` + `rnDefBody`), or nil when the construct at the current
  ## position is not a definition list or no item could be parsed.
  result = nil
  var j = tokenAfterNewline(p) - 1
  # the line after the term must be indented deeper, and the term line must
  # not end in `::`
  if j >= 1 and p.tok[j].kind == tkIndent and
      p.tok[j].ival > currInd(p) and p.tok[j - 1].symbol != "::":
    var col = currentTok(p).col
    result = newRstNodeA(p, rnDefList)
    while true:
      if isOptionList(p):
        break # option list has priority over def.list
      j = p.idx # remember the start of the term to be able to roll back
      var a = newRstNode(rnDefName)
      parseLine(p, a)
      if currentTok(p).kind == tkIndent and
          currentTok(p).ival > currInd(p) and
          nextTok(p).symbol != "::" and
          nextTok(p).kind notin {tkIndent, tkEof}:
        pushInd(p, currentTok(p).ival)
        var b = newRstNode(rnDefBody)
        parseSection(p, b)
        var c = newRstNode(rnDefItem)
        c.add(a)
        c.add(b)
        result.add(c)
        popInd(p)
      else:
        # not a definition item after all: undo parsing of the term line
        p.idx = j
        break
      if currentTok(p).kind == tkIndent and currentTok(p).ival == col:
        inc p.idx
        j = tokenAfterNewline(p) - 1
        # go on only if the next term is followed by an indented body
        if j >= 1 and p.tok[j].kind == tkIndent and p.tok[j].ival > col and
            p.tok[j-1].symbol != "::" and p.tok[j+1].kind != tkIndent:
          discard
        else:
          break
    if result.len == 0: result = nil
  2634. proc parseEnumList(p: var RstParser): PRstNode =
  2635. const
  2636. wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ",
  2637. "(x) ", "x) ", "x. "]
  2638. # enumerator patterns, where 'x' means letter and 'n' means number
  2639. wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3] # number of tokens
  2640. wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0]
  2641. # position of enumeration sequence (number/letter) in enumerator
  2642. let col = currentTok(p).col
  2643. var w = 0
  2644. while w < wildcards.len:
  2645. if match(p, p.idx, wildcards[w]): break
  2646. inc w
  2647. assert w < wildcards.len
  2648. proc checkAfterNewline(p: RstParser, report: bool): bool =
  2649. ## If no indentation on the next line then parse as a normal paragraph
  2650. ## according to the RST spec. And report a warning with suggestions
  2651. let j = tokenAfterNewline(p, start=p.idx+1)
  2652. let requiredIndent = p.tok[p.idx+wildToken[w]].col
  2653. if p.tok[j].kind notin {tkIndent, tkEof} and
  2654. p.tok[j].col < requiredIndent and
  2655. (p.tok[j].col > col or
  2656. (p.tok[j].col == col and not match(p, j, wildcards[w]))):
  2657. if report:
  2658. let n = p.line + p.tok[j].line
  2659. let msg = "\n" & """
  2660. not enough indentation on line $2
  2661. (should be at column $3 if it's a continuation of enum. list),
  2662. or no blank line after line $1 (if it should be the next paragraph),
  2663. or no escaping \ at the beginning of line $1
  2664. (if lines $1..$2 are a normal paragraph, not enum. list)""".dedent
  2665. let c = p.col + requiredIndent + ColRstOffset
  2666. rstMessage(p, mwRstStyle, msg % [$(n-1), $n, $c],
  2667. p.tok[j].line, p.tok[j].col)
  2668. result = false
  2669. else:
  2670. result = true
  2671. if not checkAfterNewline(p, report = true):
  2672. return nil
  2673. result = newRstNodeA(p, rnEnumList)
  2674. let autoEnums = if roSupportMarkdown in p.s.options: @["#", "1"] else: @["#"]
  2675. var prevAE = "" # so as not allow mixing auto-enumerators `1` and `#`
  2676. var curEnum = 1
  2677. for i in 0 ..< wildToken[w]-1: # add first enumerator with (, ), and .
  2678. if p.tok[p.idx + i].symbol == "#":
  2679. prevAE = "#"
  2680. result.labelFmt.add "1"
  2681. else:
  2682. result.labelFmt.add p.tok[p.idx + i].symbol
  2683. var prevEnum = p.tok[p.idx + wildIndex[w]].symbol
  2684. inc p.idx, wildToken[w]
  2685. while true:
  2686. var item = newRstNode(rnEnumItem)
  2687. pushInd(p, currentTok(p).col)
  2688. parseSection(p, item)
  2689. popInd(p)
  2690. result.add(item)
  2691. if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
  2692. match(p, p.idx+1, wildcards[w]):
  2693. # don't report to avoid duplication of warning since for
  2694. # subsequent enum. items parseEnumList will be called second time:
  2695. if not checkAfterNewline(p, report = false):
  2696. break
  2697. let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol
  2698. # check that it's in sequence: enumerator == next(prevEnum)
  2699. if "n" in wildcards[w]: # arabic numeral
  2700. let prevEnumI = try: parseInt(prevEnum) except: 1
  2701. if enumerator in autoEnums:
  2702. if prevAE != "" and enumerator != prevAE:
  2703. break
  2704. prevAE = enumerator
  2705. curEnum = prevEnumI + 1
  2706. else: curEnum = (try: parseInt(enumerator) except: 1)
  2707. if curEnum - prevEnumI != 1:
  2708. break
  2709. prevEnum = enumerator
  2710. else: # a..z
  2711. let prevEnumI = ord(prevEnum[0])
  2712. if enumerator == "#": curEnum = prevEnumI + 1
  2713. else: curEnum = ord(enumerator[0])
  2714. if curEnum - prevEnumI != 1:
  2715. break
  2716. prevEnum = $chr(curEnum)
  2717. inc p.idx, 1 + wildToken[w]
  2718. else:
  2719. break
  2720. proc sonKind(father: PRstNode, i: int): RstNodeKind =
  2721. result = rnLeaf
  2722. if i < father.len: result = father.sons[i].kind
proc parseSection(p: var RstParser, result: PRstNode) =
  ## parse top-level RST elements: sections, transitions and body elements.
  ##
  ## Block elements are appended to `result` until the end of input or a
  ## line indented less than the current indentation level is reached.
  ## Deeper-indented lines are handled recursively: as block quotes in RST
  ## mode; in Markdown-preferring mode as a literal block for 4 or more
  ## extra columns and as ordinary nested content otherwise.
  while true:
    var leave = false
    assert(p.idx >= 0)
    while currentTok(p).kind == tkIndent:
      if currInd(p) == currentTok(p).ival:
        inc p.idx
      elif currentTok(p).ival > currInd(p):
        if roPreferMarkdown in p.s.options: # Markdown => normal paragraphs
          if currentTok(p).ival - currInd(p) >= 4:
            result.add parseLiteralBlock(p)
          else:
            pushInd(p, currentTok(p).ival)
            parseSection(p, result)
            popInd(p)
        else: # RST mode => block quotes
          pushInd(p, currentTok(p).ival)
          var a = newRstNodeA(p, rnBlockQuote)
          parseSection(p, a)
          result.add(a)
          popInd(p)
      else:
        # indentation decreased: this section is finished
        while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
          inc p.idx # skip blank lines
        leave = true
        break
    if leave or currentTok(p).kind == tkEof: break
    var a: PRstNode = nil
    var k = whichSection(p)
    case k
    of rnLiteralBlock:
      inc p.idx # skip '::'
      a = parseLiteralBlock(p)
    of rnBulletList: a = parseBulletList(p)
    of rnLineBlock: a = parseLineBlock(p)
    of rnMarkdownBlockQuote: a = parseMarkdownBlockQuote(p)
    of rnDirective: a = parseDotDot(p)
    of rnEnumList: a = parseEnumList(p)
    of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)")
    of rnParagraph: discard
    of rnDefList: a = parseDefinitionList(p)
    of rnMdDefList: a = parseMdDefinitionList(p)
    of rnFieldList:
      if p.idx > 0: dec p.idx
      a = parseFields(p)
    of rnTransition: a = parseTransition(p)
    of rnHeadline, rnMarkdownHeadline: a = parseHeadline(p)
    of rnOverline: a = parseOverline(p)
    of rnTable: a = parseSimpleTable(p)
    of rnMarkdownTable: a = parseMarkdownTable(p)
    of rnOptionList: a = parseOptionList(p)
    else:
      #InternalError("rst.parseSection()")
      discard
    # a specialized parser that produced nothing (or a plain paragraph)
    # falls back to paragraph parsing; a directive that returned nil does not
    if a == nil and k != rnDirective:
      a = newRstNodeA(p, rnParagraph)
      parseParagraph(p, a)
    result.addIfNotNil(a)
  # a first paragraph that is not followed by another paragraph is turned
  # into an `rnInner` node, keeping its sons and anchor
  if sonKind(result, 0) == rnParagraph and sonKind(result, 1) != rnParagraph:
    result.sons[0] = newRstNode(rnInner, result.sons[0].sons,
                                anchor=result.sons[0].anchor)
  proc parseSectionWrapper(p: var RstParser): PRstNode =
    ## Parses a section into a fresh `rnInner` node, then strips every
    ## `rnInner` wrapper that holds exactly one son and returns the node
    ## reached that way.
    var root = newRstNode(rnInner)
    parseSection(p, root)
    # unwrap `rnInner` nodes that only carry a single son
    while root.kind == rnInner and root.len == 1:
      root = root.sons[0]
    result = root
  proc parseDoc(p: var RstParser): PRstNode =
    ## Parses the token stream of `p` with `parseSectionWrapper` and emits
    ## `meGeneralParseError` when parsing stopped before the `tkEof` token.
    result = p.parseSectionWrapper()
    let reachedEof = currentTok(p).kind == tkEof
    if not reachedEof:
      rstMessage(p, meGeneralParseError)
  type
    DirFlag = enum   ## switches that steer what `parseDirective` reads
      hasArg,        ## an `rnDirArg` argument node is parsed
      hasOptions,    ## an indented field list with options is parsed
      argIsFile,     ## argument = run of word/other/punct/adornment tokens
      argIsWord      ## argument = one `tkWord` after optional whitespace
    DirFlags = set[DirFlag]
    SectionParser = proc (p: var RstParser): PRstNode {.nimcall, gcsafe.}
  proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode =
    ## Parses the argument and the options of a directive block.
    ##
    ## The returned node of kind `k` gets exactly two sons here: the directive
    ## argument (rnDirArg) and the options (as returned by `parseFields`).
    ## The third son, the directive content, is appended by the 4-parameter
    ## `parseDirective` overload.
    ##
    ## Either son may be nil, so check before accessing them.
    const fileArgTokens = {tkWord, tkOther, tkPunct, tkAdornment}
    result = newRstNodeA(p, k)
    if k == rnCodeBlock:
      result.info = lineInfo(p)

    var dirArg: PRstNode = nil
    if hasArg in flags:
      dirArg = newRstNode(rnDirArg)
      if argIsFile in flags:
        # take every consecutive token that may be part of a file name
        while currentTok(p).kind in fileArgTokens:
          dirArg.add newLeaf(p)
          inc p.idx
      elif argIsWord in flags:
        while currentTok(p).kind == tkWhite:
          inc p.idx
        if currentTok(p).kind != tkWord:
          dirArg = nil          # no word found => no argument at all
        else:
          dirArg.add newLeaf(p)
          inc p.idx
      else:
        parseLine(p, dirArg)
    result.add dirArg

    var dirOptions: PRstNode = nil
    # options are an indented block whose first token after the indent is ":"
    if hasOptions in flags and currentTok(p).kind == tkIndent and
        currentTok(p).ival > currInd(p) and nextTok(p).symbol == ":":
      pushInd(p, currentTok(p).ival)
      dirOptions = parseFields(p)
      popInd(p)
    result.add dirOptions
  proc indFollows(p: RstParser): bool =
    ## True when the current token is a `tkIndent` whose `ival` is greater
    ## than the current indentation `currInd(p)`.
    if currentTok(p).kind != tkIndent:
      return false
    result = currentTok(p).ival > currInd(p)
  proc parseBlockContent(p: var RstParser, father: var PRstNode,
                         contentParser: SectionParser): bool {.gcsafe.} =
    ## Parses the trailing content part of an explicit markup block
    ## (directive, footnote, etc.) with `contentParser` and appends it to
    ## `father`. Returns true when content was parsed and appended, false when
    ## the current token is a `tkIndent` that is not a deeper indentation.
    let startsOnSameLine = currentTok(p).kind != tkIndent
    if not (startsOnSameLine or indFollows(p)):
      return false
    pushInd(p, getWrappableIndent(p))
    let parsed = contentParser(p)
    popInd(p)
    father.add parsed
    result = true
  proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags,
                      contentParser: SectionParser): PRstNode =
    ## Helper that does the main work for the specific directive procs.
    ## Always returns a node of kind `k` with these 3 sons:
    ##
    ## 1) rnDirArg
    ## 2) the options field list
    ## 3) the node returned by `contentParser`
    ##
    ## .. warning:: Any of the 3 sons may be nil.
    result = parseDirective(p, k, flags)
    let gotContent =
      if isNil(contentParser): false
      else: parseBlockContent(p, result, contentParser)  # appends son no. 3
    if not gotContent:
      # keep the 3-son layout even without content
      result.add(PRstNode(nil))
  proc parseDirBody(p: var RstParser, contentParser: SectionParser): PRstNode =
    ## Runs `contentParser` on the indented block that follows, using that
    ## block's indentation. Returns nil when no deeper indentation follows.
    if not indFollows(p):
      return nil
    pushInd(p, currentTok(p).ival)
    result = contentParser(p)
    popInd(p)
  proc dirInclude(p: var RstParser): PRstNode =
    ## Handles the ``include`` directive: the argument names a file that is
    ## located with `findRelativeFile`; its text is either put into a literal
    ## block or tokenized and parsed with a fresh parser sharing `p.s`.
    ##
    ## The following options are recognized:
    ##
    ## :start-after: text to find in the external data file
    ##
    ##     Only the content after the first occurrence of the specified
    ##     text will be included. If text is not found inclusion will
    ##     start from beginning of the file
    ##
    ## :end-before: text to find in the external data file
    ##
    ##     Only the content before the first occurrence of the specified
    ##     text (but after any after text) will be included. If text is
    ##     not found inclusion will happen until the end of the file.
    ##
    ## :literal: when `getFieldValue` yields a non-empty value for it, the
    ##     entire file is inserted as a single literal block (useful for
    ##     program listings); ``start-after``/``end-before`` are ignored then.
    ##
    ## `result` stays nil when the file is not found and `rstMessage` returns.
    # The `encoding` option (name of the text encoding of the external data
    # file) is not handled by this proc.
    # NOTE(review): `literal` is meant to be a flag with an empty value, yet
    # the check below requires a non-empty field value -- confirm against
    # `getFieldValue`.
    result = nil
    let n = parseDirective(p, rnDirective, {hasArg, argIsFile, hasOptions}, nil)
    let filename = strip(addNodes(n.sons[0]))
    let path = p.findRelativeFile(filename)
    if path == "":
      rstMessage(p, meCannotOpenFile, filename)
    elif getFieldValue(n, "literal") != "":
      # XXX: error handling; recursive file inclusion!
      result = newRstNode(rnLiteralBlock)
      result.add newLeaf(readFile(path))
    else:
      # XXX: error handling; recursive file inclusion!
      let inputString = readFile(path)
      let lastPosition = inputString.len - 1
      let startPosition =
        block:
          let searchFor = n.getFieldValue("start-after").strip()
          if searchFor != "":
            let pos = inputString.find(searchFor)
            if pos != -1: pos + searchFor.len
            else: 0
          else:
            0
      let endPosition =
        block:
          let searchFor = n.getFieldValue("end-before").strip()
          if searchFor != "":
            let pos = inputString.find(searchFor, start = startPosition)
            if pos != -1: pos - 1
            else: lastPosition  # marker not found => include up to the end
          else:
            lastPosition
      var q: RstParser
      initParser(q, p.s)
      # messages emitted while parsing the included text refer to `path`;
      # the previous file index is put back afterwards
      let saveFileIdx = p.s.currFileIdx
      setCurrFilename(p.s, path)
      getTokens(
        inputString[startPosition..endPosition],
        q.tok)
      # workaround a GCC bug; more like the interior pointer bug?
      #if find(q.tok[high(q.tok)].symbol, "\0\x01\x02") > 0:
      #  InternalError("Too many binary zeros in include file")
      result = parseDoc(q)
      p.s.currFileIdx = saveFileIdx
  proc dirCodeBlock(p: var RstParser, nimExtension = false): PRstNode =
    ## Parses a code block.
    ##
    ## A code block is a directive tree whose `kind` is rnCodeBlock; see
    ## ``parseDirective`` for the layout of its sons.
    ##
    ## Two spellings exist: the standard `code directive
    ## <http://docutils.sourceforge.net/docs/ref/rst/directives.html#code>`_ and
    ## the nim extension ``.. code-block::``. For the extension the default
    ## syntax highlighting language should be Nim, which is communicated to
    ## the generator through a fake internal field named
    ## ``default-language`` (unlikely to collide with a field written in any
    ## rst input file).
    ##
    ## As an extension the ``file`` field is processed too: if present, the
    ## code block is replaced with the contents of the referenced file. This
    ## is disabled in sandboxed mode and can be re-enabled with the
    ## `roSandboxDisabled` flag.
    result = p.parseDirective(rnCodeBlock, {hasArg, hasOptions},
                              parseLiteralBlock)
    p.mayLoadFile(result)
    if nimExtension:
      # our custom Nim extension: extend the field block
      p.defaultCodeLangNim(result)
  proc dirContainer(p: var RstParser): PRstNode =
    ## Parses a ``container`` directive: an argument plus content parsed by
    ## `parseSectionWrapper`. The resulting rnContainer node has 3 sons.
    result = p.parseDirective(rnContainer, {hasArg}, parseSectionWrapper)
    assert result.len == 3
  proc dirImage(p: var RstParser): PRstNode =
    ## Parses an ``image`` directive: a file argument and options, no content
    ## parser.
    result = p.parseDirective(rnImage, {hasArg, argIsFile, hasOptions}, nil)
  proc dirFigure(p: var RstParser): PRstNode =
    ## Parses a ``figure`` directive: a file argument, options and content
    ## parsed by `parseSectionWrapper`.
    result = p.parseDirective(rnFigure, {hasArg, argIsFile, hasOptions},
                              parseSectionWrapper)
  proc dirTitle(p: var RstParser): PRstNode =
    ## Parses a ``title`` directive: only an argument, no options or content
    ## parser.
    result = p.parseDirective(rnTitle, {hasArg}, nil)
  proc dirContents(p: var RstParser): PRstNode =
    ## Parses a ``contents`` directive (argument only) and records in the
    ## shared state that the document has a table of contents.
    result = p.parseDirective(rnContents, {hasArg}, nil)
    p.s.hasToc = true
  proc dirIndex(p: var RstParser): PRstNode =
    ## Parses an ``index`` directive: no argument, no options, content parsed
    ## by `parseSectionWrapper`.
    result = p.parseDirective(rnIndex, {}, parseSectionWrapper)
  proc dirAdmonition(p: var RstParser, d: string): PRstNode =
    ## Parses an admonition-like directive named `d` (content only) and
    ## stores `d` in the `adType` field of the resulting rnAdmonition node.
    result = p.parseDirective(rnAdmonition, {}, parseSectionWrapper)
    result.adType = d
  proc dirDefaultRole(p: var RstParser): PRstNode =
    ## Parses a ``default-role`` directive and updates the current role in
    ## the shared state: the first leaf of the argument if one was given,
    ## otherwise `defaultRole(p.s.options)`. `currRoleKind` is refreshed
    ## afterwards in both cases.
    result = parseDirective(p, rnDefaultRole, {hasArg}, nil)
    let roleArg = result.sons[0]
    if roleArg.len > 0:
      assert result.sons[0].sons[0].kind == rnLeaf
      p.s.currRole = roleArg.sons[0].text
    else:
      p.s.currRole = defaultRole(p.s.options)
    p.s.currRoleKind = whichRole(p, p.s.currRole)
  proc dirRole(p: var RstParser): PRstNode =
    ## Parses a ``role`` directive (argument and options). Currently this
    ## only checks that the ``language`` field, when non-empty, names a
    ## supported source language and warns with `mwUnsupportedLanguage`
    ## otherwise. TODO: real role association
    result = parseDirective(p, rnDirective, {hasArg, hasOptions}, nil)
    let lang = getFieldValue(result, "language").strip
    if lang.len == 0:
      return
    if getSourceLanguage(lang) == langNone:
      rstMessage(p, mwUnsupportedLanguage, lang)
  proc dirRawAux(p: var RstParser, result: var PRstNode, kind: RstNodeKind,
                 contentParser: SectionParser) =
    ## Turns the parsed directive in `result` into a node of kind `kind`.
    ##
    ## With a non-empty ``file`` field the node gets a single leaf holding
    ## the contents of that file (`meCannotOpenFile` is emitted and `result`
    ## is left untouched when the file is not found). Without it the node
    ## keeps the sons of the directive and gets the directive body, parsed
    ## by `contentParser`, appended.
    let filename = getFieldValue(result, "file")
    if filename.len == 0:
      result = newRstNode(kind, result.sons)
      result.add parseDirBody(p, contentParser)
      return
    let path = p.findRelativeFile(filename)
    if path.len == 0:
      rstMessage(p, meCannotOpenFile, filename)
    else:
      let contents = readFile(path)
      result = newRstNode(kind)
      result.add newLeaf(contents)
  proc dirRaw(p: var RstParser): PRstNode =
    ## Parses a ``raw`` directive.
    ##
    ## The following options are recognized:
    ##
    ## file : string (newlines removed)
    ##     The local filesystem path of a raw data file to be included.
    ##
    ## The word argument selects the output format, compared ignoring case:
    ##
    ## * html
    ## * latex
    ##
    ## Any other word yields `meInvalidDirective`; without a word argument
    ## the body is parsed as a regular section into an rnRaw node.
    result = parseDirective(p, rnDirective, {hasOptions, hasArg, argIsWord})
    if result.sons[0] == nil:
      dirRawAux(p, result, rnRaw, parseSectionWrapper)
      return
    # read the format before `dirRawAux` replaces `result`
    let format = result.sons[0].sons[0].text
    if cmpIgnoreCase(format, "html") == 0:
      dirRawAux(p, result, rnRawHtml, parseLiteralBlock)
    elif cmpIgnoreCase(format, "latex") == 0:
      dirRawAux(p, result, rnRawLatex, parseLiteralBlock)
    else:
      rstMessage(p, meInvalidDirective, format)
  proc selectDir(p: var RstParser, d: string): PRstNode =
    ## Dispatches to the parser of the directive named `d`.
    ##
    ## Unless `roSandboxDisabled` is set, a directive that is not in
    ## `SandboxDirAllowlist` is reported with `meSandboxedDirective`. The
    ## ``raw`` directive is only accepted with `roSupportRawDirective`;
    ## unknown names are reported with `meInvalidDirective`. `result` stays
    ## nil for directives that were only reported.
    result = nil
    let dirTok = p.tok[p.idx-2] # report on directive in ".. directive::"
    if roSandboxDisabled notin p.s.options and d notin SandboxDirAllowlist:
      rstMessage(p, meSandboxedDirective, d, dirTok.line, dirTok.col)

    case d
    of "admonition", "attention", "caution", "danger", "error", "hint",
       "important", "note", "tip", "warning":
      result = dirAdmonition(p, d)
    of "code":
      result = dirCodeBlock(p)
    of "code-block":
      result = dirCodeBlock(p, nimExtension = true)
    of "container":
      result = dirContainer(p)
    of "contents":
      result = dirContents(p)
    of "default-role":
      result = dirDefaultRole(p)
    of "figure":
      result = dirFigure(p)
    of "image":
      result = dirImage(p)
    of "include":
      result = dirInclude(p)
    of "index":
      result = dirIndex(p)
    of "raw":
      if roSupportRawDirective notin p.s.options:
        rstMessage(p, meInvalidDirective, d)
      else:
        result = dirRaw(p)
    of "role":
      result = dirRole(p)
    of "title":
      result = dirTitle(p)
    else:
      rstMessage(p, meInvalidDirective, d, dirTok.line, dirTok.col)
  proc prefix(ftnType: FootnoteType): string =
    ## Returns the anchor name prefix that belongs to the footnote type.
    case ftnType
    of fnManualNumber, fnAutoNumberLabel: "footnote-"
    of fnAutoNumber: "footnoteauto-"
    of fnAutoSymbol: "footnotesym-"
    of fnCitation: "citation-"
  proc parseFootnote(p: var RstParser): PRstNode {.gcsafe.} =
    ## Parses footnotes and citations. Returns nil (with `p.idx` restored)
    ## when no footnote label can be parsed; otherwise the rnFootnote node
    ## always has 2 sons:
    ##
    ## 1) footnote label, always containing rnInner with 1 or more sons
    ## 2) footnote body, which may be nil
    inc p.idx
    let label = parseFootnoteName(p, reference=false)
    if label == nil:
      dec p.idx
      return nil

    result = newRstNode(rnFootnote)
    result.add label

    # register the footnote and build its anchor name: prefix + suffix
    let (fnType, manualNum) = getFootnoteType(label)
    var anchorName = prefix(fnType)
    case fnType
    of fnManualNumber:
      addFootnoteNumManual(p, manualNum)
      anchorName.add $manualNum
    of fnAutoNumber, fnAutoNumberLabel:
      let refName = rstnodeToRefname(label)
      addFootnoteNumAuto(p, refName)
      if fnType == fnAutoNumber:
        result.order = p.s.lineFootnoteNum.len
        anchorName.add $result.order
      else:  # fnAutoNumberLabel
        anchorName.add refName
    of fnAutoSymbol:
      addFootnoteSymAuto(p)
      result.order = p.s.lineFootnoteSym.len
      anchorName.add $result.order
    of fnCitation:
      anchorName.add rstnodeToRefname(label)
    addAnchorRst(p, anchorName, target = result, anchorType = footnoteAnchor)
    result.anchor = anchorName

    if currentTok(p).kind == tkWhite:
      inc p.idx
    discard parseBlockContent(p, result, parseSectionWrapper)
    # no body was parsed: keep the 2-son layout
    if result.len < 2:
      result.add nil
  proc parseDotDot(p: var RstParser): PRstNode =
    # Parses "explicit markup blocks": directives, hyperlink targets,
    # substitution definitions, footnotes/citations and, as the fallback,
    # comments. `result` stays nil for hyperlink targets and substitution
    # definitions, which are only recorded in the parser state.
    result = nil
    var ftn: PRstNode  # to store result, workaround for bug 16855
    let startCol = currentTok(p).col
    inc p.idx
    let dirName = getDirective(p)
    if dirName != "":
      pushInd(p, startCol)
      result = selectDir(p, dirName)
      popInd(p)
    elif match(p, p.idx, " _"):
      # hyperlink target:
      inc p.idx, 2
      let quoted = currentTok(p).symbol == "`"
      if quoted:
        inc p.idx
      let ending = if quoted: "`" else: ":"
      let refName = getReferenceName(p, ending)
      if quoted:
        # a back-quoted name still has to be followed by ":"
        if currentTok(p).symbol != ":":
          rstMessage(p, meExpected, ":")
        else:
          inc p.idx
      if currentTok(p).kind == tkWhite:
        inc p.idx
      let target = untilEol(p)
      if target.len == 0:  # set internal anchor
        p.curAnchors.add ManualAnchor(
          alias: linkName(refName), anchor: rstnodeToRefname(refName),
          info: prevLineInfo(p)
        )
      else:  # external hyperlink
        setRef(p, rstnodeToRefname(refName), target, refType=hyperlinkAlias)
    elif match(p, p.idx, " |"):
      # substitution definitions:
      inc p.idx, 2
      let subName = getReferenceName(p, "|")
      var replacement: PRstNode
      if currentTok(p).kind == tkWhite:
        inc p.idx
      if cmpIgnoreStyle(currentTok(p).symbol, "replace") == 0:
        inc p.idx
        expect(p, "::")
        replacement = untilEol(p)
      elif cmpIgnoreStyle(currentTok(p).symbol, "image") == 0:
        inc p.idx
        replacement = dirImage(p)
      else:
        rstMessage(p, meInvalidDirective, currentTok(p).symbol)
      setSub(p, addNodes(subName), replacement)
    elif match(p, p.idx, " [") and
        (ftn = parseFootnote(p); ftn != nil):
      result = ftn
    else:
      result = parseComment(p, startCol)
  proc rstParsePass1*(fragment: string,
                      line, column: int,
                      sharedState: PRstSharedState): PRstNode =
    ## Parses an RST `fragment` that starts at `line` and `column`, using a
    ## new parser bound to `sharedState`.
    ## The result should be further processed by
    ## preparePass2_ and resolveSubs_ (which is pass 2).
    var parser: RstParser
    initParser(parser, sharedState)
    parser.line = line
    parser.col = column
    getTokens(fragment, parser.tok)
    result = parser.parseDoc()
  proc preparePass2*(s: PRstSharedState, mainNode: PRstNode) =
    ## Records titles in node `mainNode` and orders footnotes; to be called
    ## between pass 1 and pass 2.
    s.countTitles(mainNode)
    s.fixHeadlines()
    s.orderFootnotes()
  proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode =
    # Associates the link alias of `n` with its target and returns a new
    # node of kind rnHyperlink, rnNimdocRef or rnInternalRef. All candidate
    # targets are collected and the one that sorts first in descending
    # order wins; more than one candidate is reported as mwAmbiguousLink.
    # Without any candidate `n` itself is returned after a mwBrokenLink
    # message.
    type LinkDef = object
      ar: AnchorRule
      priority: int
      tooltip: string
      target: PRstNode
      info: TLineInfo
    proc linkOrder(x, y: LinkDef): int =
      # by priority first, ties are broken by the target nodes
      result = cmp(x.priority, y.priority)
      if result == 0:
        result = cmp(x.target, y.target)

    let isPandoc = n.kind == rnPandocRef
    # rnPandocRef: link like [desc][alias]
    # otherwise n.kind == rnRstRef, link like `desc=alias`_
    let desc = if isPandoc: n.sons[0] else: n
    let alias = if isPandoc: n.sons[1] else: n

    var candidates: seq[LinkDef]
    # 1) explicit hyperlink references
    let refn = rstnodeToRefname(alias)
    for hl in findRef(s, refn):
      candidates.add LinkDef(ar: arHyperlink, priority: refPriority(hl.kind),
                             target: hl.value, info: hl.info,
                             tooltip: "(" & $hl.kind & ")")
    # 2) anchors defined in RST
    for anc in findMainAnchorRst(s, alias.addNodes, n.info):
      candidates.add LinkDef(ar: arInternalRst, priority: anc.priority,
                             target: newLeaf(anc.target.anchor),
                             info: anc.info,
                             tooltip: "(" & $anc.anchorType & ")")
    # 3) anchors automatically generated from Nim symbols
    if roNimFile in s.options:
      for anc in findMainAnchorNim(s, signature=alias, n.info):
        candidates.add LinkDef(ar: arNim, priority: anc.priority,
                               target: newLeaf(anc.refname),
                               info: anc.info, tooltip: anc.tooltip)
    candidates.sort(linkOrder, order = Descending)

    let aliasStr = addNodes(alias)
    if candidates.len == 0:  # nothing found
      result = n
      rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, aliasStr)
      return

    let kind =
      case candidates[0].ar
      of arHyperlink: rnHyperlink
      of arNim: rnNimdocRef
      else: rnInternalRef
    result = newRstNode(kind)
    result.sons = @[newRstNode(rnInner, desc.sons), candidates[0].target]
    if kind == rnNimdocRef:
      result.tooltip = candidates[0].tooltip
    if candidates.len > 1:  # report ambiguous link
      var targets = newSeq[string]()
      for cand in candidates:
        var entry = " "
        if s.filenames.len > 1:
          entry.add getFilename(s.filenames, cand.info.fileIndex)
        let lineNo = cand.info.line
        let colNo = cand.info.col + ColRstOffset
        entry.add "($1, $2): $3" % [$lineNo, $colNo, cand.tooltip]
        targets.add entry
      rstMessage(s.filenames, s.msgHandler, n.info, mwAmbiguousLink,
                 "`$1`\n clash:\n$2" % [
                   aliasStr, targets.join("\n")])
  proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode =
    ## Makes pass 2 of RST parsing.
    ## Resolves substitutions and anchor aliases, groups footnotes.
    ## Takes input node `n` and returns the same node with recursive
    ## substitutions in `n.sons` to `result`.
    ##
    ## A nil `n` yields nil. Substitution references, links and footnote
    ## references are replaced by other nodes; for any other non-leaf kind
    ## the sons are resolved in place and runs of adjacent rnFootnote sons
    ## are wrapped into one rnFootnoteGroup each.
    result = n
    if n == nil: return
    case n.kind
    of rnSubstitutionReferences:
      let x = findSub(s, n)
      if x >= 0:
        result = s.subs[x].value
      else:
        # not a known substitution: fall back to an environment variable
        let key = addNodes(n)
        let e = getEnv(key)
        if e != "": result = newLeaf(e)
        else: rstMessage(s.filenames, s.msgHandler, n.info,
                         mwUnknownSubstitution, key)
    of rnRstRef, rnPandocRef:
      result = resolveLink(s, n)
    of rnFootnote:
      var (fnType, num) = getFootnoteType(n.sons[0])
      case fnType
      of fnManualNumber, fnCitation:
        discard "no need to alter fixed text"
      of fnAutoNumberLabel, fnAutoNumber:
        if fnType == fnAutoNumberLabel:
          let labelR = rstnodeToRefname(n.sons[0])
          num = getFootnoteNum(s, labelR)
        else:
          num = getFootnoteNum(s, n.order)
        # replace the label with the final number
        var nn = newRstNode(rnInner)
        nn.add newLeaf($num)
        result.sons[0] = nn
      of fnAutoSymbol:
        let sym = getAutoSymbol(s, n.order)
        n.sons[0].sons[0].text = sym
      n.sons[1] = resolveSubs(s, n.sons[1])
    of rnFootnoteRef:
      var (fnType, num) = getFootnoteType(n.sons[0])
      template addLabel(number: int | string) =
        var nn = newRstNode(rnInner)
        nn.add newLeaf($number)
        result.add(nn)
      var refn = fnType.prefix
      # create new rnFootnoteRef, add final label, and finalize target refn:
      result = newRstNode(rnFootnoteRef, info = n.info)
      case fnType
      of fnManualNumber:
        addLabel num
        refn.add $num
      of fnAutoNumber:
        inc s.currFootnoteNumRef
        addLabel getFootnoteNum(s, s.currFootnoteNumRef)
        refn.add $s.currFootnoteNumRef
      of fnAutoNumberLabel:
        addLabel getFootnoteNum(s, rstnodeToRefname(n))
        refn.add rstnodeToRefname(n)
      of fnAutoSymbol:
        inc s.currFootnoteSymRef
        addLabel getAutoSymbol(s, s.currFootnoteSymRef)
        refn.add $s.currFootnoteSymRef
      of fnCitation:
        result.add n.sons[0]
        refn.add rstnodeToRefname(n)
      # TODO: correctly report ambiguities
      let anchorInfo = findMainAnchorRst(s, refn, n.info)
      if anchorInfo.len != 0:
        result.add newLeaf(anchorInfo[0].target.anchor)  # add link
      else:
        rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, refn)
        result.add newLeaf(refn)  # add link
    of rnLeaf:
      discard
    else:
      var regroup = false
      for i in 0 ..< n.len:
        n.sons[i] = resolveSubs(s, n.sons[i])
        if n.sons[i] != nil and n.sons[i].kind == rnFootnote:
          regroup = true
      if regroup:  # group footnotes together into rnFootnoteGroup
        var newSons: seq[PRstNode]
        var i = 0
        while i < n.len:
          if n.sons[i] != nil and n.sons[i].kind == rnFootnote:
            var grp = newRstNode(rnFootnoteGroup)
            # sons may be nil, so a nil son ends the run of footnotes
            # instead of being dereferenced
            while i < n.len and n.sons[i] != nil and
                n.sons[i].kind == rnFootnote:
              grp.sons.add n.sons[i]
              inc i
            newSons.add grp
          else:
            newSons.add n.sons[i]
            inc i
        result.sons = newSons
  proc rstParse*(text, filename: string,
                 line, column: int,
                 options: RstParseOptions,
                 findFile: FindFileHandler = nil,
                 msgHandler: MsgHandler = nil):
                   tuple[node: PRstNode, filenames: RstFileTable, hasToc: bool] =
    ## Parses the whole `text`, running pass 1, `preparePass2` and
    ## `resolveSubs` on a new shared state. The result is ready for
    ## `rstgen.renderRstToOut`; note that 2nd tuple element should be fed to
    ## `initRstGenerator` argument `filenames` (it is being filled here at
    ## least with `filename` and possibly with other files from RST
    ## ``.. include::`` statement).
    let state = newRstSharedState(options, filename, findFile,
                                  msgHandler, hasToc=false)
    let pass1Tree = rstParsePass1(text, line, column, state)
    preparePass2(state, pass1Tree)
    result.node = resolveSubs(state, pass1Tree)
    # both fields are read only after pass 2 has finished
    result.filenames = state.filenames
    result.hasToc = state.hasToc