texinfo.scm 49 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216
  1. ;;;; (texinfo) -- parsing of texinfo into SXML
  2. ;;;;
  3. ;;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
  4. ;;;; Copyright (C) 2004, 2009 Andy Wingo <wingo at pobox dot com>
  5. ;;;; Copyright (C) 2001,2002 Oleg Kiselyov <oleg at pobox dot com>
  6. ;;;;
  7. ;;;; This file is based on SSAX's SSAX.scm.
  8. ;;;;
  9. ;;;; This library is free software; you can redistribute it and/or
  10. ;;;; modify it under the terms of the GNU Lesser General Public
  11. ;;;; License as published by the Free Software Foundation; either
  12. ;;;; version 3 of the License, or (at your option) any later version.
  13. ;;;;
  14. ;;;; This library is distributed in the hope that it will be useful,
  15. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. ;;;; Lesser General Public License for more details.
  18. ;;;;
  19. ;;;; You should have received a copy of the GNU Lesser General Public
  20. ;;;; License along with this library; if not, write to the Free Software
  21. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. ;;; Commentary:
  23. ;;
  24. ;; @subheading Texinfo processing in scheme
  25. ;;
  26. ;; This module parses texinfo into SXML. TeX will always be the
  27. ;; processor of choice for print output, of course. However, although
  28. ;; @code{makeinfo} works well for info, its output in other formats is
  29. ;; not very customizable, and the program is not extensible as a whole.
  30. ;; This module aims to provide an extensible framework for texinfo
  31. ;; processing that integrates texinfo into the constellation of SXML
  32. ;; processing tools.
  33. ;;
  34. ;; @subheading Notes on the SXML vocabulary
  35. ;;
  36. ;; Consider the following texinfo fragment:
  37. ;;
  38. ;;@example
  39. ;; @@deffn Primitive set-car! pair value
  40. ;; This function...
  41. ;; @@end deffn
  42. ;;@end example
  43. ;;
  44. ;; Logically, the category (Primitive), name (set-car!), and arguments
  45. ;; (pair value) are ``attributes'' of the deffn, with the description as
  46. ;; the content. However, texinfo allows for @@-commands within the
  47. ;; arguments to an environment, like @code{@@deffn}, which means that
  48. ;; texinfo ``attributes'' are PCDATA. XML attributes, on the other hand,
  49. ;; are CDATA. For this reason, ``attributes'' of texinfo @@-commands are
  50. ;; called ``arguments'', and are grouped under the special element, `%'.
  51. ;;
  52. ;; Because `%' is not a valid NCName, stexinfo is a superset of SXML. In
  53. ;; the interests of interoperability, this module provides a conversion
  54. ;; function to replace the `%' with `texinfo-arguments'.
  55. ;;
  56. ;;; Code:
  57. ;; Comparison to xml output of texinfo (which is rather undocumented):
  58. ;; Doesn't conform to texinfo dtd
  59. ;; No DTD at all, in fact :-/
  60. ;; Actually outputs valid xml, after transforming %
  61. ;; Slower (although with caching the SXML that problem can go away)
  62. ;; Doesn't parse menus (although menus are shite)
  63. ;; Args go in a dedicated element, FBOFW
  64. ;; Definitions are handled a lot better
  65. ;; Does parse comments
  66. ;; Outputs only significant line breaks (a biggie!)
  67. ;; Nodes are treated as anchors, rather than content organizers (a biggie)
  68. ;; (more book-like, less info-like)
  69. ;; TODO
  70. ;; Integration: help, indexing, plain text
  71. (define-module (texinfo)
  72. #:use-module (sxml simple)
  73. #:use-module (sxml transform)
  74. #:use-module (sxml ssax input-parse)
  75. #:use-module (srfi srfi-1)
  76. #:use-module (srfi srfi-13)
  77. #:export (call-with-file-and-dir
  78. texi-command-specs
  79. texi-command-depth
  80. texi-fragment->stexi
  81. texi->stexi
  82. stexi->sxml))
  83. ;; Some utilities
  84. (define (parser-error port message . rest)
  85. (apply throw 'parser-error port message rest))
  86. (define (call-with-file-and-dir filename proc)
  87. "Call the one-argument procedure @var{proc} with an input port that
  88. reads from @var{filename}. During the dynamic extent of @var{proc}'s
  89. execution, the current directory will be @code{(dirname
  90. @var{filename})}. This is useful for parsing documents that can include
  91. files by relative path name."
  92. (let ((current-dir (getcwd)))
  93. (dynamic-wind
  94. (lambda () (chdir (dirname filename)))
  95. (lambda ()
  96. (call-with-input-file (basename filename) proc))
  97. (lambda () (chdir current-dir)))))
  98. ;; Define this version here, because (srfi srfi-11)'s definition uses
  99. ;; syntax-rules, which is really damn slow
  100. (define-macro (let*-values bindings . body)
  101. (if (null? bindings) (cons 'begin body)
  102. (apply
  103. (lambda (vars initializer)
  104. (let ((cont
  105. (cons 'let*-values
  106. (cons (cdr bindings) body))))
  107. (cond
  108. ((not (pair? vars)) ; regular let case, a single var
  109. `(let ((,vars ,initializer)) ,cont))
  110. ((null? (cdr vars)) ; single var, see the prev case
  111. `(let ((,(car vars) ,initializer)) ,cont))
  112. (else ; the most generic case
  113. `(call-with-values (lambda () ,initializer)
  114. (lambda ,vars ,cont))))))
  115. (car bindings))))
  116. ;;========================================================================
  117. ;; Reflection on the XML vocabulary
  118. (define texi-command-specs
  119. #;
  120. "A list of (@var{name} @var{content-model} . @var{args})
  121. @table @var
  122. @item name
  123. The name of an @@-command, as a symbol.
  124. @item content-model
  125. A symbol indicating the syntactic type of the @@-command:
  126. @table @code
  127. @item EMPTY-COMMAND
  128. No content, and no @code{@@end} is coming
  129. @item EOL-ARGS
  130. Unparsed arguments until end of line
  131. @item EOL-TEXT
  132. Parsed arguments until end of line
  133. @item INLINE-ARGS
  134. Unparsed arguments ending with @code{#\\@}}
  135. @item INLINE-TEXT
  136. Parsed arguments ending with @code{#\\@}}
  137. @item ENVIRON
  138. The tag is an environment tag, expect @code{@@end foo}.
  139. @item TABLE-ENVIRON
  140. Like ENVIRON, but with special parsing rules for its arguments.
  141. @item FRAGMENT
  142. For @code{*fragment*}, the command used for parsing fragments of
  143. texinfo documents.
  144. @end table
  145. @code{INLINE-TEXT} commands will receive their arguments within their
  146. bodies, whereas the @code{-ARGS} commands will receive them in their
  147. attribute list.
  148. @code{EOF-TEXT} receives its arguments in its body.
  149. @code{ENVIRON} commands have both: parsed arguments until the end of
  150. line, received through their attribute list, and parsed text until the
  151. @code{@@end}, received in their bodies.
  152. @code{EOF-TEXT-ARGS} receives its arguments in its attribute list, as in
  153. @code{ENVIRON}.
  154. There are four @@-commands that are treated specially. @code{@@include}
  155. is a low-level token that will not be seen by higher-level parsers, so
  156. it has no content-model. @code{@@para} is the paragraph command, which
  157. is only implicit in the texinfo source. @code{@@item} has special
  158. syntax, as noted above, and @code{@@entry} is how this parser treats
  159. @code{@@item} commands within @code{@@table}, @code{@@ftable}, and
  160. @code{@@vtable}.
  161. Also, indexing commands (@code{@@cindex}, etc.) are treated specially.
  162. Their arguments are parsed, but they are needed before entering the
  163. element so that an anchor can be inserted into the text before the index
  164. entry.
  165. @item args
  166. Named arguments to the command, in the same format as the formals for a
  167. lambda. Only present for @code{INLINE-ARGS}, @code{EOL-ARGS},
  168. @code{ENVIRON}, @code{TABLE-ENVIRON} commands.
  169. @end table"
  170. '(;; Special commands
  171. (include #f) ;; this is a low-level token
  172. (para PARAGRAPH)
  173. (item ITEM)
  174. (entry ENTRY . heading)
  175. (noindent EMPTY-COMMAND)
  176. (*fragment* FRAGMENT)
  177. ;; Inline text commands
  178. (*braces* INLINE-TEXT) ;; FIXME: make me irrelevant
  179. (bold INLINE-TEXT)
  180. (sample INLINE-TEXT)
  181. (samp INLINE-TEXT)
  182. (code INLINE-TEXT)
  183. (kbd INLINE-TEXT)
  184. (key INLINE-TEXT)
  185. (var INLINE-TEXT)
  186. (env INLINE-TEXT)
  187. (file INLINE-TEXT)
  188. (command INLINE-TEXT)
  189. (option INLINE-TEXT)
  190. (dfn INLINE-TEXT)
  191. (cite INLINE-TEXT)
  192. (acro INLINE-TEXT)
  193. (url INLINE-TEXT)
  194. (email INLINE-TEXT)
  195. (emph INLINE-TEXT)
  196. (strong INLINE-TEXT)
  197. (sample INLINE-TEXT)
  198. (sc INLINE-TEXT)
  199. (titlefont INLINE-TEXT)
  200. (asis INLINE-TEXT)
  201. (b INLINE-TEXT)
  202. (i INLINE-TEXT)
  203. (r INLINE-TEXT)
  204. (sansserif INLINE-TEXT)
  205. (slanted INLINE-TEXT)
  206. (t INLINE-TEXT)
  207. ;; Inline args commands
  208. (value INLINE-ARGS . (key))
  209. (ref INLINE-ARGS . (node #:opt name section info-file manual))
  210. (xref INLINE-ARGS . (node #:opt name section info-file manual))
  211. (pxref INLINE-ARGS . (node #:opt name section info-file manual))
  212. (uref INLINE-ARGS . (url #:opt title replacement))
  213. (anchor INLINE-ARGS . (name))
  214. (dots INLINE-ARGS . ())
  215. (result INLINE-ARGS . ())
  216. (bullet INLINE-ARGS . ())
  217. (copyright INLINE-ARGS . ())
  218. (tie INLINE-ARGS . ())
  219. (image INLINE-ARGS . (file #:opt width height alt-text extension))
  220. ;; EOL args elements
  221. (node EOL-ARGS . (name #:opt next previous up))
  222. (c EOL-ARGS . all)
  223. (comment EOL-ARGS . all)
  224. (setchapternewpage EOL-ARGS . all)
  225. (sp EOL-ARGS . all)
  226. (page EOL-ARGS . ())
  227. (vskip EOL-ARGS . all)
  228. (syncodeindex EOL-ARGS . all)
  229. (contents EOL-ARGS . ())
  230. (shortcontents EOL-ARGS . ())
  231. (summarycontents EOL-ARGS . ())
  232. (insertcopying EOL-ARGS . ())
  233. (dircategory EOL-ARGS . (category))
  234. (top EOL-ARGS . (title))
  235. (printindex EOL-ARGS . (type))
  236. ;; EOL text commands
  237. (*ENVIRON-ARGS* EOL-TEXT)
  238. (itemx EOL-TEXT)
  239. (set EOL-TEXT)
  240. (center EOL-TEXT)
  241. (title EOL-TEXT)
  242. (subtitle EOL-TEXT)
  243. (author EOL-TEXT)
  244. (chapter EOL-TEXT)
  245. (section EOL-TEXT)
  246. (appendix EOL-TEXT)
  247. (appendixsec EOL-TEXT)
  248. (unnumbered EOL-TEXT)
  249. (unnumberedsec EOL-TEXT)
  250. (subsection EOL-TEXT)
  251. (subsubsection EOL-TEXT)
  252. (appendixsubsec EOL-TEXT)
  253. (appendixsubsubsec EOL-TEXT)
  254. (unnumberedsubsec EOL-TEXT)
  255. (unnumberedsubsubsec EOL-TEXT)
  256. (chapheading EOL-TEXT)
  257. (majorheading EOL-TEXT)
  258. (heading EOL-TEXT)
  259. (subheading EOL-TEXT)
  260. (subsubheading EOL-TEXT)
  261. (deftpx EOL-TEXT-ARGS . (category name . attributes))
  262. (defcvx EOL-TEXT-ARGS . (category class name))
  263. (defivarx EOL-TEXT-ARGS . (class name))
  264. (deftypeivarx EOL-TEXT-ARGS . (class data-type name))
  265. (defopx EOL-TEXT-ARGS . (category class name . arguments))
  266. (deftypeopx EOL-TEXT-ARGS . (category class data-type name . arguments))
  267. (defmethodx EOL-TEXT-ARGS . (class name . arguments))
  268. (deftypemethodx EOL-TEXT-ARGS . (class data-type name . arguments))
  269. (defoptx EOL-TEXT-ARGS . (name))
  270. (defvrx EOL-TEXT-ARGS . (category name))
  271. (defvarx EOL-TEXT-ARGS . (name))
  272. (deftypevrx EOL-TEXT-ARGS . (category data-type name))
  273. (deftypevarx EOL-TEXT-ARGS . (data-type name))
  274. (deffnx EOL-TEXT-ARGS . (category name . arguments))
  275. (deftypefnx EOL-TEXT-ARGS . (category data-type name . arguments))
  276. (defspecx EOL-TEXT-ARGS . (name . arguments))
  277. (defmacx EOL-TEXT-ARGS . (name . arguments))
  278. (defunx EOL-TEXT-ARGS . (name . arguments))
  279. (deftypefunx EOL-TEXT-ARGS . (data-type name . arguments))
  280. ;; Indexing commands
  281. (cindex INDEX . entry)
  282. (findex INDEX . entry)
  283. (vindex INDEX . entry)
  284. (kindex INDEX . entry)
  285. (pindex INDEX . entry)
  286. (tindex INDEX . entry)
  287. ;; Environment commands (those that need @end)
  288. (texinfo ENVIRON . title)
  289. (ignore ENVIRON . ())
  290. (ifinfo ENVIRON . ())
  291. (iftex ENVIRON . ())
  292. (ifhtml ENVIRON . ())
  293. (ifxml ENVIRON . ())
  294. (ifplaintext ENVIRON . ())
  295. (ifnotinfo ENVIRON . ())
  296. (ifnottex ENVIRON . ())
  297. (ifnothtml ENVIRON . ())
  298. (ifnotxml ENVIRON . ())
  299. (ifnotplaintext ENVIRON . ())
  300. (titlepage ENVIRON . ())
  301. (menu ENVIRON . ())
  302. (direntry ENVIRON . ())
  303. (copying ENVIRON . ())
  304. (example ENVIRON . ())
  305. (smallexample ENVIRON . ())
  306. (display ENVIRON . ())
  307. (smalldisplay ENVIRON . ())
  308. (verbatim ENVIRON . ())
  309. (format ENVIRON . ())
  310. (smallformat ENVIRON . ())
  311. (lisp ENVIRON . ())
  312. (smalllisp ENVIRON . ())
  313. (cartouche ENVIRON . ())
  314. (quotation ENVIRON . ())
  315. (deftp ENVIRON . (category name . attributes))
  316. (defcv ENVIRON . (category class name))
  317. (defivar ENVIRON . (class name))
  318. (deftypeivar ENVIRON . (class data-type name))
  319. (defop ENVIRON . (category class name . arguments))
  320. (deftypeop ENVIRON . (category class data-type name . arguments))
  321. (defmethod ENVIRON . (class name . arguments))
  322. (deftypemethod ENVIRON . (class data-type name . arguments))
  323. (defopt ENVIRON . (name))
  324. (defvr ENVIRON . (category name))
  325. (defvar ENVIRON . (name))
  326. (deftypevr ENVIRON . (category data-type name))
  327. (deftypevar ENVIRON . (data-type name))
  328. (deffn ENVIRON . (category name . arguments))
  329. (deftypefn ENVIRON . (category data-type name . arguments))
  330. (defspec ENVIRON . (name . arguments))
  331. (defmac ENVIRON . (name . arguments))
  332. (defun ENVIRON . (name . arguments))
  333. (deftypefun ENVIRON . (data-type name . arguments))
  334. (table TABLE-ENVIRON . (formatter))
  335. (itemize TABLE-ENVIRON . (formatter))
  336. (enumerate TABLE-ENVIRON . (start))
  337. (ftable TABLE-ENVIRON . (formatter))
  338. (vtable TABLE-ENVIRON . (formatter))))
  339. (define command-depths
  340. '((chapter . 1) (section . 2) (subsection . 3) (subsubsection . 4)
  341. (top . 0) (unnumbered . 1) (unnumberedsec . 2)
  342. (unnumberedsubsec . 3) (unnumberedsubsubsec . 4)
  343. (appendix . 1) (appendixsec . 2) (appendixsection . 2)
  344. (appendixsubsec . 3) (appendixsubsubsec . 4)))
  345. (define (texi-command-depth command max-depth)
  346. "Given the texinfo command @var{command}, return its nesting level, or
  347. @code{#f} if it nests too deep for @var{max-depth}.
  348. Examples:
  349. @example
  350. (texi-command-depth 'chapter 4) @result{} 1
  351. (texi-command-depth 'top 4) @result{} 0
  352. (texi-command-depth 'subsection 4) @result{} 3
  353. (texi-command-depth 'appendixsubsec 4) @result{} 3
  354. (texi-command-depth 'subsection 2) @result{} #f
  355. @end example"
  356. (let ((depth (and=> (assq command command-depths) cdr)))
  357. (and depth (<= depth max-depth) depth)))
  358. ;; The % is for arguments
  359. (define (space-significant? command)
  360. (memq command
  361. '(example smallexample verbatim lisp smalllisp menu %)))
  362. ;; Like a DTD for texinfo
  363. (define (command-spec command)
  364. (or (assq command texi-command-specs)
  365. (parser-error #f "Unknown command" command)))
  366. (define (inline-content? content)
  367. (or (eq? content 'INLINE-TEXT) (eq? content 'INLINE-ARGS)))
  368. ;;========================================================================
  369. ;; Lower-level parsers and scanners
  370. ;;
  371. ;; They deal with primitive lexical units (Names, whitespaces, tags) and
  372. ;; with pieces of more generic productions. Most of these parsers must
  373. ;; be called in appropriate context. For example, complete-start-command
  374. ;; must be called only when the @-command start has been detected and
  375. ;; its name token has been read.
  376. ;; Test if a string is made of only whitespace
  377. ;; An empty string is considered made of whitespace as well
  378. (define (string-whitespace? str)
  379. (or (string-null? str)
  380. (string-every char-whitespace? str)))
  381. ;; Like read-text-line, but allows EOF.
  382. (define read-eof-breaks '(*eof* #\return #\newline))
  383. (define (read-eof-line port)
  384. (if (eof-object? (peek-char port))
  385. (peek-char port)
  386. (let* ((line (next-token '() read-eof-breaks
  387. "reading a line" port))
  388. (c (read-char port))) ; must be either \n or \r or EOF
  389. (if (and (eq? c #\return) (eq? (peek-char port) #\newline))
  390. (read-char port)) ; skip \n that follows \r
  391. line)))
  392. (define (skip-whitespace port)
  393. (skip-while '(#\space #\tab #\return #\newline) port))
  394. (define (skip-horizontal-whitespace port)
  395. (skip-while '(#\space #\tab) port))
  396. ;; command ::= Letter+
  397. ;; procedure: read-command PORT
  398. ;;
  399. ;; Read a command starting from the current position in the PORT and
  400. ;; return it as a symbol.
  401. (define (read-command port)
  402. (let ((first-char (peek-char port)))
  403. (or (char-alphabetic? first-char)
  404. (parser-error port "Nonalphabetic @-command char: '" first-char "'")))
  405. (string->symbol
  406. (next-token-of
  407. (lambda (c)
  408. (cond
  409. ((eof-object? c) #f)
  410. ((char-alphabetic? c) c)
  411. (else #f)))
  412. port)))
  413. ;; A token is a primitive lexical unit. It is a record with two fields,
  414. ;; token-head and token-kind.
  415. ;;
  416. ;; Token types:
  417. ;; END The end of a texinfo command. If the command is ended by },
  418. ;; token-head will be #f. Otherwise if the command is ended by
  419. ;; @end COMMAND, token-head will be COMMAND. As a special case,
  420. ;; @bye is the end of a special @texinfo command.
  421. ;; START The start of a texinfo command. The token-head will be a
  422. ;; symbol of the @-command name.
  423. ;; INCLUDE An @include directive. The token-head will be empty -- the
  424. ;; caller is responsible for reading the include file name.
  425. ;; ITEM @item commands have an irregular syntax. They end at the
  426. ;; next @item, or at the end of the environment. For that
  427. ;; read-command-token treats them specially.
  428. (define (make-token kind head) (cons kind head))
  429. (define token? pair?)
  430. (define token-kind car)
  431. (define token-head cdr)
  432. ;; procedure: read-command-token PORT
  433. ;;
  434. ;; This procedure starts parsing of a command token. The current
  435. ;; position in the stream must be #\@. This procedure scans enough of
  436. ;; the input stream to figure out what kind of a command token it is
  437. ;; seeing. The procedure returns a token structure describing the token.
  438. (define (read-command-token port)
  439. (assert-curr-char '(#\@) "start of the command" port)
  440. (let ((peeked (peek-char port)))
  441. (cond
  442. ((memq peeked '(#\! #\. #\? #\@ #\\ #\{ #\}))
  443. ;; @-commands that escape characters
  444. (make-token 'STRING (string (read-char port))))
  445. (else
  446. (let ((name (read-command port)))
  447. (case name
  448. ((end)
  449. ;; got an ending tag
  450. (let ((command (string-trim-both
  451. (read-eof-line port))))
  452. (or (and (not (string-null? command))
  453. (string-every char-alphabetic? command))
  454. (parser-error port "malformed @end" command))
  455. (make-token 'END (string->symbol command))))
  456. ((bye)
  457. ;; the end of the top
  458. (make-token 'END 'texinfo))
  459. ((item)
  460. (make-token 'ITEM 'item))
  461. ((include)
  462. (make-token 'INCLUDE #f))
  463. (else
  464. (make-token 'START name))))))))
  465. ;; procedure+: read-verbatim-body PORT STR-HANDLER SEED
  466. ;;
  467. ;; This procedure must be called after we have read a string
  468. ;; "@verbatim\n" that begins a verbatim section. The current position
  469. ;; must be the first position of the verbatim body. This function reads
  470. ;; _lines_ of the verbatim body and passes them to a STR-HANDLER, a
  471. ;; character data consumer.
  472. ;;
  473. ;; The str-handler is a STR-HANDLER, a procedure STRING1 STRING2 SEED.
  474. ;; The first STRING1 argument to STR-HANDLER never contains a newline.
  475. ;; The second STRING2 argument often will. On the first invocation of the
  476. ;; STR-HANDLER, the seed is the one passed to read-verbatim-body
  477. ;; as the third argument. The result of this first invocation will be
  478. ;; passed as the seed argument to the second invocation of the line
  479. ;; consumer, and so on. The result of the last invocation of the
  480. ;; STR-HANDLER is returned by the read-verbatim-body. Note a
  481. ;; similarity to the fundamental 'fold' iterator.
  482. ;;
  483. ;; Within a verbatim section all characters are taken at their face
  484. ;; value. It ends with "\n@end verbatim(\r)?\n".
  485. ;; Must be called right after the newline after @verbatim.
  486. (define (read-verbatim-body port str-handler seed)
  487. (let loop ((seed seed))
  488. (let ((fragment (next-token '() '(#\newline)
  489. "reading verbatim" port)))
  490. ;; We're reading the char after the 'fragment', which is
  491. ;; #\newline.
  492. (read-char port)
  493. (if (string=? fragment "@end verbatim")
  494. seed
  495. (loop (str-handler fragment "\n" seed))))))
  496. ;; procedure+: read-arguments PORT
  497. ;;
  498. ;; This procedure reads and parses a production ArgumentList.
  499. ;; ArgumentList ::= S* Argument (S* , S* Argument)* S*
  500. ;; Argument ::= ([^@{},])*
  501. ;;
  502. ;; Arguments are the things in braces, i.e @ref{my node} has one
  503. ;; argument, "my node". Most commands taking braces actually don't have
  504. ;; arguments, they process text. For example, in
  505. ;; @emph{@strong{emphasized}}, the emph takes text, because the parse
  506. ;; continues into the braces.
  507. ;;
  508. ;; Any whitespace within Argument is replaced with a single space.
  509. ;; Whitespace around an Argument is trimmed.
  510. ;;
  511. ;; The procedure returns a list of arguments. Afterwards the current
  512. ;; character will be after the final #\}.
  513. (define (read-arguments port stop-char)
  514. (define (split str)
  515. (read-char port) ;; eat the delimiter
  516. (let ((ret (map (lambda (x) (if (string-null? x) #f x))
  517. (map string-trim-both (string-split str #\,)))))
  518. (if (and (pair? ret) (eq? (car ret) #f) (null? (cdr ret)))
  519. '()
  520. ret)))
  521. (split (next-token '() (list stop-char)
  522. "arguments of @-command" port)))
  523. ;; procedure+: complete-start-command COMMAND PORT
  524. ;;
  525. ;; This procedure is to complete parsing of an @-command. The procedure
  526. ;; must be called after the command token has been read. COMMAND is a
  527. ;; TAG-NAME.
  528. ;;
  529. ;; This procedure returns several values:
  530. ;; COMMAND: a symbol.
  531. ;; ARGUMENTS: command's arguments, as an alist.
  532. ;; CONTENT-MODEL: the content model of the command.
  533. ;;
  534. ;; On exit, the current position in PORT will depend on the CONTENT-MODEL.
  535. ;;
  536. ;; Content model Port position
  537. ;; ============= =============
  538. ;; INLINE-TEXT One character after the #\{.
  539. ;; INLINE-ARGS The first character after the #\}.
  540. ;; EOL-TEXT The first non-whitespace character after the command.
  541. ;; ENVIRON, TABLE-ENVIRON, EOL-ARGS, EOL-TEXT
  542. ;; The first character on the next line.
  543. ;; PARAGRAPH, ITEM, EMPTY-COMMAND
  544. ;; The first character after the command.
  545. (define (arguments->attlist port args arg-names)
  546. (let loop ((in args) (names arg-names) (opt? #f) (out '()))
  547. (cond
  548. ((symbol? names) ;; a rest arg
  549. (reverse (if (null? in) out (acons names in out))))
  550. ((and (not (null? names)) (eq? (car names) #:opt))
  551. (loop in (cdr names) #t out))
  552. ((null? in)
  553. (if (or (null? names) opt?)
  554. (reverse out)
  555. (parser-error port "@-command expected more arguments:"
  556. args arg-names names)))
  557. ((null? names)
  558. (parser-error port "@-command didn't expect more arguments:" in))
  559. ((not (car in))
  560. (or (and opt? (loop (cdr in) (cdr names) opt? out))
  561. (parser-error "@-command missing required argument"
  562. (car names))))
  563. (else
  564. (loop (cdr in) (cdr names) opt?
  565. (cons (list (car names) (car in)) out))))))
  566. (define (parse-table-args command port)
  567. (let* ((line (string-trim-both (read-text-line port)))
  568. (length (string-length line)))
  569. (define (get-formatter)
  570. (or (and (not (zero? length))
  571. (eq? (string-ref line 0) #\@)
  572. (let ((f (string->symbol (substring line 1))))
  573. (or (inline-content? (cadr (command-spec f)))
  574. (parser-error
  575. port "@item formatter must be INLINE" f))
  576. f))
  577. (parser-error port "Invalid @item formatter" line)))
  578. (case command
  579. ((enumerate)
  580. (if (zero? length)
  581. '()
  582. `((start
  583. ,(if (or (and (eq? length 1)
  584. (char-alphabetic? (string-ref line 0)))
  585. (string-every char-numeric? line))
  586. line
  587. (parser-error
  588. port "Invalid enumerate start" line))))))
  589. ((itemize)
  590. `((bullet
  591. ,(or (and (eq? length 1) line)
  592. (and (string-null? line) '(bullet))
  593. (list (get-formatter))))))
  594. (else ;; tables of various varieties
  595. `((formatter (,(get-formatter))))))))
  596. (define (complete-start-command command port)
  597. (define (get-arguments type arg-names stop-char)
  598. (arguments->attlist port (read-arguments port stop-char) arg-names))
  599. (let* ((spec (command-spec command))
  600. (type (cadr spec))
  601. (arg-names (cddr spec)))
  602. (case type
  603. ((INLINE-TEXT)
  604. (assert-curr-char '(#\{) "Inline element lacks {" port)
  605. (values command '() type))
  606. ((INLINE-ARGS)
  607. (assert-curr-char '(#\{) "Inline element lacks {" port)
  608. (values command (get-arguments type arg-names #\}) type))
  609. ((EOL-ARGS)
  610. (values command (get-arguments type arg-names #\newline) type))
  611. ((ENVIRON ENTRY INDEX)
  612. (skip-horizontal-whitespace port)
  613. (values command (parse-environment-args command port) type))
  614. ((TABLE-ENVIRON)
  615. (skip-horizontal-whitespace port)
  616. (values command (parse-table-args command port) type))
  617. ((EOL-TEXT)
  618. (skip-horizontal-whitespace port)
  619. (values command '() type))
  620. ((EOL-TEXT-ARGS)
  621. (skip-horizontal-whitespace port)
  622. (values command (parse-eol-text-args command port) type))
  623. ((PARAGRAPH EMPTY-COMMAND ITEM FRAGMENT)
  624. (values command '() type))
  625. (else ;; INCLUDE shouldn't get here
  626. (parser-error port "can't happen")))))
  627. ;;-----------------------------------------------------------------------------
  628. ;; Higher-level parsers and scanners
  629. ;;
  630. ;; They parse productions corresponding entire @-commands.
  631. ;; Only reads @settitle, leaves it to the command parser to finish
  632. ;; reading the title.
  633. (define (take-until-settitle port)
  634. (or (find-string-from-port? "\n@settitle " port)
  635. (parser-error port "No \\n@settitle found"))
  636. (skip-horizontal-whitespace port)
  637. (and (eq? (peek-char port) #\newline)
  638. (parser-error port "You have a @settitle, but no title")))
  639. ;; procedure+: read-char-data PORT EXPECT-EOF? STR-HANDLER SEED
  640. ;;
  641. ;; This procedure is to read the CharData of a texinfo document.
  642. ;;
  643. ;; text ::= (CharData | Command)*
  644. ;;
  645. ;; The procedure reads CharData and stops at @-commands (or
  646. ;; environments). It also stops at an open or close brace.
  647. ;;
  648. ;; port
  649. ;; a PORT to read
  650. ;; expect-eof?
  651. ;; a boolean indicating if EOF is normal, i.e., the character
  652. ;; data may be terminated by the EOF. EOF is normal
  653. ;; while processing the main document.
  654. ;; preserve-ws?
  655. ;; a boolean indicating if we are within a whitespace-preserving
  656. ;; environment. If #t, suppress paragraph detection.
  657. ;; str-handler
  658. ;; a STR-HANDLER, see read-verbatim-body
  659. ;; seed
  660. ;; an argument passed to the first invocation of STR-HANDLER.
  661. ;;
  662. ;; The procedure returns two results: SEED and TOKEN. The SEED is the
  663. ;; result of the last invocation of STR-HANDLER, or the original seed if
  664. ;; STR-HANDLER was never called.
  665. ;;
  666. ;; TOKEN can be either an eof-object (this can happen only if expect-eof?
  667. ;; was #t), or a texinfo token denoting the start or end of a tag.
  668. ;; read-char-data port expect-eof? preserve-ws? str-handler seed
  669. (define read-char-data
  670. (let* ((end-chars-eof '(*eof* #\{ #\} #\@ #\newline)))
  671. (define (handle str-handler str1 str2 seed)
  672. (if (and (string-null? str1) (string-null? str2))
  673. seed
  674. (str-handler str1 str2 seed)))
  675. (lambda (port expect-eof? preserve-ws? str-handler seed)
  676. (let ((end-chars ((if expect-eof? identity cdr) end-chars-eof)))
  677. (let loop ((seed seed))
  678. (let* ((fragment (next-token '() end-chars "reading char data" port))
  679. (term-char (peek-char port))) ; one of end-chars
  680. (cond
  681. ((eof-object? term-char) ; only if expect-eof?
  682. (values (handle str-handler fragment "" seed) term-char))
  683. ((memq term-char '(#\@ #\{ #\}))
  684. (values (handle str-handler fragment "" seed)
  685. (case term-char
  686. ((#\@) (read-command-token port))
  687. ((#\{) (make-token 'START '*braces*))
  688. ((#\}) (read-char port) (make-token 'END #f)))))
  689. ((eq? term-char #\newline)
  690. ;; Always significant, unless directly before an end token.
  691. (let ((c (peek-next-char port)))
  692. (cond
  693. ((eof-object? c)
  694. (or expect-eof?
  695. (parser-error port "EOF while reading char data"))
  696. (values (handle str-handler fragment "" seed) c))
  697. ((eq? c #\@)
  698. (let* ((token (read-command-token port))
  699. (end? (eq? (token-kind token) 'END)))
  700. (values
  701. (handle str-handler fragment (if end? "" " ") seed)
  702. token)))
  703. ((and (not preserve-ws?) (eq? c #\newline))
  704. ;; paragraph-separator ::= #\newline #\newline+
  705. (skip-while '(#\newline) port)
  706. (skip-horizontal-whitespace port)
  707. (values (handle str-handler fragment "" seed)
  708. (make-token 'PARA 'para)))
  709. (else
  710. (loop (handle str-handler fragment
  711. (if preserve-ws? "\n" " ") seed)))))))))))))
  712. ; procedure+: assert-token TOKEN KIND NAME
  713. ; Make sure that TOKEN is of anticipated KIND and has anticipated NAME
  714. (define (assert-token token kind name)
  715. (or (and (token? token)
  716. (eq? kind (token-kind token))
  717. (equal? name (token-head token)))
  718. (parser-error #f "Expecting @end for " name ", got " token)))
  719. ;;========================================================================
  720. ;; Highest-level parsers: Texinfo to SXML
  721. ;; These parsers are a set of syntactic forms to instantiate a SSAX
  722. ;; parser. The user tells what to do with the parsed character and
  723. ;; element data. These latter handlers determine if the parsing follows a
  724. ;; SAX or a DOM model.
  725. ;; syntax: make-command-parser fdown fup str-handler
  726. ;; Create a parser to parse and process one element, including its
  727. ;; character content or children elements. The parser is typically
  728. ;; applied to the root element of a document.
  729. ;; fdown
  730. ;; procedure COMMAND ARGUMENTS EXPECTED-CONTENT SEED
  731. ;;
  732. ;; This procedure is to generate the seed to be passed to handlers
  733. ;; that process the content of the element. This is the function
  734. ;; identified as 'fdown' in the denotational semantics of the XML
  735. ;; parser given in the title comments to (sxml ssax).
  736. ;;
  737. ;; fup
  738. ;; procedure COMMAND ARGUMENTS PARENT-SEED SEED
  739. ;;
  740. ;; This procedure is called when parsing of COMMAND is finished.
  741. ;; The SEED is the result from the last content parser (or from
  742. ;; fdown if the element has the empty content). PARENT-SEED is the
  743. ;; same seed as was passed to fdown. The procedure is to generate a
  744. ;; seed that will be the result of the element parser. This is the
  745. ;; function identified as 'fup' in the denotational semantics of
  746. ;; the XML parser given in the title comments to (sxml ssax).
  747. ;;
  748. ;; str-handler
  749. ;; A STR-HANDLER, see read-verbatim-body
  750. ;;
  751. ;; The generated parser is a
  752. ;; procedure COMMAND PORT SEED
  753. ;;
  754. ;; The procedure must be called *after* the command token has been read.
  755. (define (read-include-file-name port)
  756. (let ((x (string-trim-both (read-eof-line port))))
  757. (if (string-null? x)
  758. (error "no file listed")
  759. x))) ;; fixme: should expand @value{} references
  760. (define (sxml->node-name sxml)
  761. "Turn some sxml string into a valid node name."
  762. (let loop ((in (string->list (sxml->string sxml))) (out '()))
  763. (if (null? in)
  764. (apply string (reverse out))
  765. (if (memq (car in) '(#\{ #\} #\@ #\,))
  766. (loop (cdr in) out)
  767. (loop (cdr in) (cons (car in) out))))))
  768. (define (index command arguments fdown fup parent-seed)
  769. (case command
  770. ((deftp defcv defivar deftypeivar defop deftypeop defmethod
  771. deftypemethod defopt defvr defvar deftypevr deftypevar deffn
  772. deftypefn defspec defmac defun deftypefun)
  773. (let ((args `((name ,(string-append (symbol->string command) "-"
  774. (cadr (assq 'name arguments)))))))
  775. (fup 'anchor args parent-seed
  776. (fdown 'anchor args 'INLINE-ARGS '()))))
  777. ((cindex findex vindex kindex pindex tindex)
  778. (let ((args `((name ,(string-append (symbol->string command) "-"
  779. (sxml->node-name
  780. (assq 'entry arguments)))))))
  781. (fup 'anchor args parent-seed
  782. (fdown 'anchor args 'INLINE-ARGS '()))))
  783. (else parent-seed)))
  784. (define (make-command-parser fdown fup str-handler)
  785. (lambda (command port seed)
  786. (let visit ((command command) (port port) (sig-ws? #f) (parent-seed seed))
  787. (let*-values (((command arguments expected-content)
  788. (complete-start-command command port)))
  789. (let* ((parent-seed (index command arguments fdown fup parent-seed))
  790. (seed (fdown command arguments expected-content parent-seed))
  791. (eof-closes? (or (memq command '(texinfo para *fragment*))
  792. (eq? expected-content 'EOL-TEXT)))
  793. (sig-ws? (or sig-ws? (space-significant? command)))
  794. (up (lambda (s) (fup command arguments parent-seed s)))
  795. (new-para (lambda (s) (fdown 'para '() 'PARAGRAPH s)))
  796. (make-end-para (lambda (p) (lambda (s) (fup 'para '() p s)))))
  797. (define (port-for-content)
  798. (if (eq? expected-content 'EOL-TEXT)
  799. (call-with-input-string (read-text-line port) identity)
  800. port))
  801. (cond
  802. ((memq expected-content '(EMPTY-COMMAND INLINE-ARGS EOL-ARGS INDEX
  803. EOL-TEXT-ARGS))
  804. ;; empty or finished by complete-start-command
  805. (up seed))
  806. ((eq? command 'verbatim)
  807. (up (read-verbatim-body port str-handler seed)))
  808. (else
  809. (let loop ((port (port-for-content))
  810. (expect-eof? eof-closes?)
  811. (end-para identity)
  812. (need-break? (and (not sig-ws?)
  813. (memq expected-content
  814. '(ENVIRON TABLE-ENVIRON
  815. ENTRY ITEM FRAGMENT))))
  816. (seed seed))
  817. (cond
  818. ((and need-break? (or sig-ws? (skip-whitespace port))
  819. (not (memq (peek-char port) '(#\@ #\})))
  820. (not (eof-object? (peek-char port))))
  821. ;; Even if we have an @, it might be inline -- check
  822. ;; that later
  823. (let ((seed (end-para seed)))
  824. (loop port expect-eof? (make-end-para seed) #f
  825. (new-para seed))))
  826. (else
  827. (let*-values (((seed token)
  828. (read-char-data
  829. port expect-eof? sig-ws? str-handler seed)))
  830. (cond
  831. ((eof-object? token)
  832. (case expect-eof?
  833. ((include #f) (end-para seed))
  834. (else (up (end-para seed)))))
  835. (else
  836. (case (token-kind token)
  837. ((STRING)
  838. ;; this is only @-commands that escape
  839. ;; characters: @}, @@, @{ -- new para if need-break
  840. (let ((seed ((if need-break? end-para identity) seed)))
  841. (loop port expect-eof?
  842. (if need-break? (make-end-para seed) end-para) #f
  843. (str-handler (token-head token) ""
  844. ((if need-break? new-para identity)
  845. seed)))))
  846. ((END)
  847. ;; The end will only have a name if it's for an
  848. ;; environment
  849. (cond
  850. ((memq command '(item entry))
  851. (let ((spec (command-spec (token-head token))))
  852. (or (eq? (cadr spec) 'TABLE-ENVIRON)
  853. (parser-error
  854. port "@item not ended by @end table/enumerate/itemize"
  855. token))))
  856. ((eq? expected-content 'ENVIRON)
  857. (assert-token token 'END command)))
  858. (up (end-para seed)))
  859. ((ITEM)
  860. (cond
  861. ((memq command '(enumerate itemize))
  862. (up (visit 'item port sig-ws? (end-para seed))))
  863. ((eq? expected-content 'TABLE-ENVIRON)
  864. (up (visit 'entry port sig-ws? (end-para seed))))
  865. ((memq command '(item entry))
  866. (visit command port sig-ws? (up (end-para seed))))
  867. (else
  868. (parser-error
  869. port "@item must be within a table environment"
  870. command))))
  871. ((PARA)
  872. ;; examine valid paragraphs?
  873. (loop port expect-eof? end-para (not sig-ws?) seed))
  874. ((INCLUDE)
  875. ;; Recurse for include files
  876. (let ((seed (call-with-file-and-dir
  877. (read-include-file-name port)
  878. (lambda (port)
  879. (loop port 'include end-para
  880. need-break? seed)))))
  881. (loop port expect-eof? end-para need-break? seed)))
  882. ((START) ; Start of an @-command
  883. (let* ((head (token-head token))
  884. (type (cadr (command-spec head)))
  885. (inline? (inline-content? type))
  886. (seed ((if (and inline? (not need-break?))
  887. identity end-para) seed))
  888. (end-para (if inline?
  889. (if need-break? (make-end-para seed)
  890. end-para)
  891. identity))
  892. (new-para (if (and inline? need-break?)
  893. new-para identity)))
  894. (loop port expect-eof? end-para (not inline?)
  895. (visit head port sig-ws? (new-para seed)))))
  896. (else
  897. (parser-error port "Unknown token type" token))))))))))))))))
  898. ;; procedure: reverse-collect-str-drop-ws fragments
  899. ;;
  900. ;; Given the list of fragments (some of which are text strings), reverse
  901. ;; the list and concatenate adjacent text strings. We also drop
  902. ;; "unsignificant" whitespace, that is, whitespace in front, behind and
  903. ;; between elements. The whitespace that is included in character data
  904. ;; is not affected.
  905. (define (reverse-collect-str-drop-ws fragments)
  906. (cond
  907. ((null? fragments) ; a shortcut
  908. '())
  909. ((and (string? (car fragments)) ; another shortcut
  910. (null? (cdr fragments)) ; remove single ws-only string
  911. (string-whitespace? (car fragments)))
  912. '())
  913. (else
  914. (let loop ((fragments fragments) (result '()) (strs '())
  915. (all-whitespace? #t))
  916. (cond
  917. ((null? fragments)
  918. (if all-whitespace?
  919. result ; remove leading ws
  920. (cons (apply string-append strs) result)))
  921. ((string? (car fragments))
  922. (loop (cdr fragments) result (cons (car fragments) strs)
  923. (and all-whitespace?
  924. (string-whitespace? (car fragments)))))
  925. (else
  926. (loop (cdr fragments)
  927. (cons
  928. (car fragments)
  929. (cond
  930. ((null? strs) result)
  931. (all-whitespace?
  932. (if (null? result)
  933. result ; remove trailing whitespace
  934. (cons " " result))); replace interstitial ws with
  935. ; one space
  936. (else
  937. (cons (apply string-append strs) result))))
  938. '() #t)))))))
  939. (define (make-dom-parser)
  940. (make-command-parser
  941. (lambda (command args content seed) ; fdown
  942. '())
  943. (lambda (command args parent-seed seed) ; fup
  944. (let ((seed (reverse-collect-str-drop-ws seed)))
  945. (acons command
  946. (if (null? args) seed (acons '% args seed))
  947. parent-seed)))
  948. (lambda (string1 string2 seed) ; str-handler
  949. (if (string-null? string2)
  950. (cons string1 seed)
  951. (cons* string2 string1 seed)))))
  952. (define parse-environment-args
  953. (let ((parser (make-dom-parser)))
  954. ;; duplicate arguments->attlist to avoid unnecessary splitting
  955. (lambda (command port)
  956. (let ((args (cdar (parser '*ENVIRON-ARGS* port '())))
  957. (arg-names (cddr (command-spec command))))
  958. (cond
  959. ((not arg-names)
  960. (if (null? args) '()
  961. (parser-error port "@-command doesn't take args" command)))
  962. ((eq? arg-names #t)
  963. (list (cons 'arguments args)))
  964. (else
  965. (let loop ((args args) (arg-names arg-names) (out '()))
  966. (cond
  967. ((null? arg-names)
  968. (if (null? args) (reverse! out)
  969. (parser-error port "@-command didn't expect more args"
  970. command args)))
  971. ((symbol? arg-names)
  972. (reverse! (acons arg-names args out)))
  973. ((null? args)
  974. (parser-error port "@-command expects more args"
  975. command arg-names))
  976. ((and (string? (car args)) (string-index (car args) #\space))
  977. => (lambda (i)
  978. (let ((rest (substring/shared (car args) (1+ i))))
  979. (if (zero? i)
  980. (loop (cons rest (cdr args)) arg-names out)
  981. (loop (cons rest (cdr args)) (cdr arg-names)
  982. (cons (list (car arg-names)
  983. (substring (car args) 0 i))
  984. out))))))
  985. (else
  986. (loop (cdr args) (cdr arg-names)
  987. (if (and (pair? (car args)) (eq? (caar args) '*braces*))
  988. (acons (car arg-names) (cdar args) out)
  989. (cons (list (car arg-names) (car args)) out))))))))))))
  990. (define (parse-eol-text-args command port)
  991. ;; perhaps parse-environment-args should be named more
  992. ;; generically.
  993. (parse-environment-args command port))
  994. ;; procedure: texi-fragment->stexi STRING
  995. ;;
  996. ;; A DOM parser for a texinfo fragment STRING.
  997. ;;
  998. ;; The procedure returns an SXML tree headed by the special tag,
  999. ;; *fragment*.
  1000. (define (texi-fragment->stexi string-or-port)
  1001. "Parse the texinfo commands in @var{string-or-port}, and return the
  1002. resultant stexi tree. The head of the tree will be the special command,
  1003. @code{*fragment*}."
  1004. (define (parse port)
  1005. (postprocess (car ((make-dom-parser) '*fragment* port '()))))
  1006. (if (input-port? string-or-port)
  1007. (parse string-or-port)
  1008. (call-with-input-string string-or-port parse)))
  1009. ;; procedure: texi->stexi PORT
  1010. ;;
  1011. ;; This is an instance of a SSAX parser above that returns an SXML
  1012. ;; representation of the texinfo document ready to be read at PORT.
  1013. ;;
  1014. ;; The procedure returns an SXML tree. The port points to the
  1015. ;; first character after the @bye, or to the end of the file.
  1016. (define (texi->stexi port)
  1017. "Read a full texinfo document from @var{port} and return the parsed
  1018. stexi tree. The parsing will start at the @code{@@settitle} and end at
  1019. @code{@@bye} or EOF."
  1020. (let ((parser (make-dom-parser)))
  1021. (take-until-settitle port)
  1022. (postprocess (car (parser 'texinfo port '())))))
  1023. (define (car-eq? x y) (and (pair? x) (eq? (car x) y)))
  1024. (define (make-contents tree)
  1025. (define (lp in out depth)
  1026. (cond
  1027. ((null? in) (values in (cons 'enumerate (reverse! out))))
  1028. ((and (pair? (cdr in)) (texi-command-depth (caadr in) 4))
  1029. => (lambda (new-depth)
  1030. (let ((node-name (and (car-eq? (car in) 'node)
  1031. (cadr (assq 'name (cdadar in))))))
  1032. (cond
  1033. ((< new-depth depth)
  1034. (values in (cons 'enumerate (reverse! out))))
  1035. ((> new-depth depth)
  1036. (let ((out-cdr (if (null? out) '() (cdr out)))
  1037. (out-car (if (null? out) (list 'item) (car out))))
  1038. (let*-values (((new-in new-out) (lp in '() (1+ depth))))
  1039. (lp new-in
  1040. (cons (append out-car (list new-out)) out-cdr)
  1041. depth))))
  1042. (else ;; same depth
  1043. (lp (cddr in)
  1044. (cons
  1045. `(item (para
  1046. ,@(if node-name
  1047. `((ref (% (node ,node-name))))
  1048. (cdadr in))))
  1049. out)
  1050. depth))))))
  1051. (else (lp (cdr in) out depth))))
  1052. (let*-values (((_ contents) (lp tree '() 1)))
  1053. `((chapheading "Table of Contents") ,contents)))
  1054. (define (trim-whitespace str trim-left? trim-right?)
  1055. (let* ((left-space? (and (not trim-left?)
  1056. (string-prefix? " " str)))
  1057. (right-space? (and (not trim-right?)
  1058. (string-suffix? " " str)))
  1059. (tail (append! (string-tokenize str)
  1060. (if right-space? '("") '()))))
  1061. (string-join (if left-space? (cons "" tail) tail))))
  1062. (define (postprocess tree)
  1063. (define (loop in out state first? sig-ws?)
  1064. (cond
  1065. ((null? in)
  1066. (values (reverse! out) state))
  1067. ((string? (car in))
  1068. (loop (cdr in)
  1069. (cons (if sig-ws? (car in)
  1070. (trim-whitespace (car in) first? (null? (cdr in))))
  1071. out)
  1072. state #f sig-ws?))
  1073. ((pair? (car in))
  1074. (case (caar in)
  1075. ((set)
  1076. (if (null? (cdar in)) (error "@set missing arguments" in))
  1077. (if (string? (cadar in))
  1078. (let ((i (string-index (cadar in) #\space)))
  1079. (if i
  1080. (loop (cdr in) out
  1081. (acons (substring (cadar in) 0 i)
  1082. (cons (substring (cadar in) (1+ i)) (cddar in))
  1083. state)
  1084. #f sig-ws?)
  1085. (loop (cdr in) out (acons (cadar in) (cddar in) state)
  1086. #f sig-ws?)))
  1087. (error "expected a constant to define for @set" in)))
  1088. ((value)
  1089. (loop (fold-right cons (cdr in)
  1090. (or (and=>
  1091. (assoc (cadr (assq 'key (cdadar in))) state) cdr)
  1092. (error "unknown value" (cdadar in) state)))
  1093. out
  1094. state #f sig-ws?))
  1095. ((copying)
  1096. (loop (cdr in) out (cons (car in) state) #f sig-ws?))
  1097. ((insertcopying)
  1098. (loop (fold-right cons (cdr in)
  1099. (or (cdr (assoc 'copying state))
  1100. (error "copying isn't set yet")))
  1101. out
  1102. state #f sig-ws?))
  1103. ((contents)
  1104. (loop (cdr in) (fold cons out (make-contents tree)) state #f sig-ws?))
  1105. (else
  1106. (let*-values (((kid-out state)
  1107. (loop (car in) '() state #t
  1108. (or sig-ws? (space-significant? (caar in))))))
  1109. (loop (cdr in) (cons kid-out out) state #f sig-ws?)))))
  1110. (else ; a symbol
  1111. (loop (cdr in) (cons (car in) out) state #t sig-ws?))))
  1112. (call-with-values
  1113. (lambda () (loop tree '() '() #t #f))
  1114. (lambda (out state) out)))
  1115. ;; Replace % with texinfo-arguments.
  1116. (define (stexi->sxml tree)
  1117. "Transform the stexi tree @var{tree} into sxml. This involves
  1118. replacing the @code{%} element that keeps the texinfo arguments with an
  1119. element for each argument.
  1120. FIXME: right now it just changes % to @code{texinfo-arguments} -- that
  1121. doesn't hang with the idea of making a dtd at some point"
  1122. (pre-post-order
  1123. tree
  1124. `((% . ,(lambda (x . t) (cons 'texinfo-arguments t)))
  1125. (*text* . ,(lambda (x t) t))
  1126. (*default* . ,(lambda (x . t) (cons x t))))))
  1127. ;;; arch-tag: 73890afa-597c-4264-ae70-46fe7756ffb5
  1128. ;;; texinfo.scm ends here