read-text-outline.scm 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. ;;; read-text-outline --- Read a text outline and display it as a sexp
  2. ;; Copyright (C) 2002, 2006, 2011 Free Software Foundation, Inc.
  3. ;;
  4. ;; This program is free software; you can redistribute it and/or
  5. ;; modify it under the terms of the GNU Lesser General Public License
  6. ;; as published by the Free Software Foundation; either version 3, or
  7. ;; (at your option) any later version.
  8. ;;
  9. ;; This program is distributed in the hope that it will be useful,
  10. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ;; Lesser General Public License for more details.
  13. ;;
  14. ;; You should have received a copy of the GNU Lesser General Public
  15. ;; License along with this software; see the file COPYING.LESSER. If
  16. ;; not, write to the Free Software Foundation, Inc., 51 Franklin
  17. ;; Street, Fifth Floor, Boston, MA 02110-1301 USA
  18. ;;; Author: Thien-Thi Nguyen <ttn@gnu.org>
  19. ;;; Commentary:
  20. ;; Usage: read-text-outline OUTLINE
  21. ;;
  22. ;; Scan OUTLINE file and display a list of trees, the structure of
  23. ;; each reflecting the "levels" in OUTLINE. The recognized outline
  24. ;; format (used to indicate outline headings) is zero or more pairs of
  25. ;; leading spaces followed by "-". Something like:
  26. ;;
  27. ;; - a 0
  28. ;; - b 1
  29. ;; - c 2
  30. ;; - d 1
  31. ;; - e 0
  32. ;; - f 1
  33. ;; - g 2
  34. ;; - h 1
  35. ;;
  36. ;; In this example the levels are shown to the right. The output for
  37. ;; such a file would be the single line:
  38. ;;
  39. ;; (("a" ("b" "c") "d") ("e" ("f" "g") "h"))
  40. ;;
  41. ;; Basically, anything at the beginning of a list is a parent, and the
  42. ;; remaining elements of that list are its children.
  43. ;;
  44. ;;
  45. ;; Usage from a Scheme program: These two procs are exported:
  46. ;;
  47. ;; (read-text-outline . args) ; only first arg is used
  48. ;; (read-text-outline-silently port)
  49. ;; (make-text-outline-reader re specs)
  50. ;;
  51. ;; `make-text-outline-reader' returns a proc that reads from PORT and
  52. ;; returns a list of trees (similar to `read-text-outline-silently').
  53. ;;
  54. ;; RE is a regular expression (string) that is used to identify a header
  55. ;; line of the outline (as opposed to a whitespace line or intervening
  56. ;; text). RE must begin w/ a sub-expression to match the "level prefix"
  57. ;; of the line. You can use `level-submatch-number' in SPECS (explained
  58. ;; below) to specify a number other than 1, the default.
  59. ;;
  60. ;; Normally, the level of the line is taken directly as the length of
  61. ;; its level prefix. This often results in adjacent levels not mapping
  62. ;; to adjacent numbers, which confuses the tree-building portion of the
  63. ;; program, which expects top-level to be 0, first sub-level to be 1,
  64. ;; etc. You can use `level-substring-divisor' or `compute-level' in
  65. ;; SPECS to specify a constant scaling factor or specify a completely
  66. ;; alternative procedure, respectively.
  67. ;;
  68. ;; SPECS is an alist which may contain the following key/value pairs:
  69. ;;
  70. ;; - level-submatch-number NUMBER
  71. ;; - level-substring-divisor NUMBER
  72. ;; - compute-level PROC
  73. ;; - body-submatch-number NUMBER
  74. ;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...)
  75. ;;
  76. ;; The PROC value associated with key `compute-level' should take a
  77. ;; Scheme match structure (as returned by `regexp-exec') and return a
  78. ;; number, the normalized level for that line. If this is specified,
  79. ;; it takes precedence over other level-computation methods.
  80. ;;
  81. ;; Use `body-submatch-number' if RE specifies the whole body, or if you
  82. ;; want to make use of the extra fields parsing. The `extra-fields'
  83. ;; value is a sub-alist, whose keys name additional fields that are to
  84. ;; be recognized. These fields along with `level' are set as object
  85. ;; properties of the final string ("body") that is consed into the tree.
  86. ;; If a field name ends in "?" the field value is set to be #t if there
  87. ;; is a match and the result is not an empty string, and #f otherwise.
  88. ;;
  89. ;;
  90. ;; Bugs and caveats:
  91. ;;
  92. ;; (1) Only the first file specified on the command line is scanned.
  93. ;; (2) TAB characters at the beginnings of lines are not recognized.
  94. ;; (3) Outlines that "skip" levels signal an error. In other words,
  95. ;; this will fail:
  96. ;;
  97. ;; - a 0
  98. ;; - b 1
  99. ;; - c 3 <-- skipped 2 -- error!
  100. ;; - d 1
  101. ;;
  102. ;;
  103. ;; TODO: Determine what's the right thing to do for skips.
  104. ;; Handle TABs.
  105. ;; Make line format customizable via longopts.
  106. ;;; Code:
  107. (define-module (scripts read-text-outline)
  108. :export (read-text-outline
  109. read-text-outline-silently
  110. make-text-outline-reader)
  111. :use-module (ice-9 regex)
  112. :autoload (ice-9 rdelim) (read-line)
  113. :autoload (ice-9 getopt-long) (getopt-long))
  114. (define %include-in-guild-list #f)
  115. (define %summary "Convert textual outlines to s-expressions.")
  116. (define (?? symbol)
  117. (let ((name (symbol->string symbol)))
  118. (string=? "?" (substring name (1- (string-length name))))))
  119. (define (msub n)
  120. (lambda (m)
  121. (match:substring m n)))
  122. (define (??-predicates pair)
  123. (cons (car pair)
  124. (if (?? (car pair))
  125. (lambda (m)
  126. (not (string=? "" (match:substring m (cdr pair)))))
  127. (msub (cdr pair)))))
  128. (define (make-line-parser re specs)
  129. (let* ((rx (let ((fc (substring re 0 1)))
  130. (make-regexp (if (string=? "^" fc)
  131. re
  132. (string-append "^" re)))))
  133. (check (lambda (key)
  134. (assq-ref specs key)))
  135. (level-substring (msub (or (check 'level-submatch-number) 1)))
  136. (extract-level (cond ((check 'compute-level)
  137. => (lambda (proc)
  138. (lambda (m)
  139. (proc m))))
  140. ((check 'level-substring-divisor)
  141. => (lambda (n)
  142. (lambda (m)
  143. (/ (string-length (level-substring m))
  144. n))))
  145. (else
  146. (lambda (m)
  147. (string-length (level-substring m))))))
  148. (extract-body (cond ((check 'body-submatch-number)
  149. => msub)
  150. (else
  151. (lambda (m) (match:suffix m)))))
  152. (misc-props! (cond ((check 'extra-fields)
  153. => (lambda (alist)
  154. (let ((new (map ??-predicates alist)))
  155. (lambda (obj m)
  156. (for-each
  157. (lambda (pair)
  158. (set-object-property!
  159. obj (car pair)
  160. ((cdr pair) m)))
  161. new)))))
  162. (else
  163. (lambda (obj m) #t)))))
  164. ;; retval
  165. (lambda (line)
  166. (cond ((regexp-exec rx line)
  167. => (lambda (m)
  168. (let ((level (extract-level m))
  169. (body (extract-body m)))
  170. (set-object-property! body 'level level)
  171. (misc-props! body m)
  172. body)))
  173. (else #f)))))
  174. (define (make-text-outline-reader re specs)
  175. (let ((parse-line (make-line-parser re specs)))
  176. ;; retval
  177. (lambda (port)
  178. (let* ((all '(start))
  179. (pchain (list))) ; parents chain
  180. (let loop ((line (read-line port))
  181. (prev-level -1) ; how this relates to the first input
  182. ; level determines whether or not we
  183. ; start in "sibling" or "child" mode.
  184. ; in the end, `start' is ignored and
  185. ; it's much easier to ignore parents
  186. ; than siblings (sometimes). this is
  187. ; not to encourage ignorance, however.
  188. (tp all)) ; tail pointer
  189. (or (eof-object? line)
  190. (cond ((parse-line line)
  191. => (lambda (w)
  192. (let* ((words (list w))
  193. (level (object-property w 'level))
  194. (diff (- level prev-level)))
  195. (cond
  196. ;; sibling
  197. ((zero? diff)
  198. ;; just extend the chain
  199. (set-cdr! tp words))
  200. ;; child
  201. ((positive? diff)
  202. (or (= 1 diff)
  203. (error "unhandled diff not 1:" diff line))
  204. ;; parent may be contacted by uncle later (kids
  205. ;; these days!) so save its level
  206. (set-object-property! tp 'level prev-level)
  207. (set! pchain (cons tp pchain))
  208. ;; "push down" car into hierarchy
  209. (set-car! tp (cons (car tp) words)))
  210. ;; uncle
  211. ((negative? diff)
  212. ;; prune back to where levels match
  213. (do ((p pchain (cdr p)))
  214. ((= level (object-property (car p) 'level))
  215. (set! pchain p)))
  216. ;; resume at this level
  217. (set-cdr! (car pchain) words)
  218. (set! pchain (cdr pchain))))
  219. (loop (read-line port) level words))))
  220. (else (loop (read-line port) prev-level tp)))))
  221. (set! all (car all))
  222. (if (eq? 'start all)
  223. '() ; wasteland
  224. (cdr all))))))
  225. (define read-text-outline-silently
  226. (make-text-outline-reader "(([ ][ ])*)- *"
  227. '((level-substring-divisor . 2))))
  228. (define (read-text-outline . args)
  229. (write (read-text-outline-silently (open-file (car args) "r")))
  230. (newline)
  231. #t) ; exit val
  232. (define main read-text-outline)
  233. ;;; read-text-outline ends here