123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254 |
- ;;; read-text-outline --- Read a text outline and display it as a sexp
- ;; Copyright (C) 2002, 2006, 2011 Free Software Foundation, Inc.
- ;;
- ;; This program is free software; you can redistribute it and/or
- ;; modify it under the terms of the GNU Lesser General Public License
- ;; as published by the Free Software Foundation; either version 3, or
- ;; (at your option) any later version.
- ;;
- ;; This program is distributed in the hope that it will be useful,
- ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- ;; Lesser General Public License for more details.
- ;;
- ;; You should have received a copy of the GNU Lesser General Public
- ;; License along with this software; see the file COPYING.LESSER. If
- ;; not, write to the Free Software Foundation, Inc., 51 Franklin
- ;; Street, Fifth Floor, Boston, MA 02110-1301 USA
- ;;; Author: Thien-Thi Nguyen <ttn@gnu.org>
- ;;; Commentary:
- ;; Usage: read-text-outline OUTLINE
- ;;
- ;; Scan OUTLINE file and display a list of trees, the structure of
- ;; each reflecting the "levels" in OUTLINE. The recognized outline
- ;; format (used to indicate outline headings) is zero or more pairs of
- ;; leading spaces followed by "-". Something like:
- ;;
- ;; - a 0
- ;; - b 1
- ;; - c 2
- ;; - d 1
- ;; - e 0
- ;; - f 1
- ;; - g 2
- ;; - h 1
- ;;
- ;; In this example the levels are shown to the right. The output for
- ;; such a file would be the single line:
- ;;
- ;; (("a" ("b" "c") "d") ("e" ("f" "g") "h"))
- ;;
- ;; Basically, anything at the beginning of a list is a parent, and the
- ;; remaining elements of that list are its children.
- ;;
- ;;
- ;; Usage from a Scheme program: These two procs are exported:
- ;;
- ;; (read-text-outline . args) ; only first arg is used
- ;; (read-text-outline-silently port)
- ;; (make-text-outline-reader re specs)
- ;;
- ;; `make-text-outline-reader' returns a proc that reads from PORT and
- ;; returns a list of trees (similar to `read-text-outline-silently').
- ;;
- ;; RE is a regular expression (string) that is used to identify a header
- ;; line of the outline (as opposed to a whitespace line or intervening
- ;; text). RE must begin w/ a sub-expression to match the "level prefix"
- ;; of the line. You can use `level-submatch-number' in SPECS (explained
- ;; below) to specify a number other than 1, the default.
- ;;
- ;; Normally, the level of the line is taken directly as the length of
- ;; its level prefix. This often results in adjacent levels not mapping
- ;; to adjacent numbers, which confuses the tree-building portion of the
- ;; program, which expects top-level to be 0, first sub-level to be 1,
- ;; etc. You can use `level-substring-divisor' or `compute-level' in
- ;; SPECS to specify a constant scaling factor or specify a completely
- ;; alternative procedure, respectively.
- ;;
- ;; SPECS is an alist which may contain the following key/value pairs:
- ;;
- ;; - level-submatch-number NUMBER
- ;; - level-substring-divisor NUMBER
- ;; - compute-level PROC
- ;; - body-submatch-number NUMBER
- ;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...)
- ;;
- ;; The PROC value associated with key `compute-level' should take a
- ;; Scheme match structure (as returned by `regexp-exec') and return a
- ;; number, the normalized level for that line. If this is specified,
- ;; it takes precedence over other level-computation methods.
- ;;
- ;; Use `body-submatch-number' if RE specifies the whole body, or if you
- ;; want to make use of the extra fields parsing. The `extra-fields'
- ;; value is a sub-alist, whose keys name additional fields that are to
- ;; be recognized. These fields along with `level' are set as object
- ;; properties of the final string ("body") that is consed into the tree.
- ;; If a field name ends in "?" the field value is set to be #t if there
- ;; is a match and the result is not an empty string, and #f otherwise.
- ;;
- ;;
- ;; Bugs and caveats:
- ;;
- ;; (1) Only the first file specified on the command line is scanned.
- ;; (2) TAB characters at the beginnings of lines are not recognized.
- ;; (3) Outlines that "skip" levels signal an error. In other words,
- ;; this will fail:
- ;;
- ;; - a 0
- ;; - b 1
- ;; - c 3 <-- skipped 2 -- error!
- ;; - d 1
- ;;
- ;;
- ;; TODO: Determine what's the right thing to do for skips.
- ;; Handle TABs.
- ;; Make line format customizable via longopts.
- ;;; Code:
- (define-module (scripts read-text-outline)
- :export (read-text-outline
- read-text-outline-silently
- make-text-outline-reader)
- :use-module (ice-9 regex)
- :autoload (ice-9 rdelim) (read-line)
- :autoload (ice-9 getopt-long) (getopt-long))
- (define %include-in-guild-list #f)
- (define %summary "Convert textual outlines to s-expressions.")
- (define (?? symbol)
- (let ((name (symbol->string symbol)))
- (string=? "?" (substring name (1- (string-length name))))))
- (define (msub n)
- (lambda (m)
- (match:substring m n)))
- (define (??-predicates pair)
- (cons (car pair)
- (if (?? (car pair))
- (lambda (m)
- (not (string=? "" (match:substring m (cdr pair)))))
- (msub (cdr pair)))))
- (define (make-line-parser re specs)
- (let* ((rx (let ((fc (substring re 0 1)))
- (make-regexp (if (string=? "^" fc)
- re
- (string-append "^" re)))))
- (check (lambda (key)
- (assq-ref specs key)))
- (level-substring (msub (or (check 'level-submatch-number) 1)))
- (extract-level (cond ((check 'compute-level)
- => (lambda (proc)
- (lambda (m)
- (proc m))))
- ((check 'level-substring-divisor)
- => (lambda (n)
- (lambda (m)
- (/ (string-length (level-substring m))
- n))))
- (else
- (lambda (m)
- (string-length (level-substring m))))))
- (extract-body (cond ((check 'body-submatch-number)
- => msub)
- (else
- (lambda (m) (match:suffix m)))))
- (misc-props! (cond ((check 'extra-fields)
- => (lambda (alist)
- (let ((new (map ??-predicates alist)))
- (lambda (obj m)
- (for-each
- (lambda (pair)
- (set-object-property!
- obj (car pair)
- ((cdr pair) m)))
- new)))))
- (else
- (lambda (obj m) #t)))))
- ;; retval
- (lambda (line)
- (cond ((regexp-exec rx line)
- => (lambda (m)
- (let ((level (extract-level m))
- (body (extract-body m)))
- (set-object-property! body 'level level)
- (misc-props! body m)
- body)))
- (else #f)))))
- (define (make-text-outline-reader re specs)
- (let ((parse-line (make-line-parser re specs)))
- ;; retval
- (lambda (port)
- (let* ((all '(start))
- (pchain (list))) ; parents chain
- (let loop ((line (read-line port))
- (prev-level -1) ; how this relates to the first input
- ; level determines whether or not we
- ; start in "sibling" or "child" mode.
- ; in the end, `start' is ignored and
- ; it's much easier to ignore parents
- ; than siblings (sometimes). this is
- ; not to encourage ignorance, however.
- (tp all)) ; tail pointer
- (or (eof-object? line)
- (cond ((parse-line line)
- => (lambda (w)
- (let* ((words (list w))
- (level (object-property w 'level))
- (diff (- level prev-level)))
- (cond
- ;; sibling
- ((zero? diff)
- ;; just extend the chain
- (set-cdr! tp words))
- ;; child
- ((positive? diff)
- (or (= 1 diff)
- (error "unhandled diff not 1:" diff line))
- ;; parent may be contacted by uncle later (kids
- ;; these days!) so save its level
- (set-object-property! tp 'level prev-level)
- (set! pchain (cons tp pchain))
- ;; "push down" car into hierarchy
- (set-car! tp (cons (car tp) words)))
- ;; uncle
- ((negative? diff)
- ;; prune back to where levels match
- (do ((p pchain (cdr p)))
- ((= level (object-property (car p) 'level))
- (set! pchain p)))
- ;; resume at this level
- (set-cdr! (car pchain) words)
- (set! pchain (cdr pchain))))
- (loop (read-line port) level words))))
- (else (loop (read-line port) prev-level tp)))))
- (set! all (car all))
- (if (eq? 'start all)
- '() ; wasteland
- (cdr all))))))
- (define read-text-outline-silently
- (make-text-outline-reader "(([ ][ ])*)- *"
- '((level-substring-divisor . 2))))
- (define (read-text-outline . args)
- (write (read-text-outline-silently (open-file (car args) "r")))
- (newline)
- #t) ; exit val
- (define main read-text-outline)
- ;;; read-text-outline ends here
|