read-scheme-source 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. #!/bin/sh
  2. # aside from this initial boilerplate, this is actually -*- scheme -*- code
  3. main='(module-ref (resolve-module '\''(scripts read-scheme-source)) '\'main')'
  4. exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@"
  5. !#
  6. ;;; read-scheme-source --- Read a file, recognizing scheme forms and comments
  7. ;; Copyright (C) 2001, 2006 Free Software Foundation, Inc.
  8. ;;
  9. ;; This program is free software; you can redistribute it and/or
  10. ;; modify it under the terms of the GNU General Public License as
  11. ;; published by the Free Software Foundation; either version 2, or
  12. ;; (at your option) any later version.
  13. ;;
  14. ;; This program is distributed in the hope that it will be useful,
  15. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. ;; General Public License for more details.
  18. ;;
  19. ;; You should have received a copy of the GNU General Public License
  20. ;; along with this software; see the file COPYING. If not, write to
  21. ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  22. ;; Boston, MA 02110-1301 USA
  23. ;;; Author: Thien-Thi Nguyen
  24. ;;; Commentary:
  25. ;; Usage: read-scheme-source FILE1 FILE2 ...
  26. ;;
  27. ;; This program parses each FILE and writes to stdout sexps that describe the
  28. ;; top-level structures of the file: scheme forms, single-line comments, and
  29. ;; hash-bang comments. You can further process these (to associate comments
  30. ;; w/ scheme forms as a kind of documentation, for example).
  31. ;;
  32. ;; The output sexps have one of these forms:
  33. ;;
  34. ;; (quote (filename FILENAME))
  35. ;;
  36. ;; (quote (comment :leading-semicolons N
  37. ;; :text LINE))
  38. ;;
  39. ;; (quote (whitespace :text LINE))
  40. ;;
  41. ;; (quote (hash-bang-comment :line LINUM
  42. ;; :line-count N
  43. ;; :text-list (LINE1 LINE2 ...)))
  44. ;;
  45. ;; (quote (following-form-properties :line LINUM
  46. ;; :line-count N)
  47. ;; :type TYPE
  48. ;; :signature SIGNATURE
  49. ;; :std-int-doc DOCSTRING))
  50. ;;
  51. ;; SEXP
  52. ;;
  53. ;; The first four are straightforward (both FILENAME and LINE are strings sans
  54. ;; newline, while LINUM and N are integers). The last two always go together,
  55. ;; in that order. SEXP is scheme code processed only by `read' and then
  56. ;; `write'.
  57. ;;
  58. ;; The :type field may be omitted if the form is not recognized. Otherwise,
  59. ;; TYPE may be one of: procedure, alias, define-module, variable.
  60. ;;
  61. ;; The :signature field may be omitted if the form is not a procedure.
  62. ;; Otherwise, SIGNATURE is a list showing the procedure's signature.
  63. ;;
  64. ;; If the type is `procedure' and the form has a standard internal docstring
  65. ;; (first body form a string), that is extracted in full -- including any
  66. ;; embedded newlines -- and recorded by field :std-int-doc.
  67. ;;
  68. ;;
  69. ;; Usage from a program: The output list of sexps can be retrieved by scheme
  70. ;; programs w/o having to capture stdout, like so:
  71. ;;
  72. ;; (use-modules (scripts read-scheme-source))
  73. ;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...))
  74. ;;
  75. ;; There are also two convenience procs exported for use by Scheme programs:
  76. ;;
  77. ;; (clump FORMS) --- filter FORMS combining contiguous comment forms that
  78. ;; have the same number of leading semicolons.
  79. ;;
  80. ;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse
  81. ;; the ":tags", and return alist of (TAG . VAL) elems.
  82. ;;
  83. ;; TODO: Add option "--clump-comments", maybe w/ different clumping styles.
  84. ;; Make `annotate!' extensible.
  85. ;;; Code:
  86. (define-module (scripts read-scheme-source)
  87. :use-module (ice-9 rdelim)
  88. :export (read-scheme-source
  89. read-scheme-source-silently
  90. quoted?
  91. clump))
  92. ;; Try to figure out what FORM is and its various attributes.
  93. ;; Call proc NOTE! with key (a symbol) and value.
  94. ;;
  95. (define (annotate! form note!)
  96. (cond ((and (list? form)
  97. (< 2 (length form))
  98. (eq? 'define (car form))
  99. (pair? (cadr form))
  100. (symbol? (caadr form)))
  101. (note! ':type 'procedure)
  102. (note! ':signature (cadr form))
  103. (and (< 3 (length form))
  104. (string? (caddr form))
  105. (note! ':std-int-doc (caddr form))))
  106. ((and (list? form)
  107. (< 2 (length form))
  108. (eq? 'define (car form))
  109. (symbol? (cadr form))
  110. (list? (caddr form))
  111. (< 3 (length (caddr form)))
  112. (eq? 'lambda (car (caddr form)))
  113. (string? (caddr (caddr form))))
  114. (note! ':type 'procedure)
  115. (note! ':signature (cons (cadr form) (cadr (caddr form))))
  116. (note! ':std-int-doc (caddr (caddr form))))
  117. ((and (list? form)
  118. (= 3 (length form))
  119. (eq? 'define (car form))
  120. (symbol? (cadr form))
  121. (symbol? (caddr form)))
  122. (note! ':type 'alias))
  123. ((and (list? form)
  124. (eq? 'define-module (car form)))
  125. (note! ':type 'define-module))
  126. ;; Add other types here.
  127. (else (note! ':type 'variable))))
  128. ;; Process FILE, calling NB! on parsed top-level elements.
  129. ;; Recognized: #!-!# and regular comments in addition to normal forms.
  130. ;;
  131. (define (process file nb!)
  132. (nb! `'(filename ,file))
  133. (let ((hash-bang-rx (make-regexp "^#!"))
  134. (bang-hash-rx (make-regexp "^!#"))
  135. (all-comment-rx (make-regexp "^[ \t]*(;+)"))
  136. (all-whitespace-rx (make-regexp "^[ \t]*$"))
  137. (p (open-input-file file)))
  138. (let loop ((n (1+ (port-line p))) (line (read-line p)))
  139. (or (not n)
  140. (eof-object? line)
  141. (begin
  142. (cond ((regexp-exec hash-bang-rx line)
  143. (let loop ((line (read-line p))
  144. (text (list line)))
  145. (if (or (eof-object? line)
  146. (regexp-exec bang-hash-rx line))
  147. (nb! `'(hash-bang-comment
  148. :line ,n
  149. :line-count ,(1+ (length text))
  150. :text-list ,(reverse
  151. (cons line text))))
  152. (loop (read-line p)
  153. (cons line text)))))
  154. ((regexp-exec all-whitespace-rx line)
  155. (nb! `'(whitespace :text ,line)))
  156. ((regexp-exec all-comment-rx line)
  157. => (lambda (m)
  158. (nb! `'(comment
  159. :leading-semicolons
  160. ,(let ((m1 (vector-ref m 1)))
  161. (- (cdr m1) (car m1)))
  162. :text ,line))))
  163. (else
  164. (unread-string line p)
  165. (let* ((form (read p))
  166. (count (- (port-line p) n))
  167. (props (let* ((props '())
  168. (prop+ (lambda args
  169. (set! props
  170. (append props args)))))
  171. (annotate! form prop+)
  172. props)))
  173. (or (= count 1) ; ugh
  174. (begin
  175. (read-line p)
  176. (set! count (1+ count))))
  177. (nb! `'(following-form-properties
  178. :line ,n
  179. :line-count ,count
  180. ,@props))
  181. (nb! form))))
  182. (loop (1+ (port-line p)) (read-line p)))))))
  183. ;;; entry points
  184. (define (read-scheme-source-silently . files)
  185. "See commentary in module (scripts read-scheme-source)."
  186. (let* ((res '()))
  187. (for-each (lambda (file)
  188. (process file (lambda (e) (set! res (cons e res)))))
  189. files)
  190. (reverse res)))
  191. (define (read-scheme-source . files)
  192. "See commentary in module (scripts read-scheme-source)."
  193. (for-each (lambda (file)
  194. (process file (lambda (e) (write e) (newline))))
  195. files))
  196. ;; Recognize: (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...))
  197. ;; and return alist: ((TAG1 . VAL1) (TAG2 . VAL2) ...)
  198. ;; where the tags are symbols.
  199. ;;
  200. (define (quoted? sym form)
  201. (and (list? form)
  202. (= 2 (length form))
  203. (eq? 'quote (car form))
  204. (let ((inside (cadr form)))
  205. (and (list? inside)
  206. (< 0 (length inside))
  207. (eq? sym (car inside))
  208. (let loop ((ls (cdr inside)) (alist '()))
  209. (if (null? ls)
  210. alist ; retval
  211. (let ((first (car ls)))
  212. (or (symbol? first)
  213. (error "bad list!"))
  214. (loop (cddr ls)
  215. (acons (string->symbol
  216. (substring (symbol->string first) 1))
  217. (cadr ls)
  218. alist)))))))))
  219. ;; Filter FORMS, combining contiguous comment forms that have the same number
  220. ;; of leading semicolons. Do not include in them whitespace lines.
  221. ;; Whitespace lines outside of such comment groupings are ignored, as are
  222. ;; hash-bang comments. All other forms are passed through unchanged.
  223. ;;
  224. (define (clump forms)
  225. (let loop ((forms forms) (acc '()) (pass-this-one-through? #f))
  226. (if (null? forms)
  227. (reverse acc) ; retval
  228. (let ((form (car forms)))
  229. (cond (pass-this-one-through?
  230. (loop (cdr forms) (cons form acc) #f))
  231. ((quoted? 'following-form-properties form)
  232. (loop (cdr forms) (cons form acc) #t))
  233. ((quoted? 'whitespace form) ;;; ignore
  234. (loop (cdr forms) acc #f))
  235. ((quoted? 'hash-bang-comment form) ;;; ignore for now
  236. (loop (cdr forms) acc #f))
  237. ((quoted? 'comment form)
  238. => (lambda (alist)
  239. (let cloop ((inner-forms (cdr forms))
  240. (level (assq-ref alist 'leading-semicolons))
  241. (text (list (assq-ref alist 'text))))
  242. (let ((up (lambda ()
  243. (loop inner-forms
  244. (cons (cons level (reverse text))
  245. acc)
  246. #f))))
  247. (if (null? inner-forms)
  248. (up)
  249. (let ((inner-form (car inner-forms)))
  250. (cond ((quoted? 'comment inner-form)
  251. => (lambda (inner-alist)
  252. (let ((new-level
  253. (assq-ref
  254. inner-alist
  255. 'leading-semicolons)))
  256. (if (= new-level level)
  257. (cloop (cdr inner-forms)
  258. level
  259. (cons (assq-ref
  260. inner-alist
  261. 'text)
  262. text))
  263. (up)))))
  264. (else (up)))))))))
  265. (else (loop (cdr forms) (cons form acc) #f)))))))
  266. ;;; script entry point
  267. (define main read-scheme-source)
  268. ;;; read-scheme-source ends here