123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897 |
- ;;; Scheme reader
- ;;; Copyright (C) 1995-1997,1999-2001,2003-2004,2006-2012,2014-2021
- ;;; Free Software Foundation, Inc.
- ;;;
- ;;; This library is free software: you can redistribute it and/or modify
- ;;; it under the terms of the GNU Lesser General Public License as
- ;;; published by the Free Software Foundation, either version 3 of the
- ;;; License, or (at your option) any later version.
- ;;;
- ;;; This library is distributed in the hope that it will be useful, but
- ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
- ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- ;;; Lesser General Public License for more details.
- ;;;
- ;;; You should have received a copy of the GNU Lesser General Public
- ;;; License along with this program. If not, see
- ;;; <http://www.gnu.org/licenses/>.
- ;;; Commentary:
- ;;;
- ;;; Implementation of Scheme's "read".
- ;;;
- ;;; Code:
- ;; While porting read.c to Scheme, I found these expressions that result
- ;; in undesirable behavior in the C reader. Most all of them are also
- ;; present in the Scheme reader. Probably I should fix all of them, but
- ;; I would first like to prove that the Scheme reader is good enough.
- ;;
- ;; (call-with-input-string "," read)
- ;; (read-disable 'square-brackets), then (call-with-input-string "]" read)
- ;; (call-with-input-string "(#tru1)" read) => '(#t ru1)
- ;; (call-with-input-string "(#true1)" read) => '(#t 1)
- ;; (call-with-input-string "(#fAlse)" read) => '(#f Alse)
- ;; (call-with-input-string "(#f1 #f2 #f3)" read) => error reading array
- ;; #: foo
- ;; #:#|what|#foo
- ;; #@-(1 2 3) => #(1 2 3)
- ;; (#*10101010102) => (#*1010101010 2)
- (define-syntax let*-values
- (syntax-rules ()
- ((_ () . body) (let () . body))
- ((_ ((vars expr) . binds) . body)
- (call-with-values (lambda () expr)
- (lambda vars (let*-values binds . body))))))
- (define bitfield:record-positions? 0)
- (define bitfield:case-insensitive? 2)
- (define bitfield:keyword-style 4)
- (define bitfield:r6rs-escapes? 6)
- (define bitfield:square-brackets? 8)
- (define bitfield:hungry-eol-escapes? 10)
- (define bitfield:curly-infix? 12)
- (define bitfield:r7rs-symbols? 14)
- (define read-option-bits 16)
- (define read-option-mask #b11)
- (define read-option-inherit #b11)
- (define read-options-inherit-all (1- (ash 1 read-option-bits)))
- (define keyword-style-hash-prefix 0)
- (define keyword-style-prefix 1)
- (define keyword-style-postfix 2)
- (define (compute-reader-options port)
- (let ((options (read-options))
- (port-options (or (%port-property port 'port-read-options)
- read-options-inherit-all)))
- (define-syntax-rule (option field exp)
- (let ((port-option (logand port-options (ash read-option-mask field))))
- (if (= port-option (ash read-option-inherit field))
- exp
- port-option)))
- (define (bool key field)
- (option field
- (if (memq key options) (ash 1 field) 0)))
- (define (enum key values field)
- (option field
- (ash (assq-ref values (and=> (memq key options) cadr)) field)))
- (logior (bool 'positions bitfield:record-positions?)
- (bool 'case-insensitive bitfield:case-insensitive?)
- (enum 'keywords '((#f . 0) (prefix . 1) (postfix . 2))
- bitfield:keyword-style)
- (bool 'r6rs-hex-escapes bitfield:r6rs-escapes?)
- (bool 'square-brackets bitfield:square-brackets?)
- (bool 'hungry-eol-escapes bitfield:hungry-eol-escapes?)
- (bool 'curly-infix bitfield:curly-infix?)
- (bool 'r7rs-symbols bitfield:r7rs-symbols?))))
- (define (set-option options field new)
- (logior (ash new field) (logand options (lognot (ash #b11 field)))))
- (define (set-port-read-option! port field value)
- (%set-port-property! port 'port-read-options
- (set-option (or (%port-property port 'port-read-options)
- read-options-inherit-all)
- field value)))
- (define (%read port annotate strip-annotation)
- ;; init read options
- (define opts (compute-reader-options port))
- (define (enabled? field)
- (not (zero? (logand (ash 1 field) opts))))
- (define (set-reader-option! field value)
- (set! opts (set-option opts field value))
- (set-port-read-option! port field value))
- (define (case-insensitive?) (enabled? bitfield:case-insensitive?))
- (define (keyword-style) (logand read-option-mask
- (ash opts (- bitfield:keyword-style))))
- (define (r6rs-escapes?) (enabled? bitfield:r6rs-escapes?))
- (define (square-brackets?) (enabled? bitfield:square-brackets?))
- (define (hungry-eol-escapes?) (enabled? bitfield:hungry-eol-escapes?))
- (define (curly-infix?) (enabled? bitfield:curly-infix?))
- (define (r7rs-symbols?) (enabled? bitfield:r7rs-symbols?))
- (define neoteric 0)
- (define (next) (read-char port))
- (define (peek) (peek-char port))
- (define filename (port-filename port))
- (define (get-pos) (cons (port-line port) (port-column port)))
- ;; We are only ever interested in whether an object is a char or not.
- (define (eof-object? x) (not (char? x)))
- (define (input-error msg args)
- (scm-error 'read-error #f
- (format #f "~A:~S:~S: ~A"
- (or filename "#<unknown port>")
- (1+ (port-line port))
- (1+ (port-column port))
- msg)
- args #f))
- (define-syntax-rule (error msg arg ...)
- (let ((args (list arg ...)))
- (input-error msg args)))
- (define (read-semicolon-comment)
- (let ((ch (next)))
- (cond
- ((eof-object? ch) ch)
- ((eqv? ch #\newline) (next))
- (else (read-semicolon-comment)))))
- (define-syntax-rule (take-until first pred)
- (let lp ((out (list first)))
- (let ((ch (peek)))
- (if (or (eof-object? ch) (pred ch))
- (reverse-list->string out)
- (begin
- (next)
- (lp (cons ch out)))))))
- (define-syntax-rule (take-while first pred)
- (take-until first (lambda (ch) (not (pred ch)))))
- (define (delimiter? ch)
- (case ch
- ((#\( #\) #\; #\" #\space #\return #\ff #\newline #\tab) #t)
- ((#\[ #\]) (or (square-brackets?) (curly-infix?)))
- ((#\{ #\}) (curly-infix?))
- (else #f)))
- (define (read-token ch)
- (take-until ch delimiter?))
- (define (read-mixed-case-symbol ch)
- (let* ((str (read-token ch))
- (len (string-length str)))
- (cond
- ((and (eq? (keyword-style) keyword-style-postfix)
- (> len 1) (eqv? #\: (string-ref str (1- len))))
- (let ((str (substring str 0 (1- len))))
- (symbol->keyword
- (string->symbol
- (if (case-insensitive?)
- (string-downcase str)
- str)))))
- (else
- (string->symbol
- (if (case-insensitive?)
- (string-downcase str)
- str))))))
- (define (read-parenthesized rdelim)
- (define (finish-curly-infix ret)
- ;; Perform syntactic transformations on {...} lists.
- (define (extract-infix-list ls)
- (and (pair? ls)
- (let ((x (car ls))
- (ls (cdr ls)))
- (and (pair? ls)
- (let ((op (car ls))
- (ls (cdr ls)))
- (if (and (pair? ls) (null? (cdr ls)))
- (cons* op x ls)
- (let ((tail (extract-infix-list ls)))
- (and tail
- (equal? (strip-annotation op)
- (strip-annotation (car tail)))
- (cons* op x (cdr tail))))))))))
- (cond
- ((not (eqv? rdelim #\})) ret) ; Only on {...} lists.
- ((not (pair? ret)) ret) ; {} => (); {.x} => x
- ((null? (cdr ret)) (car ret)); {x} => x
- ((and (pair? (cdr ret)) (null? (cddr ret))) ret) ; {x y} => (x y)
- ((extract-infix-list ret)) ; {x + y + ... + z} => (+ x y ... z)
- (else (cons '$nfx$ ret)))) ; {x y . z} => ($nfx$ x y . z)
- (define curly? (eqv? rdelim #\}))
- (finish-curly-infix
- (let lp ((ch (next-non-whitespace)))
- (when (eof-object? ch)
- (error "unexpected end of input while searching for: ~A"
- rdelim))
- (cond
- ((eqv? ch rdelim) '())
- ((or (eqv? ch #\))
- (and (eqv? ch #\]) (or (square-brackets?) (curly-infix?)))
- (and (eqv? ch #\}) (curly-infix?)))
- (error "mismatched close paren: ~A" ch))
- (else
- (let ((expr (read-expr ch)))
- ;; Note that it is possible for scm_read_expression to
- ;; return `.', but not as part of a dotted pair: as in
- ;; #{.}#. Indeed an example is here!
- (if (and (eqv? ch #\.) (eq? (strip-annotation expr) '#{.}#))
- (let* ((tail (read-subexpression "tail of improper list"))
- (close (next-non-whitespace)))
- (unless (eqv? close rdelim)
- (error "missing close paren: ~A" close))
- tail)
- (cons expr (lp (next-non-whitespace))))))))))
- (define (hex-digit ch)
- (case ch
- ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
- (- (char->integer ch) (char->integer #\0)))
- ((#\a #\b #\c #\d #\e #\f)
- (+ 10 (- (char->integer ch) (char->integer #\a))))
- ((#\A #\B #\C #\D #\E #\F)
- (+ 10 (- (char->integer ch) (char->integer #\A))))
- (else #f)))
-
- (define (read-r6rs-hex-escape)
- (let ((ch (next)))
- (cond
- ((hex-digit ch) =>
- (lambda (res)
- (let lp ((res res))
- (let ((ch (next)))
- (cond
- ((hex-digit ch) => (lambda (digit) (lp (+ (* 16 res) digit))))
- ((eqv? ch #\;) (integer->char res))
- ((eof-object? ch)
- (error "unexpected end of input in character escape sequence"))
- (else
- (error "invalid character in escape sequence: ~S" ch)))))))
- ((eof-object? ch)
- (error "unexpected end of input in character escape sequence"))
- (else
- (error "invalid character in escape sequence: ~S" ch)))))
- (define (read-fixed-hex-escape len)
- (let lp ((len len) (res 0))
- (if (zero? len)
- (integer->char res)
- (let ((ch (next)))
- (cond
- ((hex-digit ch) =>
- (lambda (digit)
- (lp (1- len) (+ (* res 16) digit))))
- ((eof-object? ch)
- (error "unexpected end of input in character escape sequence"))
- (else
- (error "invalid character in escape sequence: ~S" ch)))))))
- (define (read-string rdelim)
- (let lp ((out '()))
- (let ((ch (next)))
- (cond
- ((eof-object? ch)
- (error "unexpected end of input while reading string"))
- ((eqv? ch rdelim)
- (reverse-list->string out))
- ((eqv? ch #\\)
- (let ((ch (next)))
- (when (eof-object? ch)
- (error "unexpected end of input while reading string"))
- (cond
- ((eqv? ch #\newline)
- (when (hungry-eol-escapes?)
- ;; Skip intraline whitespace before continuing.
- (let skip ()
- (let ((ch (peek)))
- (when (and (not (eof-object? ch))
- (or (eqv? ch #\tab)
- (eq? (char-general-category ch) 'Zs)))
- (next)
- (skip)))))
- (lp out))
- ((eqv? ch rdelim)
- (lp (cons rdelim out)))
- (else
- (lp
- (cons
- (case ch
- ;; Accept "\(" for use at the beginning of
- ;; lines in multiline strings to avoid
- ;; confusing emacs lisp modes.
- ((#\| #\\ #\() ch)
- ((#\0) #\nul)
- ((#\f) #\ff)
- ((#\n) #\newline)
- ((#\r) #\return)
- ((#\t) #\tab)
- ((#\a) #\alarm)
- ((#\v) #\vtab)
- ((#\b) #\backspace)
- ((#\x)
- (if (or (r6rs-escapes?) (eqv? rdelim #\|))
- (read-r6rs-hex-escape)
- (read-fixed-hex-escape 2)))
- ((#\u)
- (read-fixed-hex-escape 4))
- ((#\U)
- (read-fixed-hex-escape 6))
- (else
- (error "invalid character in escape sequence: ~S" ch)))
- out))))))
- (else
- (lp (cons ch out)))))))
- (define (read-character)
- (let ((ch (next)))
- (cond
- ((eof-object? ch)
- (error "unexpected end of input after #\\"))
- ((delimiter? ch)
- ch)
- (else
- (let* ((tok (read-token ch))
- (len (string-length tok)))
- (define dotted-circle #\x25cc)
- (define r5rs-charnames
- '(("space" . #\x20) ("newline" . #\x0a)))
- (define r6rs-charnames
- '(("nul" . #\x00) ("alarm" . #\x07) ("backspace" . #\x08)
- ("tab" . #\x09) ("linefeed" . #\x0a) ("vtab" . #\x0b)
- ("page" . #\x0c) ("return" . #\x0d) ("esc" . #\x1b)
- ("delete" . #\x7f)))
- (define r7rs-charnames
- '(("escape" . #\x1b)))
- (define C0-control-charnames
- '(("nul" . #\x00) ("soh" . #\x01) ("stx" . #\x02)
- ("etx" . #\x03) ("eot" . #\x04) ("enq" . #\x05)
- ("ack" . #\x06) ("bel" . #\x07) ("bs" . #\x08)
- ("ht" . #\x09) ("lf" . #\x0a) ("vt" . #\x0b)
- ("ff" . #\x0c) ("cr" . #\x0d) ("so" . #\x0e)
- ("si" . #\x0f) ("dle" . #\x10) ("dc1" . #\x11)
- ("dc2" . #\x12) ("dc3" . #\x13) ("dc4" . #\x14)
- ("nak" . #\x15) ("syn" . #\x16) ("etb" . #\x17)
- ("can" . #\x18) ("em" . #\x19) ("sub" . #\x1a)
- ("esc" . #\x1b) ("fs" . #\x1c) ("gs" . #\x1d)
- ("rs" . #\x1e) ("us" . #\x1f) ("sp" . #\x20)
- ("del" . #\x7f)))
- (define alt-charnames
- '(("null" . #\x0) ("nl" . #\x0a) ("np" . #\x0c)))
- ;; Although R6RS and R7RS charnames specified as being
- ;; case-sensitive, Guile matches them case-insensitively, like
- ;; other char names.
- (define (named-char tok alist)
- (let lp ((alist alist))
- (and (pair? alist)
- (if (string-ci=? tok (caar alist))
- (cdar alist)
- (lp (cdr alist))))))
- (cond
- ((= len 1) ch)
- ((and (= len 2) (eqv? (string-ref tok 1) dotted-circle))
- ;; Ignore dotted circles, which may be used to keep
- ;; combining characters from combining with the backslash in
- ;; #\charname.
- ch)
- ((and (<= (char->integer #\0) (char->integer ch) (char->integer #\7))
- (string->number tok 8))
- ;; Specifying a codepoint as an octal value.
- => integer->char)
- ((and (eqv? ch #\x) (> len 1)
- (string->number (substring tok 1) 16))
- ;; Specifying a codepoint as an hexadecimal value. Skip
- ;; initial "x".
- => integer->char)
- ((named-char tok r5rs-charnames))
- ((named-char tok r6rs-charnames))
- ((named-char tok r7rs-charnames))
- ((named-char tok C0-control-charnames))
- ((named-char tok alt-charnames))
- (else
- (error "unknown character name ~a" tok))))))))
- (define (read-vector)
- (list->vector (map strip-annotation (read-parenthesized #\)))))
- (define (read-srfi-4-vector ch)
- (read-array ch))
- (define (maybe-read-boolean-tail tail)
- (let ((len (string-length tail)))
- (let lp ((i 0))
- (or (= i len)
- (let ((ch (peek)))
- (and (not (eof-object? ch))
- (eqv? (char-downcase ch) (string-ref tail i))
- (or (begin
- (next)
- (lp (1+ i)))
- (begin
- (unread-char ch port)
- #f))))))))
- (define (read-false-or-srfi-4-vector)
- (let ((ch (peek)))
- (if (or (eqv? ch #\3)
- (eqv? ch #\6))
- (read-srfi-4-vector #\f)
- (begin
- (maybe-read-boolean-tail "alse")
- #f))))
- (define (read-bytevector)
- (define (expect ch)
- (unless (eqv? (next) ch)
- (error "invalid bytevector prefix" ch)))
- (expect #\u)
- (expect #\8)
- (expect #\()
- (list->typed-array 'vu8 1
- (map strip-annotation (read-parenthesized #\)))))
- ;; FIXME: We should require a terminating delimiter.
- (define (read-bitvector)
- (list->bitvector
- (let lp ()
- (let ((ch (peek)))
- (case ch
- ((#\0) (next) (cons #f (lp)))
- ((#\1) (next) (cons #t (lp)))
- (else '()))))))
- (define (read-boolean ch)
- ;; Historically, Guile hasn't required a delimiter after #f / #t.
- ;; When the longer #false / #true forms were added, we kept this
- ;; behavior. It is terrible and we should change it!!
- (case ch
- ((#\t #\T)
- (maybe-read-boolean-tail "rue")
- #t)
- (else
- (maybe-read-boolean-tail "alse")
- #f)))
- (define (read-keyword)
- (let ((expr (strip-annotation (read-subexpression "keyword"))))
- (unless (symbol? expr)
- (error "keyword prefix #: not followed by a symbol: ~a" expr))
- (symbol->keyword expr)))
- (define (read-array ch)
- (define (read-decimal-integer ch alt)
- ;; This parser has problems but it's what Guile's read.c does. Any
- ;; fix should come later and to both of them.
- (define (decimal-digit ch)
- (and (not (eof-object? ch))
- (let ((digit (- (char->integer ch) (char->integer #\0))))
- (and (<= 0 digit 9) digit))))
- (let*-values (((sign ch) (if (eqv? ch #\-)
- (values -1 (next))
- (values 1 ch))))
- (let lp ((ch ch) (res #f))
- (cond
- ((decimal-digit ch)
- => (lambda (digit)
- (lp (next) (if res (+ (* 10 res) digit) digit))))
- (else
- (values ch (if res (* res sign) alt)))))))
- (define (read-rank ch)
- (let*-values (((ch rank) (read-decimal-integer ch 1)))
- (when (< rank 0)
- (error "array rank must be non-negative"))
- (when (eof-object? ch)
- (error "unexpected end of input while reading array"))
- (values ch rank)))
- (define (read-tag ch)
- (let lp ((ch ch) (chars '()))
- (when (eof-object? ch)
- (error "unexpected end of input while reading array"))
- (if (memv ch '(#\( #\@ #\:))
- (values ch
- (if (null? chars)
- #t
- (string->symbol (list->string (reverse chars)))))
- (lp (next) (cons ch chars)))))
- (define (read-dimension ch)
- (let*-values (((ch lbnd) (if (eqv? ch #\@)
- (read-decimal-integer (next) 0)
- (values ch 0)))
- ((ch len) (if (eqv? ch #\:)
- (read-decimal-integer (next) 0)
- (values ch #f))))
- (when (and len (< len 0))
- (error "array length must be non-negative"))
- (when (eof-object? ch)
- (error "unexpected end of input while reading array"))
- (values ch
- (if len
- (list lbnd (+ lbnd (1- len)))
- lbnd))))
- (define (read-shape ch alt)
- (if (memv ch '(#\@ #\:))
- (let*-values (((ch head) (read-dimension ch))
- ((ch tail) (read-shape ch '())))
- (values ch (cons head tail)))
- (values ch alt)))
- (define (read-elements ch rank)
- (unless (eqv? ch #\()
- (error "missing '(' in vector or array literal"))
- (let ((elts (map strip-annotation (read-parenthesized #\)))))
- (if (zero? rank)
- (begin
- ;; Handle special print syntax of rank zero arrays; see
- ;; scm_i_print_array for a rationale.
- (when (null? elts)
- (error "too few elements in array literal, need 1"))
- (unless (null? (cdr elts))
- (error "too many elements in array literal, need 1"))
- (car elts))
- elts)))
- (let*-values (((ch rank) (read-rank ch))
- ((ch tag) (read-tag ch))
- ((ch shape) (read-shape ch rank))
- ((elts) (read-elements ch rank)))
- (when (and (pair? shape) (not (eqv? (length shape) rank)))
- (error "the number of shape specifications must match the array rank"))
- (list->typed-array tag shape elts)))
- (define (read-number-and-radix ch)
- (let ((tok (string-append "#" (read-token ch))))
- (or (string->number tok)
- (error "unknown # object: ~S" tok))))
- (define (read-extended-symbol)
- (define (next-not-eof)
- (let ((ch (next)))
- (when (eof-object? ch)
- (error "end of input while reading symbol"))
- ch))
- (string->symbol
- (list->string
- (let lp ((saw-brace? #f))
- (let lp/inner ((ch (next-not-eof))
- (saw-brace? saw-brace?))
- (cond
- (saw-brace?
- (if (eqv? ch #\#)
- '()
- ;; Don't eat CH, see
- ;; <https://debbugs.gnu.org/cgi/bugreport.cgi?bug=49623>.
- (cons #\} (lp/inner ch #f))))
- ((eqv? ch #\})
- (lp #t))
- ((eqv? ch #\\)
- ;; It used to be that print.c would print extended-read-syntax
- ;; symbols with backslashes before "non-standard" chars, but
- ;; this routine wouldn't do anything with those escapes.
- ;; Bummer. What we've done is to change print.c to output
- ;; R6RS hex escapes for those characters, relying on the fact
- ;; that the extended read syntax would never put a `\' before
- ;; an `x'. For now, we just ignore other instances of
- ;; backslash in the string.
- (let* ((ch (next-not-eof))
- (ch (if (eqv? ch #\x)
- (read-r6rs-hex-escape)
- ch)))
- (cons ch (lp #f))))
- (else
- (cons ch (lp #f)))))))))
- (define (read-nil)
- ;; Have already read "#\n" -- now read "il".
- (let ((id (read-mixed-case-symbol #\n)))
- (unless (eq? id 'nil)
- (error "unexpected input while reading #nil: ~a" id))
- #nil))
- (define (read-sharp)
- (let* ((ch (next)))
- (cond
- ((eof-object? ch)
- (error "unexpected end of input after #"))
- ((read-hash-procedure ch)
- => (lambda (proc) (proc ch port)))
- (else
- (case ch
- ((#\\) (read-character))
- ((#\() (read-vector))
- ((#\s #\u #\c) (read-srfi-4-vector ch))
- ((#\f) (read-false-or-srfi-4-vector))
- ((#\v) (read-bytevector))
- ((#\*) (read-bitvector))
- ((#\t #\T #\F) (read-boolean ch))
- ((#\:) (read-keyword))
- ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\@)
- (read-array ch))
- ((#\i #\e #\b #\B #\o #\O #\d #\D #\x #\X #\I #\E)
- (read-number-and-radix ch))
- ((#\{) (read-extended-symbol))
- ((#\') (list 'syntax (read-subexpression "syntax expression")))
- ((#\`) (list 'quasisyntax
- (read-subexpression "quasisyntax expression")))
- ((#\,)
- (if (eqv? #\@ (peek))
- (begin
- (next)
- (list 'unsyntax-splicing
- (read-subexpression "unsyntax-splicing expression")))
- (list 'unsyntax (read-subexpression "unsyntax expression"))))
- ((#\n) (read-nil))
- (else
- (error "Unknown # object: ~S" (string #\# ch))))))))
- (define (read-number ch)
- (let* ((str (read-token ch)))
- (or (string->number str)
- (string->symbol (if (case-insensitive?)
- (string-downcase str)
- str)))))
- (define (read-expr* ch)
- (case ch
- ((#\{)
- (cond
- ((curly-infix?)
- (set! neoteric (1+ neoteric))
- (let ((expr (read-parenthesized #\})))
- (set! neoteric (1- neoteric))
- expr))
- (else
- (read-mixed-case-symbol ch))))
- ((#\[)
- (cond
- ((square-brackets?)
- (read-parenthesized #\]))
- ((curly-infix?)
- ;; The syntax of neoteric expressions requires that '[' be a
- ;; delimiter when curly-infix is enabled, so it cannot be part
- ;; of an unescaped symbol. We might as well do something
- ;; useful with it, so we adopt Kawa's convention: [...] =>
- ;; ($bracket-list$ ...)
- ;; FIXME: source locations for this cons
- (cons '$bracket-list$ (read-parenthesized #\])))
- (else
- (read-mixed-case-symbol ch))))
- ((#\()
- (read-parenthesized #\)))
- ((#\")
- (read-string ch))
- ((#\|)
- (if (r7rs-symbols?)
- (string->symbol (read-string ch))
- (read-mixed-case-symbol ch)))
- ((#\')
- (list 'quote (read-subexpression "quoted expression")))
- ((#\`)
- (list 'quasiquote (read-subexpression "quasiquoted expression")))
- ((#\,)
- (cond
- ((eqv? #\@ (peek))
- (next)
- (list 'unquote-splicing (read-subexpression "subexpression of ,@")))
- (else
- (list 'unquote (read-subexpression "unquoted expression")))))
- ((#\#)
- ;; FIXME: read-sharp should recur if we read a comment
- (read-sharp))
- ((#\))
- (error "unexpected \")\""))
- ((#\})
- (if (curly-infix?)
- (error "unexpected \"}\"")
- (read-mixed-case-symbol ch)))
- ((#\])
- (if (square-brackets?)
- (error "unexpected \"]\"")
- (read-mixed-case-symbol ch)))
- ((#\:)
- (if (eq? (keyword-style) keyword-style-prefix)
- ;; FIXME: Don't skip whitespace here.
- (let ((sym (read-subexpression ":keyword")))
- (symbol->keyword (strip-annotation sym)))
- (read-mixed-case-symbol ch)))
- ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\+ #\- #\.)
- (read-number ch))
- (else
- (read-mixed-case-symbol ch))))
- (define (read-neoteric ch)
- (let lp ((expr (read-expr* ch)))
- ;; 'expr' is the first component of the neoteric expression. If
- ;; the next character is '(', '[', or '{', (without any
- ;; intervening whitespace), we use it to construct a new
- ;; expression, and loop. For example:
- ;; f{n - 1}(x) => ((f (- n 1)) x).
- (case (peek)
- ((#\() ;; e(...) => (e ...)
- (next)
- (lp (cons expr (read-parenthesized #\)))))
- ((#\[) ;; e[...] => ($bracket-apply$ e ...)
- (next)
- (lp (cons* '$bracket-apply$ expr (read-parenthesized #\]))))
- ((#\{) ;; e{} => (e); e{...} => (e {...})
- (next)
- (let ((args (read-parenthesized #\})))
- (lp (if (null? args)
- (list expr)
- (list expr args)))))
- (else
- expr))))
- (define (read-expr ch)
- (let ((line (port-line port))
- (column (port-column port)))
- (annotate line
- column
- (if (zero? neoteric)
- (read-expr* ch)
- (read-neoteric ch)))))
- (define (read-directive)
- (define (directive-char? ch)
- (and (char? ch)
- (or (eqv? ch #\-)
- (char-alphabetic? ch)
- (char-numeric? ch))))
- (let ((ch (peek)))
- (cond
- ((directive-char? ch)
- (next)
- (string->symbol (take-while ch directive-char?)))
- (else
- #f))))
- (define (skip-scsh-comment)
- (let lp ((ch (next)))
- (cond
- ((eof-object? ch)
- (error "unterminated `#! ... !#' comment"))
- ((eqv? ch #\!)
- (let ((ch (next)))
- (if (eqv? ch #\#)
- (next)
- (lp ch))))
- (else
- (lp (next))))))
- (define (process-shebang)
- ;; After having read #!, we complete either with #!r6rs,
- ;; #!fold-case, #!no-fold-case, #!curly-infix,
- ;; #!curly-infix-and-bracket-lists, or a SCSH block comment
- ;; terminated by !#.
- (let ((sym (read-directive)))
- (cond
- ((eq? sym 'r6rs)
- (set-reader-option! bitfield:case-insensitive? 0)
- (set-reader-option! bitfield:r6rs-escapes? 1)
- (set-reader-option! bitfield:square-brackets? 1)
- (set-reader-option! bitfield:keyword-style keyword-style-hash-prefix)
- (set-reader-option! bitfield:hungry-eol-escapes? 1)
- (next))
- ((eq? sym 'fold-case)
- (set-reader-option! bitfield:case-insensitive? 1)
- (next))
- ((eq? sym 'no-fold-case)
- (set-reader-option! bitfield:case-insensitive? 0)
- (next))
- ((eq? sym 'curly-infix)
- (set-reader-option! bitfield:curly-infix? 1)
- (next))
- ((eq? sym 'curly-infix-and-bracket-lists)
- (set-reader-option! bitfield:curly-infix? 1)
- (set-reader-option! bitfield:square-brackets? 0)
- (next))
- (else
- (skip-scsh-comment)))))
- (define (skip-eol-comment)
- (let ((ch (next)))
- (cond
- ((eof-object? ch) ch)
- ((eq? ch #\newline) (next))
- (else (skip-eol-comment)))))
- ;; Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
- ;; nested.
- (define (skip-r6rs-block-comment)
- ;; We have read #|, now looking for |#.
- (let ((ch (next)))
- (when (eof-object? ch)
- (error "unterminated `#| ... |#' comment"))
- (cond
- ((and (eqv? ch #\|) (eqv? (peek) #\#))
- ;; Done.
- (next)
- (values))
- ((and (eqv? ch #\#) (eqv? (peek) #\|))
- ;; A nested comment.
- (next)
- (skip-r6rs-block-comment)
- (skip-r6rs-block-comment))
- (else
- (skip-r6rs-block-comment)))))
- (define (read-subexpression what)
- (let ((ch (next-non-whitespace)))
- (when (eof-object? ch)
- (error (string-append "unexpected end of input while reading " what)))
- (read-expr ch)))
- (define (next-non-whitespace)
- (let lp ((ch (next)))
- (case ch
- ((#\;)
- (lp (skip-eol-comment)))
- ((#\#)
- (case (peek)
- ((#\!)
- (next)
- (lp (process-shebang)))
- ((#\;)
- (next)
- (read-subexpression "#; comment")
- (next-non-whitespace))
- ((#\|)
- (if (read-hash-procedure #\|)
- ch
- (begin
- (next)
- (skip-r6rs-block-comment)
- (next-non-whitespace))))
- (else ch)))
- ((#\space #\return #\ff #\newline #\tab)
- (next-non-whitespace))
- (else ch))))
- (let ((ch (next-non-whitespace)))
- (if (eof-object? ch)
- ch
- (read-expr ch))))
- (define* (read #:optional (port (current-input-port)))
- (define filename (port-filename port))
- (define annotate
- (if (memq 'positions (read-options))
- (lambda (line column datum)
- (when (and (supports-source-properties? datum)
- ;; Line or column can be invalid via
- ;; set-port-column! or ungetting chars beyond start
- ;; of line.
- (<= 0 line)
- (<= 1 column))
- ;; We always capture the column after one char of lookahead;
- ;; subtract off that lookahead value.
- (set-source-properties! datum
- `((filename . ,filename)
- (line . ,line)
- (column . ,(1- column)))))
- datum)
- (lambda (line column datum)
- datum)))
- (%read port annotate identity))
- (define* (read-syntax #:optional (port (current-input-port)))
- (define filename (port-filename port))
- (define (annotate line column datum)
- ;; Usually when reading compound expressions consisting of multiple
- ;; syntax objects, like lists, the "leaves" of the expression are
- ;; annotated but the "root" isn't. Like in (A . B), A and B will be
- ;; annotated but the pair won't. Therefore the usually correct
- ;; thing to do is to just annotate the result. However in the case
- ;; of reading ( . C), the result is the already annotated C, which
- ;; we don't want to re-annotate. Therefore we avoid re-annotating
- ;; already annotated objects.
- (if (syntax? datum)
- datum
- (datum->syntax #f ; No lexical context.
- datum
- #:source (vector filename line (1- column)))))
- (%read port annotate syntax->datum))
|